DDC 0.4.1
Loading...
Searching...
No Matches
splines_linear_problem_sparse.hpp
1// Copyright (C) The DDC development team, see COPYRIGHT.md file
2//
3// SPDX-License-Identifier: MIT
4
5#pragma once
6
7#include <algorithm>
8#include <cassert>
9#include <cstddef>
10#include <memory>
11#include <optional>
12#include <stdexcept>
13#include <type_traits>
14
15#include <ginkgo/extensions/kokkos.hpp>
16#include <ginkgo/ginkgo.hpp>
17
18#include <Kokkos_Core.hpp>
19
20#include "splines_linear_problem.hpp"
21
22namespace ddc::detail {
23
24/**
25 * @brief Convert KokkosView to Ginkgo Dense matrix.
26 *
27 * @param[in] gko_exec A Ginkgo executor that has access to the Kokkos::View memory space
28 * @param[in] view A 2-D Kokkos::View with unit stride in the second dimension
29 * @return A Ginkgo Dense matrix view over the Kokkos::View data
30 */
31template <class KokkosViewType>
32auto to_gko_dense(std::shared_ptr<const gko::Executor> const& gko_exec, KokkosViewType const& view)
33{
34 static_assert((Kokkos::is_view_v<KokkosViewType> && KokkosViewType::rank == 2));
35 using value_type = typename KokkosViewType::traits::value_type;
36
37 if (view.stride_1() != 1) {
38 throw std::runtime_error("The view needs to be contiguous in the second dimension");
39 }
40
41 return gko::matrix::Dense<value_type>::
42 create(gko_exec,
43 gko::dim<2>(view.extent(0), view.extent(1)),
44 gko::array<value_type>::view(gko_exec, view.span(), view.data()),
45 view.stride_0());
46}
47
48/**
49 * @brief Return the default value of the parameter cols_per_chunk for a given Kokkos::ExecutionSpace.
50 *
51 * The values are hardware-specific (but they can be overridden in the constructor of SplinesLinearProblemSparse).
52 * They have been tuned on the basis of ddc/benchmarks/splines.cpp results on 4xIntel 6230 + Nvidia V100.
53 *
54 * @tparam ExecSpace The Kokkos::ExecutionSpace type.
55 * @return The default value for the parameter cols_per_chunk.
56 */
57template <class ExecSpace>
58std::size_t default_cols_per_chunk() noexcept
59{
60#if defined(KOKKOS_ENABLE_SERIAL)
61 if (std::is_same_v<ExecSpace, Kokkos::Serial>) {
62 return 8192;
63 }
64#endif
65#if defined(KOKKOS_ENABLE_OPENMP)
66 if (std::is_same_v<ExecSpace, Kokkos::OpenMP>) {
67 return 8192;
68 }
69#endif
70#if defined(KOKKOS_ENABLE_CUDA)
71 if (std::is_same_v<ExecSpace, Kokkos::Cuda>) {
72 return 65535;
73 }
74#endif
75#if defined(KOKKOS_ENABLE_HIP)
76 if (std::is_same_v<ExecSpace, Kokkos::HIP>) {
77 return 65535;
78 }
79#endif
80 return 1;
81}
82
83/**
84 * @brief Return the default value of the parameter preconditioner_max_block_size for a given Kokkos::ExecutionSpace.
85 *
86 * The values are hardware-specific (but they can be overridden in the constructor of SplinesLinearProblemSparse).
87 * They have been tuned on the basis of ddc/benchmarks/splines.cpp results on 4xIntel 6230 + Nvidia V100.
88 *
89 * @tparam ExecSpace The Kokkos::ExecutionSpace type.
90 * @return The default value for the parameter preconditioner_max_block_size.
91 */
92template <class ExecSpace>
93unsigned int default_preconditioner_max_block_size() noexcept
94{
95#if defined(KOKKOS_ENABLE_SERIAL)
96 if (std::is_same_v<ExecSpace, Kokkos::Serial>) {
97 return 32U;
98 }
99#endif
100#if defined(KOKKOS_ENABLE_OPENMP)
101 if (std::is_same_v<ExecSpace, Kokkos::OpenMP>) {
102 return 1U;
103 }
104#endif
105#if defined(KOKKOS_ENABLE_CUDA)
106 if (std::is_same_v<ExecSpace, Kokkos::Cuda>) {
107 return 1U;
108 }
109#endif
110#if defined(KOKKOS_ENABLE_HIP)
111 if (std::is_same_v<ExecSpace, Kokkos::HIP>) {
112 return 1U;
113 }
114#endif
115 return 1U;
116}
117
118/**
119 * @brief A sparse linear problem dedicated to the computation of a spline approximation.
120 *
121 * The storage format is CSR. Ginkgo is used to perform every matrix and linear solver-related operations.
122 *
123 * @tparam ExecSpace The Kokkos::ExecutionSpace on which operations related to the matrix are performed.
124 */
125template <class ExecSpace>
126class SplinesLinearProblemSparse : public SplinesLinearProblem<ExecSpace>
127{
128public:
129 using typename SplinesLinearProblem<ExecSpace>::MultiRHS;
130 using SplinesLinearProblem<ExecSpace>::size;
131
132private:
133 using matrix_sparse_type = gko::matrix::Csr<double, gko::int32>;
134#if defined(KOKKOS_ENABLE_OPENMP)
135 using solver_type = std::conditional_t<
136 std::is_same_v<ExecSpace, Kokkos::OpenMP>,
137 gko::solver::Gmres<double>,
138 gko::solver::Bicgstab<double>>;
139#else
140 using solver_type = gko::solver::Bicgstab<double>;
141#endif
142
143
144private:
145 std::unique_ptr<gko::matrix::Dense<double>> m_matrix_dense;
146
147 std::shared_ptr<matrix_sparse_type> m_matrix_sparse;
148
149 std::shared_ptr<solver_type> m_solver;
150 std::shared_ptr<gko::LinOp> m_solver_tr;
151
152 std::size_t m_cols_per_chunk; // Maximum number of columns of B to be passed to a Ginkgo solver
153
154 unsigned int m_preconditioner_max_block_size; // Maximum size of Jacobi-block preconditioner
155
156public:
157 /**
158 * @brief SplinesLinearProblemSparse constructor.
159 *
160 * @param mat_size The size of one of the dimensions of the square matrix.
161 * @param cols_per_chunk An optional parameter used to define the number of right-hand sides to pass to
162 * Ginkgo solver calls. see default_cols_per_chunk.
163 * @param preconditioner_max_block_size An optional parameter used to define the maximum size of a block
164 * used by the block-Jacobi preconditioner. see default_preconditioner_max_block_size.
165 */
166 explicit SplinesLinearProblemSparse(
167 const std::size_t mat_size,
168 std::optional<std::size_t> cols_per_chunk = std::nullopt,
169 std::optional<unsigned int> preconditioner_max_block_size = std::nullopt)
170 : SplinesLinearProblem<ExecSpace>(mat_size)
171 , m_cols_per_chunk(cols_per_chunk.value_or(default_cols_per_chunk<ExecSpace>()))
172 , m_preconditioner_max_block_size(preconditioner_max_block_size.value_or(
173 default_preconditioner_max_block_size<ExecSpace>()))
174 {
175 std::shared_ptr const gko_exec = gko::ext::kokkos::create_executor(ExecSpace());
176 m_matrix_dense = gko::matrix::Dense<
177 double>::create(gko_exec->get_master(), gko::dim<2>(mat_size, mat_size));
178 m_matrix_dense->fill(0);
179 m_matrix_sparse = matrix_sparse_type::create(gko_exec, gko::dim<2>(mat_size, mat_size));
180 }
181
182 double get_element(std::size_t i, std::size_t j) const override
183 {
184 return m_matrix_dense->at(i, j);
185 }
186
187 void set_element(std::size_t i, std::size_t j, double aij) override
188 {
189 m_matrix_dense->at(i, j) = aij;
190 }
191
192 /**
193 * @brief Perform a pre-process operation on the solver. Must be called after filling the matrix.
194 *
195 * Removes the zeros from the CSR object and instantiate a Ginkgo solver. It also constructs a transposed version of the solver.
196 *
197 * The stopping criterion is a reduction factor ||Ax-b||/||b||<1e-15 with 1000 maximum iterations.
198 */
199 void setup_solver() override
200 {
201 // Remove zeros
202 gko::matrix_data<double> matrix_data(gko::dim<2>(size(), size()));
203 m_matrix_dense->write(matrix_data);
204 m_matrix_dense.reset();
205 matrix_data.remove_zeros();
206 m_matrix_sparse->read(matrix_data);
207 std::shared_ptr const gko_exec = m_matrix_sparse->get_executor();
208
209 // Create the solver factory
210 std::shared_ptr const residual_criterion
211 = gko::stop::ResidualNorm<double>::build().with_reduction_factor(1e-15).on(
212 gko_exec);
213
214 std::shared_ptr const iterations_criterion
215 = gko::stop::Iteration::build().with_max_iters(1000U).on(gko_exec);
216
217 std::shared_ptr const preconditioner
218 = gko::preconditioner::Jacobi<double>::build()
219 .with_max_block_size(m_preconditioner_max_block_size)
220 .on(gko_exec);
221
222 std::unique_ptr const solver_factory
223 = solver_type::build()
224 .with_preconditioner(preconditioner)
225 .with_criteria(residual_criterion, iterations_criterion)
226 .on(gko_exec);
227
228 m_solver = solver_factory->generate(m_matrix_sparse);
229 m_solver_tr = m_solver->transpose();
230 gko_exec->synchronize();
231 }
232
233 /**
234 * @brief Solve the multiple right-hand sides linear problem Ax=b or its transposed version A^tx=b inplace.
235 *
236 * The solver method is currently Bicgstab on CPU Serial and GPU and Gmres on OMP (because of Ginkgo issue #1563).
237 *
238 * Multiple right-hand sides are sliced in chunks of size cols_per_chunk which are passed one-after-the-other to Ginkgo.
239 *
240 * @param[in, out] b A 2D Kokkos::View storing the multiple right-hand sides of the problem and receiving the corresponding solution.
241 * @param transpose Choose between the direct or transposed version of the linear problem.
242 */
243 void solve(MultiRHS const b, bool const transpose) const override
244 {
245 assert(b.extent(0) == size());
246
247 std::shared_ptr const gko_exec = m_solver->get_executor();
248 std::shared_ptr const convergence_logger = gko::log::Convergence<double>::create();
249
250 std::size_t const main_chunk_size = std::min(m_cols_per_chunk, b.extent(1));
251
252 Kokkos::View<double**, Kokkos::LayoutRight, ExecSpace> const
253 b_buffer("ddc_sparse_b_buffer", size(), main_chunk_size);
254 Kokkos::View<double**, Kokkos::LayoutRight, ExecSpace> const
255 x("ddc_sparse_x", size(), main_chunk_size);
256
257 std::size_t const iend = (b.extent(1) + main_chunk_size - 1) / main_chunk_size;
258 for (std::size_t i = 0; i < iend; ++i) {
259 std::size_t const subview_begin = i * main_chunk_size;
260 std::size_t const subview_end
261 = (i + 1 == iend) ? b.extent(1) : (subview_begin + main_chunk_size);
262
263 auto const b_chunk
264 = Kokkos::subview(b, Kokkos::ALL, Kokkos::pair(subview_begin, subview_end));
265 auto const b_buffer_chunk = Kokkos::
266 subview(b_buffer,
267 Kokkos::ALL,
268 Kokkos::pair(std::size_t(0), subview_end - subview_begin));
269 auto const x_chunk = Kokkos::
270 subview(x,
271 Kokkos::ALL,
272 Kokkos::pair(std::size_t(0), subview_end - subview_begin));
273
274 Kokkos::deep_copy(b_buffer_chunk, b_chunk);
275 Kokkos::deep_copy(x_chunk, b_chunk);
276
277 if (!transpose) {
278 m_solver->add_logger(convergence_logger);
279 m_solver
280 ->apply(to_gko_dense(gko_exec, b_buffer_chunk),
281 to_gko_dense(gko_exec, x_chunk));
282 m_solver->remove_logger(convergence_logger);
283 } else {
284 m_solver_tr->add_logger(convergence_logger);
285 m_solver_tr
286 ->apply(to_gko_dense(gko_exec, b_buffer_chunk),
287 to_gko_dense(gko_exec, x_chunk));
288 m_solver_tr->remove_logger(convergence_logger);
289 }
290
291 if (!convergence_logger->has_converged()) {
292 throw std::runtime_error(
293 "Ginkgo did not converged in ddc::detail::SplinesLinearProblemSparse");
294 }
295
296 Kokkos::deep_copy(b_chunk, x_chunk);
297 }
298 }
299};
300
301} // namespace ddc::detail
The top-level namespace of DDC.