DDC 0.5.2
Loading...
Searching...
No Matches
splines_linear_problem_sparse.hpp
1// Copyright (C) The DDC development team, see COPYRIGHT.md file
2//
3// SPDX-License-Identifier: MIT
4
5#pragma once
6
7#include <algorithm>
8#include <cassert>
9#include <cstddef>
10#include <memory>
11#include <optional>
12#include <stdexcept>
13#include <type_traits>
14
15#include <ginkgo/extensions/kokkos.hpp>
16#include <ginkgo/ginkgo.hpp>
17
18#include <Kokkos_Core.hpp>
19
20#include "splines_linear_problem.hpp"
21
22namespace ddc::detail {
23
24/**
25 * @brief Convert KokkosView to Ginkgo Dense matrix.
26 *
27 * @param[in] gko_exec A Ginkgo executor that has access to the Kokkos::View memory space
28 * @param[in] view A 2-D Kokkos::View with unit stride in the second dimension
29 * @return A Ginkgo Dense matrix view over the Kokkos::View data
30 */
31template <class KokkosViewType>
32auto to_gko_dense(std::shared_ptr<const gko::Executor> const& gko_exec, KokkosViewType const& view)
33{
34 static_assert((Kokkos::is_view_v<KokkosViewType> && KokkosViewType::rank == 2));
35 using value_type = typename KokkosViewType::traits::value_type;
36
37 if (view.stride_1() != 1) {
38 throw std::runtime_error("The view needs to be contiguous in the second dimension");
39 }
40
41 return gko::matrix::Dense<value_type>::
42 create(gko_exec,
43 gko::dim<2>(view.extent(0), view.extent(1)),
44 gko::array<value_type>::view(gko_exec, view.span(), view.data()),
45 view.stride_0());
46}
47
48/**
49 * @brief Return the default value of the parameter cols_per_chunk for a given Kokkos::ExecutionSpace.
50 *
51 * The values are hardware-specific (but they can be overridden in the constructor of SplinesLinearProblemSparse).
52 * They have been tuned on the basis of ddc/benchmarks/splines.cpp results on 4xIntel 6230 + Nvidia V100.
53 *
54 * @tparam ExecSpace The Kokkos::ExecutionSpace type.
55 * @return The default value for the parameter cols_per_chunk.
56 */
57template <class ExecSpace>
58std::size_t default_cols_per_chunk() noexcept
59{
60#if defined(KOKKOS_ENABLE_SERIAL)
61 if (std::is_same_v<ExecSpace, Kokkos::Serial>) {
62 return 8192;
63 }
64#endif
65#if defined(KOKKOS_ENABLE_OPENMP)
66 if (std::is_same_v<ExecSpace, Kokkos::OpenMP>) {
67 return 8192;
68 }
69#endif
70#if defined(KOKKOS_ENABLE_CUDA)
71 if (std::is_same_v<ExecSpace, Kokkos::Cuda>) {
72 return 65535;
73 }
74#endif
75#if defined(KOKKOS_ENABLE_HIP)
76 if (std::is_same_v<ExecSpace, Kokkos::HIP>) {
77 return 65535;
78 }
79#endif
80#if defined(KOKKOS_ENABLE_SYCL)
81 if (std::is_same_v<ExecSpace, Kokkos::SYCL>) {
82 return 65535;
83 }
84#endif
85 return 1;
86}
87
88/**
89 * @brief Return the default value of the parameter preconditioner_max_block_size for a given Kokkos::ExecutionSpace.
90 *
91 * The values are hardware-specific (but they can be overridden in the constructor of SplinesLinearProblemSparse).
92 * They have been tuned on the basis of ddc/benchmarks/splines.cpp results on 4xIntel 6230 + Nvidia V100.
93 *
94 * @tparam ExecSpace The Kokkos::ExecutionSpace type.
95 * @return The default value for the parameter preconditioner_max_block_size.
96 */
97template <class ExecSpace>
98unsigned int default_preconditioner_max_block_size() noexcept
99{
100#if defined(KOKKOS_ENABLE_SERIAL)
101 if (std::is_same_v<ExecSpace, Kokkos::Serial>) {
102 return 32U;
103 }
104#endif
105#if defined(KOKKOS_ENABLE_OPENMP)
106 if (std::is_same_v<ExecSpace, Kokkos::OpenMP>) {
107 return 1U;
108 }
109#endif
110#if defined(KOKKOS_ENABLE_CUDA)
111 if (std::is_same_v<ExecSpace, Kokkos::Cuda>) {
112 return 1U;
113 }
114#endif
115#if defined(KOKKOS_ENABLE_HIP)
116 if (std::is_same_v<ExecSpace, Kokkos::HIP>) {
117 return 1U;
118 }
119#endif
120#if defined(KOKKOS_ENABLE_SYCL)
121 if (std::is_same_v<ExecSpace, Kokkos::SYCL>) {
122 return 1U;
123 }
124#endif
125 return 1U;
126}
127
128/**
129 * @brief A sparse linear problem dedicated to the computation of a spline approximation.
130 *
131 * The storage format is CSR. Ginkgo is used to perform every matrix and linear solver-related operations.
132 *
133 * @tparam ExecSpace The Kokkos::ExecutionSpace on which operations related to the matrix are performed.
134 */
135template <class ExecSpace>
136class SplinesLinearProblemSparse : public SplinesLinearProblem<ExecSpace>
137{
138public:
139 using typename SplinesLinearProblem<ExecSpace>::MultiRHS;
140 using SplinesLinearProblem<ExecSpace>::size;
141
142private:
143 using matrix_sparse_type = gko::matrix::Csr<double, gko::int32>;
144#if defined(KOKKOS_ENABLE_OPENMP)
145 using solver_type = std::conditional_t<
146 std::is_same_v<ExecSpace, Kokkos::OpenMP>,
147 gko::solver::Gmres<double>,
148 gko::solver::Bicgstab<double>>;
149#else
150 using solver_type = gko::solver::Bicgstab<double>;
151#endif
152
153
154private:
155 std::unique_ptr<gko::matrix::Dense<double>> m_matrix_dense;
156
157 std::shared_ptr<matrix_sparse_type> m_matrix_sparse;
158
159 std::shared_ptr<solver_type> m_solver;
160 std::shared_ptr<gko::LinOp> m_solver_tr;
161
162 std::size_t m_cols_per_chunk; // Maximum number of columns of B to be passed to a Ginkgo solver
163
164 unsigned int m_preconditioner_max_block_size; // Maximum size of Jacobi-block preconditioner
165
166public:
167 /**
168 * @brief SplinesLinearProblemSparse constructor.
169 *
170 * @param mat_size The size of one of the dimensions of the square matrix.
171 * @param cols_per_chunk An optional parameter used to define the number of right-hand sides to pass to
172 * Ginkgo solver calls. see default_cols_per_chunk.
173 * @param preconditioner_max_block_size An optional parameter used to define the maximum size of a block
174 * used by the block-Jacobi preconditioner. see default_preconditioner_max_block_size.
175 */
176 explicit SplinesLinearProblemSparse(
177 const std::size_t mat_size,
178 std::optional<std::size_t> cols_per_chunk = std::nullopt,
179 std::optional<unsigned int> preconditioner_max_block_size = std::nullopt)
180 : SplinesLinearProblem<ExecSpace>(mat_size)
181 , m_cols_per_chunk(cols_per_chunk.value_or(default_cols_per_chunk<ExecSpace>()))
182 , m_preconditioner_max_block_size(preconditioner_max_block_size.value_or(
183 default_preconditioner_max_block_size<ExecSpace>()))
184 {
185 std::shared_ptr const gko_exec = gko::ext::kokkos::create_executor(ExecSpace());
186 m_matrix_dense = gko::matrix::Dense<
187 double>::create(gko_exec->get_master(), gko::dim<2>(mat_size, mat_size));
188 m_matrix_dense->fill(0);
189 m_matrix_sparse = matrix_sparse_type::create(gko_exec, gko::dim<2>(mat_size, mat_size));
190 }
191
192 double get_element(std::size_t i, std::size_t j) const override
193 {
194 return m_matrix_dense->at(i, j);
195 }
196
197 void set_element(std::size_t i, std::size_t j, double aij) override
198 {
199 m_matrix_dense->at(i, j) = aij;
200 }
201
202 /**
203 * @brief Perform a pre-process operation on the solver. Must be called after filling the matrix.
204 *
205 * Removes the zeros from the CSR object and instantiate a Ginkgo solver. It also constructs a transposed version of the solver.
206 *
207 * The stopping criterion is a reduction factor ||Ax-b||/||b||<1e-15 with 1000 maximum iterations.
208 */
209 void setup_solver() override
210 {
211 // Remove zeros
212 gko::matrix_data<double> matrix_data(gko::dim<2>(size(), size()));
213 m_matrix_dense->write(matrix_data);
214 m_matrix_dense.reset();
215 matrix_data.remove_zeros();
216 m_matrix_sparse->read(matrix_data);
217 std::shared_ptr const gko_exec = m_matrix_sparse->get_executor();
218
219 // Create the solver factory
220 std::shared_ptr const residual_criterion
221 = gko::stop::ResidualNorm<double>::build().with_reduction_factor(1e-15).on(
222 gko_exec);
223
224 std::shared_ptr const iterations_criterion
225 = gko::stop::Iteration::build().with_max_iters(1000U).on(gko_exec);
226
227 std::shared_ptr const preconditioner
228 = gko::preconditioner::Jacobi<double>::build()
229 .with_max_block_size(m_preconditioner_max_block_size)
230 .on(gko_exec);
231
232 std::unique_ptr const solver_factory
233 = solver_type::build()
234 .with_preconditioner(preconditioner)
235 .with_criteria(residual_criterion, iterations_criterion)
236 .on(gko_exec);
237
238 m_solver = solver_factory->generate(m_matrix_sparse);
239 m_solver_tr = m_solver->transpose();
240 gko_exec->synchronize();
241 }
242
243 /**
244 * @brief Solve the multiple right-hand sides linear problem Ax=b or its transposed version A^tx=b inplace.
245 *
246 * The solver method is currently Bicgstab on CPU Serial and GPU and Gmres on OMP (because of Ginkgo issue #1563).
247 *
248 * Multiple right-hand sides are sliced in chunks of size cols_per_chunk which are passed one-after-the-other to Ginkgo.
249 *
250 * @param[in, out] b A 2D Kokkos::View storing the multiple right-hand sides of the problem and receiving the corresponding solution.
251 * @param transpose Choose between the direct or transposed version of the linear problem.
252 */
253 void solve(MultiRHS const b, bool const transpose) const override
254 {
255 assert(b.extent(0) == size());
256
257 std::shared_ptr const gko_exec = m_solver->get_executor();
258 std::shared_ptr const convergence_logger = gko::log::Convergence<double>::create();
259
260 std::size_t const main_chunk_size = std::min(m_cols_per_chunk, b.extent(1));
261
262 Kokkos::View<double**, Kokkos::LayoutRight, ExecSpace> const
263 b_buffer("ddc_sparse_b_buffer", size(), main_chunk_size);
264 Kokkos::View<double**, Kokkos::LayoutRight, ExecSpace> const
265 x("ddc_sparse_x", size(), main_chunk_size);
266
267 std::size_t const iend = (b.extent(1) + main_chunk_size - 1) / main_chunk_size;
268 for (std::size_t i = 0; i < iend; ++i) {
269 std::size_t const subview_begin = i * main_chunk_size;
270 std::size_t const subview_end
271 = (i + 1 == iend) ? b.extent(1) : (subview_begin + main_chunk_size);
272
273 auto const b_chunk
274 = Kokkos::subview(b, Kokkos::ALL, Kokkos::pair(subview_begin, subview_end));
275 auto const b_buffer_chunk = Kokkos::
276 subview(b_buffer,
277 Kokkos::ALL,
278 Kokkos::pair(std::size_t(0), subview_end - subview_begin));
279 auto const x_chunk = Kokkos::
280 subview(x,
281 Kokkos::ALL,
282 Kokkos::pair(std::size_t(0), subview_end - subview_begin));
283
284 Kokkos::deep_copy(b_buffer_chunk, b_chunk);
285 Kokkos::deep_copy(x_chunk, b_chunk);
286
287 if (!transpose) {
288 m_solver->add_logger(convergence_logger);
289 m_solver
290 ->apply(to_gko_dense(gko_exec, b_buffer_chunk),
291 to_gko_dense(gko_exec, x_chunk));
292 m_solver->remove_logger(convergence_logger);
293 } else {
294 m_solver_tr->add_logger(convergence_logger);
295 m_solver_tr
296 ->apply(to_gko_dense(gko_exec, b_buffer_chunk),
297 to_gko_dense(gko_exec, x_chunk));
298 m_solver_tr->remove_logger(convergence_logger);
299 }
300
301 if (!convergence_logger->has_converged()) {
302 throw std::runtime_error(
303 "Ginkgo did not converged in ddc::detail::SplinesLinearProblemSparse");
304 }
305
306 Kokkos::deep_copy(b_chunk, x_chunk);
307 }
308 }
309};
310
311} // namespace ddc::detail
The top-level namespace of DDC.