DDC 0.10.0
Loading...
Searching...
No Matches
splines_linear_problem_sparse.cpp
1// Copyright (C) The DDC development team, see COPYRIGHT.md file
2//
3// SPDX-License-Identifier: MIT
4
5#include <algorithm>
6#include <cassert>
7#include <cstddef>
8#include <memory>
9#include <optional>
10#include <stdexcept>
11#include <type_traits>
12
13#include <ginkgo/extensions/kokkos.hpp>
14#include <ginkgo/ginkgo.hpp>
15
16#include <Kokkos_Core.hpp>
17
20
21namespace ddc::detail {
22
23namespace {
24
25/**
26 * @brief Convert KokkosView to Ginkgo Dense matrix.
27 *
28 * @param[in] gko_exec A Ginkgo executor that has access to the Kokkos::View memory space
29 * @param[in] view A 2-D Kokkos::View with unit stride in the second dimension
30 * @return A Ginkgo Dense matrix view over the Kokkos::View data
31 */
32template <class KokkosViewType>
33auto to_gko_dense(std::shared_ptr<gko::Executor const> const& gko_exec, KokkosViewType const& view)
34{
35 static_assert(Kokkos::is_view_v<KokkosViewType> && KokkosViewType::rank == 2);
36 using value_type = typename KokkosViewType::traits::value_type;
37
38 if (view.stride(1) != 1) {
39 throw std::runtime_error("The view needs to be contiguous in the second dimension");
40 }
41
42 return gko::matrix::Dense<value_type>::
43 create(gko_exec,
44 gko::dim<2>(view.extent(0), view.extent(1)),
45 gko::array<value_type>::view(gko_exec, view.span(), view.data()),
46 view.stride(0));
47}
48
49/**
50 * @brief Return the default value of the parameter cols_per_chunk for a given Kokkos::ExecutionSpace.
51 *
52 * The values are hardware-specific (but they can be overridden in the constructor of SplinesLinearProblemSparse).
53 * They have been tuned on the basis of ddc/benchmarks/splines.cpp results on 4xIntel 6230 + Nvidia V100.
54 *
55 * @tparam ExecSpace The Kokkos::ExecutionSpace type.
56 * @return The default value for the parameter cols_per_chunk.
57 */
58template <class ExecSpace>
59std::size_t default_cols_per_chunk() noexcept
60{
61#if defined(KOKKOS_ENABLE_SERIAL)
62 if (std::is_same_v<ExecSpace, Kokkos::Serial>) {
63 return 8192;
64 }
65#endif
66#if defined(KOKKOS_ENABLE_OPENMP)
67 if (std::is_same_v<ExecSpace, Kokkos::OpenMP>) {
68 return 8192;
69 }
70#endif
71#if defined(KOKKOS_ENABLE_CUDA)
72 if (std::is_same_v<ExecSpace, Kokkos::Cuda>) {
73 return 65535;
74 }
75#endif
76#if defined(KOKKOS_ENABLE_HIP)
77 if (std::is_same_v<ExecSpace, Kokkos::HIP>) {
78 return 65535;
79 }
80#endif
81#if defined(KOKKOS_ENABLE_SYCL)
82 if (std::is_same_v<ExecSpace, Kokkos::SYCL>) {
83 return 65535;
84 }
85#endif
86 return 1;
87}
88
89/**
90 * @brief Return the default value of the parameter preconditioner_max_block_size for a given Kokkos::ExecutionSpace.
91 *
92 * The values are hardware-specific (but they can be overridden in the constructor of SplinesLinearProblemSparse).
93 * They have been tuned on the basis of ddc/benchmarks/splines.cpp results on 4xIntel 6230 + Nvidia V100.
94 *
95 * @tparam ExecSpace The Kokkos::ExecutionSpace type.
96 * @return The default value for the parameter preconditioner_max_block_size.
97 */
98template <class ExecSpace>
99unsigned int default_preconditioner_max_block_size() noexcept
100{
101#if defined(KOKKOS_ENABLE_SERIAL)
102 if (std::is_same_v<ExecSpace, Kokkos::Serial>) {
103 return 32U;
104 }
105#endif
106#if defined(KOKKOS_ENABLE_OPENMP)
107 if (std::is_same_v<ExecSpace, Kokkos::OpenMP>) {
108 return 1U;
109 }
110#endif
111#if defined(KOKKOS_ENABLE_CUDA)
112 if (std::is_same_v<ExecSpace, Kokkos::Cuda>) {
113 return 1U;
114 }
115#endif
116#if defined(KOKKOS_ENABLE_HIP)
117 if (std::is_same_v<ExecSpace, Kokkos::HIP>) {
118 return 1U;
119 }
120#endif
121#if defined(KOKKOS_ENABLE_SYCL)
122 if (std::is_same_v<ExecSpace, Kokkos::SYCL>) {
123 return 1U;
124 }
125#endif
126 return 1U;
127}
128
129} // namespace
130
131template <class ExecSpace>
132class SplinesLinearProblemSparse<ExecSpace>::Impl
133{
134public:
135 using MultiRHS = typename SplinesLinearProblem<ExecSpace>::MultiRHS;
136
137private:
138 using matrix_sparse_type = gko::matrix::Csr<double, gko::int32>;
139#if defined(KOKKOS_ENABLE_OPENMP)
140 using solver_type = std::conditional_t<
141 std::is_same_v<ExecSpace, Kokkos::OpenMP>,
142 gko::solver::Gmres<double>,
143 gko::solver::Bicgstab<double>>;
144#else
145 using solver_type = gko::solver::Bicgstab<double>;
146#endif
147
148private:
149 std::size_t m_mat_size;
150
151 std::unique_ptr<gko::matrix::Dense<double>> m_matrix_dense;
152
153 std::shared_ptr<matrix_sparse_type> m_matrix_sparse;
154
155 std::shared_ptr<solver_type> m_solver;
156 std::shared_ptr<gko::LinOp> m_solver_tr;
157
158 std::size_t m_cols_per_chunk; // Maximum number of columns of B to be passed to a Ginkgo solver
159
160 unsigned int m_preconditioner_max_block_size; // Maximum size of Jacobi-block preconditioner
161
162public:
163 explicit Impl(
164 std::size_t const mat_size,
165 std::optional<std::size_t> const cols_per_chunk = std::nullopt,
166 std::optional<unsigned int> const preconditioner_max_block_size = std::nullopt)
167 : m_mat_size(mat_size)
168 , m_cols_per_chunk(cols_per_chunk.value_or(default_cols_per_chunk<ExecSpace>()))
169 , m_preconditioner_max_block_size(preconditioner_max_block_size.value_or(
170 default_preconditioner_max_block_size<ExecSpace>()))
171 {
172 std::shared_ptr const gko_exec = gko::ext::kokkos::create_executor(ExecSpace());
173 m_matrix_dense = gko::matrix::Dense<
174 double>::create(gko_exec->get_master(), gko::dim<2>(mat_size, mat_size));
175 m_matrix_dense->fill(0);
176 m_matrix_sparse = matrix_sparse_type::create(gko_exec, gko::dim<2>(mat_size, mat_size));
177 }
178
179 double get_element(std::size_t const i, std::size_t const j) const
180 {
181 return m_matrix_dense->at(i, j);
182 }
183
184 void set_element(std::size_t const i, std::size_t const j, double const aij)
185 {
186 m_matrix_dense->at(i, j) = aij;
187 }
188
189 void setup_solver()
190 {
191 // Remove zeros
192 gko::matrix_data<double> matrix_data(gko::dim<2>(m_mat_size, m_mat_size));
193 m_matrix_dense->write(matrix_data);
194 m_matrix_dense.reset();
195 matrix_data.remove_zeros();
196 m_matrix_sparse->read(matrix_data);
197 std::shared_ptr const gko_exec = m_matrix_sparse->get_executor();
198
199 // Create the solver factory
200 std::shared_ptr const residual_criterion
201 = gko::stop::ResidualNorm<double>::build().with_reduction_factor(1e-15).on(
202 gko_exec);
203
204 std::shared_ptr const iterations_criterion
205 = gko::stop::Iteration::build().with_max_iters(1000U).on(gko_exec);
206
207 std::shared_ptr const preconditioner
208 = gko::preconditioner::Jacobi<double>::build()
209 .with_max_block_size(m_preconditioner_max_block_size)
210 .on(gko_exec);
211
212 std::unique_ptr const solver_factory
213 = solver_type::build()
214 .with_preconditioner(preconditioner)
215 .with_criteria(residual_criterion, iterations_criterion)
216 .on(gko_exec);
217
218 m_solver = solver_factory->generate(m_matrix_sparse);
219 m_solver_tr = m_solver->transpose();
220 gko_exec->synchronize();
221 }
222
223 /**
224 * @brief Solve the multiple right-hand sides linear problem Ax=b or its transposed version A^tx=b inplace.
225 *
226 * The solver method is currently Bicgstab on CPU Serial and GPU and Gmres on OMP (because of Ginkgo issue #1563).
227 *
228 * Multiple right-hand sides are sliced in chunks of size cols_per_chunk which are passed one-after-the-other to Ginkgo.
229 *
230 * @param[in, out] b A 2D Kokkos::View storing the multiple right-hand sides of the problem and receiving the corresponding solution.
231 * @param transpose Choose between the direct or transposed version of the linear problem.
232 */
233 void solve(MultiRHS const b, bool const transpose) const
234 {
235 assert(b.extent(0) == m_mat_size);
236
237 std::shared_ptr const gko_exec = m_solver->get_executor();
238 std::shared_ptr const convergence_logger = gko::log::Convergence<double>::create();
239
240 std::size_t const main_chunk_size = std::min(m_cols_per_chunk, b.extent(1));
241
242 MultiRHS const b_buffer("ddc_sparse_b_buffer", m_mat_size, main_chunk_size);
243 MultiRHS const x("ddc_sparse_x", m_mat_size, main_chunk_size);
244
245 std::size_t const iend = (b.extent(1) + main_chunk_size - 1) / main_chunk_size;
246 for (std::size_t i = 0; i < iend; ++i) {
247 std::size_t const subview_begin = i * main_chunk_size;
248 std::size_t const subview_end
249 = (i + 1 == iend) ? b.extent(1) : (subview_begin + main_chunk_size);
250
251 auto const b_chunk
252 = Kokkos::subview(b, Kokkos::ALL, Kokkos::pair(subview_begin, subview_end));
253 auto const b_buffer_chunk = Kokkos::
254 subview(b_buffer,
255 Kokkos::ALL,
256 Kokkos::pair(static_cast<std::size_t>(0), subview_end - subview_begin));
257 auto const x_chunk = Kokkos::
258 subview(x,
259 Kokkos::ALL,
260 Kokkos::pair(static_cast<std::size_t>(0), subview_end - subview_begin));
261
262 Kokkos::deep_copy(b_buffer_chunk, b_chunk);
263 Kokkos::deep_copy(x_chunk, b_chunk);
264
265 if (!transpose) {
266 m_solver->add_logger(convergence_logger);
267 m_solver
268 ->apply(to_gko_dense(gko_exec, b_buffer_chunk),
269 to_gko_dense(gko_exec, x_chunk));
270 m_solver->remove_logger(convergence_logger);
271 } else {
272 m_solver_tr->add_logger(convergence_logger);
273 m_solver_tr
274 ->apply(to_gko_dense(gko_exec, b_buffer_chunk),
275 to_gko_dense(gko_exec, x_chunk));
276 m_solver_tr->remove_logger(convergence_logger);
277 }
278
279 if (!convergence_logger->has_converged()) {
280 throw std::runtime_error(
281 "Ginkgo did not converged in ddc::detail::SplinesLinearProblemSparse");
282 }
283
284 Kokkos::deep_copy(b_chunk, x_chunk);
285 }
286 }
287};
288
289template <class ExecSpace>
290SplinesLinearProblemSparse<ExecSpace>::SplinesLinearProblemSparse(
291 std::size_t const mat_size,
292 std::optional<std::size_t> cols_per_chunk,
293 std::optional<unsigned int> preconditioner_max_block_size)
294 : SplinesLinearProblem<ExecSpace>(mat_size)
295 , m_impl(std::make_unique<Impl>(mat_size, cols_per_chunk, preconditioner_max_block_size))
296{
297}
298
299template <class ExecSpace>
300SplinesLinearProblemSparse<ExecSpace>::~SplinesLinearProblemSparse() = default;
301
302template <class ExecSpace>
303double SplinesLinearProblemSparse<ExecSpace>::get_element(std::size_t i, std::size_t j) const
304{
305 return m_impl->get_element(i, j);
306}
307
308template <class ExecSpace>
309void SplinesLinearProblemSparse<ExecSpace>::set_element(std::size_t i, std::size_t j, double aij)
310{
311 m_impl->set_element(i, j, aij);
312}
313
314template <class ExecSpace>
315void SplinesLinearProblemSparse<ExecSpace>::setup_solver()
316{
317 m_impl->setup_solver();
318}
319
320template <class ExecSpace>
321void SplinesLinearProblemSparse<ExecSpace>::solve(MultiRHS const b, bool const transpose) const
322{
323 m_impl->solve(b, transpose);
324}
325
326#if defined(KOKKOS_ENABLE_SERIAL)
327template class SplinesLinearProblemSparse<Kokkos::Serial>;
328#endif
329#if defined(KOKKOS_ENABLE_OPENMP)
330template class SplinesLinearProblemSparse<Kokkos::OpenMP>;
331#endif
332#if defined(KOKKOS_ENABLE_CUDA)
333template class SplinesLinearProblemSparse<Kokkos::Cuda>;
334#endif
335#if defined(KOKKOS_ENABLE_HIP)
336template class SplinesLinearProblemSparse<Kokkos::HIP>;
337#endif
338#if defined(KOKKOS_ENABLE_SYCL)
339template class SplinesLinearProblemSparse<Kokkos::SYCL>;
340#endif
341
342} // namespace ddc::detail
The top-level namespace of DDC.