15#include <ginkgo/extensions/kokkos.hpp>
16#include <ginkgo/ginkgo.hpp>
18#include <Kokkos_Core.hpp>
20#include "splines_linear_problem.hpp"
22namespace ddc::detail {
25
26
27
28
29
30
31template <
class KokkosViewType>
32auto to_gko_dense(std::shared_ptr<
const gko::Executor>
const& gko_exec, KokkosViewType
const& view)
34 static_assert((Kokkos::is_view_v<KokkosViewType> && KokkosViewType::rank == 2));
35 using value_type =
typename KokkosViewType::traits::value_type;
37 if (view.stride_1() != 1) {
38 throw std::runtime_error(
"The view needs to be contiguous in the second dimension");
41 return gko::matrix::Dense<value_type>::
43 gko::dim<2>(view.extent(0), view.extent(1)),
44 gko::array<value_type>::view(gko_exec, view.span(), view.data()),
49
50
51
52
53
54
55
56
57template <
class ExecSpace>
58std::size_t default_cols_per_chunk()
noexcept
60#if defined(KOKKOS_ENABLE_SERIAL)
61 if (std::is_same_v<ExecSpace, Kokkos::Serial>) {
65#if defined(KOKKOS_ENABLE_OPENMP)
66 if (std::is_same_v<ExecSpace, Kokkos::OpenMP>) {
70#if defined(KOKKOS_ENABLE_CUDA)
71 if (std::is_same_v<ExecSpace, Kokkos::Cuda>) {
75#if defined(KOKKOS_ENABLE_HIP)
76 if (std::is_same_v<ExecSpace, Kokkos::HIP>) {
80#if defined(KOKKOS_ENABLE_SYCL)
81 if (std::is_same_v<ExecSpace, Kokkos::SYCL>) {
89
90
91
92
93
94
95
96
97template <
class ExecSpace>
98unsigned int default_preconditioner_max_block_size()
noexcept
100#if defined(KOKKOS_ENABLE_SERIAL)
101 if (std::is_same_v<ExecSpace, Kokkos::Serial>) {
105#if defined(KOKKOS_ENABLE_OPENMP)
106 if (std::is_same_v<ExecSpace, Kokkos::OpenMP>) {
110#if defined(KOKKOS_ENABLE_CUDA)
111 if (std::is_same_v<ExecSpace, Kokkos::Cuda>) {
115#if defined(KOKKOS_ENABLE_HIP)
116 if (std::is_same_v<ExecSpace, Kokkos::HIP>) {
120#if defined(KOKKOS_ENABLE_SYCL)
121 if (std::is_same_v<ExecSpace, Kokkos::SYCL>) {
129
130
131
132
133
134
135template <
class ExecSpace>
136class SplinesLinearProblemSparse :
public SplinesLinearProblem<ExecSpace>
139 using typename SplinesLinearProblem<ExecSpace>::MultiRHS;
140 using SplinesLinearProblem<ExecSpace>::size;
143 using matrix_sparse_type = gko::matrix::Csr<
double, gko::int32>;
144#if defined(KOKKOS_ENABLE_OPENMP)
145 using solver_type = std::conditional_t<
146 std::is_same_v<ExecSpace, Kokkos::OpenMP>,
147 gko::solver::Gmres<
double>,
148 gko::solver::Bicgstab<
double>>;
150 using solver_type = gko::solver::Bicgstab<
double>;
155 std::unique_ptr<gko::matrix::Dense<
double>> m_matrix_dense;
157 std::shared_ptr<matrix_sparse_type> m_matrix_sparse;
159 std::shared_ptr<solver_type> m_solver;
160 std::shared_ptr<gko::LinOp> m_solver_tr;
162 std::size_t m_cols_per_chunk;
164 unsigned int m_preconditioner_max_block_size;
168
169
170
171
172
173
174
175
176 explicit SplinesLinearProblemSparse(
177 const std::size_t mat_size,
178 std::optional<std::size_t> cols_per_chunk = std::nullopt,
179 std::optional<
unsigned int> preconditioner_max_block_size = std::nullopt)
180 : SplinesLinearProblem<ExecSpace>(mat_size)
181 , m_cols_per_chunk(cols_per_chunk.value_or(default_cols_per_chunk<ExecSpace>()))
182 , m_preconditioner_max_block_size(preconditioner_max_block_size.value_or(
183 default_preconditioner_max_block_size<ExecSpace>()))
185 std::shared_ptr
const gko_exec = gko::ext::kokkos::create_executor(ExecSpace());
186 m_matrix_dense = gko::matrix::Dense<
187 double>::create(gko_exec->get_master(), gko::dim<2>(mat_size, mat_size));
188 m_matrix_dense->fill(0);
189 m_matrix_sparse = matrix_sparse_type::create(gko_exec, gko::dim<2>(mat_size, mat_size));
192 double get_element(std::size_t i, std::size_t j)
const override
194 return m_matrix_dense->at(i, j);
197 void set_element(std::size_t i, std::size_t j,
double aij)
override
199 m_matrix_dense->at(i, j) = aij;
203
204
205
206
207
208
209 void setup_solver()
override
212 gko::matrix_data<
double> matrix_data(gko::dim<2>(size(), size()));
213 m_matrix_dense->write(matrix_data);
214 m_matrix_dense.reset();
215 matrix_data.remove_zeros();
216 m_matrix_sparse->read(matrix_data);
217 std::shared_ptr
const gko_exec = m_matrix_sparse->get_executor();
220 std::shared_ptr
const residual_criterion
221 = gko::stop::ResidualNorm<
double>::build().with_reduction_factor(1e-15).on(
224 std::shared_ptr
const iterations_criterion
225 = gko::stop::Iteration::build().with_max_iters(1000U).on(gko_exec);
227 std::shared_ptr
const preconditioner
228 = gko::preconditioner::Jacobi<
double>::build()
229 .with_max_block_size(m_preconditioner_max_block_size)
232 std::unique_ptr
const solver_factory
233 = solver_type::build()
234 .with_preconditioner(preconditioner)
235 .with_criteria(residual_criterion, iterations_criterion)
238 m_solver = solver_factory->generate(m_matrix_sparse);
239 m_solver_tr = m_solver->transpose();
240 gko_exec->synchronize();
244
245
246
247
248
249
250
251
252
253 void solve(MultiRHS
const b,
bool const transpose)
const override
255 assert(b.extent(0) == size());
257 std::shared_ptr
const gko_exec = m_solver->get_executor();
258 std::shared_ptr
const convergence_logger = gko::log::Convergence<
double>::create();
260 std::size_t
const main_chunk_size = std::min(m_cols_per_chunk, b.extent(1));
262 Kokkos::View<
double**, Kokkos::LayoutRight, ExecSpace>
const
263 b_buffer(
"ddc_sparse_b_buffer", size(), main_chunk_size);
264 Kokkos::View<
double**, Kokkos::LayoutRight, ExecSpace>
const
265 x(
"ddc_sparse_x", size(), main_chunk_size);
267 std::size_t
const iend = (b.extent(1) + main_chunk_size - 1) / main_chunk_size;
268 for (std::size_t i = 0; i < iend; ++i) {
269 std::size_t
const subview_begin = i * main_chunk_size;
270 std::size_t
const subview_end
271 = (i + 1 == iend) ? b.extent(1) : (subview_begin + main_chunk_size);
274 = Kokkos::subview(b, Kokkos::ALL, Kokkos::pair(subview_begin, subview_end));
275 auto const b_buffer_chunk = Kokkos::
278 Kokkos::pair(std::size_t(0), subview_end - subview_begin));
279 auto const x_chunk = Kokkos::
282 Kokkos::pair(std::size_t(0), subview_end - subview_begin));
284 Kokkos::deep_copy(b_buffer_chunk, b_chunk);
285 Kokkos::deep_copy(x_chunk, b_chunk);
288 m_solver->add_logger(convergence_logger);
290 ->apply(to_gko_dense(gko_exec, b_buffer_chunk),
291 to_gko_dense(gko_exec, x_chunk));
292 m_solver->remove_logger(convergence_logger);
294 m_solver_tr->add_logger(convergence_logger);
296 ->apply(to_gko_dense(gko_exec, b_buffer_chunk),
297 to_gko_dense(gko_exec, x_chunk));
298 m_solver_tr->remove_logger(convergence_logger);
301 if (!convergence_logger->has_converged()) {
302 throw std::runtime_error(
303 "Ginkgo did not converged in ddc::detail::SplinesLinearProblemSparse");
306 Kokkos::deep_copy(b_chunk, x_chunk);
The top-level namespace of DDC.