15#include <ginkgo/extensions/kokkos.hpp>
16#include <ginkgo/ginkgo.hpp>
18#include <Kokkos_Core.hpp>
20#include "splines_linear_problem.hpp"
22namespace ddc::detail {
25
26
27
28
29
30
31template <
class KokkosViewType>
32auto to_gko_dense(std::shared_ptr<
const gko::Executor>
const& gko_exec, KokkosViewType
const& view)
34 static_assert((Kokkos::is_view_v<KokkosViewType> && KokkosViewType::rank == 2));
35 using value_type =
typename KokkosViewType::traits::value_type;
37 if (view.stride_1() != 1) {
38 throw std::runtime_error(
"The view needs to be contiguous in the second dimension");
41 return gko::matrix::Dense<value_type>::
43 gko::dim<2>(view.extent(0), view.extent(1)),
44 gko::array<value_type>::view(gko_exec, view.span(), view.data()),
49
50
51
52
53
54
55
56
57template <
class ExecSpace>
58std::size_t default_cols_per_chunk()
noexcept
60#if defined(KOKKOS_ENABLE_SERIAL)
61 if (std::is_same_v<ExecSpace, Kokkos::Serial>) {
65#if defined(KOKKOS_ENABLE_OPENMP)
66 if (std::is_same_v<ExecSpace, Kokkos::OpenMP>) {
70#if defined(KOKKOS_ENABLE_CUDA)
71 if (std::is_same_v<ExecSpace, Kokkos::Cuda>) {
75#if defined(KOKKOS_ENABLE_HIP)
76 if (std::is_same_v<ExecSpace, Kokkos::HIP>) {
84
85
86
87
88
89
90
91
92template <
class ExecSpace>
93unsigned int default_preconditioner_max_block_size()
noexcept
95#if defined(KOKKOS_ENABLE_SERIAL)
96 if (std::is_same_v<ExecSpace, Kokkos::Serial>) {
100#if defined(KOKKOS_ENABLE_OPENMP)
101 if (std::is_same_v<ExecSpace, Kokkos::OpenMP>) {
105#if defined(KOKKOS_ENABLE_CUDA)
106 if (std::is_same_v<ExecSpace, Kokkos::Cuda>) {
110#if defined(KOKKOS_ENABLE_HIP)
111 if (std::is_same_v<ExecSpace, Kokkos::HIP>) {
119
120
121
122
123
124
125template <
class ExecSpace>
126class SplinesLinearProblemSparse :
public SplinesLinearProblem<ExecSpace>
129 using typename SplinesLinearProblem<ExecSpace>::MultiRHS;
130 using SplinesLinearProblem<ExecSpace>::size;
133 using matrix_sparse_type = gko::matrix::Csr<
double, gko::int32>;
134#if defined(KOKKOS_ENABLE_OPENMP)
135 using solver_type = std::conditional_t<
136 std::is_same_v<ExecSpace, Kokkos::OpenMP>,
137 gko::solver::Gmres<
double>,
138 gko::solver::Bicgstab<
double>>;
140 using solver_type = gko::solver::Bicgstab<
double>;
145 std::unique_ptr<gko::matrix::Dense<
double>> m_matrix_dense;
147 std::shared_ptr<matrix_sparse_type> m_matrix_sparse;
149 std::shared_ptr<solver_type> m_solver;
150 std::shared_ptr<gko::LinOp> m_solver_tr;
152 std::size_t m_cols_per_chunk;
154 unsigned int m_preconditioner_max_block_size;
158
159
160
161
162
163
164
165
166 explicit SplinesLinearProblemSparse(
167 const std::size_t mat_size,
168 std::optional<std::size_t> cols_per_chunk = std::nullopt,
169 std::optional<
unsigned int> preconditioner_max_block_size = std::nullopt)
170 : SplinesLinearProblem<ExecSpace>(mat_size)
171 , m_cols_per_chunk(cols_per_chunk.value_or(default_cols_per_chunk<ExecSpace>()))
172 , m_preconditioner_max_block_size(preconditioner_max_block_size.value_or(
173 default_preconditioner_max_block_size<ExecSpace>()))
175 std::shared_ptr
const gko_exec = gko::ext::kokkos::create_executor(ExecSpace());
176 m_matrix_dense = gko::matrix::Dense<
177 double>::create(gko_exec->get_master(), gko::dim<2>(mat_size, mat_size));
178 m_matrix_dense->fill(0);
179 m_matrix_sparse = matrix_sparse_type::create(gko_exec, gko::dim<2>(mat_size, mat_size));
182 double get_element(std::size_t i, std::size_t j)
const override
184 return m_matrix_dense->at(i, j);
187 void set_element(std::size_t i, std::size_t j,
double aij)
override
189 m_matrix_dense->at(i, j) = aij;
193
194
195
196
197
198
199 void setup_solver()
override
202 gko::matrix_data<
double> matrix_data(gko::dim<2>(size(), size()));
203 m_matrix_dense->write(matrix_data);
204 m_matrix_dense.reset();
205 matrix_data.remove_zeros();
206 m_matrix_sparse->read(matrix_data);
207 std::shared_ptr
const gko_exec = m_matrix_sparse->get_executor();
210 std::shared_ptr
const residual_criterion
211 = gko::stop::ResidualNorm<
double>::build().with_reduction_factor(1e-15).on(
214 std::shared_ptr
const iterations_criterion
215 = gko::stop::Iteration::build().with_max_iters(1000U).on(gko_exec);
217 std::shared_ptr
const preconditioner
218 = gko::preconditioner::Jacobi<
double>::build()
219 .with_max_block_size(m_preconditioner_max_block_size)
222 std::unique_ptr
const solver_factory
223 = solver_type::build()
224 .with_preconditioner(preconditioner)
225 .with_criteria(residual_criterion, iterations_criterion)
228 m_solver = solver_factory->generate(m_matrix_sparse);
229 m_solver_tr = m_solver->transpose();
230 gko_exec->synchronize();
234
235
236
237
238
239
240
241
242
243 void solve(MultiRHS
const b,
bool const transpose)
const override
245 assert(b.extent(0) == size());
247 std::shared_ptr
const gko_exec = m_solver->get_executor();
248 std::shared_ptr
const convergence_logger = gko::log::Convergence<
double>::create();
250 std::size_t
const main_chunk_size = std::min(m_cols_per_chunk, b.extent(1));
252 Kokkos::View<
double**, Kokkos::LayoutRight, ExecSpace>
const
253 b_buffer(
"ddc_sparse_b_buffer", size(), main_chunk_size);
254 Kokkos::View<
double**, Kokkos::LayoutRight, ExecSpace>
const
255 x(
"ddc_sparse_x", size(), main_chunk_size);
257 std::size_t
const iend = (b.extent(1) + main_chunk_size - 1) / main_chunk_size;
258 for (std::size_t i = 0; i < iend; ++i) {
259 std::size_t
const subview_begin = i * main_chunk_size;
260 std::size_t
const subview_end
261 = (i + 1 == iend) ? b.extent(1) : (subview_begin + main_chunk_size);
264 = Kokkos::subview(b, Kokkos::ALL, Kokkos::pair(subview_begin, subview_end));
265 auto const b_buffer_chunk = Kokkos::
268 Kokkos::pair(std::size_t(0), subview_end - subview_begin));
269 auto const x_chunk = Kokkos::
272 Kokkos::pair(std::size_t(0), subview_end - subview_begin));
274 Kokkos::deep_copy(b_buffer_chunk, b_chunk);
275 Kokkos::deep_copy(x_chunk, b_chunk);
278 m_solver->add_logger(convergence_logger);
280 ->apply(to_gko_dense(gko_exec, b_buffer_chunk),
281 to_gko_dense(gko_exec, x_chunk));
282 m_solver->remove_logger(convergence_logger);
284 m_solver_tr->add_logger(convergence_logger);
286 ->apply(to_gko_dense(gko_exec, b_buffer_chunk),
287 to_gko_dense(gko_exec, x_chunk));
288 m_solver_tr->remove_logger(convergence_logger);
291 if (!convergence_logger->has_converged()) {
292 throw std::runtime_error(
293 "Ginkgo did not converged in ddc::detail::SplinesLinearProblemSparse");
296 Kokkos::deep_copy(b_chunk, x_chunk);
The top-level namespace of DDC.