13#include <ginkgo/extensions/kokkos.hpp>
14#include <ginkgo/ginkgo.hpp>
16#include <Kokkos_Core.hpp>
21namespace ddc::detail {
26
27
28
29
30
31
32template <
class KokkosViewType>
33auto to_gko_dense(std::shared_ptr<gko::Executor
const>
const& gko_exec, KokkosViewType
const& view)
35 static_assert(Kokkos::is_view_v<KokkosViewType> && KokkosViewType::rank == 2);
36 using value_type =
typename KokkosViewType::traits::value_type;
38 if (view.stride(1) != 1) {
39 throw std::runtime_error(
"The view needs to be contiguous in the second dimension");
42 return gko::matrix::Dense<value_type>::
44 gko::dim<2>(view.extent(0), view.extent(1)),
45 gko::array<value_type>::view(gko_exec, view.span(), view.data()),
50
51
52
53
54
55
56
57
58template <
class ExecSpace>
59std::size_t default_cols_per_chunk()
noexcept
61#if defined(KOKKOS_ENABLE_SERIAL
)
62 if (std::is_same_v<ExecSpace, Kokkos::Serial>) {
66#if defined(KOKKOS_ENABLE_OPENMP)
67 if (std::is_same_v<ExecSpace, Kokkos::OpenMP>) {
71#if defined(KOKKOS_ENABLE_CUDA)
72 if (std::is_same_v<ExecSpace, Kokkos::Cuda>) {
76#if defined(KOKKOS_ENABLE_HIP)
77 if (std::is_same_v<ExecSpace, Kokkos::HIP>) {
81#if defined(KOKKOS_ENABLE_SYCL)
82 if (std::is_same_v<ExecSpace, Kokkos::SYCL>) {
90
91
92
93
94
95
96
97
98template <
class ExecSpace>
99unsigned int default_preconditioner_max_block_size()
noexcept
101#if defined(KOKKOS_ENABLE_SERIAL
)
102 if (std::is_same_v<ExecSpace, Kokkos::Serial>) {
106#if defined(KOKKOS_ENABLE_OPENMP)
107 if (std::is_same_v<ExecSpace, Kokkos::OpenMP>) {
111#if defined(KOKKOS_ENABLE_CUDA)
112 if (std::is_same_v<ExecSpace, Kokkos::Cuda>) {
116#if defined(KOKKOS_ENABLE_HIP)
117 if (std::is_same_v<ExecSpace, Kokkos::HIP>) {
121#if defined(KOKKOS_ENABLE_SYCL)
122 if (std::is_same_v<ExecSpace, Kokkos::SYCL>) {
131template <
class ExecSpace>
132class SplinesLinearProblemSparse<ExecSpace>::Impl
135 using MultiRHS =
typename SplinesLinearProblem<ExecSpace>::MultiRHS;
138 using matrix_sparse_type = gko::matrix::Csr<
double, gko::int32>;
139#if defined(KOKKOS_ENABLE_OPENMP)
140 using solver_type = std::conditional_t<
141 std::is_same_v<ExecSpace, Kokkos::OpenMP>,
142 gko::solver::Gmres<
double>,
143 gko::solver::Bicgstab<
double>>;
145 using solver_type = gko::solver::Bicgstab<
double>;
149 std::size_t m_mat_size;
151 std::unique_ptr<gko::matrix::Dense<
double>> m_matrix_dense;
153 std::shared_ptr<matrix_sparse_type> m_matrix_sparse;
155 std::shared_ptr<solver_type> m_solver;
156 std::shared_ptr<gko::LinOp> m_solver_tr;
158 std::size_t m_cols_per_chunk;
160 unsigned int m_preconditioner_max_block_size;
164 std::size_t
const mat_size,
165 std::optional<std::size_t>
const cols_per_chunk = std::nullopt,
166 std::optional<
unsigned int>
const preconditioner_max_block_size = std::nullopt)
167 : m_mat_size(mat_size)
168 , m_cols_per_chunk(cols_per_chunk.value_or(default_cols_per_chunk<ExecSpace>()))
169 , m_preconditioner_max_block_size(preconditioner_max_block_size.value_or(
170 default_preconditioner_max_block_size<ExecSpace>()))
172 std::shared_ptr
const gko_exec = gko::ext::kokkos::create_executor(ExecSpace());
173 m_matrix_dense = gko::matrix::Dense<
174 double>::create(gko_exec->get_master(), gko::dim<2>(mat_size, mat_size));
175 m_matrix_dense->fill(0);
176 m_matrix_sparse = matrix_sparse_type::create(gko_exec, gko::dim<2>(mat_size, mat_size));
179 double get_element(std::size_t
const i, std::size_t
const j)
const
181 return m_matrix_dense->at(i, j);
184 void set_element(std::size_t
const i, std::size_t
const j,
double const aij)
186 m_matrix_dense->at(i, j) = aij;
192 gko::matrix_data<
double> matrix_data(gko::dim<2>(m_mat_size, m_mat_size));
193 m_matrix_dense->write(matrix_data);
194 m_matrix_dense.reset();
195 matrix_data.remove_zeros();
196 m_matrix_sparse->read(matrix_data);
197 std::shared_ptr
const gko_exec = m_matrix_sparse->get_executor();
200 std::shared_ptr
const residual_criterion
201 = gko::stop::ResidualNorm<
double>::build().with_reduction_factor(1e-15).on(
204 std::shared_ptr
const iterations_criterion
205 = gko::stop::Iteration::build().with_max_iters(1000U).on(gko_exec);
207 std::shared_ptr
const preconditioner
208 = gko::preconditioner::Jacobi<
double>::build()
209 .with_max_block_size(m_preconditioner_max_block_size)
212 std::unique_ptr
const solver_factory
213 = solver_type::build()
214 .with_preconditioner(preconditioner)
215 .with_criteria(residual_criterion, iterations_criterion)
218 m_solver = solver_factory->generate(m_matrix_sparse);
219 m_solver_tr = m_solver->transpose();
220 gko_exec->synchronize();
224
225
226
227
228
229
230
231
232
233 void solve(MultiRHS
const b,
bool const transpose)
const
235 assert(b.extent(0) == m_mat_size);
237 std::shared_ptr
const gko_exec = m_solver->get_executor();
238 std::shared_ptr
const convergence_logger = gko::log::Convergence<
double>::create();
240 std::size_t
const main_chunk_size = std::min(m_cols_per_chunk, b.extent(1));
242 MultiRHS
const b_buffer(
"ddc_sparse_b_buffer", m_mat_size, main_chunk_size);
243 MultiRHS
const x(
"ddc_sparse_x", m_mat_size, main_chunk_size);
245 std::size_t
const iend = (b.extent(1) + main_chunk_size - 1) / main_chunk_size;
246 for (std::size_t i = 0; i < iend; ++i) {
247 std::size_t
const subview_begin = i * main_chunk_size;
248 std::size_t
const subview_end
249 = (i + 1 == iend) ? b.extent(1) : (subview_begin + main_chunk_size);
252 = Kokkos::subview(b, Kokkos::ALL, Kokkos::pair(subview_begin, subview_end));
253 auto const b_buffer_chunk = Kokkos::
256 Kokkos::pair(
static_cast<std::size_t>(0), subview_end - subview_begin));
257 auto const x_chunk = Kokkos::
260 Kokkos::pair(
static_cast<std::size_t>(0), subview_end - subview_begin));
262 Kokkos::deep_copy(b_buffer_chunk, b_chunk);
263 Kokkos::deep_copy(x_chunk, b_chunk);
266 m_solver->add_logger(convergence_logger);
268 ->apply(to_gko_dense(gko_exec, b_buffer_chunk),
269 to_gko_dense(gko_exec, x_chunk));
270 m_solver->remove_logger(convergence_logger);
272 m_solver_tr->add_logger(convergence_logger);
274 ->apply(to_gko_dense(gko_exec, b_buffer_chunk),
275 to_gko_dense(gko_exec, x_chunk));
276 m_solver_tr->remove_logger(convergence_logger);
279 if (!convergence_logger->has_converged()) {
280 throw std::runtime_error(
281 "Ginkgo did not converged in ddc::detail::SplinesLinearProblemSparse");
284 Kokkos::deep_copy(b_chunk, x_chunk);
289template <
class ExecSpace>
290SplinesLinearProblemSparse<ExecSpace>::SplinesLinearProblemSparse(
291 std::size_t
const mat_size,
292 std::optional<std::size_t> cols_per_chunk,
293 std::optional<
unsigned int> preconditioner_max_block_size)
294 : SplinesLinearProblem<ExecSpace>(mat_size)
295 , m_impl(std::make_unique<Impl>(mat_size, cols_per_chunk, preconditioner_max_block_size))
299template <
class ExecSpace>
300SplinesLinearProblemSparse<ExecSpace>::~SplinesLinearProblemSparse() =
default;
302template <
class ExecSpace>
303double SplinesLinearProblemSparse<ExecSpace>::get_element(std::size_t i, std::size_t j)
const
305 return m_impl->get_element(i, j);
308template <
class ExecSpace>
309void SplinesLinearProblemSparse<ExecSpace>::set_element(std::size_t i, std::size_t j,
double aij)
311 m_impl->set_element(i, j, aij);
314template <
class ExecSpace>
315void SplinesLinearProblemSparse<ExecSpace>::setup_solver()
317 m_impl->setup_solver();
320template <
class ExecSpace>
321void SplinesLinearProblemSparse<ExecSpace>::solve(MultiRHS
const b,
bool const transpose)
const
323 m_impl->solve(b, transpose);
326#if defined(KOKKOS_ENABLE_SERIAL
)
327template class SplinesLinearProblemSparse<Kokkos::Serial>;
329#if defined(KOKKOS_ENABLE_OPENMP)
330template class SplinesLinearProblemSparse<Kokkos::OpenMP>;
332#if defined(KOKKOS_ENABLE_CUDA)
333template class SplinesLinearProblemSparse<Kokkos::Cuda>;
335#if defined(KOKKOS_ENABLE_HIP)
336template class SplinesLinearProblemSparse<Kokkos::HIP>;
338#if defined(KOKKOS_ENABLE_SYCL)
339template class SplinesLinearProblemSparse<Kokkos::SYCL>;
The top-level namespace of DDC.