10#include <Kokkos_Core.hpp>
16namespace ddc::detail {
18template <
class ExecSpace>
19struct SplinesLinearProblem2x2Blocks<ExecSpace>::Coo
23 Kokkos::View<
int*, Kokkos::LayoutRight, memory_space> m_rows_idx;
24 Kokkos::View<
int*, Kokkos::LayoutRight, memory_space> m_cols_idx;
25 Kokkos::View<
double*, Kokkos::LayoutRight, memory_space> m_values;
27 Coo() : m_nrows(0), m_ncols(0) {}
29 Coo(std::size_t
const nrows_,
30 std::size_t
const ncols_,
31 Kokkos::View<
int*, Kokkos::LayoutRight, memory_space> rows_idx_,
32 Kokkos::View<
int*, Kokkos::LayoutRight, memory_space> cols_idx_,
33 Kokkos::View<
double*, Kokkos::LayoutRight, memory_space> values_)
36 , m_rows_idx(std::move(rows_idx_))
37 , m_cols_idx(std::move(cols_idx_))
38 , m_values(std::move(values_))
40 assert(m_rows_idx.extent(0) == m_cols_idx.extent(0));
41 assert(m_rows_idx.extent(0) == m_values.extent(0));
44 KOKKOS_FUNCTION std::size_t nnz()
const
46 return m_values.extent(0);
49 KOKKOS_FUNCTION std::size_t nrows()
const
54 KOKKOS_FUNCTION std::size_t ncols()
const
59 KOKKOS_FUNCTION Kokkos::View<
int*, Kokkos::LayoutRight, memory_space> rows_idx()
const
64 KOKKOS_FUNCTION Kokkos::View<
int*, Kokkos::LayoutRight, memory_space> cols_idx()
const
69 KOKKOS_FUNCTION Kokkos::View<
double*, Kokkos::LayoutRight, memory_space> values()
const
75template <
class ExecSpace>
76SplinesLinearProblem2x2Blocks<ExecSpace>::SplinesLinearProblem2x2Blocks(
77 std::size_t
const mat_size,
78 std::unique_ptr<SplinesLinearProblem<ExecSpace>> top_left_block)
79 : SplinesLinearProblem<ExecSpace>(mat_size)
80 , m_top_left_block(std::move(top_left_block))
83 m_top_left_block->size(),
84 mat_size - m_top_left_block->size())
85 , m_bottom_left_block(
87 mat_size - m_top_left_block->size(),
88 m_top_left_block->size())
89 , m_bottom_right_block(
90 new SplinesLinearProblemDense<ExecSpace>(mat_size - m_top_left_block->size()))
92 assert(m_top_left_block->size() <= mat_size);
94 Kokkos::deep_copy(m_top_right_block.view_host(), 0.);
95 Kokkos::deep_copy(m_bottom_left_block.view_host(), 0.);
98template <
class ExecSpace>
99SplinesLinearProblem2x2Blocks<ExecSpace>::~SplinesLinearProblem2x2Blocks() =
default;
101template <
class ExecSpace>
102double SplinesLinearProblem2x2Blocks<ExecSpace>::get_element(
104 std::size_t
const j)
const
109 std::size_t
const nq = m_top_left_block->size();
110 if (i < nq && j < nq) {
111 return m_top_left_block->get_element(i, j);
114 if (i >= nq && j >= nq) {
115 return m_bottom_right_block->get_element(i - nq, j - nq);
119 return m_top_right_block.view_host()(i, j - nq);
122 return m_bottom_left_block.view_host()(i - nq, j);
125template <
class ExecSpace>
126void SplinesLinearProblem2x2Blocks<ExecSpace>::set_element(
134 std::size_t
const nq = m_top_left_block->size();
135 if (i < nq && j < nq) {
136 m_top_left_block->set_element(i, j, aij);
137 }
else if (i >= nq && j >= nq) {
138 m_bottom_right_block->set_element(i - nq, j - nq, aij);
139 }
else if (j >= nq) {
140 m_top_right_block.view_host()(i, j - nq) = aij;
142 m_bottom_left_block.view_host()(i - nq, j) = aij;
146template <
class ExecSpace>
147std::unique_ptr<
typename SplinesLinearProblem2x2Blocks<ExecSpace>::Coo>
148SplinesLinearProblem2x2Blocks<ExecSpace>::dense2coo(
149 Kokkos::View<
double const**, Kokkos::LayoutRight, memory_space> dense_matrix,
152 Kokkos::View<
int*, Kokkos::LayoutRight, memory_space>
153 rows_idx(
"ddc_splines_coo_rows_idx", dense_matrix.extent(0) * dense_matrix.extent(1));
154 Kokkos::View<
int*, Kokkos::LayoutRight, memory_space>
155 cols_idx(
"ddc_splines_coo_cols_idx", dense_matrix.extent(0) * dense_matrix.extent(1));
156 Kokkos::View<
double*, Kokkos::LayoutRight, memory_space>
157 values(
"ddc_splines_coo_values", dense_matrix.extent(0) * dense_matrix.extent(1));
159 Kokkos::DualView<std::size_t, Kokkos::LayoutRight, memory_space> n_nonzeros(
160 "ddc_splines_n_nonzeros");
161 n_nonzeros.view_host()() = 0;
162 n_nonzeros.modify_host();
163 n_nonzeros.sync_device();
165 auto const n_nonzeros_device = n_nonzeros.view_device();
166 Kokkos::parallel_for(
168 Kokkos::RangePolicy(ExecSpace(), 0, 1),
169 KOKKOS_LAMBDA(
int const) {
170 for (
int i = 0; i < dense_matrix.extent(0); ++i) {
171 for (
int j = 0; j < dense_matrix.extent(1); ++j) {
172 double const aij = dense_matrix(i, j);
173 if (Kokkos::abs(aij) >= tol) {
174 rows_idx(n_nonzeros_device()) = i;
175 cols_idx(n_nonzeros_device()) = j;
176 values(n_nonzeros_device()) = aij;
177 n_nonzeros_device()++;
182 n_nonzeros.modify_device();
183 n_nonzeros.sync_host();
184 Kokkos::resize(rows_idx, n_nonzeros.view_host()());
185 Kokkos::resize(cols_idx, n_nonzeros.view_host()());
186 Kokkos::resize(values, n_nonzeros.view_host()());
188 return std::make_unique<
189 Coo>(dense_matrix.extent(0), dense_matrix.extent(1), rows_idx, cols_idx, values);
192template <
class ExecSpace>
193void SplinesLinearProblem2x2Blocks<ExecSpace>::compute_schur_complement()
195 auto const bottom_left_block = m_bottom_left_block.view_host();
196 auto const top_right_block = m_top_right_block.view_host();
197 Kokkos::parallel_for(
198 "compute_schur_complement",
199 Kokkos::MDRangePolicy<Kokkos::DefaultHostExecutionSpace, Kokkos::Rank<2>>(
201 {m_bottom_right_block->size(), m_bottom_right_block->size()}),
202 [&](
int const i,
int const j) {
204 for (
int l = 0; l < m_top_left_block->size(); ++l) {
205 val += bottom_left_block(i, l) * top_right_block(l, j);
208 ->set_element(i, j, m_bottom_right_block->get_element(i, j) - val);
212template <
class ExecSpace>
213void SplinesLinearProblem2x2Blocks<ExecSpace>::setup_solver()
216 m_top_left_block->setup_solver();
219 m_top_right_block.modify_host();
220 m_top_right_block.sync_device();
221 m_top_left_block->solve(m_top_right_block.view_device(),
false);
222 m_top_right_block_coo = dense2coo(m_top_right_block.view_device());
223 m_top_right_block.modify_device();
224 m_top_right_block.sync_host();
227 m_bottom_left_block.modify_host();
228 m_bottom_left_block.sync_device();
229 m_bottom_left_block_coo = dense2coo(m_bottom_left_block.view_device());
232 compute_schur_complement();
233 m_bottom_right_block->setup_solver();
236template <
class ExecSpace>
237void SplinesLinearProblem2x2Blocks<ExecSpace>::spdm_minus1_1(
241 bool const transpose)
const
243 assert((!transpose && LinOp.nrows() == y.extent(0))
244 || (transpose && LinOp.ncols() == y.extent(0)));
245 assert((!transpose && LinOp.ncols() == x.extent(0))
246 || (transpose && LinOp.nrows() == x.extent(0)));
247 assert(x.extent(1) == y.extent(1));
250 Kokkos::parallel_for(
251 "ddc_splines_spdm_minus1_1",
252 Kokkos::RangePolicy(ExecSpace(), 0, y.extent(1)),
253 KOKKOS_LAMBDA(
int const j) {
254 for (
int nz_idx = 0; nz_idx < LinOp.nnz(); ++nz_idx) {
255 int const i = LinOp.rows_idx()(nz_idx);
256 int const k = LinOp.cols_idx()(nz_idx);
257 y(i, j) -= LinOp.values()(nz_idx) * x(k, j);
261 Kokkos::parallel_for(
262 "ddc_splines_spdm_minus1_1_tr",
263 Kokkos::RangePolicy(ExecSpace(), 0, y.extent(1)),
264 KOKKOS_LAMBDA(
int const j) {
265 for (
int nz_idx = 0; nz_idx < LinOp.nnz(); ++nz_idx) {
266 int const i = LinOp.rows_idx()(nz_idx);
267 int const k = LinOp.cols_idx()(nz_idx);
268 y(k, j) -= LinOp.values()(nz_idx) * x(i, j);
274template <
class ExecSpace>
275void SplinesLinearProblem2x2Blocks<ExecSpace>::solve(MultiRHS
const b,
bool const transpose)
const
277 assert(b.extent(0) == size());
279 MultiRHS
const b1 = Kokkos::
281 std::pair<std::size_t, std::size_t>(0, m_top_left_block->size()),
283 MultiRHS
const b2 = Kokkos::
285 std::pair<std::size_t, std::size_t>(m_top_left_block->size(), b.extent(0)),
288 m_top_left_block->solve(b1,
false);
289 spdm_minus1_1(*m_bottom_left_block_coo, b1, b2);
290 m_bottom_right_block->solve(b2,
false);
291 spdm_minus1_1(*m_top_right_block_coo, b2, b1);
293 spdm_minus1_1(*m_top_right_block_coo, b1, b2,
true);
294 m_bottom_right_block->solve(b2,
true);
295 spdm_minus1_1(*m_bottom_left_block_coo, b2, b1,
true);
296 m_top_left_block->solve(b1,
true);
300#if defined(KOKKOS_ENABLE_SERIAL
)
301template class SplinesLinearProblem2x2Blocks<Kokkos::Serial>;
303#if defined(KOKKOS_ENABLE_OPENMP)
304template class SplinesLinearProblem2x2Blocks<Kokkos::OpenMP>;
306#if defined(KOKKOS_ENABLE_CUDA)
307template class SplinesLinearProblem2x2Blocks<Kokkos::Cuda>;
309#if defined(KOKKOS_ENABLE_HIP)
310template class SplinesLinearProblem2x2Blocks<Kokkos::HIP>;
312#if defined(KOKKOS_ENABLE_SYCL)
313template class SplinesLinearProblem2x2Blocks<Kokkos::SYCL>;
The top-level namespace of DDC.