DDC 0.11.0
Loading...
Searching...
No Matches
splines_linear_problem_2x2_blocks.cpp
1// Copyright (C) The DDC development team, see COPYRIGHT.md file
2//
3// SPDX-License-Identifier: MIT
4
5#include <cassert>
6#include <cstddef>
7#include <memory>
8#include <utility>
9
10#include <Kokkos_Core.hpp>
11
15
16namespace ddc::detail {
17
18template <class ExecSpace>
19struct SplinesLinearProblem2x2Blocks<ExecSpace>::Coo
20{
21 std::size_t m_nrows;
22 std::size_t m_ncols;
23 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space> m_rows_idx;
24 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space> m_cols_idx;
25 Kokkos::View<double*, Kokkos::LayoutRight, typename ExecSpace::memory_space> m_values;
26
27 Coo() : m_nrows(0), m_ncols(0) {}
28
29 Coo(std::size_t const nrows_,
30 std::size_t const ncols_,
31 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space> rows_idx_,
32 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space> cols_idx_,
33 Kokkos::View<double*, Kokkos::LayoutRight, typename ExecSpace::memory_space> values_)
34 : m_nrows(nrows_)
35 , m_ncols(ncols_)
36 , m_rows_idx(std::move(rows_idx_))
37 , m_cols_idx(std::move(cols_idx_))
38 , m_values(std::move(values_))
39 {
40 assert(m_rows_idx.extent(0) == m_cols_idx.extent(0));
41 assert(m_rows_idx.extent(0) == m_values.extent(0));
42 }
43
44 KOKKOS_FUNCTION std::size_t nnz() const
45 {
46 return m_values.extent(0);
47 }
48
49 KOKKOS_FUNCTION std::size_t nrows() const
50 {
51 return m_nrows;
52 }
53
54 KOKKOS_FUNCTION std::size_t ncols() const
55 {
56 return m_ncols;
57 }
58
59 KOKKOS_FUNCTION Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space>
60 rows_idx() const
61 {
62 return m_rows_idx;
63 }
64
65 KOKKOS_FUNCTION Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space>
66 cols_idx() const
67 {
68 return m_cols_idx;
69 }
70
71 KOKKOS_FUNCTION Kokkos::View<double*, Kokkos::LayoutRight, typename ExecSpace::memory_space>
72 values() const
73 {
74 return m_values;
75 }
76};
77
78template <class ExecSpace>
79SplinesLinearProblem2x2Blocks<ExecSpace>::SplinesLinearProblem2x2Blocks(
80 std::size_t const mat_size,
81 std::unique_ptr<SplinesLinearProblem<ExecSpace>> top_left_block)
82 : SplinesLinearProblem<ExecSpace>(mat_size)
83 , m_top_left_block(std::move(top_left_block))
84 , m_top_right_block(
85 "top_right_block",
86 m_top_left_block->size(),
87 mat_size - m_top_left_block->size())
88 , m_bottom_left_block(
89 "bottom_left_block",
90 mat_size - m_top_left_block->size(),
91 m_top_left_block->size())
92 , m_bottom_right_block(
93 new SplinesLinearProblemDense<ExecSpace>(mat_size - m_top_left_block->size()))
94{
95 assert(m_top_left_block->size() <= mat_size);
96
97 Kokkos::deep_copy(m_top_right_block.view_host(), 0.);
98 Kokkos::deep_copy(m_bottom_left_block.view_host(), 0.);
99}
100
101template <class ExecSpace>
102SplinesLinearProblem2x2Blocks<ExecSpace>::~SplinesLinearProblem2x2Blocks() = default;
103
104template <class ExecSpace>
105double SplinesLinearProblem2x2Blocks<ExecSpace>::get_element(
106 std::size_t const i,
107 std::size_t const j) const
108{
109 assert(i < size());
110 assert(j < size());
111
112 std::size_t const nq = m_top_left_block->size();
113 if (i < nq && j < nq) {
114 return m_top_left_block->get_element(i, j);
115 }
116
117 if (i >= nq && j >= nq) {
118 return m_bottom_right_block->get_element(i - nq, j - nq);
119 }
120
121 if (j >= nq) {
122 return m_top_right_block.view_host()(i, j - nq);
123 }
124
125 return m_bottom_left_block.view_host()(i - nq, j);
126}
127
128template <class ExecSpace>
129void SplinesLinearProblem2x2Blocks<ExecSpace>::set_element(
130 std::size_t const i,
131 std::size_t const j,
132 double const aij)
133{
134 assert(i < size());
135 assert(j < size());
136
137 std::size_t const nq = m_top_left_block->size();
138 if (i < nq && j < nq) {
139 m_top_left_block->set_element(i, j, aij);
140 } else if (i >= nq && j >= nq) {
141 m_bottom_right_block->set_element(i - nq, j - nq, aij);
142 } else if (j >= nq) {
143 m_top_right_block.view_host()(i, j - nq) = aij;
144 } else {
145 m_bottom_left_block.view_host()(i - nq, j) = aij;
146 }
147}
148
149template <class ExecSpace>
150std::unique_ptr<typename SplinesLinearProblem2x2Blocks<ExecSpace>::Coo>
151SplinesLinearProblem2x2Blocks<ExecSpace>::dense2coo(
152 Kokkos::View<double const**, Kokkos::LayoutRight, typename ExecSpace::memory_space>
153 dense_matrix,
154 double const tol)
155{
156 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space>
157 rows_idx("ddc_splines_coo_rows_idx", dense_matrix.extent(0) * dense_matrix.extent(1));
158 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space>
159 cols_idx("ddc_splines_coo_cols_idx", dense_matrix.extent(0) * dense_matrix.extent(1));
160 Kokkos::View<double*, Kokkos::LayoutRight, typename ExecSpace::memory_space>
161 values("ddc_splines_coo_values", dense_matrix.extent(0) * dense_matrix.extent(1));
162
163 Kokkos::DualView<std::size_t, Kokkos::LayoutRight, typename ExecSpace::memory_space> n_nonzeros(
164 "ddc_splines_n_nonzeros");
165 n_nonzeros.view_host()() = 0;
166 n_nonzeros.modify_host();
167 n_nonzeros.sync_device();
168
169 auto const n_nonzeros_device = n_nonzeros.view_device();
170 Kokkos::parallel_for(
171 "dense2coo",
172 Kokkos::RangePolicy(ExecSpace(), 0, 1),
173 KOKKOS_LAMBDA(int const) {
174 for (int i = 0; i < dense_matrix.extent(0); ++i) {
175 for (int j = 0; j < dense_matrix.extent(1); ++j) {
176 double const aij = dense_matrix(i, j);
177 if (Kokkos::abs(aij) >= tol) {
178 rows_idx(n_nonzeros_device()) = i;
179 cols_idx(n_nonzeros_device()) = j;
180 values(n_nonzeros_device()) = aij;
181 n_nonzeros_device()++;
182 }
183 }
184 }
185 });
186 n_nonzeros.modify_device();
187 n_nonzeros.sync_host();
188 Kokkos::resize(rows_idx, n_nonzeros.view_host()());
189 Kokkos::resize(cols_idx, n_nonzeros.view_host()());
190 Kokkos::resize(values, n_nonzeros.view_host()());
191
192 return std::make_unique<
193 Coo>(dense_matrix.extent(0), dense_matrix.extent(1), rows_idx, cols_idx, values);
194}
195
196template <class ExecSpace>
197void SplinesLinearProblem2x2Blocks<ExecSpace>::compute_schur_complement()
198{
199 auto const bottom_left_block = m_bottom_left_block.view_host();
200 auto const top_right_block = m_top_right_block.view_host();
201 Kokkos::parallel_for(
202 "compute_schur_complement",
203 Kokkos::MDRangePolicy<Kokkos::DefaultHostExecutionSpace, Kokkos::Rank<2>>(
204 {0, 0},
205 {m_bottom_right_block->size(), m_bottom_right_block->size()}),
206 [&](int const i, int const j) {
207 double val = 0.0;
208 for (int l = 0; l < m_top_left_block->size(); ++l) {
209 val += bottom_left_block(i, l) * top_right_block(l, j);
210 }
211 m_bottom_right_block
212 ->set_element(i, j, m_bottom_right_block->get_element(i, j) - val);
213 });
214}
215
216template <class ExecSpace>
217void SplinesLinearProblem2x2Blocks<ExecSpace>::setup_solver()
218{
219 // Setup the top-left solver
220 m_top_left_block->setup_solver();
221
222 // Compute Q^-1*gamma in top-right block
223 m_top_right_block.modify_host();
224 m_top_right_block.sync_device();
225 m_top_left_block->solve(m_top_right_block.view_device(), false);
226 m_top_right_block_coo = dense2coo(m_top_right_block.view_device());
227 m_top_right_block.modify_device();
228 m_top_right_block.sync_host();
229
230 // Push lambda on device in bottom-left block
231 m_bottom_left_block.modify_host();
232 m_bottom_left_block.sync_device();
233 m_bottom_left_block_coo = dense2coo(m_bottom_left_block.view_device());
234
235 // Compute delta - lambda*Q^-1*gamma in bottom-right block & setup the bottom-right solver
236 compute_schur_complement();
237 m_bottom_right_block->setup_solver();
238}
239
240template <class ExecSpace>
241void SplinesLinearProblem2x2Blocks<ExecSpace>::spdm_minus1_1(
242 Coo const& LinOp,
243 MultiRHS const x,
244 MultiRHS const y,
245 bool const transpose) const
246{
247 assert((!transpose && LinOp.nrows() == y.extent(0))
248 || (transpose && LinOp.ncols() == y.extent(0)));
249 assert((!transpose && LinOp.ncols() == x.extent(0))
250 || (transpose && LinOp.nrows() == x.extent(0)));
251 assert(x.extent(1) == y.extent(1));
252
253 if (!transpose) {
254 Kokkos::parallel_for(
255 "ddc_splines_spdm_minus1_1",
256 Kokkos::RangePolicy(ExecSpace(), 0, y.extent(1)),
257 KOKKOS_LAMBDA(int const j) {
258 for (int nz_idx = 0; nz_idx < LinOp.nnz(); ++nz_idx) {
259 int const i = LinOp.rows_idx()(nz_idx);
260 int const k = LinOp.cols_idx()(nz_idx);
261 y(i, j) -= LinOp.values()(nz_idx) * x(k, j);
262 }
263 });
264 } else {
265 Kokkos::parallel_for(
266 "ddc_splines_spdm_minus1_1_tr",
267 Kokkos::RangePolicy(ExecSpace(), 0, y.extent(1)),
268 KOKKOS_LAMBDA(int const j) {
269 for (int nz_idx = 0; nz_idx < LinOp.nnz(); ++nz_idx) {
270 int const i = LinOp.rows_idx()(nz_idx);
271 int const k = LinOp.cols_idx()(nz_idx);
272 y(k, j) -= LinOp.values()(nz_idx) * x(i, j);
273 }
274 });
275 }
276}
277
278template <class ExecSpace>
279void SplinesLinearProblem2x2Blocks<ExecSpace>::solve(MultiRHS const b, bool const transpose) const
280{
281 assert(b.extent(0) == size());
282
283 MultiRHS const b1 = Kokkos::
284 subview(b,
285 std::pair<std::size_t, std::size_t>(0, m_top_left_block->size()),
286 Kokkos::ALL);
287 MultiRHS const b2 = Kokkos::
288 subview(b,
289 std::pair<std::size_t, std::size_t>(m_top_left_block->size(), b.extent(0)),
290 Kokkos::ALL);
291 if (!transpose) {
292 m_top_left_block->solve(b1, false);
293 spdm_minus1_1(*m_bottom_left_block_coo, b1, b2);
294 m_bottom_right_block->solve(b2, false);
295 spdm_minus1_1(*m_top_right_block_coo, b2, b1);
296 } else {
297 spdm_minus1_1(*m_top_right_block_coo, b1, b2, true);
298 m_bottom_right_block->solve(b2, true);
299 spdm_minus1_1(*m_bottom_left_block_coo, b2, b1, true);
300 m_top_left_block->solve(b1, true);
301 }
302}
303
304#if defined(KOKKOS_ENABLE_SERIAL)
305template class SplinesLinearProblem2x2Blocks<Kokkos::Serial>;
306#endif
307#if defined(KOKKOS_ENABLE_OPENMP)
308template class SplinesLinearProblem2x2Blocks<Kokkos::OpenMP>;
309#endif
310#if defined(KOKKOS_ENABLE_CUDA)
311template class SplinesLinearProblem2x2Blocks<Kokkos::Cuda>;
312#endif
313#if defined(KOKKOS_ENABLE_HIP)
314template class SplinesLinearProblem2x2Blocks<Kokkos::HIP>;
315#endif
316#if defined(KOKKOS_ENABLE_SYCL)
317template class SplinesLinearProblem2x2Blocks<Kokkos::SYCL>;
318#endif
319
320} // namespace ddc::detail
The top-level namespace of DDC.