DDC 0.10.0
Loading...
Searching...
No Matches
splines_linear_problem_2x2_blocks.cpp
1// Copyright (C) The DDC development team, see COPYRIGHT.md file
2//
3// SPDX-License-Identifier: MIT
4
5#include <cassert>
6#include <cstddef>
7#include <memory>
8#include <utility>
9
10#include <Kokkos_Core.hpp>
11#include <Kokkos_DualView.hpp>
12
16
17namespace ddc::detail {
18
19template <class ExecSpace>
20struct SplinesLinearProblem2x2Blocks<ExecSpace>::Coo
21{
22 std::size_t m_nrows;
23 std::size_t m_ncols;
24 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space> m_rows_idx;
25 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space> m_cols_idx;
26 Kokkos::View<double*, Kokkos::LayoutRight, typename ExecSpace::memory_space> m_values;
27
28 Coo() : m_nrows(0), m_ncols(0) {}
29
30 Coo(std::size_t const nrows_,
31 std::size_t const ncols_,
32 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space> rows_idx_,
33 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space> cols_idx_,
34 Kokkos::View<double*, Kokkos::LayoutRight, typename ExecSpace::memory_space> values_)
35 : m_nrows(nrows_)
36 , m_ncols(ncols_)
37 , m_rows_idx(std::move(rows_idx_))
38 , m_cols_idx(std::move(cols_idx_))
39 , m_values(std::move(values_))
40 {
41 assert(m_rows_idx.extent(0) == m_cols_idx.extent(0));
42 assert(m_rows_idx.extent(0) == m_values.extent(0));
43 }
44
45 KOKKOS_FUNCTION std::size_t nnz() const
46 {
47 return m_values.extent(0);
48 }
49
50 KOKKOS_FUNCTION std::size_t nrows() const
51 {
52 return m_nrows;
53 }
54
55 KOKKOS_FUNCTION std::size_t ncols() const
56 {
57 return m_ncols;
58 }
59
60 KOKKOS_FUNCTION Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space>
61 rows_idx() const
62 {
63 return m_rows_idx;
64 }
65
66 KOKKOS_FUNCTION Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space>
67 cols_idx() const
68 {
69 return m_cols_idx;
70 }
71
72 KOKKOS_FUNCTION Kokkos::View<double*, Kokkos::LayoutRight, typename ExecSpace::memory_space>
73 values() const
74 {
75 return m_values;
76 }
77};
78
79template <class ExecSpace>
80SplinesLinearProblem2x2Blocks<ExecSpace>::SplinesLinearProblem2x2Blocks(
81 std::size_t const mat_size,
82 std::unique_ptr<SplinesLinearProblem<ExecSpace>> top_left_block)
83 : SplinesLinearProblem<ExecSpace>(mat_size)
84 , m_top_left_block(std::move(top_left_block))
85 , m_top_right_block(
86 "top_right_block",
87 m_top_left_block->size(),
88 mat_size - m_top_left_block->size())
89 , m_bottom_left_block(
90 "bottom_left_block",
91 mat_size - m_top_left_block->size(),
92 m_top_left_block->size())
93 , m_bottom_right_block(
94 new SplinesLinearProblemDense<ExecSpace>(mat_size - m_top_left_block->size()))
95{
96 assert(m_top_left_block->size() <= mat_size);
97
98 Kokkos::deep_copy(m_top_right_block.view_host(), 0.);
99 Kokkos::deep_copy(m_bottom_left_block.view_host(), 0.);
100}
101
102template <class ExecSpace>
103SplinesLinearProblem2x2Blocks<ExecSpace>::~SplinesLinearProblem2x2Blocks() = default;
104
105template <class ExecSpace>
106double SplinesLinearProblem2x2Blocks<ExecSpace>::get_element(
107 std::size_t const i,
108 std::size_t const j) const
109{
110 assert(i < size());
111 assert(j < size());
112
113 std::size_t const nq = m_top_left_block->size();
114 if (i < nq && j < nq) {
115 return m_top_left_block->get_element(i, j);
116 }
117
118 if (i >= nq && j >= nq) {
119 return m_bottom_right_block->get_element(i - nq, j - nq);
120 }
121
122 if (j >= nq) {
123 return m_top_right_block.view_host()(i, j - nq);
124 }
125
126 return m_bottom_left_block.view_host()(i - nq, j);
127}
128
129template <class ExecSpace>
130void SplinesLinearProblem2x2Blocks<ExecSpace>::set_element(
131 std::size_t const i,
132 std::size_t const j,
133 double const aij)
134{
135 assert(i < size());
136 assert(j < size());
137
138 std::size_t const nq = m_top_left_block->size();
139 if (i < nq && j < nq) {
140 m_top_left_block->set_element(i, j, aij);
141 } else if (i >= nq && j >= nq) {
142 m_bottom_right_block->set_element(i - nq, j - nq, aij);
143 } else if (j >= nq) {
144 m_top_right_block.view_host()(i, j - nq) = aij;
145 } else {
146 m_bottom_left_block.view_host()(i - nq, j) = aij;
147 }
148}
149
150template <class ExecSpace>
151std::unique_ptr<typename SplinesLinearProblem2x2Blocks<ExecSpace>::Coo>
152SplinesLinearProblem2x2Blocks<ExecSpace>::dense2coo(
153 Kokkos::View<double const**, Kokkos::LayoutRight, typename ExecSpace::memory_space>
154 dense_matrix,
155 double const tol)
156{
157 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space>
158 rows_idx("ddc_splines_coo_rows_idx", dense_matrix.extent(0) * dense_matrix.extent(1));
159 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space>
160 cols_idx("ddc_splines_coo_cols_idx", dense_matrix.extent(0) * dense_matrix.extent(1));
161 Kokkos::View<double*, Kokkos::LayoutRight, typename ExecSpace::memory_space>
162 values("ddc_splines_coo_values", dense_matrix.extent(0) * dense_matrix.extent(1));
163
164 Kokkos::DualView<std::size_t, Kokkos::LayoutRight, typename ExecSpace::memory_space> n_nonzeros(
165 "ddc_splines_n_nonzeros");
166 n_nonzeros.view_host()() = 0;
167 n_nonzeros.modify_host();
168 n_nonzeros.sync_device();
169
170 auto const n_nonzeros_device = n_nonzeros.view_device();
171 Kokkos::parallel_for(
172 "dense2coo",
173 Kokkos::RangePolicy(ExecSpace(), 0, 1),
174 KOKKOS_LAMBDA(int const) {
175 for (int i = 0; i < dense_matrix.extent(0); ++i) {
176 for (int j = 0; j < dense_matrix.extent(1); ++j) {
177 double const aij = dense_matrix(i, j);
178 if (Kokkos::abs(aij) >= tol) {
179 rows_idx(n_nonzeros_device()) = i;
180 cols_idx(n_nonzeros_device()) = j;
181 values(n_nonzeros_device()) = aij;
182 n_nonzeros_device()++;
183 }
184 }
185 }
186 });
187 n_nonzeros.modify_device();
188 n_nonzeros.sync_host();
189 Kokkos::resize(rows_idx, n_nonzeros.view_host()());
190 Kokkos::resize(cols_idx, n_nonzeros.view_host()());
191 Kokkos::resize(values, n_nonzeros.view_host()());
192
193 return std::make_unique<
194 Coo>(dense_matrix.extent(0), dense_matrix.extent(1), rows_idx, cols_idx, values);
195}
196
197template <class ExecSpace>
198void SplinesLinearProblem2x2Blocks<ExecSpace>::compute_schur_complement()
199{
200 auto const bottom_left_block = m_bottom_left_block.view_host();
201 auto const top_right_block = m_top_right_block.view_host();
202 Kokkos::parallel_for(
203 "compute_schur_complement",
204 Kokkos::MDRangePolicy<Kokkos::DefaultHostExecutionSpace, Kokkos::Rank<2>>(
205 {0, 0},
206 {m_bottom_right_block->size(), m_bottom_right_block->size()}),
207 [&](int const i, int const j) {
208 double val = 0.0;
209 for (int l = 0; l < m_top_left_block->size(); ++l) {
210 val += bottom_left_block(i, l) * top_right_block(l, j);
211 }
212 m_bottom_right_block
213 ->set_element(i, j, m_bottom_right_block->get_element(i, j) - val);
214 });
215}
216
217template <class ExecSpace>
218void SplinesLinearProblem2x2Blocks<ExecSpace>::setup_solver()
219{
220 // Setup the top-left solver
221 m_top_left_block->setup_solver();
222
223 // Compute Q^-1*gamma in top-right block
224 m_top_right_block.modify_host();
225 m_top_right_block.sync_device();
226 m_top_left_block->solve(m_top_right_block.view_device(), false);
227 m_top_right_block_coo = dense2coo(m_top_right_block.view_device());
228 m_top_right_block.modify_device();
229 m_top_right_block.sync_host();
230
231 // Push lambda on device in bottom-left block
232 m_bottom_left_block.modify_host();
233 m_bottom_left_block.sync_device();
234 m_bottom_left_block_coo = dense2coo(m_bottom_left_block.view_device());
235
236 // Compute delta - lambda*Q^-1*gamma in bottom-right block & setup the bottom-right solver
237 compute_schur_complement();
238 m_bottom_right_block->setup_solver();
239}
240
241template <class ExecSpace>
242void SplinesLinearProblem2x2Blocks<ExecSpace>::spdm_minus1_1(
243 Coo const& LinOp,
244 MultiRHS const x,
245 MultiRHS const y,
246 bool const transpose) const
247{
248 assert((!transpose && LinOp.nrows() == y.extent(0))
249 || (transpose && LinOp.ncols() == y.extent(0)));
250 assert((!transpose && LinOp.ncols() == x.extent(0))
251 || (transpose && LinOp.nrows() == x.extent(0)));
252 assert(x.extent(1) == y.extent(1));
253
254 if (!transpose) {
255 Kokkos::parallel_for(
256 "ddc_splines_spdm_minus1_1",
257 Kokkos::RangePolicy(ExecSpace(), 0, y.extent(1)),
258 KOKKOS_LAMBDA(int const j) {
259 for (int nz_idx = 0; nz_idx < LinOp.nnz(); ++nz_idx) {
260 int const i = LinOp.rows_idx()(nz_idx);
261 int const k = LinOp.cols_idx()(nz_idx);
262 y(i, j) -= LinOp.values()(nz_idx) * x(k, j);
263 }
264 });
265 } else {
266 Kokkos::parallel_for(
267 "ddc_splines_spdm_minus1_1_tr",
268 Kokkos::RangePolicy(ExecSpace(), 0, y.extent(1)),
269 KOKKOS_LAMBDA(int const j) {
270 for (int nz_idx = 0; nz_idx < LinOp.nnz(); ++nz_idx) {
271 int const i = LinOp.rows_idx()(nz_idx);
272 int const k = LinOp.cols_idx()(nz_idx);
273 y(k, j) -= LinOp.values()(nz_idx) * x(i, j);
274 }
275 });
276 }
277}
278
279template <class ExecSpace>
280void SplinesLinearProblem2x2Blocks<ExecSpace>::solve(MultiRHS const b, bool const transpose) const
281{
282 assert(b.extent(0) == size());
283
284 MultiRHS const b1 = Kokkos::
285 subview(b,
286 std::pair<std::size_t, std::size_t>(0, m_top_left_block->size()),
287 Kokkos::ALL);
288 MultiRHS const b2 = Kokkos::
289 subview(b,
290 std::pair<std::size_t, std::size_t>(m_top_left_block->size(), b.extent(0)),
291 Kokkos::ALL);
292 if (!transpose) {
293 m_top_left_block->solve(b1, false);
294 spdm_minus1_1(*m_bottom_left_block_coo, b1, b2);
295 m_bottom_right_block->solve(b2, false);
296 spdm_minus1_1(*m_top_right_block_coo, b2, b1);
297 } else {
298 spdm_minus1_1(*m_top_right_block_coo, b1, b2, true);
299 m_bottom_right_block->solve(b2, true);
300 spdm_minus1_1(*m_bottom_left_block_coo, b2, b1, true);
301 m_top_left_block->solve(b1, true);
302 }
303}
304
305#if defined(KOKKOS_ENABLE_SERIAL)
306template class SplinesLinearProblem2x2Blocks<Kokkos::Serial>;
307#endif
308#if defined(KOKKOS_ENABLE_OPENMP)
309template class SplinesLinearProblem2x2Blocks<Kokkos::OpenMP>;
310#endif
311#if defined(KOKKOS_ENABLE_CUDA)
312template class SplinesLinearProblem2x2Blocks<Kokkos::Cuda>;
313#endif
314#if defined(KOKKOS_ENABLE_HIP)
315template class SplinesLinearProblem2x2Blocks<Kokkos::HIP>;
316#endif
317#if defined(KOKKOS_ENABLE_SYCL)
318template class SplinesLinearProblem2x2Blocks<Kokkos::SYCL>;
319#endif
320
321} // namespace ddc::detail
The top-level namespace of DDC.