DDC 0.10.0
Loading...
Searching...
No Matches
splines_linear_problem_2x2_blocks.hpp
1// Copyright (C) The DDC development team, see COPYRIGHT.md file
2//
3// SPDX-License-Identifier: MIT
4
5#pragma once
6
7#include <cassert>
8#include <cstddef>
9#include <memory>
10#include <utility>
11
12#include <Kokkos_Core.hpp>
13#include <Kokkos_DualView.hpp>
14
15#include "splines_linear_problem.hpp"
16#include "splines_linear_problem_dense.hpp"
17
18namespace ddc::detail {
19
20/**
21 * @brief A 2x2-blocks linear problem dedicated to the computation of a spline approximation,
22 * with all blocks except top-left one being stored in dense format.
23 *
24 * A = | Q | gamma |
25 * | lambda | delta |
26 *
27 * The storage format is dense row-major for top-left, top-right and bottom-left blocks, and determined by
28 * its type for the top-left block.
29 *
30 * This class implements a Schur complement method to perform a block-LU factorization and solve,
31 * calling top-left block and bottom-right block setup_solver() and solve() methods for internal operations.
32 *
33 * @tparam ExecSpace The Kokkos::ExecutionSpace on which operations related to the matrix are supposed to be performed.
34 */
35template <class ExecSpace>
36class SplinesLinearProblem2x2Blocks : public SplinesLinearProblem<ExecSpace>
37{
38public:
39 using typename SplinesLinearProblem<ExecSpace>::MultiRHS;
40 using SplinesLinearProblem<ExecSpace>::size;
41
42 /**
43 * @brief COO storage.
44 *
45 * [SHOULD BE PRIVATE (GPU programming limitation)]
46 */
47 struct Coo
48 {
49 std::size_t m_nrows;
50 std::size_t m_ncols;
51 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space> m_rows_idx;
52 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space> m_cols_idx;
53 Kokkos::View<double*, Kokkos::LayoutRight, typename ExecSpace::memory_space> m_values;
54
55 Coo() : m_nrows(0), m_ncols(0) {}
56
57 Coo(std::size_t const nrows_,
58 std::size_t const ncols_,
59 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space> rows_idx_,
60 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space> cols_idx_,
61 Kokkos::View<double*, Kokkos::LayoutRight, typename ExecSpace::memory_space> values_)
62 : m_nrows(nrows_)
63 , m_ncols(ncols_)
64 , m_rows_idx(std::move(rows_idx_))
65 , m_cols_idx(std::move(cols_idx_))
66 , m_values(std::move(values_))
67 {
68 assert(m_rows_idx.extent(0) == m_cols_idx.extent(0));
69 assert(m_rows_idx.extent(0) == m_values.extent(0));
70 }
71
72 KOKKOS_FUNCTION std::size_t nnz() const
73 {
74 return m_values.extent(0);
75 }
76
77 KOKKOS_FUNCTION std::size_t nrows() const
78 {
79 return m_nrows;
80 }
81
82 KOKKOS_FUNCTION std::size_t ncols() const
83 {
84 return m_ncols;
85 }
86
87 KOKKOS_FUNCTION Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space>
88 rows_idx() const
89 {
90 return m_rows_idx;
91 }
92
93 KOKKOS_FUNCTION Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space>
94 cols_idx() const
95 {
96 return m_cols_idx;
97 }
98
99 KOKKOS_FUNCTION Kokkos::View<double*, Kokkos::LayoutRight, typename ExecSpace::memory_space>
100 values() const
101 {
102 return m_values;
103 }
104 };
105
106protected:
107 std::unique_ptr<SplinesLinearProblem<ExecSpace>> m_top_left_block;
108 Kokkos::DualView<double**, Kokkos::LayoutRight, typename ExecSpace::memory_space>
109 m_top_right_block;
110 Coo m_top_right_block_coo;
111 Kokkos::DualView<double**, Kokkos::LayoutRight, typename ExecSpace::memory_space>
112 m_bottom_left_block;
113 Coo m_bottom_left_block_coo;
114 std::unique_ptr<SplinesLinearProblem<ExecSpace>> m_bottom_right_block;
115
116public:
117 /**
118 * @brief SplinesLinearProblem2x2Blocks constructor.
119 *
120 * @param mat_size The size of one of the dimensions of the square matrix.
121 * @param top_left_block A pointer toward the top-left SplinesLinearProblem. `setup_solver` must not have been called on it.
122 */
123 explicit SplinesLinearProblem2x2Blocks(
124 std::size_t const mat_size,
125 std::unique_ptr<SplinesLinearProblem<ExecSpace>> top_left_block)
126 : SplinesLinearProblem<ExecSpace>(mat_size)
127 , m_top_left_block(std::move(top_left_block))
128 , m_top_right_block(
129 "top_right_block",
130 m_top_left_block->size(),
131 mat_size - m_top_left_block->size())
132 , m_bottom_left_block(
133 "bottom_left_block",
134 mat_size - m_top_left_block->size(),
135 m_top_left_block->size())
136 , m_bottom_right_block(
137 new SplinesLinearProblemDense<ExecSpace>(mat_size - m_top_left_block->size()))
138 {
139 assert(m_top_left_block->size() <= mat_size);
140
141 Kokkos::deep_copy(m_top_right_block.view_host(), 0.);
142 Kokkos::deep_copy(m_bottom_left_block.view_host(), 0.);
143 }
144
145 double get_element(std::size_t const i, std::size_t const j) const override
146 {
147 assert(i < size());
148 assert(j < size());
149
150 std::size_t const nq = m_top_left_block->size();
151 if (i < nq && j < nq) {
152 return m_top_left_block->get_element(i, j);
153 }
154
155 if (i >= nq && j >= nq) {
156 return m_bottom_right_block->get_element(i - nq, j - nq);
157 }
158
159 if (j >= nq) {
160 return m_top_right_block.view_host()(i, j - nq);
161 }
162
163 return m_bottom_left_block.view_host()(i - nq, j);
164 }
165
166 void set_element(std::size_t const i, std::size_t const j, double const aij) override
167 {
168 assert(i < size());
169 assert(j < size());
170
171 std::size_t const nq = m_top_left_block->size();
172 if (i < nq && j < nq) {
173 m_top_left_block->set_element(i, j, aij);
174 } else if (i >= nq && j >= nq) {
175 m_bottom_right_block->set_element(i - nq, j - nq, aij);
176 } else if (j >= nq) {
177 m_top_right_block.view_host()(i, j - nq) = aij;
178 } else {
179 m_bottom_left_block.view_host()(i - nq, j) = aij;
180 }
181 }
182
183 /**
184 * @brief Fill a COO version of a Dense matrix (remove zeros).
185 *
186 * [SHOULD BE PRIVATE (GPU programming limitation)]
187 *
188 * Runs on a single thread to guarantee ordering.
189 *
190 * @param[in] dense_matrix The dense storage matrix whose non-zeros are extracted to fill the COO matrix.
191 * @param[in] tol The tolerancy applied to filter the non-zeros.
192 *
193 * @return The COO storage matrix filled with the non-zeros from dense_matrix.
194 */
195 Coo dense2coo(
196 Kokkos::View<double const**, Kokkos::LayoutRight, typename ExecSpace::memory_space>
197 dense_matrix,
198 double const tol = 1e-14)
199 {
200 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space> rows_idx(
201 "ddc_splines_coo_rows_idx",
202 dense_matrix.extent(0) * dense_matrix.extent(1));
203 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space> cols_idx(
204 "ddc_splines_coo_cols_idx",
205 dense_matrix.extent(0) * dense_matrix.extent(1));
206 Kokkos::View<double*, Kokkos::LayoutRight, typename ExecSpace::memory_space>
207 values("ddc_splines_coo_values", dense_matrix.extent(0) * dense_matrix.extent(1));
208
209 Kokkos::DualView<std::size_t, Kokkos::LayoutRight, typename ExecSpace::memory_space>
210 n_nonzeros("ddc_splines_n_nonzeros");
211 n_nonzeros.view_host()() = 0;
212 n_nonzeros.modify_host();
213 n_nonzeros.sync_device();
214
215 auto const n_nonzeros_device = n_nonzeros.view_device();
216 Kokkos::parallel_for(
217 "dense2coo",
218 Kokkos::RangePolicy(ExecSpace(), 0, 1),
219 KOKKOS_LAMBDA(int const) {
220 for (int i = 0; i < dense_matrix.extent(0); ++i) {
221 for (int j = 0; j < dense_matrix.extent(1); ++j) {
222 double const aij = dense_matrix(i, j);
223 if (Kokkos::abs(aij) >= tol) {
224 rows_idx(n_nonzeros_device()) = i;
225 cols_idx(n_nonzeros_device()) = j;
226 values(n_nonzeros_device()) = aij;
227 n_nonzeros_device()++;
228 }
229 }
230 }
231 });
232 n_nonzeros.modify_device();
233 n_nonzeros.sync_host();
234 Kokkos::resize(rows_idx, n_nonzeros.view_host()());
235 Kokkos::resize(cols_idx, n_nonzeros.view_host()());
236 Kokkos::resize(values, n_nonzeros.view_host()());
237
238 return Coo(dense_matrix.extent(0), dense_matrix.extent(1), rows_idx, cols_idx, values);
239 }
240
241private:
242 /// @brief Compute the Schur complement delta - lambda*Q^-1*gamma.
243 void compute_schur_complement()
244 {
245 auto const bottom_left_block = m_bottom_left_block.view_host();
246 auto const top_right_block = m_top_right_block.view_host();
247 Kokkos::parallel_for(
248 "compute_schur_complement",
249 Kokkos::MDRangePolicy<Kokkos::DefaultHostExecutionSpace, Kokkos::Rank<2>>(
250 {0, 0},
251 {m_bottom_right_block->size(), m_bottom_right_block->size()}),
252 [&](int const i, int const j) {
253 double val = 0.0;
254 for (int l = 0; l < m_top_left_block->size(); ++l) {
255 val += bottom_left_block(i, l) * top_right_block(l, j);
256 }
257 m_bottom_right_block
258 ->set_element(i, j, m_bottom_right_block->get_element(i, j) - val);
259 });
260 }
261
262public:
263 /**
264 * @brief Perform a pre-process operation on the solver. Must be called after filling the matrix.
265 *
266 * Block-LU factorize the matrix A according to the Schur complement method. The block-LU factorization is:
267 *
268 * A = | Q | 0 | | I | Q^-1*gamma |
269 * | lambda | delta - lambda*Q^-1*gamma | | 0 | I |
270 *
271 * So we perform the factorization inplace to store only the relevant blocks in the matrix (while factorizing
272 * the blocks themselves if necessary):
273 *
274 * | Q | Q^-1*gamma |
275 * | lambda | delta - lambda*Q^-1*gamma |
276 */
277 void setup_solver() override
278 {
279 // Setup the top-left solver
280 m_top_left_block->setup_solver();
281
282 // Compute Q^-1*gamma in top-right block
283 m_top_right_block.modify_host();
284 m_top_right_block.sync_device();
285 m_top_left_block->solve(m_top_right_block.view_device(), false);
286 m_top_right_block_coo = dense2coo(m_top_right_block.view_device());
287 m_top_right_block.modify_device();
288 m_top_right_block.sync_host();
289
290 // Push lambda on device in bottom-left block
291 m_bottom_left_block.modify_host();
292 m_bottom_left_block.sync_device();
293 m_bottom_left_block_coo = dense2coo(m_bottom_left_block.view_device());
294
295 // Compute delta - lambda*Q^-1*gamma in bottom-right block & setup the bottom-right solver
296 compute_schur_complement();
297 m_bottom_right_block->setup_solver();
298 }
299
300 /**
301 * @brief Compute y <- y - LinOp*x or y <- y - LinOp^t*x with a sparse LinOp.
302 *
303 * [SHOULD BE PRIVATE (GPU programming limitation)]
304 *
305 * Perform a spdm operation (sparse-dense matrix multiplication) with parameters alpha=-1 and beta=1 between
306 * a sparse matrix stored in COO format and a dense matrix x.
307 *
308 * @param[in] LinOp The sparse matrix, left side of the matrix multiplication.
309 * @param[in] x The dense matrix, right side of the matrix multiplication.
310 * @param[inout] y The dense matrix to be altered by the operation.
311 * @param transpose A flag to indicate if the direct or transposed version of the operation is performed.
312 */
313 void spdm_minus1_1(Coo LinOp, MultiRHS const x, MultiRHS const y, bool const transpose = false)
314 const
315 {
316 assert((!transpose && LinOp.nrows() == y.extent(0))
317 || (transpose && LinOp.ncols() == y.extent(0)));
318 assert((!transpose && LinOp.ncols() == x.extent(0))
319 || (transpose && LinOp.nrows() == x.extent(0)));
320 assert(x.extent(1) == y.extent(1));
321
322 if (!transpose) {
323 Kokkos::parallel_for(
324 "ddc_splines_spdm_minus1_1",
325 Kokkos::RangePolicy(ExecSpace(), 0, y.extent(1)),
326 KOKKOS_LAMBDA(int const j) {
327 for (int nz_idx = 0; nz_idx < LinOp.nnz(); ++nz_idx) {
328 int const i = LinOp.rows_idx()(nz_idx);
329 int const k = LinOp.cols_idx()(nz_idx);
330 y(i, j) -= LinOp.values()(nz_idx) * x(k, j);
331 }
332 });
333 } else {
334 Kokkos::parallel_for(
335 "ddc_splines_spdm_minus1_1_tr",
336 Kokkos::RangePolicy(ExecSpace(), 0, y.extent(1)),
337 KOKKOS_LAMBDA(int const j) {
338 for (int nz_idx = 0; nz_idx < LinOp.nnz(); ++nz_idx) {
339 int const i = LinOp.rows_idx()(nz_idx);
340 int const k = LinOp.cols_idx()(nz_idx);
341 y(k, j) -= LinOp.values()(nz_idx) * x(i, j);
342 }
343 });
344 }
345 }
346
347 /**
348 * @brief Solve the multiple right-hand sides linear problem Ax=b or its transposed version A^tx=b inplace.
349 *
350 * The solver method is the one known as Schur complement method. It can be summarized as follow,
351 * starting with the pre-computed elements of the matrix:
352 *
353 * | Q | Q^-1*gamma |
354 * | lambda | delta - lambda*Q^-1*gamma |
355 *
356 * For the non-transposed case:
357 * - Solve inplace Q * x'1 = b1 (using the solver internal to Q).
358 * - Compute inplace b'2 = b2 - lambda*x'1.
359 * - Solve inplace (delta - lambda*Q^-1*gamma) * x2 = b'2.
360 * - Compute inplace x1 = x'1 - (delta - lambda*Q^-1*gamma)*x2.
361 *
362 * @param[in, out] b A 2D Kokkos::View storing the multiple right-hand sides of the problem and receiving the corresponding solution.
363 * @param transpose Choose between the direct or transposed version of the linear problem.
364 */
365 void solve(MultiRHS const b, bool const transpose) const override
366 {
367 assert(b.extent(0) == size());
368
369 MultiRHS const b1 = Kokkos::
370 subview(b,
371 std::pair<std::size_t, std::size_t>(0, m_top_left_block->size()),
372 Kokkos::ALL);
373 MultiRHS const b2 = Kokkos::
374 subview(b,
375 std::pair<std::size_t, std::size_t>(m_top_left_block->size(), b.extent(0)),
376 Kokkos::ALL);
377 if (!transpose) {
378 m_top_left_block->solve(b1, false);
379 spdm_minus1_1(m_bottom_left_block_coo, b1, b2);
380 m_bottom_right_block->solve(b2, false);
381 spdm_minus1_1(m_top_right_block_coo, b2, b1);
382 } else {
383 spdm_minus1_1(m_top_right_block_coo, b1, b2, true);
384 m_bottom_right_block->solve(b2, true);
385 spdm_minus1_1(m_bottom_left_block_coo, b2, b1, true);
386 m_top_left_block->solve(b1, true);
387 }
388 }
389};
390
391} // namespace ddc::detail
The top-level namespace of DDC.