DDC 0.4.1
Loading...
Searching...
No Matches
splines_linear_problem_2x2_blocks.hpp
1// Copyright (C) The DDC development team, see COPYRIGHT.md file
2//
3// SPDX-License-Identifier: MIT
4
5#pragma once
6
7#include <cassert>
8#include <cstddef>
9#include <memory>
10#include <string>
11#include <utility>
12
13#include <Kokkos_Core.hpp>
14#include <Kokkos_DualView.hpp>
15
16#include "splines_linear_problem.hpp"
17#include "splines_linear_problem_dense.hpp"
18
19namespace ddc::detail {
20
21/**
22 * @brief A 2x2-blocks linear problem dedicated to the computation of a spline approximation,
23 * with all blocks except top-left one being stored in dense format.
24 *
25 * A = | Q | gamma |
26 * | lambda | delta |
27 *
28 * The storage format is dense row-major for top-left, top-right and bottom-left blocks, and determined by
29 * its type for the top-left block.
30 *
31 * This class implements a Schur complement method to perform a block-LU factorization and solve,
32 * calling top-left block and bottom-right block setup_solver() and solve() methods for internal operations.
33 *
34 * @tparam ExecSpace The Kokkos::ExecutionSpace on which operations related to the matrix are supposed to be performed.
35 */
36template <class ExecSpace>
37class SplinesLinearProblem2x2Blocks : public SplinesLinearProblem<ExecSpace>
38{
39public:
40 using typename SplinesLinearProblem<ExecSpace>::MultiRHS;
41 using SplinesLinearProblem<ExecSpace>::size;
42
43 /**
44 * @brief COO storage.
45 *
46 * [SHOULD BE PRIVATE (GPU programming limitation)]
47 */
48 struct Coo
49 {
50 std::size_t m_nrows;
51 std::size_t m_ncols;
52 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space> m_rows_idx;
53 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space> m_cols_idx;
54 Kokkos::View<double*, Kokkos::LayoutRight, typename ExecSpace::memory_space> m_values;
55
56 Coo() : m_nrows(0), m_ncols(0) {}
57
58 Coo(std::size_t const nrows_,
59 std::size_t const ncols_,
60 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space> rows_idx_,
61 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space> cols_idx_,
62 Kokkos::View<double*, Kokkos::LayoutRight, typename ExecSpace::memory_space> values_)
63 : m_nrows(nrows_)
64 , m_ncols(ncols_)
65 , m_rows_idx(std::move(rows_idx_))
66 , m_cols_idx(std::move(cols_idx_))
67 , m_values(std::move(values_))
68 {
69 assert(m_rows_idx.extent(0) == m_cols_idx.extent(0));
70 assert(m_rows_idx.extent(0) == m_values.extent(0));
71 }
72
73 KOKKOS_FUNCTION std::size_t nnz() const
74 {
75 return m_values.extent(0);
76 }
77
78 KOKKOS_FUNCTION std::size_t nrows() const
79 {
80 return m_nrows;
81 }
82
83 KOKKOS_FUNCTION std::size_t ncols() const
84 {
85 return m_ncols;
86 }
87
88 KOKKOS_FUNCTION Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space>
89 rows_idx() const
90 {
91 return m_rows_idx;
92 }
93
94 KOKKOS_FUNCTION Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space>
95 cols_idx() const
96 {
97 return m_cols_idx;
98 }
99
100 KOKKOS_FUNCTION Kokkos::View<double*, Kokkos::LayoutRight, typename ExecSpace::memory_space>
101 values() const
102 {
103 return m_values;
104 }
105 };
106
107protected:
108 std::unique_ptr<SplinesLinearProblem<ExecSpace>> m_top_left_block;
109 Kokkos::DualView<double**, Kokkos::LayoutRight, typename ExecSpace::memory_space>
110 m_top_right_block;
111 Coo m_top_right_block_coo;
112 Kokkos::DualView<double**, Kokkos::LayoutRight, typename ExecSpace::memory_space>
113 m_bottom_left_block;
114 Coo m_bottom_left_block_coo;
115 std::unique_ptr<SplinesLinearProblem<ExecSpace>> m_bottom_right_block;
116
117public:
118 /**
119 * @brief SplinesLinearProblem2x2Blocks constructor.
120 *
121 * @param mat_size The size of one of the dimensions of the square matrix.
122 * @param top_left_block A pointer toward the top-left SplinesLinearProblem. `setup_solver` must not have been called on it.
123 */
124 explicit SplinesLinearProblem2x2Blocks(
125 std::size_t const mat_size,
126 std::unique_ptr<SplinesLinearProblem<ExecSpace>> top_left_block)
127 : SplinesLinearProblem<ExecSpace>(mat_size)
128 , m_top_left_block(std::move(top_left_block))
129 , m_top_right_block(
130 "top_right_block",
131 m_top_left_block->size(),
132 mat_size - m_top_left_block->size())
133 , m_bottom_left_block(
134 "bottom_left_block",
135 mat_size - m_top_left_block->size(),
136 m_top_left_block->size())
137 , m_bottom_right_block(
138 new SplinesLinearProblemDense<ExecSpace>(mat_size - m_top_left_block->size()))
139 {
140 assert(m_top_left_block->size() <= mat_size);
141
142 Kokkos::deep_copy(m_top_right_block.h_view, 0.);
143 Kokkos::deep_copy(m_bottom_left_block.h_view, 0.);
144 }
145
146 double get_element(std::size_t const i, std::size_t const j) const override
147 {
148 assert(i < size());
149 assert(j < size());
150
151 std::size_t const nq = m_top_left_block->size();
152 if (i < nq && j < nq) {
153 return m_top_left_block->get_element(i, j);
154 }
155
156 if (i >= nq && j >= nq) {
157 return m_bottom_right_block->get_element(i - nq, j - nq);
158 }
159
160 if (j >= nq) {
161 return m_top_right_block.h_view(i, j - nq);
162 }
163
164 return m_bottom_left_block.h_view(i - nq, j);
165 }
166
167 void set_element(std::size_t const i, std::size_t const j, double const aij) override
168 {
169 assert(i < size());
170 assert(j < size());
171
172 std::size_t const nq = m_top_left_block->size();
173 if (i < nq && j < nq) {
174 m_top_left_block->set_element(i, j, aij);
175 } else if (i >= nq && j >= nq) {
176 m_bottom_right_block->set_element(i - nq, j - nq, aij);
177 } else if (j >= nq) {
178 m_top_right_block.h_view(i, j - nq) = aij;
179 } else {
180 m_bottom_left_block.h_view(i - nq, j) = aij;
181 }
182 }
183
184 /**
185 * @brief Fill a COO version of a Dense matrix (remove zeros).
186 *
187 * [SHOULD BE PRIVATE (GPU programming limitation)]
188 *
189 * Runs on a single thread to guarantee ordering.
190 *
191 * @param[in] dense_matrix The dense storage matrix whose non-zeros are extracted to fill the COO matrix.
192 * @param[in] tol The tolerancy applied to filter the non-zeros.
193 *
194 * @return The COO storage matrix filled with the non-zeros from dense_matrix.
195 */
196 Coo dense2coo(
197 Kokkos::View<const double**, Kokkos::LayoutRight, typename ExecSpace::memory_space>
198 dense_matrix,
199 double const tol = 1e-14)
200 {
201 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space> rows_idx(
202 "ddc_splines_coo_rows_idx",
203 dense_matrix.extent(0) * dense_matrix.extent(1));
204 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space> cols_idx(
205 "ddc_splines_coo_cols_idx",
206 dense_matrix.extent(0) * dense_matrix.extent(1));
207 Kokkos::View<double*, Kokkos::LayoutRight, typename ExecSpace::memory_space>
208 values("ddc_splines_coo_values", dense_matrix.extent(0) * dense_matrix.extent(1));
209
210 Kokkos::DualView<std::size_t, Kokkos::LayoutRight, typename ExecSpace::memory_space>
211 n_nonzeros("ddc_splines_n_nonzeros");
212 n_nonzeros.h_view() = 0;
213 n_nonzeros.modify_host();
214 n_nonzeros.sync_device();
215
216 Kokkos::parallel_for(
217 "dense2coo",
218 Kokkos::RangePolicy(ExecSpace(), 0, 1),
219 KOKKOS_LAMBDA(const int) {
220 for (int i = 0; i < dense_matrix.extent(0); ++i) {
221 for (int j = 0; j < dense_matrix.extent(1); ++j) {
222 double const aij = dense_matrix(i, j);
223 if (Kokkos::abs(aij) >= tol) {
224 rows_idx(n_nonzeros.d_view()) = i;
225 cols_idx(n_nonzeros.d_view()) = j;
226 values(n_nonzeros.d_view()) = aij;
227 n_nonzeros.d_view()++;
228 }
229 }
230 }
231 });
232 n_nonzeros.modify_device();
233 n_nonzeros.sync_host();
234 Kokkos::resize(rows_idx, n_nonzeros.h_view());
235 Kokkos::resize(cols_idx, n_nonzeros.h_view());
236 Kokkos::resize(values, n_nonzeros.h_view());
237
238 return Coo(dense_matrix.extent(0), dense_matrix.extent(1), rows_idx, cols_idx, values);
239 }
240
241private:
242 /// @brief Compute the Schur complement delta - lambda*Q^-1*gamma.
243 void compute_schur_complement()
244 {
245 Kokkos::parallel_for(
246 "compute_schur_complement",
247 Kokkos::MDRangePolicy<Kokkos::DefaultHostExecutionSpace, Kokkos::Rank<2>>(
248 {0, 0},
249 {m_bottom_right_block->size(), m_bottom_right_block->size()}),
250 [&](const int i, const int j) {
251 double val = 0.0;
252 for (int l = 0; l < m_top_left_block->size(); ++l) {
253 val += m_bottom_left_block.h_view(i, l) * m_top_right_block.h_view(l, j);
254 }
255 m_bottom_right_block
256 ->set_element(i, j, m_bottom_right_block->get_element(i, j) - val);
257 });
258 }
259
260public:
261 /**
262 * @brief Perform a pre-process operation on the solver. Must be called after filling the matrix.
263 *
264 * Block-LU factorize the matrix A according to the Schur complement method. The block-LU factorization is:
265 *
266 * A = | Q | 0 | | I | Q^-1*gamma |
267 * | lambda | delta - lambda*Q^-1*gamma | | 0 | I |
268 *
269 * So we perform the factorization inplace to store only the relevant blocks in the matrix (while factorizing
270 * the blocks themselves if necessary):
271 *
272 * | Q | Q^-1*gamma |
273 * | lambda | delta - lambda*Q^-1*gamma |
274 */
275 void setup_solver() override
276 {
277 // Setup the top-left solver
278 m_top_left_block->setup_solver();
279
280 // Compute Q^-1*gamma in top-right block
281 m_top_right_block.modify_host();
282 m_top_right_block.sync_device();
283 m_top_left_block->solve(m_top_right_block.d_view, false);
284 m_top_right_block_coo = dense2coo(m_top_right_block.d_view);
285 m_top_right_block.modify_device();
286 m_top_right_block.sync_host();
287
288 // Push lambda on device in bottom-left block
289 m_bottom_left_block.modify_host();
290 m_bottom_left_block.sync_device();
291 m_bottom_left_block_coo = dense2coo(m_bottom_left_block.d_view);
292
293 // Compute delta - lambda*Q^-1*gamma in bottom-right block & setup the bottom-right solver
294 compute_schur_complement();
295 m_bottom_right_block->setup_solver();
296 }
297
298 /**
299 * @brief Compute y <- y - LinOp*x or y <- y - LinOp^t*x with a sparse LinOp.
300 *
301 * [SHOULD BE PRIVATE (GPU programming limitation)]
302 *
303 * Perform a spdm operation (sparse-dense matrix multiplication) with parameters alpha=-1 and beta=1 between
304 * a sparse matrix stored in COO format and a dense matrix x.
305 *
306 * @param[in] LinOp The sparse matrix, left side of the matrix multiplication.
307 * @param[in] x The dense matrix, right side of the matrix multiplication.
308 * @param[inout] y The dense matrix to be altered by the operation.
309 * @param transpose A flag to indicate if the direct or transposed version of the operation is performed.
310 */
311 void spdm_minus1_1(Coo LinOp, MultiRHS const x, MultiRHS const y, bool const transpose = false)
312 const
313 {
314 assert((!transpose && LinOp.nrows() == y.extent(0))
315 || (transpose && LinOp.ncols() == y.extent(0)));
316 assert((!transpose && LinOp.ncols() == x.extent(0))
317 || (transpose && LinOp.nrows() == x.extent(0)));
318 assert(x.extent(1) == y.extent(1));
319
320 if (!transpose) {
321 Kokkos::parallel_for(
322 "ddc_splines_spdm_minus1_1",
323 Kokkos::RangePolicy(ExecSpace(), 0, y.extent(1)),
324 KOKKOS_LAMBDA(const int j) {
325 for (int nz_idx = 0; nz_idx < LinOp.nnz(); ++nz_idx) {
326 const int i = LinOp.rows_idx()(nz_idx);
327 const int k = LinOp.cols_idx()(nz_idx);
328 y(i, j) -= LinOp.values()(nz_idx) * x(k, j);
329 }
330 });
331 } else {
332 Kokkos::parallel_for(
333 "ddc_splines_spdm_minus1_1_tr",
334 Kokkos::RangePolicy(ExecSpace(), 0, y.extent(1)),
335 KOKKOS_LAMBDA(const int j) {
336 for (int nz_idx = 0; nz_idx < LinOp.nnz(); ++nz_idx) {
337 const int i = LinOp.rows_idx()(nz_idx);
338 const int k = LinOp.cols_idx()(nz_idx);
339 y(k, j) -= LinOp.values()(nz_idx) * x(i, j);
340 }
341 });
342 }
343 }
344
345 /**
346 * @brief Solve the multiple right-hand sides linear problem Ax=b or its transposed version A^tx=b inplace.
347 *
348 * The solver method is the one known as Schur complement method. It can be summarized as follow,
349 * starting with the pre-computed elements of the matrix:
350 *
351 * | Q | Q^-1*gamma |
352 * | lambda | delta - lambda*Q^-1*gamma |
353 *
354 * For the non-transposed case:
355 * - Solve inplace Q * x'1 = b1 (using the solver internal to Q).
356 * - Compute inplace b'2 = b2 - lambda*x'1.
357 * - Solve inplace (delta - lambda*Q^-1*gamma) * x2 = b'2.
358 * - Compute inplace x1 = x'1 - (delta - lambda*Q^-1*gamma)*x2.
359 *
360 * @param[in, out] b A 2D Kokkos::View storing the multiple right-hand sides of the problem and receiving the corresponding solution.
361 * @param transpose Choose between the direct or transposed version of the linear problem.
362 */
363 void solve(MultiRHS const b, bool const transpose) const override
364 {
365 assert(b.extent(0) == size());
366
367 MultiRHS const b1 = Kokkos::
368 subview(b,
369 std::pair<std::size_t, std::size_t>(0, m_top_left_block->size()),
370 Kokkos::ALL);
371 MultiRHS const b2 = Kokkos::
372 subview(b,
373 std::pair<std::size_t, std::size_t>(m_top_left_block->size(), b.extent(0)),
374 Kokkos::ALL);
375 if (!transpose) {
376 m_top_left_block->solve(b1, false);
377 spdm_minus1_1(m_bottom_left_block_coo, b1, b2);
378 m_bottom_right_block->solve(b2, false);
379 spdm_minus1_1(m_top_right_block_coo, b2, b1);
380 } else {
381 spdm_minus1_1(m_top_right_block_coo, b1, b2, true);
382 m_bottom_right_block->solve(b2, true);
383 spdm_minus1_1(m_bottom_left_block_coo, b2, b1, true);
384 m_top_left_block->solve(b1, true);
385 }
386 }
387};
388
389} // namespace ddc::detail
The top-level namespace of DDC.