DDC 0.5.0
Loading...
Searching...
No Matches
splines_linear_problem_2x2_blocks.hpp
1// Copyright (C) The DDC development team, see COPYRIGHT.md file
2//
3// SPDX-License-Identifier: MIT
4
5#pragma once
6
7#include <cassert>
8#include <cstddef>
9#include <memory>
10#include <string>
11#include <utility>
12
13#include <Kokkos_Core.hpp>
14#include <Kokkos_DualView.hpp>
15
16#include "splines_linear_problem.hpp"
17#include "splines_linear_problem_dense.hpp"
18
19namespace ddc::detail {
20
21/**
22 * @brief A 2x2-blocks linear problem dedicated to the computation of a spline approximation,
23 * with all blocks except top-left one being stored in dense format.
24 *
25 * A = | Q | gamma |
26 * | lambda | delta |
27 *
28 * The storage format is dense row-major for top-left, top-right and bottom-left blocks, and determined by
29 * its type for the top-left block.
30 *
31 * This class implements a Schur complement method to perform a block-LU factorization and solve,
32 * calling top-left block and bottom-right block setup_solver() and solve() methods for internal operations.
33 *
34 * @tparam ExecSpace The Kokkos::ExecutionSpace on which operations related to the matrix are supposed to be performed.
35 */
36template <class ExecSpace>
37class SplinesLinearProblem2x2Blocks : public SplinesLinearProblem<ExecSpace>
38{
39public:
40 using typename SplinesLinearProblem<ExecSpace>::MultiRHS;
41 using SplinesLinearProblem<ExecSpace>::size;
42
43 /**
44 * @brief COO storage.
45 *
46 * [SHOULD BE PRIVATE (GPU programming limitation)]
47 */
48 struct Coo
49 {
50 std::size_t m_nrows;
51 std::size_t m_ncols;
52 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space> m_rows_idx;
53 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space> m_cols_idx;
54 Kokkos::View<double*, Kokkos::LayoutRight, typename ExecSpace::memory_space> m_values;
55
56 Coo() : m_nrows(0), m_ncols(0) {}
57
58 Coo(std::size_t const nrows_,
59 std::size_t const ncols_,
60 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space> rows_idx_,
61 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space> cols_idx_,
62 Kokkos::View<double*, Kokkos::LayoutRight, typename ExecSpace::memory_space> values_)
63 : m_nrows(nrows_)
64 , m_ncols(ncols_)
65 , m_rows_idx(std::move(rows_idx_))
66 , m_cols_idx(std::move(cols_idx_))
67 , m_values(std::move(values_))
68 {
69 assert(m_rows_idx.extent(0) == m_cols_idx.extent(0));
70 assert(m_rows_idx.extent(0) == m_values.extent(0));
71 }
72
73 KOKKOS_FUNCTION std::size_t nnz() const
74 {
75 return m_values.extent(0);
76 }
77
78 KOKKOS_FUNCTION std::size_t nrows() const
79 {
80 return m_nrows;
81 }
82
83 KOKKOS_FUNCTION std::size_t ncols() const
84 {
85 return m_ncols;
86 }
87
88 KOKKOS_FUNCTION Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space>
89 rows_idx() const
90 {
91 return m_rows_idx;
92 }
93
94 KOKKOS_FUNCTION Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space>
95 cols_idx() const
96 {
97 return m_cols_idx;
98 }
99
100 KOKKOS_FUNCTION Kokkos::View<double*, Kokkos::LayoutRight, typename ExecSpace::memory_space>
101 values() const
102 {
103 return m_values;
104 }
105 };
106
107protected:
108 std::unique_ptr<SplinesLinearProblem<ExecSpace>> m_top_left_block;
109 Kokkos::DualView<double**, Kokkos::LayoutRight, typename ExecSpace::memory_space>
110 m_top_right_block;
111 Coo m_top_right_block_coo;
112 Kokkos::DualView<double**, Kokkos::LayoutRight, typename ExecSpace::memory_space>
113 m_bottom_left_block;
114 Coo m_bottom_left_block_coo;
115 std::unique_ptr<SplinesLinearProblem<ExecSpace>> m_bottom_right_block;
116
117public:
118 /**
119 * @brief SplinesLinearProblem2x2Blocks constructor.
120 *
121 * @param mat_size The size of one of the dimensions of the square matrix.
122 * @param top_left_block A pointer toward the top-left SplinesLinearProblem. `setup_solver` must not have been called on it.
123 */
124 explicit SplinesLinearProblem2x2Blocks(
125 std::size_t const mat_size,
126 std::unique_ptr<SplinesLinearProblem<ExecSpace>> top_left_block)
127 : SplinesLinearProblem<ExecSpace>(mat_size)
128 , m_top_left_block(std::move(top_left_block))
129 , m_top_right_block(
130 "top_right_block",
131 m_top_left_block->size(),
132 mat_size - m_top_left_block->size())
133 , m_bottom_left_block(
134 "bottom_left_block",
135 mat_size - m_top_left_block->size(),
136 m_top_left_block->size())
137 , m_bottom_right_block(
138 new SplinesLinearProblemDense<ExecSpace>(mat_size - m_top_left_block->size()))
139 {
140 assert(m_top_left_block->size() <= mat_size);
141
142 Kokkos::deep_copy(m_top_right_block.view_host(), 0.);
143 Kokkos::deep_copy(m_bottom_left_block.view_host(), 0.);
144 }
145
146 double get_element(std::size_t const i, std::size_t const j) const override
147 {
148 assert(i < size());
149 assert(j < size());
150
151 std::size_t const nq = m_top_left_block->size();
152 if (i < nq && j < nq) {
153 return m_top_left_block->get_element(i, j);
154 }
155
156 if (i >= nq && j >= nq) {
157 return m_bottom_right_block->get_element(i - nq, j - nq);
158 }
159
160 if (j >= nq) {
161 return m_top_right_block.view_host()(i, j - nq);
162 }
163
164 return m_bottom_left_block.view_host()(i - nq, j);
165 }
166
167 void set_element(std::size_t const i, std::size_t const j, double const aij) override
168 {
169 assert(i < size());
170 assert(j < size());
171
172 std::size_t const nq = m_top_left_block->size();
173 if (i < nq && j < nq) {
174 m_top_left_block->set_element(i, j, aij);
175 } else if (i >= nq && j >= nq) {
176 m_bottom_right_block->set_element(i - nq, j - nq, aij);
177 } else if (j >= nq) {
178 m_top_right_block.view_host()(i, j - nq) = aij;
179 } else {
180 m_bottom_left_block.view_host()(i - nq, j) = aij;
181 }
182 }
183
184 /**
185 * @brief Fill a COO version of a Dense matrix (remove zeros).
186 *
187 * [SHOULD BE PRIVATE (GPU programming limitation)]
188 *
189 * Runs on a single thread to guarantee ordering.
190 *
191 * @param[in] dense_matrix The dense storage matrix whose non-zeros are extracted to fill the COO matrix.
192 * @param[in] tol The tolerancy applied to filter the non-zeros.
193 *
194 * @return The COO storage matrix filled with the non-zeros from dense_matrix.
195 */
196 Coo dense2coo(
197 Kokkos::View<const double**, Kokkos::LayoutRight, typename ExecSpace::memory_space>
198 dense_matrix,
199 double const tol = 1e-14)
200 {
201 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space> rows_idx(
202 "ddc_splines_coo_rows_idx",
203 dense_matrix.extent(0) * dense_matrix.extent(1));
204 Kokkos::View<int*, Kokkos::LayoutRight, typename ExecSpace::memory_space> cols_idx(
205 "ddc_splines_coo_cols_idx",
206 dense_matrix.extent(0) * dense_matrix.extent(1));
207 Kokkos::View<double*, Kokkos::LayoutRight, typename ExecSpace::memory_space>
208 values("ddc_splines_coo_values", dense_matrix.extent(0) * dense_matrix.extent(1));
209
210 Kokkos::DualView<std::size_t, Kokkos::LayoutRight, typename ExecSpace::memory_space>
211 n_nonzeros("ddc_splines_n_nonzeros");
212 n_nonzeros.view_host()() = 0;
213 n_nonzeros.modify_host();
214 n_nonzeros.sync_device();
215
216 auto const n_nonzeros_device = n_nonzeros.view_device();
217 Kokkos::parallel_for(
218 "dense2coo",
219 Kokkos::RangePolicy(ExecSpace(), 0, 1),
220 KOKKOS_LAMBDA(const int) {
221 for (int i = 0; i < dense_matrix.extent(0); ++i) {
222 for (int j = 0; j < dense_matrix.extent(1); ++j) {
223 double const aij = dense_matrix(i, j);
224 if (Kokkos::abs(aij) >= tol) {
225 rows_idx(n_nonzeros_device()) = i;
226 cols_idx(n_nonzeros_device()) = j;
227 values(n_nonzeros_device()) = aij;
228 n_nonzeros_device()++;
229 }
230 }
231 }
232 });
233 n_nonzeros.modify_device();
234 n_nonzeros.sync_host();
235 Kokkos::resize(rows_idx, n_nonzeros.view_host()());
236 Kokkos::resize(cols_idx, n_nonzeros.view_host()());
237 Kokkos::resize(values, n_nonzeros.view_host()());
238
239 return Coo(dense_matrix.extent(0), dense_matrix.extent(1), rows_idx, cols_idx, values);
240 }
241
242private:
243 /// @brief Compute the Schur complement delta - lambda*Q^-1*gamma.
244 void compute_schur_complement()
245 {
246 auto const bottom_left_block = m_bottom_left_block.view_host();
247 auto const top_right_block = m_top_right_block.view_host();
248 Kokkos::parallel_for(
249 "compute_schur_complement",
250 Kokkos::MDRangePolicy<Kokkos::DefaultHostExecutionSpace, Kokkos::Rank<2>>(
251 {0, 0},
252 {m_bottom_right_block->size(), m_bottom_right_block->size()}),
253 [&](const int i, const int j) {
254 double val = 0.0;
255 for (int l = 0; l < m_top_left_block->size(); ++l) {
256 val += bottom_left_block(i, l) * top_right_block(l, j);
257 }
258 m_bottom_right_block
259 ->set_element(i, j, m_bottom_right_block->get_element(i, j) - val);
260 });
261 }
262
263public:
264 /**
265 * @brief Perform a pre-process operation on the solver. Must be called after filling the matrix.
266 *
267 * Block-LU factorize the matrix A according to the Schur complement method. The block-LU factorization is:
268 *
269 * A = | Q | 0 | | I | Q^-1*gamma |
270 * | lambda | delta - lambda*Q^-1*gamma | | 0 | I |
271 *
272 * So we perform the factorization inplace to store only the relevant blocks in the matrix (while factorizing
273 * the blocks themselves if necessary):
274 *
275 * | Q | Q^-1*gamma |
276 * | lambda | delta - lambda*Q^-1*gamma |
277 */
278 void setup_solver() override
279 {
280 // Setup the top-left solver
281 m_top_left_block->setup_solver();
282
283 // Compute Q^-1*gamma in top-right block
284 m_top_right_block.modify_host();
285 m_top_right_block.sync_device();
286 m_top_left_block->solve(m_top_right_block.view_device(), false);
287 m_top_right_block_coo = dense2coo(m_top_right_block.view_device());
288 m_top_right_block.modify_device();
289 m_top_right_block.sync_host();
290
291 // Push lambda on device in bottom-left block
292 m_bottom_left_block.modify_host();
293 m_bottom_left_block.sync_device();
294 m_bottom_left_block_coo = dense2coo(m_bottom_left_block.view_device());
295
296 // Compute delta - lambda*Q^-1*gamma in bottom-right block & setup the bottom-right solver
297 compute_schur_complement();
298 m_bottom_right_block->setup_solver();
299 }
300
301 /**
302 * @brief Compute y <- y - LinOp*x or y <- y - LinOp^t*x with a sparse LinOp.
303 *
304 * [SHOULD BE PRIVATE (GPU programming limitation)]
305 *
306 * Perform a spdm operation (sparse-dense matrix multiplication) with parameters alpha=-1 and beta=1 between
307 * a sparse matrix stored in COO format and a dense matrix x.
308 *
309 * @param[in] LinOp The sparse matrix, left side of the matrix multiplication.
310 * @param[in] x The dense matrix, right side of the matrix multiplication.
311 * @param[inout] y The dense matrix to be altered by the operation.
312 * @param transpose A flag to indicate if the direct or transposed version of the operation is performed.
313 */
314 void spdm_minus1_1(Coo LinOp, MultiRHS const x, MultiRHS const y, bool const transpose = false)
315 const
316 {
317 assert((!transpose && LinOp.nrows() == y.extent(0))
318 || (transpose && LinOp.ncols() == y.extent(0)));
319 assert((!transpose && LinOp.ncols() == x.extent(0))
320 || (transpose && LinOp.nrows() == x.extent(0)));
321 assert(x.extent(1) == y.extent(1));
322
323 if (!transpose) {
324 Kokkos::parallel_for(
325 "ddc_splines_spdm_minus1_1",
326 Kokkos::RangePolicy(ExecSpace(), 0, y.extent(1)),
327 KOKKOS_LAMBDA(const int j) {
328 for (int nz_idx = 0; nz_idx < LinOp.nnz(); ++nz_idx) {
329 const int i = LinOp.rows_idx()(nz_idx);
330 const int k = LinOp.cols_idx()(nz_idx);
331 y(i, j) -= LinOp.values()(nz_idx) * x(k, j);
332 }
333 });
334 } else {
335 Kokkos::parallel_for(
336 "ddc_splines_spdm_minus1_1_tr",
337 Kokkos::RangePolicy(ExecSpace(), 0, y.extent(1)),
338 KOKKOS_LAMBDA(const int j) {
339 for (int nz_idx = 0; nz_idx < LinOp.nnz(); ++nz_idx) {
340 const int i = LinOp.rows_idx()(nz_idx);
341 const int k = LinOp.cols_idx()(nz_idx);
342 y(k, j) -= LinOp.values()(nz_idx) * x(i, j);
343 }
344 });
345 }
346 }
347
348 /**
349 * @brief Solve the multiple right-hand sides linear problem Ax=b or its transposed version A^tx=b inplace.
350 *
351 * The solver method is the one known as Schur complement method. It can be summarized as follow,
352 * starting with the pre-computed elements of the matrix:
353 *
354 * | Q | Q^-1*gamma |
355 * | lambda | delta - lambda*Q^-1*gamma |
356 *
357 * For the non-transposed case:
358 * - Solve inplace Q * x'1 = b1 (using the solver internal to Q).
359 * - Compute inplace b'2 = b2 - lambda*x'1.
360 * - Solve inplace (delta - lambda*Q^-1*gamma) * x2 = b'2.
361 * - Compute inplace x1 = x'1 - (delta - lambda*Q^-1*gamma)*x2.
362 *
363 * @param[in, out] b A 2D Kokkos::View storing the multiple right-hand sides of the problem and receiving the corresponding solution.
364 * @param transpose Choose between the direct or transposed version of the linear problem.
365 */
366 void solve(MultiRHS const b, bool const transpose) const override
367 {
368 assert(b.extent(0) == size());
369
370 MultiRHS const b1 = Kokkos::
371 subview(b,
372 std::pair<std::size_t, std::size_t>(0, m_top_left_block->size()),
373 Kokkos::ALL);
374 MultiRHS const b2 = Kokkos::
375 subview(b,
376 std::pair<std::size_t, std::size_t>(m_top_left_block->size(), b.extent(0)),
377 Kokkos::ALL);
378 if (!transpose) {
379 m_top_left_block->solve(b1, false);
380 spdm_minus1_1(m_bottom_left_block_coo, b1, b2);
381 m_bottom_right_block->solve(b2, false);
382 spdm_minus1_1(m_top_right_block_coo, b2, b1);
383 } else {
384 spdm_minus1_1(m_top_right_block_coo, b1, b2, true);
385 m_bottom_right_block->solve(b2, true);
386 spdm_minus1_1(m_bottom_left_block_coo, b2, b1, true);
387 m_top_left_block->solve(b1, true);
388 }
389 }
390};
391
392} // namespace ddc::detail
The top-level namespace of DDC.