DDC 0.10.0
Loading...
Searching...
No Matches
splines_linear_problem_pds_band.hpp
1// Copyright (C) The DDC development team, see COPYRIGHT.md file
2//
3// SPDX-License-Identifier: MIT
4
5#pragma once
6
7#include <cassert>
8#if !defined(NDEBUG)
9# include <cmath>
10#endif
11#include <cstddef>
12#include <stdexcept>
13#include <string>
14#include <utility>
15
16#include <Kokkos_Core.hpp>
17#include <Kokkos_DualView.hpp>
18
19#if __has_include(<mkl_lapacke.h>)
20# include <mkl_lapacke.h>
21#else
22# include <lapacke.h>
23#endif
24
25#include <KokkosBatched_Pbtrs.hpp>
26#include <KokkosBatched_Util.hpp>
27
28#include "splines_linear_problem.hpp"
29
30namespace ddc::detail {
31
32/**
33 * @brief A positive-definite symmetric band linear problem dedicated to the computation of a spline approximation.
34 *
35 * The storage format is dense row-major. Lapack is used to perform every matrix and linear solver-related operations.
36 *
37 * Given the linear system A*x=b, we assume that A is a square (n by n)
38 * with kd sub and superdiagonals.
39 * All non-zero elements of A are stored in the rectangular matrix q, using
40 * the format required by DPBTRF (LAPACK): (super-)diagonals of A are rows of q.
41 * q has 1 row for the diagonal and kd rows for the superdiagonals.
42 *
43 * @tparam ExecSpace The Kokkos::ExecutionSpace on which operations related to the matrix are supposed to be performed.
44 */
45template <class ExecSpace>
46class SplinesLinearProblemPDSBand : public SplinesLinearProblem<ExecSpace>
47{
48public:
49 using typename SplinesLinearProblem<ExecSpace>::MultiRHS;
50 using SplinesLinearProblem<ExecSpace>::size;
51
52protected:
53 Kokkos::DualView<double**, Kokkos::LayoutRight, typename ExecSpace::memory_space>
54 m_q; // pds band matrix representation
55
56public:
57 /**
58 * @brief SplinesLinearProblemPDSBand constructor.
59 *
60 * @param mat_size The size of one of the dimensions of the square matrix.
61 * @param kd The number of sub/superdiagonals of the matrix.
62 */
63 explicit SplinesLinearProblemPDSBand(std::size_t const mat_size, std::size_t const kd)
64 : SplinesLinearProblem<ExecSpace>(mat_size)
65 , m_q("q", kd + 1, mat_size)
66 {
67 assert(m_q.extent(0) <= mat_size);
68
69 Kokkos::deep_copy(m_q.view_host(), 0.);
70 }
71
72 double get_element(std::size_t i, std::size_t j) const override
73 {
74 assert(i < size());
75 assert(j < size());
76
77 // Indices are swapped for an element on subdiagonal
78 if (i > j) {
79 std::swap(i, j);
80 }
81
82 if (j - i < m_q.extent(0)) {
83 return m_q.view_host()(j - i, i);
84 }
85
86 return 0.0;
87 }
88
89 void set_element(std::size_t i, std::size_t j, double const aij) override
90 {
91 assert(i < size());
92 assert(j < size());
93
94 // Indices are swapped for an element on subdiagonal
95 if (i > j) {
96 std::swap(i, j);
97 }
98 if (j - i < m_q.extent(0)) {
99 m_q.view_host()(j - i, i) = aij;
100 } else {
101 assert(std::fabs(aij) < 1e-20);
102 }
103 }
104
105 /**
106 * @brief Perform a pre-process operation on the solver. Must be called after filling the matrix.
107 *
108 * LU-factorize the matrix A and store the pivots using the LAPACK dpbtrf() implementation.
109 */
110 void setup_solver() override
111 {
112 int const info = LAPACKE_dpbtrf(
113 LAPACK_ROW_MAJOR,
114 'L',
115 size(),
116 m_q.extent(0) - 1,
117 m_q.view_host().data(),
118 m_q.view_host().stride(
119 0) // m_q.view_host().stride(0) if LAPACK_ROW_MAJOR, m_q.view_host().stride(1) if LAPACK_COL_MAJOR
120 );
121 if (info != 0) {
122 throw std::runtime_error(
123 "LAPACKE_dpbtrf failed with error code " + std::to_string(info));
124 }
125
126 // Push on device
127 m_q.modify_host();
128 m_q.sync_device();
129 }
130
131 /**
132 * @brief Solve the multiple right-hand sides linear problem Ax=b or its transposed version A^tx=b inplace.
133 *
134 * The solver method is band gaussian elimination with partial pivoting using the LU-factorized matrix A. The implementation is LAPACK method dpbtrs.
135 *
136 * @param[in, out] b A 2D Kokkos::View storing the multiple right-hand sides of the problem and receiving the corresponding solution.
137 * @param transpose Choose between the direct or transposed version of the linear problem (unused for a symmetric problem).
138 */
139 void solve(MultiRHS const b, bool const) const override
140 {
141 assert(b.extent(0) == size());
142
143 auto q_device = m_q.view_device();
144 Kokkos::RangePolicy<ExecSpace> const policy(0, b.extent(1));
145 Kokkos::parallel_for(
146 "pbtrs",
147 policy,
148 KOKKOS_LAMBDA(int const i) {
149 auto sub_b = Kokkos::subview(b, Kokkos::ALL, i);
150 KokkosBatched::SerialPbtrs<
151 KokkosBatched::Uplo::Lower,
152 KokkosBatched::Algo::Pbtrs::Unblocked>::invoke(q_device, sub_b);
153 });
154 }
155};
156
157} // namespace ddc::detail
The top-level namespace of DDC.