DDC 0.10.0
Loading...
Searching...
No Matches
splines_linear_problem_band.cpp
1// Copyright (C) The DDC development team, see COPYRIGHT.md file
2//
3// SPDX-License-Identifier: MIT
4
5#include <algorithm>
6#include <cassert>
7#if !defined(NDEBUG)
8# include <cmath>
9#endif
10#include <cstddef>
11#include <stdexcept>
12#include <string>
13
14#include <Kokkos_Core.hpp>
15#include <Kokkos_DualView.hpp>
16
17#if __has_include(<mkl_lapacke.h>)
18# include <mkl_lapacke.h>
19#else
20# include <lapacke.h>
21#endif
22
23#include <KokkosBatched_Util.hpp>
24
25#include "kokkos-kernels-ext/KokkosBatched_Gbtrs.hpp"
26
29
30namespace ddc::detail {
31
32template <class ExecSpace>
33SplinesLinearProblemBand<ExecSpace>::SplinesLinearProblemBand(
34 std::size_t const mat_size,
35 std::size_t const kl,
36 std::size_t const ku)
37 : SplinesLinearProblem<ExecSpace>(mat_size)
38 , m_kl(kl)
39 , m_ku(ku)
40 /*
41 * The matrix itself stored in band format requires a (kl + ku + 1)*mat_size
42 * allocation, but the LU-factorization requires an additional kl*mat_size block
43 */
44 , m_q("q", 2 * kl + ku + 1, mat_size)
45 , m_ipiv("ipiv", mat_size)
46{
47 assert(m_kl <= mat_size);
48 assert(m_ku <= mat_size);
49
50 Kokkos::deep_copy(m_q.view_host(), 0.);
51}
52
53template <class ExecSpace>
54SplinesLinearProblemBand<ExecSpace>::~SplinesLinearProblemBand() = default;
55
56template <class ExecSpace>
57std::size_t SplinesLinearProblemBand<ExecSpace>::band_storage_row_index(
58 std::size_t const i,
59 std::size_t const j) const
60{
61 return m_kl + m_ku + i - j;
62}
63
64template <class ExecSpace>
65double SplinesLinearProblemBand<ExecSpace>::get_element(std::size_t const i, std::size_t const j)
66 const
67{
68 assert(i < size());
69 assert(j < size());
70 /*
71 * The "row index" of the band format storage identify the (sub/super)-diagonal
72 * while the column index is actually the column index of the matrix. Two layouts
73 * are supported by LAPACKE. The m_kl first lines are irrelevant for the storage of
74 * the matrix itself but required for the storage of its LU factorization.
75 */
76 if (i >= std::
77 max(static_cast<std::ptrdiff_t>(0),
78 static_cast<std::ptrdiff_t>(j) - static_cast<std::ptrdiff_t>(m_ku))
79 && i < std::min(size(), j + m_kl + 1)) {
80 return m_q.view_host()(band_storage_row_index(i, j), j);
81 }
82
83 return 0.0;
84}
85
86template <class ExecSpace>
87void SplinesLinearProblemBand<ExecSpace>::set_element(
88 std::size_t const i,
89 std::size_t const j,
90 double const aij)
91{
92 assert(i < size());
93 assert(j < size());
94 /*
95 * The "row index" of the band format storage identify the (sub/super)-diagonal
96 * while the column index is actually the column index of the matrix. Two layouts
97 * are supported by LAPACKE. The m_kl first lines are irrelevant for the storage of
98 * the matrix itself but required for the storage of its LU factorization.
99 */
100 if (i >= std::
101 max(static_cast<std::ptrdiff_t>(0),
102 static_cast<std::ptrdiff_t>(j) - static_cast<std::ptrdiff_t>(m_ku))
103 && i < std::min(size(), j + m_kl + 1)) {
104 m_q.view_host()(band_storage_row_index(i, j), j) = aij;
105 } else {
106 assert(std::fabs(aij) < 1e-15);
107 }
108}
109
110template <class ExecSpace>
111void SplinesLinearProblemBand<ExecSpace>::setup_solver()
112{
113 int const info = LAPACKE_dgbtrf(
114 LAPACK_ROW_MAJOR,
115 size(),
116 size(),
117 m_kl,
118 m_ku,
119 m_q.view_host().data(),
120 m_q.view_host().stride(
121 0), // m_q.view_host().stride(0) if LAPACK_ROW_MAJOR, m_q.view_host().stride(1) if LAPACK_COL_MAJOR
122 m_ipiv.view_host().data());
123 if (info != 0) {
124 throw std::runtime_error("LAPACKE_dgbtrf failed with error code " + std::to_string(info));
125 }
126
127 // Convert 1-based index to 0-based index
128 for (std::size_t i = 0; i < size(); ++i) {
129 m_ipiv.view_host()(i) -= 1;
130 }
131
132 // Push on device
133 m_q.modify_host();
134 m_q.sync_device();
135 m_ipiv.modify_host();
136 m_ipiv.sync_device();
137}
138
139template <class ExecSpace>
140void SplinesLinearProblemBand<ExecSpace>::solve(MultiRHS const b, bool const transpose) const
141{
142 assert(b.extent(0) == size());
143
144 std::size_t const kl_proxy = m_kl;
145 std::size_t const ku_proxy = m_ku;
146 auto q_device = m_q.view_device();
147 auto ipiv_device = m_ipiv.view_device();
148 Kokkos::RangePolicy<ExecSpace> const policy(0, b.extent(1));
149 if (transpose) {
150 Kokkos::parallel_for(
151 "gbtrs",
152 policy,
153 KOKKOS_LAMBDA(int const i) {
154 auto sub_b = Kokkos::subview(b, Kokkos::ALL, i);
155 KokkosBatched::SerialGbtrs<
156 KokkosBatched::Trans::Transpose,
157 KokkosBatched::Algo::Level3::Unblocked>::
158 invoke(q_device, sub_b, ipiv_device, kl_proxy, ku_proxy);
159 });
160 } else {
161 Kokkos::parallel_for(
162 "gbtrs",
163 policy,
164 KOKKOS_LAMBDA(int const i) {
165 auto sub_b = Kokkos::subview(b, Kokkos::ALL, i);
166 KokkosBatched::SerialGbtrs<
167 KokkosBatched::Trans::NoTranspose,
168 KokkosBatched::Algo::Level3::Unblocked>::
169 invoke(q_device, sub_b, ipiv_device, kl_proxy, ku_proxy);
170 });
171 }
172}
173
174#if defined(KOKKOS_ENABLE_SERIAL)
175template class SplinesLinearProblemBand<Kokkos::Serial>;
176#endif
177#if defined(KOKKOS_ENABLE_OPENMP)
178template class SplinesLinearProblemBand<Kokkos::OpenMP>;
179#endif
180#if defined(KOKKOS_ENABLE_CUDA)
181template class SplinesLinearProblemBand<Kokkos::Cuda>;
182#endif
183#if defined(KOKKOS_ENABLE_HIP)
184template class SplinesLinearProblemBand<Kokkos::HIP>;
185#endif
186#if defined(KOKKOS_ENABLE_SYCL)
187template class SplinesLinearProblemBand<Kokkos::SYCL>;
188#endif
189
190} // namespace ddc::detail
The top-level namespace of DDC.