14#include <Kokkos_Core.hpp>
15#include <Kokkos_DualView.hpp>
17#if __has_include
(<mkl_lapacke.h>)
18# include <mkl_lapacke.h>
23#include <KokkosBatched_Util.hpp>
25#include "kokkos-kernels-ext/KokkosBatched_Gbtrs.hpp"
30namespace ddc::detail {
32template <
class ExecSpace>
33SplinesLinearProblemBand<ExecSpace>::SplinesLinearProblemBand(
34 std::size_t
const mat_size,
37 : SplinesLinearProblem<ExecSpace>(mat_size)
41
42
43
44 , m_q(
"q", 2 * kl + ku + 1, mat_size)
45 , m_ipiv(
"ipiv", mat_size)
47 assert(m_kl <= mat_size);
48 assert(m_ku <= mat_size);
50 Kokkos::deep_copy(m_q.view_host(), 0.);
53template <
class ExecSpace>
54SplinesLinearProblemBand<ExecSpace>::~SplinesLinearProblemBand() =
default;
56template <
class ExecSpace>
57std::size_t SplinesLinearProblemBand<ExecSpace>::band_storage_row_index(
59 std::size_t
const j)
const
61 return m_kl + m_ku + i - j;
64template <
class ExecSpace>
65double SplinesLinearProblemBand<ExecSpace>::get_element(std::size_t
const i, std::size_t
const j)
71
72
73
74
75
77 max(
static_cast<std::ptrdiff_t>(0),
78 static_cast<std::ptrdiff_t>(j) -
static_cast<std::ptrdiff_t>(m_ku))
79 && i < std::min(size(), j + m_kl + 1)) {
80 return m_q.view_host()(band_storage_row_index(i, j), j);
86template <
class ExecSpace>
87void SplinesLinearProblemBand<ExecSpace>::set_element(
95
96
97
98
99
101 max(
static_cast<std::ptrdiff_t>(0),
102 static_cast<std::ptrdiff_t>(j) -
static_cast<std::ptrdiff_t>(m_ku))
103 && i < std::min(size(), j + m_kl + 1)) {
104 m_q.view_host()(band_storage_row_index(i, j), j) = aij;
106 assert(std::fabs(aij) < 1e-15);
110template <
class ExecSpace>
111void SplinesLinearProblemBand<ExecSpace>::setup_solver()
113 int const info = LAPACKE_dgbtrf(
119 m_q.view_host().data(),
120 m_q.view_host().stride(
122 m_ipiv.view_host().data());
124 throw std::runtime_error(
"LAPACKE_dgbtrf failed with error code " + std::to_string(info));
128 for (std::size_t i = 0; i < size(); ++i) {
129 m_ipiv.view_host()(i) -= 1;
135 m_ipiv.modify_host();
136 m_ipiv.sync_device();
139template <
class ExecSpace>
140void SplinesLinearProblemBand<ExecSpace>::solve(MultiRHS
const b,
bool const transpose)
const
142 assert(b.extent(0) == size());
144 std::size_t
const kl_proxy = m_kl;
145 std::size_t
const ku_proxy = m_ku;
146 auto q_device = m_q.view_device();
147 auto ipiv_device = m_ipiv.view_device();
148 Kokkos::RangePolicy<ExecSpace>
const policy(0, b.extent(1));
150 Kokkos::parallel_for(
153 KOKKOS_LAMBDA(
int const i) {
154 auto sub_b = Kokkos::subview(b, Kokkos::ALL, i);
155 KokkosBatched::SerialGbtrs<
156 KokkosBatched::Trans::Transpose,
157 KokkosBatched::Algo::Level3::Unblocked>::
158 invoke(q_device, sub_b, ipiv_device, kl_proxy, ku_proxy);
161 Kokkos::parallel_for(
164 KOKKOS_LAMBDA(
int const i) {
165 auto sub_b = Kokkos::subview(b, Kokkos::ALL, i);
166 KokkosBatched::SerialGbtrs<
167 KokkosBatched::Trans::NoTranspose,
168 KokkosBatched::Algo::Level3::Unblocked>::
169 invoke(q_device, sub_b, ipiv_device, kl_proxy, ku_proxy);
174#if defined(KOKKOS_ENABLE_SERIAL
)
175template class SplinesLinearProblemBand<Kokkos::Serial>;
177#if defined(KOKKOS_ENABLE_OPENMP)
178template class SplinesLinearProblemBand<Kokkos::OpenMP>;
180#if defined(KOKKOS_ENABLE_CUDA)
181template class SplinesLinearProblemBand<Kokkos::Cuda>;
183#if defined(KOKKOS_ENABLE_HIP)
184template class SplinesLinearProblemBand<Kokkos::HIP>;
186#if defined(KOKKOS_ENABLE_SYCL)
187template class SplinesLinearProblemBand<Kokkos::SYCL>;
The top-level namespace of DDC.