10#include <Kokkos_Core.hpp>
11#include <Kokkos_DualView.hpp>
13#if __has_include
(<mkl_lapacke.h>)
14# include <mkl_lapacke.h>
19#include <KokkosBatched_Util.hpp>
21#include "kokkos-kernels-ext/KokkosBatched_Getrs.hpp"
26namespace ddc::detail {
28template <
class ExecSpace>
29SplinesLinearProblemDense<ExecSpace>::SplinesLinearProblemDense(std::size_t
const mat_size)
30 : SplinesLinearProblem<ExecSpace>(mat_size)
31 , m_a(
"a", mat_size, mat_size)
32 , m_ipiv(
"ipiv", mat_size)
34 Kokkos::deep_copy(m_a.view_host(), 0.);
37template <
class ExecSpace>
38SplinesLinearProblemDense<ExecSpace>::~SplinesLinearProblemDense() =
default;
40template <
class ExecSpace>
41double SplinesLinearProblemDense<ExecSpace>::get_element(std::size_t
const i, std::size_t
const j)
46 return m_a.view_host()(i, j);
49template <
class ExecSpace>
50void SplinesLinearProblemDense<ExecSpace>::set_element(
57 m_a.view_host()(i, j) = aij;
60template <
class ExecSpace>
61void SplinesLinearProblemDense<ExecSpace>::setup_solver()
63 int const info = LAPACKE_dgetrf(
67 m_a.view_host().data(),
69 m_ipiv.view_host().data());
71 throw std::runtime_error(
"LAPACKE_dgetrf failed with error code " + std::to_string(info));
75 for (std::size_t i = 0; i < size(); ++i) {
76 m_ipiv.view_host()(i) -= 1;
86template <
class ExecSpace>
87void SplinesLinearProblemDense<ExecSpace>::solve(MultiRHS
const b,
bool const transpose)
const
89 assert(b.extent(0) == size());
96 auto a_device = m_a.view_device();
97 auto ipiv_device = m_ipiv.view_device();
99 Kokkos::RangePolicy<ExecSpace>
const policy(0, b.extent(1));
102 Kokkos::parallel_for(
105 KOKKOS_LAMBDA(
int const i) {
106 auto sub_b = Kokkos::subview(b, Kokkos::ALL, i);
107 KokkosBatched::SerialGetrs<
108 KokkosBatched::Trans::Transpose,
109 KokkosBatched::Algo::Level3::Unblocked>::
110 invoke(a_device, ipiv_device, sub_b);
113 Kokkos::parallel_for(
116 KOKKOS_LAMBDA(
int const i) {
117 auto sub_b = Kokkos::subview(b, Kokkos::ALL, i);
118 KokkosBatched::SerialGetrs<
119 KokkosBatched::Trans::NoTranspose,
120 KokkosBatched::Algo::Level3::Unblocked>::
121 invoke(a_device, ipiv_device, sub_b);
126#if defined(KOKKOS_ENABLE_SERIAL
)
127template class SplinesLinearProblemDense<Kokkos::Serial>;
129#if defined(KOKKOS_ENABLE_OPENMP)
130template class SplinesLinearProblemDense<Kokkos::OpenMP>;
132#if defined(KOKKOS_ENABLE_CUDA)
133template class SplinesLinearProblemDense<Kokkos::Cuda>;
135#if defined(KOKKOS_ENABLE_HIP)
136template class SplinesLinearProblemDense<Kokkos::HIP>;
138#if defined(KOKKOS_ENABLE_SYCL)
139template class SplinesLinearProblemDense<Kokkos::SYCL>;
The top-level namespace of DDC.