10#include <Kokkos_Core.hpp>
12#if __has_include
(<mkl_lapacke.h>)
13# include <mkl_lapacke.h>
22#include <KokkosBatched_Laswp.hpp>
23#include <KokkosBatched_Trsm_Decl.hpp>
26#include <KokkosBatched_Getrs.hpp>
27#include <KokkosBatched_Util.hpp>
32namespace ddc::detail {
34template <
class ExecSpace>
35SplinesLinearProblemDense<ExecSpace>::SplinesLinearProblemDense(std::size_t
const mat_size)
36 : SplinesLinearProblem<ExecSpace>(mat_size)
37 , m_a(
"a", mat_size, mat_size)
38 , m_ipiv(
"ipiv", mat_size)
40 Kokkos::deep_copy(m_a.view_host(), 0.);
43template <
class ExecSpace>
44SplinesLinearProblemDense<ExecSpace>::~SplinesLinearProblemDense() =
default;
46template <
class ExecSpace>
47double SplinesLinearProblemDense<ExecSpace>::get_element(std::size_t
const i, std::size_t
const j)
52 return m_a.view_host()(i, j);
55template <
class ExecSpace>
56void SplinesLinearProblemDense<ExecSpace>::set_element(
63 m_a.view_host()(i, j) = aij;
66template <
class ExecSpace>
67void SplinesLinearProblemDense<ExecSpace>::setup_solver()
69 int const info = LAPACKE_dgetrf(
73 m_a.view_host().data(),
75 m_ipiv.view_host().data());
77 throw std::runtime_error(
"LAPACKE_dgetrf failed with error code " + std::to_string(info));
81 for (std::size_t i = 0; i < size(); ++i) {
82 m_ipiv.view_host()(i) -= 1;
92template <
class ExecSpace>
93void SplinesLinearProblemDense<ExecSpace>::solve(MultiRHS
const b,
bool const transpose)
const
95 assert(b.extent(0) == size());
102 auto a_device = m_a.view_device();
103 auto ipiv_device = m_ipiv.view_device();
105 Kokkos::RangePolicy<ExecSpace>
const policy(0, b.extent(1));
108 Kokkos::parallel_for(
111 KOKKOS_LAMBDA(
int const i) {
112 auto sub_b = Kokkos::subview(b, Kokkos::ALL, i);
113 KokkosBatched::SerialGetrs<
114 KokkosBatched::Trans::Transpose,
115 KokkosBatched::Algo::Getrs::Unblocked>::
116 invoke(a_device, ipiv_device, sub_b);
119 Kokkos::parallel_for(
122 KOKKOS_LAMBDA(
int const i) {
123 auto sub_b = Kokkos::subview(b, Kokkos::ALL, i);
124 KokkosBatched::SerialGetrs<
125 KokkosBatched::Trans::NoTranspose,
126 KokkosBatched::Algo::Getrs::Unblocked>::
127 invoke(a_device, ipiv_device, sub_b);
132#if defined(KOKKOS_ENABLE_SERIAL
)
133template class SplinesLinearProblemDense<Kokkos::Serial>;
135#if defined(KOKKOS_ENABLE_OPENMP)
136template class SplinesLinearProblemDense<Kokkos::OpenMP>;
138#if defined(KOKKOS_ENABLE_CUDA)
139template class SplinesLinearProblemDense<Kokkos::Cuda>;
141#if defined(KOKKOS_ENABLE_HIP)
142template class SplinesLinearProblemDense<Kokkos::HIP>;
144#if defined(KOKKOS_ENABLE_SYCL)
145template class SplinesLinearProblemDense<Kokkos::SYCL>;
The top-level namespace of DDC.