12#include <Kokkos_Core.hpp>
13#include <Kokkos_DualView.hpp>
15#if __has_include(<mkl_lapacke.h>)
16#include <mkl_lapacke.h>
21#include <KokkosBatched_Util.hpp>
23#include "kokkos-kernels-ext/KokkosBatched_Getrs.hpp"
25#include "splines_linear_problem.hpp"
27namespace ddc::detail {
30
31
32
33
34
35
36template <
class ExecSpace>
37class SplinesLinearProblemDense :
public SplinesLinearProblem<ExecSpace>
40 using typename SplinesLinearProblem<ExecSpace>::MultiRHS;
41 using SplinesLinearProblem<ExecSpace>::size;
44 Kokkos::DualView<
double**, Kokkos::LayoutRight,
typename ExecSpace::memory_space> m_a;
45 Kokkos::DualView<
int*,
typename ExecSpace::memory_space> m_ipiv;
49
50
51
52
53 explicit SplinesLinearProblemDense(std::size_t
const mat_size)
54 : SplinesLinearProblem<ExecSpace>(mat_size)
55 , m_a(
"a", mat_size, mat_size)
56 , m_ipiv(
"ipiv", mat_size)
58 Kokkos::deep_copy(m_a.h_view, 0.);
61 double get_element(std::size_t
const i, std::size_t
const j)
const override
65 return m_a.h_view(i, j);
68 void set_element(std::size_t
const i, std::size_t
const j,
double const aij)
override
72 m_a.h_view(i, j) = aij;
76
77
78
79
80 void setup_solver()
override
82 int const info = LAPACKE_dgetrf(
88 m_ipiv.h_view.data());
90 throw std::runtime_error(
91 "LAPACKE_dgetrf failed with error code " + std::to_string(info));
95 for (
int i = 0; i < size(); ++i) {
96 m_ipiv.h_view(i) -= 1;
102 m_ipiv.modify_host();
103 m_ipiv.sync_device();
107
108
109
110
111
112
113
114 void solve(MultiRHS
const b,
bool const transpose)
const override
116 assert(b.extent(0) == size());
123 auto a_device = m_a.d_view;
124 auto ipiv_device = m_ipiv.d_view;
126 Kokkos::RangePolicy<ExecSpace>
const policy(0, b.extent(1));
129 Kokkos::parallel_for(
132 KOKKOS_LAMBDA(
const int i) {
133 auto sub_b = Kokkos::subview(b, Kokkos::ALL, i);
134 KokkosBatched::SerialGetrs<
135 KokkosBatched::Trans::Transpose,
136 KokkosBatched::Algo::Level3::Unblocked>::
137 invoke(a_device, ipiv_device, sub_b);
140 Kokkos::parallel_for(
143 KOKKOS_LAMBDA(
const int i) {
144 auto sub_b = Kokkos::subview(b, Kokkos::ALL, i);
145 KokkosBatched::SerialGetrs<
146 KokkosBatched::Trans::NoTranspose,
147 KokkosBatched::Algo::Level3::Unblocked>::
148 invoke(a_device, ipiv_device, sub_b);
The top-level namespace of DDC.