DDC 0.10.0
Loading...
Searching...
No Matches
splines_linear_problem_dense.cpp
1// Copyright (C) The DDC development team, see COPYRIGHT.md file
2//
3// SPDX-License-Identifier: MIT
4
5#include <cassert>
6#include <cstddef>
7#include <stdexcept>
8#include <string>
9
10#include <Kokkos_Core.hpp>
11#include <Kokkos_DualView.hpp>
12
13#if __has_include(<mkl_lapacke.h>)
14# include <mkl_lapacke.h>
15#else
16# include <lapacke.h>
17#endif
18
19#include <KokkosBatched_Util.hpp>
20
21#include "kokkos-kernels-ext/KokkosBatched_Getrs.hpp"
22
25
26namespace ddc::detail {
27
28template <class ExecSpace>
29SplinesLinearProblemDense<ExecSpace>::SplinesLinearProblemDense(std::size_t const mat_size)
30 : SplinesLinearProblem<ExecSpace>(mat_size)
31 , m_a("a", mat_size, mat_size)
32 , m_ipiv("ipiv", mat_size)
33{
34 Kokkos::deep_copy(m_a.view_host(), 0.);
35}
36
37template <class ExecSpace>
38SplinesLinearProblemDense<ExecSpace>::~SplinesLinearProblemDense() = default;
39
40template <class ExecSpace>
41double SplinesLinearProblemDense<ExecSpace>::get_element(std::size_t const i, std::size_t const j)
42 const
43{
44 assert(i < size());
45 assert(j < size());
46 return m_a.view_host()(i, j);
47}
48
49template <class ExecSpace>
50void SplinesLinearProblemDense<ExecSpace>::set_element(
51 std::size_t const i,
52 std::size_t const j,
53 double const aij)
54{
55 assert(i < size());
56 assert(j < size());
57 m_a.view_host()(i, j) = aij;
58}
59
60template <class ExecSpace>
61void SplinesLinearProblemDense<ExecSpace>::setup_solver()
62{
63 int const info = LAPACKE_dgetrf(
64 LAPACK_ROW_MAJOR,
65 size(),
66 size(),
67 m_a.view_host().data(),
68 size(),
69 m_ipiv.view_host().data());
70 if (info != 0) {
71 throw std::runtime_error("LAPACKE_dgetrf failed with error code " + std::to_string(info));
72 }
73
74 // Convert 1-based index to 0-based index
75 for (std::size_t i = 0; i < size(); ++i) {
76 m_ipiv.view_host()(i) -= 1;
77 }
78
79 // Push on device
80 m_a.modify_host();
81 m_a.sync_device();
82 m_ipiv.modify_host();
83 m_ipiv.sync_device();
84}
85
86template <class ExecSpace>
87void SplinesLinearProblemDense<ExecSpace>::solve(MultiRHS const b, bool const transpose) const
88{
89 assert(b.extent(0) == size());
90
91 // For order 1 splines, size() can be 0 then we bypass the solver call.
92 if (size() == 0) {
93 return;
94 }
95
96 auto a_device = m_a.view_device();
97 auto ipiv_device = m_ipiv.view_device();
98
99 Kokkos::RangePolicy<ExecSpace> const policy(0, b.extent(1));
100
101 if (transpose) {
102 Kokkos::parallel_for(
103 "gerts",
104 policy,
105 KOKKOS_LAMBDA(int const i) {
106 auto sub_b = Kokkos::subview(b, Kokkos::ALL, i);
107 KokkosBatched::SerialGetrs<
108 KokkosBatched::Trans::Transpose,
109 KokkosBatched::Algo::Level3::Unblocked>::
110 invoke(a_device, ipiv_device, sub_b);
111 });
112 } else {
113 Kokkos::parallel_for(
114 "gerts",
115 policy,
116 KOKKOS_LAMBDA(int const i) {
117 auto sub_b = Kokkos::subview(b, Kokkos::ALL, i);
118 KokkosBatched::SerialGetrs<
119 KokkosBatched::Trans::NoTranspose,
120 KokkosBatched::Algo::Level3::Unblocked>::
121 invoke(a_device, ipiv_device, sub_b);
122 });
123 }
124}
125
126#if defined(KOKKOS_ENABLE_SERIAL)
127template class SplinesLinearProblemDense<Kokkos::Serial>;
128#endif
129#if defined(KOKKOS_ENABLE_OPENMP)
130template class SplinesLinearProblemDense<Kokkos::OpenMP>;
131#endif
132#if defined(KOKKOS_ENABLE_CUDA)
133template class SplinesLinearProblemDense<Kokkos::Cuda>;
134#endif
135#if defined(KOKKOS_ENABLE_HIP)
136template class SplinesLinearProblemDense<Kokkos::HIP>;
137#endif
138#if defined(KOKKOS_ENABLE_SYCL)
139template class SplinesLinearProblemDense<Kokkos::SYCL>;
140#endif
141
142} // namespace ddc::detail
The top-level namespace of DDC.