MNASolverGpuSparse.h 4.25 KB
Newer Older
Philipp Fensch's avatar
Philipp Fensch committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
#pragma once

#include <dpsim/MNASolver.h>

#include <cuda_runtime.h>
#include <cusparse_v2.h>

//Error Handling
#define CUDA_ENABLE_ERROR_CHECK

#ifdef CUDA_ENABLE_ERROR_CHECK
#define checkCudaErrors(CUDA_CALL) \
   {cudaError_t code = (cudaError_t)CUDA_CALL; \
   if (code != cudaSuccess) { \
      printf("CUDA Error: %s\n", cudaGetErrorString(code)); \
   }}
#else
#define checkCudaErrors(CUDA_CALL) CUDA_CALL
#endif

namespace DPsim {

	namespace cuda {

		template <typename T>
		void copybackAndPrint(const char *msg, T *ptr, int n) {
			std::vector<T> buffer(n);
			cudaMemcpy(buffer.data(), ptr, n * sizeof(T), cudaMemcpyDeviceToHost);
			std::cout << msg;
			for(T d : buffer) {
				std::cout << d << ' ';
			}
			std::cout << std::endl;
		}

		struct cuda_delete {
			void operator()(void *ptr) {
				cudaFree(ptr);
			}
		};

		template<typename T>
		struct cuda_new {
			T *operator()(size_t n) {
				T *ptr;
				checkCudaErrors(cudaMalloc(&ptr, n * sizeof(T)));
				return ptr;
			}
		};

		template <typename T>
		struct unique_ptr : public std::unique_ptr<T, cuda_delete> {
			unique_ptr():
				std::unique_ptr<T, cuda_delete>() {
			}

			unique_ptr(size_t n):
				std::unique_ptr<T, cuda_delete>(cuda_new<T>()(n)) {
			}

			//"Auto-Dereferencing"
			operator T*() {
				return std::unique_ptr<T, cuda_delete>::get();
			}
		};

		// Matrix (CSR)
		struct CudaMatrix {
			CudaMatrix(const Eigen::SparseMatrix<double, Eigen::RowMajor> &mat, int dim):
				non_zero(mat.nonZeros()),
				row(cuda::unique_ptr<int>(dim + 1)),
				col(cuda::unique_ptr<int>(mat.nonZeros())),
				val(cuda::unique_ptr<double>(mat.nonZeros())) {

				// Copy Matrix (Host -> Device)
				cudaMemcpy(row, mat.outerIndexPtr(), (dim + 1) * sizeof(int), cudaMemcpyHostToDevice);
				cudaMemcpy(col, mat.innerIndexPtr(), non_zero * sizeof(int), cudaMemcpyHostToDevice);
				cudaMemcpy(val, mat.valuePtr(), non_zero * sizeof(double), cudaMemcpyHostToDevice);
			}

			//Matrix Data
			const int non_zero;
			cuda::unique_ptr<int> row;
			cuda::unique_ptr<int> col;
			cuda::unique_ptr<double> val;
		};
	}

	template <typename VarType>
    class MnaSolverGpuSparse : public MnaSolver<VarType>{
	protected:

		// #### Attributes required for GPU ####
		/// Solver-Handle
		cusparseHandle_t mCusparsehandle;

		/// Systemmatrix on Device
		std::unique_ptr<cuda::CudaMatrix> mSysMat;

		/// RHS-Vector
		cuda::unique_ptr<double> mGpuRhsVec;
		/// LHS-Vector
		cuda::unique_ptr<double> mGpuLhsVec;
		/// Intermediate Vector
		cuda::unique_ptr<double> mGpuIntermediateVec;

		/// Initialize cuSparse-library
        void initialize();
		/// ILU factorization
		void iluPreconditioner();
		///
		void solve(Real time, Int timeStepCount);

	private:
		///Required shared Variables
		void *pBuffer = nullptr;
		cusparseMatDescr_t descr_L = nullptr;
    	cusparseMatDescr_t descr_U = nullptr;
		csrsv2Info_t info_L = nullptr;
    	csrsv2Info_t info_U = nullptr;

	public:
		MnaSolverGpuSparse(String name,
			CPS::Domain domain = CPS::Domain::DP,
			CPS::Logger::Level logLevel = CPS::Logger::Level::info);

		virtual ~MnaSolverGpuSparse();

		CPS::Task::List getTasks();

		class SolveTask : public CPS::Task {
		public:
			SolveTask(MnaSolverGpuSparse<VarType>& solver) :
				Task(solver.mName + ".Solve"), mSolver(solver) {

				for (auto it : solver.mMNAComponents) {
					if (it->template attribute<Matrix>("right_vector")->get().size() != 0)
						mAttributeDependencies.push_back(it->attribute("right_vector"));
				}
				for (auto node : solver.mNodes) {
					mModifiedAttributes.push_back(node->attribute("v"));
				}
				mModifiedAttributes.push_back(solver.attribute("left_vector"));
			}

			void execute(Real time, Int timeStepCount) { mSolver.solve(time, timeStepCount); }

		private:
			MnaSolverGpuSparse<VarType>& mSolver;
		};

		class LogTask : public CPS::Task {
		public:
			LogTask(MnaSolverGpuSparse<VarType>& solver) :
				Task(solver.mName + ".Log"), mSolver(solver) {
				mAttributeDependencies.push_back(solver.attribute("left_vector"));
				mModifiedAttributes.push_back(Scheduler::external);
			}

			void execute(Real time, Int timeStepCount) { mSolver.log(time, timeStepCount); }

		private:
			MnaSolverGpuSparse<VarType>& mSolver;
		};
    };

template class DPsim::MnaSolverGpuSparse<Real>;
template class DPsim::MnaSolverGpuSparse<Complex>;

}