Commit 5f317866 authored by Philipp Fensch
Browse files

Fixed & improved readability

parent b0988954
#pragma once
#include <exception>
#include <dpsim/MNASolver.h>
#include <cuda_runtime.h>
......@@ -12,12 +10,6 @@
/**
* TODO:
* -Proper error-handling
*
* || Test & fix ||
* -initialize();
* -class SolveTask : public CPS::Task
* -class LogTask : public CPS::Task
* -CPS::Task::List getTasks();
*/
namespace DPsim {
......@@ -26,17 +18,19 @@ namespace DPsim {
protected:
// #### Attributes required for GPU ####
///Sovler-Handle
/// Solver-Handle
cusolverDnHandle_t mCusolverHandle;
///Stream
/// Stream
cudaStream_t mStream;
/// Variables for solving one Equation-system
/// Variables for solving one Equation-system (All pointer are device-pointer)
struct GpuData {
/// Device copy of System-Matrix
double *matrix;
/// Size of one dimension
UInt size;
/// Device copy of Vector
double *rightVector;
double *vector;
/// Device-Workspace for getrf
double *workSpace;
......
......@@ -17,7 +17,7 @@ MnaSolverGpu<VarType>::MnaSolverGpu(String name,
cusolverStatus_t status = CUSOLVER_STATUS_SUCCESS;
cudaError_t error = cudaSuccess;
if((status = cusolverDnCreate(&mCusolverHandle)) != CUSOLVER_STATUS_SUCCESS)
std::cerr << "cusolverDnCreate() failed" << std::endl;
std::cerr << "cusolverDnCreate() failed (initializing cusolver-library)" << std::endl;
if((error = cudaStreamCreateWithFlags(&mStream, cudaStreamNonBlocking)) != cudaSuccess)
std::cerr << cudaGetErrorString(error) << std::endl;
if((status = cusolverDnSetStream(mCusolverHandle, mStream)) != CUSOLVER_STATUS_SUCCESS)
......@@ -35,8 +35,8 @@ MnaSolverGpu<VarType>::~MnaSolverGpu() {
//Memory allocated on device
if(mDeviceCopy.matrix)
cudaFree(mDeviceCopy.matrix);
if(mDeviceCopy.rightVector)
cudaFree(mDeviceCopy.rightVector);
if(mDeviceCopy.vector)
cudaFree(mDeviceCopy.vector);
if(mDeviceCopy.workSpace)
cudaFree(mDeviceCopy.workSpace);
if(mDeviceCopy.pivSeq)
......@@ -51,37 +51,35 @@ template <typename VarType>
/// Initializes the GPU solver: runs the base-class initialization, stores the
/// system dimension in mDeviceCopy.size, allocates the device buffers, uploads
/// the system matrix and LU-factorizes it on the device.
// NOTE(review): this span comes from a rendered diff; pre- and post-change
// lines appear to be interleaved (both `index`-based and
// `mDeviceCopy.size`-based statements are present) — confirm against the
// actual file before relying on the exact statement list.
void MnaSolverGpu<VarType>::initialize() {
// Run the base-class initialization first.
MnaSolver<VarType>::initialize();
// The system dimension is taken from the row count of the right-side vector.
mDeviceCopy.size = this->mRightSideVector.rows();
//Allocate Memory on Device
allocateDeviceMemory();
//Copy Systemmatrix to device
copySystemMatrixToDevice();
// Host-side square matrix with the same dimension as the system; it is the
// object that gets logged after the read-back below.
auto index = this->mRightSideVector.rows();
DPsim::Matrix mat;
mat.resize(index, index);
// Debug logging, whether LU-factorization and copying was successful
/*DPsim::Matrix mat;
mat.resize(mDeviceCopy.size, mDeviceCopy.size);
double *buffer = &mat(0);
CUDA_ERROR_HANDLER(cudaMemcpy(buffer, mDeviceCopy.matrix, index * index * sizeof(Real), cudaMemcpyDeviceToHost))
this->mSLog->info("Systemmatrix Gpu: \n{}", mat);
CUDA_ERROR_HANDLER(cudaMemcpy(buffer, mDeviceCopy.matrix, mDeviceCopy.size * mDeviceCopy.size * sizeof(Real), cudaMemcpyDeviceToHost))
this->mSLog->info("Systemmatrix Gpu: \n{}", mat);*/
//LU factorization
LUfactorization();
// Copy the (now factorized) device matrix back to the host and log it.
// NOTE(review): as shown here, `buffer` is only declared inside the
// commented-out region above, so this live read-back looks like a removed
// diff line — confirm.
CUDA_ERROR_HANDLER(cudaMemcpy(buffer, mDeviceCopy.matrix, index * index * sizeof(Real), cudaMemcpyDeviceToHost))
this->mSLog->info("LU decomposition Gpu: \n{}", mat);
/*CUDA_ERROR_HANDLER(cudaMemcpy(buffer, mDeviceCopy.matrix, mDeviceCopy.size * mDeviceCopy.size * sizeof(Real), cudaMemcpyDeviceToHost))
this->mSLog->info("LU decomposition Gpu: \n{}", mat);*/
}
/// Allocate Space for Vectors & Matrices
/// Allocates the device buffers referenced through mDeviceCopy (vector,
/// matrix, pivoting sequence, error code) and the getrf workspace whose size
/// is queried from cuSOLVER.
// NOTE(review): this span comes from a rendered diff; pre- and post-change
// lines appear to be interleaved (allocations into both `rightVector` and
// `vector`, matrix/pivSeq allocated twice), and the hunk marker in the middle
// hides some lines (the declarations of `status` and `workSpaceSize` are not
// visible) — confirm against the actual file.
template <typename VarType>
void MnaSolverGpu<VarType>::allocateDeviceMemory() {
//Get required size
// `index` is the system dimension, `size` the byte size of one vector of Real.
auto index = this->mRightSideVector.rows();
auto size = index * sizeof(Real);
//Vectors
CUDA_ERROR_HANDLER(cudaMalloc((void**)&mDeviceCopy.rightVector, size))
//Allocate memory for...
//Vector
CUDA_ERROR_HANDLER(cudaMalloc((void**)&mDeviceCopy.vector, mDeviceCopy.size * sizeof(Real)))
//Matrix
// size * index == index * index * sizeof(Real): one dense square matrix.
CUDA_ERROR_HANDLER(cudaMalloc((void**)&mDeviceCopy.matrix, size * index))
CUDA_ERROR_HANDLER(cudaMalloc((void**)&mDeviceCopy.matrix, mDeviceCopy.size * mDeviceCopy.size * sizeof(Real)))
//Pivoting-Sequence
// NOTE(review): the buffer is sized with sizeof(Real); cusolverDnDgetrf takes
// an int pivot array, so this presumably over-allocates — confirm the
// declared type of pivSeq.
CUDA_ERROR_HANDLER(cudaMalloc((void**)&mDeviceCopy.pivSeq, size))
CUDA_ERROR_HANDLER(cudaMalloc((void**)&mDeviceCopy.pivSeq, mDeviceCopy.size * sizeof(Real)))
//Errorcode
// Single int on the device that the solver routines write their info code to.
CUDA_ERROR_HANDLER(cudaMalloc((void**)&mDeviceCopy.errInfo, sizeof(int)))
......@@ -91,47 +89,48 @@ void MnaSolverGpu<VarType>::allocateDeviceMemory() {
// Ask cuSOLVER how much workspace the LU factorization needs; a failure is
// only reported on stderr and execution continues.
if((status =
cusolverDnDgetrf_bufferSize(
mCusolverHandle,
index,
index,
mDeviceCopy.size,
mDeviceCopy.size,
mDeviceCopy.matrix,
index,
mDeviceCopy.size,
&workSpaceSize)
) != CUSOLVER_STATUS_SUCCESS)
std::cerr << "cusolverDnDgetrf_bufferSize() failed" << std::endl;
std::cerr << "cusolverDnDgetrf_bufferSize() failed (calculating required space for LU-factorization)" << std::endl;
// NOTE(review): workSpaceSize is passed to cudaMalloc as a byte count; the
// cuSOLVER documentation describes Lwork as the size of the working space —
// verify whether it is counted in elements (doubles) rather than bytes.
CUDA_ERROR_HANDLER(cudaMalloc((void**)&mDeviceCopy.workSpace, workSpaceSize))
}
/// Copies the host system matrix into the device buffer mDeviceCopy.matrix
/// with a blocking host-to-device cudaMemcpy.
// NOTE(review): this span comes from a rendered diff; pre- and post-change
// lines appear to be interleaved (`mat` is declared twice and the copy is
// issued twice) — confirm against the actual file.
template <typename VarType>
void MnaSolverGpu<VarType>::copySystemMatrixToDevice() {
//TODO Error Checking
auto dim = this->mRightSideVector.rows();
// Address of the first element of the system matrix; the copy below treats
// the matrix as one contiguous block of dim * dim Real values.
// NOTE(review): assumes the matrix storage is dense and contiguous — confirm.
Real *mat = &MnaSolver<VarType>::systemMatrix()(0);
CUDA_ERROR_HANDLER(cudaMemcpy(mDeviceCopy.matrix, mat, sizeof(Real) * dim * dim, cudaMemcpyHostToDevice))
auto *mat = &MnaSolver<VarType>::systemMatrix()(0);
CUDA_ERROR_HANDLER(cudaMemcpy(mDeviceCopy.matrix, mat, mDeviceCopy.size * mDeviceCopy.size * sizeof(Real), cudaMemcpyHostToDevice))
}
/// LU-factorizes the device copy of the system matrix in place with
/// cusolverDnDgetrf, storing the pivoting sequence in mDeviceCopy.pivSeq and
/// the info code in mDeviceCopy.errInfo. Failures are only reported on stderr.
// NOTE(review): this span comes from a rendered diff; pre- and post-change
// lines appear to be interleaved (`int info;` is declared twice, both
// `dim`- and `mDeviceCopy.size`-based arguments are listed, and
// cudaDeviceSynchronize appears both before and after the info read-back) —
// confirm against the actual file.
template <typename VarType>
void MnaSolverGpu<VarType>::LUfactorization() {
auto dim = this->mRightSideVector.rows();
//TODO Error checking
cusolverStatus_t status;
//Variables for error-handling
cusolverStatus_t status = CUSOLVER_STATUS_SUCCESS;
int info;
//LU-factorization
status = cusolverDnDgetrf(
mCusolverHandle,
dim,
dim,
mDeviceCopy.size,
mDeviceCopy.size,
mDeviceCopy.matrix,
dim,
mDeviceCopy.size,
mDeviceCopy.workSpace,
mDeviceCopy.pivSeq,
mDeviceCopy.errInfo);
// Wait for the device before inspecting the result. The constructor binds the
// handle to mStream, which is created with cudaStreamNonBlocking, so
// presumably this synchronize is what guarantees errInfo is written before
// the copy below reads it — confirm.
CUDA_ERROR_HANDLER(cudaDeviceSynchronize())
if(status != CUSOLVER_STATUS_SUCCESS) {
std::cerr << "cusolverDnDgetrf() failed" << std::endl;
std::cerr << "cusolverDnDgetrf() failed (calculating LU-factorization)" << std::endl;
}
int info;
// Fetch the info code written by getrf; a negative value -i is reported as
// "i-th parameter is wrong".
CUDA_ERROR_HANDLER(cudaMemcpy(&info, mDeviceCopy.errInfo, sizeof(int), cudaMemcpyDeviceToHost))
if(0 > info) {
std::cerr << -info << "-th parameter is wrong" << std::endl;
}
// NOTE(review): only info < 0 is handled; per the cuSOLVER documentation a
// positive info signals a zero pivot (singular factor), which goes unreported
// here — consider handling it.
CUDA_ERROR_HANDLER(cudaDeviceSynchronize())
}
template <typename VarType>
......@@ -160,8 +159,6 @@ Task::List MnaSolverGpu<VarType>::getTasks() {
template <typename VarType>
void MnaSolverGpu<VarType>::SolveTask::execute(Real time, Int timeStepCount) {
const auto dim = mSolver.mRightSideVector.rows();
const auto size = dim * sizeof(Real);
// Reset source vector
mSolver.mRightSideVector.setZero();
......@@ -171,47 +168,35 @@ void MnaSolverGpu<VarType>::SolveTask::execute(Real time, Int timeStepCount) {
mSolver.mRightSideVector += *stamp;
//Copy right vector to device
CUDA_ERROR_HANDLER(cudaMemcpy(mSolver.mDeviceCopy.rightVector, &mSolver.mRightSideVector(0), size, cudaMemcpyHostToDevice))
mSolver.mSLog->info("Right-Side-Vector Cpu: \n{}", mSolver.mRightSideVector);
//Print RHS-vector
DPsim::Matrix mat;
mat.resize(dim, 1);
double *buffer = &mat(0);
CUDA_ERROR_HANDLER(cudaMemcpy(buffer, mSolver.mDeviceCopy.rightVector, dim * sizeof(Real), cudaMemcpyDeviceToHost))
mSolver.mSLog->info("Right-Side-Vector Gpu: \n{}", mat);
CUDA_ERROR_HANDLER(cudaMemcpy(mSolver.mDeviceCopy.vector, &mSolver.mRightSideVector(0), mSolver.mDeviceCopy.size * sizeof(Real), cudaMemcpyHostToDevice))
// Solve
if (mSolver.mSwitchedMatrices.size() > 0) {
cusolverStatus_t status = cusolverDnDgetrs(
mSolver.mCusolverHandle,
CUBLAS_OP_N,
dim,
mSolver.mDeviceCopy.size,
1, /* nrhs */
mSolver.mDeviceCopy.matrix,
dim,
mSolver.mDeviceCopy.size,
mSolver.mDeviceCopy.pivSeq,
mSolver.mDeviceCopy.rightVector,
dim,
mSolver.mDeviceCopy.vector,
mSolver.mDeviceCopy.size,
mSolver.mDeviceCopy.errInfo);
CUDA_ERROR_HANDLER(cudaDeviceSynchronize())
if(status != CUSOLVER_STATUS_SUCCESS)
std::cerr << "cusolverDnDgetrs() failed" << std::endl;
std::cerr << "cusolverDnDgetrs() failed (Solving A*x = b)" << std::endl;
int info;
CUDA_ERROR_HANDLER(cudaMemcpy(&info, mSolver.mDeviceCopy.errInfo, sizeof(int), cudaMemcpyDeviceToHost))
if(0 > info) {
std::cerr << -info << "-th parameter is wrong" << std::endl;
}
}
//Copy Leftvector back
buffer = new double[size];
CUDA_ERROR_HANDLER(cudaMemcpy(buffer, mSolver.mDeviceCopy.rightVector, size, cudaMemcpyDeviceToHost))
for(UInt i = 0; i < dim; i++) {
mSolver.mLeftSideVector(i, 0) = buffer[i]; // TODO check
}
}
delete[] buffer;
//Copy Solution back
CUDA_ERROR_HANDLER(cudaMemcpy(&mSolver.mLeftSideVector(0), mSolver.mDeviceCopy.vector, mSolver.mDeviceCopy.size * sizeof(Real), cudaMemcpyDeviceToHost))
// TODO split into separate task? (dependent on x, updating all v attributes)
for (UInt nodeIdx = 0; nodeIdx < mSolver.mNumNetNodes; nodeIdx++)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment