Commit 7ca0828b authored by Markus Mirz's avatar Markus Mirz
Browse files

Merge branch 'cuda' into 'master'

CUDA network solution

See merge request !172
parents d97cbe70 4df467e1
......@@ -39,7 +39,7 @@ build:linux:
script:
- mkdir -p build
- cd build
- cmake ..
- cmake -DWITH_CUDA=OFF ..
- make -j 32
image: ${DOCKER_IMAGE_DEV}:${DOCKER_TAG}
cache:
......@@ -52,6 +52,24 @@ build:linux:
tags:
- docker
build:linux-cuda:
stage: build
script:
- mkdir -p build
- cd build
- cmake ..
- make -j 32
image: ${DOCKER_IMAGE_DEV}:${DOCKER_TAG}
cache:
paths:
- build
key: build-linux-cuda
artifacts:
paths:
- build
tags:
- docker
build:windows:
stage: build
script:
......@@ -217,4 +235,4 @@ deploy:packages:
tags:
- fein-deploy
- shell
- linux
\ No newline at end of file
- linux
cmake_minimum_required(VERSION 3.12)
cmake_minimum_required(VERSION 3.13)
project(DPsim CXX)
set(PROJECT_AUTHOR "Institute for Automation of Complex Power Systems, RWTH Aachen University")
......@@ -78,6 +78,8 @@ find_package(Threads REQUIRED)
find_package(CIMpp)
find_package(Sundials)
find_package(OpenMP)
find_package(CUDA)
find_package(GSL)
find_package(Graphviz)
......@@ -98,19 +100,24 @@ endif()
include(CMakeDependentOption)
option(BUILD_SHARED_LIBS "Build shared library" OFF)
option(BUILD_EXAMPLES "Build C++ examples" ON )
cmake_dependent_option(WITH_GSL "Enable GSL" ON "GSL_FOUND" OFF)
cmake_dependent_option(WITH_SUNDIALS "Enable sundials solver suite" ON "Sundials_FOUND" OFF)
cmake_dependent_option(WITH_SHMEM "Enable shared memory interface" ON "VILLASnode_FOUND" OFF)
cmake_dependent_option(WITH_RT "Enable real-time features" ON "Linux_FOUND" OFF)
cmake_dependent_option(WITH_PYTHON "Enable Python support" ON "Python_FOUND" OFF)
cmake_dependent_option(WITH_CIM "Enable support for parsing CIM files" ON "CIMpp_FOUND" OFF)
cmake_dependent_option(WITH_OPENMP "Enable OpenMP-based parallelisation" ON "OPENMP_FOUND" OFF)
cmake_dependent_option(WITH_CUDA "Enable CUDA-based parallelisation" ON "CUDA_FOUND" OFF)
if(WITH_CUDA)
enable_language(CUDA)
endif()
option(BUILD_SHARED_LIBS "Build shared library" OFF)
option(BUILD_EXAMPLES "Build C++ examples" ON)
cmake_dependent_option(WITH_GSL "Enable GSL" ON "GSL_FOUND" OFF)
cmake_dependent_option(WITH_SUNDIALS "Enable sundials solver suite" ON "Sundials_FOUND" OFF)
cmake_dependent_option(WITH_SHMEM "Enable shared memory interface" ON "VILLASnode_FOUND" OFF)
cmake_dependent_option(WITH_RT "Enable real-time features" ON "Linux_FOUND" OFF)
cmake_dependent_option(WITH_PYTHON "Enable Python support" ON "Python_FOUND" OFF)
cmake_dependent_option(WITH_NUMPY "Enable NumPy" ON "NumPy_FOUND" OFF)
cmake_dependent_option(WITH_CIM "Enable CIM file parsing" ON "CIMpp_FOUND" OFF)
cmake_dependent_option(WITH_OPENMP "Enable OpenMP parallelisation" ON "OPENMP_FOUND" OFF)
cmake_dependent_option(WITH_GRAPHVIZ "Enable Graphviz Graphs" ON "Graphviz_FOUND" OFF)
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/Include/dpsim/Config.h.in
${CMAKE_CURRENT_BINARY_DIR}/Include/dpsim/Config.h
......
......@@ -24,6 +24,7 @@
#cmakedefine WITH_PYTHON
#cmakedefine WITH_SUNDIALS
#cmakedefine WITH_OPENMP
#cmakedefine WITH_CUDA
#cmakedefine HAVE_TIMERFD
#cmakedefine HAVE_PIPE
......
......@@ -80,7 +80,6 @@ namespace DPsim {
std::unordered_map< std::bitset<SWITCH_NUM>, CPS::LUFactorized > mLuFactorizations;
std::unordered_map< std::bitset<SWITCH_NUM>, std::vector<CPS::LUFactorized> > mLuFactorizationsHarm;
// #### Attributes related to switching ####
/// Index of the next switching event
UInt mSwitchTimeIndex = 0;
......
#pragma once
#include <dpsim/MNASolver.h>
#include <cuda_runtime.h>
#include <cusolverDn.h>
// Reports a failing CUDA runtime call to stderr and continues execution.
// NOTE(review): the expansion is a plain brace block (not do/while(0)) and
// every call site in MNASolverGpu.cpp invokes it WITHOUT a trailing
// semicolon — keep that convention; adding a semicolon after the macro
// inside an unbraced if/else would change parsing.
#define CUDA_ERROR_HANDLER(func) {cudaError_t error; if((error = func) != cudaSuccess) std::cerr << cudaGetErrorString(error) << std::endl; }
/**
 * TODO:
 * -Proper error-handling
 */
namespace DPsim {
/// MNA solver variant that offloads the dense LU factorization and the
/// per-time-step triangular solves to the GPU via cuSOLVER
/// (cusolverDnDgetrf / cusolverDnDgetrs). The system matrix and the
/// right-side vector are mirrored in device memory (see GpuData).
template <typename VarType>
class MnaSolverGpu : public MnaSolver<VarType>{
protected:
// #### Attributes required for GPU ####
/// Solver-Handle
cusolverDnHandle_t mCusolverHandle;
/// Stream
cudaStream_t mStream;
/// Variables for solving one Equation-system (All pointer are device-pointer)
struct GpuData {
/// Device copy of System-Matrix
double *matrix;
/// Size of one dimension
UInt size;
/// Device copy of Vector
double *vector;
/// Device-Workspace for getrf
double *workSpace;
/// Pivoting-Sequence
int *pivSeq;
/// Errorinfo
int *errInfo;
} mDeviceCopy;
/// Initialize cuSolver-library
void initialize();
/// Allocate Space for Vectors & Matrices on GPU
void allocateDeviceMemory();
/// Copy Systemmatrix to Device
void copySystemMatrixToDevice();
/// LU factorization
void LUfactorization();
public:
/// Constructor: creates the cuSOLVER handle and a non-blocking CUDA stream.
MnaSolverGpu(String name,
CPS::Domain domain = CPS::Domain::DP,
CPS::Logger::Level logLevel = CPS::Logger::Level::info);
/// Destructor: releases handle, stream and all device allocations.
virtual ~MnaSolverGpu();
/// Returns the component/node tasks plus this solver's SolveTask and LogTask.
CPS::Task::List getTasks();
/// Scheduler task that assembles the right-side vector, solves A*x = b on
/// the GPU and writes the resulting node voltages back.
class SolveTask : public CPS::Task {
public:
SolveTask(MnaSolverGpu<VarType>& solver, Bool steadyStateInit) :
Task(solver.mName + ".Solve"), mSolver(solver), mSteadyStateInit(steadyStateInit) {
// Depend on every component's non-empty right_vector contribution.
for (auto it : solver.mMNAComponents) {
if (it->template attribute<Matrix>("right_vector")->get().size() != 0) {
mAttributeDependencies.push_back(it->attribute("right_vector"));
}
}
// This task updates every node voltage and the solver's solution vector.
for (auto node : solver.mNodes) {
mModifiedAttributes.push_back(node->attribute("v"));
}
mModifiedAttributes.push_back(solver.attribute("left_vector"));
}
void execute(Real time, Int timeStepCount);
private:
MnaSolverGpu<VarType>& mSolver;
Bool mSteadyStateInit;
};
/// Scheduler task that logs the solution after each solve.
class LogTask : public CPS::Task {
public:
LogTask(MnaSolverGpu<VarType>& solver) :
Task(solver.mName + ".Log"), mSolver(solver) {
mAttributeDependencies.push_back(solver.attribute("left_vector"));
mModifiedAttributes.push_back(Scheduler::external);
}
void execute(Real time, Int timeStepCount);
private:
MnaSolverGpu<VarType>& mSolver;
};
};
}
\ No newline at end of file
......@@ -36,12 +36,21 @@ RUN dnf --refresh -y install \
sundials-devel \
gsl-devel
# CUDA dependencies
RUN dnf -y install https://developer.download.nvidia.com/compute/cuda/repos/fedora29/x86_64/cuda-repo-fedora29-10.2.89-1.x86_64.rpm && \
dnf --refresh -y install cuda-compiler-10-2 cuda-libraries-dev-10-2 && \
ln -s cuda-10.2 /usr/local/cuda
ENV PATH="/usr/local/cuda/bin:${PATH}"
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
# Install some debuginfos
RUN dnf -y debuginfo-install \
python3
# CIMpp and VILLAS are installed here
ENV LD_LIBRARY_PATH /usr/local/lib64
ENV LD_LIBRARY_PATH="/usr/local/lib64:${LD_LIBRARY_PATH}"
# VILLAS dependencies
RUN dnf -y install \
......
......@@ -52,6 +52,15 @@ if(WITH_SHMEM)
list(APPEND DPSIM_INCLUDE_DIRS ${VILLASNODE_INCLUDE_DIRS})
endif()
if(WITH_CUDA)
list(APPEND DPSIM_SOURCES
MNASolverGpu.cpp
)
list(APPEND DPSIM_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS} ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
list(APPEND DPSIM_LIBRARIES ${CUDA_LIBRARIES} ${CUDA_cusolver_LIBRARY})
endif()
if(WITH_OPENMP)
list(APPEND DPSIM_SOURCES
OpenMPLevelScheduler.cpp
......
......@@ -6,6 +6,7 @@
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*********************************************************************************/
#include <dpsim/MNASolver.h>
#include <dpsim/SequentialScheduler.h>
......@@ -51,6 +52,7 @@ void MnaSolver<VarType>::initialize() {
// The system topology is prepared and we create the MNA matrices.
createEmptyVectors();
createEmptySystemMatrix();
// Register attribute for solution vector
if (mFrequencyParallel) {
mSLog->info("Computing network harmonics in parallel.");
......
#include <dpsim/MNASolverGpu.h>
#include <dpsim/SequentialScheduler.h>
using namespace DPsim;
using namespace CPS;
namespace DPsim {
template <typename VarType>
MnaSolverGpu<VarType>::MnaSolverGpu(String name,
    CPS::Domain domain, CPS::Logger::Level logLevel) :
    MnaSolver<VarType>(name, domain, logLevel),
    mCusolverHandle(nullptr), mStream(nullptr) {

    mDeviceCopy = {};

    // Acquire the cuSOLVER dense handle, a non-blocking CUDA stream, and
    // bind the stream to the handle. Failures are reported to stderr but do
    // not abort construction, matching this solver's error policy.
    if (cusolverDnCreate(&mCusolverHandle) != CUSOLVER_STATUS_SUCCESS)
        std::cerr << "cusolverDnCreate() failed (initializing cusolver-library)" << std::endl;

    cudaError_t streamResult = cudaStreamCreateWithFlags(&mStream, cudaStreamNonBlocking);
    if (streamResult != cudaSuccess)
        std::cerr << cudaGetErrorString(streamResult) << std::endl;

    if (cusolverDnSetStream(mCusolverHandle, mStream) != CUSOLVER_STATUS_SUCCESS)
        std::cerr << "cusolverDnSetStream() failed" << std::endl;
}
/// Releases the cuSOLVER handle, the CUDA stream and all device buffers.
/// Return codes of the cleanup calls are intentionally ignored.
template <typename VarType>
MnaSolverGpu<VarType>::~MnaSolverGpu() {
//Handle & Stream
if(mCusolverHandle)
cusolverDnDestroy(mCusolverHandle);
if(mStream)
cudaStreamDestroy(mStream);
//Memory allocated on device (cudaFree on a null pointer is a no-op, so
//members of the zero-initialized mDeviceCopy that were never allocated
//are safe to pass here)
cudaFree(mDeviceCopy.matrix);
cudaFree(mDeviceCopy.vector);
cudaFree(mDeviceCopy.workSpace);
cudaFree(mDeviceCopy.pivSeq);
cudaFree(mDeviceCopy.errInfo);
// NOTE(review): cudaDeviceReset() tears down the CUDA context for the
// whole process; if several solver instances (or other CUDA users)
// coexist, destroying one solver invalidates their device resources too —
// confirm single-instance use before keeping this call.
cudaDeviceReset();
}
template <typename VarType>
void MnaSolverGpu<VarType>::initialize() {
    // Regular MNA setup (matrices, vectors, logging) happens in the base class.
    MnaSolver<VarType>::initialize();

    // The system matrix is square; one dimension of the right-side vector
    // determines the size of every device buffer.
    mDeviceCopy.size = this->mRightSideVector.rows();

    allocateDeviceMemory();       // device buffers for matrix, RHS, workspace
    copySystemMatrixToDevice();   // mirror the host system matrix on the GPU
    LUfactorization();            // in-place LU of the device copy
}
/// Allocates all device-side buffers required by getrf/getrs:
/// the RHS/solution vector, the dense system matrix, the pivot sequence,
/// the device info word and the factorization workspace (whose size is
/// queried from cuSOLVER for this matrix shape).
template <typename VarType>
void MnaSolverGpu<VarType>::allocateDeviceMemory() {
    // RHS / solution vector (size doubles)
    CUDA_ERROR_HANDLER(cudaMalloc((void**)&mDeviceCopy.vector, mDeviceCopy.size * sizeof(Real)))
    // Dense system matrix (size x size doubles)
    CUDA_ERROR_HANDLER(cudaMalloc((void**)&mDeviceCopy.matrix, mDeviceCopy.size * mDeviceCopy.size * sizeof(Real)))
    // Pivot sequence written by getrf: one int per row.
    // (Fix: this was previously allocated with sizeof(Real), which merely
    // over-allocated since pivSeq is an int array.)
    CUDA_ERROR_HANDLER(cudaMalloc((void**)&mDeviceCopy.pivSeq, mDeviceCopy.size * sizeof(int)))
    // Device-resident status word of getrf/getrs
    CUDA_ERROR_HANDLER(cudaMalloc((void**)&mDeviceCopy.errInfo, sizeof(int)))
    // Workspace: ask cuSOLVER how much scratch getrf needs for this shape
    int workSpaceSize = 0;
    cusolverStatus_t status = cusolverDnDgetrf_bufferSize(
        mCusolverHandle,
        mDeviceCopy.size,
        mDeviceCopy.size,
        mDeviceCopy.matrix,
        mDeviceCopy.size,
        &workSpaceSize);
    if (status != CUSOLVER_STATUS_SUCCESS)
        std::cerr << "cusolverDnDgetrf_bufferSize() failed (calculating required space for LU-factorization)" << std::endl;
    CUDA_ERROR_HANDLER(cudaMalloc((void**)&mDeviceCopy.workSpace, workSpaceSize))
}
template <typename VarType>
void MnaSolverGpu<VarType>::copySystemMatrixToDevice() {
    // Mirror the host-side system matrix into the device buffer.
    const size_t byteCount = mDeviceCopy.size * mDeviceCopy.size * sizeof(Real);
    auto *hostMatrix = &MnaSolver<VarType>::systemMatrix()(0);
    CUDA_ERROR_HANDLER(cudaMemcpy(mDeviceCopy.matrix, hostMatrix, byteCount, cudaMemcpyHostToDevice))
}
/// In-place LU factorization (with partial pivoting) of the device copy of
/// the system matrix; fills mDeviceCopy.pivSeq and mDeviceCopy.errInfo.
/// Reports both illegal-argument errors (info < 0) and a singular system
/// matrix (info > 0) to stderr.
template <typename VarType>
void MnaSolverGpu<VarType>::LUfactorization() {
    cusolverStatus_t status = cusolverDnDgetrf(
        mCusolverHandle,
        mDeviceCopy.size,
        mDeviceCopy.size,
        mDeviceCopy.matrix,
        mDeviceCopy.size,
        mDeviceCopy.workSpace,
        mDeviceCopy.pivSeq,
        mDeviceCopy.errInfo);
    // getrf runs asynchronously on the solver's stream; wait before
    // inspecting the device-side info word.
    CUDA_ERROR_HANDLER(cudaDeviceSynchronize())
    if(status != CUSOLVER_STATUS_SUCCESS) {
        std::cerr << "cusolverDnDgetrf() failed (calculating LU-factorization)" << std::endl;
    }
    int info;
    CUDA_ERROR_HANDLER(cudaMemcpy(&info, mDeviceCopy.errInfo, sizeof(int), cudaMemcpyDeviceToHost))
    if(0 > info) {
        std::cerr << -info << "-th parameter is wrong" << std::endl;
    } else if (info > 0) {
        // Fix: getrf signals U(info,info) == 0 via a positive info value —
        // the system matrix is singular and the factors cannot be used to
        // solve A*x = b. This case was previously ignored silently.
        std::cerr << "LU factorization failed: system matrix is singular (zero pivot at diagonal " << info << ")" << std::endl;
    }
}
template <typename VarType>
Task::List MnaSolverGpu<VarType>::getTasks() {
    Task::List taskList;

    // Collect the tasks contributed by MNA components and network nodes.
    for (auto comp : this->mMNAComponents)
        for (auto task : comp->mnaTasks())
            taskList.push_back(task);
    for (auto node : this->mNodes)
        for (auto task : node->mnaTasks())
            taskList.push_back(task);
    // TODO signal components should be moved out of MNA solver
    for (auto comp : this->mSimSignalComps)
        for (auto task : comp->getTasks())
            taskList.push_back(task);

    // Finally the solver's own tasks: GPU solve, then result logging.
    taskList.push_back(std::make_shared<SolveTask>(*this, false));
    taskList.push_back(std::make_shared<LogTask>(*this));
    return taskList;
}
/// Performs one MNA solve on the GPU: assembles the right-hand side on the
/// host from the components' pre-step stamps, transfers it to the device,
/// back-substitutes with the LU factors computed in initialize() (getrs
/// overwrites the device vector with x), copies the solution back and
/// distributes the node voltages.
template <typename VarType>
void MnaSolverGpu<VarType>::SolveTask::execute(Real time, Int timeStepCount) {
// Reset source vector
mSolver.mRightSideVector.setZero();
// Add together the right side vector (computed by the components'
// pre-step tasks)
for (const auto &stamp : mSolver.mRightVectorStamps)
mSolver.mRightSideVector += *stamp;
//Copy right vector to device
CUDA_ERROR_HANDLER(cudaMemcpy(mSolver.mDeviceCopy.vector, &mSolver.mRightSideVector(0), mSolver.mDeviceCopy.size * sizeof(Real), cudaMemcpyHostToDevice))
// Solve (in place on the device; the RHS buffer becomes the solution)
if (mSolver.mSwitchedMatrices.size() > 0) {
cusolverStatus_t status = cusolverDnDgetrs(
mSolver.mCusolverHandle,
CUBLAS_OP_N,
mSolver.mDeviceCopy.size,
1, /* nrhs */
mSolver.mDeviceCopy.matrix,
mSolver.mDeviceCopy.size,
mSolver.mDeviceCopy.pivSeq,
mSolver.mDeviceCopy.vector,
mSolver.mDeviceCopy.size,
mSolver.mDeviceCopy.errInfo);
CUDA_ERROR_HANDLER(cudaDeviceSynchronize())
if(status != CUSOLVER_STATUS_SUCCESS)
std::cerr << "cusolverDnDgetrs() failed (Solving A*x = b)" << std::endl;
int info;
CUDA_ERROR_HANDLER(cudaMemcpy(&info, mSolver.mDeviceCopy.errInfo, sizeof(int), cudaMemcpyDeviceToHost))
if(0 > info) {
std::cerr << -info << "-th parameter is wrong" << std::endl;
}
}
//Copy Solution back
// NOTE(review): this copy-back runs even when the solve branch above was
// skipped (mSwitchedMatrices empty); the left-side vector then receives
// the raw RHS copy — confirm mSwitchedMatrices cannot be empty here.
CUDA_ERROR_HANDLER(cudaMemcpy(&mSolver.mLeftSideVector(0), mSolver.mDeviceCopy.vector, mSolver.mDeviceCopy.size * sizeof(Real), cudaMemcpyDeviceToHost))
// TODO split into separate task? (dependent on x, updating all v attributes)
for (UInt nodeIdx = 0; nodeIdx < mSolver.mNumNetNodes; nodeIdx++)
mSolver.mNodes[nodeIdx]->mnaUpdateVoltage(mSolver.mLeftSideVector);
if (!mSteadyStateInit)
mSolver.updateSwitchStatus();
// Components' states will be updated by the post-step tasks
}
/// Writes the current simulation state to the solver's log sinks.
template <typename VarType>
void MnaSolverGpu<VarType>::LogTask::execute(Real time, Int timeStepCount) {
mSolver.log(time);
}
}
template class DPsim::MnaSolverGpu<Real>;
template class DPsim::MnaSolverGpu<Complex>;
\ No newline at end of file
......@@ -39,6 +39,10 @@
#include <dpsim/ODESolver.h>
#endif
#ifdef WITH_CUDA
#include <dpsim/MNASolverGpu.h>
#endif
using namespace CPS;
using namespace DPsim;
......@@ -144,8 +148,13 @@ void Simulation::createSolvers(
solver = std::make_shared<DiakopticsSolver<VarType>>(mName,
subnets[net], tearComponents, mTimeStep, mLogLevel);
} else {
#ifdef WITH_CUDA
solver = std::make_shared<MnaSolverGpu<VarType>>(
mName + copySuffix, mDomain, mLogLevel);
#else
solver = std::make_shared<MnaSolver<VarType>>(
mName + copySuffix, mDomain, mLogLevel);
#endif /* WITH_CUDA */
solver->setTimeStep(mTimeStep);
solver->doSteadyStateInitialization(mSteadyStateInit);
solver->doFrequencyParallelization(mHarmParallel);
......@@ -158,6 +167,7 @@ void Simulation::createSolvers(
solver = std::make_shared<DAESolver>(mName + copySuffix, subnets[net], mTimeStep, 0.0);
break;
#endif /* WITH_SUNDIALS */
default:
throw UnsupportedSolverException();
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment