Commit f60359a8 authored by Philipp Fensch's avatar Philipp Fensch Committed by Niklas Eiling
Browse files

Moved utility classes for CUDA to a separate file, added cuda::Vector class, new simple example


Signed-off-by: Philipp Fensch <philipp.fensch@rwth-aachen.de>
parent 9be4d26c
...@@ -43,6 +43,52 @@ void DP_CS_R1() { ...@@ -43,6 +43,52 @@ void DP_CS_R1() {
sim.run(); sim.run();
} }
void DP_CS_R5() {
Real timeStep = 0.0001;
Real finalTime = 0.1;
String simName = "DP_CS_R5";
Logger::setLogDir("logs/"+simName);
// Nodes
auto n1 = SimNode::make("n1");
auto n2 = SimNode::make("n2");
auto n3 = SimNode::make("n3");
// Components
auto cs = Ph1::CurrentSource::make("cs");
cs->setParameters(Complex(10, 0));
auto r1 = Ph1::Resistor::make("r_1");
r1->setParameters(1);
auto r2 = Ph1::Resistor::make("r_2");
r2->setParameters(1);
auto r3 = Ph1::Resistor::make("r_3");
r3->setParameters(1);
auto r4 = Ph1::Resistor::make("r_4");
r4->setParameters(1);
auto r5 = Ph1::Resistor::make("r_5");
r5->setParameters(1);
// Topology
cs->connect({ SimNode::GND, n1 });
r1->connect({ SimNode::GND, n1 });
r2->connect({ n1, n2 });
r3->connect({ SimNode::GND, n2 });
r4->connect({ n2, n3 });
r5->connect({ SimNode::GND, n3 });
auto sys = SystemTopology(50, SystemNodeList{n1, n2, n3}, SystemComponentList{cs, r1, r2, r3, r4, r5});
// Logging
auto logger = DataLogger::make(simName);
logger->addAttribute("v1", n1->attribute("v"));
logger->addAttribute("i10", r1->attribute("i_intf"));
Simulation sim(simName, sys, timeStep, finalTime, Domain::DP, Solver::Type::MNA, Logger::Level::debug);
sim.addLogger(logger);
sim.run();
}
void DP_VS_R1() { void DP_VS_R1() {
Real timeStep = 0.0001; Real timeStep = 0.0001;
Real finalTime = 0.1; Real finalTime = 0.1;
...@@ -416,14 +462,15 @@ void DP_Ph3_VS_RC1() { ...@@ -416,14 +462,15 @@ void DP_Ph3_VS_RC1() {
} }
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
DP_CS_R1(); //DP_CS_R1();
DP_VS_R1(); DP_CS_R5();
DP_CS_R2CL(); //DP_VS_R1();
DP_VS_CS_R4(); //DP_CS_R2CL();
DP_VS_R2L3(); //DP_VS_CS_R4();
DP_VS_RC1(); //DP_VS_R2L3();
DP_VS_RL2(); //DP_VS_RC1();
//DP_VS_RL2();
DP_Ph3_VS_R2L3();
DP_Ph3_VS_RC1(); //DP_Ph3_VS_R2L3();
//DP_Ph3_VS_RC1();
} }
#include <cstddef>
#include <iostream>
#include <iterator>
#include <utility>
#include <vector>

#include <cuda_runtime.h>
#include <cusparse_v2.h>
//Error Handling
#define CUDA_ENABLE_ERROR_CHECK
#ifdef CUDA_ENABLE_ERROR_CHECK
#define CUDA_CHECK_ERROR(CUDA_CALL) \
{cudaError_t code = (cudaError_t)CUDA_CALL; \
if (code != cudaSuccess) { \
printf("CUDA Error: %s\n", cudaGetErrorString(code)); \
}}
#else
#define CUDA_CHECK_ERROR(CUDA_CALL) CUDA_CALL
#endif
/**
* * copybackAndPrint()
* Copies memory back and puts data into "stream"
* * class Vector
* works like std::vector, however with internal buffer on device-memory
* * class CudaMatrix
 * Representation of a row-major sparse matrix on device-memory (compatible with Eigen)
*/
namespace DPsim {
namespace cuda {
/**
 * Copies an n-element device buffer back to the host and writes
 * "<msg> e0 e1 ... e(n-1)\n" to the given stream. Debugging helper;
 * performs a blocking device-to-host copy.
 *
 * \param stream output stream the message and elements are written to
 * \param msg    prefix printed before the elements
 * \param ptr    device pointer to n elements of type T
 * \param n      number of elements to copy back
 */
template <typename T>
void copybackAndPrint(std::ostream& stream, const char *msg, T *ptr, int n) {
	std::vector<T> buffer(n);
	CUDA_CHECK_ERROR(cudaMemcpy(buffer.data(), ptr, n * sizeof(T), cudaMemcpyDeviceToHost));
	stream << msg;
	for(const T &d : buffer) {
		// Fixed: elements previously went to std::cout, ignoring 'stream'
		stream << d << ' ';
	}
	stream << std::endl;
}
/**
 * Minimal std::vector-like owner of a fixed-size buffer in device memory.
 * Move-only; element access copies single elements device -> host (slow,
 * intended for debugging/printing only).
 */
template <typename T>
class Vector {
public:
	/// Allocates device memory for preAlloc elements (no allocation if preAlloc == 0).
	Vector(size_t preAlloc):
		buf(nullptr), n(preAlloc) {
		if(n > 0) {
			CUDA_CHECK_ERROR(cudaMalloc(&buf, preAlloc * sizeof(T)));
		}
	}
	Vector(const Vector&) = delete;
	/// Move-construct: takes ownership of other's buffer. Members are
	/// initialized before swapping -- swapping uninitialized pointers
	/// (as the previous version did) is undefined behavior and handed
	/// garbage to the moved-from object's destructor.
	Vector(Vector &&other) : buf(nullptr), n(0) {
		std::swap(buf, other.buf);
		std::swap(n, other.n);
	}
	~Vector() {
		// cudaFree(nullptr) would be a no-op, but skip the call entirely
		// for empty/moved-from vectors.
		if(buf != nullptr) {
			CUDA_CHECK_ERROR(cudaFree(buf));
		}
	}
	Vector& operator=(const Vector&) = delete;
	/// Move-assign by swap: the moved-from object releases our old buffer
	/// when it is destroyed.
	Vector& operator=(Vector&& other) {
		std::swap(buf, other.buf);
		std::swap(n, other.n);
		return *this;
	}
	/// Reads one element (blocking device-to-host copy of sizeof(T) bytes).
	T operator[](size_t pos) {
		T element;
		CUDA_CHECK_ERROR(cudaMemcpy(&element, &buf[pos], sizeof(T), cudaMemcpyDeviceToHost));
		return element;
	}
	/// Raw device pointer; must not be dereferenced on the host.
	T *data() const {
		return buf;
	}
	/// Read-only iterator; each dereference is a device-to-host copy.
	/// std::iterator is deprecated since C++17, so the member typedefs
	/// are spelled out instead of inheriting them.
	class iterator {
	public:
		using iterator_category = std::random_access_iterator_tag;
		using value_type = T;
		using difference_type = std::ptrdiff_t;
		using pointer = T*;
		using reference = T&;
		iterator(T *buf, long num) : buf(buf), pos(num) {}
		iterator& operator++() {pos++; return *this;}
		iterator operator++(int) {iterator retval = *this; ++(*this); return retval;}
		iterator& operator--() {pos--; return *this;}
		iterator operator--(int) {iterator retval = *this; --(*this); return retval;}
		bool operator==(iterator other) const {return buf == other.buf && pos == other.pos;}
		bool operator!=(iterator other) const {return !(*this == other);}
		T operator*() const {
			T element;
			CUDA_CHECK_ERROR(cudaMemcpy(&element, &buf[pos], sizeof(T), cudaMemcpyDeviceToHost));
			return element;
		}
	private:
		// Declaration order matches the constructor's init list
		// (avoids -Wreorder and initialization-order surprises).
		T *buf;
		size_t pos;
	};
	iterator begin() {return iterator(buf, 0);}
	/// Past-the-end iterator. Fixed off-by-one: the previous version
	/// returned position n - 1, which skipped the last element.
	iterator end() {return iterator(buf, n);}
private:
	T *buf;   // device buffer (nullptr when empty or moved-from)
	size_t n; // element count
};
// Sparse matrix in CSR (compressed sparse row) format held in device
// memory, copied from an Eigen row-major sparse matrix (same layout).
// Eigen's default StorageIndex is int, so the raw index copies below
// require an int-sized IndexType; this is enforced at compile time.
template<typename ValueType, typename IndexType>
struct CudaMatrix {
	static_assert(sizeof(IndexType) == sizeof(int),
		"IndexType must be int-sized to match Eigen's default StorageIndex");

	/// Copies the CSR arrays of 'mat' (dim x dim, 'dim' rows) to the device.
	CudaMatrix(const Eigen::SparseMatrix<ValueType, Eigen::RowMajor> &mat, int dim):
		dim(dim),
		non_zero(mat.nonZeros()),
		row(dim + 1),
		col(mat.nonZeros()),
		val(mat.nonZeros()) {
		// Copy Matrix (Host -> Device). Element sizes come from the
		// template parameters -- the previous version hard-coded
		// sizeof(int)/sizeof(double), which breaks e.g. ValueType=float.
		CUDA_CHECK_ERROR(cudaMemcpy(row.data(), mat.outerIndexPtr(), (dim + 1) * sizeof(IndexType), cudaMemcpyHostToDevice));
		CUDA_CHECK_ERROR(cudaMemcpy(col.data(), mat.innerIndexPtr(), non_zero * sizeof(IndexType), cudaMemcpyHostToDevice));
		CUDA_CHECK_ERROR(cudaMemcpy(val.data(), mat.valuePtr(), non_zero * sizeof(ValueType), cudaMemcpyHostToDevice));
	}

	//Matrix Data
	const int dim;                // number of rows (CSR outer size)
	const int non_zero;           // number of stored entries
	cuda::Vector<IndexType> row;  // outer (row-start) pointers, dim + 1 entries
	cuda::Vector<IndexType> col;  // column index of each stored entry
	cuda::Vector<ValueType> val;  // value of each stored entry

	/// Copies the matrix back to the host and prints its sparse structure
	/// in Eigen's debug format. Debugging helper; blocking copies.
	friend std::ostream& operator<<(std::ostream& os, const DPsim::cuda::CudaMatrix<ValueType, IndexType>& mat) {
		//Copy back (Device -> Host). CSR outer pointers have dim + 1
		//entries -- the previous version copied only dim and dropped the
		//final one; buffers are also typed by the template parameters now.
		std::vector<ValueType> bufferVal(mat.non_zero);
		std::vector<IndexType> bufferCol(mat.non_zero);
		std::vector<IndexType> bufferRow(mat.dim + 1);
		CUDA_CHECK_ERROR(cudaMemcpy(bufferVal.data(), mat.val.data(), mat.non_zero * sizeof(ValueType), cudaMemcpyDeviceToHost));
		CUDA_CHECK_ERROR(cudaMemcpy(bufferCol.data(), mat.col.data(), mat.non_zero * sizeof(IndexType), cudaMemcpyDeviceToHost));
		CUDA_CHECK_ERROR(cudaMemcpy(bufferRow.data(), mat.row.data(), (mat.dim + 1) * sizeof(IndexType), cudaMemcpyDeviceToHost));
		//Print Sparse Structures (Eigen-Format)
		os << "Nonzero entries:\n";
		for(int i = 0; i < mat.non_zero; i++) {
			os << '(' << bufferVal[i] << ',' << bufferCol[i] << ") ";
		}
		os << "\n\n";
		os << "Outer pointers:\n";
		for(auto i : bufferRow) {
			os << i << ' ';
		}
		os << " $\n";
		// TODO Print whole Matrix
		return os;
	}
};
}
}
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
namespace DPsim { namespace DPsim {
template <typename VarType> template <typename VarType>
class MnaSolverGpu : public MnaSolver<VarType>{ class MnaSolverGpuDense : public MnaSolver<VarType>{
protected: protected:
// #### Attributes required for GPU #### // #### Attributes required for GPU ####
...@@ -47,17 +47,17 @@ namespace DPsim { ...@@ -47,17 +47,17 @@ namespace DPsim {
void solve(Real time, Int timeStepCount); void solve(Real time, Int timeStepCount);
public: public:
MnaSolverGpu(String name, MnaSolverGpuDense(String name,
CPS::Domain domain = CPS::Domain::DP, CPS::Domain domain = CPS::Domain::DP,
CPS::Logger::Level logLevel = CPS::Logger::Level::info); CPS::Logger::Level logLevel = CPS::Logger::Level::info);
virtual ~MnaSolverGpu(); virtual ~MnaSolverGpuDense();
CPS::Task::List getTasks(); CPS::Task::List getTasks();
class SolveTask : public CPS::Task { class SolveTask : public CPS::Task {
public: public:
SolveTask(MnaSolverGpu<VarType>& solver) : SolveTask(MnaSolverGpuDense<VarType>& solver) :
Task(solver.mName + ".Solve"), mSolver(solver) { Task(solver.mName + ".Solve"), mSolver(solver) {
for (auto it : solver.mMNAComponents) { for (auto it : solver.mMNAComponents) {
...@@ -73,12 +73,12 @@ namespace DPsim { ...@@ -73,12 +73,12 @@ namespace DPsim {
void execute(Real time, Int timeStepCount) { mSolver.solve(time, timeStepCount); } void execute(Real time, Int timeStepCount) { mSolver.solve(time, timeStepCount); }
private: private:
MnaSolverGpu<VarType>& mSolver; MnaSolverGpuDense<VarType>& mSolver;
}; };
class LogTask : public CPS::Task { class LogTask : public CPS::Task {
public: public:
LogTask(MnaSolverGpu<VarType>& solver) : LogTask(MnaSolverGpuDense<VarType>& solver) :
Task(solver.mName + ".Log"), mSolver(solver) { Task(solver.mName + ".Log"), mSolver(solver) {
mAttributeDependencies.push_back(solver.attribute("left_vector")); mAttributeDependencies.push_back(solver.attribute("left_vector"));
mModifiedAttributes.push_back(Scheduler::external); mModifiedAttributes.push_back(Scheduler::external);
...@@ -87,7 +87,7 @@ namespace DPsim { ...@@ -87,7 +87,7 @@ namespace DPsim {
void execute(Real time, Int timeStepCount) { mSolver.log(time, timeStepCount); } void execute(Real time, Int timeStepCount) { mSolver.log(time, timeStepCount); }
private: private:
MnaSolverGpu<VarType>& mSolver; MnaSolverGpuDense<VarType>& mSolver;
}; };
}; };
} }
#pragma once #pragma once
#include <dpsim/MNASolver.h> #include <dpsim/MNASolver.h>
#include <dpsim/Cuda/CudaUtility.h>
#include <cuda_runtime.h>
#include <cusparse_v2.h>
//Error Handling
#define CUDA_ENABLE_ERROR_CHECK
#ifdef CUDA_ENABLE_ERROR_CHECK
#define checkCudaErrors(CUDA_CALL) \
{cudaError_t code = (cudaError_t)CUDA_CALL; \
if (code != cudaSuccess) { \
printf("CUDA Error: %s\n", cudaGetErrorString(code)); \
}}
#else
#define checkCudaErrors(CUDA_CALL) CUDA_CALL
#endif
namespace DPsim { namespace DPsim {
namespace cuda {
template <typename T>
void copybackAndPrint(const char *msg, T *ptr, int n) {
std::vector<T> buffer(n);
cudaMemcpy(buffer.data(), ptr, n * sizeof(T), cudaMemcpyDeviceToHost);
std::cout << msg;
for(T d : buffer) {
std::cout << d << ' ';
}
std::cout << std::endl;
}
struct cuda_delete {
void operator()(void *ptr) {
cudaFree(ptr);
}
};
template<typename T>
struct cuda_new {
T *operator()(size_t n) {
T *ptr;
checkCudaErrors(cudaMalloc(&ptr, n * sizeof(T)));
return ptr;
}
};
template <typename T>
struct unique_ptr : public std::unique_ptr<T, cuda_delete> {
unique_ptr():
std::unique_ptr<T, cuda_delete>() {
}
unique_ptr(size_t n):
std::unique_ptr<T, cuda_delete>(cuda_new<T>()(n)) {
}
//"Auto-Dereferencing"
operator T*() {
return std::unique_ptr<T, cuda_delete>::get();
}
};
// Matrix (CSR)
struct CudaMatrix {
CudaMatrix(const Eigen::SparseMatrix<double, Eigen::RowMajor> &mat, int dim):
non_zero(mat.nonZeros()),
row(cuda::unique_ptr<int>(dim + 1)),
col(cuda::unique_ptr<int>(mat.nonZeros())),
val(cuda::unique_ptr<double>(mat.nonZeros())) {
// Copy Matrix (Host -> Device)
cudaMemcpy(row, mat.outerIndexPtr(), (dim + 1) * sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(col, mat.innerIndexPtr(), non_zero * sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(val, mat.valuePtr(), non_zero * sizeof(double), cudaMemcpyHostToDevice);
}
//Matrix Data
const int non_zero;
cuda::unique_ptr<int> row;
cuda::unique_ptr<int> col;
cuda::unique_ptr<double> val;
};
}
template <typename VarType> template <typename VarType>
class MnaSolverGpuSparse : public MnaSolver<VarType>{ class MnaSolverGpuSparse : public MnaSolver<VarType>{
protected: protected:
...@@ -95,14 +14,14 @@ namespace DPsim { ...@@ -95,14 +14,14 @@ namespace DPsim {
cusparseHandle_t mCusparsehandle; cusparseHandle_t mCusparsehandle;
/// Systemmatrix on Device /// Systemmatrix on Device
std::unique_ptr<cuda::CudaMatrix> mSysMat; std::unique_ptr<cuda::CudaMatrix<double, int>> mSysMat;
/// RHS-Vector /// RHS-Vector
cuda::unique_ptr<double> mGpuRhsVec; cuda::Vector<double> mGpuRhsVec;
/// LHS-Vector /// LHS-Vector
cuda::unique_ptr<double> mGpuLhsVec; cuda::Vector<double> mGpuLhsVec;
/// Intermediate Vector /// Intermediate Vector
cuda::unique_ptr<double> mGpuIntermediateVec; cuda::Vector<double> mGpuIntermediateVec;
/// Initialize cuSparse-library /// Initialize cuSparse-library
void initialize(); void initialize();
...@@ -113,7 +32,7 @@ namespace DPsim { ...@@ -113,7 +32,7 @@ namespace DPsim {
private: private:
///Required shared Variables ///Required shared Variables
void *pBuffer = nullptr; cuda::Vector<char> pBuffer;
cusparseMatDescr_t descr_L = nullptr; cusparseMatDescr_t descr_L = nullptr;
cusparseMatDescr_t descr_U = nullptr; cusparseMatDescr_t descr_U = nullptr;
csrsv2Info_t info_L = nullptr; csrsv2Info_t info_L = nullptr;
......
...@@ -11,7 +11,8 @@ MnaSolverGpuSparse<VarType>::MnaSolverGpuSparse(String name, ...@@ -11,7 +11,8 @@ MnaSolverGpuSparse<VarType>::MnaSolverGpuSparse(String name,
CPS::Domain domain, CPS::Logger::Level logLevel) : CPS::Domain domain, CPS::Logger::Level logLevel) :
MnaSolver<VarType>(name, domain, logLevel), MnaSolver<VarType>(name, domain, logLevel),
mCusparsehandle(nullptr), mSysMat(nullptr), mCusparsehandle(nullptr), mSysMat(nullptr),
mGpuRhsVec(0), mGpuLhsVec(0), mGpuIntermediateVec(0){ mGpuRhsVec(0), mGpuLhsVec(0), mGpuIntermediateVec(0),
pBuffer(0){
//TODO Error-Handling //TODO Error-Handling
cusparseCreate(&mCusparsehandle); cusparseCreate(&mCusparsehandle);
...@@ -28,14 +29,14 @@ void MnaSolverGpuSparse<VarType>::initialize() { ...@@ -28,14 +29,14 @@ void MnaSolverGpuSparse<VarType>::initialize() {
int dim = this->mRightSideVector.rows(); int dim = this->mRightSideVector.rows();
mSysMat = std::unique_ptr<cuda::CudaMatrix>(new cuda::CudaMatrix(this->mSwitchedMatrices[std::bitset<SWITCH_NUM>(0)], dim)); mSysMat = std::unique_ptr<cuda::CudaMatrix<double, int>>(new cuda::CudaMatrix<double, int>(this->mSwitchedMatrices[std::bitset<SWITCH_NUM>(0)], dim));
mGpuRhsVec = cuda::unique_ptr<double>(dim); mGpuRhsVec = cuda::Vector<double>(dim);
mGpuLhsVec = cuda::unique_ptr<double>(dim); mGpuLhsVec = cuda::Vector<double>(dim);
mGpuIntermediateVec = cuda::unique_ptr<double>(dim); mGpuIntermediateVec = cuda::Vector<double>(dim);
//cuda::copybackAndPrint<double>("Mat: ", mSysMat->val, 100); std::cout << this->mSwitchedMatrices[std::bitset<SWITCH_NUM>(0)] << std::endl;
//cuda::copybackAndPrint<int>("Col: ", mSysMat->col, 100); const cuda::CudaMatrix<double, int> &x = *mSysMat.get();
//cuda::copybackAndPrint<int>("Row: ", mSysMat->row, 20); std::cout << x << std::endl;
//ILU factorization //ILU factorization
iluPreconditioner(); iluPreconditioner();
...@@ -45,16 +46,12 @@ template <typename VarType> ...@@ -45,16 +46,12 @@ template <typename VarType>
void MnaSolverGpuSparse<VarType>::iluPreconditioner() { void MnaSolverGpuSparse<VarType>::iluPreconditioner() {
cusparseMatDescr_t descr_M = 0; cusparseMatDescr_t descr_M = 0;
csrilu02Info_t info_M = 0; csrilu02Info_t info_M = 0;
int pBufferSize_M;
int pBufferSize_L;
int pBufferSize_U;
int pBufferSize;
int structural_zero; int structural_zero;
int numerical_zero; int numerical_zero;
double *d_csrVal = mSysMat->val; double *d_csrVal = mSysMat->val.data();
int *d_csrRowPtr = mSysMat->row; int *d_csrRowPtr = mSysMat->row.data();
int *d_csrColInd = mSysMat->col; int *d_csrColInd = mSysMat->col.data();
int N = this->mRightSideVector.rows(); int N = this->mRightSideVector.rows();
int nnz = mSysMat->non_zero; int nnz = mSysMat->non_zero;
...@@ -89,37 +86,37 @@ void MnaSolverGpuSparse<VarType>::iluPreconditioner() { ...@@ -89,37 +86,37 @@ void MnaSolverGpuSparse<VarType>::iluPreconditioner() {
cusparseCreateCsrsv2Info(&info_U); cusparseCreateCsrsv2Info(&info_U);
// step 3: query how much memory used in csrilu02 and csrsv2, and allocate the buffer // step 3: query how much memory used in csrilu02 and csrsv2, and allocate the buffer
int pBufferSize_M, pBufferSize_L, pBufferSize_U;
int pBufferSize;
cusparseDcsrilu02_bufferSize(mCusparsehandle, N, nnz, descr_M, d_csrVal, d_csrRowPtr, d_csrColInd, info_M, &pBufferSize_M); cusparseDcsrilu02_bufferSize(mCusparsehandle, N, nnz, descr_M, d_csrVal, d_csrRowPtr, d_csrColInd, info_M, &pBufferSize_M);
cusparseDcsrsv2_bufferSize(mCusparsehandle, CUSPARSE_OPERATION_NON_TRANSPOSE, N, nnz, descr_L, d_csrVal, d_csrRowPtr, d_csrColInd, info_L, &pBufferSize_L); cusparseDcsrsv2_bufferSize(mCusparsehandle, CUSPARSE_OPERATION_NON_TRANSPOSE, N, nnz, descr_L, d_csrVal, d_csrRowPtr, d_csrColInd, info_L, &pBufferSize_L);
cusparseDcsrsv2_bufferSize(mCusparsehandle, CUSPARSE_OPERATION_NON_TRANSPOSE, N, nnz, descr_U, d_csrVal, d_csrRowPtr, d_csrColInd, info_U, &pBufferSize_U); cusparseDcsrsv2_bufferSize(mCusparsehandle, CUSPARSE_OPERATION_NON_TRANSPOSE, N, nnz, descr_U, d_csrVal, d_csrRowPtr, d_csrColInd, info_U, &pBufferSize_U);
// Buffer // Buffer
pBufferSize = std::max({pBufferSize_M, pBufferSize_L, pBufferSize_U}); pBufferSize = std::max({pBufferSize_M, pBufferSize_L, pBufferSize_U});
cudaMalloc((void**)&pBuffer, pBufferSize); pBuffer = cuda::Vector<char>(pBufferSize);
// step 4: perform analysis of incomplete Cholesky on M // step 4: perform analysis of incomplete Cholesky on M
// perform analysis of triangular solve on L // perform analysis of triangular solve on L
// perform analysis of triangular solve on U // perform analysis of triangular solve on U
// The lower(upper) triangular part of M has the same sparsity pattern as L(U), // The lower(upper) triangular part of M has the same sparsity pattern as L(U),
// we can do analysis of csrilu0 and csrsv2 simultaneously. // we can do analysis of csrilu0 and csrsv2 simultaneously.
cusparseDcsrilu02_analysis(mCusparsehandle, N, nnz, descr_M, d_csrVal, d_csrRowPtr, d_csrColInd, info_M, CUSPARSE_SOLVE_POLICY_NO_LEVEL, pBuffer); cusparseDcsrilu02_analysis(mCusparsehandle, N, nnz, descr_M, d_csrVal, d_csrRowPtr, d_csrColInd, info_M, CUSPARSE_SOLVE_POLICY_NO_LEVEL, pBuffer.data());
auto status = cusparseXcsrilu02_zeroPivot(mCusparsehandle, info_M, &structural_zero); auto status = cusparseXcsrilu02_zeroPivot(mCusparsehandle, info_M, &structural_zero);
if (CUSPARSE_STATUS_ZERO_PIVOT == status){ if (CUSPARSE_STATUS_ZERO_PIVOT == status){
std::cout << "A(" << structural_zero << ',' << structural_zero << ") is missing\n" << std::endl; std::cout << "A(" << structural_zero << ',' << structural_zero << ") is missing\n" << std::endl;
} }
cusparseDcsrsv2_analysis(mCusparsehandle, CUSPARSE_OPERATION_NON_TRANSPOSE, N, nnz, descr_L, d_csrVal, d_csrRowPtr, d_csrColInd, info_L, CUSPARSE_SOLVE_POLICY_NO_LEVEL, pBuffer); cusparseDcsrsv2_analysis(mCusparsehandle, CUSPARSE_OPERATION_NON_TRANSPOSE, N, nnz, descr_L, d_csrVal, d_csrRowPtr, d_csrColInd, info_L, CUSPARSE_SOLVE_POLICY_NO_LEVEL, pBuffer.data());
cusparseDcsrsv2_analysis(mCusparsehandle, CUSPARSE_OPERATION_NON_TRANSPOSE, N, nnz, descr_U, d_csrVal, d_csrRowPtr, d_csrColInd, info_U, CUSPARSE_SOLVE_POLICY_USE_LEVEL, pBuffer); cusparseDcsrsv2_analysis(mCusparsehandle, CUSPARSE_OPERATION_NON_TRANSPOSE, N, nnz, descr_U, d_csrVal, d_csrRowPtr, d_csrColInd, info_U, CUSPARSE_SOLVE_POLICY_USE_LEVEL, pBuffer.data());
// step 5: M = L * U // step 5: M = L * U
cusparseDcsrilu02(mCusparsehandle, N, nnz, descr_M, d_csrVal, d_csrRowPtr, d_csrColInd, info_M, CUSPARSE_SOLVE_POLICY_NO_LEVEL, pBuffer); cusparseDcsrilu02(mCusparsehandle, N, nnz, descr_M, d_csrVal, d_csrRowPtr, d_csrColInd, info_M, CUSPARSE_SOLVE_POLICY_NO_LEVEL, pBuffer.data());
status = cusparseXcsrilu02_zeroPivot(mCusparsehandle, info_M, &numerical_zero); status = cusparseXcsrilu02_zeroPivot(mCusparsehandle, info_M, &numerical_zero);
if (CUSPARSE_STATUS_ZERO_PIVOT == status){ if (CUSPARSE_STATUS_ZERO_PIVOT == status){
printf("U(%d,%d) is zero\n", numerical_zero, numerical_zero); printf("U(%d,%d) is zero\n", numerical_zero, numerical_zero);
} }
cuda::copybackAndPrint<double>("LU: ", d_csrVal, 8); std::cout << *mSysMat.get() << std::endl;
cuda::copybackAndPrint<int>("LU: ", d_csrColInd, 8);
cuda::copybackAndPrint<int>("LU: ", d_csrRowPtr, 4);