4#if defined(__INTELLISENSE__) && !defined(OPENNN_HAS_CUDA)
8#ifndef EIGEN_USE_THREADS
9#define EIGEN_USE_THREADS
12#define EIGEN_MAX_ALIGN_BYTES 64
15#define _SILENCE_CXX17_ITERATOR_BASE_CLASS_DEPRECATION_WARNING
16#define _CRT_SECURE_NO_WARNINGS
17#define EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
18#define _SILENCE_EXPERIMENTAL_FILESYSTEM_DEPRECATION_WARNING
34#include <unordered_map>
35#include <unordered_set>
50#include <unsupported/Eigen/CXX11/Tensor>
51#include <Eigen/src/Core/util/DisableStupidWarnings.h>
56#include <cuda_runtime.h>
63#include <nvtx3/nvToolsExt.h>
107#ifdef OPENNN_HAS_CUDA
109#include "../opennn/kernel.cuh"
112void check_cuda_status(T status,
const char* file,
int line,
const char* msg)
115 throw std::runtime_error(std::string(msg) +
" Error: " + std::to_string(
static_cast<int>(status)) +
116 " in " + file +
":" + std::to_string(line));
119#define CHECK_CUDA(x) check_cuda_status(x, __FILE__, __LINE__, "CUDA")
120#define CHECK_CUBLAS(x) check_cuda_status(x, __FILE__, __LINE__, "CuBLAS")
121#define CHECK_CUDNN(x) check_cuda_status(x, __FILE__, __LINE__, "cuDNN")
126using namespace Eigen;
129constexpr float EPSILON = numeric_limits<float>::epsilon();
130constexpr float MAX = numeric_limits<float>::max();
132constexpr float QUIET_NAN = numeric_limits<float>::quiet_NaN();
139 for (
size_t i = 0; i < vec.size(); ++i)
142 if (i + 1 < vec.size()) os <<
"; ";
151using MatrixR = Matrix<float, Dynamic, Dynamic, Layout>;
152using MatrixI = Matrix<Index, Dynamic, Dynamic, Layout>;
153using MatrixB = Matrix<bool, Dynamic, Dynamic, Layout>;
162using Tensor0 = Tensor<float, 0, Layout | AlignedMax>;
163using Tensor2 = Tensor<float, 2, Layout | AlignedMax>;
164using Tensor3 = Tensor<float, 3, Layout | AlignedMax>;
165using Tensor4 = Tensor<float, 4, Layout | AlignedMax>;
168using TensorR = Tensor<float, Rank, Layout | AlignedMax>;
170using TensorMap2 = TensorMap<Tensor<float, 2, Layout | AlignedMax>, AlignedMax>;
171using TensorMap3 = TensorMap<Tensor<float, 3, Layout | AlignedMax>, AlignedMax>;
172using TensorMap4 = TensorMap<Tensor<float, 4, Layout | AlignedMax>, AlignedMax>;
175using TensorMapR = TensorMap<Tensor<float, Rank, Layout | AlignedMax>, AlignedMax>;
177#pragma GCC diagnostic push
178#pragma GCC diagnostic ignored "-Wunused-parameter"
180#pragma GCC diagnostic pop
Definition adaptive_moment_estimation.h:19
constexpr float MAX
Definition pch.h:130
constexpr float NEG_INFINITY
Definition pch.h:131
constexpr float EPSILON
Definition pch.h:129
constexpr float SOFTMAX_MASK_VALUE
Definition pch.h:133
constexpr float QUIET_NAN
Definition pch.h:132
ostream & operator<<(ostream &os, const vector< T > &vec)
Definition pch.h:136
cudnnTensorStruct * cudnnTensorDescriptor_t
Definition pch.h:97
Map< VectorR, AlignedMax > VectorMap
Definition pch.h:159
TensorMap< Tensor< float, Rank, Layout|AlignedMax >, AlignedMax > TensorMapR
Definition pch.h:175
void * cudnnHandle_t
Definition pch.h:74
Tensor< float, 3, Layout|AlignedMax > Tensor3
Definition pch.h:164
void * cudaEvent_t
Definition pch.h:71
Matrix< float, Dynamic, 1 > VectorR
Definition pch.h:155
void * cudaStream_t
Definition pch.h:70
cudnnConvolutionFwdAlgo_t
Definition pch.h:91
@ CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM
Definition pch.h:91
Matrix< bool, Dynamic, 1 > VectorB
Definition pch.h:157
void * cudnnFilterDescriptor_t
Definition pch.h:98
cudaDataType_t
Definition pch.h:81
@ CUDA_R_32F
Definition pch.h:81
@ CUDA_R_16F
Definition pch.h:81
@ CUDA_R_8I
Definition pch.h:81
@ CUDA_R_32I
Definition pch.h:81
@ CUDA_R_16BF
Definition pch.h:81
Tensor< float, Rank, Layout|AlignedMax > TensorR
Definition pch.h:168
Tensor< float, 2, Layout|AlignedMax > Tensor2
Definition pch.h:163
TensorMap< Tensor< float, 4, Layout|AlignedMax >, AlignedMax > TensorMap4
Definition pch.h:172
void * cublasLtHandle_t
Definition pch.h:73
cudnnActivationMode_t
Definition pch.h:88
@ CUDNN_ACTIVATION_RELU
Definition pch.h:88
@ CUDNN_ACTIVATION_SIGMOID
Definition pch.h:88
@ CUDNN_ACTIVATION_TANH
Definition pch.h:88
@ CUDNN_ACTIVATION_IDENTITY
Definition pch.h:88
@ CUDNN_ACTIVATION_ELU
Definition pch.h:88
void * cudnnOpTensorDescriptor_t
Definition pch.h:103
Matrix< Index, Dynamic, Dynamic, Layout > MatrixI
Definition pch.h:152
cudnnPoolingMode_t
Definition pch.h:89
@ CUDNN_POOLING_MAX
Definition pch.h:89
Tensor< float, 0, Layout|AlignedMax > Tensor0
Definition pch.h:162
void * cudnnDropoutDescriptor_t
Definition pch.h:102
cublasLtEpilogue_t
Definition pch.h:84
@ CUBLASLT_EPILOGUE_BIAS
Definition pch.h:84
@ CUBLASLT_EPILOGUE_RELU_BIAS
Definition pch.h:84
@ CUBLASLT_EPILOGUE_DEFAULT
Definition pch.h:84
void * cublasHandle_t
Definition pch.h:72
cublasOperation_t
Definition pch.h:83
@ CUBLAS_OP_N
Definition pch.h:83
@ CUBLAS_OP_T
Definition pch.h:83
cudnnDataType_t
Definition pch.h:87
@ CUDNN_DATA_INT32
Definition pch.h:87
@ CUDNN_DATA_BFLOAT16
Definition pch.h:87
@ CUDNN_DATA_HALF
Definition pch.h:87
@ CUDNN_DATA_INT8
Definition pch.h:87
@ CUDNN_DATA_FLOAT
Definition pch.h:87
Matrix< bool, Dynamic, Dynamic, Layout > MatrixB
Definition pch.h:153
void * cudnnPoolingDescriptor_t
Definition pch.h:100
cudnnConvolutionBwdDataAlgo_t
Definition pch.h:92
@ CUDNN_CONVOLUTION_BWD_DATA_ALGO_0
Definition pch.h:92
cudnnBatchNormMode_t
Definition pch.h:90
@ CUDNN_BATCHNORM_PER_ACTIVATION
Definition pch.h:90
Matrix< float, Dynamic, Dynamic, Layout > MatrixR
Definition pch.h:151
TensorMap< Tensor< float, 2, Layout|AlignedMax >, AlignedMax > TensorMap2
Definition pch.h:170
Matrix< Index, Dynamic, 1 > VectorI
Definition pch.h:156
Map< MatrixR, Layout|AlignedMax > MatrixMap
Definition pch.h:160
void * cudnnActivationDescriptor_t
Definition pch.h:101
void * cudnnConvolutionDescriptor_t
Definition pch.h:99
constexpr int Layout
Definition pch.h:149
TensorMap< Tensor< float, 3, Layout|AlignedMax >, AlignedMax > TensorMap3
Definition pch.h:171
Tensor< float, 4, Layout|AlignedMax > Tensor4
Definition pch.h:165
cudnnConvolutionBwdFilterAlgo_t
Definition pch.h:93
@ CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0
Definition pch.h:93
cublasComputeType_t
Definition pch.h:82
@ CUBLAS_COMPUTE_32F_FAST_TF32
Definition pch.h:82
@ CUBLAS_COMPUTE_32F
Definition pch.h:82
@ CUBLAS_COMPUTE_32F_FAST_16BF
Definition pch.h:82