TR-mbed 1.0
Loading...
Searching...
No Matches
Classes | Macros | Functions | Variables
gpu_common.h File Reference
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
#include <iostream>
#include <unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h>

Go to the source code of this file.

Classes

struct  compile_time_device_info
 

Macros

#define EIGEN_USE_GPU
 

Functions

template<typename Kernel , typename Input , typename Output >
void run_on_cpu (const Kernel &ker, int n, const Input &in, Output &out)
 
template<typename Kernel , typename Input , typename Output >
__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void run_on_gpu_meta_kernel (const Kernel ker, int n, const Input *in, Output *out)
 
template<typename Kernel , typename Input , typename Output >
void run_on_gpu (const Kernel &ker, int n, const Input &in, Output &out)
 
template<typename Kernel , typename Input , typename Output >
void run_and_compare_to_gpu (const Kernel &ker, int n, const Input &in, Output &out)
 
void ei_test_init_gpu ()
 

Variables

dim3 threadIdx
 
dim3 blockDim
 
dim3 blockIdx
 

Macro Definition Documentation

◆ EIGEN_USE_GPU

#define EIGEN_USE_GPU

Function Documentation

◆ ei_test_init_gpu()

void ei_test_init_gpu ( )

◆ run_and_compare_to_gpu()

template<typename Kernel , typename Input , typename Output >
void run_and_compare_to_gpu (const Kernel &ker, int n, const Input &in, Output &out)

◆ run_on_cpu()

template<typename Kernel , typename Input , typename Output >
void run_on_cpu (const Kernel &ker, int n, const Input &in, Output &out)

◆ run_on_gpu()

template<typename Kernel , typename Input , typename Output >
void run_on_gpu (const Kernel &ker, int n, const Input &in, Output &out)

◆ run_on_gpu_meta_kernel()

template<typename Kernel , typename Input , typename Output >
__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void run_on_gpu_meta_kernel (const Kernel ker, int n, const Input *in, Output *out)

Variable Documentation

◆ blockDim

dim3 blockDim

◆ blockIdx

dim3 blockIdx

◆ threadIdx

dim3 threadIdx