22 #ifdef __HIP_PLATFORM_AMD__
23 #include <hip/hip_runtime.h>
24 typedef hipStream_t shtns_gpu_stream_t;
25 #else
26 #include <cuda_runtime.h>
27 typedef cudaStream_t shtns_gpu_stream_t;
28 #endif
61 void cu_SHsph_to_spat_float(shtns_cfg, cplx_f *Slm, float *Vt, float *Vp, int ltr);
64 void cu_SHtor_to_spat_float(shtns_cfg, cplx_f *Tlm, float *Vt, float *Vp, int ltr);
99 double cushtns_profiling_read_time(shtns_cfg, double* time_1, double* time_2);
101 const char* cushtns_get_cfg_info(shtns_cfg);
void cu_spat_to_SH_float(shtns_cfg shtns, float *Vr, cplx_f *Qlm, int ltr)
Same as cu_spat_to_SH, but working on single precision data.
void cu_SHqst_to_spat(shtns_cfg, cplx *Qlm, cplx *Slm, cplx *Tlm, double *Vr, double *Vt, double *Vp, int ltr)
Same as SHqst_to_spat, but working on data residing on the GPU.
int cushtns_init_gpu(shtns_cfg shtns)
Initialize the given config to work on the current (or default) GPU, allowing calls to the GPU transforms cu_*...
void cu_SHsphtor_to_spat(shtns_cfg, cplx *Slm, cplx *Tlm, double *Vt, double *Vp, int ltr)
Same as SHsphtor_to_spat, but working on data residing on the GPU.
void cu_SH_to_spat_float(shtns_cfg shtns, cplx_f *Qlm, float *Vr, int ltr)
Same as cu_SH_to_spat, but working on single precision data.
void cu_spat_to_SHqst(shtns_cfg, double *Vr, double *Vt, double *Vp, cplx *Qlm, cplx *Slm, cplx *Tlm, int ltr)
Same as spat_to_SHqst, but working on data residing on the GPU.
void cu_spat_to_SHsphtor(shtns_cfg, double *Vt, double *Vp, cplx *Slm, cplx *Tlm, int ltr)
Same as spat_to_SHsphtor, but working on data residing on the GPU.
void cu_spat_to_SH(shtns_cfg shtns, double *Vr, cplx *Qlm, int ltr)
Same as spat_to_SH, but working on data residing on the GPU.
void cu_SHtor_to_spat(shtns_cfg, cplx *Tlm, double *Vt, double *Vp, int ltr)
Same as SHtor_to_spat, but working on data residing on the GPU.
void cushtns_set_streams(shtns_cfg shtns, shtns_gpu_stream_t compute_stream, shtns_gpu_stream_t transfer_stream)
Set user-specified streams for compute (including FFT) and transfer.
shtns_cfg cushtns_clone(shtns_cfg shtns, shtns_gpu_stream_t compute_stream, shtns_gpu_stream_t transfer_stream)
Clone a GPU-enabled shtns config and assign it to different streams (to allow compute overlap and/or...
void cu_SHsph_to_spat(shtns_cfg, cplx *Slm, double *Vt, double *Vp, int ltr)
Same as SHsph_to_spat, but working on data residing on the GPU.
void cushtns_release_gpu(shtns_cfg)
Release the resources needed for GPU transforms; GPU transforms will no longer work after this call.
void cu_SH_to_spat(shtns_cfg shtns, cplx *Qlm, double *Vr, int ltr)
Same as SH_to_spat, but working on data residing on the GPU.
_Complex double cplx
double precision complex number data type
Definition shtns.h:28
_Complex float cplx_f
single precision (float) complex number data type
Definition shtns.h:29