SHTns 3.7
SHT_gpu_example.py

Shows how to perform transforms on GPU if available.

Shows how to perform transforms on GPU if available.

1#
2# Copyright (c) 2010-2023 Centre National de la Recherche Scientifique.
3# written by Nathanael Schaeffer (CNRS, ISTerre, Grenoble, France).
4#
5# nathanael.schaeffer@univ-grenoble-alpes.fr
6#
7# This software is governed by the CeCILL license under French law and
8# abiding by the rules of distribution of free software. You can use,
9# modify and/or redistribute the software under the terms of the CeCILL
10# license as circulated by CEA, CNRS and INRIA at the following URL
11# "http://www.cecill.info".
12#
13# The fact that you are presently reading this means that you have had
14# knowledge of the CeCILL license and that you accept its terms.
15#
16
17
20
21import numpy # numpy for arrays
22import shtns # shtns module compiled and installed using
23 # pip install shtns
24import time # to measure elapsed time
25
26lmax = 1023 # maximum degree of spherical harmonic representation. GPU acceleration works better on large lmax.
27
28sh_cpu = shtns.sht(lmax) # create sht object with given lmax and mmax (orthonormalized)
29sh_gpu = shtns.sht(lmax) # another one, which will run on GPU
30
31# For now, the GPU only supports theta-contiguous grid. We need to tell shtns we want that, as well as GPU offload
32sh_cpu.set_grid(flags=shtns.SHT_THETA_CONTIGUOUS) # build grid to run on CPU (gauss grid, theta-contiguous)
33sh_gpu.set_grid(flags=shtns.SHT_ALLOW_GPU + shtns.SHT_THETA_CONTIGUOUS) # build grid (gauss grid, theta-contiguous, allowed to run on GPU)
34
35# Note that even if specifying SHT_ALLOW_GPU, it may silently default to cpu for
36# various reasons (shtns not compiled with cuda support, grid not supported on gpu, too small sizes, ...)
37sh_cpu.print_info()
38sh_gpu.print_info() # if the printed output says many times 'gpu', then we can be sure it will run on gpu.
39
40print('cpu grid size:', sh_cpu.nlat, sh_cpu.nphi) # displays the latitudinal and longitudinal grid sizes...
41print('gpu grid size:', sh_gpu.nlat, sh_gpu.nphi) # ... not guaranteed to be the same, unless we ask for specific sizes.
42
43cost = sh_gpu.cos_theta # latitudinal coordinates of the grid as cos(theta)
44el = sh_gpu.l # array of size sh.nlm giving the spherical harmonic degree l for any sh coefficient
45l2 = el*(el+1) # array l(l+1) that is useful for computing laplacian
46
47alm = sh_gpu.spec_array() # a spherical harmonic spectral array, same as numpy.zeros(sh_gpu.nlm, dtype=complex)
48alm[sh_gpu.idx(1, 0)] = 1.0 # set sh coefficient l=1, m=0 to value 1
49
50t0 = time.perf_counter()
51x_cpu = sh_cpu.synth(alm) # transform sh description alm into spatial representation x (scalar transform)
52t1 = time.perf_counter()
53x_gpu = sh_gpu.synth(alm)
54t2 = time.perf_counter()
55
56print('transform on CPU took %.3g seconds' % (t1-t0))
57print('transform on GPU took %.3g seconds (including copies between cpu and gpu -- this is %.3g times faster than CPU)' % (t2-t1, ((t1-t0)/(t2-t1))))
58max_diff=numpy.max(abs(x_gpu-x_cpu))
59print('maximum difference between gpu and cpu transformed data is', max_diff)
60