Source code for nabu.cuda.utils

import atexit
from math import ceil
import numpy as np
from ..resources.gpu import GPUDescription

    import pycuda
    import pycuda.driver as cuda
    from pycuda import gpuarray as garray
    from import clear_context_caches
    from pycuda.compiler import get_nvcc_version as pycuda_get_nvcc_version

    __has_pycuda__ = True
    __pycuda_error_msg__ = None
    if pycuda.VERSION[0] < 2020:
        print("Error: need pycuda >= 2020.1")
        __has_pycuda__ = False
except ImportError as err:
    __has_pycuda__ = False
    __pycuda_error_msg__ = str(err)
    import skcuda

    __has_cufft__ = True
except ImportError:
    __has_cufft__ = False

    import cupy

    __has_cupy__ = True
except ImportError:
    __has_cupy__ = False

def get_cuda_context(device_id=None, cleanup_at_exit=True):
    """
    Return the current CUDA context, creating one if none exists yet.

    Parameters
    ----------
    device_id: int, optional
        Index of the device whose primary context is retained when a new
        context has to be created. Defaults to 0. It is not consulted when
        a context is already current.
    cleanup_at_exit: bool, optional
        If True (default), register an `atexit` hook that pops the newly
        created context and clears the pycuda context caches.

    Returns
    -------
    context: pycuda.driver.Context
        The already-current context, or the freshly pushed one.
    """
    existing = cuda.Context.get_current()
    if existing is not None:
        # TODO what if the device used is different from device_id ?
        return existing

    # No current context: initialize the driver and build one.
    cuda.init()
    target_device = 0 if device_id is None else device_id

    # Retain the device's primary context, i.e. the one used by the CUDA
    # runtime API (ex. scikit-cuda). Unlike Context.make_context(), the
    # retained context is not made current, hence the explicit push().
    new_ctx = cuda.Device(target_device).retain_primary_context()
    new_ctx.push()

    def _release(ctx):
        # Exit-time clean-up: pop the context, then drop pycuda's caches
        if ctx is not None:
            ctx.pop()
        clear_context_caches()

    if cleanup_at_exit:
        atexit.register(_release, new_ctx)
    return new_ctx
def count_cuda_devices():
    """
    Return the number of CUDA devices reported by the driver.

    The driver is initialized first when no CUDA context is current.
    """
    no_current_context = cuda.Context.get_current() is None
    if no_current_context:
        cuda.init()
    n_devices = cuda.Device.count()
    return n_devices
def get_gpu_memory(device_id):
    """
    Return the total memory of a device, in GigaBytes (1e9 bytes).

    Parameters
    ----------
    device_id: int
        Index of the CUDA device to query.
    """
    cuda.init()
    device = cuda.Device(device_id)
    total_bytes = device.total_memory()
    return total_bytes / 1e9
def is_gpu_usable():
    """
    Tell whether at least one Nvidia GPU is available.

    The "no CUDA-capable device" driver error is interpreted as "zero GPUs";
    any other exception raised while counting devices is propagated.
    """
    try:
        device_count = count_cuda_devices()
    except Exception as exc:
        # Fragile: relies on the exact wording of the driver error message
        if str(exc) == "cuInit failed: no CUDA-capable device is detected":
            return False
        raise
    return device_count > 0
def detect_cuda_gpus():
    """
    List the Nvidia CUDA GPUs available on the current host.

    Returns
    --------
    gpus: dict
        Maps each GPU ID to its `pycuda.driver.Device` object.
        Empty when detection failed.
    error_msg: str
        Message describing why detection failed (pycuda import error, driver
        initialization error or device counting error); None on success.
    """
    if not __has_pycuda__:
        return {}, __pycuda_error_msg__

    # Driver initialization and device counting can both fail; in either
    # case the failure is reported through the message, not raised.
    try:
        cuda.init()
    except Exception as exc:
        return {}, str(exc)
    try:
        device_count = cuda.Device.count()
    except Exception as exc:
        return {}, str(exc)

    devices = {idx: cuda.Device(idx) for idx in range(device_count)}
    return devices, None
def collect_cuda_gpus():
    """
    Describe every CUDA-compatible GPU of the host.

    Returns
    -------
    cuda_gpus: dict or None
        Maps each GPU ID to the dictionary produced by
        `GPUDescription(gpu).get_dict()`. None if GPU detection reported
        an error.
    """
    detected, failure = detect_cuda_gpus()
    if failure is not None:
        return None
    return {gpu_id: GPUDescription(device).get_dict() for gpu_id, device in detected.items()}
def get_nvcc_version(nvcc_cmd="nvcc"):
    """
    Return the release number of the NVCC compiler, as a string.

    The version is taken from the text following the word "release" in the
    output obtained through pycuda's `get_nvcc_version`.

    Parameters
    ----------
    nvcc_cmd: str, optional
        Name or path of the nvcc executable. Default is "nvcc".

    Returns
    -------
    ver: str or None
        Version string (text up to the first space after "release", without
        the trailing comma), or None if it could not be obtained or parsed.
    """
    try:
        output = "".join(pycuda_get_nvcc_version(nvcc_cmd))
        after_release = output.split("release")[1]
        ver = after_release.strip().split(" ")[0].strip(",")
    except Exception:
        # Best effort: any failure (pycuda missing, nvcc not found, unexpected
        # output) means "unknown". Unlike a bare `except`, this lets
        # KeyboardInterrupt and SystemExit propagate.
        return None
    # An empty token is as unusable as a parsing failure
    return ver or None
def check_textures_availability():
    """
    Tell whether Cuda textures can be used.

    The only limitation is pycuda, which does not support texture objects.
    Texture references were deprecated, and removed from Cuda 12, so textures
    are considered available only when the NVCC major version is below 12.

    Returns
    -------
    bool
        False when the NVCC version is unknown or its major number is >= 12.
    """
    version = get_nvcc_version()
    if version is None:
        # unknown - can't parse NVCC version for some reason
        return False
    major_text = version.partition(".")[0]
    return int(major_text) < 12
"""
pycuda/
    np.complex64: SIGNED_INT32, num_channels = 2
    np.float64: SIGNED_INT32, num_channels = 2
    np.complex128: array_format.SIGNED_INT32, num_channels = 4

double precision: pycuda-helpers.hpp:
    typedef float fp_tex_float; // --> float32
    typedef int2 fp_tex_double; // --> float64
    typedef uint2 fp_tex_cfloat; // --> complex64
    typedef int4 fp_tex_cdouble; // --> complex128
"""
def cuarray_format_to_dtype(cuarr_fmt):
    """
    Convert a `pycuda.driver.array_format` value to the matching numpy type.

    This is the reverse of `cuda.dtype_to_array_format`.

    Raises
    ------
    TypeError
        If the format is not one of the 8/16/32 bits (un)signed integer
        formats or the FLOAT format.
    """
    formats = cuda.array_format
    dtype_by_format = dict(
        [
            (formats.UNSIGNED_INT8, np.uint8),
            (formats.UNSIGNED_INT16, np.uint16),
            (formats.UNSIGNED_INT32, np.uint32),
            (formats.SIGNED_INT8, np.int8),
            (formats.SIGNED_INT16, np.int16),
            (formats.SIGNED_INT32, np.int32),
            (formats.FLOAT, np.float32),
        ]
    )
    found = dtype_by_format.get(cuarr_fmt)
    if found is None:
        raise TypeError("Unknown format %s" % cuarr_fmt)
    return found
def cuarray_shape_dtype(cuarray):
    """
    Return the shape and numpy data type of a `pycuda.driver.Array`.

    Both are read from the array's 3D descriptor. The shape is
    (height, width), with depth prepended when it is non-zero.
    """
    descriptor = cuarray.get_descriptor_3d()
    dims = (descriptor.height, descriptor.width)
    if descriptor.depth > 0:
        dims = (descriptor.depth, *dims)
    return dims, cuarray_format_to_dtype(descriptor.format)
def get_shape_dtype(arr):
    """
    Return the (shape, dtype) pair of an array.

    Supported types are `pycuda.gpuarray.GPUArray`, `numpy.ndarray` and
    `pycuda.driver.Array`.

    Raises
    ------
    ValueError
        If `arr` is of none of the supported types.
    """
    if isinstance(arr, (garray.GPUArray, np.ndarray)):
        return arr.shape, arr.dtype
    if isinstance(arr, cuda.Array):
        return cuarray_shape_dtype(arr)
    raise ValueError("Unknown array type %s" % str(type(arr)))
def copy_array(dst, src, check=False, src_dtype=None, dst_x_in_bytes=0, dst_y=0):
    """
    Copy a source array to a destination array.
    Source and destination can be either numpy.ndarray, pycuda.Driver.Array,
    or pycuda.gpuarray.GPUArray.

    Parameters
    -----------
    dst: pycuda.driver.Array or pycuda.gpuarray.GPUArray or numpy.ndarray
        Destination array. Its content will be overwritten by copy.
    src: pycuda.driver.Array or pycuda.gpuarray.GPUArray or numpy.ndarray
        Source array.
    check: bool, optional
        Whether to check src and dst shape and data type.
    src_dtype: optional
        When given (and truthy), it replaces the data type detected from `src`,
        both for the `check` comparison and for computing the row width in bytes.
    dst_x_in_bytes: int, optional
        Horizontal offset, in bytes, in the destination. Only applied when
        `dst` is a pycuda.driver.Array.
    dst_y: int, optional
        Vertical offset in the destination. Only applied when `dst` is a
        pycuda.driver.Array.

    Raises
    ------
    ValueError
        If `check` is set and shapes or data types differ, or if the source
        is neither 2D nor 3D.
    """
    shape_src, dtype_src = get_shape_dtype(src)
    shape_dst, dtype_dst = get_shape_dtype(dst)
    # An explicit src_dtype takes precedence over the detected one
    dtype_src = src_dtype or dtype_src
    if check:
        if shape_src != shape_dst:
            raise ValueError("shape_src != shape_dst : have %s and %s" % (str(shape_src), str(shape_dst)))
        if dtype_src != dtype_dst:
            raise ValueError("dtype_src != dtype_dst : have %s and %s" % (str(dtype_src), str(dtype_dst)))
    # Pick the copy descriptor matching the number of dimensions of the source
    if len(shape_src) == 2:
        copy = cuda.Memcpy2D()
        h, w = shape_src
    elif len(shape_src) == 3:
        copy = cuda.Memcpy3D()
        d, h, w = shape_src
        copy.depth = d
    else:
        raise ValueError("Expected arrays with 2 or 3 dimensions")
    # Source side: anything that is neither a cuda.Array nor a GPUArray
    # is handed over as host memory
    if isinstance(src, cuda.Array):
        copy.set_src_array(src)
    elif isinstance(src, garray.GPUArray):
        copy.set_src_device(src.gpudata)
    else:  # numpy
        copy.set_src_host(src)
    # Destination side, same dispatch
    if isinstance(dst, cuda.Array):
        copy.set_dst_array(dst)
        # Support offset (x, y) in target (for copying to texture)
        copy.dst_x_in_bytes = dst_x_in_bytes
        copy.dst_y = dst_y
    elif isinstance(dst, garray.GPUArray):
        copy.set_dst_device(dst.gpudata)
    else:  # numpy
        copy.set_dst_host(dst)
    # Row width (and destination pitch) in bytes, from the source width and data type
    copy.width_in_bytes = copy.dst_pitch = w * np.dtype(dtype_src).itemsize
    copy.dst_height = copy.height = h
    # ??
    # NOTE(review): the 2D copy is called with a positional True, presumably
    # pycuda's `aligned` flag - confirm against the pycuda documentation.
    if len(shape_src) == 2:
        copy(True)
    else:
        copy()
def copy_big_gpuarray(dst, src, itemsize=4, checks=False, limit=2**32 - 1):
    """
    Copy a big `pycuda.gpuarray.GPUArray` into another.
    Transactions of more than 2**32 -1 octets fail, so are doing several
    partial copies of smaller arrays.

    Parameters
    ----------
    dst: pycuda.gpuarray.GPUArray or numpy.ndarray
        Destination array. When it is a numpy array, the data is fetched
        with `src.get(ary=...)`; otherwise it is assigned slice by slice.
    src: pycuda.gpuarray.GPUArray
        Source array.
    itemsize: int, optional
        Size in bytes of one array element. Default is 4.
    checks: bool, optional
        Whether to assert that `dst` and `src` have the same dtype and shape.
    limit: int, optional
        Maximum number of bytes moved by a single transaction.
        Default is 2**32 - 1.

    Raises
    ------
    ValueError
        If a single slice along the first axis is already bigger than `limit`,
        so that splitting along this axis cannot help.
    """
    d2h = isinstance(dst, np.ndarray)
    if checks:
        assert dst.dtype == src.dtype
        assert dst.shape == src.shape

    def _n_bytes(shape):
        # Force a 64-bit accumulator: the default integer of np.prod can be
        # 32 bits on some platforms, which overflows for the big arrays
        # this function is meant for.
        return int(np.prod(shape, dtype=np.int64)) * itemsize

    # Small enough: one single transaction
    if _n_bytes(dst.shape) < limit:
        if d2h:
            src.get(ary=dst)
        else:
            dst[:] = src[:]
        return

    # Halve the extent along the first axis until one chunk fits in a transaction
    chunk_shape = list(dst.shape)
    while _n_bytes(chunk_shape) > limit:
        chunk_shape[0] //= 2
    nz0 = dst.shape[0]
    nz = chunk_shape[0]
    if nz == 0:
        raise ValueError(
            "Cannot split the copy along the first axis: a single slice exceeds %d bytes" % limit
        )

    for start in range(0, nz0, nz):
        stop = min(start + nz, nz0)
        if d2h:
            src[start:stop].get(ary=dst[start:stop])
        else:
            dst[start:stop] = src[start:stop]
def replace_array_memory(arr, new_shape):
    """
    Replace the underlying buffer data of a `pycuda.gpuarray.GPUArray`.
    This function is dangerous !
    It should merely be used to clear memory, the array should not be used afterwise.

    Parameters
    ----------
    arr: pycuda.gpuarray.GPUArray
        Array whose `gpudata` buffer is replaced by a fresh allocation
        obtained from `arr.allocator`.
    new_shape: tuple of int
        Shape used to size the new buffer and assigned to `arr.shape`.

    Returns
    -------
    arr: pycuda.gpuarray.GPUArray
        The same object, modified in place.
    """
    # New buffer size in bytes: number of elements times the element size
    n_bytes = int(np.prod(new_shape) * arr.dtype.itemsize)
    arr.gpudata = arr.allocator(n_bytes)
    arr.shape = new_shape
    # TODO re-compute strides
    return arr
def pycuda_to_cupy(arr_pycuda):
    """
    Wrap the memory of a `pycuda.gpuarray.GPUArray` in a `cupy.ndarray`.

    No data is copied: the cupy array points to the device buffer of
    `arr_pycuda`, which is passed as the owner of the memory so that a
    reference to it is held by the cupy memory object.

    Parameters
    ----------
    arr_pycuda: pycuda.gpuarray.GPUArray
        Source array.

    Returns
    -------
    cupy.ndarray
        Array with the same shape and dtype as `arr_pycuda`.
    """
    # The size of the memory region is expressed in bytes, hence `nbytes`
    # (`size` is the number of elements).
    arr_cupy_mem = cupy.cuda.UnownedMemory(arr_pycuda.ptr, arr_pycuda.nbytes, arr_pycuda)
    arr_cupy_memptr = cupy.cuda.MemoryPointer(arr_cupy_mem, offset=0)
    return cupy.ndarray(arr_pycuda.shape, dtype=arr_pycuda.dtype, memptr=arr_cupy_memptr)  # pylint: disable=E1123
[docs] def cupy_to_pycuda(arr_cupy): return garray.empty(arr_cupy.shape, arr_cupy.dtype,