Source code for nabu.processing.kernel_base

"""
Base class for CudaKernel and OpenCLKernel
Should not be used directly
"""
from ..utils import updiv


[docs] class KernelBase: """ A base class for OpenCL and Cuda kernels. Parameters ----------- kernel_name: str Name of the CUDA kernel. filename: str, optional Path to the file name containing kernels definitions src: str, optional Source code of kernels definitions automation_params: dict, optional Automation parameters, see below Automation parameters ---------------------- automation_params is a dictionary with the following keys and default values. guess_block: bool (True) If block is not specified during calls, choose a block size based on the size/dimensions of the first array. Mind that it is unlikely to be the optimal choice. guess_grid: bool (True): If the grid size is not specified during calls, choose a grid size based on the size of the first array. follow_device_ptr: bool (True) specify gpuarray.gpudata for all cuda GPUArrays (and pyopencl.array.data for pyopencl arrays). Otherwise, raise an error. """ _default_automation_params = { "guess_block": True, "guess_grid": True, "follow_device_ptr": True, } def __init__( self, kernel_name, filename=None, src=None, automation_params=None, ): self.check_filename_src(filename, src) self.set_automation_params(automation_params)
[docs] def check_filename_src(self, filename, src): err_msg = "Please provide either filename or src" if filename is None and src is None: raise ValueError(err_msg) if filename is not None and src is not None: raise ValueError(err_msg) if filename is not None: with open(filename) as fid: src = fid.read() self.filename = filename self.src = src
[docs] def set_automation_params(self, automation_params): self.automation_params = self._default_automation_params.copy() self.automation_params.update(automation_params or {})
[docs] @staticmethod def guess_grid_size(shape, block_size): # python: (z, y, x) -> cuda: (x, y, z) res = tuple(map(lambda x: updiv(x[0], x[1]), zip(shape[::-1], block_size))) if len(res) == 2: res += (1,) return res
[docs] @staticmethod def guess_block_size(shape): """ Guess a block size based on the shape of an array. """ ndim = len(shape) if ndim == 1: return (128, 1, 1) if ndim == 2: return (32, 32, 1) else: return (16, 8, 8)
[docs] def get_block_grid(self, *args, **kwargs): block = None grid = None if ("block" not in kwargs) or (kwargs["block"] is None): if self.automation_params["guess_block"]: block = self.guess_block_size(args[0].shape) else: raise ValueError("Please provide block size") else: block = kwargs["block"] if ("grid" not in kwargs) or (kwargs["grid"] is None): if self.automation_params["guess_grid"]: grid = self.guess_grid_size(args[0].shape, block) else: raise ValueError("Please provide block grid") else: grid = kwargs["grid"] self.last_block_size = block self.last_grid_size = grid return block, grid
[docs] def follow_device_arr(self, args): raise ValueError("Base class")
def _prepare_call(self, *args, **kwargs): block, grid = self.get_block_grid(*args, **kwargs) # pycuda crashes when any element of block/grid is not a python int (ex. numpy.int64). # A weird behavior once observed is "data.shape" returning (np.int64, int, int) (!). # Ensure that everything is a python integer. grid = tuple(int(x) for x in grid) if block is not None: block = tuple(int(x) for x in block) # args = self.follow_device_arr(args) return grid, block, args, kwargs