# Source code for nabu.processing.roll_opencl

#
# WIP !
#
import numpy as np
from ..opencl.utils import __has_pyopencl__
from ..utils import get_opencl_srcfile

if __has_pyopencl__:
    import pyopencl as cl
    from ..opencl.processing import OpenCLProcessing
    from ..opencl.kernel import OpenCLKernel
    from pyopencl.tools import dtype_to_ctype as cl_dtype_to_ctype


class OpenCLRoll:
    """
    Driver for the ``roll_forward_x`` and ``revert_array_x`` OpenCL kernels of ``roll.cl``.

    Calling an instance on a 2D array launches ``roll_forward_x`` on it. When
    ``direction`` is negative, ``revert_array_x`` is launched once before and
    once after the roll kernel.

    NOTE(review): the kernel sources are not visible from this file. Presumably
    the kernels work in place along the horizontal (last) axis, on the columns
    located after ``offset`` - confirm against ``roll.cl``.
    """

    def __init__(self, dtype, direction=1, offset=None, **processing_kwargs):
        """
        Compile the kernel(s) for a given data type.

        Parameters
        ----------
        dtype:
            Anything accepted by ``numpy.dtype``. Its C type name is passed to
            the OpenCL compiler as the ``DTYPE`` macro.
        direction: int, optional
            If negative, the "revert" kernel is also compiled and used around
            the roll kernel. Default is 1.
        offset: int, optional
            Value forwarded to the kernels as their offset argument. ``None``
            (default) is treated as 0.
        processing_kwargs:
            Only the ``queue`` key is read; it is forwarded to ``OpenCLProcessing``.

        Raises
        ------
        ImportError
            If pyopencl is not available.
        """
        if not __has_pyopencl__:
            # The OpenCL names are imported conditionally at module level:
            # fail with an explicit message instead of a NameError below.
            raise ImportError("pyopencl is needed to use OpenCLRoll")

        self.processing = OpenCLProcessing(queue=processing_kwargs.get("queue", None))
        self.dtype = np.dtype(dtype)
        compile_options = [f"-DDTYPE={cl_dtype_to_ctype(self.dtype)}"]
        self.offset = offset or 0

        self.roll_kernel = self._build_kernel("roll_forward_x", compile_options)
        # Local memory buffer sized for exactly one element of the working dtype
        self.shmem = cl.LocalMemory(self.dtype.itemsize)

        self.direction = direction
        if self.direction < 0:
            self.revert_kernel = self._build_kernel("revert_array_x", compile_options)

    def _build_kernel(self, kernel_name, compile_options):
        """Compile the kernel `kernel_name` from roll.cl on the current queue."""
        return OpenCLKernel(
            kernel_name,
            None,
            queue=self.processing.queue,
            filename=get_opencl_srcfile("roll.cl"),
            options=compile_options,
        )

    def __call__(self, arr):
        """
        Launch the roll kernel (surrounded by the revert kernel if direction < 0) on `arr`.

        Parameters
        ----------
        arr:
            Two-dimensional array (shape ``(ny, nx)``) handed to the kernels.

        Returns
        -------
        arr:
            The very same object that was passed in.

        Raises
        ------
        ValueError
            If `arr` is not two-dimensional, or if fewer than two columns
            remain once `offset` is subtracted from the number of columns.
        """
        if len(arr.shape) != 2:
            raise ValueError(f"Expected a 2D array, got an array with shape {tuple(arr.shape)}")
        ny, nx = arr.shape

        n_cols = nx - self.offset
        if n_cols < 2:
            # (nx - offset) // 2 would give a work-group size <= 0,
            # which is not a valid OpenCL launch configuration.
            raise ValueError(
                f"Need at least 2 columns after offset={self.offset}, but array has only {nx} columns"
            )

        # Launch one big horizontal workgroup
        wg_x = min(n_cols // 2, self.processing.queue.device.max_work_group_size)

        # Arguments shared by both kernels
        kernel_args = (arr, np.int32(nx), np.int32(ny), np.int32(self.offset))
        needs_revert = self.direction < 0
        # local_size=None leaves the work-group size choice to the kernel wrapper
        revert_launch = {"local_size": None, "global_size": [n_cols, ny]}

        if needs_revert:
            self.revert_kernel(*kernel_args, **revert_launch)
        self.roll_kernel(
            *kernel_args,
            self.shmem,
            local_size=(wg_x, 1, 1),
            global_size=[wg_x, ny],
        )
        if needs_revert:
            self.revert_kernel(*kernel_args, **revert_launch)
        return arr