# Source code for nabu.processing.rotation_cuda

import numpy as np
from .rotation import Rotation
from ..utils import get_cuda_srcfile, updiv
from ..cuda.utils import __has_pycuda__, copy_array
from ..cuda.processing import CudaProcessing

if __has_pycuda__:
    from ..cuda.kernel import CudaKernel
    import pycuda.driver as cuda


class CudaRotation(Rotation):
    """
    Image rotation on GPU, using the "rotate" kernel of rotation.cu.

    The image to rotate is copied into a CUDA array bound to the
    "tex_image" texture (linear filtering, clamped addressing), and the
    kernel writes the rotated image into a device array.
    """

    def __init__(self, shape, angle, center=None, mode="edge", reshape=False, cuda_options=None, **sk_kwargs):
        """
        Initialize a CudaRotation instance.

        Parameters
        ----------
        shape: tuple
            Image shape, indexed as (number of rows, number of columns).
        angle: float
            Rotation angle. It is converted with np.deg2rad before being
            handed to the kernel, so it is expected in degrees.
        center: tuple, optional
            Rotation center, as (x, y). If None, it defaults to
            ((shape[1] - 1) / 2, (shape[0] - 1) / 2).
        mode: str, optional
            Forwarded to Rotation.__init__.
            NOTE(review): the texture set up here always uses the CLAMP
            address mode; whether other values of `mode` are honoured by the
            CUDA path cannot be told from this file - confirm.
        reshape: bool, optional
            Forwarded to Rotation.__init__.
            NOTE(review): device arrays and the launch grid are sized from
            self.shape only - confirm that reshape=True is supported.
        cuda_options: dict, optional
            Keyword arguments passed to CudaProcessing. None means no option.
        **sk_kwargs
            Extra keyword arguments forwarded to Rotation.__init__.
        """
        if center is None:
            center = ((shape[1] - 1) / 2.0, (shape[0] - 1) / 2.0)
        super().__init__(shape, angle, center=center, mode=mode, reshape=reshape, **sk_kwargs)
        self._init_cuda_rotation(cuda_options)

    def _init_cuda_rotation(self, cuda_options):
        """Create the CudaProcessing helper, then the device arrays and the kernel."""
        cuda_options = cuda_options or {}
        self.cuda_processing = CudaProcessing(**cuda_options)
        self._allocate_arrays()
        self._init_rotation_kernel()

    def _allocate_arrays(self):
        """Create the CUDA array backing the input texture and declare the output array."""
        # Zero-filled float32 CUDA array of the image shape; rotate() copies
        # each input image into it.
        self._d_image_cua = cuda.np_to_array(np.zeros(self.shape, "f"), "C")
        # The output array is only allocated in rotate(), when the caller
        # does not provide one.
        self.cuda_processing.init_arrays_to_none(["d_output"])

    def _init_rotation_kernel(self):
        """Build the rotation kernel, bind its texture and precompute the launch parameters."""
        self.cuda_rotation_kernel = CudaKernel("rotate", get_cuda_srcfile("rotation.cu"))
        self.texref_image = self.cuda_rotation_kernel.module.get_texref("tex_image")
        self.texref_image.set_filter_mode(cuda.filter_mode.LINEAR)  # bilinear
        self.texref_image.set_address_mode(0, cuda.address_mode.CLAMP)  # TODO tune
        self.texref_image.set_address_mode(1, cuda.address_mode.CLAMP)  # TODO tune
        # Signature: one pointer (output), two ints (Nx, Ny), four floats
        # (cos, sin, center_x, center_y) - same order as the call in rotate().
        self.cuda_rotation_kernel.prepare("Piiffff", [self.texref_image])
        self.texref_image.set_array(self._d_image_cua)

        angle_rad = np.deg2rad(self.angle)
        self._cos_theta = np.cos(angle_rad)
        self._sin_theta = np.sin(angle_rad)
        self._Nx = np.int32(self.shape[1])
        self._Ny = np.int32(self.shape[0])
        self._center_x = np.float32(self.center[0])
        self._center_y = np.float32(self.center[1])

        self._block = (32, 32, 1)  # tune ?
        # The x dimension of the grid spans the columns (shape[1]) and must
        # be divided by the x extent of the block (index 0); likewise y spans
        # the rows and uses the y extent (index 1). The indices used to be
        # swapped, which only worked because the block is square.
        self._grid = (
            updiv(self.shape[1], self._block[0]),
            updiv(self.shape[0], self._block[1]),
            1,
        )

    def rotate(self, img, output=None, do_checks=True):
        """
        Rotate an image.

        Parameters
        ----------
        img: array
            Image to rotate. It is copied into the internal CUDA array with
            copy_array().
        output: device array, optional
            Array the kernel writes the result to. If None, the internal
            "d_output" array of shape self.shape and dtype float32 is
            requested from self.cuda_processing and used instead.
        do_checks: bool, optional
            Passed as the `check` argument of copy_array().

        Returns
        -------
        d_out: device array
            `output` if it was provided, otherwise the internal output array
            (presumably reused, hence overwritten, by later calls - confirm
            against CudaProcessing.allocate_array).
        """
        copy_array(self._d_image_cua, img, check=do_checks)
        if output is not None:
            d_out = output
        else:
            self.cuda_processing.allocate_array("d_output", self.shape, np.float32)
            d_out = self.cuda_processing.d_output
        self.cuda_rotation_kernel(
            d_out,
            self._Nx,
            self._Ny,
            self._cos_theta,
            self._sin_theta,
            self._center_x,
            self._center_y,
            grid=self._grid,
            block=self._block,
        )
        return d_out

    __call__ = rotate