# Source code for nabu.processing.transpose

import numpy as np
from ..utils import get_opencl_srcfile, get_cuda_srcfile, updiv, BaseClassError, MissingComponentError
from ..opencl.utils import __has_pyopencl__
from ..cuda.utils import __has_pycuda__

# Optional OpenCL backend: use the real components when pyopencl is available.
# Otherwise the three names are bound to a MissingComponentError placeholder
# (presumably so that the error is reported only when the OpenCL class is
# actually used, as its message suggests -- confirm in ..utils).
if __has_pyopencl__:
    from ..opencl.kernel import OpenCLKernel
    from ..opencl.processing import OpenCLProcessing
    from pyopencl.tools import dtype_to_ctype as cl_dtype_to_ctype
else:
    OpenCLKernel = OpenCLProcessing = cl_dtype_to_ctype = MissingComponentError("need pyopencl to use this class")
# Optional CUDA backend: same scheme as above, with pycuda.
if __has_pycuda__:
    from ..cuda.kernel import CudaKernel
    from ..cuda.processing import CudaProcessing
    from pycuda.tools import base_dtype_to_ctype as cu_dtype_to_ctype
else:
    CudaKernel = CudaProcessing = cu_dtype_to_ctype = MissingComponentError("need pycuda to use this class")


# pylint: disable=E1101, E1102

[docs]
class TransposeBase:
    """
    A class for transposing (out-of-place) a cuda or opencl array.

    This base class holds the backend-independent logic. Subclasses select a
    backend through the class attributes ``KernelCls``, ``ProcessingCls``,
    ``dtype_to_ctype`` and ``backend``, and must implement two hooks which are
    called by ``__init__``:

      - ``_configure_kenel_initialization()``: complete
        ``self._kernel_init_args`` and ``self._kernel_init_kwargs`` before the
        kernel object is created
      - ``_configure_kernel_call()``: define ``self._kernel_kwargs``, the
        keyword arguments passed to the kernel on each call
    """

    # Placeholders, to be overridden by backend-specific subclasses
    KernelCls = BaseClassError
    ProcessingCls = BaseClassError
    dtype_to_ctype = BaseClassError
    backend = "none"

    def __init__(self, shape, dtype, dst_dtype=None, **backend_options):
        """
        Initialize a transpose object for arrays of a given shape and data type.

        Parameters
        ----------
        shape: sequence of int
            Shape of the array to transpose. Must have exactly two elements.
        dtype: data type
            Data type of the source array, in a form accepted by
            ``dtype_to_ctype``.
        dst_dtype: data type, optional
            Data type of the transposed array.
            If None (default), the source data type is used.

        Other Parameters
        ----------------
        backend_options: dict
            Keyword arguments forwarded to the constructor of ``ProcessingCls``.

        Raises
        ------
        ValueError
            If ``shape`` does not have exactly two elements.
        """
        # Validate first, so that no backend object is created for invalid input
        if len(shape) != 2:
            raise ValueError("Expected 2D array")

        self.processing = self.ProcessingCls(**backend_options)
        self.shape = shape
        self.dtype = dtype
        # Test against None explicitly rather than relying on truthiness:
        # a valid dst_dtype must not be discarded because it evaluates to False
        # (np.dtype objects have len() == 0 for non-structured types, which some
        # NumPy releases treated as falsy -- TODO confirm for supported versions).
        self.dst_dtype = dtype if dst_dtype is None else dst_dtype

        self._kernel_init_args = [
            "transpose",
        ]
        # Source and destination C types are passed as compile-time definitions
        self._kernel_init_kwargs = {
            "options": [
                "-DSRC_DTYPE=%s" % self.dtype_to_ctype(self.dtype),
                "-DDST_DTYPE=%s" % self.dtype_to_ctype(self.dst_dtype),
            ],
        }
        # Backend-specific hooks: the first one completes the kernel constructor
        # arguments, the second one defines self._kernel_kwargs
        self._configure_kenel_initialization()
        self._transpose_kernel = self.KernelCls(*self._kernel_init_args, **self._kernel_init_kwargs)
        self._configure_kernel_call()

    def __call__(self, arr, dst=None):
        """
        Transpose an array.

        Parameters
        ----------
        arr: device array
            Array to transpose. Presumably of shape ``self.shape`` and data type
            ``self.dtype`` (this is not checked here).
        dst: device array, optional
            Output array. If None (default), it is obtained from
            ``self.processing.allocate_array("dst", ...)`` with the reversed
            shape and data type ``self.dst_dtype``.

        Returns
        -------
        dst: device array
            The array holding the result.
        """
        if dst is None:
            dst = self.processing.allocate_array("dst", self.shape[::-1], dtype=self.dst_dtype)
        # The kernel receives shape[1] then shape[0], both as 32-bit integers
        self._transpose_kernel(arr, dst, np.int32(self.shape[1]), np.int32(self.shape[0]), **self._kernel_kwargs)
        return dst




[docs]
class CudaTranspose(TransposeBase):
    """
    Out-of-place transpose of a 2D array, using the CUDA backend.
    """

    KernelCls = CudaKernel
    ProcessingCls = CudaProcessing
    dtype_to_ctype = cu_dtype_to_ctype
    backend = "cuda"

    def _configure_kenel_initialization(self):
        """
        Add the CUDA-specific keyword arguments used to create the kernel:
        the source file and the kernel signature.
        """
        src_file = get_cuda_srcfile("transpose.cu")
        self._kernel_init_kwargs["filename"] = src_file
        # Presumably two pointers followed by two ints, i.e. the four arguments
        # passed when the kernel is called -- confirm against CudaKernel
        self._kernel_init_kwargs["signature"] = "PPii"

    def _configure_kernel_call(self):
        """
        Define the launch configuration: blocks of 32x32 threads, and as many
        blocks as given by updiv() along each of the two array dimensions.
        """
        threads_per_block = (32, 32, 1)
        # map() stops at the shortest input, so only the two dimensions of
        # self.shape are paired with the first two block sizes
        n_blocks = list(map(updiv, self.shape, threads_per_block))
        self._kernel_kwargs = {"grid": n_blocks, "block": threads_per_block}




[docs]
class OpenCLTranspose(TransposeBase):
    """
    Out-of-place transpose of a 2D array, using the OpenCL backend.
    """

    KernelCls = OpenCLKernel
    ProcessingCls = OpenCLProcessing
    dtype_to_ctype = cl_dtype_to_ctype
    backend = "opencl"

    def _configure_kenel_initialization(self):
        """
        Add the OpenCL-specific arguments used to create the kernel:
        the context (positional), the source file and the queue.
        """
        self._kernel_init_args.append(self.processing.ctx)
        src_file = get_opencl_srcfile("transpose.cl")
        cl_queue = self.processing.queue
        self._kernel_init_kwargs["filename"] = src_file
        self._kernel_init_kwargs["queue"] = cl_queue

    def _configure_kernel_call(self):
        """
        Define the launch configuration: work-groups of 16x16 items, and a
        global size where each array dimension is updiv(dim, 16) * 16.
        """
        work_group_size = (16, 16, 1)
        padded_size = []
        # zip() stops after the two dimensions of self.shape
        for length, group_length in zip(self.shape, work_group_size):
            n_groups = updiv(length, group_length)
            padded_size.append(n_groups * group_length)
        self._kernel_kwargs = {"global_size": padded_size, "local_size": work_group_size}



#
# Simplified, function-style access to the transpose operation
#

# Cache of the transpose objects created by transpose().
# Keys are tuples (backend, shape, dtype, dtype_out)
_transposes_store = {}



[docs]
def transpose(array, dst=None, **backend_options):
    """
    Transpose a cuda or opencl array, re-using a cached transpose object.

    The backend is guessed from the attributes of `array`. One transpose object
    is created per (backend, shape, dtype, output dtype) combination and kept in
    the module-level store for subsequent calls.

    Parameters
    ----------
    array: pycuda.gpuarray.GPUArray or pyopencl.array.Array
        Array to transpose.
    dst: device array, optional
        Output array. If None (default), the transpose object provides one.

    Other Parameters
    ----------------
    backend_options: dict
        Keyword arguments passed to the transpose class when a new object has
        to be created. For OpenCL arrays, "queue" is always replaced by the
        queue of `array`.

    Returns
    -------
    dst: device array
        The transposed array.

    Raises
    ------
    ValueError
        If `array` is recognized neither as an OpenCL nor as a CUDA array.

    Notes
    -----
    NOTE(review): neither the queue nor the other backend options are part of
    the cache key, so they are only used when an object is first created for
    a given key.
    """
    # Duck-typing: "with_queue" identifies pyopencl arrays, "bind_to_texref" pycuda ones
    is_opencl_array = hasattr(array, "with_queue")
    if not is_opencl_array and not hasattr(array, "bind_to_texref"):
        raise ValueError("array should be either a pycuda.gpuarray.GPUArray or pyopencl.array.Array instance")

    if is_opencl_array:
        backend, transpose_cls = "opencl", OpenCLTranspose
        # Work on the queue of the input array, overriding any "queue" option
        backend_options["queue"] = array.queue
    else:
        backend, transpose_cls = "cuda", CudaTranspose

    dst_dtype = None if dst is None else dst.dtype
    key = (backend, array.shape, np.dtype(array.dtype), dst_dtype)
    if key not in _transposes_store:
        _transposes_store[key] = transpose_cls(array.shape, array.dtype, dst_dtype=dst_dtype, **backend_options)

    return _transposes_store[key](array, dst=dst)