import numpy as np
from ..utils import get_opencl_srcfile, get_cuda_srcfile, updiv, BaseClassError, MissingComponentError
from ..opencl.utils import __has_pyopencl__
from ..cuda.utils import __has_pycuda__
if __has_pyopencl__:
from ..opencl.kernel import OpenCLKernel
from ..opencl.processing import OpenCLProcessing
from pyopencl.tools import dtype_to_ctype as cl_dtype_to_ctype
else:
OpenCLKernel = OpenCLProcessing = cl_dtype_to_ctype = MissingComponentError("need pyopencl to use this class")
if __has_pycuda__:
from ..cuda.kernel import CudaKernel
from ..cuda.processing import CudaProcessing
from pycuda.tools import base_dtype_to_ctype as cu_dtype_to_ctype
else:
CudaKernel = CudaProcessing = cu_dtype_to_ctype = MissingComponentError("need pycuda to use this class")
# pylint: disable=E1101, E1102
[docs]
class TransposeBase:
    """
    A class for transposing (out-of-place) a cuda or opencl array.

    This base class is not usable as-is: backend-specific subclasses provide
    the class attributes below, as well as the two hooks
    `_configure_kenel_initialization()` and `_configure_kernel_call()` that
    are called by `__init__`.
    """

    # Backend-specific components, to be overridden by subclasses
    KernelCls = BaseClassError
    ProcessingCls = BaseClassError
    # NOTE(review): this callable is looked up through the instance. A plain Python
    # function stored here would be bound as a method (and receive 'self') - confirm
    # that the backends provide a callable that is not re-bound.
    dtype_to_ctype = BaseClassError
    backend = "none"

    def __init__(self, shape, dtype, dst_dtype=None, **backend_options):
        """
        Initialize a transpose operator.

        Parameters
        ----------
        shape: tuple of int
            Shape of the (2D) array to transpose.
        dtype:
            Data type of the array to transpose.
        dst_dtype: optional
            Data type of the output array. If None, `dtype` is used.
        backend_options:
            Named arguments passed to the processing class (`ProcessingCls`).

        Raises
        ------
        ValueError
            If `shape` does not have exactly two dimensions.
        """
        # Validate the input first, so that nothing is created for an invalid shape
        if len(shape) != 2:
            raise ValueError("Expected 2D array")
        self.processing = self.ProcessingCls(**(backend_options or {}))
        self.shape = shape
        self.dtype = dtype
        # Explicit test against None: numpy dtype instances define __len__ (number of fields),
        # so "dst_dtype or dtype" can discard a perfectly valid dtype instance.
        self.dst_dtype = dtype if dst_dtype is None else dst_dtype
        self._kernel_init_args = [
            "transpose",
        ]
        self._kernel_init_kwargs = {
            "options": [
                "-DSRC_DTYPE=%s" % self.dtype_to_ctype(self.dtype),
                "-DDST_DTYPE=%s" % self.dtype_to_ctype(self.dst_dtype),
            ],
        }
        # Hook: lets the subclass add its own kernel creation arguments
        self._configure_kenel_initialization()
        self._transpose_kernel = self.KernelCls(*self._kernel_init_args, **self._kernel_init_kwargs)
        # Hook: the subclass defines self._kernel_kwargs, used at each kernel call
        self._configure_kernel_call()

    def __call__(self, arr, dst=None):
        """
        Transpose an array.

        Parameters
        ----------
        arr:
            Array to transpose. Its shape is not checked here; the kernel is called
            with the shape given at initialization.
        dst: optional
            Output array. If None, it is obtained from
            `self.processing.allocate_array()` with the reversed shape and
            the output data type.

        Returns
        -------
        dst:
            The output array.
        """
        if dst is None:
            dst = self.processing.allocate_array("dst", self.shape[::-1], dtype=self.dst_dtype)
        self._transpose_kernel(arr, dst, np.int32(self.shape[1]), np.int32(self.shape[0]), **self._kernel_kwargs)
        return dst
[docs]
class CudaTranspose(TransposeBase):
    """
    Out-of-place transpose of a 2D array, using the cuda backend.
    """

    KernelCls = CudaKernel
    ProcessingCls = CudaProcessing
    dtype_to_ctype = cu_dtype_to_ctype
    backend = "cuda"

    def _configure_kenel_initialization(self):
        """
        Add the cuda-specific arguments (source file, signature) used to create the kernel.
        """
        self._kernel_init_kwargs.update(
            filename=get_cuda_srcfile("transpose.cu"),
            signature="PPii",
        )

    def _configure_kernel_call(self):
        """
        Define the launch configuration ('grid' and 'block') passed at each kernel call.
        """
        threads_per_block = (32, 32, 1)
        # zip() stops at the shortest sequence: one grid entry per dimension of self.shape
        n_blocks = []
        for n_items, n_threads in zip(self.shape, threads_per_block):
            n_blocks.append(updiv(n_items, n_threads))
        self._kernel_kwargs = {"grid": n_blocks, "block": threads_per_block}
[docs]
class OpenCLTranspose(TransposeBase):
    """
    Out-of-place transpose of a 2D array, using the opencl backend.
    """

    KernelCls = OpenCLKernel
    ProcessingCls = OpenCLProcessing
    dtype_to_ctype = cl_dtype_to_ctype
    backend = "opencl"

    def _configure_kenel_initialization(self):
        """
        Add the opencl-specific arguments used to create the kernel:
        the context (positional), the source file and the queue.
        """
        self._kernel_init_args.append(self.processing.ctx)
        self._kernel_init_kwargs.update(
            filename=get_opencl_srcfile("transpose.cl"),
            queue=self.processing.queue,
        )

    def _configure_kernel_call(self):
        """
        Define the launch configuration ('global_size' and 'local_size') passed at each kernel call.
        """
        work_group = (16, 16, 1)
        # Each size is rounded up to a multiple of the work-group size.
        # zip() stops at the shortest sequence: one entry per dimension of self.shape.
        global_size = [
            updiv(n_items, wg_items) * wg_items for n_items, wg_items in zip(self.shape, work_group)
        ]
        self._kernel_kwargs = {"global_size": global_size, "local_size": work_group}
#
# An attempt to have a simplified access to transpose operation
#
# (backend, shape, dtype, dtype_out)
_transposes_store = {}
[docs]
def transpose(array, dst=None, **backend_options):
if hasattr(array, "with_queue"):
backend = "opencl"
transpose_cls = OpenCLTranspose
backend_options["queue"] = array.queue # !
elif hasattr(array, "bind_to_texref"):
backend = "cuda"
transpose_cls = CudaTranspose
else:
raise ValueError("array should be either a pycuda.gpuarray.GPUArray or pyopencl.array.Array instance")
dst_dtype = dst.dtype if dst is not None else None
key = (backend, array.shape, np.dtype(array.dtype), dst_dtype)
transpose_instance = _transposes_store.get(key, None)
if transpose_instance is None:
transpose_instance = transpose_cls(array.shape, array.dtype, dst_dtype=dst_dtype, **backend_options)
_transposes_store[key] = transpose_instance
return transpose_instance(array, dst=dst)