Source code for nabu.reconstruction.sinogram_cuda

import numpy as np
from ..utils import get_cuda_srcfile, updiv, deprecated_class
from .sinogram import SinoBuilder, SinoNormalization, SinoMult
from .sinogram import _convert_halftomo_right  # FIXME Temporary patch
from ..cuda.processing import CudaProcessing



[docs]
class CudaSinoBuilder(SinoBuilder):
    def __init__(
        self, sinos_shape=None, radios_shape=None, rot_center=None, halftomo=False, angles=None, cuda_options=None
    ):
        """
        Initialize a CudaSinoBuilder instance.
        Please see the documentation of nabu.reconstruction.sinogram.Builder
        and nabu.cuda.processing.CudaProcessing.
        """
        super().__init__(
            sinos_shape=sinos_shape, radios_shape=radios_shape, rot_center=rot_center, halftomo=halftomo, angles=angles
        )
        self.cuda_processing = CudaProcessing(**(cuda_options or {}))
        self._init_cuda_halftomo()

    def _init_cuda_halftomo(self):
        if not (self.halftomo):
            return
        kernel_name = "halftomo_kernel"
        self.halftomo_kernel = self.cuda_processing.kernel(
            kernel_name,
            get_cuda_srcfile("halftomo.cu"),
            signature="PPPiii",
        )
        blk = (32, 32, 1)  # tune ?
        self._halftomo_blksize = blk
        self._halftomo_gridsize = (updiv(self.extended_sino_width, blk[0]), updiv((self.n_angles + 1) // 2, blk[1]), 1)
        d = self.n_x - self.extended_sino_width // 2  # will have to be adapted for varying axis pos
        self.halftomo_weights = np.linspace(0, 1, 2 * abs(d), endpoint=True, dtype="f")
        self.d_halftomo_weights = self.cuda_processing.to_device("d_halftomo_weights", self.halftomo_weights)
        # Allocate one single sinogram (kernel needs c-contiguous array).
        # If odd number of angles: repeat last angle.
        self.d_sino = self.cuda_processing.allocate_array(
            "d_sino", (self.n_angles + (self.n_angles & 1), self.n_x), "f"
        )
        self.h_sino = self.d_sino.get()
        #
        self.cuda_processing.init_arrays_to_none(["d_output"])
        if self._halftomo_flip:
            self.xflip_kernel = self.cuda_processing.kernel(
                "reverse2D_x", get_cuda_srcfile("ElementOp.cu"), signature="Pii"
            )
            blk = (32, 32, 1)
            self._xflip_blksize = blk
            self._xflip_gridsize_1 = (updiv(self.n_x, blk[0]), updiv(self.n_angles, blk[1]), 1)
            self._xflip_gridsize_2 = self._halftomo_gridsize

    #
    # 2D
    #

    def _get_sino_halftomo(self, sino, output=None):
        if output is None:
            output = self.cuda_processing.allocate_array("d_output", self.output_shape[1:])
        elif output.shape != self.output_shape[1:]:
            raise ValueError("Expected output to have shape %s but got %s" % (self.output_shape[1:], output.shape))

        d_sino = self.d_sino
        n_a, n_x = sino.shape
        d_sino[:n_a] = sino[:]
        if self.n_angles & 1:
            d_sino[-1, :].fill(0)
        if self._halftomo_flip:
            self.xflip_kernel(d_sino, n_x, n_a, grid=self._xflip_gridsize_1, block=self._xflip_blksize)
        # Sometimes CoR is set well outside the FoV. Not supported by cuda backend for now.
        # TODO/FIXME: TEMPORARY PATCH, waiting for cuda implementation
        if self.rot_center > self.n_x:
            d_sino.get(self.h_sino)  # copy D2H
            res = _convert_halftomo_right(self.h_sino, self.extended_sino_width)
            output.set(res)  # copy H2D
        #
        else:
            self.halftomo_kernel(
                d_sino,
                output,
                self.d_halftomo_weights,
                n_a,
                n_x,
                self.extended_sino_width // 2,
                grid=self._halftomo_gridsize,
                block=self._halftomo_blksize,
            )
        if self._halftomo_flip:
            self.xflip_kernel(
                output, self.extended_sino_width, (n_a + 1) // 2, grid=self._xflip_gridsize_2, block=self._xflip_blksize
            )
        return output

    #
    # 3D
    #

    def _get_sinos_simple(self, radios, output=None):
        if output is None:
            return radios.transpose(axes=(1, 0, 2))  # view
        else:
            # why can't I do a discontig single copy ?
            for i in range(radios.shape[1]):
                output[i] = radios[:, i, :]
            return output

    def _get_sinos_halftomo(self, radios, output=None):
        if output is None:
            output = self.cuda_processing.allocate_array("output", self.output_shape, "f")
        elif output.shape != self.output_shape:
            raise ValueError("Expected output to have shape %s but got %s" % (self.output_shape, output.shape))
        for i in range(self.n_z):
            sino = self._get_sino_simple(radios, i)
            self._get_sino_halftomo(sino, output=output[i])
        return output



CudaSinoProcessing = deprecated_class("'CudaSinoProcessing' was renamed 'CudaSinoBuilder'", do_print=True)(
    CudaSinoBuilder
)



[docs]
class CudaSinoMult(SinoMult):
    def __init__(self, sino_shape, rot_center, **cuda_options):
        super().__init__(sino_shape, rot_center)
        self.cuda_processing = CudaProcessing(**cuda_options)
        self._init_kernel()

    def _init_kernel(self):
        self.halftomo_kernel = self.cuda_processing.kernel(
            "halftomo_prepare_sinogram", filename=get_cuda_srcfile("halftomo.cu"), signature="PPiiii"
        )
        self.d_weights = self.cuda_processing.set_array("d_weights", self.weights)
        self._halftomo_kernel_other_args = [
            self.d_weights,
            np.int32(self.n_a),
            np.int32(self.n_x),
            np.int32(self.start_x),
            np.int32(self.end_x),
        ]
        self._grid = (self.n_x, self.n_a)
        self._blk = (32, 32, 1)  # tune ?


[docs]
    def prepare_sino(self, sino):
        sino = self.cuda_processing.set_array("d_sino", sino)
        self.halftomo_kernel(sino, *self._halftomo_kernel_other_args, grid=self._grid, block=self._blk)
        return sino





[docs]
class CudaSinoNormalization(SinoNormalization):
    def __init__(
        self, kind="chebyshev", sinos_shape=None, radios_shape=None, normalization_array=None, cuda_options=None
    ):
        super().__init__(
            kind=kind, sinos_shape=sinos_shape, radios_shape=radios_shape, normalization_array=normalization_array
        )
        self._get_shapes(sinos_shape, radios_shape)
        self.cuda_processing = CudaProcessing(**(cuda_options or {}))
        self._init_cuda_normalization()

    _get_shapes = SinoBuilder._get_shapes

    #
    # Chebyshev normalization
    #

    def _init_cuda_normalization(self):
        self._d_tmp = self.cuda_processing.allocate_array("_d_tmp", self.sinos_shape[-2:], "f")
        if self.normalization_kind == "chebyshev":
            self._chebyshev_kernel = self.cuda_processing.kernel(
                "normalize_chebyshev",
                filename=get_cuda_srcfile("normalization.cu"),
                signature="Piii",
            )
            self._chebyshev_kernel_args = [np.int32(self.n_x), np.int32(self.n_angles), np.int32(self.n_z)]
            blk = (1, 64, 16)  # TODO tune ?
            self._chebyshev_kernel_kwargs = {
                "block": blk,
                "grid": (1, int(updiv(self.n_angles, blk[1])), int(updiv(self.n_z, blk[2]))),
            }
        elif self.normalization_array is not None:
            normalization_array = self.normalization_array
            # If normalization_array is 1D, make a 2D array by repeating the line
            if normalization_array.ndim == 1:
                normalization_array = np.tile(normalization_array, (self.n_angles, 1))
            self._d_normalization_array = self.cuda_processing.to_device(
                "_d_normalization_array", normalization_array.astype("f")
            )
            if self.normalization_kind == "subtraction":
                generic_op_val = 1
            elif self.normalization_kind == "division":
                generic_op_val = 3
            self._norm_kernel = self.cuda_processing.kernel(
                "inplace_generic_op_2Dby2D",
                filename=get_cuda_srcfile("ElementOp.cu"),
                signature="PPii",
                options=["-DGENERIC_OP=%d" % generic_op_val],
            )
            self._norm_kernel_args = [self._d_normalization_array, np.int32(self.n_angles), np.int32(self.n_x)]
            blk = (32, 32, 1)
            self._norm_kernel_kwargs = {
                "block": blk,
                "grid": (int(updiv(self.n_angles, blk[0])), int(updiv(self.n_x, blk[1])), 1),
            }

    def _normalize_chebyshev(self, sinos):
        if sinos.flags.c_contiguous:
            self._chebyshev_kernel(sinos, *self._chebyshev_kernel_args, **self._chebyshev_kernel_kwargs)
        else:
            # This kernel seems to have an issue on arrays that are not C-contiguous.
            # We have to process image per image.
            nz = np.int32(1)
            nthreadsperblock = (1, 32, 1)  # TODO tune
            nblocks = (1, int(updiv(self.n_angles, nthreadsperblock[1])), 1)
            for i in range(sinos.shape[0]):
                self._d_tmp[:] = sinos[i][:]
                self._chebyshev_kernel(
                    self._d_tmp,
                    np.int32(self.n_x),
                    np.int32(self.n_angles),
                    np.int32(1),
                    grid=nblocks,
                    block=nthreadsperblock,
                )
                sinos[i][:] = self._d_tmp[:]
        return sinos

    #
    # Array subtraction/division
    #

    def _normalize_op(self, sino):
        if sino.ndim == 2:
            # Things can go wrong if "sino" is a non-contiguous 2D array
            # But this should be handled outside this function, as the processing is in-place
            self._norm_kernel(sino, *self._norm_kernel_args, **self._norm_kernel_kwargs)
        else:
            if sino.flags.forc:
                # Contiguous 3D array. But pycuda wants the same shape for both operands.
                for i in range(sino.shape[0]):
                    self._norm_kernel(sino[i], *self._norm_kernel_args, **self._norm_kernel_kwargs)
            else:
                # Non-contiguous 2D array. Make a temp. copy
                for i in range(sino.shape[0]):
                    self._d_tmp[:] = sino[i][:]
                    self._norm_kernel(self._d_tmp, *self._norm_kernel_args, **self._norm_kernel_kwargs)
                    sino[i][:] = self._d_tmp[:]
        return sino