Source code for nabu.reconstruction.sinogram_cuda

import numpy as np
from ..utils import get_cuda_srcfile, updiv, deprecated_class
from .sinogram import SinoBuilder, SinoNormalization, SinoMult
from .sinogram import _convert_halftomo_right  # FIXME Temporary patch
from ..cuda.processing import CudaProcessing


[docs] class CudaSinoBuilder(SinoBuilder): def __init__( self, sinos_shape=None, radios_shape=None, rot_center=None, halftomo=False, angles=None, cuda_options=None ): """ Initialize a CudaSinoBuilder instance. Please see the documentation of nabu.reconstruction.sinogram.Builder and nabu.cuda.processing.CudaProcessing. """ super().__init__( sinos_shape=sinos_shape, radios_shape=radios_shape, rot_center=rot_center, halftomo=halftomo, angles=angles ) self.cuda_processing = CudaProcessing(**(cuda_options or {})) self._init_cuda_halftomo() def _init_cuda_halftomo(self): if not (self.halftomo): return kernel_name = "halftomo_kernel" self.halftomo_kernel = self.cuda_processing.kernel( kernel_name, get_cuda_srcfile("halftomo.cu"), signature="PPPiii", ) blk = (32, 32, 1) # tune ? self._halftomo_blksize = blk self._halftomo_gridsize = (updiv(self.extended_sino_width, blk[0]), updiv((self.n_angles + 1) // 2, blk[1]), 1) d = self.n_x - self.extended_sino_width // 2 # will have to be adapted for varying axis pos self.halftomo_weights = np.linspace(0, 1, 2 * abs(d), endpoint=True, dtype="f") self.d_halftomo_weights = self.cuda_processing.to_device("d_halftomo_weights", self.halftomo_weights) # Allocate one single sinogram (kernel needs c-contiguous array). # If odd number of angles: repeat last angle. self.d_sino = self.cuda_processing.allocate_array( "d_sino", (self.n_angles + (self.n_angles & 1), self.n_x), "f" ) self.h_sino = self.d_sino.get() # self.cuda_processing.init_arrays_to_none(["d_output"]) if self._halftomo_flip: self.xflip_kernel = self.cuda_processing.kernel( "reverse2D_x", get_cuda_srcfile("ElementOp.cu"), signature="Pii" ) blk = (32, 32, 1) self._xflip_blksize = blk self._xflip_gridsize_1 = (updiv(self.n_x, blk[0]), updiv(self.n_angles, blk[1]), 1) self._xflip_gridsize_2 = self._halftomo_gridsize # # 2D # def _get_sino_halftomo(self, sino, output=None): if output is None: output = self.cuda_processing.allocate_array("d_output", self.output_shape[1:]) elif output.shape != self.output_shape[1:]: raise ValueError("Expected output to have shape %s but got %s" % (self.output_shape[1:], output.shape)) d_sino = self.d_sino n_a, n_x = sino.shape d_sino[:n_a] = sino[:] if self.n_angles & 1: d_sino[-1, :].fill(0) if self._halftomo_flip: self.xflip_kernel(d_sino, n_x, n_a, grid=self._xflip_gridsize_1, block=self._xflip_blksize) # Sometimes CoR is set well outside the FoV. Not supported by cuda backend for now. # TODO/FIXME: TEMPORARY PATCH, waiting for cuda implementation if self.rot_center > self.n_x: d_sino.get(self.h_sino) # copy D2H res = _convert_halftomo_right(self.h_sino, self.extended_sino_width) output.set(res) # copy H2D # else: self.halftomo_kernel( d_sino, output, self.d_halftomo_weights, n_a, n_x, self.extended_sino_width // 2, grid=self._halftomo_gridsize, block=self._halftomo_blksize, ) if self._halftomo_flip: self.xflip_kernel( output, self.extended_sino_width, (n_a + 1) // 2, grid=self._xflip_gridsize_2, block=self._xflip_blksize ) return output # # 3D # def _get_sinos_simple(self, radios, output=None): if output is None: return radios.transpose(axes=(1, 0, 2)) # view else: # why can't I do a discontig single copy ? for i in range(radios.shape[1]): output[i] = radios[:, i, :] return output def _get_sinos_halftomo(self, radios, output=None): if output is None: output = self.cuda_processing.allocate_array("output", self.output_shape, "f") elif output.shape != self.output_shape: raise ValueError("Expected output to have shape %s but got %s" % (self.output_shape, output.shape)) for i in range(self.n_z): sino = self._get_sino_simple(radios, i) self._get_sino_halftomo(sino, output=output[i]) return output
CudaSinoProcessing = deprecated_class("'CudaSinoProcessing' was renamed 'CudaSinoBuilder'", do_print=True)( CudaSinoBuilder )
[docs] class CudaSinoMult(SinoMult): def __init__(self, sino_shape, rot_center, **cuda_options): super().__init__(sino_shape, rot_center) self.cuda_processing = CudaProcessing(**cuda_options) self._init_kernel() def _init_kernel(self): self.halftomo_kernel = self.cuda_processing.kernel( "halftomo_prepare_sinogram", filename=get_cuda_srcfile("halftomo.cu"), signature="PPiiii" ) self.d_weights = self.cuda_processing.set_array("d_weights", self.weights) self._halftomo_kernel_other_args = [ self.d_weights, np.int32(self.n_a), np.int32(self.n_x), np.int32(self.start_x), np.int32(self.end_x), ] self._grid = (self.n_x, self.n_a) self._blk = (32, 32, 1) # tune ?
[docs] def prepare_sino(self, sino): sino = self.cuda_processing.set_array("d_sino", sino) self.halftomo_kernel(sino, *self._halftomo_kernel_other_args, grid=self._grid, block=self._blk) return sino
[docs] class CudaSinoNormalization(SinoNormalization): def __init__( self, kind="chebyshev", sinos_shape=None, radios_shape=None, normalization_array=None, cuda_options=None ): super().__init__( kind=kind, sinos_shape=sinos_shape, radios_shape=radios_shape, normalization_array=normalization_array ) self._get_shapes(sinos_shape, radios_shape) self.cuda_processing = CudaProcessing(**(cuda_options or {})) self._init_cuda_normalization() _get_shapes = SinoBuilder._get_shapes # # Chebyshev normalization # def _init_cuda_normalization(self): self._d_tmp = self.cuda_processing.allocate_array("_d_tmp", self.sinos_shape[-2:], "f") if self.normalization_kind == "chebyshev": self._chebyshev_kernel = self.cuda_processing.kernel( "normalize_chebyshev", filename=get_cuda_srcfile("normalization.cu"), signature="Piii", ) self._chebyshev_kernel_args = [np.int32(self.n_x), np.int32(self.n_angles), np.int32(self.n_z)] blk = (1, 64, 16) # TODO tune ? self._chebyshev_kernel_kwargs = { "block": blk, "grid": (1, int(updiv(self.n_angles, blk[1])), int(updiv(self.n_z, blk[2]))), } elif self.normalization_array is not None: normalization_array = self.normalization_array # If normalization_array is 1D, make a 2D array by repeating the line if normalization_array.ndim == 1: normalization_array = np.tile(normalization_array, (self.n_angles, 1)) self._d_normalization_array = self.cuda_processing.to_device( "_d_normalization_array", normalization_array.astype("f") ) if self.normalization_kind == "subtraction": generic_op_val = 1 elif self.normalization_kind == "division": generic_op_val = 3 self._norm_kernel = self.cuda_processing.kernel( "inplace_generic_op_2Dby2D", filename=get_cuda_srcfile("ElementOp.cu"), signature="PPii", options=["-DGENERIC_OP=%d" % generic_op_val], ) self._norm_kernel_args = [self._d_normalization_array, np.int32(self.n_angles), np.int32(self.n_x)] blk = (32, 32, 1) self._norm_kernel_kwargs = { "block": blk, "grid": (int(updiv(self.n_angles, blk[0])), int(updiv(self.n_x, blk[1])), 1), } def _normalize_chebyshev(self, sinos): if sinos.flags.c_contiguous: self._chebyshev_kernel(sinos, *self._chebyshev_kernel_args, **self._chebyshev_kernel_kwargs) else: # This kernel seems to have an issue on arrays that are not C-contiguous. # We have to process image per image. nz = np.int32(1) nthreadsperblock = (1, 32, 1) # TODO tune nblocks = (1, int(updiv(self.n_angles, nthreadsperblock[1])), 1) for i in range(sinos.shape[0]): self._d_tmp[:] = sinos[i][:] self._chebyshev_kernel( self._d_tmp, np.int32(self.n_x), np.int32(self.n_angles), np.int32(1), grid=nblocks, block=nthreadsperblock, ) sinos[i][:] = self._d_tmp[:] return sinos # # Array subtraction/division # def _normalize_op(self, sino): if sino.ndim == 2: # Things can go wrong if "sino" is a non-contiguous 2D array # But this should be handled outside this function, as the processing is in-place self._norm_kernel(sino, *self._norm_kernel_args, **self._norm_kernel_kwargs) else: if sino.flags.forc: # Contiguous 3D array. But pycuda wants the same shape for both operands. for i in range(sino.shape[0]): self._norm_kernel(sino[i], *self._norm_kernel_args, **self._norm_kernel_kwargs) else: # Non-contiguous 2D array. Make a temp. copy for i in range(sino.shape[0]): self._d_tmp[:] = sino[i][:] self._norm_kernel(self._d_tmp, *self._norm_kernel_args, **self._norm_kernel_kwargs) sino[i][:] = self._d_tmp[:] return sino