# Source code for silx.math.histogram

```
# /*##########################################################################
# Copyright (C) 2016 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# ############################################################################*/
"""
This module provides two classes to compute multidimensional
histograms.
Classes
=======
- :class:`Histogramnd` : multi dimensional histogram.
- :class:`HistogramndLut` : optimized to compute several histograms from data sharing the same coordinates.
Examples
========
Single histogram
----------------
Given some 3D data:
>>> import numpy as np
>>> shape = (10**7, 3)
>>> sample = np.random.random(shape) * 500
>>> weights = np.random.random((shape[0],))
Computing the histogram with Histogramnd :
>>> from silx.math import Histogramnd
>>> n_bins = 35
>>> ranges = [[40., 150.], [-130., 250.], [0., 505]]
>>> histo, w_histo, edges = Histogramnd(sample, n_bins=n_bins, histo_range=ranges, weights=weights)
Histogramnd can accumulate sets of data that don't have the same
coordinates :
>>> from silx.math import Histogramnd
>>> histo_obj = Histogramnd(sample, n_bins=n_bins, histo_range=ranges, weights=weights)
>>> sample_2 = np.random.random(shape) * 200
>>> weights_2 = np.random.random((shape[0],))
>>> histo_obj.accumulate(sample_2, weights=weights_2)
And then access the results:
>>> histo = histo_obj.histo
>>> weighted_histo = histo_obj.weighted_histo
or even:
>>> histo, w_histo, edges = histo_obj
Accumulating histograms (LUT)
-----------------------------
In some situations we need to compute the weighted histogram of several
sets of data (weights) that have the same coordinates (sample).
Again, some data (2 sets of weights) :
>>> import numpy as np
>>> shape = (10**7, 3)
>>> sample = np.random.random(shape) * 500
>>> weights_1 = np.random.random((shape[0],))
>>> weights_2 = np.random.random((shape[0],))
And getting the result with HistogramndLut :
>>> from silx.math import HistogramndLut
>>> n_bins = 35
>>> ranges = [[40., 150.], [-130., 250.], [0., 505]]
>>> histo_lut = HistogramndLut(sample, ranges, n_bins)
First call, with weights_1 :
>>> histo_lut.accumulate(weights_1)
Second call, with weights_2 :
>>> histo_lut.accumulate(weights_2)
Retrieving the results (this is a copy of what's actually stored in
this instance) :
>>> histo = histo_lut.histo()
>>> w_histo = histo_lut.weighted_histo()
Note that the following code gives the same result, but the
HistogramndLut instance does not store the accumulated weighted histogram.
First call with weights_1
>>> histo, w_histo = histo_lut.apply_lut(weights_1)
Second call with weights_2
>>> histo, w_histo = histo_lut.apply_lut(weights_2, histo=histo, weighted_histo=w_histo)
Bin edges
---------
When computing a histogram the caller is asked to provide the histogram
range along each coordinate (parameter *histo_range*). This parameter must
be given a [N, 2] array where N is the number of dimensions of the histogram.
In other words, the caller must provide, for each dimension,
the left edge of the first (*leftmost*) bin, and the right edge of the
last (*rightmost*) bin.
E.g. : for a 1D sample, for a histo_range equal to [0, 10] and n_bins=4, the
bins ranges will be :
* [0, 2.5[, [2.5, 5[, [5, 7.5[, [7.5, 10 **[** if last_bin_closed = **False**
* [0, 2.5[, [2.5, 5[, [5, 7.5[, [7.5, 10 **]** if last_bin_closed = **True**
....
"""
__authors__ = ["D. Naudet"]
__license__ = "MIT"
__date__ = "02/10/2017"
import numpy as np
from .chistogramnd import chistogramnd as _chistogramnd # noqa
from .chistogramnd_lut import histogramnd_get_lut as _histo_get_lut
from .chistogramnd_lut import histogramnd_from_lut as _histo_from_lut
[docs]
class Histogramnd(object):
    """
    Computes the multidimensional histogram of some data.

    The instance holds a ``(histo, weighted_histo, edges)`` triple. It can be
    unpacked directly (``histo, w_histo, edges = instance``) or read through
    the :attr:`histo`, :attr:`weighted_histo` and :attr:`edges` properties.
    """

    def __init__(
        self,
        sample,
        histo_range,
        n_bins,
        weights=None,
        weight_min=None,
        weight_max=None,
        last_bin_closed=False,
        wh_dtype=None,
    ):
        """
        :param sample:
            The data to be histogrammed.
            Its shape must be either
            (N,) if it contains one dimensional coordinates,
            or an (N, D) array where the rows are the
            coordinates of points in a D dimensional space.
            The following dtypes are supported : :class:`numpy.float64`,
            :class:`numpy.float32`, :class:`numpy.int32`.

            If None, no histogram is computed at construction time and the
            instance stays empty until :meth:`accumulate` is called.

            .. warning:: if sample is not a C_CONTIGUOUS ndarray (e.g : a non
                contiguous slice) then histogramnd will have to make an
                internal copy.
        :type sample: :class:`numpy.array`

        :param histo_range:
            A (D, 2) array containing the histogram range along each
            dimension, where D is the sample's number of dimensions.
        :type histo_range: array_like

        :param n_bins:
            The number of bins :
                * a scalar (same number of bins for all dimensions)
                * a D elements array (number of bins for each dimensions)
        :type n_bins: scalar or array_like

        :param weights:
            A N elements numpy array of values associated with
            each sample.
            The values of the *weighted_histo* array
            returned by the function are equal to the sum of
            the weights associated with the samples falling
            into each bin.
            The following dtypes are supported : :class:`numpy.float64`,
            :class:`numpy.float32`, :class:`numpy.int32`.

            .. note:: If None, the weighted histogram returned will be None.
        :type weights: *optional*, :class:`numpy.array`

        :param weight_min:
            Use this parameter to filter out all samples whose
            weights are lower than this value.

            .. note:: This value will be cast to the same type
                as *weights*.
        :type weight_min: *optional*, scalar

        :param weight_max:
            Use this parameter to filter out all samples whose
            weights are higher than this value.

            .. note:: This value will be cast to the same type
                as *weights*.
        :type weight_max: *optional*, scalar

        :param last_bin_closed:
            By default the last bin is half
            open (i.e.: [x,y) ; x included, y
            excluded), like all the other bins.
            Set this parameter to true if you want
            the LAST bin to be closed.
        :type last_bin_closed: *optional*, :class:`python.boolean`

        :param wh_dtype: type of the weighted histogram array.
            If not provided, the weighted histogram array will contain values
            of type numpy.double. Allowed values are : `numpy.double` and
            `numpy.float32`
        :type wh_dtype: *optional*, numpy data type
        """
        # The binning parameters are kept so that every later accumulate()
        # call reuses exactly the same grid.
        self.__histo_range = histo_range
        self.__n_bins = n_bins
        self.__last_bin_closed = last_bin_closed
        self.__wh_dtype = wh_dtype

        if sample is None:
            # Empty placeholder (histo, weighted_histo, edges), filled by the
            # first call to accumulate().
            self.__data = [None, None, None]
        else:
            self.__data = _chistogramnd(
                sample,
                self.__histo_range,
                self.__n_bins,
                weights=weights,
                weight_min=weight_min,
                weight_max=weight_max,
                last_bin_closed=self.__last_bin_closed,
                wh_dtype=self.__wh_dtype,
            )

    def __getitem__(self, key):
        """
        If necessary, results can be unpacked from an instance of Histogramnd :
        *histogram*, *weighted histogram*, *bins edge*.

        Example :

        .. code-block:: python

            histo, w_histo, edges = Histogramnd(sample, histo_range, n_bins, weights)
        """
        return self.__data[key]

    def accumulate(self, sample, weights=None, weight_min=None, weight_max=None):
        """
        Computes the multidimensional histogram of some data and accumulates it
        into the histogram held by this instance of Histogramnd.

        :param sample:
            The data to be histogrammed.
            Its shape must be either
            (N,) if it contains one dimensional coordinates,
            or an (N, D) array where the rows are the
            coordinates of points in a D dimensional space.
            The following dtypes are supported : :class:`numpy.float64`,
            :class:`numpy.float32`, :class:`numpy.int32`.

            .. warning:: if sample is not a C_CONTIGUOUS ndarray (e.g : a non
                contiguous slice) then histogramnd will have to make an
                internal copy.
        :type sample: :class:`numpy.array`

        :param weights:
            A N elements numpy array of values associated with
            each sample.
            The values of the *weighted_histo* array
            returned by the function are equal to the sum of
            the weights associated with the samples falling
            into each bin.
            The following dtypes are supported : :class:`numpy.float64`,
            :class:`numpy.float32`, :class:`numpy.int32`.

            .. note:: If None, the weighted histogram returned will be None.
        :type weights: *optional*, :class:`numpy.array`

        :param weight_min:
            Use this parameter to filter out all samples whose
            weights are lower than this value.

            .. note:: This value will be cast to the same type
                as *weights*.
        :type weight_min: *optional*, scalar

        :param weight_max:
            Use this parameter to filter out all samples whose
            weights are higher than this value.

            .. note:: This value will be cast to the same type
                as *weights*.
        :type weight_max: *optional*, scalar
        """
        result = _chistogramnd(
            sample,
            self.__histo_range,
            self.__n_bins,
            weights=weights,
            weight_min=weight_min,
            weight_max=weight_max,
            last_bin_closed=self.__last_bin_closed,
            # Hand over the arrays already held (possibly None) so the new
            # counts are added on top of them.
            histo=self.__data[0],
            weighted_histo=self.__data[1],
            wh_dtype=self.__wh_dtype,
        )

        # Adopt the returned triple when nothing was stored yet, or when this
        # call produced the first weighted histogram. Otherwise the stored
        # arrays are the ones that were passed in above and are kept as is
        # (presumably updated in place by _chistogramnd -- TODO confirm).
        nothing_stored = self.__data[0] is None
        first_weighted = self.__data[1] is None and result[1] is not None
        if nothing_stored or first_weighted:
            self.__data = result

    @property
    def histo(self):
        """Histogram array, or None if this instance was initialized without
        <sample> and accumulate has not been called yet.

        .. note:: this is a **reference** to the array stored in this
            Histogramnd instance, use with caution.
        """
        return self[0]

    @property
    def weighted_histo(self):
        """Weighted Histogram, or None if this instance was initialized without
        <sample>, or no weights have been passed to __init__ nor accumulate.

        .. note:: this is a **reference** to the array stored in this
            Histogramnd instance, use with caution.
        """
        return self[1]

    @property
    def edges(self):
        """Bins edges, or None if this instance was initialized without
        <sample> and accumulate has not been called yet.
        """
        return self[2]
[docs]
class HistogramndLut(object):
    """
    The HistogramndLut class allows you to bin data onto a regular grid.
    The use of HistogramndLut is interesting when several sets of data that
    share the same coordinates (*sample*) have to be mapped onto the same grid.
    """

    def __init__(self, sample, histo_range, n_bins, last_bin_closed=False, dtype=None):
        """
        :param sample:
            The coordinates of the data to be histogrammed.
            Its shape must be either (N,) if it contains one dimensional
            coordinates, or an (N, D) array where the rows are the
            coordinates of points in a D dimensional space.
            The following dtypes are supported : :class:`numpy.float64`,
            :class:`numpy.float32`, :class:`numpy.int32`.
        :type sample: :class:`numpy.array`

        :param histo_range:
            A (D, 2) array containing the histogram range along each
            dimension, where D is the sample's number of dimensions.
        :type histo_range: array_like

        :param n_bins:
            The number of bins :
                * a scalar (same number of bins for all dimensions)
                * a D elements array (number of bins for each dimensions)
        :type n_bins: scalar or array_like

        :param dtype: data type of the weighted histogram. If None, the data
            type will be the same as the first weights array provided (on
            first call of the instance).
        :type dtype: `numpy.dtype`

        :param last_bin_closed:
            By default the last bin is half
            open (i.e.: [x,y) ; x included, y
            excluded), like all the other bins.
            Set this parameter to true if you want
            the LAST bin to be closed.
        :type last_bin_closed: *optional*, :class:`python.boolean`
        """
        lut, histo, edges = _histo_get_lut(
            sample, histo_range, n_bins, last_bin_closed=last_bin_closed
        )

        # Only the shape of the histogram returned with the LUT is kept here.
        self.__n_bins = np.array(histo.shape)
        self.__shape = histo.shape

        # Snapshot the range as an ndarray: works for any array_like input
        # (tuples included) and decouples the instance from later changes
        # made by the caller to its own object.
        self.__histo_range = np.array(histo_range)

        self.__lut = lut
        self.__edges = edges
        self.__dtype = dtype
        self.__last_bin_closed = last_bin_closed

        # Accumulators start empty; they are created by the first call to
        # accumulate().
        self.__histo = None
        self.__weighted_histo = None
        self.clear()

    def clear(self):
        """
        Resets the instance (drops the accumulated histograms).
        """
        self.__weighted_histo = None
        self.__histo = None

    @property
    def lut(self):
        """
        Copy of the Lut
        """
        return self.__lut.copy()

    def histo(self, copy=True):
        """
        Histogram (a copy of it), or None if :meth:`accumulate` has not been
        called yet (or clear was just called).
        If *copy* is set to False then the actual reference to the array is
        returned *(use with caution)*.
        """
        if copy and self.__histo is not None:
            return self.__histo.copy()
        return self.__histo

    def weighted_histo(self, copy=True):
        """
        Weighted histogram (a copy of it), or None if :meth:`accumulate` has
        not been called yet (or clear was just called). If *copy* is set to
        False then the actual reference to the array is returned
        *(use with caution)*.
        """
        if copy and self.__weighted_histo is not None:
            return self.__weighted_histo.copy()
        return self.__weighted_histo

    @property
    def histo_range(self):
        """
        Bins ranges, as a new :class:`numpy.ndarray` (a copy: modifying it
        does not affect this instance).
        """
        # np.array() copies by default and accepts any array_like, whereas a
        # bare .copy() fails on tuples and is only shallow on nested lists.
        return np.array(self.__histo_range)

    @property
    def n_bins(self):
        """
        Number of bins in each direction.
        """
        return self.__n_bins.copy()

    @property
    def bins_edges(self):
        """
        Bins edges of the histograms, one array for each dimensions.
        """
        # Full slice of each edges array, gathered into a tuple.
        return tuple(edges[:] for edges in self.__edges)

    @property
    def last_bin_closed(self):
        """
        Returns True if the rightmost bin in each dimension is closed (i.e :
        values equal to the rightmost bin edge are included in the bin).
        """
        return self.__last_bin_closed

    def accumulate(self, weights, weight_min=None, weight_max=None):
        """
        Computes the multidimensional histogram of some data and adds it to
        the current histogram stored by this instance. The results can be
        retrieved with the :meth:`~.histo` and :meth:`~.weighted_histo`
        methods.

        :param weights:
            A numpy array of values associated with each sample. The number of
            elements in the array must be the same as the number of samples
            provided at instantiation time.
        :type weights: :class:`numpy.array`

        :param weight_min:
            Use this parameter to filter out all samples whose
            weights are lower than this value.

            .. note:: This value will be cast to the same type
                as *weights*.
        :type weight_min: *optional*, scalar

        :param weight_max:
            Use this parameter to filter out all samples whose
            weights are higher than this value.

            .. note:: This value will be cast to the same type
                as *weights*.
        :type weight_max: *optional*, scalar
        """
        # No dtype chosen yet: the first weights array decides it.
        if self.__dtype is None:
            self.__dtype = weights.dtype

        histo, w_histo = _histo_from_lut(
            weights,
            self.__lut,
            histo=self.__histo,
            weighted_histo=self.__weighted_histo,
            shape=self.__shape,
            dtype=self.__dtype,
            weight_min=weight_min,
            weight_max=weight_max,
        )

        # Keep the returned arrays only on the first call; afterwards the
        # stored arrays are the ones passed in above (presumably updated in
        # place by _histo_from_lut -- TODO confirm).
        if self.__histo is None:
            self.__histo = histo
        if self.__weighted_histo is None:
            self.__weighted_histo = w_histo

    def apply_lut(
        self, weights, histo=None, weighted_histo=None, weight_min=None, weight_max=None
    ):
        """
        Computes the multidimensional histogram of some data and returns the
        result (it is NOT added to the current histogram stored by this
        instance).

        :param weights:
            A numpy array of values associated with each sample. The number of
            elements in the array must be the same as the number of samples
            provided at instantiation time.
        :type weights: :class:`numpy.array`

        :param histo:
            Use this parameter if you want to pass your
            own histogram array instead of the one
            created by this function. New values
            will be added to this array. The returned array
            will then be this one.
        :type histo: *optional*, :class:`numpy.array`

        :param weighted_histo:
            Use this parameter if you want to pass your
            own weighted histogram array instead of
            the one created by this function. New
            values will be added to this array. The returned array
            will then be this one (same reference).
        :type weighted_histo: *optional*, :class:`numpy.array`

        :param weight_min:
            Use this parameter to filter out all samples whose
            weights are lower than this value.

            .. note:: This value will be cast to the same type
                as *weights*.
        :type weight_min: *optional*, scalar

        :param weight_max:
            Use this parameter to filter out all samples whose
            weights are higher than this value.

            .. note:: This value will be cast to the same type
                as *weights*.
        :type weight_max: *optional*, scalar

        :return: the ``(histo, weighted_histo)`` pair produced for *weights*.
        """
        histo, w_histo = _histo_from_lut(
            weights,
            self.__lut,
            histo=histo,
            weighted_histo=weighted_histo,
            shape=self.__shape,
            dtype=self.__dtype,
            weight_min=weight_min,
            weight_max=weight_max,
        )
        # NOTE: the accumulated histograms are left untouched, but the dtype
        # of the weighted histogram just computed is remembered and is the
        # one used by subsequent accumulate()/apply_lut() calls.
        self.__dtype = w_histo.dtype
        return histo, w_histo
if __name__ == "__main__":
    # Import-only module: nothing happens when it is executed as a script.
    pass
```