# Source code for dimarray.core.missingvalues

""" methods to deal with missing values
"""
import numpy as np
from dimarray.tools import is_DimArray

def _isnan(a, na=np.nan):
    """ Flag the elements of `a` that equal the missing-value marker `na`

    Analogous to numpy's isnan, but with a configurable marker.

    Parameters
    ----------
    a : array-like
        values to test
    na : scalar, optional
        marker for missing values (default np.nan). It may be non-numeric
        (e.g. None or a string), in which case it is compared by equality.

    Returns
    -------
    The result of ``np.isnan(a)`` when `na` is NaN, otherwise of ``a == na``.
    """
    try:
        na_is_nan = bool(np.isnan(na))
    except TypeError:
        # np.isnan rejects non-numeric markers (None, strings, ...); such a
        # marker cannot be NaN, so fall through to the equality test.
        na_is_nan = False

    if na_is_nan:
        return np.isnan(a)
    return a == na

def is_boolean_array(value):
    """ Tell whether `value` is a numpy array or a DimArray of boolean dtype

    Anything that is neither a numpy array nor a DimArray yields False.

    Examples
    --------
    >>> from dimarray import DimArray
    >>> a = DimArray([1,2,3])
    >>> is_boolean_array(a)
    False
    >>> is_boolean_array(a>1)
    True
    """
    if not (isinstance(value, np.ndarray) or is_DimArray(value)):
        return False
    # Test the dtype kind rather than object identity: an equivalent boolean
    # dtype is not guaranteed to be the very same object as np.dtype('bool')
    # (e.g. a dtype carrying metadata).
    return value.dtype.kind == 'b'

def _matches(a, value):
    """ Build a boolean mask of the elements of `a` selected by `value`

    Parameters
    ----------
    a : array-like
        values to test
    value : boolean array, scalar or iterable
        - boolean numpy array or DimArray : used as the mask itself
        - scalar (strings and bytes are treated as scalars) : selects the
          elements equal to it
        - any other iterable : union of the masks of its items, each item
          being interpreted recursively by the same rules

    Returns
    -------
    boolean mask (for a scalar, whatever ``a == value`` returns)
    """
    if is_boolean_array(value):
        # boolean array accepted
        return np.asarray(value)

    # Strings are iterable and so is each of their characters: recursing into
    # them would never terminate, so they are compared as plain scalars.
    if isinstance(value, (str, bytes)) or not np.iterable(value):
        return a == value

    masks = [_matches(a, item) for item in value]
    if not masks:
        # nothing to match: return an all-False mask shaped like `a` instead
        # of the scalar False that np.any would give on an empty list
        return np.zeros(np.shape(a), dtype=bool)
    return np.any(masks, axis=0)

def setna(self, value, na=np.nan, inplace=False):
    """ Mark the elements selected by `value` as missing

    Parameters
    ----------
    value : scalar, sequence or boolean array
        what to replace: a value, a sequence of values and/or boolean
        arrays, or a boolean array used directly as a mask
    na : the replacement value (default np.nan)
    inplace : bool, optional
        forwarded to `put` (default False)

    Returns
    -------
    the result of `put`

    Examples
    --------
    >>> from dimarray import DimArray
    >>> a = DimArray([1,2,-99])
    >>> a.setna(-99)
    dimarray: 2 non-null elements (1 null)
    0 / x0 (3): 0 to 2
    array([ 1.,  2., nan])
    >>> a.setna([-99, 2]) # sequence
    dimarray: 1 non-null elements (2 null)
    0 / x0 (3): 0 to 2
    array([ 1., nan, nan])
    >>> a.setna(a > 1) # boolean
    dimarray: 2 non-null elements (1 null)
    0 / x0 (3): 0 to 2
    array([  1.,  nan, -99.])
    >>> a = DimArray([[1,2,-99]])  # multi-dim
    >>> a.setna([-99, a>1])  # boolean
    dimarray: 1 non-null elements (2 null)
    0 / x0 (1): 0 to 0
    1 / x1 (3): 0 to 2
    array([[ 1., nan, nan]])
    """
    # mask of the elements to overwrite with `na`
    to_replace = _matches(self.values, value)
    return self.put(to_replace, na, cast=True, inplace=inplace)

def fillna(self, value, inplace=False, na=np.nan):
    """ Replace missing values with `value`

    Parameters
    ----------
    value : the replacement value
    inplace : bool, optional
        forwarded to `put` (default False)
    na : the marker identifying missing values (default np.nan)

    Returns
    -------
    the result of `put`

    Examples
    --------
    >>> from dimarray import DimArray
    >>> a = DimArray([1,2,np.nan])
    >>> a.fillna(-99)
    dimarray: 3 non-null elements (0 null)
    0 / x0 (3): 0 to 2
    array([  1.,   2., -99.])
    """
    # mask of the elements currently equal to the missing-value marker
    missing = _isnan(self.values, na=na)
    return self.put(missing, value, cast=True, inplace=inplace)

def dropna(self, axis=0, minvalid=None, na=np.nan):
    """ drop nans along an axis

    Parameters
    ----------
    axis : axis position or name or list of names
    minvalid : int, optional
        min number of valid point in each slice along axis values
        by default all the points.
        Not used for 1-dimensional arrays, where every missing point is
        dropped.
    na : optional
        marker identifying missing values (default np.nan)

    Returns
    -------
    DimArray

    Examples
    --------

    1-Dimension

    >>> from dimarray import DimArray
    >>> a = DimArray([1.,2,3],('time',[1950, 1955, 1960]))
    >>> a.ix[1] = np.nan
    >>> a
    dimarray: 2 non-null elements (1 null)
    0 / time (3): 1950 to 1960
    array([ 1., nan,  3.])
    >>> a.dropna()
    dimarray: 2 non-null elements (0 null)
    0 / time (2): 1950 to 1960
    array([1., 3.])

    Multi-dimensional

    >>> a = DimArray([[ np.nan, 2., 3.],[ np.nan, 5., np.nan]])
    >>> a
    dimarray: 3 non-null elements (3 null)
    0 / x0 (2): 0 to 1
    1 / x1 (3): 0 to 2
    array([[nan,  2.,  3.],
           [nan,  5., nan]])
    >>> a.dropna(axis=1)
    dimarray: 2 non-null elements (0 null)
    0 / x0 (2): 0 to 1
    1 / x1 (1): 1 to 1
    array([[2.],
           [5.]])
    >>> a.dropna(axis=1, minvalid=1)  # minimum number of valid values, equivalent to `how="all"` in pandas
    dimarray: 3 non-null elements (1 null)
    0 / x0 (2): 0 to 1
    1 / x1 (2): 1 to 2
    array([[ 2.,  3.],
           [ 5., nan]])
    """
    assert axis is not None, "axis cannot be None for dropna"

    idx, name = self._get_axis_info(axis)

    # 1-D: simply keep the non-missing points
    if self.ndim == 1:
        return self[~_isnan(self.values, na=na)]

    # N-D: group all the other dimensions into one axis inserted in first
    # position, so that summing over axis 0 counts, for each label of `axis`,
    # the missing points in the corresponding slice
    other_dims = [dim for dim in self.dims if dim != name]
    nans = _isnan(self, na=na).flatten(other_dims, insert=0)
    count_nans_axis = nans.sum(axis=0)  # number of missing points per slice

    # keep only the slices whose number of missing points is within the limit
    if minvalid is None:
        maxna = 0  # no missing point tolerated
    else:
        # size of the grouped axis = number of points in each slice
        maxna = nans.axes[0].size - minvalid

    return self.compress_axis(count_nans_axis <= maxna, axis=idx)