""" Functions and dimarray methods associated to array alignment
"""
from __future__ import print_function
from future.utils import string_types
from collections import OrderedDict as odict
import itertools
import warnings
import numpy as np
from dimarray.config import get_option
from dimarray.tools import is_DimArray
from dimarray.core.axes import Axes, Axis
from dimarray.core.indexing import locate_many
__all__ = ["broadcast_arrays", "align", "stack", "concatenate"]
def get_dims(*arrays):
    """Collect the dimension names found across several arrays.

    Note: not in the public API, but used by other modules.

    Parameters
    ----------
    *arrays : objects exposing an iterable ``axes`` attribute whose items
        have a ``name``

    Returns
    -------
    list
        Every axis name, without duplicates, in order of first appearance.
    """
    seen = []
    for name in (ax.name for obj in arrays for ax in obj.axes):
        if name in seen:
            continue
        seen.append(name)
    return seen
def _get_axes(*arrays):
    """Build the common list of axes of several axis-aligned arrays.

    For each dimension (as returned by `get_dims`), the first axis met is
    used, unless it is a singleton and a later array provides a
    non-singleton axis, in which case the latter replaces it.

    Parameters
    ----------
    *arrays : objects with ``dims`` and ``axes`` attributes

    Returns
    -------
    Axes
        One axis per dimension.

    Raises
    ------
    ValueError
        "axes are not aligned" if two non-singleton axes of the same
        dimension differ, in values or in length.
    """
    dims = get_dims(*arrays)  # all dimensions present in objects
    axes = Axes()

    for dim in dims:
        common_axis = None

        for o in arrays:
            # skip missing dimensions
            if dim not in o.dims:
                continue

            axis = o.axes[dim]

            # first axis met, or upgrade from a singleton to a full axis
            if common_axis is None or (common_axis.size == 1 and axis.size > 1):
                common_axis = axis

            # Test alignment for non-singleton axes. `np.array_equal` is
            # used rather than `np.all(a == b)` so that axes of different
            # lengths are reported as not aligned instead of triggering a
            # numpy broadcasting error.
            if not (axis.size == 1
                    or np.array_equal(axis.values, common_axis.values)):
                raise ValueError("axes are not aligned")

        axes.append(common_axis)

    return axes
def align_dims(*arrays):
    """Align dimensions of a list of arrays so that they are ready for broadcast.

    Every array is reshaped (via its own ``reshape`` method) onto the
    combined list of dimensions returned by `get_dims`; according to the
    original documentation this inserts singleton axes at the right place
    and transposes where needed.

    Note: not part of the public API, but used in other dimarray modules.

    Parameters
    ----------
    *arrays : objects with a hashable ``dims`` attribute and a ``reshape``
        method

    Returns
    -------
    list
        The arrays themselves when they already share the same ``dims``,
        the reshaped arrays otherwise. A list is returned in both cases.

    Examples
    --------
    >>> import dimarray as da
    >>> import numpy as np
    >>> x = da.DimArray(np.arange(2), dims=('x0',))
    >>> y = da.DimArray(np.arange(3), dims=('x1',))
    >>> align_dims(x, y)
    [dimarray: 2 non-null elements (0 null)
    0 / x0 (2): 0 to 1
    1 / x1 (1): None to None
    array([[0],
           [1]]), dimarray: 3 non-null elements (0 null)
    0 / x0 (1): None to None
    1 / x1 (3): 0 to 2
    array([[0, 1, 2]])]
    """
    # Work on a list so that the return type does not depend on the branch
    # taken (the raw *args tuple used to leak out of the fast path).
    arrays = list(arrays)

    # If dimensions are already equal, do nothing
    if len({o.dims for o in arrays}) == 1:
        return arrays

    # Determine the dimensions of the result
    newdims = get_dims(*arrays)

    # Reshape all arrays onto these dimensions
    return [o.reshape(newdims) for o in arrays]
def broadcast_arrays(*arrays):
    """Broadcast DimArrays against each other, like numpy.broadcast_arrays.

    Compared to numpy, the requirements on input shapes are looser
    (dimensions are first aligned by name with `align_dims`) and, per the
    original documentation, copies are returned instead of views.

    Parameters
    ----------
    arrays : variable list of DimArrays

    Returns
    -------
    list of DimArrays

    Raises
    ------
    ValueError
        If the axes of the arrays are not aligned.

    Examples
    --------
    Just as numpy's broadcast_arrays

    >>> import dimarray as da
    >>> x = da.DimArray([[1,2,3]])
    >>> y = da.DimArray([[1],[2],[3]])
    >>> da.broadcast_arrays(x, y)
    [dimarray: 9 non-null elements (0 null)
    0 / x0 (3): 0 to 2
    1 / x1 (3): 0 to 2
    array([[1, 2, 3],
           [1, 2, 3],
           [1, 2, 3]]), dimarray: 9 non-null elements (0 null)
    0 / x0 (3): 0 to 2
    1 / x1 (3): 0 to 2
    array([[1, 1, 1],
           [2, 2, 2],
           [3, 3, 3]])]
    """
    # Step 1: same dimensions everywhere (sizes are left untouched)
    same_dims = align_dims(*arrays)

    # Step 2: the axes to broadcast on; assertion failures are re-raised
    # as ValueError
    try:
        target_axes = _get_axes(*same_dims)
    except AssertionError as error:
        raise ValueError(error)

    # Step 3: broadcast every array along the common axes
    return [arr.broadcast(target_axes) for arr in same_dims]
def _common_axis(axes, join):
""" find the common axis among a list of axes ==> proceed recursively
"""
assert len(axes) > 0
# recursion end
if len(axes) == 1:
return axes[0]
# recursive call
ax0 = axes[0]
ax1 = _common_axis(axes[1:],join)
# special cases
# do not include None unless we have a singleton
if ax0[0] is None:
return ax1
if len(ax1) == 1 and ax1[0] is None:
return ax0
# TODO: make a separate version of the axis module
# with helper functions that do the basic work, without the whole
# Axis machinery. This may limit the possibility of optimizing the
# Axes via hidden attributes, but this would also make things simpler
# and prevents false "good" ideas (such as indeed, adding hidden attributes)
if join == 'outer':
com_axis = ax0.union(ax1)
else:
com_axis = ax0.intersection(ax1)
return com_axis
def _get_aligned_axes(arrays, join='outer', axis=None, sort=False, strict=False):
    """Compute the axes a list of arrays should be re-indexed on.

    Parameters
    ----------
    arrays : sequence of objects with ``dims`` and ``axes`` attributes
    join : str, optional
        forwarded to `_common_axis` ('outer' or 'inner')
    axis : str, optional
        single dimension to consider; None (default) means all dimensions
        found in `arrays`
    sort : bool, optional
        call ``sort()`` on each resulting axis
    strict : bool, optional
        raise if some array lacks one of the dimensions

    Returns
    -------
    Axes
        one common axis per considered dimension

    Raises
    ------
    ValueError
        if `axis` is neither None nor a string, or (``strict=True``) if a
        dimension is missing from some array
    """
    # which dimensions to work on
    if axis is None:
        dims = get_dims(*arrays)
    elif isinstance(axis, string_types):
        dims = [axis]
    else:
        raise ValueError("align: axis must be provided as a string")

    result = Axes()
    for name in dims:
        # arrays which have that dimension
        # NOTE(review): if `axis` names a dimension that no array has, this
        # list is empty and `_common_axis` fails on its assertion.
        holders = [arr for arr in arrays if name in arr.dims]
        if strict and len(holders) != len(arrays):
            raise ValueError("align (strict=True): some arrays lack dimension {}".format(name))

        # common axis to reindex on
        merged = _common_axis([arr.axes[name] for arr in holders], join)
        if sort:
            # NOTE(review): `_common_axis` may hand back one of the input
            # axes unchanged; confirm `sort()` cannot reorder an axis that
            # is still attached to an input array.
            merged.sort()
        result.append(merged)

    return result
def align(arrays, join='outer', axis=None, sort=False, strict=False):
    """Align axes of a list of DimArray arrays by reindexing

    Parameters
    ----------
    arrays : list or tuple of DimArrays or Datasets
        Scalars (as per `numpy.isscalar`) are accepted too and wrapped
        into a DimArray.
    join : {"outer", "inner"}, optional
        method to find the common axis
        "outer" : union of all axes, missing values filled with NaNs
        "inner" : intersection of all axes
        Default to "outer".
    axis : str, optional
        default to None : align all axes
        (must be a string since the axes do not necessarily match)
    sort : bool, optional
        Sort the axis prior to aligning.
        default to False
    strict : bool, optional
        if True, check that all arrays have the same dimensions

    Returns
    -------
    list of aligned DimArrays (or Datasets), in the same order as `arrays`

    Raises
    ------
    ValueError
        if `arrays` is neither a list nor a tuple
    TypeError
        if an element is not a DimArray, a Dataset or a scalar

    See Also
    --------
    `DimArray.reindex_axis`, `DimArray.reindex_like`

    Examples
    --------
    >>> from dimarray import DimArray, align
    >>> a = DimArray([0,1,2],axes=[[0,1,2]])
    >>> b = DimArray([1,2,3],axes=[[1,2,3]])
    >>> align([a, b])
    [dimarray: 3 non-null elements (1 null)
    0 / x0 (4): 0 to 3
    array([ 0., 1., 2., nan]), dimarray: 3 non-null elements (1 null)
    0 / x0 (4): 0 to 3
    array([nan, 1., 2., 3.])]
    >>> align([a, b], join='inner')
    [dimarray: 2 non-null elements (0 null)
    0 / x0 (2): 1 to 2
    array([1, 2]), dimarray: 2 non-null elements (0 null)
    0 / x0 (2): 1 to 2
    array([1, 2])]

    Also work on multi-dimensional arrays

    >>> a = DimArray([0,1], axes=[[0,1]]) # on 'x0' only
    >>> b = DimArray([[0,1],[2,3.],[4.,5.]], axes=[[0,1,2],[1,2]]) # one more element along the 1st dimension, 2nd dimension ignored
    >>> align([a, b])
    [dimarray: 2 non-null elements (1 null)
    0 / x0 (3): 0 to 2
    array([ 0., 1., nan]), dimarray: 6 non-null elements (0 null)
    0 / x0 (3): 0 to 2
    1 / x1 (2): 1 to 2
    array([[0., 1.],
           [2., 3.],
           [4., 5.]])]
    """
    if not isinstance(arrays, (list, tuple)):
        raise ValueError("align: only accepts list or tuple arguments. Got: {}".format(type(arrays)))

    # imported here rather than at module level
    from dimarray import DimArray, Dataset

    # work on a fresh list; wrap scalars, reject anything else
    aligned = list(arrays)
    for pos, item in enumerate(aligned):
        if isinstance(item, (DimArray, Dataset)):
            continue
        if not np.isscalar(item):
            raise TypeError("can only align DimArray and Dataset instances, got: {}".format(type(item)))
        aligned[pos] = DimArray(item)

    # find the common axes
    common = _get_aligned_axes(aligned, axis=axis, join=join, sort=sort, strict=strict)

    # reindex only the arrays that own the dimension and differ from the
    # common axis
    for ax in common:
        for pos, obj in enumerate(aligned):
            if ax.name in obj.dims and not np.all(obj.axes[ax.name] == ax):
                aligned[pos] = obj.reindex_axis(ax)

    return aligned


align_ = align  # for internal use, so that it does not conflict with "align" parameter
def _check_stack_args(arrays, keys=None):
""" generic function to deal with arguments for stacking
accepts arrays as sequence or dict and returns
a list of keys and values
"""
# convert dictionary to sequence + keys
if isinstance(arrays, dict):
if keys is None: keys = arrays.keys()
arrays = arrays.values()
# make sure the result is a sequence
if type(arrays) not in (list, tuple):
raise TypeError("argument must be a dictionary, list or tuple")
# make sure keys exist
if keys is None: keys = np.arange(len(arrays))
return arrays, keys
def _check_stack_axis(axis, dims, default='unnamed'):
""" check or get new axis name when stacking array or datasets
(just to have that in one place)
"""
if axis is None:
axis = default
if axis in dims:
i = 1
while default+"_{}".format(i) in dims:
i+=1
axis = default+"_{}".format(i)
if type(axis) is int:
raise TypeError("axis must be a str (new axis name)")
if axis in dims:
raise ValueError("please provide an axis name which does not \
already exist, or use `concatenate`")
return axis
def stack(arrays, axis=None, keys=None, align=False, **kwargs):
    """Stack arrays along a new dimension (raise error if already existing)

    Parameters
    ----------
    arrays : sequence or dict of arrays
    axis : str, optional
        new dimension along which to stack the array
    keys : array-like, optional
        stack axis values, useful if array is a sequence, or a non-ordered dictionary
    align : bool, optional
        if True, align axes prior to stacking (Default to False)
    **kwargs : optional key-word arguments passed to align, if align is True
        (``strict=True`` is always enforced)

    Returns
    -------
    DimArray : joint array, with the new axis in first position

    Raises
    ------
    TypeError
        if one of the items is not a DimArray
    ValueError
        if the axes of the arrays are not aligned

    See Also
    --------
    concatenate : join arrays along an existing dimension
    swapaxes : to modify the position of the newly inserted axis

    Examples
    --------
    >>> from dimarray import DimArray
    >>> a = DimArray([1,2,3])
    >>> b = DimArray([11,22,33])
    >>> stack([a, b], axis='stackdim', keys=['a','b'])
    dimarray: 6 non-null elements (0 null)
    0 / stackdim (2): 'a' to 'b'
    1 / x0 (3): 0 to 2
    array([[ 1, 2, 3],
           [11, 22, 33]])
    """
    assert not isinstance(axis, int), "axis must be a str (you are creating a new axis)"

    # normalize the input into a sequence of arrays plus their labels
    arrays, keys = _check_stack_args(arrays, keys)
    if not all(is_DimArray(item) for item in arrays):
        raise TypeError('can only stack DimArray instances')

    # the stacking dimension must be a new one
    axis = _check_stack_axis(axis, get_dims(*arrays))

    # re-index axes if needed
    if align:
        kwargs['strict'] = True
        arrays = align_(arrays, **kwargs)

    # pile up the values along a new leading dimension
    data = np.array([item.values for item in arrays])

    # axis describing that new dimension
    stack_axis = Axis(keys, axis)

    # axes shared by all items
    try:
        shared_axes = _get_axes(*arrays)
    except ValueError as msg:
        # point the user to the `align` option when it would help
        if 'axes are not aligned' in repr(msg):
            msg = 'axes are not aligned\n ==> Try passing `align=True`'
        raise ValueError(msg)

    # the new axis comes first
    newaxes = [stack_axis] + shared_axes

    # same class as the first item (DimArray)
    return arrays[0]._constructor(data, axes=newaxes)
def _concatenate_axes(axes):
""" concatenate Axis objects
axes: list of Axis objects
>>> a = Axis([1,2,3],'x0')
>>> b = Axis([5,6,7],'x0')
>>> ax = _concatenate_axes((a, b))
>>> ax.name
'x0'
>>> ax.values
array([1, 2, 3, 5, 6, 7])
"""
#assert np.iterable(axes) and axes
#if not isinstance(axes[0], Axis): raise TypeError()
if len({ax.name for ax in axes}) != 1:
print(axes)
raise ValueError("axis names differ!")
values = np.concatenate([ax.values for ax in axes])
return Axis(values, axes[0].name)
def concatenate(arrays, axis=0, _no_check=False, align=False, **kwargs):
    """Concatenate several DimArrays along an existing axis

    Parameters
    -----------
    arrays : list or tuple of DimArrays
        arrays to concatenate (at least one)
    axis : int or str
        axis along which to concatenate (must exist). Negative integers
        count from the last axis.
    _no_check : bool, optional
        if True, skip the check that secondary axes match
    align : bool, optional
        align secondary axes before joining on the primary
        axis `axis`. Default to False.
    **kwargs : optional key-word arguments passed to align, if align is True
        (``strict=True`` is always enforced)

    Returns
    -------
    concatenated DimArray

    Raises
    ------
    ValueError
        if `arrays` is not a non-empty list or tuple of DimArrays, or if
        secondary axes do not match (and neither `align` nor `_no_check`
        is set)
    IndexError
        if an integer `axis` is out of range

    See Also
    --------
    stack: join arrays along a new dimension
    align: align arrays

    Examples
    --------
    1-D

    >>> from dimarray import DimArray
    >>> a = DimArray([1,2,3], axes=[['a','b','c']])
    >>> b = DimArray([4,5,6], axes=[['d','e','f']])
    >>> concatenate((a, b))
    dimarray: 6 non-null elements (0 null)
    0 / x0 (6): 'a' to 'f'
    array([1, 2, 3, 4, 5, 6])

    2-D

    >>> a = DimArray([[1,2,3],[11,22,33]])
    >>> b = DimArray([[4,5,6],[44,55,66]])
    >>> concatenate((a, b), axis=0)
    dimarray: 12 non-null elements (0 null)
    0 / x0 (4): 0 to 1
    1 / x1 (3): 0 to 2
    array([[ 1, 2, 3],
           [11, 22, 33],
           [ 4, 5, 6],
           [44, 55, 66]])
    >>> concatenate((a, b), axis='x1')
    dimarray: 12 non-null elements (0 null)
    0 / x0 (2): 0 to 1
    1 / x1 (6): 0 to 2
    array([[ 1, 2, 3, 4, 5, 6],
           [11, 22, 33, 44, 55, 66]])
    """
    # input argument check
    if not isinstance(arrays, (list, tuple)):
        raise ValueError("arrays must be list or tuple, got {}:{}".format(type(arrays), arrays))
    arrays = list(arrays)
    if not arrays:
        raise ValueError("concatenate: need at least one array")

    from dimarray import DimArray, Dataset
    for a in arrays:
        if isinstance(a, Dataset):
            msg = "\n==>Note: you may use `concatenate_ds` for Datasets"
            raise ValueError("concatenate: expected DimArray. Got {}".format(type(a))+msg)
        # Scalars are rejected like any other non-DimArray (a 0-d array has
        # no axis to join on); this was already the effective behaviour.
        if not isinstance(a, DimArray):
            raise ValueError("concatenate: expected DimArray. Got {}".format(type(a)))

    # Resolve `axis` into a non-negative position: the axes book-keeping
    # below (filtering and slicing by position) is wrong for negative values.
    ndim = len(arrays[0].dims)
    if isinstance(axis, (int, np.integer)):
        axis = int(axis)
        if axis < 0:
            axis += ndim
        if not 0 <= axis < ndim:
            raise IndexError("concatenate: axis is out of range for arrays with {} dimension(s)".format(ndim))
    else:
        axis = arrays[0].dims.index(axis)
    dim = arrays[0].dims[axis]

    # align secondary axes prior to concatenate
    # TODO: just encourage user to use align outside this function
    # and remove argument passing
    if align:
        kwargs['strict'] = True
        for ax in arrays[0].axes:
            if ax.name != dim:
                arrays = align_(arrays, axis=ax.name, **kwargs)

    values = np.concatenate([a.values for a in arrays], axis=axis)

    # secondary axes, taken from the first array
    subaxes = [ax for i, ax in enumerate(arrays[0].axes) if i != axis]

    # concatenate axis values
    newaxis = _concatenate_axes([a.axes[axis] for a in arrays])

    if not align and not _no_check:
        # check that other axes match (array_equal also copes with axes of
        # different lengths)
        for ax in subaxes:
            for a in arrays:
                if not np.array_equal(a.axes[ax.name].values, ax.values):
                    raise ValueError("contatenate: secondary axes do not match. Align first? (`align=True`)")

    # put the joined axis back at its position
    newaxes = subaxes[:axis] + [newaxis] + subaxes[axis:]

    return arrays[0]._constructor(values, newaxes)
##############################################################
# The functions below are meant to be used as DimArray methods
##############################################################
#
# Reindex axis
#
def reindex_axis(self, values, axis=0, fill_value=np.nan, raise_error=False, method=None):
    """Reindex an array along an axis

    Parameters
    ----------
    values : array-like or Axis
        new axis values. If an Axis is provided, its name is used and the
        `axis` parameter is ignored.
    axis : int or str, optional
        axis number or name
    fill_value : optional
        Fill data to use for missing axis value,
        if `raise_error` is False and `method` is None. Default is NaN.
    raise_error : bool, optional
        if True, raise error when an axis value is not present
        otherwise just replace with `fill_value`. Default is False
    method : {None, 'left', 'right'}
        method to fill the gaps (default None)
        Forwarded as `side` to `locate_many` ('left' is used when None).
        When not None, the data found at the located positions is kept
        instead of `fill_value`.

    Returns
    -------
    dimarray: DimArray instance

    Raises
    ------
    TypeError
        if `values` is a scalar or a slice
    IndexError
        if `raise_error` is True and some values are not found

    Examples
    --------
    Basic reindexing: fill missing values with NaN

    >>> import dimarray as da
    >>> a = da.DimArray([1,2,3],axes=[('x0', [1,2,3])])
    >>> b = da.DimArray([3,4],axes=[('x0',[1,3])])
    >>> b.reindex_axis([1,2,3])
    dimarray: 2 non-null elements (1 null)
    0 / x0 (3): 1 to 3
    array([ 3., nan, 4.])

    Or replace with anything else, like -9999

    >>> b.reindex_axis([1,2,3], fill_value=-9999)
    dimarray: 3 non-null elements (0 null)
    0 / x0 (3): 1 to 3
    array([ 3, -9999, 4])
    """
    if isinstance(values, Axis):
        target = values
        values = target.values
        axis = target.name
    elif np.isscalar(values) or type(values) is slice:
        raise TypeError("Please provide list, array-like or Axis object to perform re-indexing")
    else:
        values = np.asarray(values)

    # positions of the requested values in the current axis
    current = self.axes[axis]
    side = method or 'left'
    positions = locate_many(current.values, values, side=side)
    result = self.take_axis(positions, axis, indexing='position')

    # requested values that the located positions do not actually hold
    missing = current.values.take(positions) != values
    if not np.any(missing):
        return result

    if raise_error:
        raise IndexError("Some values where not found in the axis: {}".format(values[missing]))

    # without a filling method, mismatches become missing values
    if method is None:
        result.put(missing, fill_value, axis=axis, inplace=True, indexing="position", cast=True)

    # Make sure the axis values match the requested new axis
    # NOTE(review): assumed to apply whatever `method` is -- confirm
    result.axes[axis][missing] = values[missing]

    return result
def reindex_axis_with_pandas(obj, values, axis=0, fill_value=np.nan):
    """Reindex `obj` along an axis by a round trip through pandas.

    Convert to and from pandas to use a faster (?) indexing method.

    Parameters
    ----------
    obj : DimArray
        must provide `to_pandas`, `from_pandas`, `_get_axis_info`,
        `attrs` and `axes`
    values : array-like
        new axis values
    axis : int or str, optional
        axis number or name, resolved with ``obj._get_axis_info``
    fill_value : optional
        value used for labels missing from the original axis
        (ignored by pandas versions whose `reindex_axis` does not accept it)

    Returns
    -------
    new object built with ``obj.from_pandas``, carrying the metadata
    (`attrs`) of `obj` and the original axis name
    """
    import pandas  # fail early if pandas is not installed

    pandasobj = obj.to_pandas()
    axis_id, axis_nm = obj._get_axis_info(axis)

    if hasattr(pandasobj, 'reindex_axis'):
        # legacy pandas (< 1.0): keep using the historical method
        try:
            newpandas = pandasobj.reindex_axis(values, axis=axis_id, fill_value=fill_value)
        except TypeError:
            # older versions of pandas do not have the fill_value parameter
            newpandas = pandasobj.reindex_axis(values, axis=axis_id)
    else:
        # `reindex_axis` was removed in pandas 1.0; `reindex` with an
        # explicit `axis` is its replacement
        newpandas = pandasobj.reindex(values, axis=axis_id, fill_value=fill_value)

    newobj = obj.from_pandas(newpandas)  # use class method from_pandas
    newobj.attrs.update(obj.attrs)  # add metadata back
    newobj.axes[axis_id].name = axis_nm  # give back original name
    return newobj
def reindex_like(self, other, **kwargs):
    """Re-index like another dimarray / axes instance

    Applies reindex_axis on each axis to match another DimArray

    Parameters
    ----------
    other : DimArray or Axes instance
        (any object with an ``axes`` attribute is accepted)
    **kwargs : forwarded to `reindex_axis`

    Returns
    -------
    DimArray

    Raises
    ------
    TypeError
        if `other` has no ``axes`` attribute and is not an Axes instance

    Notes
    -----
    only reindex axes which are present in other

    Examples
    --------
    >>> import dimarray as da
    >>> b = da.DimArray([3,4],('x0',[1,3]))
    >>> c = da.DimArray([[1,2,3], [1,2,3]],[('x1',["a","b"]),('x0',[1, 2, 3])])
    >>> b.reindex_like(c)
    dimarray: 2 non-null elements (1 null)
    0 / x0 (3): 1 to 3
    array([ 3., nan, 4.])
    """
    # where do the target axes come from?
    if hasattr(other, 'axes'):
        target_axes = other.axes
    elif isinstance(other, Axes):
        target_axes = other
    else:
        raise TypeError('expected DimArray or Axes, got {}: {}'.format(type(other), other))

    target_names = [target.name for target in target_axes]

    # reindex, one after the other, the axes that `other` knows about
    result = self
    for name in (own.name for own in self.axes):
        if name not in target_names:
            continue
        result = result.reindex_axis(target_axes[name].values, axis=name, **kwargs)

    return result
def sort_axis(a, axis=0, key=None, kind='quicksort'):
    """Sort an axis

    Parameters
    ----------
    a : DimArray (this argument is pre-assigned when using as bound method)
    axis : int or str, optional
        axis by position (int) or name (str) (default: 0)
    key : callable or dict-like, optional
        function that is called on each axis label and
        whose return value is used for sorting instead of axis label.
        Any other object with __getitem__ attribute may also be used as key,
        such as a dictionary.
        If None (the default), axis label is used for sorting.
    kind : str, optional
        sort algorithm (see numpy.sort for more info); only used when
        `key` is None

    Returns
    --------
    sorted : new DimArray with sorted axis

    Examples
    --------
    Basic

    >>> from dimarray import DimArray
    >>> a = DimArray([10,20,30], labels=[2, 0, 1])
    >>> a
    dimarray: 3 non-null elements (0 null)
    0 / x0 (3): 2 to 1
    array([10, 20, 30])
    >>> a.sort_axis()
    dimarray: 3 non-null elements (0 null)
    0 / x0 (3): 0 to 2
    array([20, 30, 10])
    >>> a.sort_axis(key=lambda x: -x)
    dimarray: 3 non-null elements (0 null)
    0 / x0 (3): 2 to 0
    array([10, 30, 20])

    Multi-dimensional

    >>> a = DimArray([[10,20,30],[40,50,60]], labels=[[0, 1], ['a','c','b']])
    >>> a.sort_axis(axis=1)
    dimarray: 6 non-null elements (0 null)
    0 / x0 (2): 0 to 1
    1 / x1 (3): 'a' to 'c'
    array([[10, 30, 20],
           [40, 60, 50]])
    """
    labels = a.axes[axis].values

    if key is None:
        # the default: let numpy sort the labels themselves
        order = labels.argsort(kind=kind)
    else:
        # a non-callable object with item access (e.g. a dict) is used
        # through its lookup method
        is_lookup = not hasattr(key, '__call__') and hasattr(key, '__getitem__')
        sort_key = key.__getitem__ if is_lookup else key
        order = argsort(labels, sort_key)

    return a.take_axis(order, axis=axis, indexing='position')
def argsort(seq, key=None):
    """Return the indices that would sort `seq` (numpy's argsort in basic python)

    Modified after http://stackoverflow.com/questions/3382352/equivalent-of-numpy-argsort-in-basic-python

    Parameters
    ----------
    seq : sequence (supports ``len`` and item access)
    key : callable, optional
        applied to each element; its return value is used for sorting
        instead of the element itself

    Returns
    -------
    list of int

    Examples
    --------
    >>> a = ['a', 'd', 'c']
    >>> argsort(a)
    [0, 2, 1]
    >>> argsort(a, key=lambda x: {'a':2,'c':1,'d':0}[x])
    [1, 2, 0]
    """
    positions = range(len(seq))
    if key is None:
        return sorted(positions, key=lambda i: seq[i])
    return sorted(positions, key=lambda i: key(seq[i]))