reduced = {}
for key, da in self.obj.data_vars.items():
    reduced[key] = da.variable.coarsen(
        self.windows, func, self.boundary, self.side, **kwargs
    )
coords = {}
for c, v in self.obj.coords.items():
if any(d in self.windows for d in v.dims):
coords[c] = v.variable.coarsen(
self.windows,
self.coord_func[c],
self.boundary,
self.side,
**kwargs,
)
else:
coords[c] = v.variable
return Dataset(reduced, coords=coords)
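# Hedged usage sketch (not part of the xarray source above): the wrapper above
# backs reductions on Dataset.coarsen objects via the public API. The dataset
# and names below ("t", "temp") are invented for illustration.
import numpy as np
import xarray as xr

ds = xr.Dataset({"temp": ("t", np.arange(8.0))})
coarse = ds.coarsen(t=2, boundary="exact").mean()
print(coarse["temp"].values)  # 4 values, each the mean of a window of 2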
>>> da_vals = np.arange(6 * 5 * 4).reshape((6, 5, 4))
>>> da = DataArray(da_vals, dims=['x', 'y', 'z'])
>>> dm_vals = np.arange(4)
>>> dm = DataArray(dm_vals, dims=['z'])
>>> dm.dims
('z',)
>>> da.dims
('x', 'y', 'z')
>>> dot_result = da.dot(dm)
>>> dot_result.dims
('x', 'y')
"""
if isinstance(other, Dataset):
raise NotImplementedError(
"dot products are not yet supported with Dataset objects."
)
if not isinstance(other, DataArray):
raise TypeError("dot only operates on DataArrays.")
return computation.dot(self, other, dims=dims)
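# Hedged follow-up to the docstring example above: the dims argument is
# forwarded to computation.dot and selects which shared dimensions are
# contracted (all shared dims are contracted when dims is None). Array names
# below are invented.
import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(6 * 4).reshape(6, 4), dims=["x", "z"])
dm = xr.DataArray(np.arange(4 * 5).reshape(4, 5), dims=["z", "y"])
print(da.dot(dm).dims)            # ('x', 'y'): 'z' is contracted by default
print(da.dot(dm, dims="z").dims)  # same result, with the contraction made explicit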
def _fast_dataset(
variables: Dict[Hashable, Variable], coord_variables: Mapping[Hashable, Variable]
) -> "Dataset":
"""Create a dataset as quickly as possible.
Beware: the `variables` dict is modified INPLACE.
"""
from .dataset import Dataset
variables.update(coord_variables)
coord_names = set(coord_variables)
return Dataset._construct_direct(variables, coord_names)
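# Hedged illustration of _fast_dataset (a private helper; the import path
# below assumes it lives in xarray.core.computation as in the snippet's source
# tree, and the names are made up). It skips the validation and alignment done
# by the public Dataset constructor, so the Variables passed in must already
# be consistent with each other.
import numpy as np
from xarray import Variable
from xarray.core.computation import _fast_dataset

data_vars = {"a": Variable(("x",), np.arange(3))}
coord_vars = {"x": Variable(("x",), np.array([10, 20, 30]))}
ds = _fast_dataset(data_vars, coord_vars)  # note: data_vars is updated in place
print(ds)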
Data variables:
temperature (x, y, time) float64 25.86 20.82 6.954 23.13 10.25 11.68 ...
precipitation (x, y, time) float64 5.702 0.9422 2.075 1.178 3.284 ...
"""
selection = []
for var_name, variable in self.data_vars.items():
for attr_name, pattern in kwargs.items():
attr_value = variable.attrs.get(attr_name)
if ((callable(pattern) and pattern(attr_value)) or
attr_value == pattern):
selection.append(var_name)
return self[selection]
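# Hedged example of the attribute filtering above, through the public
# Dataset.filter_by_attrs method; the variable and attribute values are
# invented.
import numpy as np
import xarray as xr

ds = xr.Dataset(
    {
        "temperature": ("x", np.zeros(3), {"standard_name": "air_temperature"}),
        "precipitation": ("x", np.zeros(3), {"standard_name": "precipitation_flux"}),
    }
)
# match on an exact attribute value ...
print(ds.filter_by_attrs(standard_name="air_temperature").data_vars)
# ... or on a callable applied to the attribute value
print(ds.filter_by_attrs(standard_name=lambda v: v is not None).data_vars)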
ops.inject_all_ops_and_reduce_methods(Dataset, array_only=False)
if len(args) > 1:
    args = deep_align(
        args, join=join, copy=False, exclude=exclude_dims, raise_on_invalid=False
    )
list_of_coords = build_output_coords(args, signature, exclude_dims)
args = [getattr(arg, "data_vars", arg) for arg in args]
result_vars = apply_dict_of_variables_vfunc(
func, *args, signature=signature, join=dataset_join, fill_value=fill_value
)
if signature.num_outputs > 1:
out = tuple(_fast_dataset(*args) for args in zip(result_vars, list_of_coords))
else:
(coord_vars,) = list_of_coords
out = _fast_dataset(result_vars, coord_vars)
if keep_attrs and isinstance(first_obj, Dataset):
if isinstance(out, tuple):
    # _copy_attrs_from updates attrs in place and returns None, so loop over
    # the outputs rather than rebuilding the tuple from its return values
    for ds in out:
        ds._copy_attrs_from(first_obj)
else:
out._copy_attrs_from(first_obj)
return out
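# Hedged sketch: apply_dataset_vfunc above is the Dataset branch used by the
# public xr.apply_ufunc entry point. A minimal element-wise example (names
# invented):
import numpy as np
import xarray as xr

ds = xr.Dataset({"a": ("x", np.arange(4.0)), "b": ("x", np.ones(4))})
squared = xr.apply_ufunc(np.square, ds)  # applied to every data variable
print(squared)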
def _broadcast_array(array):
data = _set_dims(array.variable)
coords = dict(array.coords)
coords.update(common_coords)
return DataArray(data, coords, data.dims, name=array.name, attrs=array.attrs)
def _broadcast_dataset(ds):
data_vars = {k: _set_dims(ds.variables[k]) for k in ds.data_vars}
coords = dict(ds.coords)
coords.update(common_coords)
return Dataset(data_vars, coords, ds.attrs)
if isinstance(arg, DataArray):
return _broadcast_array(arg)
elif isinstance(arg, Dataset):
return _broadcast_dataset(arg)
else:
raise ValueError("all input must be Dataset or DataArray objects")
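# Hedged sketch: _broadcast_array and _broadcast_dataset above implement the
# public xr.broadcast for DataArray and Dataset arguments (dimension names
# invented):
import numpy as np
import xarray as xr

a = xr.DataArray(np.arange(3), dims="x")
b = xr.DataArray(np.arange(2), dims="y")
a2, b2 = xr.broadcast(a, b)
print(a2.dims, b2.dims)  # both are now ('x', 'y')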
# We're adding multiple new layers to the graph:
# The first new layer is the result of the computation on
# the array.
# Then we add one layer per variable, which extracts the
# result for that variable, and depends on just the first new
# layer.
new_layers[gname_l][key] = (operator.getitem, from_wrapper, name)
hlg = HighLevelGraph.from_collections(gname, graph, dependencies=[dataset])
for gname_l, layer in new_layers.items():
# This adds in the getitems for each variable in the dataset.
hlg.dependencies[gname_l] = {gname}
hlg.layers[gname_l] = layer
result = Dataset(coords=indexes, attrs=template.attrs)
for name, gname_l in var_key_map.items():
dims = template[name].dims
var_chunks = []
for dim in dims:
if dim in input_chunks:
var_chunks.append(input_chunks[dim])
elif dim in indexes:
var_chunks.append((len(indexes[dim]),))
data = dask.array.Array(
hlg, name=gname_l, chunks=var_chunks, dtype=template[name].dtype
)
result[name] = (dims, data, template[name].attrs)
result = result.set_coords(template._coord_names)
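# Hedged sketch: the graph assembly above backs xr.map_blocks, which applies a
# function to every dask chunk of a Dataset and stitches the results back
# together (requires dask; names invented):
import numpy as np
import xarray as xr

ds = xr.Dataset({"a": ("x", np.arange(10.0))}).chunk({"x": 5})
doubled = xr.map_blocks(lambda block: block * 2, ds)
print(doubled.compute())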
def _dummy_copy(xarray_obj):
from .dataset import Dataset
from .dataarray import DataArray
if isinstance(xarray_obj, Dataset):
res = Dataset(
{
k: dtypes.get_fill_value(v.dtype)
for k, v in xarray_obj.data_vars.items()
},
{
k: dtypes.get_fill_value(v.dtype)
for k, v in xarray_obj.coords.items()
if k not in xarray_obj.dims
},
xarray_obj.attrs,
)
elif isinstance(xarray_obj, DataArray):
res = DataArray(
dtypes.get_fill_value(xarray_obj.dtype),
{
    k: dtypes.get_fill_value(v.dtype)
    for k, v in xarray_obj.coords.items()
    if k not in xarray_obj.dims
},
dims=[],
name=xarray_obj.name,
attrs=xarray_obj.attrs,
)
return res
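# Hedged illustration of what _dummy_copy returns: an object with the same
# structure but filled with dtype-appropriate fill values (NaN for floats),
# used by groupby to stand in for missing groups. The import path assumes the
# helper lives in xarray.core.groupby as in the snippet's source tree.
import numpy as np
import xarray as xr
from xarray.core.groupby import _dummy_copy

da = xr.DataArray(np.arange(3.0), dims="x", name="foo")
print(_dummy_copy(da))  # scalar DataArray named "foo", filled with NaN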
var = var.set_dims(common_dims, common_shape)
yield var
# stack up each variable to fill-out the dataset (in order)
# n.b. this loop preserves variable order, needed for groupby.
for k in datasets[0].variables:
if k in concat_over:
try:
vars = ensure_common_dims([ds.variables[k] for ds in datasets])
except KeyError:
raise ValueError("%r is not present in all datasets." % k)
combined = concat_vars(vars, dim, positions)
assert isinstance(combined, Variable)
result_vars[k] = combined
result = Dataset(result_vars, attrs=result_attrs)
absent_coord_names = coord_names - set(result.variables)
if absent_coord_names:
raise ValueError(
"Variables %r are coordinates in some datasets but not others."
% absent_coord_names
)
result = result.set_coords(coord_names)
result.encoding = result_encoding
result = result.drop_vars(unlabeled_dims, errors="ignore")
if coord is not None:
# add concat dimension last to ensure that it's in the final Dataset
result[coord.name] = coord
return result
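# Hedged sketch: the loop above is part of the Dataset code path behind the
# public xr.concat; a minimal call (names invented):
import numpy as np
import xarray as xr

ds1 = xr.Dataset({"a": ("t", np.arange(3.0))}, coords={"t": [0, 1, 2]})
ds2 = xr.Dataset({"a": ("t", np.arange(3.0, 6.0))}, coords={"t": [3, 4, 5]})
combined = xr.concat([ds1, ds2], dim="t")
print(combined["a"].values)  # [0. 1. 2. 3. 4. 5.]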