How to use the xarray.open_dataset function in xarray

To help you get started, we've selected a few xarray.open_dataset examples based on popular ways it is used in public projects.
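
Before diving into the project examples, here is a minimal, self-contained sketch of a typical xarray.open_dataset call; the file name, variable names, and chunk sizes are placeholders rather than values taken from any of the projects below.

import xarray as xr

# Open a NetCDF file lazily; data values are only read when they are accessed.
ds = xr.open_dataset("example.nc")  # placeholder file name
print(ds.data_vars)                 # variables available in the file

# Optionally back the dataset with dask arrays by passing chunk sizes,
# as several of the examples below do.
ds_chunked = xr.open_dataset("example.nc", chunks={"time": 100})

ds.close()  # release the underlying file handle when finished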


github jweyn / DLWP / Azure / train_func.py
validation_set = list(pd.date_range(datetime(2003, 1, 1, 0), datetime(2006, 12, 31, 18), freq='6H'))
train_set = list(pd.date_range(datetime(1979, 1, 1, 6), datetime(2002, 12, 31, 18), freq='6H'))


#%% Open data. If temporary file is specified, copy it there.

if args.temp_dir != 'None':
    new_predictor_file = os.path.join(args.temp_dir, args.predictor_file)
    print('Copying predictor file to %s...' % new_predictor_file)
    if os.path.isfile(new_predictor_file):
        print('File already exists!')
    else:
        shutil.copy(predictor_file, new_predictor_file, follow_symlinks=True)
    data = xr.open_dataset(new_predictor_file, chunks={'sample': batch_size})
else:
    data = xr.open_dataset(predictor_file, chunks={'sample': batch_size})
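# Passing chunks={'sample': batch_size} makes xarray wrap the variables in
# dask arrays, so samples are loaded lazily in batch-sized blocks rather than
# reading the whole predictor file into memory at once.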

if 'time_step' in data.dims:
    time_dim = data.dims['time_step']
else:
    time_dim = 1
n_sample = data.dims['sample']

if crop_north_pole:
    data = data.isel(lat=(data.lat < 90.0))


#%% Create a model and the data generators

dlwp = DLWPFunctional(is_convolutional=model_is_convolutional, is_recurrent=model_is_recurrent, time_dim=io_time_steps)

# Find the validation set
github dcs4cop / xcube / xcube / cli / restime.py
def _read_dataset(input_file):
    input_file_name = os.path.basename(input_file)
    if os.path.isdir(input_file):
        if input_file_name.endswith('.zarr'):
            ds = xr.open_zarr(input_file)
        else:
            ds = xr.open_mfdataset(glob.glob(os.path.join(input_file, '**', '*.nc'), recursive=True))
    else:
        if input_file_name.endswith('.zarr.zip'):
            ds = xr.open_zarr(input_file)
        else:
            ds = xr.open_dataset(input_file)
    return ds
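
As a rough usage illustration (the paths here are hypothetical, not from the xcube sources), the helper above returns an xarray Dataset regardless of whether it is given a Zarr store, a directory of NetCDF files, or a single NetCDF file:

ds = _read_dataset("cube.zarr")    # Zarr directory, opened with xr.open_zarr
ds = _read_dataset("nc_folder")    # directory of .nc files, combined with xr.open_mfdataset
ds = _read_dataset("scene.nc")     # single file, opened with xr.open_dataset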
github pydata / xarray / asv_bench / benchmarks / dataset_io.py
def time_load_dataset_netcdf4_with_block_chunks_vindexing(self):
        ds = xr.open_dataset(self.filepath, engine="netcdf4", chunks=self.block_chunks)
        ds = ds.isel(**self.vinds).load()
github deeplycloudy / glmtools / examples / grid / grid_sample_data.py
        gridder(glm_filenames, start_time, end_time, **grid_kwargs)
        
        # print("Output file sizes")
        for entry in os.scandir(os.path.join(tmpdirname, *resulting_date_path)):
            
            # File size should be close to what we expect, with some platform
            # differences due to OS, compression, etc.
            target = output_sizes[entry.name]
            actual = entry.stat().st_size
            percent = 1
            assert np.abs(target - actual) < int(target * percent / 100)

            # Now compare the contents directly
            valid_file = os.path.join(sample_path, dirname, 
                                      *resulting_date_path, entry.name)
            valid = xr.open_dataset(valid_file)
            check = xr.open_dataset(entry.path)
            xr.testing.assert_allclose(valid, check)
github landlab / landlab / landlab / graph / graph.py
def from_netcdf(cls, fname):
        return cls.from_dataset(xr.open_dataset(fname))
github monocongo / climate_indices / scripts / process_grid_ufunc.py
def compute_write_pet(kwrgs):

    # open the temperature NetCDF as an xarray Dataset object
    dataset = xr.open_dataset(kwrgs["netcdf_temp"])

    # trim out all data variables from the dataset except the temperature
    for var in dataset.data_vars:
        if var not in kwrgs["var_name_temp"]:
            dataset = dataset.drop(var)

    # get the initial year of the data
    data_start_year = int(str(dataset["time"].values[0])[0:4])

    _logger.info("Computing PET")

    # get the temperature and latitude arrays, over which we'll compute the PET
    da_temp = dataset[kwrgs["var_name_temp"]]

    # create a DataArray with the same shape as temperature, fill all lon/times with the lat value for the lat index
    da_lat_orig = dataset["lat"]
github ceos-seo / data_cube_ui / apps / spectral_indices / tasks.py
    if check_cancel_task(self, task): return

    # sorting based on time id - earlier chunks are processed first, as their ids are incremented e.g. 0, 1, 2...
    chunks = chunks if isinstance(chunks, list) else [chunks]
    chunks = [chunk for chunk in chunks if chunk is not None]
    if len(chunks) == 0:
        return None
    total_chunks = sorted(chunks, key=lambda x: x[0])
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']

    metadata = {}
    combined_data = None
    for index, chunk in enumerate(total_chunks):
        metadata.update(chunk[1])
        data = xr.open_dataset(chunk[0])
        if combined_data is None:
            combined_data = data
            continue
        # give time an index to keep mosaicking from breaking.
        data = xr.concat([data], 'time')
        data['time'] = [0]
        clear_mask = task.satellite.get_clean_mask_func()(data)
        combined_data = task.get_processing_method()(data,
                                                     clean_mask=clear_mask,
                                                     intermediate_product=combined_data,
                                                     no_data=task.satellite.no_data_value,
                                                     reverse_time=task.get_reverse_time())
        if check_cancel_task(self, task): return
    if combined_data is None:
        return None
github Unidata / python-gallery / examples / Satellite_Example.py
from metpy.io import GiniFile
from metpy.units import units
from netCDF4 import num2date
import scipy.ndimage as ndimage
from siphon.catalog import TDSCatalog
import xarray as xr


##############################################
# Get satellite data and set projection based on that data.

# Scan the catalog and download the data
satcat = TDSCatalog('http://thredds.ucar.edu/thredds/catalog/satellite/'
                    'WV/WEST-CONUS_4km/current/catalog.xml')
dataset = satcat.datasets[0]
f = GiniFile(dataset.remote_open())
gini_ds = xr.open_dataset(f)
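# Note that the GiniFile object is passed to open_dataset directly: in the
# MetPy version this gallery example targets, GiniFile acts as an
# xarray-compatible data store, so no intermediate NetCDF file is needed.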

# Pull parts out of the data file
dat = gini_ds.metpy.parse_cf('WV')
data_var = gini_ds.variables['WV']
x = gini_ds.variables['x'][:]
y = gini_ds.variables['y'][:]
timestamp = f.prod_desc.datetime

##############################################
# Use Siphon to obtain data that is close to the time of the satellite file

gfscat = TDSCatalog('http://thredds.ucar.edu/thredds/catalog/grib/'
                    'NCEP/GFS/Global_0p5deg/catalog.xml')
dataset = gfscat.datasets['Best GFS Half Degree Forecast Time Series']
ncss = dataset.subset()
github MPAS-Dev / MPAS-Analysis / mpas_analysis / ocean / ocean_regional_profiles.py
        dsRestart = xr.open_dataset(restartFileName)
        dsRestart = dsRestart.isel(Time=0)
        areaCell = dsRestart.areaCell

        nVertLevels = dsRestart.sizes['nVertLevels']

        vertIndex = \
            xr.DataArray.from_dict({'dims': ('nVertLevels',),
                                    'data': np.arange(nVertLevels)})

        vertMask = vertIndex < dsRestart.maxLevelCell

        # get region masks
        regionMaskFileName = self.parentTask.masksSubtask.maskFileName
        dsRegionMask = xr.open_dataset(regionMaskFileName)

        # figure out the indices of the regions to plot
        regionNames = decode_strings(dsRegionMask.regionNames)

        regionIndices = []
        for regionToPlot in self.parentTask.regionNames:
            for index, regionName in enumerate(regionNames):
                if regionToPlot == regionName:
                    regionIndices.append(index)
                    break

        # select only those regions we want to plot
        dsRegionMask = dsRegionMask.isel(nRegions=regionIndices)
        cellMasks = dsRegionMask.regionCellMasks
        regionNamesVar = dsRegionMask.regionNames
github scivision / georinex / src / georinex / base.py
meas: Sequence[str] = None,
             verbose: bool = False,
             *,
             overwrite: bool = False,
             fast: bool = True,
             interval: Union[float, int, timedelta] = None) -> xarray.Dataset:
    """
    Read RINEX 2.x and 3.x OBS files in ASCII or GZIP (or Hatanaka)
    """

    if isinstance(fn, (str, Path)):
        fn = Path(fn).expanduser()
# %% NetCDF4
        if fn.suffix == '.nc':
            try:
                return xarray.open_dataset(fn, group=group)
            except OSError as e:
                raise LookupError(f'Group {group} not found in {fn}   {e}')

    tlim = _tlim(tlim)
# %% version selection
    info = rinexinfo(fn)

    if int(info['version']) in (1, 2):
        obs = rinexobs2(fn, use, tlim=tlim,
                        useindicators=useindicators, meas=meas,
                        verbose=verbose,
                        fast=fast, interval=interval)
    elif int(info['version']) == 3:
        obs = rinexobs3(fn, use, tlim=tlim,
                        useindicators=useindicators, meas=meas,
                        verbose=verbose,