# Shared imports assumed by the snippets below.
import numpy as np
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import pyproj
import verde as vd

# Split the data into a training and testing set. We'll fit the gridder on the training
# set and use the testing set to evaluate how well the gridder is performing.
train, test = vd.train_test_split(
    projection(*coordinates),
    (data.wind_speed_east_knots, data.wind_speed_north_knots),
    random_state=2,
)
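# As a quick sanity check (a sketch, not part of the original example), each split
# starts with the projected coordinates, followed by the data arrays, so it can be
# unpacked straight into the fit and score methods used below.
print("Training points:", train[0][0].size)
print("Testing points:", test[0][0].size)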
# We'll make a 20 arc-minute grid
spacing = 20 / 60
# Chain together a blocked mean to avoid aliasing, a polynomial trend (Spline usually
# requires de-trended data), and finally a Spline for each component. Notice that
# BlockReduce can work on multicomponent data without the use of Vector.
chain = vd.Chain(
    [
        ("mean", vd.BlockReduce(np.mean, spacing * 111e3)),
        ("trend", vd.Vector([vd.Trend(degree=1) for i in range(2)])),
        (
            "spline",
            vd.Vector([vd.Spline(damping=1e-10, mindist=500e3) for i in range(2)]),
        ),
    ]
)
print(chain)
# Fit on the training data
chain.fit(*train)
# And score on the testing data. The best possible score is 1, meaning a perfect
# prediction of the test data.
score = chain.score(*test)
print("Cross-validation R^2 score: {:.2f}".format(score))
def generic_gridder(day, df, idx):
    """
    Generic gridding algorithm for easy variables
    """
    data = df[idx].values
    coordinates = (df["lon"].values, df["lat"].values)
    region = [XAXIS[0], XAXIS[-1], YAXIS[0], YAXIS[-1]]
    projection = pyproj.Proj(proj="merc", lat_ts=df["lat"].mean())
    spacing = 0.5
    chain = vd.Chain(
        [
            ("mean", vd.BlockReduce(np.mean, spacing=spacing * 111e3)),
            ("spline", vd.Spline(damping=1e-10, mindist=100e3)),
        ]
    )
    train, test = vd.train_test_split(
        projection(*coordinates), data, random_state=0
    )
    chain.fit(*train)
    score = chain.score(*test)
    shape = (len(YAXIS), len(XAXIS))
    grid = chain.grid(
        region=region,
        shape=shape,
        projection=projection,
        dims=["latitude", "longitude"],
        data_names=["precip"],
    )
    # Return the gridded result so the caller can use or plot it.
    return grid
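# A usage sketch with synthetic inputs (the station values, the XAXIS/YAXIS grid axes,
# and the column name are hypothetical stand-ins, not from the original script). The
# function expects a DataFrame with "lon" and "lat" columns plus the variable to grid,
# and module-level XAXIS/YAXIS arrays that define the output grid.
import pandas as pd

XAXIS = np.arange(-104.0, -89.0, 0.25)  # hypothetical grid longitudes
YAXIS = np.arange(36.0, 44.0, 0.25)  # hypothetical grid latitudes
rng = np.random.default_rng(0)
stations = pd.DataFrame(
    {
        "lon": rng.uniform(XAXIS[0], XAXIS[-1], 200),
        "lat": rng.uniform(YAXIS[0], YAXIS[-1], 200),
        "precip_in": rng.gamma(2.0, 0.3, 200),
    }
)
precip_grid = generic_gridder(None, stations, "precip_in")
print(precip_grid)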
# Split the data into a training and testing set. We'll fit the gridder on the training
# set and use the testing set to evaluate how well the gridder is performing.
train, test = vd.train_test_split(
    projection(*coordinates), (data.velocity_east, data.velocity_north), random_state=0
)
# We'll make a 15 arc-minute grid in the end.
spacing = 15 / 60
# Chain together a blocked mean to avoid aliasing, a polynomial trend to take care of
# the increase toward the coast, and finally the vector gridder using Poisson's ratio
# 0.5 to couple the two horizontal components.
chain = vd.Chain(
    [
        ("mean", vd.BlockReduce(np.mean, spacing * 111e3)),
        ("trend", vd.Vector([vd.Trend(degree=1) for i in range(2)])),
        ("spline", vd.VectorSpline2D(poisson=0.5, mindist=10e3)),
    ]
)
# Fit on the training data
chain.fit(*train)
# And score on the testing data. The best possible score is 1, meaning a perfect
# prediction of the test data.
score = chain.score(*test)
print("Cross-validation R^2 score: {:.2f}".format(score))
# Interpolate our horizontal GPS velocities onto a regular geographic grid and mask the
# data that are far from the observation points
grid_full = chain.grid(
    region, spacing=spacing, projection=projection, dims=["latitude", "longitude"]
)
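# The masking step mentioned above isn't shown in this snippet. A sketch of how it
# could be done with verde.distance_mask (the two-block cutoff distance is an
# assumption, not a value from the original): grid points farther than the cutoff from
# any observation point are set to NaN.
grid = vd.distance_mask(
    coordinates, maxdist=spacing * 2 * 111e3, grid=grid_full, projection=projection
)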
plt.plot(proj_coords[0], proj_coords[1], ".k", markersize=0.5)
plt.xlabel("Easting (m)")
plt.ylabel("Northing (m)")
plt.gca().set_aspect("equal")
plt.tight_layout()
plt.show()
########################################################################################
# Cartesian grids
# ---------------
#
# Now we can use :class:`verde.BlockReduce` and :class:`verde.Spline` on our projected
# coordinates. We'll specify the desired grid spacing as degrees and convert it to
# Cartesian using the 1 degree approx. 111 km rule-of-thumb.
spacing = 10 / 60
reducer = vd.BlockReduce(np.median, spacing=spacing * 111e3)
filter_coords, filter_bathy = reducer.filter(proj_coords, data.bathymetry_m)
spline = vd.Spline().fit(filter_coords, filter_bathy)
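# As a quick quality check (a sketch, not part of the original tutorial), the fitted
# spline can predict back at the decimated points; small residuals mean the surface
# honors the data closely.
residuals = filter_bathy - spline.predict(filter_coords)
print("Maximum absolute residual (m): {:.2f}".format(np.abs(residuals).max()))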
########################################################################################
# If we now call :meth:`verde.Spline.grid` we'll get back a grid evenly spaced in
# projected Cartesian coordinates.
grid = spline.grid(spacing=spacing * 111e3, data_names=["bathymetry"])
print("Cartesian grid:")
print(grid)
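# The grid above is spaced in projected meters. As a sketch (assuming the same
# `projection` used to project the data), passing a geographic region and a spacing in
# degrees along with the projection to grid() yields a geographic grid instead.
grid_geo = spline.grid(
    region=vd.get_region((data.longitude, data.latitude)),
    spacing=spacing,
    projection=projection,
    dims=["latitude", "longitude"],
    data_names=["bathymetry"],
)
print("Geographic grid:")
print(grid_geo)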
########################################################################################
# We'll mask our grid using :func:`verde.distance_mask` to get rid of all the spurious
# solutions far away from the data points.
grid = vd.distance_mask(proj_coords, maxdist=30e3, grid=grid)
plt.figure(figsize=(7, 7))
ax = plt.axes(projection=ccrs.Mercator())
ax.set_title("Locations of decimated data")
# Plot the bathymetry data locations as black dots. The coordinates are geographic
# longitude/latitude, so we pass a PlateCarree CRS as the transform.
crs = ccrs.PlateCarree()
plt.plot(*coordinates, ".k", markersize=1, transform=crs)
vd.datasets.setup_baja_bathymetry_map(ax)
plt.tight_layout()
plt.show()
########################################################################################
# By default, the coordinates of the decimated data are obtained by applying the same
# reduction operation to the coordinates of the original data. Alternatively, we can
# tell :class:`~verde.BlockReduce` to return the coordinates of the center of each
# block:
reducer_center = vd.BlockReduce(
    reduction=np.median, spacing=5 / 60, center_coordinates=True
)
coordinates_center, bathymetry = reducer_center.filter(
    coordinates=(data.longitude, data.latitude), data=data.bathymetry_m
)
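# A quick check (sketch, not in the original tutorial): with center_coordinates=True
# every returned coordinate falls on a block center, so the unique longitudes are
# evenly spaced at roughly the 5 arc-minute block size.
print(np.unique(coordinates_center[0])[:5])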
plt.figure(figsize=(7, 7))
ax = plt.axes(projection=ccrs.Mercator())
ax.set_title("Locations of decimated data using block centers")
# Plot the bathymetry data locations as black dots
plt.plot(*coordinates_center, ".k", markersize=1, transform=crs)
vd.datasets.setup_baja_bathymetry_map(ax)
plt.tight_layout()
plt.show()
########################################################################################
# Class :class:`verde.BlockReduce` can be used to apply a reduction/aggregation
# operation (mean, median, standard deviation, etc) to the data in regular blocks. All
# data inside each block will be replaced by their aggregated value.
# :class:`~verde.BlockReduce` takes an aggregation function as input. It can be any
# function that receives a numpy array as input and returns a single scalar value. The
# :func:`numpy.mean` or :func:`numpy.median` functions are usually what we want.
import numpy as np
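# Any callable that takes an array of values in a block and returns a single number
# will work. As a sketch with a hypothetical robust reduction (a 10% trimmed mean via
# scipy; this choice is an assumption, not part of the original tutorial):
from scipy.stats import trim_mean

def trimmed_mean(values):
    # Drop the lowest and highest 10% of values in each block before averaging.
    return trim_mean(values, proportiontocut=0.1)

robust_reducer = vd.BlockReduce(reduction=trimmed_mean, spacing=5 / 60)
print(robust_reducer)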
########################################################################################
# Blocked means and medians are good ways to decimate data for interpolation. Let's use
# a blocked median on our data to decimate it to our desired grid interval of 5
# arc-minutes. The reason for using a median instead of a mean is that bathymetry data
# can vary abruptly and a mean would smooth the data too much. For data that vary more
# smoothly (like gravity and magnetic data), a mean would be a better option.
reducer = vd.BlockReduce(reduction=np.median, spacing=5 / 60)
print(reducer)
########################################################################################
# Use the :meth:`~verde.BlockReduce.filter` method to apply the reduction:
coordinates, bathymetry = reducer.filter(
    coordinates=(data.longitude, data.latitude), data=data.bathymetry_m
)
plt.figure(figsize=(7, 7))
ax = plt.axes(projection=ccrs.Mercator())
ax.set_title("Locations of decimated data")
# Plot the bathymetry data locations as black dots
plt.plot(*coordinates, ".k", markersize=1, transform=crs)
vd.datasets.setup_baja_bathymetry_map(ax)
plt.tight_layout()
plt.show()