import numpy as np
import pyproj
import verde as vd

# We'll test this on the air temperature data from Texas
data = vd.datasets.fetch_texas_wind()
coordinates = (data.longitude.values, data.latitude.values)
region = vd.get_region(coordinates)
# Use a Mercator projection for our Cartesian gridder
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())
# The output grid spacing will be 15 arc-minutes
spacing = 15 / 60
# Now we can chain a blocked mean and spline together. The Spline can be regularized
# by setting the damping coefficient (should be positive). It's also a good idea to set
# the minimum distance to the average data spacing to avoid singularities in the spline.
chain = vd.Chain(
    [
        ("mean", vd.BlockReduce(np.mean, spacing=spacing * 111e3)),
        ("spline", vd.Spline(damping=1e-10, mindist=100e3)),
    ]
)
print(chain)
# We can evaluate model performance by splitting the data into a training and testing
# set. We'll use the training set to grid the data and the testing set to validate our
# spline model.
train, test = vd.train_test_split(
    projection(*coordinates), data.air_temperature_c, random_state=0
)
# Fit the model on the training set
chain.fit(*train)
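# To round out this example, we can score on the testing set and produce the grid, the
# same way the later examples do. The "temperature" output name below is only an
# illustrative choice.
score = chain.score(*test)
print("Cross-validation R^2 score: {:.2f}".format(score))
grid = chain.grid(
    region=region,
    spacing=spacing,
    projection=projection,
    dims=["latitude", "longitude"],
    data_names=["temperature"],
)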
# This example grids the California GPS horizontal velocities
data = vd.datasets.fetch_california_gps()
coordinates = (data.longitude.values, data.latitude.values)
region = vd.get_region(coordinates)
# Use a Mercator projection because VectorSpline2D is a Cartesian gridder
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())
# Split the data into a training and testing set. We'll fit the gridder on the training
# set and use the testing set to evaluate how well the gridder is performing.
train, test = vd.train_test_split(
    projection(*coordinates), (data.velocity_east, data.velocity_north), random_state=0
)
# We'll make a 15 arc-minute grid in the end.
spacing = 15 / 60
# Chain together a blocked mean to avoid aliasing, a polynomial trend to take care of
# the increase toward the coast, and finally the vector gridder using Poisson's ratio
# 0.5 to couple the two horizontal components.
chain = vd.Chain(
    [
        ("mean", vd.BlockReduce(np.mean, spacing * 111e3)),
        ("trend", vd.Vector([vd.Trend(degree=1) for i in range(2)])),
        ("spline", vd.VectorSpline2D(poisson=0.5, mindist=10e3)),
    ]
)
# Fit on the training data
chain.fit(*train)
# And score on the testing data. The best possible score is 1, meaning a perfect
# prediction of the test data.
score = chain.score(*test)
print("Cross-validation R^2 score: {:.2f}".format(score))
# Interpolate our horizontal GPS velocities onto a regular geographic grid and mask the
# data that are far from the observation points
grid_full = chain.grid(
    region=region,
    spacing=spacing,
    projection=projection,
    dims=["latitude", "longitude"],
    # Output variable names here are illustrative
    data_names=["east_component", "north_component"],
)
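# The masking mentioned above can be done with vd.distance_mask; the cutoff of three
# block spacings is an assumption for this sketch.
grid = vd.distance_mask(
    coordinates, maxdist=3 * spacing * 111e3, grid=grid_full, projection=projection
)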
import numpy as np
import pyproj
import verde as vd
# We'll test this on the California vertical GPS velocity data because it comes with the
# uncertainties
data = vd.datasets.fetch_california_gps()
coordinates = (data.longitude.values, data.latitude.values)
# Use a Mercator projection for our Cartesian gridder
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())
# Now we can chain a block weighted mean and a weighted spline together. We'll use
# uncertainty propagation to calculate the new weights from the block mean because our
# data vary smoothly but have different uncertainties.
spacing = 5 / 60 # 5 arc-minutes
chain = vd.Chain(
    [
        ("mean", vd.BlockMean(spacing=spacing * 111e3, uncertainty=True)),
        ("spline", vd.Spline(damping=1e-10)),
    ]
)
print(chain)
# Split the data into a training and testing set. We'll use the training set to grid the
# data and the testing set to validate our spline model. Weights need to be
# 1/uncertainty**2 for the error propagation in BlockMean to work.
train, test = vd.train_test_split(
    projection(*coordinates),
    data.velocity_up,
    weights=1 / data.std_up ** 2,
    random_state=0,
)
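# A minimal continuation in the same spirit as the other examples: fit on the training
# set (which carries the weights along) and check the score on the testing set.
chain.fit(*train)
score = chain.score(*test)
print("Cross-validation R^2 score: {:.2f}".format(score))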
def generic_gridder(day, df, idx):
    """
    Generic gridding algorithm for easy variables
    """
    data = df[idx].values
    coordinates = (df["lon"].values, df["lat"].values)
    # XAXIS and YAXIS are assumed to be module-level arrays of output grid coordinates
    region = [XAXIS[0], XAXIS[-1], YAXIS[0], YAXIS[-1]]
    projection = pyproj.Proj(proj="merc", lat_ts=df["lat"].mean())
    spacing = 0.5
    chain = vd.Chain(
        [
            ("mean", vd.BlockReduce(np.mean, spacing=spacing * 111e3)),
            ("spline", vd.Spline(damping=1e-10, mindist=100e3)),
        ]
    )
    train, test = vd.train_test_split(
        projection(*coordinates), data, random_state=0
    )
    chain.fit(*train)
    score = chain.score(*test)
    shape = (len(YAXIS), len(XAXIS))
    grid = chain.grid(
        region=region,
        shape=shape,
        projection=projection,
        dims=["latitude", "longitude"],
    )
    return grid
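# A hypothetical call of generic_gridder, assuming pandas station observations and
# module-level XAXIS/YAXIS grid coordinates; every name and value below is illustrative
# only, not taken from the original script.
import pandas as pd

XAXIS = np.linspace(-106.0, -93.0, 27)
YAXIS = np.linspace(25.0, 36.0, 23)
stations = pd.DataFrame(
    {
        "lon": np.random.uniform(-106.0, -93.0, 200),
        "lat": np.random.uniform(25.0, 36.0, 200),
        "tmpf": np.random.uniform(40.0, 90.0, 200),
    }
)
temperature_grid = generic_gridder(None, stations, "tmpf")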
# Plot the observed Baja California bathymetry data (the dataset fetch and figure setup
# here are a minimal sketch)
import cartopy.crs as ccrs
import matplotlib.pyplot as plt

data = vd.datasets.fetch_baja_bathymetry()
ax = plt.axes(projection=ccrs.Mercator())
plt.scatter(
    data.longitude,
    data.latitude,
    c=data.bathymetry_m,
    s=0.1,
    transform=ccrs.PlateCarree(),
)
plt.colorbar().set_label("meters")
vd.datasets.setup_baja_bathymetry_map(ax)
plt.tight_layout()
plt.show()
########################################################################################
# We'll create a chain that applies a blocked median to the data, fits a polynomial
# trend, and then fits a standard gridder to the trend residuals.
chain = vd.Chain(
    [
        ("reduce", vd.BlockReduce(np.median, spacing * 111e3)),
        ("trend", vd.Trend(degree=1)),
        ("spline", vd.Spline()),
    ]
)
print(chain)
########################################################################################
# Calling :meth:`verde.Chain.fit` will automatically run the data through the chain:
#
# #. Apply the blocked median to the input data
# #. Fit a trend to the blocked data and output the residuals
# #. Fit the spline to the trend residuals
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())
proj_coords = projection(data.longitude.values, data.latitude.values)
chain.fit(proj_coords, data.bathymetry_m)
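# As a rough sketch, the three steps listed above could also be written out by hand
# (the chain just wires them together and keeps track of each fitted part):
block_coords, block_bathymetry = vd.BlockReduce(np.median, spacing * 111e3).filter(
    proj_coords, data.bathymetry_m
)
trend = vd.Trend(degree=1).fit(block_coords, block_bathymetry)
residuals = block_bathymetry - trend.predict(block_coords)
spline = vd.Spline().fit(block_coords, residuals)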
# Back to the Texas wind data to grid the two wind speed components together
data = vd.datasets.fetch_texas_wind()
coordinates = (data.longitude.values, data.latitude.values)
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())
# Split the data into a training and testing set. We'll fit the gridder on the training
# set and use the testing set to evaluate how well the gridder is performing.
train, test = vd.train_test_split(
    projection(*coordinates),
    (data.wind_speed_east_knots, data.wind_speed_north_knots),
    random_state=2,
)
# We'll make a 20 arc-minute grid
spacing = 20 / 60
# Chain together a blocked mean to avoid aliasing, a polynomial trend (Spline usually
# requires de-trended data), and finally a Spline for each component. Notice that
# BlockReduce can work on multicomponent data without the use of Vector.
chain = vd.Chain(
    [
        ("mean", vd.BlockReduce(np.mean, spacing * 111e3)),
        ("trend", vd.Vector([vd.Trend(degree=1) for i in range(2)])),
        (
            "spline",
            vd.Vector([vd.Spline(damping=1e-10, mindist=500e3) for i in range(2)]),
        ),
    ]
)
print(chain)
# Fit on the training data
chain.fit(*train)
# And score on the testing data. The best possible score is 1, meaning a perfect
# prediction of the test data.
score = chain.score(*test)
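# Report the score the same way as in the examples above
print("Cross-validation R^2 score: {:.2f}".format(score))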
import pyproj

# Back to the California vertical GPS velocities, which come with uncertainties
data = vd.datasets.fetch_california_gps()
coordinates = (data.longitude.values, data.latitude.values)
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())
proj_coords = projection(data.longitude.values, data.latitude.values)
region = vd.get_region(coordinates)
spacing = 5 / 60
########################################################################################
# Now we can grid our data using a weighted spline. We'll use the block mean results
# with uncertainty based weights.
#
# Note that the weighted spline solution only works for non-exact interpolation, so
# we'll need either some damping regularization or point forces that are not placed at
# the data locations. Here, we'll apply a bit of damping.
# Fit directly with weights of 1/uncertainty**2, which BlockMean propagates to the spline
spline = vd.Chain(
    [
        # Convert the spacing to meters because Spline is a Cartesian gridder
        ("mean", vd.BlockMean(spacing=spacing * 111e3, uncertainty=True)),
        ("spline", vd.Spline(damping=1e-10)),
    ]
).fit(proj_coords, data.velocity_up, 1 / data.std_up ** 2)
grid = spline.grid(
    region=region,
    spacing=spacing,
    projection=projection,
    dims=["latitude", "longitude"],
    data_names=["velocity"],
)
########################################################################################
# Calculate an unweighted spline as well for comparison.
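# A minimal sketch of that comparison: the same spline fit without weights, using a plain
# block mean (BlockReduce with np.mean) for the decimation. The choice of reducer here is
# an assumption for illustration.
spline_unweighted = vd.Chain(
    [
        ("mean", vd.BlockReduce(np.mean, spacing=spacing * 111e3)),
        ("spline", vd.Spline(damping=1e-10)),
    ]
).fit(proj_coords, data.velocity_up)
grid_unweighted = spline_unweighted.grid(
    region=region,
    spacing=spacing,
    projection=projection,
    dims=["latitude", "longitude"],
    data_names=["velocity"],
)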