import matplotlib.pyplot as plt
import numpy as np
import pyproj
import verde as vd


def generic_gridder(day, df, idx):
    """
    Generic gridding algorithm for easy variables.

    Expects module-level ``XAXIS`` and ``YAXIS`` arrays that define the output
    grid coordinates.
    """
    data = df[idx].values
    coordinates = (df["lon"].values, df["lat"].values)
    region = [XAXIS[0], XAXIS[-1], YAXIS[0], YAXIS[-1]]
    projection = pyproj.Proj(proj="merc", lat_ts=df["lat"].mean())
    spacing = 0.5
    # Block-average the data to the target spacing, then fit a damped spline
    chain = vd.Chain(
        [
            ("mean", vd.BlockReduce(np.mean, spacing=spacing * 111e3)),
            ("spline", vd.Spline(damping=1e-10, mindist=100e3)),
        ]
    )
    train, test = vd.train_test_split(projection(*coordinates), data, random_state=0)
    chain.fit(*train)
    score = chain.score(*test)
    shape = (len(YAXIS), len(XAXIS))
    grid = chain.grid(
        region=region,
        shape=shape,
        projection=projection,
        dims=["latitude", "longitude"],
        data_names=["precip"],
    )
    res = grid.to_array()
    return res
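########################################################################################
# A minimal usage sketch for ``generic_gridder``. The grid axes and the random
# "station" table below are hypothetical stand-ins for the real inputs, and this
# assumes the Verde version the function was written for (e.g. ``mindist`` in
# :class:`verde.Spline`).
import pandas as pd

XAXIS = np.arange(-104.0, -96.0, 0.25)
YAXIS = np.arange(40.0, 44.0, 0.25)
rng = np.random.default_rng(0)
stations = pd.DataFrame(
    {
        "lon": rng.uniform(XAXIS[0], XAXIS[-1], 200),
        "lat": rng.uniform(YAXIS[0], YAXIS[-1], 200),
        "precip": rng.gamma(2.0, 5.0, size=200),
    }
)
precip_grid = generic_gridder("2017-01-01", stations, "precip")
print(precip_grid.shape)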
# We'll test this on the California vertical GPS velocity data because it comes with the
# uncertainties
data = vd.datasets.fetch_california_gps()
coordinates = (data.longitude.values, data.latitude.values)
# Use a Mercator projection for our Cartesian gridder
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())
# Now we can chain a block weighted mean and weighted spline together. We'll use
# uncertainty propagation to calculate the new weights from block mean because our data
# vary smoothly but have different uncertainties.
spacing = 5 / 60 # 5 arc-minutes
chain = vd.Chain(
[
("mean", vd.BlockMean(spacing=spacing * 111e3, uncertainty=True)),
("spline", vd.Spline(damping=1e-10)),
]
)
print(chain)
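########################################################################################
# Roughly, the propagation works like this (a standalone numpy sketch, not the actual
# :class:`verde.BlockMean` implementation): the weighted mean of values with
# uncertainties ``sigma`` and weights ``1/sigma**2`` has variance ``1/sum(1/sigma**2)``,
# so the new weight assigned to each block is the inverse of that variance.
values = np.array([1.2, 0.8, 1.1])
sigma = np.array([0.1, 0.3, 0.2])
w = 1 / sigma ** 2
block_value = np.sum(w * values) / np.sum(w)
block_weight = np.sum(w)  # 1 / variance of the weighted mean
print("Block value:", block_value, "New weight:", block_weight)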
# Split the data into a training and testing set. We'll use the training set to grid the
# data and the testing set to validate our spline model. Weights need to be
# 1/uncertainty**2 for the error propagation in BlockMean to work.
train, test = vd.train_test_split(
projection(*coordinates),
data.velocity_up,
weights=1 / data.std_up ** 2,
random_state=0,
)
# Fit the model on the training set
chain.fit(*train)
# And calculate an R² score coefficient on the testing set. The best possible score
# (meaning perfect prediction) is 1.
score = chain.score(*test)
print("R² score:", score)
########################################################################################
# That's a good score meaning that our gridder is able to accurately predict data that
# wasn't used in the gridding algorithm.
#
# .. caution::
#
#     One caveat for this score is that it is highly dependent on the particular split
#     that we made. Changing the random number generator seed in
#     :func:`verde.train_test_split` will result in a different score.
# Use 1 as a seed instead of 0
train_other, test_other = vd.train_test_split(
    projection(*coordinates), data.velocity_up, test_size=0.3, random_state=1
)
print("R² score with seed 1:", vd.Spline().fit(*train_other).score(*test_other))
########################################################################################
# Cross-validation
# ----------------
#
# A more robust way of scoring the gridders is to use function
# :func:`verde.cross_val_score`, which by default uses `k-fold cross-validation
# <https://en.wikipedia.org/wiki/Cross-validation_(statistics)#k-fold_cross-validation>`__.
# It will split the data *k* times and return the score on each *fold*. We can then
# take a mean of these scores.
scores = vd.cross_val_score(vd.Spline(), projection(*coordinates), data.velocity_up)
print("k-fold scores:", scores)
print("Mean score:", np.mean(scores))
########################################################################################
# When using a blocked split (demonstrated below), the data get grouped into blocks
# (with size specified by ``spacing``) and the blocks get split into training and
# testing. For comparison, here is the random split we made above:
plt.figure(figsize=(8, 6))
ax = plt.axes()
ax.plot(train[0][0], train[0][1], ".r", label="train")
ax.plot(test[0][0], test[0][1], ".b", label="test")
ax.legend()
ax.set_aspect("equal")
plt.tight_layout()
plt.show()
########################################################################################
# Training and scoring our spline. For this we switch to the Baja California bathymetry
# dataset, which is also used in the sections below. First, project the coordinates and
# split the data:
data = vd.datasets.fetch_baja_bathymetry()
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())
proj_coords = projection(data.longitude.values, data.latitude.values)
bathymetry = data.bathymetry_m
train, test = vd.train_test_split(proj_coords, bathymetry, test_size=0.3, random_state=0)
spline = vd.Spline()
spline.fit(*train)
score = spline.score(*test)
print("R² score:", score)
train_other, test_other = vd.train_test_split(
proj_coords,
bathymetry,
test_size=0.3,
random_state=10,
method="block",
spacing=1 * 111000,
)
print(
"R² score different random state:", vd.Spline().fit(*train_other).score(*test_other)
)
plt.ylabel("Northing (m)")
plt.gca().set_aspect("equal")
plt.tight_layout()
plt.show()
########################################################################################
# Cartesian grids
# ---------------
#
# Now we can use :class:`verde.BlockReduce` and :class:`verde.Spline` on our projected
# coordinates. We'll specify the desired grid spacing as degrees and convert it to
# Cartesian using the 1 degree approx. 111 km rule-of-thumb.
spacing = 10 / 60
reducer = vd.BlockReduce(np.median, spacing=spacing * 111e3)
filter_coords, filter_bathy = reducer.filter(proj_coords, data.bathymetry_m)
spline = vd.Spline().fit(filter_coords, filter_bathy)
########################################################################################
# If we now call :meth:`verde.Spline.grid` we'll get back a grid evenly spaced in
# projected Cartesian coordinates.
grid = spline.grid(spacing=spacing * 111e3, data_names=["bathymetry"])
print("Cartesian grid:")
print(grid)
########################################################################################
# We'll mask our grid using :func:`verde.distance_mask` to get rid of all the spurious
# solutions far away from the data points.
grid = vd.distance_mask(proj_coords, maxdist=30e3, grid=grid)
plt.figure(figsize=(7, 6))
plt.title("Gridded bathymetry in Cartesian coordinates")
pc = grid.bathymetry.plot.pcolormesh(cmap="viridis", vmax=0, add_colorbar=False)
plt.colorbar(pc).set_label("bathymetry (m)")
plt.show()
########################################################################################
# Scoring
# --------
#
# Gridders in Verde implement the :meth:`~verde.base.BaseGridder.score` method that
# calculates the `R² coefficient of determination
# <https://en.wikipedia.org/wiki/Coefficient_of_determination>`__
# for a given comparison dataset (``test`` in our case). The R² score is at most 1,
# meaning a perfect prediction, but has no lower bound.
#
# We can pass the training dataset to the :meth:`~verde.base.BaseGridder.fit` method of
# most gridders using Python's argument expansion using the ``*`` symbol. The same can
# be applied to the testing set:
spline = vd.Spline()
spline.fit(*train)
score = spline.score(*test)
print("R² score:", score)