# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def api_call_latency(title, verb, scope, threshold):
    """Build a latency graph panel for one API verb/scope pair.

    Plots the Prometheus expression (module-level ``expression``
    interpolated with *verb* and *scope*) alongside a constant
    *threshold* line, with the y axis in seconds.
    """
    threshold_line = g.Target(expr=str(threshold), legendFormat="threshold")
    latency_series = g.Target(
        expr=d.one_line(expression % {"verb": verb, "scope": scope}),
        # TODO(github.com/grafana/grafana/issues/19410): uncomment once fixed
        # legendFormat="{{verb}} {{scope}}/{{resource}}",
    )
    return d.Graph(
        title=title,
        targets=[threshold_line, latency_series],
        yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
    )
def show_quantiles(queryTemplate, quantiles=None, legend=""):
    """Return one Grafana target per quantile.

    :param queryTemplate: query string with a ``{quantile}`` placeholder.
    :param quantiles: iterable of quantile floats; falls back to the
        module-level ``QUANTILES`` when empty or None.
    :param legend: legend format applied to every target; when empty,
        each target's legend is its own formatted quantile.
    :return: list of ``g.Target`` objects, one per quantile.
    """
    quantiles = quantiles or QUANTILES
    # Format once per quantile; the same "0.99"-style string is used both in
    # the query and (by default) as the legend. The original bound it to the
    # ambiguous name `l` (PEP 8 E741) inside a manual append loop.
    formatted = ["{:.2f}".format(quantile) for quantile in quantiles]
    return [
        g.Target(expr=queryTemplate.format(quantile=q), legendFormat=legend or q)
        for q in formatted
    ]
# NOTE(review): this is a byte-identical duplicate of the api_call_latency
# defined earlier in this file; at import time this redefinition silently
# shadows the first. Almost certainly a copy/paste artifact of chunk
# concatenation — confirm and keep only one copy.
def api_call_latency(title, verb, scope, threshold):
return d.Graph(
title=title,
targets=[
# Constant threshold line drawn alongside the measured latency.
g.Target(expr=str(threshold), legendFormat="threshold"),
g.Target(
expr=d.one_line(expression % {"verb": verb, "scope": scope}
),
# TODO(github.com/grafana/grafana/issues/19410): uncomment once fixed
# legendFormat="{{verb}} {{scope}}/{{resource}}",
),
],
yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
)
# NOTE(review): fragment — the opening d.Graph( of this panel lies outside
# this chunk; the trailing "))," below closes it.
title="DNS latency",
# Quantile targets built from a recording rule; `legend` makes Grafana show
# each series labelled by its quantile.
targets=d.show_quantiles(
'probes:dns_lookup_latency:histogram_quantile{{quantile="{quantile}"}}',
legend="{{quantile}}",
),
yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
nullPointMode="null",
),
# Panel: per-second DNS lookup rate and error rate reported by the
# in-cluster probes (namespace="probes", job="dns"), averaged over 1m.
# NOTE(review): this expression is an element of a panel list whose
# enclosing structure lies outside this chunk.
d.Graph(
title="probe: lookup rate",
targets=[
g.Target(
expr='sum(rate(probes_in_cluster_dns_lookup_count{namespace="probes", job="dns"}[1m]))',
legendFormat="lookup rate",
),
g.Target(
expr='sum(rate(probes_in_cluster_network_latency_error{namespace="probes", job="dns"}[1m]))',
legendFormat="error rate",
),
],
),
# Panel: number of running probe containers, inferred by counting the
# containers that currently report a memory-usage series.
d.Graph(
title="probe: # running",
targets=[
# NOTE(review): uses d.Target while every sibling panel uses g.Target —
# confirm module `d` re-exports Target; otherwise this is a typo.
d.Target(
expr='count(container_memory_usage_bytes{namespace="probes", container="dns"}) by (container, namespace)'
)
],
nullPointMode="null",
),
# NOTE(review): truncated fragment — this d.Graph(...) call is cut off here;
# the rest of the panel is not visible in this chunk.
d.Graph(
title="probe: memory usage",
def filesystem_usage(datasource):
"""Build a Graph panel of per-PVC filesystem usage as a percentage.

NOTE(review): this definition is truncated in this chunk — the
``G.Graph(...)`` call opened below is not closed within the visible
lines, and the content that follows belongs to a different section.
"""
return G.Graph(
title="Filesystem Usage",
dataSource=datasource,
xAxis=X_TIME,
yAxes=[
# Left axis: percent used; right axis hidden.
G.YAxis(
format="percent",
),
G.YAxis(
show=False,
),
],
targets=[
G.Target(
# Get the proportion used of each filesystem on a volume from
# a PersistentVolumeClaim on each node of the cluster. It's
# hard to figure out the role each filesystem serves from this
# graph (since all we get is the PVC name). Better than
# nothing, though. Hopefully later we can do better.
expr="""
100
* filesystem_used_bytes{volume=~"pvc-.*"}
/ filesystem_size_bytes{volume=~"pvc-.*"}
""",
legendFormat="{{volume}}",
refId="A",
),
# NOTE(review): garbled/concatenated fragment — the lines below are the
# interior of generator expressions that build upload/download transfer-rate
# targets for a tahoe-lafs benchmark panel; the enclosing function and the
# start of each expression are missing from this chunk. Names `metric`,
# `legend_format`, and `refid` are bound by the (invisible) surrounding
# comprehension/iterator.
# different pods into a single result. There should be
# minimal overlap but whenever the pod gets recreated (because
# the deploying is updated, for example) there's a little.
expr="""
avg without (pod,instance) (
rate(tahoe_lafs_roundtrip_benchmark_{metric}_bytes_per_second_sum{{service="tahoe-lafs-transfer-rate-monitor"}}[60m])
/ rate(tahoe_lafs_roundtrip_benchmark_{metric}_bytes_per_second_count{{service="tahoe-lafs-transfer-rate-monitor"}}[60m])
)
""".format(metric=metric),
legendFormat="avg " + legend_format,
refId=next(refid),
)
for (legend_format, metric)
in [("upload", "write"), ("download", "read")]
) + list(
G.Target(
# The average above is nice, I suppose. It doesn't give the
# full picture, though. So also compute the rate which is
# slower than 90% of the results (faster than 10% of the
# results). This is basically what a 90% transfer speed SLA
# would talk about. Put another way, 90% of uploads should
# occur at a rate equal to or greater than the one plotted by
# this expression.
expr="""
avg without (pod,instance) (
histogram_quantile(
0.10,
rate(
tahoe_lafs_roundtrip_benchmark_{metric}_bytes_per_second_bucket{{service="tahoe-lafs-transfer-rate-monitor"}}[60m]
)
)
)
),
G.YAxis(
show=False,
),
],
# NOTE(review): fragment — targets for a network-throughput panel whose
# enclosing Graph call is outside this chunk. The "/ 2 ^ 20" (PromQL
# exponentiation) converts bytes/s into MiB/s.
targets=[
G.Target(
# Get the rate of data received on the public interface (eth0)
# for each entire node (id="/") over the last minute.
expr="""
receive:container_network_bytes:rate1m / 2 ^ 20
""",
legendFormat="receive",
refId="A",
),
G.Target(
# And rate of data sent.
expr="""
transmit:container_network_bytes:rate1m / 2 ^ 20
""",
legendFormat="transmit",
refId="B",
),
:param kwargs: Passed on to Graph.
"""
# NOTE(review): the head of this helper (its signature and the start of the
# docstring) lies outside this chunk.
# refIds are single letters A..Z, so at most 26 expressions are supported.
letters = string.ascii_uppercase
expressions = list(expressions)
if len(expressions) > len(letters):
raise ValueError(
'Too many expressions. Can support at most {}, but got {}'.format(
len(letters), len(expressions)))
# Two accepted shapes: a list of dicts of Target kwargs, or a list of
# (legend, expr) pairs. Mixed lists fall into the pair branch and would
# fail on unpacking.
if all(isinstance(expr, dict) for expr in expressions):
targets = [
G.Target(refId=refId, **args)
for (args, refId) in zip(expressions, letters)]
else:
targets = [
G.Target(expr=expr, legendFormat=legend, refId=refId)
for ((legend, expr), refId) in zip(expressions, letters)]
return G.Graph(
title=title,
dataSource=data_source,
targets=targets,
**kwargs
)
# NOTE(review): fragment — arguments of a Graph call opened outside this
# chunk. Plots the running total of s4 unhandled errors.
title="Unhandled Errors",
dataSource=datasource,
xAxis=X_TIME,
yAxes=[
G.YAxis(
format="none",
label="Count",
),
G.YAxis(
show=False,
),
],
targets=[
G.Target(
expr="""
sum(s4_unhandled_error_counter)
""",
refId="A",
legendFormat="Total Unhandled Errors",
),
# NOTE(review): fragment — arguments of a Graph call opened outside this
# chunk. The expression subtracts the most recent successful-convergence
# timestamp (max across subscription-converger pods) from now, i.e. seconds
# since the last successful convergence.
title="Since Last Convergence",
dataSource=datasource,
xAxis=X_TIME,
yAxes=[
G.YAxis(
format="none",
label="Period",
),
G.YAxis(
show=False,
),
],
targets=[
G.Target(
expr="""
time()
- max(
s4_last_convergence_succeeded{
pod=~"subscription-converger-.*"
}
)
""",
refId="A",
legendFormat="Time Since Last Convergence Success",
),