hotshot_query_service/
metrics.rs

1#![allow(dead_code)]
2
3// Copyright (c) 2022 Espresso Systems (espressosys.com)
4// This file is part of the HotShot Query Service library.
5//
6// This program is free software: you can redistribute it and/or modify it under the terms of the GNU
7// General Public License as published by the Free Software Foundation, either version 3 of the
8// License, or (at your option) any later version.
9// This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
10// even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11// General Public License for more details.
12// You should have received a copy of the GNU General Public License along with this program. If not,
13// see <https://www.gnu.org/licenses/>.
14
15use std::{
16    collections::HashMap,
17    sync::{Arc, RwLock},
18};
19
20use hotshot_types::traits::metrics;
21use itertools::Itertools;
22use prometheus::{
23    core::{AtomicU64, GenericCounter, GenericCounterVec, GenericGauge, GenericGaugeVec},
24    Encoder, HistogramVec, Opts, Registry, TextEncoder,
25};
26use snafu::Snafu;
27
28#[derive(Debug, Snafu)]
29pub enum MetricsError {
30    NoSuchSubgroup {
31        path: Vec<String>,
32    },
33    NoSuchMetric {
34        namespace: Vec<String>,
35        name: String,
36    },
37    Prometheus {
38        source: prometheus::Error,
39    },
40}
41
42impl From<prometheus::Error> for MetricsError {
43    fn from(source: prometheus::Error) -> Self {
44        Self::Prometheus { source }
45    }
46}
47
48/// A Prometheus-based implementation of a [Metrics](metrics::Metrics) registry.
49///
50/// [PrometheusMetrics] provides a collection of metrics including [Counter], [Gauge], and
51/// [Histogram]. These metrics can be created and associated with a [PrometheusMetrics] collection
52/// and then used as handles for updating and populating. The [PrometheusMetrics] registry can then
53/// be used to collect all of the associated metrics and export them in the Prometheus text format.
54///
55/// This implementation provides a few features beyond the basic [prometheus] features. It supports
56/// hierarchical namespaces; any [PrometheusMetrics] can be used to derive a subgroup with a certain
57/// name. The subgroup is then related to the parent, and any [PrometheusMetrics] in the tree of
58/// related groups can be used to collect _all_ registered metrics. The namespacing will be
59/// reflected in the fully qualified name of each metric in the Prometheus output. The subgroup
60/// relationship is pure and deterministic -- calling
61/// [get_subgroup](PrometheusMetrics::get_subgroup) with the same subgroup name will always return a
62/// handle to the same underlying [PrometheusMetrics] object.
63///
64/// [PrometheusMetrics] also supports querying for individual metrics by name, unlike
65/// [prometheus::Registry]. This provides a programming interface for inspecting the values of
66/// specific metrics at run-time, if that is preferable to exporting all metrics wholesale.
67#[derive(Clone, Debug, Default)]
68pub struct PrometheusMetrics {
69    metrics: Registry,
70    namespace: Vec<String>,
71    children: Arc<RwLock<HashMap<String, PrometheusMetrics>>>,
72    counters: Arc<RwLock<HashMap<String, Counter>>>,
73    gauges: Arc<RwLock<HashMap<String, Gauge>>>,
74    histograms: Arc<RwLock<HashMap<String, Histogram>>>,
75    counter_families: Arc<RwLock<HashMap<String, CounterFamily>>>,
76    gauge_families: Arc<RwLock<HashMap<String, GaugeFamily>>>,
77    histogram_families: Arc<RwLock<HashMap<String, HistogramFamily>>>,
78}
79
80impl PrometheusMetrics {
81    /// Get a counter in this sub-group by name.
82    pub fn get_counter(&self, name: &str) -> Result<Counter, MetricsError> {
83        self.get_metric(&self.counters, name)
84    }
85
86    /// Get a gauge in this sub-group by name.
87    pub fn get_gauge(&self, name: &str) -> Result<Gauge, MetricsError> {
88        self.get_metric(&self.gauges, name)
89    }
90
91    /// Get a histogram in this sub-group by name.
92    pub fn get_histogram(&self, name: &str) -> Result<Histogram, MetricsError> {
93        self.get_metric(&self.histograms, name)
94    }
95
96    /// Get a counter family in this sub-group by name.
97    pub fn get_counter_family(&self, name: &str) -> Result<CounterFamily, MetricsError> {
98        self.get_metric(&self.counter_families, name)
99    }
100
101    /// Get a gauge family in this sub-group by name.
102    pub fn gauge_family(&self, name: &str) -> Result<GaugeFamily, MetricsError> {
103        self.get_metric(&self.gauge_families, name)
104    }
105
106    /// Get a histogram family in this sub-group by name.
107    pub fn get_histogram_family(&self, name: &str) -> Result<HistogramFamily, MetricsError> {
108        self.get_metric(&self.histogram_families, name)
109    }
110
111    /// Get a (possibly nested) subgroup of this group by its path.
112    pub fn get_subgroup<I>(&self, path: I) -> Result<PrometheusMetrics, MetricsError>
113    where
114        I: IntoIterator,
115        I::Item: AsRef<str>,
116    {
117        let mut curr = self.clone();
118        for seg in path.into_iter() {
119            let next = curr
120                .children
121                .read()
122                .unwrap()
123                .get(seg.as_ref())
124                .ok_or_else(|| MetricsError::NoSuchSubgroup {
125                    path: {
126                        let mut path = curr.namespace.clone();
127                        path.push(seg.as_ref().to_string());
128                        path
129                    },
130                })?
131                .clone();
132            curr = next;
133        }
134        Ok(curr)
135    }
136
137    fn get_metric<M: Clone>(
138        &self,
139        metrics: &Arc<RwLock<HashMap<String, M>>>,
140        name: &str,
141    ) -> Result<M, MetricsError> {
142        metrics
143            .read()
144            .unwrap()
145            .get(name)
146            .cloned()
147            .ok_or_else(|| MetricsError::NoSuchMetric {
148                namespace: self.namespace.clone(),
149                name: name.to_string(),
150            })
151    }
152
153    fn metric_opts(&self, name: String, unit_label: Option<String>) -> Opts {
154        let help = unit_label.unwrap_or_else(|| name.clone());
155        let mut opts = Opts::new(name, help);
156        let mut group_names = self.namespace.iter();
157        if let Some(namespace) = group_names.next() {
158            opts = opts
159                .namespace(namespace.clone())
160                .subsystem(group_names.join("_"));
161        }
162        opts
163    }
164}
165
166impl tide_disco::metrics::Metrics for PrometheusMetrics {
167    type Error = MetricsError;
168
169    fn export(&self) -> Result<String, Self::Error> {
170        let encoder = TextEncoder::new();
171        let metric_families = self.metrics.gather();
172        let mut buffer = vec![];
173        encoder.encode(&metric_families, &mut buffer)?;
174        String::from_utf8(buffer).map_err(|err| MetricsError::Prometheus {
175            source: prometheus::Error::Msg(format!(
176                "could not convert Prometheus output to UTF-8: {err}"
177            )),
178        })
179    }
180}
181
182impl metrics::Metrics for PrometheusMetrics {
183    fn create_counter(
184        &self,
185        name: String,
186        unit_label: Option<String>,
187    ) -> Box<dyn metrics::Counter> {
188        let counter = Counter::new(&self.metrics, self.metric_opts(name.clone(), unit_label));
189        self.counters.write().unwrap().insert(name, counter.clone());
190        Box::new(counter)
191    }
192
193    fn create_gauge(&self, name: String, unit_label: Option<String>) -> Box<dyn metrics::Gauge> {
194        let gauge = Gauge::new(&self.metrics, self.metric_opts(name.clone(), unit_label));
195        self.gauges.write().unwrap().insert(name, gauge.clone());
196        Box::new(gauge)
197    }
198
199    fn create_histogram(
200        &self,
201        name: String,
202        unit_label: Option<String>,
203    ) -> Box<dyn metrics::Histogram> {
204        let histogram = Histogram::new(&self.metrics, self.metric_opts(name.clone(), unit_label));
205        self.histograms
206            .write()
207            .unwrap()
208            .insert(name, histogram.clone());
209        Box::new(histogram)
210    }
211
212    fn create_text(&self, name: String) {
213        self.create_gauge(name, None).set(1);
214    }
215
216    fn counter_family(&self, name: String, labels: Vec<String>) -> Box<dyn metrics::CounterFamily> {
217        let family =
218            CounterFamily::new(&self.metrics, self.metric_opts(name.clone(), None), &labels);
219        self.counter_families
220            .write()
221            .unwrap()
222            .insert(name, family.clone());
223        Box::new(family)
224    }
225
226    fn gauge_family(&self, name: String, labels: Vec<String>) -> Box<dyn metrics::GaugeFamily> {
227        let family = GaugeFamily::new(&self.metrics, self.metric_opts(name.clone(), None), &labels);
228        self.gauge_families
229            .write()
230            .unwrap()
231            .insert(name, family.clone());
232        Box::new(family)
233    }
234
235    fn histogram_family(
236        &self,
237        name: String,
238        labels: Vec<String>,
239    ) -> Box<dyn metrics::HistogramFamily> {
240        let family =
241            HistogramFamily::new(&self.metrics, self.metric_opts(name.clone(), None), &labels);
242        self.histogram_families
243            .write()
244            .unwrap()
245            .insert(name, family.clone());
246        Box::new(family)
247    }
248
249    fn text_family(&self, name: String, labels: Vec<String>) -> Box<dyn metrics::TextFamily> {
250        Box::new(TextFamily::new(
251            &self.metrics,
252            self.metric_opts(name.clone(), None),
253            &labels,
254        ))
255    }
256
257    fn subgroup(&self, subgroup_name: String) -> Box<dyn metrics::Metrics> {
258        Box::new(
259            self.children
260                .write()
261                .unwrap()
262                .entry(subgroup_name.clone())
263                .or_insert_with(|| Self {
264                    metrics: self.metrics.clone(),
265                    namespace: {
266                        let mut namespace = self.namespace.clone();
267                        namespace.push(subgroup_name);
268                        namespace
269                    },
270                    ..Default::default()
271                })
272                .clone(),
273        )
274    }
275}
276
277/// A [Counter](metrics::Counter) metric.
278#[derive(Clone, Debug)]
279pub struct Counter(GenericCounter<AtomicU64>);
280
281impl Counter {
282    fn new(registry: &Registry, opts: Opts) -> Self {
283        let counter = GenericCounter::with_opts(opts).unwrap();
284        registry.register(Box::new(counter.clone())).unwrap();
285        Self(counter)
286    }
287
288    pub fn get(&self) -> usize {
289        self.0.get() as usize
290    }
291}
292
293impl metrics::Counter for Counter {
294    fn add(&self, amount: usize) {
295        self.0.inc_by(amount as u64);
296    }
297}
298
299/// A [Gauge](metrics::Gauge) metric.
300#[derive(Clone, Debug)]
301pub struct Gauge(GenericGauge<AtomicU64>);
302
303impl Gauge {
304    fn new(registry: &Registry, opts: Opts) -> Self {
305        let gauge = GenericGauge::with_opts(opts).unwrap();
306        registry.register(Box::new(gauge.clone())).unwrap();
307        Self(gauge)
308    }
309
310    pub fn get(&self) -> usize {
311        self.0.get() as usize
312    }
313}
314
315impl metrics::Gauge for Gauge {
316    fn set(&self, amount: usize) {
317        self.0.set(amount as u64);
318    }
319
320    fn update(&self, delta: i64) {
321        if delta >= 0 {
322            self.0.add(delta as u64);
323        } else {
324            self.0.sub(-delta as u64);
325        }
326    }
327}
328
329/// A [Histogram](metrics::Histogram) metric.
330#[derive(Clone, Debug)]
331pub struct Histogram(prometheus::Histogram);
332
333impl Histogram {
334    fn new(registry: &Registry, opts: Opts) -> Self {
335        let histogram = prometheus::Histogram::with_opts(opts.into()).unwrap();
336        registry.register(Box::new(histogram.clone())).unwrap();
337        Self(histogram)
338    }
339
340    pub fn sample_count(&self) -> usize {
341        self.0.get_sample_count() as usize
342    }
343
344    pub fn sum(&self) -> f64 {
345        self.0.get_sample_sum()
346    }
347
348    pub fn mean(&self) -> f64 {
349        self.sum() / (self.sample_count() as f64)
350    }
351}
352
353impl metrics::Histogram for Histogram {
354    fn add_point(&self, point: f64) {
355        self.0.observe(point);
356    }
357}
358
359/// A [CounterFamily](metrics::CounterFamily) metric.
360#[derive(Clone, Debug)]
361pub struct CounterFamily(GenericCounterVec<AtomicU64>);
362
363impl CounterFamily {
364    fn new(registry: &Registry, opts: Opts, labels: &[String]) -> Self {
365        let labels = labels.iter().map(String::as_str).collect::<Vec<_>>();
366        let family = GenericCounterVec::new(opts, &labels).unwrap();
367        registry.register(Box::new(family.clone())).unwrap();
368        Self(family)
369    }
370
371    pub fn get(&self, label_values: &[impl AsRef<str>]) -> Counter {
372        let labels = label_values.iter().map(AsRef::as_ref).collect::<Vec<_>>();
373        Counter(self.0.get_metric_with_label_values(&labels).unwrap())
374    }
375}
376
377impl metrics::MetricsFamily<Box<dyn metrics::Counter>> for CounterFamily {
378    fn create(&self, labels: Vec<String>) -> Box<dyn metrics::Counter> {
379        Box::new(self.get(&labels))
380    }
381}
382
383/// A [GaugeFamily](metrics::GaugeFamily) metric.
384#[derive(Clone, Debug)]
385pub struct GaugeFamily(GenericGaugeVec<AtomicU64>);
386
387impl GaugeFamily {
388    fn new(registry: &Registry, opts: Opts, labels: &[String]) -> Self {
389        let labels = labels.iter().map(String::as_str).collect::<Vec<_>>();
390        let family = GenericGaugeVec::new(opts, &labels).unwrap();
391        registry.register(Box::new(family.clone())).unwrap();
392        Self(family)
393    }
394
395    pub fn get(&self, label_values: &[impl AsRef<str>]) -> Gauge {
396        let labels = label_values.iter().map(AsRef::as_ref).collect::<Vec<_>>();
397        Gauge(self.0.get_metric_with_label_values(&labels).unwrap())
398    }
399}
400
401impl metrics::MetricsFamily<Box<dyn metrics::Gauge>> for GaugeFamily {
402    fn create(&self, labels: Vec<String>) -> Box<dyn metrics::Gauge> {
403        Box::new(self.get(&labels))
404    }
405}
406
407/// A [HistogramFamily](metrics::HistogramFamily) metric.
408#[derive(Clone, Debug)]
409pub struct HistogramFamily(HistogramVec);
410
411impl HistogramFamily {
412    fn new(registry: &Registry, opts: Opts, labels: &[String]) -> Self {
413        let labels = labels.iter().map(String::as_str).collect::<Vec<_>>();
414        let family = HistogramVec::new(opts.into(), &labels).unwrap();
415        registry.register(Box::new(family.clone())).unwrap();
416        Self(family)
417    }
418
419    pub fn get(&self, label_values: &[impl AsRef<str>]) -> Histogram {
420        let labels = label_values.iter().map(AsRef::as_ref).collect::<Vec<_>>();
421        Histogram(self.0.get_metric_with_label_values(&labels).unwrap())
422    }
423}
424
425impl metrics::MetricsFamily<Box<dyn metrics::Histogram>> for HistogramFamily {
426    fn create(&self, labels: Vec<String>) -> Box<dyn metrics::Histogram> {
427        Box::new(self.get(&labels))
428    }
429}
430
431/// A [TextFamily](metrics::TextFamily) metric.
432#[derive(Clone, Debug)]
433pub struct TextFamily(GaugeFamily);
434
435impl TextFamily {
436    fn new(registry: &Registry, opts: Opts, labels: &[String]) -> Self {
437        Self(GaugeFamily::new(registry, opts, labels))
438    }
439}
440
441impl metrics::MetricsFamily<()> for TextFamily {
442    fn create(&self, labels: Vec<String>) {
443        self.0.create(labels).set(1);
444    }
445}
446
#[cfg(test)]
mod test {
    use metrics::Metrics;
    use tide_disco::metrics::Metrics as _;

    use super::*;

    /// Scalar metrics: update them through the trait handles, read them back through the by-name
    /// lookup API, and finally check the exported Prometheus text.
    #[test_log::test]
    fn test_prometheus_metrics() {
        let registry = PrometheusMetrics::default();

        // One metric of each kind; the text metric has no handle worth keeping.
        let counter = registry.create_counter("counter".into(), None);
        let gauge = registry.create_gauge("gauge".into(), None);
        let histogram = registry.create_histogram("histogram".into(), None);
        registry.create_text("text".into());

        // Each call performs a fresh lookup by name, so every check exercises the lookup API.
        let counter_value = || registry.get_counter("counter").unwrap().get();
        let gauge_value = || registry.get_gauge("gauge").unwrap().get();
        let histogram_handle = || registry.get_histogram("histogram").unwrap();

        // First round of updates and checks.
        counter.add(20);
        gauge.set(42);
        histogram.add_point(20f64);

        assert_eq!(counter_value(), 20);
        assert_eq!(gauge_value(), 42);
        assert_eq!(histogram_handle().sample_count(), 1);
        assert_eq!(histogram_handle().sum(), 20f64);
        assert_eq!(histogram_handle().mean(), 20f64);

        // Second round, to be sure later updates are reflected as well.
        counter.add(22);
        gauge.set(100);
        histogram.add_point(22f64);

        assert_eq!(counter_value(), 42);
        assert_eq!(gauge_value(), 100);
        assert_eq!(histogram_handle().sample_count(), 2);
        assert_eq!(histogram_handle().sum(), 42f64);
        assert_eq!(histogram_handle().mean(), 21f64);

        // The Prometheus text export must contain one sample line per value.
        let exported = registry.export().unwrap();
        let lines: Vec<&str> = exported.lines().collect();
        assert!(lines.contains(&"counter 42"));
        assert!(lines.contains(&"gauge 100"));
        assert!(lines.contains(&"histogram_sum 42"));
        assert!(lines.contains(&"histogram_count 2"));
        assert!(lines.contains(&"text 1"));
    }

    /// Nested subgroups: namespaces, lookup by path, and fully qualified names in the export.
    #[test_log::test]
    fn test_namespace() {
        let root = PrometheusMetrics::default();
        let outer = root.subgroup("subgroup1".into());
        let inner = outer.subgroup("subgroup2".into());
        let counter = inner.create_counter("counter".into(), None);
        inner.create_text("text".into());
        counter.add(42);

        // The nested group can be reached in one hop or one level at a time.
        let first_level = root.get_subgroup(["subgroup1"]).unwrap();
        let direct = root.get_subgroup(["subgroup1", "subgroup2"]).unwrap();
        let stepwise = root
            .get_subgroup(["subgroup1"])
            .unwrap()
            .get_subgroup(["subgroup2"])
            .unwrap();

        // Check namespacing.
        assert_eq!(first_level.namespace, ["subgroup1"]);
        assert_eq!(direct.namespace, ["subgroup1", "subgroup2"]);
        assert_eq!(stepwise.namespace, ["subgroup1", "subgroup2"]);

        // Both routes must lead to the same counter.
        assert_eq!(direct.get_counter("counter").unwrap().get(), 42);
        assert_eq!(stepwise.get_counter("counter").unwrap().get(), 42);

        // Exporting from the root must show the fully qualified counter and text names.
        let exported = root.export().unwrap();
        assert!(exported
            .lines()
            .any(|line| line == "subgroup1_subgroup2_counter 42"));
        assert!(exported
            .lines()
            .any(|line| line == "subgroup1_subgroup2_text 1"));
    }

    /// Labelled families: per-label counters, a text family, and their exported form.
    #[test_log::test]
    fn test_labels() {
        let registry = PrometheusMetrics::default();

        let http = registry.counter_family("http".into(), vec!["method".into()]);
        http.create(vec!["GET".into()]).add(1);
        http.create(vec!["POST".into()]).add(2);

        let version = registry.text_family("version".into(), vec!["semver".into(), "rev".into()]);
        version.create(vec!["0.1.0".into(), "d1b650a7".into()]);

        // Read the counters back through the by-name family lookup.
        let http_family = registry.get_counter_family("http").unwrap();
        assert_eq!(http_family.get(&["GET"]).get(), 1);
        assert_eq!(http_family.get(&["POST"]).get(), 2);

        // The export must carry the label values on every sample line.
        let exported = registry.export().unwrap();
        let lines: Vec<&str> = exported.lines().collect();
        assert!(lines.contains(&"http{method=\"GET\"} 1"), "{lines:?}");
        assert!(lines.contains(&"http{method=\"POST\"} 2"), "{lines:?}");
        assert!(
            lines.contains(&"version{rev=\"d1b650a7\",semver=\"0.1.0\"} 1"),
            "{lines:?}"
        );
    }
}