hotshot_query_service/data_source/storage/
pruning.rs

1// Copyright (c) 2022 Espresso Systems (espressosys.com)
2// This file is part of the HotShot Query Service library.
3//
4// This program is free software: you can redistribute it and/or modify it under the terms of the GNU
5// General Public License as published by the Free Software Foundation, either version 3 of the
6// License, or (at your option) any later version.
7// This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
8// even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9// General Public License for more details.
10// You should have received a copy of the GNU General Public License along with this program. If not,
11// see <https://www.gnu.org/licenses/>.
12
13use std::{fmt::Debug, time::Duration};
14
15use anyhow::bail;
16use async_trait::async_trait;
17
/// Settings that control how stored data is pruned.
///
/// All fields are private: they are populated through the `with_*` builder
/// methods and read back through the getter of the same name.
#[derive(Debug, Clone)]
pub struct PrunerCfg {
    /// Disk space threshold, in bytes; optional.
    pruning_threshold: Option<u64>,
    /// Data younger than this is never pruned, regardless of disk usage.
    minimum_retention: Duration,
    /// Ideal period for which data should be retained.
    target_retention: Duration,
    /// Number of blocks to remove in a single pruning operation.
    batch_size: u64,
    /// Maximum disk usage in basis points: 0 (0%) to 10000 (100%).
    max_usage: u16,
    /// Pruning interval.
    interval: Duration,
    /// Pages to remove from the freelist during SQLite vacuuming.
    incremental_vacuum_pages: u64,
    /// State tables to prune.
    state_tables: Vec<String>,
}
29
30#[async_trait]
31pub trait PruneStorage: PrunerConfig {
32    type Pruner: Default + Send;
33
34    async fn get_disk_usage(&self) -> anyhow::Result<u64> {
35        Ok(0)
36    }
37
38    async fn prune(&self, _pruner: &mut Self::Pruner) -> anyhow::Result<Option<u64>> {
39        Ok(None)
40    }
41
42    async fn vacuum(&self) -> anyhow::Result<()> {
43        Ok(())
44    }
45}
46
47#[async_trait]
48pub trait PrunedHeightStorage: Sized {
49    async fn load_pruned_height(&mut self) -> anyhow::Result<Option<u64>> {
50        Ok(None)
51    }
52}
53
54#[async_trait]
55pub trait PrunedHeightDataSource: Sized {
56    async fn load_pruned_height(&self) -> anyhow::Result<Option<u64>> {
57        Ok(None)
58    }
59}
60
61pub trait PrunerConfig {
62    fn set_pruning_config(&mut self, _cfg: PrunerCfg) {}
63    fn get_pruning_config(&self) -> Option<PrunerCfg> {
64        None
65    }
66}
67
68impl PrunerCfg {
69    pub fn new() -> Self {
70        Default::default()
71    }
72
73    pub fn validate(&self) -> anyhow::Result<()> {
74        if let Some(pruning_threshold) = self.pruning_threshold {
75            if pruning_threshold == 0 {
76                bail!("pruning_threshold must be greater than 0 or set to None")
77            }
78        }
79
80        if self.max_usage > 10000 {
81            bail!("max_usage must be less than or equal to 10000")
82        }
83
84        Ok(())
85    }
86
87    pub fn with_state_tables(mut self, state_tables: Vec<String>) -> Self {
88        self.state_tables = state_tables;
89        self
90    }
91
92    pub fn with_pruning_threshold(mut self, pruning_threshold: u64) -> Self {
93        self.pruning_threshold = Some(pruning_threshold);
94        self
95    }
96
97    pub fn with_minimum_retention(mut self, minimum_retention: Duration) -> Self {
98        self.minimum_retention = minimum_retention;
99        self
100    }
101
102    pub fn with_target_retention(mut self, target_retention: Duration) -> Self {
103        self.target_retention = target_retention;
104        self
105    }
106
107    pub fn with_batch_size(mut self, batch_size: u64) -> Self {
108        self.batch_size = batch_size;
109        self
110    }
111
112    pub fn with_max_usage(mut self, max_usage: u16) -> Self {
113        self.max_usage = max_usage;
114        self
115    }
116
117    pub fn with_interval(mut self, interval: Duration) -> Self {
118        self.interval = interval;
119        self
120    }
121
122    pub fn with_incremental_vacuum_pages(mut self, pages: u64) -> Self {
123        self.incremental_vacuum_pages = pages;
124        self
125    }
126
127    /// Disk space threshold (in bytes).
128    ///
129    /// If the disk usage exceeds this threshold, pruning of data starts from
130    /// the oldest data and continues until the disk usage falls below `MAX_USAGE
131    /// or until the oldest data is younger than `MINIMUM_RETENTION`
132    pub fn pruning_threshold(&self) -> Option<u64> {
133        self.pruning_threshold
134    }
135
136    /// Minimum data retention period
137    ///
138    /// Data younger than this is never pruned, regardless of disk usage.
139    pub fn minimum_retention(&self) -> Duration {
140        self.minimum_retention
141    }
142
143    /// Target data retention period
144    ///
145    /// This is the ideal period for which data should be retained
146    /// data younger than this and older than `MINIMUM_RETENTION` may be pruned if disk usage exceeds the `pruning_threshold`.
147    pub fn target_retention(&self) -> Duration {
148        self.target_retention
149    }
150
151    /// Number of blocks to remove in a single pruning operation.
152    pub fn batch_size(&self) -> u64 {
153        self.batch_size
154    }
155
156    /// Maximum disk usage (in basis points).
157    ///
158    /// Pruning stops once the disk usage falls below this value, even if
159    /// some data older than the `MINIMUM_RETENTION` remains. Values range
160    /// from 0 (0%) to 10000 (100%).
161    pub fn max_usage(&self) -> u16 {
162        self.max_usage
163    }
164
165    /// Pruning interval
166    pub fn interval(&self) -> Duration {
167        self.interval
168    }
169
170    /// pages to remove from freelist during SQLite vacuuming
171    pub fn incremental_vacuum_pages(&self) -> u64 {
172        self.incremental_vacuum_pages
173    }
174
175    /// State tables to prune
176    pub fn state_tables(&self) -> Vec<String> {
177        self.state_tables.clone()
178    }
179}
180
181impl Default for PrunerCfg {
182    fn default() -> Self {
183        Self {
184            // 3 TB
185            pruning_threshold: Some(3 * 10_u64.pow(12)),
186            // 1 day
187            minimum_retention: Duration::from_secs(24 * 3600),
188            // 7 days
189            target_retention: Duration::from_secs(7 * 24 * 3600),
190            batch_size: 30000,
191            // 80%
192            max_usage: 8000,
193            // 1.5 hour
194            interval: Duration::from_secs(5400),
195            // 8000 pages
196            incremental_vacuum_pages: 8000,
197            state_tables: Vec::new(),
198        }
199    }
200}