hotshot_query_service/data_source/
storage.rs

1// Copyright (c) 2022 Espresso Systems (espressosys.com)
2// This file is part of the HotShot Query Service library.
3//
4// This program is free software: you can redistribute it and/or modify it under the terms of the GNU
5// General Public License as published by the Free Software Foundation, either version 3 of the
6// License, or (at your option) any later version.
7// This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
8// even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9// General Public License for more details.
10// You should have received a copy of the GNU General Public License along with this program. If not,
11// see <https://www.gnu.org/licenses/>.
12
13//! Persistent storage for data sources.
14//!
15//! Naturally, an archival query service such as this is heavily dependent on a persistent storage
16//! implementation. This module defines the interfaces required of this storage. Any storage layer
17//! implementing the appropriate interfaces can be used as the storage layer when constructing a
18//! [`FetchingDataSource`](super::FetchingDataSource), which can in turn be used to instantiate the
19//! REST APIs provided by this crate.
20//!
21//! This module also comes with a few pre-built persistence implementations:
22//! * [`SqlStorage`]
23//! * [`FileSystemStorage`]
24//!
25//! # Storage Traits vs Data Source Traits
26//!
27//! Many of the traits defined in this module (e.g. [`NodeStorage`], [`ExplorerStorage`], and
28//! others) are nearly identical to the corresponding data source traits (e.g.
29//! [`NodeDataSource`](crate::node::NodeDataSource),
30//! [`ExplorerDataSource`](crate::explorer::ExplorerDataSource), etc). They typically differ in
31//! mutability: the storage traits are intended to be implemented on storage
32//! [transactions](super::Transaction), and because even reading may update the internal
33//! state of a transaction, such as a buffer or database cursor, these traits typically take `&mut
34//! self`. This is not a barrier for concurrency since there may be many transactions open
35//! simultaneously from a single data source. The data source traits, meanwhile, are implemented on
36//! the data source itself. Internally, they usually open a fresh transaction and do all their work
37//! on the transaction, not modifying the data source itself, so they take `&self`.
38//!
//! For traits that differ _only_ in the mutability of the `self` parameter, it is almost possible
//! to combine them into a single trait whose methods take `self` by value, and to implement said
//! trait for the reference types `&SomeDataSource` and `&mut SomeDataSourceTransaction`. There are
//! two problems with this approach, which lead us to prefer the slight redundancy of having
//! separate versions of the traits with mutable and immutable methods:
44//! * The trait bounds quickly get out of hand, since we now have trait bounds not only on the type
45//!   itself, but also on references to that type, and the reference also requires the introduction
46//!   of an additional lifetime parameter.
47//! * We run into a longstanding [`rustc` bug](https://github.com/rust-lang/rust/issues/85063) in
48//!   which type inference diverges when given trait bounds on reference types, even when
49//!   theoretically the types are uniquely inferable. This issue can be worked around by [explicitly
50//!   specifying type parameters at every call site](https://users.rust-lang.org/t/type-recursion-when-trait-bound-is-added-on-reference-type/74525/2),
51//!   but this further exacerbates the ergonomic issues with this approach, past the point of
52//!   viability.
53//!
54//! Occasionally, there may be further differences between the data source traits and corresponding
55//! storage traits. For example, [`AvailabilityStorage`] also differs from
56//! [`AvailabilityDataSource`](crate::availability::AvailabilityDataSource) in fallibility.
57//!
58
59use std::ops::RangeBounds;
60
61use alloy::primitives::map::HashMap;
62use async_trait::async_trait;
63use futures::future::Future;
64use hotshot_types::{
65    data::VidShare, simple_certificate::CertificatePair, traits::node_implementation::NodeType,
66};
67use jf_merkle_tree_compat::prelude::MerkleProof;
68use tagged_base64::TaggedBase64;
69
70use crate::{
71    Header, Payload, QueryResult, Transaction,
72    availability::{
73        BlockId, BlockQueryData, LeafId, LeafQueryData, NamespaceId, PayloadMetadata,
74        PayloadQueryData, QueryableHeader, QueryablePayload, TransactionHash, VidCommonMetadata,
75        VidCommonQueryData,
76    },
77    explorer::{
78        query_data::{
79            BlockDetail, BlockIdentifier, BlockSummary, ExplorerSummary, GetBlockDetailError,
80            GetBlockSummariesError, GetBlockSummariesRequest, GetExplorerSummaryError,
81            GetSearchResultsError, GetTransactionDetailError, GetTransactionSummariesError,
82            GetTransactionSummariesRequest, SearchResult, TransactionDetailResponse,
83            TransactionIdentifier, TransactionSummary,
84        },
85        traits::{ExplorerHeader, ExplorerTransaction},
86    },
87    merklized_state::{MerklizedState, Snapshot},
88    node::{SyncStatusQueryData, TimeWindowQueryData, WindowStart},
89};
90
91pub mod fail_storage;
92pub mod fs;
93mod ledger_log;
94pub mod pruning;
95pub mod sql;
96
97#[cfg(any(test, feature = "testing"))]
98pub use fail_storage::FailStorage;
99#[cfg(feature = "file-system-data-source")]
100pub use fs::FileSystemStorage;
101#[cfg(feature = "sql-data-source")]
102pub use sql::{SqlStorage, StorageConnectionType};
103
104/// Persistent storage for a HotShot blockchain.
105///
106/// This trait defines the interface which must be provided by the storage layer in order to
107/// implement an availability data source. It is very similar to
108/// [`AvailabilityDataSource`](crate::availability::AvailabilityDataSource) with every occurrence of
109/// [`Fetch`](crate::availability::Fetch) replaced by [`QueryResult`]. This is not a coincidence.
110/// The purpose of the storage layer is to provide all of the functionality of the data source
111/// layer, but independent of an external fetcher for missing data. Thus, when the storage layer
112/// encounters missing, corrupt, or inaccessible data, it simply gives up and replaces the missing
113/// data with [`Err`], rather than creating an asynchronous fetch request to retrieve the missing
114/// data.
115///
116/// Rust gives us ways to abstract and deduplicate these two similar APIs, but they do not lead to a
117/// better interface.
118#[async_trait]
119pub trait AvailabilityStorage<Types>: Send + Sync
120where
121    Types: NodeType,
122    Header<Types>: QueryableHeader<Types>,
123    Payload<Types>: QueryablePayload<Types>,
124{
125    async fn get_leaf(&mut self, id: LeafId<Types>) -> QueryResult<LeafQueryData<Types>>;
126    async fn get_block(&mut self, id: BlockId<Types>) -> QueryResult<BlockQueryData<Types>>;
127    async fn get_header(&mut self, id: BlockId<Types>) -> QueryResult<Header<Types>>;
128    async fn get_payload(&mut self, id: BlockId<Types>) -> QueryResult<PayloadQueryData<Types>>;
129    async fn get_payload_metadata(
130        &mut self,
131        id: BlockId<Types>,
132    ) -> QueryResult<PayloadMetadata<Types>>;
133    async fn get_vid_common(
134        &mut self,
135        id: BlockId<Types>,
136    ) -> QueryResult<VidCommonQueryData<Types>>;
137    async fn get_vid_common_metadata(
138        &mut self,
139        id: BlockId<Types>,
140    ) -> QueryResult<VidCommonMetadata<Types>>;
141
142    async fn get_leaf_range<R>(
143        &mut self,
144        range: R,
145    ) -> QueryResult<Vec<QueryResult<LeafQueryData<Types>>>>
146    where
147        R: RangeBounds<usize> + Send + 'static;
148    async fn get_block_range<R>(
149        &mut self,
150        range: R,
151    ) -> QueryResult<Vec<QueryResult<BlockQueryData<Types>>>>
152    where
153        R: RangeBounds<usize> + Send + 'static;
154
155    async fn get_header_range<R>(
156        &mut self,
157        range: R,
158    ) -> QueryResult<Vec<QueryResult<Header<Types>>>>
159    where
160        R: RangeBounds<usize> + Send + 'static,
161    {
162        let blocks = self.get_block_range(range).await?;
163        Ok(blocks
164            .into_iter()
165            .map(|block| block.map(|block| block.header))
166            .collect())
167    }
168    async fn get_payload_range<R>(
169        &mut self,
170        range: R,
171    ) -> QueryResult<Vec<QueryResult<PayloadQueryData<Types>>>>
172    where
173        R: RangeBounds<usize> + Send + 'static;
174    async fn get_payload_metadata_range<R>(
175        &mut self,
176        range: R,
177    ) -> QueryResult<Vec<QueryResult<PayloadMetadata<Types>>>>
178    where
179        R: RangeBounds<usize> + Send + 'static;
180    async fn get_vid_common_range<R>(
181        &mut self,
182        range: R,
183    ) -> QueryResult<Vec<QueryResult<VidCommonQueryData<Types>>>>
184    where
185        R: RangeBounds<usize> + Send + 'static;
186    async fn get_vid_common_metadata_range<R>(
187        &mut self,
188        range: R,
189    ) -> QueryResult<Vec<QueryResult<VidCommonMetadata<Types>>>>
190    where
191        R: RangeBounds<usize> + Send + 'static;
192
193    async fn get_block_with_transaction(
194        &mut self,
195        hash: TransactionHash<Types>,
196    ) -> QueryResult<BlockQueryData<Types>>;
197
198    /// Get the first leaf which is available in the database with height >= `from`.
199    async fn first_available_leaf(&mut self, from: u64) -> QueryResult<LeafQueryData<Types>>;
200}
201
202pub trait UpdateAvailabilityStorage<Types>
203where
204    Types: NodeType,
205{
206    fn insert_leaf(
207        &mut self,
208        leaf: LeafQueryData<Types>,
209    ) -> impl Send + Future<Output = anyhow::Result<()>> {
210        self.insert_leaf_with_qc_chain(leaf, None)
211    }
212
213    fn insert_leaf_with_qc_chain(
214        &mut self,
215        leaf: LeafQueryData<Types>,
216        qc_chain: Option<[CertificatePair<Types>; 2]>,
217    ) -> impl Send + Future<Output = anyhow::Result<()>>;
218    fn insert_block(
219        &mut self,
220        block: BlockQueryData<Types>,
221    ) -> impl Send + Future<Output = anyhow::Result<()>>;
222    fn insert_vid(
223        &mut self,
224        common: VidCommonQueryData<Types>,
225        share: Option<VidShare>,
226    ) -> impl Send + Future<Output = anyhow::Result<()>>;
227}
228
229#[async_trait]
230pub trait NodeStorage<Types>
231where
232    Types: NodeType,
233    Header<Types>: QueryableHeader<Types>,
234{
235    async fn block_height(&mut self) -> QueryResult<usize>;
236    async fn count_transactions_in_range(
237        &mut self,
238        range: impl RangeBounds<usize> + Send,
239        namespace: Option<NamespaceId<Types>>,
240    ) -> QueryResult<usize>;
241    async fn payload_size_in_range(
242        &mut self,
243        range: impl RangeBounds<usize> + Send,
244        namespace: Option<NamespaceId<Types>>,
245    ) -> QueryResult<usize>;
246    async fn vid_share<ID>(&mut self, id: ID) -> QueryResult<VidShare>
247    where
248        ID: Into<BlockId<Types>> + Send + Sync;
249    async fn get_header_window(
250        &mut self,
251        start: impl Into<WindowStart<Types>> + Send + Sync,
252        end: u64,
253        limit: usize,
254    ) -> QueryResult<TimeWindowQueryData<Header<Types>>>;
255
256    async fn latest_qc_chain(&mut self) -> QueryResult<Option<[CertificatePair<Types>; 2]>>;
257
258    /// Search the given range of the database for missing objects.
259    async fn sync_status_for_range(
260        &mut self,
261        from: usize,
262        to: usize,
263    ) -> QueryResult<SyncStatusQueryData>;
264}
265
266#[derive(Clone, Debug, Default)]
267pub struct Aggregate<Types: NodeType>
268where
269    Header<Types>: QueryableHeader<Types>,
270{
271    pub height: i64,
272    pub num_transactions: HashMap<Option<NamespaceId<Types>>, usize>,
273    pub payload_size: HashMap<Option<NamespaceId<Types>>, usize>,
274}
275
276pub trait AggregatesStorage<Types>
277where
278    Types: NodeType,
279    Header<Types>: QueryableHeader<Types>,
280{
281    /// The block height for which aggregate statistics are currently available.
282    fn aggregates_height(&mut self) -> impl Future<Output = anyhow::Result<usize>> + Send;
283
284    /// the last aggregate
285    fn load_prev_aggregate(
286        &mut self,
287    ) -> impl Future<Output = anyhow::Result<Option<Aggregate<Types>>>> + Send;
288}
289
290pub trait UpdateAggregatesStorage<Types>
291where
292    Types: NodeType,
293    Header<Types>: QueryableHeader<Types>,
294{
295    /// Update aggregate statistics based on a new block.
296    fn update_aggregates(
297        &mut self,
298        aggregate: Aggregate<Types>,
299        blocks: &[PayloadMetadata<Types>],
300    ) -> impl Future<Output = anyhow::Result<Aggregate<Types>>> + Send;
301}
302
303/// An interface for querying Data and Statistics from the HotShot Blockchain.
304///
305/// This interface provides methods that allows the enabling of querying data
306/// concerning the blockchain from the stored data for use with a
307/// block explorer.  It does not provide the same guarantees as the
308/// Availability data source with data fetching.  It is not concerned with
309/// being up-to-date or having all of the data required, but rather it is
310/// concerned with providing the requested data as quickly as possible, and in
311/// a way that can be easily cached.
312#[async_trait]
313pub trait ExplorerStorage<Types>
314where
315    Types: NodeType,
316    Header<Types>: ExplorerHeader<Types> + QueryableHeader<Types>,
317    Transaction<Types>: ExplorerTransaction<Types>,
318    Payload<Types>: QueryablePayload<Types>,
319{
320    /// `get_block_detail` is a method that retrieves the details of a specific
321    /// block from the blockchain.  The block is identified by the given
322    /// [BlockIdentifier].
323    async fn get_block_detail(
324        &mut self,
325        request: BlockIdentifier<Types>,
326    ) -> Result<BlockDetail<Types>, GetBlockDetailError>;
327
328    /// `get_block_summaries` is a method that retrieves a list of block
329    /// summaries from the blockchain.  The list is generated from the given
330    /// [GetBlockSummariesRequest].
331    async fn get_block_summaries(
332        &mut self,
333        request: GetBlockSummariesRequest<Types>,
334    ) -> Result<Vec<BlockSummary<Types>>, GetBlockSummariesError>;
335
336    /// `get_transaction_detail` is a method that retrieves the details of a
337    /// specific transaction from the blockchain.  The transaction is identified
338    /// by the given [TransactionIdentifier].
339    async fn get_transaction_detail(
340        &mut self,
341        request: TransactionIdentifier<Types>,
342    ) -> Result<TransactionDetailResponse<Types>, GetTransactionDetailError>;
343
344    /// `get_transaction_summaries` is a method that retrieves a list of
345    /// transaction summaries from the blockchain.  The list is generated from
346    /// the given [GetTransactionSummariesRequest].
347    async fn get_transaction_summaries(
348        &mut self,
349        request: GetTransactionSummariesRequest<Types>,
350    ) -> Result<Vec<TransactionSummary<Types>>, GetTransactionSummariesError>;
351
352    /// `get_explorer_summary` is a method that retrieves a summary overview of
353    /// the blockchain.  This is useful for displaying information that
354    /// indicates the overall status of the block chain.
355    async fn get_explorer_summary(
356        &mut self,
357    ) -> Result<ExplorerSummary<Types>, GetExplorerSummaryError>;
358
359    /// `get_search_results` is a method that retrieves the results of a search
360    /// query against the blockchain.  The results are generated from the given
361    /// query string.
362    async fn get_search_results(
363        &mut self,
364        query: TaggedBase64,
365    ) -> Result<SearchResult<Types>, GetSearchResultsError>;
366}
367
368/// This trait defines methods that a data source should implement
369/// It enables retrieval of the membership path for a leaf node, which can be used to reconstruct the Merkle tree state.
370#[async_trait]
371pub trait MerklizedStateStorage<Types, State, const ARITY: usize>
372where
373    Types: NodeType,
374    State: MerklizedState<Types, ARITY>,
375{
376    async fn get_path(
377        &mut self,
378        snapshot: Snapshot<Types, State, ARITY>,
379        key: State::Key,
380    ) -> QueryResult<MerkleProof<State::Entry, State::Key, State::T, ARITY>>;
381}
382
383#[async_trait]
384pub trait MerklizedStateHeightStorage {
385    async fn get_last_state_height(&mut self) -> QueryResult<usize>;
386}