hotshot_query_service/data_source/
storage.rs

1// Copyright (c) 2022 Espresso Systems (espressosys.com)
2// This file is part of the HotShot Query Service library.
3//
4// This program is free software: you can redistribute it and/or modify it under the terms of the GNU
5// General Public License as published by the Free Software Foundation, either version 3 of the
6// License, or (at your option) any later version.
7// This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
8// even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9// General Public License for more details.
10// You should have received a copy of the GNU General Public License along with this program. If not,
11// see <https://www.gnu.org/licenses/>.
12
13//! Persistent storage for data sources.
14//!
15//! Naturally, an archival query service such as this is heavily dependent on a persistent storage
16//! implementation. This module defines the interfaces required of this storage. Any storage layer
17//! implementing the appropriate interfaces can be used as the storage layer when constructing a
18//! [`FetchingDataSource`](super::FetchingDataSource), which can in turn be used to instantiate the
19//! REST APIs provided by this crate.
20//!
21//! This module also comes with a few pre-built persistence implementations:
22//! * [`SqlStorage`]
23//! * [`FileSystemStorage`]
24//!
25//! # Storage Traits vs Data Source Traits
26//!
27//! Many of the traits defined in this module (e.g. [`NodeStorage`], [`ExplorerStorage`], and
28//! others) are nearly identical to the corresponding data source traits (e.g.
29//! [`NodeDataSource`](crate::node::NodeDataSource),
30//! [`ExplorerDataSource`](crate::explorer::ExplorerDataSource), etc). They typically differ in
31//! mutability: the storage traits are intended to be implemented on storage
32//! [transactions](super::Transaction), and because even reading may update the internal
33//! state of a transaction, such as a buffer or database cursor, these traits typically take `&mut
34//! self`. This is not a barrier for concurrency since there may be many transactions open
35//! simultaneously from a single data source. The data source traits, meanwhile, are implemented on
36//! the data source itself. Internally, they usually open a fresh transaction and do all their work
37//! on the transaction, not modifying the data source itself, so they take `&self`.
38//!
39//! For traits that differ _only_ in the mutability of the `self` parameter, it is almost possible
40//! to combine them into a single trait whose methods take `self` by value, and to implement said
41//! trait for the reference types `&SomeDataSource` and `&mut SomeDataSourceTransaction`. There are
42//! two problems with this approach, which lead us to prefer the slight redundancy of having
43//! separate versions of the traits with mutable and immutable methods:
44//! * The trait bounds quickly get out of hand, since we now have trait bounds not only on the type
45//!   itself, but also on references to that type, and the reference also requires the introduction
46//!   of an additional lifetime parameter.
47//! * We run into a longstanding [`rustc` bug](https://github.com/rust-lang/rust/issues/85063) in
48//!   which type inference diverges when given trait bounds on reference types, even when
49//!   theoretically the types are uniquely inferable. This issue can be worked around by [explicitly
50//!   specifying type parameters at every call site](https://users.rust-lang.org/t/type-recursion-when-trait-bound-is-added-on-reference-type/74525/2),
51//!   but this further exacerbates the ergonomic issues with this approach, past the point of
52//!   viability.
53//!
54//! Occasionally, there may be further differences between the data source traits and corresponding
55//! storage traits. For example, [`AvailabilityStorage`] also differs from
56//! [`AvailabilityDataSource`](crate::availability::AvailabilityDataSource) in fallibility.
57//!
58
59use std::ops::RangeBounds;
60
61use alloy::primitives::map::HashMap;
62use async_trait::async_trait;
63use futures::future::Future;
64use hotshot_types::{
65    data::VidShare, simple_certificate::CertificatePair, traits::node_implementation::NodeType,
66};
67use jf_merkle_tree_compat::prelude::MerkleProof;
68use tagged_base64::TaggedBase64;
69
70use crate::{
71    availability::{
72        BlockId, BlockQueryData, LeafId, LeafQueryData, NamespaceId, PayloadMetadata,
73        PayloadQueryData, QueryableHeader, QueryablePayload, TransactionHash, VidCommonMetadata,
74        VidCommonQueryData,
75    },
76    explorer::{
77        query_data::{
78            BlockDetail, BlockIdentifier, BlockSummary, ExplorerSummary, GetBlockDetailError,
79            GetBlockSummariesError, GetBlockSummariesRequest, GetExplorerSummaryError,
80            GetSearchResultsError, GetTransactionDetailError, GetTransactionSummariesError,
81            GetTransactionSummariesRequest, SearchResult, TransactionDetailResponse,
82            TransactionIdentifier, TransactionSummary,
83        },
84        traits::{ExplorerHeader, ExplorerTransaction},
85    },
86    merklized_state::{MerklizedState, Snapshot},
87    node::{SyncStatus, TimeWindowQueryData, WindowStart},
88    Header, Payload, QueryResult, Transaction,
89};
90
91pub mod fail_storage;
92pub mod fs;
93mod ledger_log;
94pub mod pruning;
95pub mod sql;
96
97#[cfg(any(test, feature = "testing"))]
98pub use fail_storage::FailStorage;
99#[cfg(feature = "file-system-data-source")]
100pub use fs::FileSystemStorage;
101#[cfg(feature = "sql-data-source")]
102pub use sql::{SqlStorage, StorageConnectionType};
103
104/// Persistent storage for a HotShot blockchain.
105///
106/// This trait defines the interface which must be provided by the storage layer in order to
107/// implement an availability data source. It is very similar to
108/// [`AvailabilityDataSource`](crate::availability::AvailabilityDataSource) with every occurrence of
109/// [`Fetch`](crate::availability::Fetch) replaced by [`QueryResult`]. This is not a coincidence.
110/// The purpose of the storage layer is to provide all of the functionality of the data source
111/// layer, but independent of an external fetcher for missing data. Thus, when the storage layer
112/// encounters missing, corrupt, or inaccessible data, it simply gives up and replaces the missing
113/// data with [`Err`], rather than creating an asynchronous fetch request to retrieve the missing
114/// data.
115///
116/// Rust gives us ways to abstract and deduplicate these two similar APIs, but they do not lead to a
117/// better interface.
118#[async_trait]
119pub trait AvailabilityStorage<Types>: Send + Sync
120where
121    Types: NodeType,
122    Header<Types>: QueryableHeader<Types>,
123    Payload<Types>: QueryablePayload<Types>,
124{
    /// Look up the leaf identified by `id`.
    ///
    /// Like every method on this trait, data that the storage layer cannot provide is reported
    /// as [`Err`] rather than being fetched from elsewhere.
125    async fn get_leaf(&mut self, id: LeafId<Types>) -> QueryResult<LeafQueryData<Types>>;
    /// Look up the block identified by `id`.
126    async fn get_block(&mut self, id: BlockId<Types>) -> QueryResult<BlockQueryData<Types>>;
    /// Look up the header of the block identified by `id`.
127    async fn get_header(&mut self, id: BlockId<Types>) -> QueryResult<Header<Types>>;
    /// Look up the payload of the block identified by `id`.
128    async fn get_payload(&mut self, id: BlockId<Types>) -> QueryResult<PayloadQueryData<Types>>;
    /// Look up the payload metadata of the block identified by `id`.
129    async fn get_payload_metadata(
130        &mut self,
131        id: BlockId<Types>,
132    ) -> QueryResult<PayloadMetadata<Types>>;
    /// Look up the VID common data of the block identified by `id`.
133    async fn get_vid_common(
134        &mut self,
135        id: BlockId<Types>,
136    ) -> QueryResult<VidCommonQueryData<Types>>;
    /// Look up the VID common metadata of the block identified by `id`.
137    async fn get_vid_common_metadata(
138        &mut self,
139        id: BlockId<Types>,
140    ) -> QueryResult<VidCommonMetadata<Types>>;
141
    /// Look up the leaves in `range`.
    ///
    /// The return type is a nested result: the outer [`QueryResult`] covers the range query as
    /// a whole, while each element of the vector is itself a [`QueryResult`] for one leaf.
    /// NOTE(review): presumably `range` is a range of heights and an inner `Err` marks an
    /// individual object that is missing from storage -- confirm against the implementations.
142    async fn get_leaf_range<R>(
143        &mut self,
144        range: R,
145    ) -> QueryResult<Vec<QueryResult<LeafQueryData<Types>>>>
146    where
147        R: RangeBounds<usize> + Send + 'static;
    /// Look up the blocks in `range`.
    ///
    /// The outer result covers the whole query; each vector element is a per-block result.
148    async fn get_block_range<R>(
149        &mut self,
150        range: R,
151    ) -> QueryResult<Vec<QueryResult<BlockQueryData<Types>>>>
152    where
153        R: RangeBounds<usize> + Send + 'static;
154
    /// Look up the headers of the blocks in `range`.
    ///
    /// The default implementation is built on
    /// [`get_block_range`](Self::get_block_range): a failure of that call is propagated as the
    /// outer error, each successfully loaded block is reduced to its header, and each per-block
    /// error is passed through unchanged.
155    async fn get_header_range<R>(
156        &mut self,
157        range: R,
158    ) -> QueryResult<Vec<QueryResult<Header<Types>>>>
159    where
160        R: RangeBounds<usize> + Send + 'static,
161    {
        // Full blocks are loaded here even though only the headers are kept.
162        let blocks = self.get_block_range(range).await?;
163        Ok(blocks
164            .into_iter()
            // `Result::map` only touches `Ok` entries, so `Err` entries keep their position.
165            .map(|block| block.map(|block| block.header))
166            .collect())
167    }
    /// Look up the payloads of the blocks in `range`.
    ///
    /// The outer result covers the whole query; each vector element is a per-payload result.
168    async fn get_payload_range<R>(
169        &mut self,
170        range: R,
171    ) -> QueryResult<Vec<QueryResult<PayloadQueryData<Types>>>>
172    where
173        R: RangeBounds<usize> + Send + 'static;
    /// Look up the payload metadata of the blocks in `range`.
    ///
    /// The outer result covers the whole query; each vector element is a per-block result.
174    async fn get_payload_metadata_range<R>(
175        &mut self,
176        range: R,
177    ) -> QueryResult<Vec<QueryResult<PayloadMetadata<Types>>>>
178    where
179        R: RangeBounds<usize> + Send + 'static;
    /// Look up the VID common data of the blocks in `range`.
    ///
    /// The outer result covers the whole query; each vector element is a per-block result.
180    async fn get_vid_common_range<R>(
181        &mut self,
182        range: R,
183    ) -> QueryResult<Vec<QueryResult<VidCommonQueryData<Types>>>>
184    where
185        R: RangeBounds<usize> + Send + 'static;
    /// Look up the VID common metadata of the blocks in `range`.
    ///
    /// The outer result covers the whole query; each vector element is a per-block result.
186    async fn get_vid_common_metadata_range<R>(
187        &mut self,
188        range: R,
189    ) -> QueryResult<Vec<QueryResult<VidCommonMetadata<Types>>>>
190    where
191        R: RangeBounds<usize> + Send + 'static;
192
    /// Look up a block by the hash of a transaction.
    ///
    /// NOTE(review): presumably returns the block that contains the transaction with hash
    /// `hash` -- confirm against the implementations.
193    async fn get_block_with_transaction(
194        &mut self,
195        hash: TransactionHash<Types>,
196    ) -> QueryResult<BlockQueryData<Types>>;
197
198    /// Get the first leaf which is available in the database with height >= `from`.
199    async fn first_available_leaf(&mut self, from: u64) -> QueryResult<LeafQueryData<Types>>;
200}
201
/// Insertion interface for the objects read back through [`AvailabilityStorage`]: leaves, blocks,
/// and VID data.
///
/// Every method takes `&mut self` and returns a `Send` future that resolves to
/// `anyhow::Result<()>`.
202pub trait UpdateAvailabilityStorage<Types>
203where
204    Types: NodeType,
205{
    /// Insert `leaf` without an accompanying QC chain.
    ///
    /// The default implementation forwards to
    /// [`insert_leaf_with_qc_chain`](Self::insert_leaf_with_qc_chain) with `qc_chain` set to
    /// `None`.
206    fn insert_leaf(
207        &mut self,
208        leaf: LeafQueryData<Types>,
209    ) -> impl Send + Future<Output = anyhow::Result<()>> {
210        self.insert_leaf_with_qc_chain(leaf, None)
211    }
212
    /// Insert `leaf`, optionally together with a pair of certificates.
    ///
    /// NOTE(review): presumably `qc_chain` is the chain of certificates that finalizes `leaf`
    /// and is what `NodeStorage::latest_qc_chain` later returns -- confirm against the
    /// implementations.
213    fn insert_leaf_with_qc_chain(
214        &mut self,
215        leaf: LeafQueryData<Types>,
216        qc_chain: Option<[CertificatePair<Types>; 2]>,
217    ) -> impl Send + Future<Output = anyhow::Result<()>>;
    /// Insert `block`.
218    fn insert_block(
219        &mut self,
220        block: BlockQueryData<Types>,
221    ) -> impl Send + Future<Output = anyhow::Result<()>>;
    /// Insert the VID common data `common`, and a VID share if one is supplied.
    ///
    /// `share` is optional, so the common data can be inserted on its own.
222    fn insert_vid(
223        &mut self,
224        common: VidCommonQueryData<Types>,
225        share: Option<VidShare>,
226    ) -> impl Send + Future<Output = anyhow::Result<()>>;
227}
228
/// Storage-layer queries about the node's view of the chain: block height, transaction and
/// payload statistics, VID shares, header windows, and sync status.
///
/// As described in the module documentation, this is the storage counterpart of
/// [`NodeDataSource`](crate::node::NodeDataSource); its methods take `&mut self` because storage
/// traits are intended to be implemented on transactions.
229#[async_trait]
230pub trait NodeStorage<Types>
231where
232    Types: NodeType,
233    Header<Types>: QueryableHeader<Types>,
234{
    /// Get the block height known to storage.
235    async fn block_height(&mut self) -> QueryResult<usize>;
    /// Count transactions in `range`, optionally restricted to `namespace`.
    ///
    /// NOTE(review): presumably `range` is a range of block heights and `None` means "all
    /// namespaces" -- confirm against the implementations.
236    async fn count_transactions_in_range(
237        &mut self,
238        range: impl RangeBounds<usize> + Send,
239        namespace: Option<NamespaceId<Types>>,
240    ) -> QueryResult<usize>;
    /// Get the total payload size in `range`, optionally restricted to `namespace`.
    ///
    /// NOTE(review): the unit of the returned size (presumably bytes) and the meaning of a
    /// `None` namespace should be confirmed against the implementations.
241    async fn payload_size_in_range(
242        &mut self,
243        range: impl RangeBounds<usize> + Send,
244        namespace: Option<NamespaceId<Types>>,
245    ) -> QueryResult<usize>;
    /// Get the VID share stored for the block identified by `id`.
    ///
    /// `id` may be anything convertible into a [`BlockId`].
246    async fn vid_share<ID>(&mut self, id: ID) -> QueryResult<VidShare>
247    where
248        ID: Into<BlockId<Types>> + Send + Sync;
    /// Get a window of headers beginning at `start`.
    ///
    /// NOTE(review): presumably `end` is an exclusive upper bound on header timestamps and
    /// `limit` caps the number of headers returned -- confirm against the implementations.
249    async fn get_header_window(
250        &mut self,
251        start: impl Into<WindowStart<Types>> + Send + Sync,
252        end: u64,
253        limit: usize,
254    ) -> QueryResult<TimeWindowQueryData<Header<Types>>>;
255
    /// Get the latest stored QC chain, or `None` if there is none.
256    async fn latest_qc_chain(&mut self) -> QueryResult<Option<[CertificatePair<Types>; 2]>>;
257
258    /// Search the database for missing objects and generate a report.
259    async fn sync_status(&mut self) -> QueryResult<SyncStatus>;
260}
261
/// Aggregate statistics, broken down by namespace.
///
/// This is the value loaded by [`AggregatesStorage::load_prev_aggregate`] and both consumed and
/// produced by [`UpdateAggregatesStorage::update_aggregates`].
262#[derive(Clone, Debug, Default)]
263pub struct Aggregate<Types: NodeType>
264where
265    Header<Types>: QueryableHeader<Types>,
266{
    /// Block height this aggregate corresponds to.
    ///
    /// NOTE(review): this is an `i64` while the other heights in this module are `usize`;
    /// presumably it mirrors a database column type -- confirm.
267    pub height: i64,
    /// Transaction counts keyed by namespace.
    ///
    /// NOTE(review): the `None` key presumably holds the total across all namespaces -- confirm.
268    pub num_transactions: HashMap<Option<NamespaceId<Types>>, usize>,
    /// Payload sizes keyed by namespace, using the same keying as `num_transactions`.
269    pub payload_size: HashMap<Option<NamespaceId<Types>>, usize>,
270}
271
/// Read access to stored aggregate statistics.
///
/// Both methods take `&mut self` and return `Send` futures resolving to `anyhow::Result`.
272pub trait AggregatesStorage<Types>
273where
274    Types: NodeType,
275    Header<Types>: QueryableHeader<Types>,
276{
277    /// The block height for which aggregate statistics are currently available.
278    fn aggregates_height(&mut self) -> impl Future<Output = anyhow::Result<usize>> + Send;
279
280    /// Load the most recently stored [`Aggregate`], or `None` if no aggregate is available.
281    fn load_prev_aggregate(
282        &mut self,
283    ) -> impl Future<Output = anyhow::Result<Option<Aggregate<Types>>>> + Send;
284}
285
/// Write access to stored aggregate statistics.
286pub trait UpdateAggregatesStorage<Types>
287where
288    Types: NodeType,
289    Header<Types>: QueryableHeader<Types>,
290{
291    /// Update aggregate statistics based on a batch of new blocks.
    ///
    /// Takes an existing `aggregate` by value together with the payload metadata of `blocks`,
    /// and resolves to an [`Aggregate`].
    ///
    /// NOTE(review): presumably `aggregate` is the running total before `blocks` and the
    /// returned value is the running total after them -- confirm against the implementations.
292    fn update_aggregates(
293        &mut self,
294        aggregate: Aggregate<Types>,
295        blocks: &[PayloadMetadata<Types>],
296    ) -> impl Future<Output = anyhow::Result<Aggregate<Types>>> + Send;
297}
298
299/// An interface for querying data and statistics from the HotShot blockchain.
300///
301/// This interface provides methods for querying data about the blockchain
302/// from stored data, for use with a block explorer.  It does not provide the
303/// same guarantees as the Availability data source with data fetching.  It is
304/// not concerned with being up-to-date or having all of the data required, but
305/// rather it is concerned with providing the requested data as quickly as
306/// possible, and in a way that can be easily cached.
307///
/// Unlike the other storage traits in this module, each method here returns
/// its own dedicated error type rather than a [`QueryResult`].
308#[async_trait]
309pub trait ExplorerStorage<Types>
310where
311    Types: NodeType,
312    Header<Types>: ExplorerHeader<Types> + QueryableHeader<Types>,
313    Transaction<Types>: ExplorerTransaction<Types>,
314    Payload<Types>: QueryablePayload<Types>,
315{
316    /// `get_block_detail` is a method that retrieves the details of a specific
317    /// block from the blockchain.  The block is identified by the given
318    /// [BlockIdentifier].
    ///
    /// # Errors
    ///
    /// Failures are reported as [GetBlockDetailError].
319    async fn get_block_detail(
320        &mut self,
321        request: BlockIdentifier<Types>,
322    ) -> Result<BlockDetail<Types>, GetBlockDetailError>;
323
324    /// `get_block_summaries` is a method that retrieves a list of block
325    /// summaries from the blockchain.  The list is generated from the given
326    /// [GetBlockSummariesRequest].
    ///
    /// # Errors
    ///
    /// Failures are reported as [GetBlockSummariesError].
327    async fn get_block_summaries(
328        &mut self,
329        request: GetBlockSummariesRequest<Types>,
330    ) -> Result<Vec<BlockSummary<Types>>, GetBlockSummariesError>;
331
332    /// `get_transaction_detail` is a method that retrieves the details of a
333    /// specific transaction from the blockchain.  The transaction is identified
334    /// by the given [TransactionIdentifier].
    ///
    /// # Errors
    ///
    /// Failures are reported as [GetTransactionDetailError].
335    async fn get_transaction_detail(
336        &mut self,
337        request: TransactionIdentifier<Types>,
338    ) -> Result<TransactionDetailResponse<Types>, GetTransactionDetailError>;
339
340    /// `get_transaction_summaries` is a method that retrieves a list of
341    /// transaction summaries from the blockchain.  The list is generated from
342    /// the given [GetTransactionSummariesRequest].
    ///
    /// # Errors
    ///
    /// Failures are reported as [GetTransactionSummariesError].
343    async fn get_transaction_summaries(
344        &mut self,
345        request: GetTransactionSummariesRequest<Types>,
346    ) -> Result<Vec<TransactionSummary<Types>>, GetTransactionSummariesError>;
347
348    /// `get_explorer_summary` is a method that retrieves a summary overview of
349    /// the blockchain.  This is useful for displaying information that
350    /// indicates the overall status of the blockchain.
    ///
    /// # Errors
    ///
    /// Failures are reported as [GetExplorerSummaryError].
351    async fn get_explorer_summary(
352        &mut self,
353    ) -> Result<ExplorerSummary<Types>, GetExplorerSummaryError>;
354
355    /// `get_search_results` is a method that retrieves the results of a search
356    /// query against the blockchain.  The results are generated from the given
357    /// query, which is supplied as a [TaggedBase64] value.
    ///
    /// # Errors
    ///
    /// Failures are reported as [GetSearchResultsError].
358    async fn get_search_results(
359        &mut self,
360        query: TaggedBase64,
361    ) -> Result<SearchResult<Types>, GetSearchResultsError>;
362}
363
364/// Storage-layer access to merklized state.
365/// It enables retrieval of the membership path for a leaf node, which can be used to reconstruct the Merkle tree state.
///
/// The trait is generic over the merklized `State` type and the `ARITY` of its Merkle tree.
366#[async_trait]
367pub trait MerklizedStateStorage<Types, State, const ARITY: usize>
368where
369    Types: NodeType,
370    State: MerklizedState<Types, ARITY>,
371{
    /// Get the Merkle path for `key` in the state identified by `snapshot`.
    ///
    /// On success, returns a [`MerkleProof`] over the entry, key, and node value types declared
    /// by `State`.
372    async fn get_path(
373        &mut self,
374        snapshot: Snapshot<Types, State, ARITY>,
375        key: State::Key,
376    ) -> QueryResult<MerkleProof<State::Entry, State::Key, State::T, ARITY>>;
377}
378
/// Storage-layer access to the height of the stored merklized state.
379#[async_trait]
380pub trait MerklizedStateHeightStorage {
    /// Get the last state height recorded in storage.
    ///
    /// NOTE(review): presumably this is the height of the most recent block whose merklized
    /// state has been stored -- confirm against the implementations.
381    async fn get_last_state_height(&mut self) -> QueryResult<usize>;
382}