hotshot_query_service/data_source/
storage.rs

// Copyright (c) 2022 Espresso Systems (espressosys.com)
// This file is part of the HotShot Query Service library.
//
// This program is free software: you can redistribute it and/or modify it under the terms of the GNU
// General Public License as published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
// This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
// even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
// You should have received a copy of the GNU General Public License along with this program. If not,
// see <https://www.gnu.org/licenses/>.

//! Persistent storage for data sources.
//!
//! Naturally, an archival query service such as this is heavily dependent on a persistent storage
//! implementation. This module defines the interfaces required of this storage. Any storage layer
//! implementing the appropriate interfaces can be used as the storage layer when constructing a
//! [`FetchingDataSource`](super::FetchingDataSource), which can in turn be used to instantiate the
//! REST APIs provided by this crate.
//!
//! This module also comes with a few pre-built persistence implementations:
//! * [`SqlStorage`]
//! * [`FileSystemStorage`]
//!
//! # Storage Traits vs Data Source Traits
//!
//! Many of the traits defined in this module (e.g. [`NodeStorage`], [`ExplorerStorage`], and
//! others) are nearly identical to the corresponding data source traits (e.g.
//! [`NodeDataSource`](crate::node::NodeDataSource),
//! [`ExplorerDataSource`](crate::explorer::ExplorerDataSource), etc). They typically differ in
//! mutability: the storage traits are intended to be implemented on storage
//! [transactions](super::Transaction), and because even reading may update the internal
//! state of a transaction, such as a buffer or database cursor, these traits typically take `&mut
//! self`. This is not a barrier for concurrency since there may be many transactions open
//! simultaneously from a single data source. The data source traits, meanwhile, are implemented on
//! the data source itself. Internally, they usually open a fresh transaction and do all their work
//! on the transaction, not modifying the data source itself, so they take `&self`.
//!
//! For traits that differ _only_ in the mutability of the `self` parameter, it is almost possible
//! to combine them into a single trait whose methods take `self` by value, and implementing said
//! traits for the reference types `&SomeDataSource` and `&mut SomeDataSourceTransaction`. There are
//! two problems with this approach, which lead us to prefer the slight redundancy of having
//! separate versions of the traits with mutable and immutable methods:
//! * The trait bounds quickly get out of hand, since we now have trait bounds not only on the type
//!   itself, but also on references to that type, and the reference also requires the introduction
//!   of an additional lifetime parameter.
//! * We run into a longstanding [`rustc` bug](https://github.com/rust-lang/rust/issues/85063) in
//!   which type inference diverges when given trait bounds on reference types, even when
//!   theoretically the types are uniquely inferable. This issue can be worked around by [explicitly
//!   specifying type parameters at every call site](https://users.rust-lang.org/t/type-recursion-when-trait-bound-is-added-on-reference-type/74525/2),
//!   but this further exacerbates the ergonomic issues with this approach, past the point of
//!   viability.
//!
//! Occasionally, there may be further differences between the data source traits and corresponding
//! storage traits. For example, [`AvailabilityStorage`] also differs from
//! [`AvailabilityDataSource`](crate::availability::AvailabilityDataSource) in fallibility.
//!

59use std::ops::RangeBounds;
60
61use alloy::primitives::map::HashMap;
62use async_trait::async_trait;
63use futures::future::Future;
64use hotshot_types::{data::VidShare, traits::node_implementation::NodeType};
65use jf_merkle_tree::prelude::MerkleProof;
66use tagged_base64::TaggedBase64;
67
68use crate::{
69    availability::{
70        BlockId, BlockQueryData, LeafId, LeafQueryData, NamespaceId, PayloadMetadata,
71        PayloadQueryData, QueryableHeader, QueryablePayload, StateCertQueryData, TransactionHash,
72        TransactionQueryData, VidCommonMetadata, VidCommonQueryData,
73    },
74    explorer::{
75        query_data::{
76            BlockDetail, BlockIdentifier, BlockSummary, ExplorerSummary, GetBlockDetailError,
77            GetBlockSummariesError, GetBlockSummariesRequest, GetExplorerSummaryError,
78            GetSearchResultsError, GetTransactionDetailError, GetTransactionSummariesError,
79            GetTransactionSummariesRequest, SearchResult, TransactionDetailResponse,
80            TransactionIdentifier, TransactionSummary,
81        },
82        traits::{ExplorerHeader, ExplorerTransaction},
83    },
84    merklized_state::{MerklizedState, Snapshot},
85    node::{SyncStatus, TimeWindowQueryData, WindowStart},
86    Header, Payload, QueryResult, Transaction,
87};
88
89pub mod fail_storage;
90pub mod fs;
91mod ledger_log;
92pub mod pruning;
93pub mod sql;
94
95#[cfg(any(test, feature = "testing"))]
96pub use fail_storage::FailStorage;
97#[cfg(feature = "file-system-data-source")]
98pub use fs::FileSystemStorage;
99#[cfg(feature = "sql-data-source")]
100pub use sql::SqlStorage;
101
102/// Persistent storage for a HotShot blockchain.
103///
104/// This trait defines the interface which must be provided by the storage layer in order to
105/// implement an availability data source. It is very similar to
106/// [`AvailabilityDataSource`](crate::availability::AvailabilityDataSource) with every occurrence of
107/// [`Fetch`](crate::availability::Fetch) replaced by [`QueryResult`]. This is not a coincidence.
108/// The purpose of the storage layer is to provide all of the functionality of the data source
109/// layer, but independent of an external fetcher for missing data. Thus, when the storage layer
110/// encounters missing, corrupt, or inaccessible data, it simply gives up and replaces the missing
111/// data with [`Err`], rather than creating an asynchronous fetch request to retrieve the missing
112/// data.
113///
114/// Rust gives us ways to abstract and deduplicate these two similar APIs, but they do not lead to a
115/// better interface.
116#[async_trait]
117pub trait AvailabilityStorage<Types>: Send + Sync
118where
119    Types: NodeType,
120    Header<Types>: QueryableHeader<Types>,
121    Payload<Types>: QueryablePayload<Types>,
122{
123    async fn get_leaf(&mut self, id: LeafId<Types>) -> QueryResult<LeafQueryData<Types>>;
124    async fn get_block(&mut self, id: BlockId<Types>) -> QueryResult<BlockQueryData<Types>>;
125    async fn get_header(&mut self, id: BlockId<Types>) -> QueryResult<Header<Types>>;
126    async fn get_payload(&mut self, id: BlockId<Types>) -> QueryResult<PayloadQueryData<Types>>;
127    async fn get_payload_metadata(
128        &mut self,
129        id: BlockId<Types>,
130    ) -> QueryResult<PayloadMetadata<Types>>;
131    async fn get_vid_common(
132        &mut self,
133        id: BlockId<Types>,
134    ) -> QueryResult<VidCommonQueryData<Types>>;
135    async fn get_vid_common_metadata(
136        &mut self,
137        id: BlockId<Types>,
138    ) -> QueryResult<VidCommonMetadata<Types>>;
139
140    async fn get_leaf_range<R>(
141        &mut self,
142        range: R,
143    ) -> QueryResult<Vec<QueryResult<LeafQueryData<Types>>>>
144    where
145        R: RangeBounds<usize> + Send + 'static;
146    async fn get_block_range<R>(
147        &mut self,
148        range: R,
149    ) -> QueryResult<Vec<QueryResult<BlockQueryData<Types>>>>
150    where
151        R: RangeBounds<usize> + Send + 'static;
152
153    async fn get_header_range<R>(
154        &mut self,
155        range: R,
156    ) -> QueryResult<Vec<QueryResult<Header<Types>>>>
157    where
158        R: RangeBounds<usize> + Send + 'static,
159    {
160        let blocks = self.get_block_range(range).await?;
161        Ok(blocks
162            .into_iter()
163            .map(|block| block.map(|block| block.header))
164            .collect())
165    }
166    async fn get_payload_range<R>(
167        &mut self,
168        range: R,
169    ) -> QueryResult<Vec<QueryResult<PayloadQueryData<Types>>>>
170    where
171        R: RangeBounds<usize> + Send + 'static;
172    async fn get_payload_metadata_range<R>(
173        &mut self,
174        range: R,
175    ) -> QueryResult<Vec<QueryResult<PayloadMetadata<Types>>>>
176    where
177        R: RangeBounds<usize> + Send + 'static;
178    async fn get_vid_common_range<R>(
179        &mut self,
180        range: R,
181    ) -> QueryResult<Vec<QueryResult<VidCommonQueryData<Types>>>>
182    where
183        R: RangeBounds<usize> + Send + 'static;
184    async fn get_vid_common_metadata_range<R>(
185        &mut self,
186        range: R,
187    ) -> QueryResult<Vec<QueryResult<VidCommonMetadata<Types>>>>
188    where
189        R: RangeBounds<usize> + Send + 'static;
190
191    async fn get_transaction(
192        &mut self,
193        hash: TransactionHash<Types>,
194    ) -> QueryResult<TransactionQueryData<Types>>;
195
196    /// Get the first leaf which is available in the database with height >= `from`.
197    async fn first_available_leaf(&mut self, from: u64) -> QueryResult<LeafQueryData<Types>>;
198
199    async fn get_state_cert(&mut self, epoch: u64) -> QueryResult<StateCertQueryData<Types>>;
200}
201
202pub trait UpdateAvailabilityStorage<Types>
203where
204    Types: NodeType,
205{
206    fn insert_leaf(
207        &mut self,
208        leaf: LeafQueryData<Types>,
209    ) -> impl Send + Future<Output = anyhow::Result<()>>;
210    fn insert_block(
211        &mut self,
212        block: BlockQueryData<Types>,
213    ) -> impl Send + Future<Output = anyhow::Result<()>>;
214    fn insert_vid(
215        &mut self,
216        common: VidCommonQueryData<Types>,
217        share: Option<VidShare>,
218    ) -> impl Send + Future<Output = anyhow::Result<()>>;
219    fn insert_state_cert(
220        &mut self,
221        state_cert: StateCertQueryData<Types>,
222    ) -> impl Send + Future<Output = anyhow::Result<()>>;
223}
224
225#[async_trait]
226pub trait NodeStorage<Types>
227where
228    Types: NodeType,
229    Header<Types>: QueryableHeader<Types>,
230{
231    async fn block_height(&mut self) -> QueryResult<usize>;
232    async fn count_transactions_in_range(
233        &mut self,
234        range: impl RangeBounds<usize> + Send,
235        namespace: Option<NamespaceId<Types>>,
236    ) -> QueryResult<usize>;
237    async fn payload_size_in_range(
238        &mut self,
239        range: impl RangeBounds<usize> + Send,
240        namespace: Option<NamespaceId<Types>>,
241    ) -> QueryResult<usize>;
242    async fn vid_share<ID>(&mut self, id: ID) -> QueryResult<VidShare>
243    where
244        ID: Into<BlockId<Types>> + Send + Sync;
245    async fn get_header_window(
246        &mut self,
247        start: impl Into<WindowStart<Types>> + Send + Sync,
248        end: u64,
249        limit: usize,
250    ) -> QueryResult<TimeWindowQueryData<Header<Types>>>;
251
252    /// Search the database for missing objects and generate a report.
253    async fn sync_status(&mut self) -> QueryResult<SyncStatus>;
254}
255
256#[derive(Clone, Debug, Default)]
257pub struct Aggregate<Types: NodeType>
258where
259    Header<Types>: QueryableHeader<Types>,
260{
261    pub height: i64,
262    pub num_transactions: HashMap<Option<NamespaceId<Types>>, usize>,
263    pub payload_size: HashMap<Option<NamespaceId<Types>>, usize>,
264}
265
266pub trait AggregatesStorage<Types>
267where
268    Types: NodeType,
269    Header<Types>: QueryableHeader<Types>,
270{
271    /// The block height for which aggregate statistics are currently available.
272    fn aggregates_height(&mut self) -> impl Future<Output = anyhow::Result<usize>> + Send;
273
274    /// the last aggregate
275    fn load_prev_aggregate(
276        &mut self,
277    ) -> impl Future<Output = anyhow::Result<Option<Aggregate<Types>>>> + Send;
278}
279
280pub trait UpdateAggregatesStorage<Types>
281where
282    Types: NodeType,
283    Header<Types>: QueryableHeader<Types>,
284{
285    /// Update aggregate statistics based on a new block.
286    fn update_aggregates(
287        &mut self,
288        aggregate: Aggregate<Types>,
289        blocks: &[PayloadMetadata<Types>],
290    ) -> impl Future<Output = anyhow::Result<Aggregate<Types>>> + Send;
291}
292
293/// An interface for querying Data and Statistics from the HotShot Blockchain.
294///
295/// This interface provides methods that allows the enabling of querying data
296/// concerning the blockchain from the stored data for use with a
297/// block explorer.  It does not provide the same guarantees as the
298/// Availability data source with data fetching.  It is not concerned with
299/// being up-to-date or having all of the data required, but rather it is
300/// concerned with providing the requested data as quickly as possible, and in
301/// a way that can be easily cached.
302#[async_trait]
303pub trait ExplorerStorage<Types>
304where
305    Types: NodeType,
306    Header<Types>: ExplorerHeader<Types> + QueryableHeader<Types>,
307    Transaction<Types>: ExplorerTransaction<Types>,
308    Payload<Types>: QueryablePayload<Types>,
309{
310    /// `get_block_detail` is a method that retrieves the details of a specific
311    /// block from the blockchain.  The block is identified by the given
312    /// [BlockIdentifier].
313    async fn get_block_detail(
314        &mut self,
315        request: BlockIdentifier<Types>,
316    ) -> Result<BlockDetail<Types>, GetBlockDetailError>;
317
318    /// `get_block_summaries` is a method that retrieves a list of block
319    /// summaries from the blockchain.  The list is generated from the given
320    /// [GetBlockSummariesRequest].
321    async fn get_block_summaries(
322        &mut self,
323        request: GetBlockSummariesRequest<Types>,
324    ) -> Result<Vec<BlockSummary<Types>>, GetBlockSummariesError>;
325
326    /// `get_transaction_detail` is a method that retrieves the details of a
327    /// specific transaction from the blockchain.  The transaction is identified
328    /// by the given [TransactionIdentifier].
329    async fn get_transaction_detail(
330        &mut self,
331        request: TransactionIdentifier<Types>,
332    ) -> Result<TransactionDetailResponse<Types>, GetTransactionDetailError>;
333
334    /// `get_transaction_summaries` is a method that retrieves a list of
335    /// transaction summaries from the blockchain.  The list is generated from
336    /// the given [GetTransactionSummariesRequest].
337    async fn get_transaction_summaries(
338        &mut self,
339        request: GetTransactionSummariesRequest<Types>,
340    ) -> Result<Vec<TransactionSummary<Types>>, GetTransactionSummariesError>;
341
342    /// `get_explorer_summary` is a method that retrieves a summary overview of
343    /// the blockchain.  This is useful for displaying information that
344    /// indicates the overall status of the block chain.
345    async fn get_explorer_summary(
346        &mut self,
347    ) -> Result<ExplorerSummary<Types>, GetExplorerSummaryError>;
348
349    /// `get_search_results` is a method that retrieves the results of a search
350    /// query against the blockchain.  The results are generated from the given
351    /// query string.
352    async fn get_search_results(
353        &mut self,
354        query: TaggedBase64,
355    ) -> Result<SearchResult<Types>, GetSearchResultsError>;
356}
357
358/// This trait defines methods that a data source should implement
359/// It enables retrieval of the membership path for a leaf node, which can be used to reconstruct the Merkle tree state.
360#[async_trait]
361pub trait MerklizedStateStorage<Types, State, const ARITY: usize>
362where
363    Types: NodeType,
364    State: MerklizedState<Types, ARITY>,
365{
366    async fn get_path(
367        &mut self,
368        snapshot: Snapshot<Types, State, ARITY>,
369        key: State::Key,
370    ) -> QueryResult<MerkleProof<State::Entry, State::Key, State::T, ARITY>>;
371}
372
373#[async_trait]
374pub trait MerklizedStateHeightStorage {
375    async fn get_last_state_height(&mut self) -> QueryResult<usize>;
376}