hotshot_query_service/data_source/sql.rs

// Copyright (c) 2022 Espresso Systems (espressosys.com)
// This file is part of the HotShot Query Service library.
//
// This program is free software: you can redistribute it and/or modify it under the terms of the GNU
// General Public License as published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
// This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
// even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
// You should have received a copy of the GNU General Public License along with this program. If not,
// see <https://www.gnu.org/licenses/>.

#![cfg(feature = "sql-data-source")]
14
pub use anyhow::Error;
use hotshot_types::traits::node_implementation::NodeType;
pub use refinery::Migration;
pub use sql::Transaction;

use super::{
    fetching,
    storage::sql::{self, SqlStorage},
    AvailabilityProvider, FetchingDataSource,
};
pub use crate::include_migrations;
use crate::{
    availability::{QueryableHeader, QueryablePayload},
    Header, Payload,
};

/// A [`FetchingDataSource`] builder backed by [`SqlStorage`], used to set fetching options before
/// constructing a [`SqlDataSource`].
pub type Builder<Types, Provider> = fetching::Builder<Types, SqlStorage, Provider>;

/// Configuration for connecting to the SQL database backing a [`SqlDataSource`].
pub type Config = sql::Config;

impl Config {
    /// Connect to the database with this config.
    pub async fn connect<Types, P: AvailabilityProvider<Types>>(
        self,
        provider: P,
    ) -> Result<SqlDataSource<Types, P>, Error>
    where
        Types: NodeType,
        Header<Types>: QueryableHeader<Types>,
        Payload<Types>: QueryablePayload<Types>,
    {
        self.builder(provider).await?.build().await
    }

    /// Connect to the database, setting options on the underlying [`FetchingDataSource`] using the
    /// [`fetching::Builder`] interface.
    pub async fn builder<Types, P: AvailabilityProvider<Types>>(
        self,
        provider: P,
    ) -> Result<Builder<Types, P>, Error>
    where
        Types: NodeType,
        Header<Types>: QueryableHeader<Types>,
        Payload<Types>: QueryablePayload<Types>,
    {
        SqlDataSource::connect(self, provider).await
    }
}

/// A data source for the APIs provided in this crate, backed by a remote PostgreSQL database.
///
/// # Administration
///
/// This data source will automatically connect to and perform queries on a remote SQL database.
/// However, _administration_ of the database, such as initialization, resetting, and backups, is
/// left out of the scope of this implementation, and is expected to be performed manually using
/// off-the-shelf DBMS administration tools. The one exception is migrations, which are handled
/// transparently by the [`SqlDataSource`].
///
/// ## Schema
///
/// All the objects created and used by [`SqlDataSource`] are grouped under a schema for easy
/// management. By default, the schema is named `hotshot`, and is created the first time a
/// [`SqlDataSource`] is constructed. The name of the schema can be configured by setting
/// [`Config::schema`].
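///
/// For example (a sketch only; the schema name here is hypothetical, and the exact signature of
/// [`Config::schema`] is an assumption based on the other builder-style options shown below, so
/// this snippet is not compiled as a doc test):
///
/// ```ignore
/// # use hotshot_query_service::data_source::sql::Config;
/// let config = Config::default().schema("my_app_hotshot");
/// ```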
///
/// ## Initialization
///
/// When creating a PostgreSQL [`SqlDataSource`], the caller can use [`Config`] to specify the
/// host, user, and database for the connection. If the `embedded-db` feature is enabled, the
/// caller can instead specify the file path for an SQLite database. As such, [`SqlDataSource`] is
/// not very opinionated about how the database instance is set up. The administrator must simply
/// ensure that there is a database dedicated to the [`SqlDataSource`] and a user with appropriate
/// permissions (all on `SCHEMA` and all on `DATABASE`) over that database.
///
/// Here is an example of how a sufficient database could be initialized. When using the standard
/// `postgres` Docker image, these statements could be placed in
/// `/docker-entrypoint-initdb.d/init.sql` to automatically initialize the database upon startup.
///
/// ```sql
/// CREATE DATABASE hotshot_query_service;
/// \connect hotshot_query_service;
/// CREATE USER hotshot_user WITH PASSWORD 'password';
/// GRANT ALL ON SCHEMA public TO hotshot_user;
/// GRANT ALL ON DATABASE hotshot_query_service TO hotshot_user WITH GRANT OPTION;
/// ```
///
/// For SQLite, simply provide the file path, and the file will be created if it does not already
/// exist.
///
/// One could then connect to this database with the following [`Config`] for PostgreSQL:
///
/// ```
/// # use hotshot_query_service::data_source::sql::Config;
/// #[cfg(not(feature = "embedded-db"))]
/// Config::default()
///     .host("postgres.database.hostname")
///     .database("hotshot_query_service")
///     .user("hotshot_user")
///     .password("password")
/// # ;
/// ```
///
/// Or, if the `embedded-db` feature is enabled, configure it as follows for SQLite:
///
/// ```
/// # use hotshot_query_service::data_source::sql::Config;
/// #[cfg(feature = "embedded-db")]
/// Config::default()
///     .db_path("temp.db".into())
/// # ;
/// ```
///
/// ## Resetting
///
/// In general, resetting the database when necessary is left up to the administrator. However, for
/// convenience, we do provide a [`reset_schema`](Config::reset_schema) option which can be used to
/// wipe out existing state and create a fresh instance of the query service. This is particularly
/// useful for development and staging environments. This option will permanently delete all
/// tables associated with the schema used by this query service, but will not reset other schemas
/// or databases.
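///
/// For example (a minimal sketch; `reset_schema` chains like the other [`Config`] options, as in
/// the testing module at the bottom of this file):
///
/// ```
/// # use hotshot_query_service::data_source::sql::Config;
/// # fn doc(config: Config) -> Config {
/// // DANGER: deletes all tables in this query service's schema the next time it connects.
/// config.reset_schema()
/// # }
/// ```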
///
/// ## Migrations
///
/// For the [`SqlDataSource`] to work, the database must be initialized with the appropriate schema,
/// and the schema must be kept up to date when deploying a new version of this software which
/// depends on a different schema. Both of these tasks are accomplished via _migrations_.
///
/// Each release of this software is bundled with a sequence of migration files: one migration for
/// each release that changed the schema, including the latest one. Replaying these SQL files
/// against a database with an older version of the schema, including a completely empty database,
/// will bring it up to date with the schema required by this version of the software. Upon creating
/// an instance of [`SqlDataSource`] and connecting to a database, the data source will
/// automatically fetch the current version from the database and, if it is old, replay the
/// necessary migration files.
///
/// ## Custom Migrations
///
/// In keeping with the philosophy of this crate, [`SqlDataSource`] is designed to be
/// [extensible and composable](#extension-and-composition). When extending the provided APIs with
/// new, application-specific queries, it will often be desirable to alter the schema of the
/// database in some way, such as adding additional columns to some of the tables or creating new
/// indices. When composing the provided APIs with additional API modules, it may also be desirable
/// to alter the schema, although the changes are more likely to be completely independent of the
/// schema used by this data source, such as adding entirely new tables.
///
/// In either case, the default schema can be modified by inserting additional migrations between
/// the migrations distributed with this crate. The new migrations will then automatically be
/// replayed as necessary when initializing a [`SqlDataSource`]. New custom migrations can be
/// added with each software update, to keep the custom data up to date as the default schema
/// changes.
///
/// Custom migrations can be inserted using [`Config::migrations`]. Each custom migration will be
/// inserted into the overall sequence of migrations in order of version number. The migrations
/// provided by this crate only use version numbers which are multiples of 100, so version numbers
/// which are not multiples of 100 can be used to insert custom migrations between the default
/// migrations, as illustrated below. You can also replace a default migration completely by
/// providing a custom migration with the same version number. This may be useful when an earlier
/// custom migration has altered the schema in such a way that a later migration no longer works
/// as-is. However, this technique is error-prone and should be used only when necessary.
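///
/// For example, a migrations directory might interleave custom and default versions like this
/// (all file names here are hypothetical; only the `V<version>__<name>.sql` naming convention
/// and the multiples-of-100 rule come from this crate and its migration framework):
///
/// ```text
/// V100__init.sql        -- default migration from this crate
/// V150__app_tables.sql  -- custom migration, runs between V100 and V200
/// V200__upgrade.sql     -- default migration from this crate
/// ```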
///
/// When using custom migrations, it is the user's responsibility to ensure that the resulting
/// schema is compatible with the schema expected by [`SqlDataSource`]. Adding things (tables,
/// columns, indices) should usually be safe. Removing, altering, or renaming things should be done
/// with extreme caution.
///
/// It is standard to store custom migrations as SQL files in a sub-directory of the crate. For ease
/// of release and deployment, such directories can be embedded into a Rust binary and parsed into
/// a list of [`Migration`] objects using the [`include_migrations`] macro.
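///
/// For example (a sketch; the `migrations` directory is hypothetical, so this snippet is not
/// compiled as a doc test):
///
/// ```ignore
/// use hotshot_query_service::{data_source::sql::Config, include_migrations};
///
/// // Embed the SQL files from a `migrations` directory at compile time and register them as
/// // custom migrations, to be interleaved with the defaults by version number.
/// let config = Config::default().migrations(include_migrations!("$CARGO_MANIFEST_DIR/migrations"));
/// ```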
///
/// It is also possible to take complete control over migrating the schema using
/// [`Config::no_migrations`] to prevent the [`SqlDataSource`] from running its own migrations. The
/// database administrator then becomes responsible for manually migrating the database and
/// ensuring that the schema is at all times compatible with the schema expected by the current
/// version of this software. Nevertheless, this may be the best option when your
/// application-specific schema has diverged significantly from the default schema.
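///
/// For example (a sketch; it is assumed here that `no_migrations` chains like the other
/// [`Config`] options, so this snippet is not compiled as a doc test):
///
/// ```ignore
/// # use hotshot_query_service::data_source::sql::Config;
/// // Skip automatic migrations; the administrator migrates the database out of band.
/// let config = Config::default().no_migrations();
/// ```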
///
/// # Synchronization
///
/// [`SqlDataSource`] implements [`VersionedDataSource`](super::VersionedDataSource), which means
/// changes are applied to the underlying database via transactions. [`Transaction`] maps exactly to
/// a transaction in the underlying RDBMS, and inherits its concurrency semantics.
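///
/// For example, several changes can be made atomic by issuing them through a single transaction
/// (a minimal sketch using the same `write`/`commit` interface as the composition example below):
///
/// ```
/// # use hotshot_query_service::data_source::{
/// #   sql::Error, SqlDataSource, Transaction, VersionedDataSource,
/// # };
/// # use hotshot_query_service::fetching::provider::NoFetching;
/// # use hotshot_query_service::testing::mocks::MockTypes as AppTypes;
/// # async fn doc(ds: &SqlDataSource<AppTypes, NoFetching>) -> Result<(), Error> {
/// let tx = ds.write().await?;
/// // ... stage any number of changes on `tx`; they become visible to readers atomically, and
/// // only once the transaction commits ...
/// tx.commit().await?;
/// # Ok(())
/// # }
/// ```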
///
/// # Extension and Composition
///
/// [`SqlDataSource`] is designed to be both extensible (so you can add additional state to the API
/// modules defined in this crate) and composable (so you can use [`SqlDataSource`] as one component
/// of a larger state type for an application with additional modules).
///
/// ## Extension
///
/// It is possible to add additional, application-specific state to [`SqlDataSource`]. If the new
/// state should live in memory, simply wrap the [`SqlDataSource`] in an
/// [`ExtensibleDataSource`](super::ExtensibleDataSource):
///
/// ```
/// # use hotshot_query_service::data_source::{
/// #   sql::{Config, Error}, ExtensibleDataSource, SqlDataSource,
/// # };
/// # use hotshot_query_service::fetching::provider::NoFetching;
/// # use hotshot_query_service::testing::mocks::MockTypes as AppTypes;
/// # async fn doc(config: Config) -> Result<(), Error> {
/// type AppState = &'static str;
///
/// let data_source: ExtensibleDataSource<SqlDataSource<AppTypes, NoFetching>, AppState> =
///     ExtensibleDataSource::new(config.connect(NoFetching).await?, "app state");
/// # Ok(())
/// # }
/// ```
///
/// The [`ExtensibleDataSource`](super::ExtensibleDataSource) wrapper implements all the same data
/// source traits as [`SqlDataSource`], and also provides access to the `AppState` parameter for use
/// in API endpoint handlers. This can be used to implement an app-specific data source trait and
/// add a new API endpoint that uses this app-specific data, as described in the
/// [extension guide](crate#extension).
///
/// If the new application-specific state should live in the SQL database itself, the implementation
/// is more involved, but still possible. Follow the steps for [custom
/// migrations](#custom-migrations) to modify the database schema to account for the new data you
/// want to store. You can then access this data through the [`SqlDataSource`] using
/// [`read`](super::VersionedDataSource::read) to run a custom read-only SQL query or
/// [`write`](super::VersionedDataSource::write) to execute a custom atomic mutation of the
/// database.
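///
/// For example, application-specific data could be read as follows (a minimal sketch; the custom
/// query itself is elided, since it depends on your application's schema):
///
/// ```
/// # use hotshot_query_service::data_source::{sql::Error, SqlDataSource, VersionedDataSource};
/// # use hotshot_query_service::fetching::provider::NoFetching;
/// # use hotshot_query_service::testing::mocks::MockTypes as AppTypes;
/// # async fn doc(ds: &SqlDataSource<AppTypes, NoFetching>) -> Result<(), Error> {
/// // Open a read-only transaction for a consistent view of the database.
/// let tx = ds.read().await?;
/// // ... run application-specific, read-only queries against `tx` ...
/// # drop(tx);
/// # Ok(())
/// # }
/// ```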
///
/// ## Composition
///
/// Composing [`SqlDataSource`] with other module states is fairly simple -- just
/// create an aggregate struct containing both [`SqlDataSource`] and your additional module
/// states, as described in the [composition guide](crate#composition). If the additional modules
/// have data that should live in the same database as the [`SqlDataSource`] data, you can follow
/// the steps in [custom migrations](#custom-migrations) to accommodate this.
///
/// ```
/// # use futures::StreamExt;
/// # use hotshot::types::SystemContextHandle;
/// # use hotshot_query_service::Error;
/// # use hotshot_query_service::data_source::{
/// #   sql::Config, Transaction, SqlDataSource, UpdateDataSource, VersionedDataSource,
/// # };
/// # use hotshot_query_service::fetching::provider::NoFetching;
/// # use hotshot_query_service::testing::mocks::{
/// #   MockNodeImpl as AppNodeImpl, MockTypes as AppTypes, MockVersions as AppVersions
/// # };
/// # use hotshot_example_types::node_types::TestVersions;
/// # use std::sync::Arc;
/// # use tide_disco::App;
/// # use tokio::spawn;
/// # use vbs::version::StaticVersionType;
/// struct AppState {
///     hotshot_qs: SqlDataSource<AppTypes, NoFetching>,
///     // additional state for other modules
/// }
///
/// async fn init_server<Ver: StaticVersionType + 'static>(
///     config: Config,
///     hotshot: SystemContextHandle<AppTypes, AppNodeImpl, AppVersions>,
/// ) -> anyhow::Result<App<Arc<AppState>, Error>> {
///     let mut hotshot_qs = config.connect(NoFetching).await?;
///     // Initialize storage for other modules, using `hotshot_qs` to access the database.
///     let tx = hotshot_qs.write().await?;
///     // ...
///     tx.commit().await?;
///
///     let state = Arc::new(AppState {
///         hotshot_qs,
///         // additional state for other modules
///     });
///     let mut app = App::with_state(state.clone());
///     // Register API modules.
///
///     spawn(async move {
///         let mut events = hotshot.event_stream();
///         while let Some(event) = events.next().await {
///             if state.hotshot_qs.update(&event).await.is_err() {
///                 continue;
///             }
///
///             let mut tx = state.hotshot_qs.write().await.unwrap();
///             // Update other modules' states based on `event`, using `tx` to access the database.
///             tx.commit().await.unwrap();
///         }
///     });
///
///     Ok(app)
/// }
/// ```
pub type SqlDataSource<Types, P> = FetchingDataSource<Types, SqlStorage, P>;

impl<Types, P: AvailabilityProvider<Types>> SqlDataSource<Types, P>
where
    Types: NodeType,
    Header<Types>: QueryableHeader<Types>,
    Payload<Types>: QueryablePayload<Types>,
{
    /// Connect to a remote database.
    ///
    /// This function returns a [`fetching::Builder`] which can be used to set options on the
    /// underlying [`FetchingDataSource`], before constructing the [`SqlDataSource`] with
    /// [`build`](fetching::Builder::build). For a convenient constructor that uses the default
    /// fetching options, see [`Config::connect`].
    pub async fn connect(config: Config, provider: P) -> Result<Builder<Types, P>, Error> {
        Ok(Self::builder(SqlStorage::connect(config).await?, provider))
    }
}

// These tests run the `postgres` Docker image, which doesn't work on Windows.
#[cfg(all(any(test, feature = "testing"), not(target_os = "windows")))]
pub mod testing {
    use async_trait::async_trait;
    use hotshot::types::Event;
    pub use sql::testing::TmpDb;

    use super::*;
    use crate::{
        data_source::UpdateDataSource,
        testing::{consensus::DataSourceLifeCycle, mocks::MockTypes},
    };

    #[async_trait]
    impl<P: AvailabilityProvider<MockTypes> + Default> DataSourceLifeCycle
        for SqlDataSource<MockTypes, P>
    {
        type Storage = TmpDb;

        async fn create(_node_id: usize) -> Self::Storage {
            TmpDb::init().await
        }

        async fn connect(tmp_db: &Self::Storage) -> Self {
            tmp_db.config().connect(Default::default()).await.unwrap()
        }

        async fn reset(tmp_db: &Self::Storage) -> Self {
            tmp_db
                .config()
                .reset_schema()
                .connect(Default::default())
                .await
                .unwrap()
        }

        async fn leaf_only_ds(tmp_db: &Self::Storage) -> Self {
            let config = tmp_db.config();
            let builder = config.builder(Default::default()).await.unwrap();

            builder
                .leaf_only()
                .build()
                .await
                .expect("failed to build leaf-only SQL data source")
        }

        async fn handle_event(&self, event: &Event<MockTypes>) {
            self.update(event).await.unwrap();
        }
    }
}

// These tests run the `postgres` Docker image, which doesn't work on Windows.
#[cfg(all(test, not(target_os = "windows")))]
mod generic_test {
    use super::SqlDataSource;
    // For some reason this is the only way to import the macro defined in another module of this
    // crate.
    use crate::*;
    use crate::{fetching::provider::NoFetching, testing::mocks::MockTypes};

    instantiate_data_source_tests!(SqlDataSource<MockTypes, NoFetching>);
}

#[cfg(all(test, not(target_os = "windows")))]
mod test {
    use hotshot_example_types::state_types::{TestInstanceState, TestValidatedState};
    use hotshot_types::{data::VidShare, vid::advz::advz_scheme};
    use jf_vid::VidScheme;

    use super::*;
    use crate::{
        availability::{
            AvailabilityDataSource, BlockInfo, LeafQueryData, UpdateAvailabilityData,
            VidCommonQueryData,
        },
        data_source::{
            storage::{NodeStorage, UpdateAvailabilityStorage},
            Transaction, VersionedDataSource,
        },
        fetching::provider::NoFetching,
        testing::{consensus::DataSourceLifeCycle, mocks::MockTypes, setup_test},
    };

    type D = SqlDataSource<MockTypes, NoFetching>;

    // This function should be generic, but the file system data source does not currently support
    // storing VID common and later the corresponding share.
    #[tokio::test(flavor = "multi_thread")]
    async fn test_vid_monotonicity() {
        use hotshot_example_types::node_types::TestVersions;

        setup_test();

        let storage = D::create(0).await;
        let ds = <D as DataSourceLifeCycle>::connect(&storage).await;

        // Generate some test VID data.
        let disperse = advz_scheme(2).disperse([]).unwrap();

        // Insert test data with VID common but no share.
        let leaf = LeafQueryData::<MockTypes>::genesis::<TestVersions>(
            &TestValidatedState::default(),
            &TestInstanceState::default(),
        )
        .await;
        let common =
            VidCommonQueryData::new(leaf.header().clone(), crate::VidCommon::V0(disperse.common));
        ds.append(BlockInfo::new(leaf, None, Some(common.clone()), None, None))
            .await
            .unwrap();

        // The common data should be available, but the share should not.
        assert_eq!(ds.get_vid_common(0).await.await, common);
        NodeStorage::<MockTypes>::vid_share(&mut ds.read().await.unwrap(), 0)
            .await
            .unwrap_err();

        // Re-insert the common data, this time with the share.
        let share0 = VidShare::V0(disperse.shares[0].clone());
        let mut tx = ds.write().await.unwrap();
        tx.insert_vid(common.clone(), Some(share0.clone()))
            .await
            .unwrap();
        tx.commit().await.unwrap();

        // Both the common data and the share should now be available.
        assert_eq!(ds.get_vid_common(0).await.await, common);
        assert_eq!(
            NodeStorage::<MockTypes>::vid_share(&mut ds.read().await.unwrap(), 0)
                .await
                .unwrap(),
            share0
        );
    }
}
451}