hotshot_query_service/data_source/sql.rs
// Copyright (c) 2022 Espresso Systems (espressosys.com)
// This file is part of the HotShot Query Service library.
//
// This program is free software: you can redistribute it and/or modify it under the terms of the GNU
// General Public License as published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
// This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
// even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
// You should have received a copy of the GNU General Public License along with this program. If not,
// see <https://www.gnu.org/licenses/>.

#![cfg(feature = "sql-data-source")]

pub use anyhow::Error;
use hotshot_types::traits::node_implementation::NodeType;
pub use refinery::Migration;
pub use sql::Transaction;

use super::{
    fetching,
    storage::sql::{self, SqlStorage},
    AvailabilityProvider, FetchingDataSource,
};
pub use crate::include_migrations;
use crate::{
    availability::{QueryableHeader, QueryablePayload},
    Header, Payload,
};

pub type Builder<Types, Provider> = fetching::Builder<Types, SqlStorage, Provider>;

pub type Config = sql::Config;

impl Config {
    /// Connect to the database with this config.
    pub async fn connect<Types, P: AvailabilityProvider<Types>>(
        self,
        provider: P,
    ) -> Result<SqlDataSource<Types, P>, Error>
    where
        Types: NodeType,
        Header<Types>: QueryableHeader<Types>,
        Payload<Types>: QueryablePayload<Types>,
    {
        self.builder(provider).await?.build().await
    }

    /// Connect to the database, setting options on the underlying [`FetchingDataSource`] using the
    /// [`fetching::Builder`] interface.
    pub async fn builder<Types, P: AvailabilityProvider<Types>>(
        self,
        provider: P,
    ) -> Result<Builder<Types, P>, Error>
    where
        Types: NodeType,
        Header<Types>: QueryableHeader<Types>,
        Payload<Types>: QueryablePayload<Types>,
    {
        SqlDataSource::connect(self, provider).await
    }
}

/// A data source for the APIs provided in this crate, backed by a remote PostgreSQL database (or
/// a SQLite database, if the `embedded-db` feature is enabled).
///
/// # Administration
///
/// This data source will automatically connect to and perform queries on a remote SQL database.
/// However, _administration_ of the database, such as initialization, resetting, and backups, is
/// left out of the scope of this implementation, and is expected to be performed manually using
/// off-the-shelf DBMS administration tools. The one exception is migrations, which are handled
/// transparently by the [`SqlDataSource`].
///
/// ## Schema
///
/// All the objects created and used by [`SqlDataSource`] are grouped under a schema for easy
/// management. By default, the schema is named `hotshot`, and is created the first time a
/// [`SqlDataSource`] is constructed. The name of the schema can be configured by setting
/// [`Config::schema`].
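///
/// For example, a custom schema name can be configured like this (a minimal sketch, assuming
/// `schema` takes the name as a string, like the other builder methods shown below):
///
/// ```
/// # use hotshot_query_service::data_source::sql::Config;
/// #[cfg(not(feature = "embedded-db"))]
/// Config::default()
///     .schema("my_app")
/// # ;
/// ```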
///
/// ## Initialization
///
/// When creating a PostgreSQL [`SqlDataSource`], the caller can use [`Config`] to specify the
/// host, user, and database for the connection. If the `embedded-db` feature is enabled, the
/// caller can instead specify the file path for a SQLite database. As such, [`SqlDataSource`] is
/// not very opinionated about how the database instance is set up. The administrator must simply
/// ensure that there is a database dedicated to the [`SqlDataSource`] and a user with appropriate
/// permissions (all on `SCHEMA` and all on `DATABASE`) over that database.
///
/// Here is an example of how a sufficient database could be initialized. When using the standard
/// `postgres` Docker image, these statements could be placed in
/// `/docker-entrypoint-initdb.d/init.sql` to automatically initialize the database upon startup.
///
/// ```sql
/// CREATE DATABASE hotshot_query_service;
/// \connect hotshot_query_service;
/// CREATE USER hotshot_user WITH PASSWORD 'password';
/// GRANT ALL ON SCHEMA public TO hotshot_user;
/// GRANT ALL ON DATABASE hotshot_query_service TO hotshot_user WITH GRANT OPTION;
/// ```
///
/// For SQLite, simply provide the file path, and the file will be created if it does not already
/// exist.
///
/// One could then connect to this database with the following [`Config`] for PostgreSQL:
///
/// ```
/// # use hotshot_query_service::data_source::sql::Config;
/// #[cfg(not(feature = "embedded-db"))]
/// Config::default()
///     .host("postgres.database.hostname")
///     .database("hotshot_query_service")
///     .user("hotshot_user")
///     .password("password")
/// # ;
/// ```
///
/// Or, if the `embedded-db` feature is enabled, configure it as follows for SQLite:
///
/// ```
/// # use hotshot_query_service::data_source::sql::Config;
/// #[cfg(feature = "embedded-db")]
/// Config::default()
///     .db_path("temp.db".into())
/// # ;
/// ```
///
/// ## Resetting
///
/// In general, resetting the database when necessary is left up to the administrator. However,
/// for convenience, we do provide a [`reset_schema`](Config::reset_schema) option which can be
/// used to wipe out existing state and create a fresh instance of the query service. This is
/// particularly useful for development and staging environments. This option will permanently
/// delete all tables associated with the schema used by this query service, but will not reset
/// other schemas or databases.
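///
/// For example (a minimal sketch; `reset_schema` consumes and returns the [`Config`], like the
/// other builder options):
///
/// ```
/// # use hotshot_query_service::data_source::{sql::{Config, Error}, SqlDataSource};
/// # use hotshot_query_service::fetching::provider::NoFetching;
/// # use hotshot_query_service::testing::mocks::MockTypes as AppTypes;
/// # async fn doc(config: Config) -> Result<(), Error> {
/// // DANGER: this wipes out all existing data in this query service's schema.
/// let ds: SqlDataSource<AppTypes, NoFetching> =
///     config.reset_schema().connect(NoFetching).await?;
/// # Ok(())
/// # }
/// ```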
///
/// ## Migrations
///
/// For the [`SqlDataSource`] to work, the database must be initialized with the appropriate
/// schema, and the schema must be kept up to date when deploying a new version of this software
/// which depends on a different schema. Both of these tasks are accomplished via _migrations_.
///
/// Each release of this software is bundled with a sequence of migration files: one migration for
/// each release that changed the schema, including the latest one. Replaying these SQL files
/// against a database with an older version of the schema, including a completely empty database,
/// will bring it up to date with the schema required by this version of the software. Upon
/// creating an instance of [`SqlDataSource`] and connecting to a database, the data source will
/// automatically fetch the current version from the database and, if it is old, replay the
/// necessary migration files.
///
/// ## Custom Migrations
///
/// In keeping with the philosophy of this crate, [`SqlDataSource`] is designed to be
/// [extensible and composable](#extension-and-composition). When extending the provided APIs with
/// new, application-specific queries, it will often be desirable to alter the schema of the
/// database in some way, such as adding additional columns to some of the tables or creating new
/// indices. When composing the provided APIs with additional API modules, it may also be
/// desirable to alter the schema, although the changes are more likely to be completely
/// independent of the schema used by this data source, such as adding entirely new tables.
///
/// In either case, the default schema can be modified by inserting additional migrations between
/// the migrations distributed with this crate. The new migrations will then automatically be
/// replayed as necessary when initializing a [`SqlDataSource`]. New custom migrations can be
/// added with each software update, to keep the custom data up to date as the default schema
/// changes.
///
/// Custom migrations can be inserted using [`Config::migrations`]. Each custom migration will be
/// inserted into the overall sequence of migrations in order of version number. The migrations
/// provided by this crate only use version numbers which are multiples of 100, so version numbers
/// which are not multiples of 100 can be used to insert custom migrations between the default
/// migrations. You can also replace a default migration completely by providing a custom
/// migration with the same version number. This may be useful when an earlier custom migration
/// has altered the schema in such a way that a later migration no longer works as-is. However,
/// this technique is error-prone and should be used only when necessary.
///
/// When using custom migrations, it is the user's responsibility to ensure that the resulting
/// schema is compatible with the schema expected by [`SqlDataSource`]. Adding things (tables,
/// columns, indices) should usually be safe. Removing, altering, or renaming things should be
/// done with extreme caution.
///
/// It is standard to store custom migrations as SQL files in a sub-directory of the crate. For
/// ease of release and deployment, such directories can be embedded into a Rust binary and parsed
/// into a list of [`Migration`] objects using the [`include_migrations`] macro, as shown below.
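///
/// For example (a minimal sketch; the `migrations` directory is hypothetical, and `migrations` is
/// assumed to accept the embedded [`Migration`]s directly):
///
/// ```
/// # use hotshot_query_service::data_source::sql::{include_migrations, Config};
/// let config = Config::default()
///     .migrations(include_migrations!("$CARGO_MANIFEST_DIR/migrations"));
/// ```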
///
/// It is also possible to take complete control over migrating the schema using
/// [`Config::no_migrations`] to prevent the [`SqlDataSource`] from running its own migrations.
/// The database administrator then becomes responsible for migrating the database manually,
/// keeping the schema up to date, and ensuring that it remains compatible at all times with the
/// schema expected by the current version of this software. Nevertheless, this may be the best
/// option when your application-specific schema has diverged significantly from the default
/// schema.
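///
/// For example (a minimal sketch, assuming `no_migrations` is a no-argument builder option like
/// `reset_schema`):
///
/// ```
/// # use hotshot_query_service::data_source::sql::Config;
/// let config = Config::default().no_migrations();
/// ```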
///
/// # Synchronization
///
/// [`SqlDataSource`] implements [`VersionedDataSource`](super::VersionedDataSource), which means
/// changes are applied to the underlying database via transactions. [`Transaction`] maps exactly
/// to a transaction in the underlying RDBMS, and inherits the underlying concurrency semantics.
///
/// # Extension and Composition
///
/// [`SqlDataSource`] is designed to be both extensible (so you can add additional state to the
/// API modules defined in this crate) and composable (so you can use [`SqlDataSource`] as one
/// component of a larger state type for an application with additional modules).
///
/// ## Extension
///
/// It is possible to add additional, application-specific state to [`SqlDataSource`]. If the new
/// state should live in memory, simply wrap the [`SqlDataSource`] in an
/// [`ExtensibleDataSource`](super::ExtensibleDataSource):
///
/// ```
/// # use hotshot_query_service::data_source::{
/// #     sql::{Config, Error}, ExtensibleDataSource, SqlDataSource,
/// # };
/// # use hotshot_query_service::fetching::provider::NoFetching;
/// # use hotshot_query_service::testing::mocks::MockTypes as AppTypes;
/// # async fn doc(config: Config) -> Result<(), Error> {
/// type AppState = &'static str;
///
/// let data_source: ExtensibleDataSource<SqlDataSource<AppTypes, NoFetching>, AppState> =
///     ExtensibleDataSource::new(config.connect(NoFetching).await?, "app state");
/// # Ok(())
/// # }
/// ```
///
/// The [`ExtensibleDataSource`](super::ExtensibleDataSource) wrapper implements all the same data
/// source traits as [`SqlDataSource`], and also provides access to the `AppState` parameter for
/// use in API endpoint handlers. This can be used to implement an app-specific data source trait
/// and add a new API endpoint that uses this app-specific data, as described in the
/// [extension guide](crate#extension).
///
/// If the new application-specific state should live in the SQL database itself, the
/// implementation is more involved, but still possible. Follow the steps for
/// [custom migrations](#custom-migrations) to modify the database schema to account for the new
/// data you want to store. You can then access this data through the [`SqlDataSource`] using
/// [`read`](super::VersionedDataSource::read) to run a custom read-only SQL query or
/// [`write`](super::VersionedDataSource::write) to execute a custom atomic mutation of the
/// database.
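///
/// For example (a minimal sketch; the application-specific SQL statements themselves are elided,
/// since they depend on your custom schema):
///
/// ```
/// # use hotshot_query_service::data_source::{
/// #     sql::{Config, Error}, SqlDataSource, Transaction, VersionedDataSource,
/// # };
/// # use hotshot_query_service::fetching::provider::NoFetching;
/// # use hotshot_query_service::testing::mocks::MockTypes as AppTypes;
/// # async fn doc(config: Config) -> Result<(), Error> {
/// let ds: SqlDataSource<AppTypes, NoFetching> = config.connect(NoFetching).await?;
///
/// // Run custom read-only queries in a read transaction.
/// let read_tx = ds.read().await?;
/// // ... execute application-specific SELECT statements using `read_tx` ...
/// drop(read_tx);
///
/// // Atomically mutate application-specific tables in a write transaction.
/// let tx = ds.write().await?;
/// // ... execute application-specific statements using `tx` ...
/// tx.commit().await?;
/// # Ok(())
/// # }
/// ```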
///
/// ## Composition
///
/// Composing [`SqlDataSource`] with other module states is fairly simple: just create an
/// aggregate struct containing both [`SqlDataSource`] and your additional module states, as
/// described in the [composition guide](crate#composition). If the additional modules have data
/// that should live in the same database as the [`SqlDataSource`] data, you can follow the steps
/// in [custom migrations](#custom-migrations) to accommodate this.
///
/// ```
/// # use futures::StreamExt;
/// # use hotshot::types::SystemContextHandle;
/// # use hotshot_query_service::Error;
/// # use hotshot_query_service::data_source::{
/// #     sql::Config, Transaction, SqlDataSource, UpdateDataSource, VersionedDataSource,
/// # };
/// # use hotshot_query_service::fetching::provider::NoFetching;
/// # use hotshot_query_service::testing::mocks::{
/// #     MockNodeImpl as AppNodeImpl, MockTypes as AppTypes, MockVersions as AppVersions
/// # };
/// # use hotshot_example_types::node_types::TestVersions;
/// # use std::sync::Arc;
/// # use tide_disco::App;
/// # use tokio::spawn;
/// # use vbs::version::StaticVersionType;
/// struct AppState {
///     hotshot_qs: SqlDataSource<AppTypes, NoFetching>,
///     // additional state for other modules
/// }
///
/// async fn init_server<Ver: StaticVersionType + 'static>(
///     config: Config,
///     hotshot: SystemContextHandle<AppTypes, AppNodeImpl, AppVersions>,
/// ) -> anyhow::Result<App<Arc<AppState>, Error>> {
///     let mut hotshot_qs = config.connect(NoFetching).await?;
///     // Initialize storage for other modules, using `hotshot_qs` to access the database.
///     let tx = hotshot_qs.write().await?;
///     // ...
///     tx.commit().await?;
///
///     let state = Arc::new(AppState {
///         hotshot_qs,
///         // additional state for other modules
///     });
///     let mut app = App::with_state(state.clone());
///     // Register API modules.
///
///     spawn(async move {
///         let mut events = hotshot.event_stream();
///         while let Some(event) = events.next().await {
///             if state.hotshot_qs.update(&event).await.is_err() {
///                 continue;
///             }
///
///             let mut tx = state.hotshot_qs.write().await.unwrap();
///             // Update other modules' states based on `event`, using `tx` to access the
///             // database.
///             tx.commit().await.unwrap();
///         }
///     });
///
///     Ok(app)
/// }
/// ```
pub type SqlDataSource<Types, P> = FetchingDataSource<Types, SqlStorage, P>;

impl<Types, P: AvailabilityProvider<Types>> SqlDataSource<Types, P>
where
    Types: NodeType,
    Header<Types>: QueryableHeader<Types>,
    Payload<Types>: QueryablePayload<Types>,
{
    /// Connect to a remote database.
    ///
    /// This function returns a [`fetching::Builder`] which can be used to set options on the
    /// underlying [`FetchingDataSource`], before constructing the [`SqlDataSource`] with
    /// [`build`](fetching::Builder::build). For a convenient constructor that uses the default
    /// fetching options, see [`Config::connect`].
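    ///
    /// For example, connecting with the default fetching options via the builder (a minimal
    /// sketch; see [`fetching::Builder`] for the available options):
    ///
    /// ```
    /// # use hotshot_query_service::data_source::{sql::{Config, Error}, SqlDataSource};
    /// # use hotshot_query_service::fetching::provider::NoFetching;
    /// # use hotshot_query_service::testing::mocks::MockTypes as AppTypes;
    /// # async fn doc(config: Config) -> Result<(), Error> {
    /// let ds: SqlDataSource<AppTypes, NoFetching> =
    ///     config.builder(NoFetching).await?.build().await?;
    /// # Ok(())
    /// # }
    /// ```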
    pub async fn connect(config: Config, provider: P) -> Result<Builder<Types, P>, Error> {
        Ok(Self::builder(SqlStorage::connect(config).await?, provider))
    }
}

// These tests run the `postgres` Docker image, which doesn't work on Windows.
#[cfg(all(any(test, feature = "testing"), not(target_os = "windows")))]
pub mod testing {
    use async_trait::async_trait;
    use hotshot::types::Event;
    pub use sql::testing::TmpDb;

    use super::*;
    use crate::{
        data_source::UpdateDataSource,
        testing::{consensus::DataSourceLifeCycle, mocks::MockTypes},
    };

    #[async_trait]
    impl<P: AvailabilityProvider<MockTypes> + Default> DataSourceLifeCycle
        for SqlDataSource<MockTypes, P>
    {
        type Storage = TmpDb;

        async fn create(_node_id: usize) -> Self::Storage {
            TmpDb::init().await
        }

        async fn connect(tmp_db: &Self::Storage) -> Self {
            tmp_db.config().connect(Default::default()).await.unwrap()
        }

        async fn reset(tmp_db: &Self::Storage) -> Self {
            tmp_db
                .config()
                .reset_schema()
                .connect(Default::default())
                .await
                .unwrap()
        }

        async fn leaf_only_ds(tmp_db: &Self::Storage) -> Self {
            let config = tmp_db.config();
            let builder = config.builder(Default::default()).await.unwrap();

            builder
                .leaf_only()
                .build()
                .await
                .expect("failed to build leaf only sql ds")
        }

        async fn handle_event(&self, event: &Event<MockTypes>) {
            self.update(event).await.unwrap();
        }
    }
}

// These tests run the `postgres` Docker image, which doesn't work on Windows.
#[cfg(all(test, not(target_os = "windows")))]
mod generic_test {
    use super::SqlDataSource;
    // For some reason this is the only way to import the macro defined in another module of this
    // crate.
    use crate::*;
    use crate::{fetching::provider::NoFetching, testing::mocks::MockTypes};

    instantiate_data_source_tests!(SqlDataSource<MockTypes, NoFetching>);
}

#[cfg(all(test, not(target_os = "windows")))]
mod test {
    use hotshot_example_types::state_types::{TestInstanceState, TestValidatedState};
    use hotshot_types::{data::VidShare, vid::advz::advz_scheme};
    use jf_vid::VidScheme;

    use super::*;
    use crate::{
        availability::{
            AvailabilityDataSource, BlockInfo, LeafQueryData, UpdateAvailabilityData,
            VidCommonQueryData,
        },
        data_source::{
            storage::{NodeStorage, UpdateAvailabilityStorage},
            Transaction, VersionedDataSource,
        },
        fetching::provider::NoFetching,
        testing::{consensus::DataSourceLifeCycle, mocks::MockTypes, setup_test},
    };

    type D = SqlDataSource<MockTypes, NoFetching>;

    // This function should be generic, but the file system data source does not currently support
    // storing VID common and later the corresponding share.
    #[tokio::test(flavor = "multi_thread")]
    async fn test_vid_monotonicity() {
        use hotshot_example_types::node_types::TestVersions;

        setup_test();

        let storage = D::create(0).await;
        let ds = <D as DataSourceLifeCycle>::connect(&storage).await;

        // Generate some test VID data.
        let disperse = advz_scheme(2).disperse([]).unwrap();

        // Insert test data with VID common but no share.
        let leaf = LeafQueryData::<MockTypes>::genesis::<TestVersions>(
            &TestValidatedState::default(),
            &TestInstanceState::default(),
        )
        .await;
        let common =
            VidCommonQueryData::new(leaf.header().clone(), crate::VidCommon::V0(disperse.common));
        ds.append(BlockInfo::new(leaf, None, Some(common.clone()), None, None))
            .await
            .unwrap();

        assert_eq!(ds.get_vid_common(0).await.await, common);
        NodeStorage::<MockTypes>::vid_share(&mut ds.read().await.unwrap(), 0)
            .await
            .unwrap_err();

        // Re-insert the common data with the share.
        let share0 = VidShare::V0(disperse.shares[0].clone());
        let mut tx = ds.write().await.unwrap();
        tx.insert_vid(common.clone(), Some(share0.clone()))
            .await
            .unwrap();
        tx.commit().await.unwrap();
        assert_eq!(ds.get_vid_common(0).await.await, common);
        assert_eq!(
            NodeStorage::<MockTypes>::vid_share(&mut ds.read().await.unwrap(), 0)
                .await
                .unwrap(),
            share0
        );
    }
}
451}