hotshot_testing/
test_builder.rs

1// Copyright (c) 2021-2024 Espresso Systems (espressosys.com)
2// This file is part of the HotShot repository.
3
4// You should have received a copy of the MIT License
5// along with the HotShot repository. If not, see <https://mit-license.org/>.
6
7use std::{collections::HashMap, num::NonZeroUsize, rc::Rc, sync::Arc, time::Duration};
8
9use async_lock::RwLock;
10use hotshot::{
11    tasks::EventTransformerState,
12    traits::{NetworkReliability, NodeImplementation, TestableNodeImplementation},
13    types::SystemContextHandle,
14    HotShotInitializer, SystemContext, TwinsHandlerState,
15};
16use hotshot_example_types::{
17    node_types::TestTypes, state_types::TestInstanceState, storage_types::TestStorage,
18    testable_delay::DelayConfig,
19};
20use hotshot_types::{
21    consensus::ConsensusMetricsValue,
22    epoch_membership::EpochMembershipCoordinator,
23    storage_metrics::StorageMetricsValue,
24    traits::node_implementation::{NodeType, Versions},
25    HotShotConfig, PeerConfig, ValidatorConfig,
26};
27use hotshot_utils::anytrace::*;
28use tide_disco::Url;
29use vec1::Vec1;
30
31use super::{
32    completion_task::{CompletionTaskDescription, TimeBasedCompletionTaskDescription},
33    overall_safety_task::OverallSafetyPropertiesDescription,
34    txn_task::TxnTaskDescription,
35};
36use crate::{
37    helpers::{key_pair_for_id, TestNodeKeyMap},
38    node_stake::TestNodeStakes,
39    spinning_task::SpinningTaskDescription,
40    test_launcher::{Network, ResourceGenerators, TestLauncher},
41    test_task::TestTaskStateSeed,
42    view_sync_task::ViewSyncTaskDescription,
43};
44
45pub type TransactionValidator = Arc<dyn Fn(&Vec<(u64, u64)>) -> Result<()> + Send + Sync>;
46
/// Timing parameters describing how a round should be timed.
///
/// `gen_launcher_with_tasks` copies these values onto the `HotShotConfig` handed to each
/// node (and passes `secondary_network_delay` to the network generator).
#[derive(Clone, Copy, Debug)]
pub struct TimingData {
    /// Base duration for next-view timeout, in milliseconds
    pub next_view_timeout: u64,
    /// The maximum amount of time a leader can wait to get a block from a builder
    pub builder_timeout: Duration,
    /// Time to wait until we request data associated with a proposal
    pub data_request_delay: Duration,
    /// Delay before sending through the secondary network in CombinedNetworks
    pub secondary_network_delay: Duration,
    /// View sync timeout
    pub view_sync_timeout: Duration,
}
61
62pub fn default_hotshot_config<TYPES: NodeType>(
63    known_nodes_with_stake: Vec<PeerConfig<TYPES>>,
64    known_da_nodes: Vec<PeerConfig<TYPES>>,
65    num_bootstrap_nodes: usize,
66    epoch_height: u64,
67    epoch_start_block: u64,
68) -> HotShotConfig<TYPES> {
69    HotShotConfig {
70        start_threshold: (1, 1),
71        num_nodes_with_stake: NonZeroUsize::new(known_nodes_with_stake.len()).unwrap(),
72        known_da_nodes: known_da_nodes.clone(),
73        da_committees: Default::default(),
74        num_bootstrap: num_bootstrap_nodes,
75        known_nodes_with_stake: known_nodes_with_stake.clone(),
76        da_staked_committee_size: known_da_nodes.len(),
77        fixed_leader_for_gpuvid: 1,
78        next_view_timeout: 500,
79        view_sync_timeout: Duration::from_millis(250),
80        builder_timeout: Duration::from_millis(1000),
81        data_request_delay: Duration::from_millis(200),
82        // Placeholder until we spin up the builder
83        builder_urls: vec1::vec1![Url::parse("http://localhost:9999").expect("Valid URL")],
84        start_proposing_view: u64::MAX,
85        stop_proposing_view: 0,
86        start_voting_view: u64::MAX,
87        stop_voting_view: 0,
88        start_proposing_time: u64::MAX,
89        stop_proposing_time: 0,
90        start_voting_time: u64::MAX,
91        stop_voting_time: 0,
92        epoch_height,
93        epoch_start_block,
94        stake_table_capacity: hotshot_types::light_client::DEFAULT_STAKE_TABLE_CAPACITY,
95        drb_difficulty: 10,
96        drb_upgrade_difficulty: 20,
97    }
98}
99
100#[allow(clippy::type_complexity)]
101pub fn gen_node_lists<TYPES: NodeType>(
102    num_staked_nodes: u64,
103    num_da_nodes: u64,
104    node_stakes: &TestNodeStakes,
105) -> (Vec<PeerConfig<TYPES>>, Vec<PeerConfig<TYPES>>) {
106    let mut staked_nodes = Vec::new();
107    let mut da_nodes = Vec::new();
108
109    for n in 0..num_staked_nodes {
110        let validator_config: ValidatorConfig<TYPES> = ValidatorConfig::generated_from_seed_indexed(
111            [0u8; 32],
112            n,
113            node_stakes.get(n),
114            n < num_da_nodes,
115        );
116
117        let peer_config = validator_config.public_config();
118        staked_nodes.push(peer_config.clone());
119
120        if n < num_da_nodes {
121            da_nodes.push(peer_config)
122        }
123    }
124
125    (staked_nodes, da_nodes)
126}
127
128/// metadata describing a test
129#[derive(Clone)]
130pub struct TestDescription<TYPES: NodeType, I: NodeImplementation<TYPES>, V: Versions> {
131    /// `HotShotConfig` used for setting up the test infrastructure.
132    ///
133    /// Note: this is not the same as the `HotShotConfig` passed to test nodes for `SystemContext::init`;
134    /// those configs are instead provided by the resource generators in the test launcher.
135    pub test_config: HotShotConfig<TYPES>,
136    /// Whether to skip initializing nodes that will start late, which will catch up later with
137    /// `HotShotInitializer::from_reload` in the spinning task.
138    pub skip_late: bool,
139    /// overall safety property description
140    pub overall_safety_properties: OverallSafetyPropertiesDescription,
141    /// spinning properties
142    pub spinning_properties: SpinningTaskDescription,
143    /// txns timing
144    pub txn_description: TxnTaskDescription,
145    /// completion task
146    pub completion_task_description: CompletionTaskDescription,
147    /// timing data
148    pub timing_data: TimingData,
149    /// unrelabile networking metadata
150    pub unreliable_network: Option<Box<dyn NetworkReliability>>,
151    /// view sync check task
152    pub view_sync_properties: ViewSyncTaskDescription,
153    /// description of builders to run
154    pub builders: Vec1<BuilderDescription>,
155    /// description of fallback builder to run
156    pub fallback_builder: BuilderDescription,
157    /// description of the solver to run
158    pub solver: FakeSolverApiDescription,
159    /// nodes with byzantine behaviour
160    pub behaviour: Rc<dyn Fn(u64) -> Behaviour<TYPES, I, V>>,
161    /// Delay config if any to add delays to asynchronous calls
162    pub async_delay_config: HashMap<u64, DelayConfig>,
163    /// view in which to propose an upgrade
164    pub upgrade_view: Option<u64>,
165    /// whether to initialize the solver on startup
166    pub start_solver: bool,
167    /// boxed closure used to validate the resulting transactions
168    pub validate_transactions: TransactionValidator,
169    /// stake to apply to particular nodes. Nodes not included will have a stake of 1.
170    pub node_stakes: TestNodeStakes,
171}
172
173pub fn nonempty_block_threshold(threshold: (u64, u64)) -> TransactionValidator {
174    Arc::new(move |transactions| {
175        if matches!(threshold, (0, _)) {
176            return Ok(());
177        }
178
179        let blocks: Vec<_> = transactions.iter().filter(|(view, _)| *view != 0).collect();
180
181        let num_blocks = blocks.len() as u64;
182        let mut num_nonempty_blocks = 0;
183
184        ensure!(num_blocks > 0, "Failed to commit any non-genesis blocks");
185
186        for (_, num_transactions) in blocks {
187            if *num_transactions > 0 {
188                num_nonempty_blocks += 1;
189            }
190        }
191
192        ensure!(
193            // i.e. num_nonempty_blocks / num_blocks >= threshold.0 / threshold.1
194            num_nonempty_blocks * threshold.1 >= threshold.0 * num_blocks,
195            "Failed to meet nonempty block threshold of {}/{}; got {num_nonempty_blocks} nonempty \
196             blocks out of a total of {num_blocks}",
197            threshold.0,
198            threshold.1
199        );
200
201        Ok(())
202    })
203}
204
205pub fn nonempty_block_limit(limit: (u64, u64)) -> TransactionValidator {
206    Arc::new(move |transactions| {
207        if matches!(limit, (_, 0)) {
208            return Ok(());
209        }
210
211        let blocks: Vec<_> = transactions.iter().filter(|(view, _)| *view != 0).collect();
212
213        let num_blocks = blocks.len() as u64;
214        let mut num_nonempty_blocks = 0;
215
216        ensure!(num_blocks > 0, "Failed to commit any non-genesis blocks");
217
218        for (_, num_transactions) in blocks {
219            if *num_transactions > 0 {
220                num_nonempty_blocks += 1;
221            }
222        }
223
224        ensure!(
225            // i.e. num_nonempty_blocks / num_blocks <= limit.0 / limit.1
226            num_nonempty_blocks * limit.1 <= limit.0 * num_blocks,
227            "Exceeded nonempty block limit of {}/{}; got {num_nonempty_blocks} nonempty blocks \
228             out of a total of {num_blocks}",
229            limit.0,
230            limit.1
231        );
232
233        Ok(())
234    })
235}
236
237#[derive(Debug)]
238pub enum Behaviour<TYPES: NodeType, I: NodeImplementation<TYPES>, V: Versions> {
239    ByzantineTwins(Box<dyn TwinsHandlerState<TYPES, I, V>>),
240    Byzantine(Box<dyn EventTransformerState<TYPES, I, V>>),
241    Standard,
242}
243
244pub async fn create_test_handle<
245    TYPES: NodeType<InstanceState = TestInstanceState>,
246    I: NodeImplementation<TYPES>,
247    V: Versions,
248>(
249    metadata: TestDescription<TYPES, I, V>,
250    node_id: u64,
251    network: Network<TYPES, I>,
252    memberships: Arc<RwLock<TYPES::Membership>>,
253    config: HotShotConfig<TYPES>,
254    storage: I::Storage,
255) -> SystemContextHandle<TYPES, I, V> {
256    let initializer = HotShotInitializer::<TYPES>::from_genesis::<V>(
257        TestInstanceState::new(
258            metadata
259                .async_delay_config
260                .get(&node_id)
261                .cloned()
262                .unwrap_or_default(),
263        ),
264        metadata.test_config.epoch_height,
265        metadata.test_config.epoch_start_block,
266        vec![],
267    )
268    .await
269    .unwrap();
270
271    // See whether or not we should be DA
272    let is_da = node_id < config.da_staked_committee_size as u64;
273
274    let validator_config: ValidatorConfig<TYPES> = ValidatorConfig::generated_from_seed_indexed(
275        [0u8; 32],
276        node_id,
277        metadata.node_stakes.get(node_id),
278        is_da,
279    );
280
281    // Get key pair for certificate aggregation
282    let private_key = validator_config.private_key.clone();
283    let public_key = validator_config.public_key.clone();
284    let state_private_key = validator_config.state_private_key.clone();
285    let membership_coordinator =
286        EpochMembershipCoordinator::new(memberships, config.epoch_height, &storage.clone());
287
288    let behaviour = (metadata.behaviour)(node_id);
289    match behaviour {
290        Behaviour::ByzantineTwins(state) => {
291            let state = Box::leak(state);
292            let (left_handle, _right_handle) = state
293                .spawn_twin_handles(
294                    public_key,
295                    private_key,
296                    state_private_key,
297                    node_id,
298                    config,
299                    membership_coordinator,
300                    network,
301                    initializer,
302                    ConsensusMetricsValue::default(),
303                    storage,
304                    StorageMetricsValue::default(),
305                )
306                .await;
307
308            left_handle
309        },
310        Behaviour::Byzantine(state) => {
311            let state = Box::leak(state);
312            state
313                .spawn_handle(
314                    public_key,
315                    private_key,
316                    state_private_key,
317                    node_id,
318                    config,
319                    membership_coordinator,
320                    network,
321                    initializer,
322                    ConsensusMetricsValue::default(),
323                    storage,
324                    StorageMetricsValue::default(),
325                )
326                .await
327        },
328        Behaviour::Standard => {
329            let hotshot = SystemContext::<TYPES, I, V>::new(
330                public_key,
331                private_key,
332                state_private_key,
333                node_id,
334                config,
335                membership_coordinator,
336                network,
337                initializer,
338                ConsensusMetricsValue::default(),
339                storage,
340                StorageMetricsValue::default(),
341            )
342            .await;
343
344            hotshot.run_tasks().await
345        },
346    }
347}
348
/// Describes a possible change to builder status during test
#[derive(Clone, Debug)]
pub enum BuilderChange {
    /// Builder should start up
    Up,
    /// Builder should shut down completely
    Down,
    /// Toggles whether builder should always respond
    /// to claim calls with errors
    FailClaims(bool),
}
360
361/// Metadata describing builder behaviour during a test
362#[derive(Clone, Debug, Default)]
363pub struct BuilderDescription {
364    /// view number -> change to builder status
365    pub changes: HashMap<u64, BuilderChange>,
366}
367
/// Metadata describing the mock solver API used during a test.
#[derive(Clone, Debug)]
pub struct FakeSolverApiDescription {
    /// The rate at which errors occur in the mock solver API.
    ///
    /// NOTE(review): despite the `pct` suffix, the default test description sets this to
    /// `0.1` and labels it a 10% error rate, so it appears to be a fraction - confirm.
    pub error_pct: f32,
}
373
374impl Default for TimingData {
375    fn default() -> Self {
376        Self {
377            next_view_timeout: 6000,
378            builder_timeout: Duration::from_millis(500),
379            data_request_delay: Duration::from_millis(200),
380            secondary_network_delay: Duration::from_millis(1000),
381            view_sync_timeout: Duration::from_millis(2000),
382        }
383    }
384}
385
386impl<TYPES: NodeType, I: NodeImplementation<TYPES>, V: Versions> TestDescription<TYPES, I, V> {
387    /// the default metadata for a stress test
388    #[must_use]
389    #[allow(clippy::redundant_field_names)]
390    pub fn default_stress() -> Self {
391        let num_nodes_with_stake = 100;
392
393        Self {
394            overall_safety_properties: OverallSafetyPropertiesDescription {
395                num_successful_views: 50,
396                ..OverallSafetyPropertiesDescription::default()
397            },
398            timing_data: TimingData {
399                next_view_timeout: 2000,
400                ..TimingData::default()
401            },
402            view_sync_properties: ViewSyncTaskDescription::Threshold(0, num_nodes_with_stake),
403            ..Self::default()
404        }
405    }
406
407    /// the default metadata for multiple rounds
408    #[must_use]
409    #[allow(clippy::redundant_field_names)]
410    pub fn default_multiple_rounds() -> Self {
411        let num_nodes_with_stake = 10;
412        TestDescription::<TYPES, I, V> {
413            overall_safety_properties: OverallSafetyPropertiesDescription {
414                num_successful_views: 20,
415                ..OverallSafetyPropertiesDescription::default()
416            },
417            timing_data: TimingData {
418                ..TimingData::default()
419            },
420            view_sync_properties: ViewSyncTaskDescription::Threshold(0, num_nodes_with_stake),
421            ..TestDescription::<TYPES, I, V>::default()
422        }
423    }
424
425    /// Default setting with 20 nodes and 8 views of successful views.
426    #[must_use]
427    #[allow(clippy::redundant_field_names)]
428    pub fn default_more_nodes() -> Self {
429        Self::default_more_nodes_with_stake(TestNodeStakes::default())
430    }
431
432    #[must_use]
433    #[allow(clippy::redundant_field_names)]
434    pub fn default_more_nodes_with_stake(node_stakes: TestNodeStakes) -> Self {
435        let num_nodes_with_stake = 20;
436        let num_da_nodes = 14;
437        let epoch_height = 10;
438        let epoch_start_block = 1;
439
440        let (staked_nodes, da_nodes) =
441            gen_node_lists::<TYPES>(num_nodes_with_stake, num_da_nodes, &node_stakes);
442
443        Self {
444            test_config: default_hotshot_config::<TYPES>(
445                staked_nodes,
446                da_nodes,
447                num_nodes_with_stake.try_into().unwrap(),
448                epoch_height,
449                epoch_start_block,
450            ),
451            // The first 14 (i.e., 20 - f) nodes are in the DA committee and we may shutdown the
452            // remaining 6 (i.e., f) nodes. We could remove this restriction after fixing the
453            // following issue.
454            completion_task_description: CompletionTaskDescription::TimeBasedCompletionTaskBuilder(
455                TimeBasedCompletionTaskDescription {
456                    // Increase the duration to get the expected number of successful views.
457                    duration: Duration::from_secs(340),
458                },
459            ),
460            overall_safety_properties: OverallSafetyPropertiesDescription {
461                ..Default::default()
462            },
463            timing_data: TimingData {
464                next_view_timeout: 6000,
465                ..TimingData::default()
466            },
467            view_sync_properties: ViewSyncTaskDescription::Threshold(
468                0,
469                num_nodes_with_stake.try_into().unwrap(),
470            ),
471            node_stakes,
472            ..Self::default()
473        }
474    }
475
476    pub fn set_num_nodes(self, num_nodes: u64, num_da_nodes: u64) -> Self {
477        assert!(
478            num_da_nodes <= num_nodes,
479            "Cannot build test with fewer DA than total nodes. You may have mixed up the \
480             arguments to the function"
481        );
482
483        let (staked_nodes, da_nodes) =
484            gen_node_lists::<TYPES>(num_nodes, num_da_nodes, &self.node_stakes);
485
486        Self {
487            test_config: default_hotshot_config::<TYPES>(
488                staked_nodes,
489                da_nodes,
490                self.test_config.num_bootstrap,
491                self.test_config.epoch_height,
492                self.test_config.epoch_start_block,
493            ),
494            ..self
495        }
496    }
497
498    pub fn build_node_key_map(&self) -> Arc<TestNodeKeyMap> {
499        let mut node_key_map = TestNodeKeyMap::new();
500        for i in 0..self.test_config.num_nodes_with_stake.into() {
501            let (private_key, public_key) = key_pair_for_id::<TestTypes>(i as u64);
502            node_key_map.insert(public_key, private_key);
503        }
504
505        Arc::new(node_key_map)
506    }
507
508    #[must_use]
509    pub fn default_with_stake(node_stakes: TestNodeStakes) -> Self {
510        let num_nodes_with_stake = 7;
511        let num_da_nodes = num_nodes_with_stake;
512        let epoch_height = 10;
513        let epoch_start_block = 1;
514
515        let (staked_nodes, da_nodes) =
516            gen_node_lists::<TYPES>(num_nodes_with_stake, num_da_nodes, &node_stakes);
517
518        Self {
519            test_config: default_hotshot_config::<TYPES>(
520                staked_nodes,
521                da_nodes,
522                num_nodes_with_stake.try_into().unwrap(),
523                epoch_height,
524                epoch_start_block,
525            ),
526            timing_data: TimingData::default(),
527            skip_late: false,
528            spinning_properties: SpinningTaskDescription {
529                node_changes: vec![],
530            },
531            overall_safety_properties: OverallSafetyPropertiesDescription::default(),
532            // arbitrary, haven't done the math on this
533            txn_description: TxnTaskDescription::RoundRobinTimeBased(Duration::from_millis(100)),
534            completion_task_description: CompletionTaskDescription::TimeBasedCompletionTaskBuilder(
535                TimeBasedCompletionTaskDescription {
536                    duration: Duration::from_secs(120),
537                },
538            ),
539            unreliable_network: None,
540            view_sync_properties: ViewSyncTaskDescription::Threshold(
541                0,
542                num_nodes_with_stake.try_into().unwrap(),
543            ),
544            builders: vec1::vec1![BuilderDescription::default(), BuilderDescription::default(),],
545            fallback_builder: BuilderDescription::default(),
546            solver: FakeSolverApiDescription {
547                // Default to a 10% error rate.
548                error_pct: 0.1,
549            },
550            behaviour: Rc::new(|_| Behaviour::Standard),
551            async_delay_config: HashMap::new(),
552            upgrade_view: None,
553            start_solver: true,
554            validate_transactions: Arc::new(|_| Ok(())),
555            node_stakes,
556        }
557    }
558}
559
560impl<TYPES: NodeType, I: NodeImplementation<TYPES>, V: Versions> Default
561    for TestDescription<TYPES, I, V>
562{
563    /// by default, just a single round
564    #[allow(clippy::redundant_field_names)]
565    fn default() -> Self {
566        Self::default_with_stake(TestNodeStakes::default())
567    }
568}
569
570impl<
571        TYPES: NodeType<InstanceState = TestInstanceState>,
572        I: TestableNodeImplementation<TYPES>,
573        V: Versions,
574    > TestDescription<TYPES, I, V>
575where
576    I: NodeImplementation<TYPES>,
577{
578    /// turn a description of a test (e.g. a [`TestDescription`]) into
579    /// a [`TestLauncher`] that can be used to launch the test.
580    /// # Panics
581    /// if some of the configuration values are zero
582    pub fn gen_launcher(self) -> TestLauncher<TYPES, I, V> {
583        self.gen_launcher_with_tasks(vec![])
584    }
585
586    /// turn a description of a test (e.g. a [`TestDescription`]) into
587    /// a [`TestLauncher`] that can be used to launch the test, with
588    /// additional testing tasks to run in test harness
589    /// # Panics
590    /// if some of the configuration values are zero
591    #[must_use]
592    pub fn gen_launcher_with_tasks(
593        self,
594        additional_test_tasks: Vec<Box<dyn TestTaskStateSeed<TYPES, I, V>>>,
595    ) -> TestLauncher<TYPES, I, V> {
596        let TestDescription {
597            timing_data,
598            unreliable_network,
599            test_config,
600            node_stakes,
601            ..
602        } = self.clone();
603
604        let num_nodes_with_stake = test_config.num_nodes_with_stake.into();
605        let num_bootstrap_nodes = test_config.num_bootstrap;
606        let da_staked_committee_size = test_config.da_staked_committee_size;
607
608        let validator_config = Rc::new(move |node_id| {
609            ValidatorConfig::<TYPES>::generated_from_seed_indexed(
610                [0u8; 32],
611                node_id,
612                node_stakes.get(node_id),
613                // This is the config for node 0
614                node_id < test_config.da_staked_committee_size as u64,
615            )
616        });
617
618        let hotshot_config = Rc::new(move |_| test_config.clone());
619        let TimingData {
620            next_view_timeout,
621            builder_timeout,
622            data_request_delay,
623            secondary_network_delay,
624            view_sync_timeout,
625        } = timing_data;
626        // TODO this should really be using the timing config struct
627        let mod_hotshot_config = move |hotshot_config: &mut HotShotConfig<TYPES>| {
628            hotshot_config.next_view_timeout = next_view_timeout;
629            hotshot_config.builder_timeout = builder_timeout;
630            hotshot_config.data_request_delay = data_request_delay;
631            hotshot_config.view_sync_timeout = view_sync_timeout;
632        };
633
634        let metadata = self.clone();
635        TestLauncher {
636            resource_generators: ResourceGenerators {
637                channel_generator: <I as TestableNodeImplementation<TYPES>>::gen_networks(
638                    num_nodes_with_stake,
639                    num_bootstrap_nodes,
640                    da_staked_committee_size,
641                    unreliable_network,
642                    secondary_network_delay,
643                ),
644                storage: Rc::new(move |node_id| {
645                    let storage = TestStorage::<TYPES> {
646                        delay_config: metadata
647                            .async_delay_config
648                            .get(&node_id)
649                            .cloned()
650                            .unwrap_or_default(),
651                        ..Default::default()
652                    };
653                    storage
654                }),
655                hotshot_config,
656                validator_config,
657            },
658            metadata: self,
659            additional_test_tasks,
660        }
661        .map_hotshot_config(mod_hotshot_config)
662    }
663}