hotshot_testing/
test_builder.rs

1// Copyright (c) 2021-2024 Espresso Systems (espressosys.com)
2// This file is part of the HotShot repository.
3
4// You should have received a copy of the MIT License
5// along with the HotShot repository. If not, see <https://mit-license.org/>.
6
7use std::{collections::HashMap, num::NonZeroUsize, rc::Rc, sync::Arc, time::Duration};
8
9use async_lock::RwLock;
10use hotshot::{
11    HotShotInitializer, SystemContext, TwinsHandlerState,
12    tasks::EventTransformerState,
13    traits::{NetworkReliability, NodeImplementation, TestableNodeImplementation},
14    types::SystemContextHandle,
15};
16use hotshot_example_types::{
17    node_types::TestTypes, state_types::TestInstanceState, storage_types::TestStorage,
18    testable_delay::DelayConfig,
19};
20use hotshot_types::{
21    HotShotConfig, PeerConfig, ValidatorConfig,
22    consensus::ConsensusMetricsValue,
23    epoch_membership::EpochMembershipCoordinator,
24    storage_metrics::StorageMetricsValue,
25    traits::{node_implementation::NodeType, signature_key::StakeTableEntryType},
26};
27use hotshot_utils::anytrace::*;
28use tide_disco::Url;
29use vec1::Vec1;
30use versions::{MIN_SUPPORTED_VERSION, Upgrade};
31
32use super::{
33    completion_task::{CompletionTaskDescription, TimeBasedCompletionTaskDescription},
34    overall_safety_task::OverallSafetyPropertiesDescription,
35    txn_task::TxnTaskDescription,
36};
37use crate::{
38    helpers::{TestNodeKeyMap, key_pair_for_id},
39    node_stake::TestNodeStakes,
40    spinning_task::SpinningTaskDescription,
41    test_launcher::{Network, ResourceGenerators, TestLauncher},
42    test_task::TestTaskStateSeed,
43    view_sync_task::ViewSyncTaskDescription,
44};
45
/// Shared, thread-safe callback that inspects the transactions recorded during a test run and
/// returns an error if they are not acceptable.
///
/// The validators constructed in this file read each tuple as `(view, number of transactions)`
/// and treat entries with `view == 0` as genesis.
pub type TransactionValidator = Arc<dyn Fn(&Vec<(u64, u64)>) -> Result<()> + Send + Sync>;
47
/// Data describing how a round should be timed.
///
/// `gen_launcher_with_tasks` copies every field except `secondary_network_delay` into the
/// `HotShotConfig` handed to nodes; `secondary_network_delay` is passed to network generation.
#[derive(Clone, Debug, Copy)]
pub struct TimingData {
    /// Base duration for next-view timeout, in milliseconds
    pub next_view_timeout: u64,
    /// The maximum amount of time a leader can wait to get a block from a builder
    pub builder_timeout: Duration,
    /// Time to wait until we request data associated with a proposal
    pub data_request_delay: Duration,
    /// Delay before sending through the secondary network in CombinedNetworks
    pub secondary_network_delay: Duration,
    /// View sync timeout
    pub view_sync_timeout: Duration,
}
62
63pub fn default_hotshot_config<TYPES: NodeType>(
64    known_nodes_with_stake: Vec<PeerConfig<TYPES>>,
65    known_da_nodes: Vec<PeerConfig<TYPES>>,
66    num_bootstrap_nodes: usize,
67    epoch_height: u64,
68    epoch_start_block: u64,
69) -> HotShotConfig<TYPES> {
70    HotShotConfig {
71        start_threshold: (1, 1),
72        num_nodes_with_stake: NonZeroUsize::new(known_nodes_with_stake.len()).unwrap(),
73        known_da_nodes: known_da_nodes.clone(),
74        da_committees: Default::default(),
75        num_bootstrap: num_bootstrap_nodes,
76        known_nodes_with_stake: known_nodes_with_stake.clone(),
77        da_staked_committee_size: known_da_nodes.len(),
78        fixed_leader_for_gpuvid: 1,
79        next_view_timeout: 500,
80        view_sync_timeout: Duration::from_millis(250),
81        builder_timeout: Duration::from_millis(1000),
82        data_request_delay: Duration::from_millis(200),
83        // Placeholder until we spin up the builder
84        builder_urls: vec1::vec1![Url::parse("http://localhost:9999").expect("Valid URL")],
85        start_proposing_view: u64::MAX,
86        stop_proposing_view: 0,
87        start_voting_view: u64::MAX,
88        stop_voting_view: 0,
89        start_proposing_time: u64::MAX,
90        stop_proposing_time: 0,
91        start_voting_time: u64::MAX,
92        stop_voting_time: 0,
93        epoch_height,
94        epoch_start_block,
95        stake_table_capacity: hotshot_types::light_client::DEFAULT_STAKE_TABLE_CAPACITY,
96        drb_difficulty: 10,
97        drb_upgrade_difficulty: 20,
98    }
99}
100
101#[allow(clippy::type_complexity)]
102pub fn gen_node_lists<TYPES: NodeType>(
103    num_staked_nodes: u64,
104    num_da_nodes: u64,
105    node_stakes: &TestNodeStakes,
106) -> (Vec<PeerConfig<TYPES>>, Vec<PeerConfig<TYPES>>) {
107    let mut staked_nodes = Vec::new();
108    let mut da_nodes = Vec::new();
109
110    for n in 0..num_staked_nodes {
111        let validator_config: ValidatorConfig<TYPES> = ValidatorConfig::generated_from_seed_indexed(
112            [0u8; 32],
113            n,
114            node_stakes.get(n),
115            n < num_da_nodes,
116        );
117
118        let peer_config = validator_config.public_config();
119        staked_nodes.push(peer_config.clone());
120
121        if n < num_da_nodes {
122            da_nodes.push(peer_config)
123        }
124    }
125
126    (staked_nodes, da_nodes)
127}
128
/// Metadata describing a test: network size, timing, auxiliary tasks and per-node behaviour.
#[derive(Clone)]
pub struct TestDescription<TYPES: NodeType, I: NodeImplementation<TYPES>> {
    /// `HotShotConfig` used for setting up the test infrastructure.
    ///
    /// Note: this is not the same as the `HotShotConfig` passed to test nodes for `SystemContext::init`;
    /// those configs are instead provided by the resource generators in the test launcher.
    pub test_config: HotShotConfig<TYPES>,
    /// Whether to skip initializing nodes that will start late, which will catch up later with
    /// `HotShotInitializer::from_reload` in the spinning task.
    pub skip_late: bool,
    /// Overall safety property description
    pub overall_safety_properties: OverallSafetyPropertiesDescription,
    /// Spinning properties
    pub spinning_properties: SpinningTaskDescription,
    /// Transaction submission timing
    pub txn_description: TxnTaskDescription,
    /// Completion task
    pub completion_task_description: CompletionTaskDescription,
    /// Timing data
    pub timing_data: TimingData,
    /// Unreliable networking metadata; `None` means no unreliability model is supplied
    pub unreliable_network: Option<Box<dyn NetworkReliability>>,
    /// View sync check task
    pub view_sync_properties: ViewSyncTaskDescription,
    /// Description of builders to run
    pub builders: Vec1<BuilderDescription>,
    /// Description of fallback builder to run
    pub fallback_builder: BuilderDescription,
    /// Description of the solver to run
    pub solver: FakeSolverApiDescription,
    /// Maps a node id to the behaviour (standard or byzantine) that node should exhibit
    pub behaviour: Rc<dyn Fn(u64) -> Behaviour<TYPES, I>>,
    /// Per-node delay config, keyed by node id, used to add delays to asynchronous calls.
    /// Nodes without an entry get the default `DelayConfig`.
    pub async_delay_config: HashMap<u64, DelayConfig>,
    /// Configured version upgrade
    pub upgrade: versions::Upgrade,
    /// View in which to propose an upgrade
    pub upgrade_view: Option<u64>,
    /// Whether to initialize the solver on startup
    pub start_solver: bool,
    /// Shared closure used to validate the resulting transactions
    pub validate_transactions: TransactionValidator,
    /// Stake to apply to particular nodes. Nodes not included will have a stake of 1.
    pub node_stakes: TestNodeStakes,
}
175
176pub fn nonempty_block_threshold(threshold: (u64, u64)) -> TransactionValidator {
177    Arc::new(move |transactions| {
178        if matches!(threshold, (0, _)) {
179            return Ok(());
180        }
181
182        let blocks: Vec<_> = transactions.iter().filter(|(view, _)| *view != 0).collect();
183
184        let num_blocks = blocks.len() as u64;
185        let mut num_nonempty_blocks = 0;
186
187        ensure!(num_blocks > 0, "Failed to commit any non-genesis blocks");
188
189        for (_, num_transactions) in blocks {
190            if *num_transactions > 0 {
191                num_nonempty_blocks += 1;
192            }
193        }
194
195        ensure!(
196            // i.e. num_nonempty_blocks / num_blocks >= threshold.0 / threshold.1
197            num_nonempty_blocks * threshold.1 >= threshold.0 * num_blocks,
198            "Failed to meet nonempty block threshold of {}/{}; got {num_nonempty_blocks} nonempty \
199             blocks out of a total of {num_blocks}",
200            threshold.0,
201            threshold.1
202        );
203
204        Ok(())
205    })
206}
207
208pub fn nonempty_block_limit(limit: (u64, u64)) -> TransactionValidator {
209    Arc::new(move |transactions| {
210        if matches!(limit, (_, 0)) {
211            return Ok(());
212        }
213
214        let blocks: Vec<_> = transactions.iter().filter(|(view, _)| *view != 0).collect();
215
216        let num_blocks = blocks.len() as u64;
217        let mut num_nonempty_blocks = 0;
218
219        ensure!(num_blocks > 0, "Failed to commit any non-genesis blocks");
220
221        for (_, num_transactions) in blocks {
222            if *num_transactions > 0 {
223                num_nonempty_blocks += 1;
224            }
225        }
226
227        ensure!(
228            // i.e. num_nonempty_blocks / num_blocks <= limit.0 / limit.1
229            num_nonempty_blocks * limit.1 <= limit.0 * num_blocks,
230            "Exceeded nonempty block limit of {}/{}; got {num_nonempty_blocks} nonempty blocks \
231             out of a total of {num_blocks}",
232            limit.0,
233            limit.1
234        );
235
236        Ok(())
237    })
238}
239
/// How a single test node should behave; `create_test_handle` dispatches on this to decide how
/// the node's handle is spawned.
#[derive(Debug)]
pub enum Behaviour<TYPES: NodeType, I: NodeImplementation<TYPES>> {
    /// Node is spawned through `TwinsHandlerState::spawn_twin_handles`, which yields two
    /// handles; `create_test_handle` returns the first of them.
    ByzantineTwins(Box<dyn TwinsHandlerState<TYPES, I>>),
    /// Node is spawned through `EventTransformerState::spawn_handle`.
    Byzantine(Box<dyn EventTransformerState<TYPES, I>>),
    /// Node is a regular `SystemContext` with its tasks running.
    Standard,
}
246
247pub async fn create_test_handle<
248    TYPES: NodeType<InstanceState = TestInstanceState>,
249    I: NodeImplementation<TYPES>,
250>(
251    metadata: TestDescription<TYPES, I>,
252    node_id: u64,
253    network: Network<TYPES, I>,
254    memberships: Arc<RwLock<TYPES::Membership>>,
255    config: HotShotConfig<TYPES>,
256    storage: I::Storage,
257) -> SystemContextHandle<TYPES, I> {
258    let initializer = HotShotInitializer::<TYPES>::from_genesis(
259        TestInstanceState::new(
260            metadata
261                .async_delay_config
262                .get(&node_id)
263                .cloned()
264                .unwrap_or_default(),
265        ),
266        metadata.test_config.epoch_height,
267        metadata.test_config.epoch_start_block,
268        vec![],
269        metadata.upgrade,
270    )
271    .await
272    .unwrap();
273
274    // See whether or not we should be DA
275    let is_da = node_id < config.da_staked_committee_size as u64;
276
277    let validator_config: ValidatorConfig<TYPES> = ValidatorConfig::generated_from_seed_indexed(
278        [0u8; 32],
279        node_id,
280        metadata.node_stakes.get(node_id),
281        is_da,
282    );
283
284    // Get key pair for certificate aggregation
285    let private_key = validator_config.private_key.clone();
286    let public_key = validator_config.public_key.clone();
287    let state_private_key = validator_config.state_private_key.clone();
288    let membership_coordinator =
289        EpochMembershipCoordinator::new(memberships, config.epoch_height, &storage.clone());
290
291    let behaviour = (metadata.behaviour)(node_id);
292    match behaviour {
293        Behaviour::ByzantineTwins(state) => {
294            let state = Box::leak(state);
295            let (left_handle, _right_handle) = state
296                .spawn_twin_handles(
297                    public_key,
298                    private_key,
299                    state_private_key,
300                    node_id,
301                    config,
302                    metadata.upgrade,
303                    membership_coordinator,
304                    network,
305                    initializer,
306                    ConsensusMetricsValue::default(),
307                    storage,
308                    StorageMetricsValue::default(),
309                )
310                .await;
311
312            left_handle
313        },
314        Behaviour::Byzantine(state) => {
315            let state = Box::leak(state);
316            state
317                .spawn_handle(
318                    public_key,
319                    private_key,
320                    state_private_key,
321                    node_id,
322                    config,
323                    metadata.upgrade,
324                    membership_coordinator,
325                    network,
326                    initializer,
327                    ConsensusMetricsValue::default(),
328                    storage,
329                    StorageMetricsValue::default(),
330                )
331                .await
332        },
333        Behaviour::Standard => {
334            let hotshot = SystemContext::<TYPES, I>::new(
335                public_key,
336                private_key,
337                state_private_key,
338                node_id,
339                config,
340                metadata.upgrade,
341                membership_coordinator,
342                network,
343                initializer,
344                ConsensusMetricsValue::default(),
345                storage,
346                StorageMetricsValue::default(),
347            )
348            .await;
349
350            hotshot.run_tasks().await
351        },
352    }
353}
354
/// Describes a possible change to builder status during test
#[derive(Clone, Debug)]
pub enum BuilderChange {
    /// Builder should start up
    Up,
    /// Builder should shut down completely
    Down,
    /// Toggles whether builder should always respond
    /// to claim calls with errors
    FailClaims(bool),
}
366
/// Metadata describing builder behaviour during a test.
///
/// The derived default has no scheduled changes.
#[derive(Clone, Debug, Default)]
pub struct BuilderDescription {
    /// view number -> change to builder status
    pub changes: HashMap<u64, BuilderChange>,
}
373
/// Metadata describing the mock solver API run during a test.
#[derive(Clone, Debug)]
pub struct FakeSolverApiDescription {
    /// The rate at which errors occur in the mock solver API.
    /// The default test description uses `0.1` and describes it as a 10% error rate.
    pub error_pct: f32,
}
379
380impl Default for TimingData {
381    fn default() -> Self {
382        Self {
383            next_view_timeout: 6000,
384            builder_timeout: Duration::from_millis(500),
385            data_request_delay: Duration::from_millis(200),
386            secondary_network_delay: Duration::from_millis(1000),
387            view_sync_timeout: Duration::from_millis(2000),
388        }
389    }
390}
391
392impl<TYPES: NodeType, I: NodeImplementation<TYPES>> TestDescription<TYPES, I> {
393    /// the default metadata for a stress test
394    #[must_use]
395    #[allow(clippy::redundant_field_names)]
396    pub fn default_stress() -> Self {
397        let num_nodes_with_stake = 100;
398
399        Self {
400            overall_safety_properties: OverallSafetyPropertiesDescription {
401                num_successful_views: 50,
402                ..OverallSafetyPropertiesDescription::default()
403            },
404            timing_data: TimingData {
405                next_view_timeout: 2000,
406                ..TimingData::default()
407            },
408            view_sync_properties: ViewSyncTaskDescription::Threshold(0, num_nodes_with_stake),
409            ..Self::default()
410        }
411    }
412
413    /// the default metadata for multiple rounds
414    #[must_use]
415    #[allow(clippy::redundant_field_names)]
416    pub fn default_multiple_rounds() -> Self {
417        let num_nodes_with_stake = 10;
418        TestDescription::<TYPES, I> {
419            overall_safety_properties: OverallSafetyPropertiesDescription {
420                num_successful_views: 20,
421                ..OverallSafetyPropertiesDescription::default()
422            },
423            timing_data: TimingData {
424                ..TimingData::default()
425            },
426            view_sync_properties: ViewSyncTaskDescription::Threshold(0, num_nodes_with_stake),
427            ..TestDescription::<TYPES, I>::default()
428        }
429    }
430
    /// Default setting with more nodes: delegates to
    /// [`Self::default_more_nodes_with_stake`] (20 staked nodes, 14 of them DA) using the
    /// default [`TestNodeStakes`].
    ///
    /// NOTE(review): an earlier version of this comment promised 8 successful views; the
    /// required view count is not set here and comes from
    /// `OverallSafetyPropertiesDescription::default()` - confirm the actual value there.
    #[must_use]
    #[allow(clippy::redundant_field_names)]
    pub fn default_more_nodes() -> Self {
        Self::default_more_nodes_with_stake(TestNodeStakes::default())
    }
437
438    #[must_use]
439    #[allow(clippy::redundant_field_names)]
440    pub fn default_more_nodes_with_stake(node_stakes: TestNodeStakes) -> Self {
441        let num_nodes_with_stake = 20;
442        let num_da_nodes = 14;
443        let epoch_height = 10;
444        let epoch_start_block = 1;
445
446        let (staked_nodes, da_nodes) =
447            gen_node_lists::<TYPES>(num_nodes_with_stake, num_da_nodes, &node_stakes);
448
449        let upgrade = Upgrade::trivial(MIN_SUPPORTED_VERSION);
450        Self {
451            test_config: default_hotshot_config::<TYPES>(
452                staked_nodes,
453                da_nodes,
454                num_nodes_with_stake.try_into().unwrap(),
455                epoch_height,
456                epoch_start_block,
457            ),
458            upgrade,
459            // The first 14 (i.e., 20 - f) nodes are in the DA committee and we may shutdown the
460            // remaining 6 (i.e., f) nodes. We could remove this restriction after fixing the
461            // following issue.
462            completion_task_description: CompletionTaskDescription::TimeBasedCompletionTaskBuilder(
463                TimeBasedCompletionTaskDescription {
464                    // Increase the duration to get the expected number of successful views.
465                    duration: Duration::from_secs(340),
466                },
467            ),
468            overall_safety_properties: OverallSafetyPropertiesDescription {
469                ..Default::default()
470            },
471            timing_data: TimingData {
472                next_view_timeout: 6000,
473                ..TimingData::default()
474            },
475            view_sync_properties: ViewSyncTaskDescription::Threshold(
476                0,
477                num_nodes_with_stake.try_into().unwrap(),
478            ),
479            node_stakes,
480            ..Self::default()
481        }
482    }
483
484    pub fn set_num_nodes(self, num_nodes: u64, num_da_nodes: u64) -> Self {
485        assert!(
486            num_da_nodes <= num_nodes,
487            "Cannot build test with fewer DA than total nodes. You may have mixed up the \
488             arguments to the function"
489        );
490
491        let (staked_nodes, da_nodes) =
492            gen_node_lists::<TYPES>(num_nodes, num_da_nodes, &self.node_stakes);
493
494        let upgrade = Upgrade::trivial(MIN_SUPPORTED_VERSION);
495        Self {
496            test_config: default_hotshot_config::<TYPES>(
497                staked_nodes,
498                da_nodes,
499                self.test_config.num_bootstrap,
500                self.test_config.epoch_height,
501                self.test_config.epoch_start_block,
502            ),
503            upgrade,
504            ..self
505        }
506    }
507
508    pub fn build_node_key_map(&self) -> Arc<TestNodeKeyMap> {
509        let mut node_key_map = TestNodeKeyMap::new();
510        for i in 0..self.test_config.num_nodes_with_stake.into() {
511            let (private_key, public_key) = key_pair_for_id::<TestTypes>(i as u64);
512            node_key_map.insert(public_key, private_key);
513        }
514
515        Arc::new(node_key_map)
516    }
517
518    #[must_use]
519    pub fn default_with_stake(node_stakes: TestNodeStakes) -> Self {
520        let num_nodes_with_stake = 7;
521        let num_da_nodes = num_nodes_with_stake;
522        let epoch_height = 10;
523        let epoch_start_block = 1;
524
525        let (staked_nodes, da_nodes) =
526            gen_node_lists::<TYPES>(num_nodes_with_stake, num_da_nodes, &node_stakes);
527
528        let upgrade = Upgrade::trivial(MIN_SUPPORTED_VERSION);
529        Self {
530            test_config: default_hotshot_config::<TYPES>(
531                staked_nodes,
532                da_nodes,
533                num_nodes_with_stake.try_into().unwrap(),
534                epoch_height,
535                epoch_start_block,
536            ),
537            upgrade,
538            timing_data: TimingData::default(),
539            skip_late: false,
540            spinning_properties: SpinningTaskDescription {
541                node_changes: vec![],
542            },
543            overall_safety_properties: OverallSafetyPropertiesDescription::default(),
544            // arbitrary, haven't done the math on this
545            txn_description: TxnTaskDescription::RoundRobinTimeBased(Duration::from_millis(100)),
546            completion_task_description: CompletionTaskDescription::TimeBasedCompletionTaskBuilder(
547                TimeBasedCompletionTaskDescription {
548                    duration: Duration::from_secs(120),
549                },
550            ),
551            unreliable_network: None,
552            view_sync_properties: ViewSyncTaskDescription::Threshold(
553                0,
554                num_nodes_with_stake.try_into().unwrap(),
555            ),
556            builders: vec1::vec1![BuilderDescription::default(), BuilderDescription::default(),],
557            fallback_builder: BuilderDescription::default(),
558            solver: FakeSolverApiDescription {
559                // Default to a 10% error rate.
560                error_pct: 0.1,
561            },
562            behaviour: Rc::new(|_| Behaviour::Standard),
563            async_delay_config: HashMap::new(),
564            upgrade_view: None,
565            start_solver: true,
566            validate_transactions: Arc::new(|_| Ok(())),
567            node_stakes,
568        }
569    }
570}
571
impl<TYPES: NodeType, I: NodeImplementation<TYPES>> Default for TestDescription<TYPES, I> {
    /// The baseline description: `default_with_stake` applied to the default
    /// `TestNodeStakes`.
    #[allow(clippy::redundant_field_names)]
    fn default() -> Self {
        Self::default_with_stake(TestNodeStakes::default())
    }
}
579
580impl<TYPES: NodeType<InstanceState = TestInstanceState>, I: TestableNodeImplementation<TYPES>>
581    TestDescription<TYPES, I>
582where
583    I: NodeImplementation<TYPES>,
584{
    /// Turn a description of a test (e.g. a [`TestDescription`]) into
    /// a [`TestLauncher`] that can be used to launch the test.
    ///
    /// Equivalent to `gen_launcher_with_tasks` with no additional test tasks.
    /// # Panics
    /// if some of the configuration values are zero
    /// (NOTE(review): the panic is not visible in this function - confirm in the callees)
    pub fn gen_launcher(self) -> TestLauncher<TYPES, I> {
        self.gen_launcher_with_tasks(vec![])
    }
592
593    /// turn a description of a test (e.g. a [`TestDescription`]) into
594    /// a [`TestLauncher`] that can be used to launch the test, with
595    /// additional testing tasks to run in test harness
596    /// # Panics
597    /// if some of the configuration values are zero
598    #[must_use]
599    pub fn gen_launcher_with_tasks(
600        mut self,
601        additional_test_tasks: Vec<Box<dyn TestTaskStateSeed<TYPES, I>>>,
602    ) -> TestLauncher<TYPES, I> {
603        let mut connect_infos = HashMap::new();
604        let networks = <I as TestableNodeImplementation<TYPES>>::gen_networks(
605            self.test_config.num_nodes_with_stake.into(),
606            self.test_config.num_bootstrap,
607            self.test_config.da_staked_committee_size,
608            self.unreliable_network.clone(),
609            self.timing_data.secondary_network_delay,
610            &mut connect_infos,
611        );
612
613        // Update peer configs with address information created by `gen_networks`.
614        for cfg in self.test_config.known_nodes_with_stake.iter_mut() {
615            if let Some(info) = connect_infos.get(&cfg.stake_table_entry.public_key()) {
616                cfg.connect_info = Some(info.clone())
617            }
618        }
619        for cfg in self.test_config.known_da_nodes.iter_mut() {
620            if let Some(info) = connect_infos.get(&cfg.stake_table_entry.public_key()) {
621                cfg.connect_info = Some(info.clone())
622            }
623        }
624
625        let TestDescription {
626            timing_data,
627            test_config,
628            node_stakes,
629            ..
630        } = self.clone();
631
632        let validator_config = Rc::new(move |node_id| {
633            ValidatorConfig::<TYPES>::generated_from_seed_indexed(
634                [0u8; 32],
635                node_id,
636                node_stakes.get(node_id),
637                // This is the config for node 0
638                node_id < test_config.da_staked_committee_size as u64,
639            )
640        });
641
642        let hotshot_config = Rc::new(move |_| test_config.clone());
643
644        let TimingData {
645            next_view_timeout,
646            builder_timeout,
647            data_request_delay,
648            view_sync_timeout,
649            ..
650        } = timing_data;
651
652        // TODO this should really be using the timing config struct
653        let mod_hotshot_config = move |hotshot_config: &mut HotShotConfig<TYPES>| {
654            hotshot_config.next_view_timeout = next_view_timeout;
655            hotshot_config.builder_timeout = builder_timeout;
656            hotshot_config.data_request_delay = data_request_delay;
657            hotshot_config.view_sync_timeout = view_sync_timeout;
658        };
659
660        let metadata = self.clone();
661        TestLauncher {
662            resource_generators: ResourceGenerators {
663                channel_generator: networks,
664                storage: Rc::new(move |node_id| TestStorage::<TYPES> {
665                    delay_config: metadata
666                        .async_delay_config
667                        .get(&node_id)
668                        .cloned()
669                        .unwrap_or_default(),
670                    ..Default::default()
671                }),
672                hotshot_config,
673                validator_config,
674            },
675            metadata: self,
676            additional_test_tasks,
677        }
678        .map_hotshot_config(mod_hotshot_config)
679    }
680}