From e70997995a5b4549a5630fc098852ea16fa6c24d Mon Sep 17 00:00:00 2001 From: Krisztian Kovacs Date: Fri, 20 Sep 2024 11:15:14 +0200 Subject: [PATCH 1/5] fix(pathfinder/state/revert): always apply reverse state updates When reverting Merkle tries to a previous state we have been checking if we still have the tree root for the revert target. This seems like a good idea, because if we _still_ have the trie(s) for the target block then why bother applying reverse state updates from the current state? It turns out that's not so simple. First, trie nodes added in blocks that are being deleted (right after the revert when pruning the block range that has been reorged away) will be permanently leaked. There's no way around that because we do not keep track of per-block trie node _additions_, only _removals_. Second -- and this is _much_ more problematic -- the way we were handling trie removals data was just plain wrong. When reverting trie state to block N we've just moved removals data for all blocks > N into our target block. That removals data was effectively nodes that have been deleted from the trie after block N. The problem with this is that when we were then removing trie nodes based on the removals data of block N later, we might be deleting nodes that were still reachable at that block! This change fixes this by removing the special case: when doing a revert we are _always_ just applying a reverse diff to the _current_ state of the trie. This makes sure that we'll compute and insert new state roots for the target block and that the _removals_ data for that block will indeed only contain nodes that are unreachable (either by having been removed at a previous reorged-away block _or_ by the reverse update). This change has been tested using the `feeder_gateway` tool triggering thousands of multi-block reorgs while syncing Sepolia testnet block range 113k to 194k. Without this fix this end-to-end test was consistently reproducing the "missing node" issue. Closes: #2170 --- crates/pathfinder/src/state/sync/revert.rs | 173 ++++++++++----------- crates/storage/src/connection/trie.rs | 2 +- 2 files changed, 84 insertions(+), 91 deletions(-) diff --git a/crates/pathfinder/src/state/sync/revert.rs b/crates/pathfinder/src/state/sync/revert.rs index 06d4580d8c..6abe0c2042 100644 --- a/crates/pathfinder/src/state/sync/revert.rs +++ b/crates/pathfinder/src/state/sync/revert.rs @@ -49,8 +49,7 @@ pub fn revert_starknet_state( target_block, target_header.class_commitment, )?; - - transaction.coalesce_trie_nodes(target_block) + transaction.coalesce_trie_removals(target_block) } /// Revert all contract/global storage trie updates. @@ -63,55 +62,52 @@ fn revert_contract_updates( target_block: BlockNumber, expected_storage_commitment: StorageCommitment, ) -> anyhow::Result<()> { - if !transaction.storage_root_exists(target_block)? { - let updates = transaction.reverse_contract_updates(head, target_block)?; - - let mut global_tree = StorageCommitmentTree::load(transaction, head) - .context("Loading global storage tree")?; - - for (contract_address, contract_update) in updates { - let state_hash = pathfinder_merkle_tree::contract_state::revert_contract_state( - transaction, - contract_address, - head, - target_block, - contract_update, - )?; - - transaction - .insert_contract_state_hash(target_block, contract_address, state_hash) - .context("Inserting reverted contract state hash")?; - - global_tree - .set(contract_address, state_hash) - .context("Updating contract state hash in global tree")?; - } - - tracing::debug!("Applied reverse updates, committing global state tree"); - - let (storage_commitment, trie_update) = global_tree - .commit() - .context("Committing global state tree")?; - - if expected_storage_commitment != storage_commitment { - anyhow::bail!( - "Storage commitment mismatch: expected {}, calculated {}", - expected_storage_commitment, - storage_commitment - ); - } - - let root_idx = transaction - .insert_storage_trie(&trie_update, target_block) - .context("Persisting storage trie")?; + let updates = transaction.reverse_contract_updates(head, target_block)?; + + let mut global_tree = + StorageCommitmentTree::load(transaction, head).context("Loading global storage tree")?; + + for (contract_address, contract_update) in updates { + let state_hash = pathfinder_merkle_tree::contract_state::revert_contract_state( + transaction, + contract_address, + head, + target_block, + contract_update, + )?; transaction - .insert_storage_root(target_block, root_idx) - .context("Inserting storage root index")?; - tracing::debug!(%target_block, %storage_commitment, "Committed global state tree"); - } else { - tracing::debug!(%target_block, "State tree root node exists"); + .insert_contract_state_hash(target_block, contract_address, state_hash) + .context("Inserting reverted contract state hash")?; + + global_tree + .set(contract_address, state_hash) + .context("Updating contract state hash in global tree")?; + } + + tracing::debug!("Applied reverse updates, committing global state tree"); + + let (storage_commitment, trie_update) = global_tree + .commit() + .context("Committing global state tree")?; + + if expected_storage_commitment != storage_commitment { + anyhow::bail!( + "Storage commitment mismatch: expected {}, calculated {}", + expected_storage_commitment, + storage_commitment + ); } + + let root_idx = transaction + .insert_storage_trie(&trie_update, target_block) + .context("Persisting storage trie")?; + + transaction + .insert_storage_root(target_block, root_idx) + .context("Inserting storage root index")?; + tracing::debug!(%target_block, %storage_commitment, "Committed global state tree"); + Ok(()) } @@ -122,51 +118,48 @@ fn revert_class_updates( target_block: BlockNumber, expected_class_commitment: ClassCommitment, ) -> anyhow::Result<()> { - if !transaction.class_root_exists(target_block)? { - let updates = transaction.reverse_sierra_class_updates(head, target_block)?; - - let mut class_tree = ClassCommitmentTree::load(transaction, head) - .context("Loading class commitment trie")?; - - for (class_hash, casm_update) in updates { - let new_value = match casm_update { - None => { - // The class must be removed - ClassCommitmentLeafHash::ZERO - } - Some(casm_hash) => { - // Class hash has changed. Note that the class commitment leaf must have already - // been added to storage. - pathfinder_common::calculate_class_commitment_leaf_hash(casm_hash) - } - }; - - class_tree - .set(class_hash, new_value) - .context("Updating class commitment trie")?; - } - - let (class_commitment, trie_update) = - class_tree.commit().context("Committing class trie")?; - - if expected_class_commitment != class_commitment { - anyhow::bail!( - "Storage commitment mismatch: expected {}, calculated {}", - expected_class_commitment, - class_commitment - ); - } - - let root_idx = transaction - .insert_class_trie(&trie_update, target_block) - .context("Persisting class trie")?; + let updates = transaction.reverse_sierra_class_updates(head, target_block)?; + + let mut class_tree = + ClassCommitmentTree::load(transaction, head).context("Loading class commitment trie")?; + + for (class_hash, casm_update) in updates { + let new_value = match casm_update { + None => { + // The class must be removed + ClassCommitmentLeafHash::ZERO + } + Some(casm_hash) => { + // Class hash has changed. Note that the class commitment leaf must have already + // been added to storage. + pathfinder_common::calculate_class_commitment_leaf_hash(casm_hash) + } + }; + + class_tree + .set(class_hash, new_value) + .context("Updating class commitment trie")?; + } - transaction - .insert_class_root(target_block, root_idx) - .context("Inserting class root index")?; + let (class_commitment, trie_update) = class_tree.commit().context("Committing class trie")?; - tracing::debug!(%target_block, %class_commitment, "Committed class trie"); + if expected_class_commitment != class_commitment { + anyhow::bail!( + "Storage commitment mismatch: expected {}, calculated {}", + expected_class_commitment, + class_commitment + ); } + let root_idx = transaction + .insert_class_trie(&trie_update, target_block) + .context("Persisting class trie")?; + + transaction + .insert_class_root(target_block, root_idx) + .context("Inserting class root index")?; + + tracing::debug!(%target_block, %class_commitment, "Committed class trie"); + Ok(()) } diff --git a/crates/storage/src/connection/trie.rs b/crates/storage/src/connection/trie.rs index 7c55c451e9..11fe530526 100644 --- a/crates/storage/src/connection/trie.rs +++ b/crates/storage/src/connection/trie.rs @@ -366,7 +366,7 @@ impl Transaction<'_> { Ok(()) } - pub fn coalesce_trie_nodes(&self, target_block: BlockNumber) -> anyhow::Result<()> { + pub fn coalesce_trie_removals(&self, target_block: BlockNumber) -> anyhow::Result<()> { self.coalesce_removed_trie_nodes(target_block, "trie_contracts")?; self.coalesce_removed_trie_nodes(target_block, "trie_storage")?; self.coalesce_removed_trie_nodes(target_block, "trie_class") From cd2695bb66263aceb7d95bbaf73432fbf58c0340 Mon Sep 17 00:00:00 2001 From: Krisztian Kovacs Date: Fri, 20 Sep 2024 15:12:37 +0200 Subject: [PATCH 2/5] fix(merkle-tree/revert): default contract root to zero for contracts with empty storage During a revert it's possible that we're reverting state for a contract whose storage is completely empty. Such contracts have _no_ contract root in the database so in case loading the contract root fails we should default that to zero instead of failing. --- crates/merkle-tree/src/contract_state.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/merkle-tree/src/contract_state.rs b/crates/merkle-tree/src/contract_state.rs index 33f7333131..4bde30d203 100644 --- a/crates/merkle-tree/src/contract_state.rs +++ b/crates/merkle-tree/src/contract_state.rs @@ -207,7 +207,7 @@ pub fn revert_contract_state( } else { transaction .contract_root(head, contract_address)? - .context("Fetching current contract root")? + .unwrap_or(ContractRoot::ZERO) }; let state_hash = if contract_address.is_system_contract() && root == ContractRoot::ZERO From f60b1777dba628dfb229db2d6e51eab1f48c5776 Mon Sep 17 00:00:00 2001 From: Krisztian Kovacs Date: Fri, 20 Sep 2024 17:38:51 +0200 Subject: [PATCH 3/5] chore: update CHANGELOG --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d913d56339..ce151b69bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ More expansive patch notes and explanations may be found in the specific [pathfi The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## Unreleased + +### Fixed + +- Pathfinder occasionally corrupts its Merkle trie storage during reorgs and then stops later with a "Node X at height Y is missing" or "Stored node's hash is missing" error. + ## [0.14.2] - 2024-09-03 ### Fixed From e1ea536827a7fee24fe3a21c507ce83a33b140c2 Mon Sep 17 00:00:00 2001 From: Krzysztof Lis Date: Thu, 5 Sep 2024 18:27:23 +0200 Subject: [PATCH 4/5] chore: clippy --- crates/common/src/lib.rs | 2 +- crates/executor/src/simulate.rs | 5 ++--- crates/gateway-client/src/metrics.rs | 4 +--- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/crates/common/src/lib.rs b/crates/common/src/lib.rs index c6c64d8137..300427ae07 100644 --- a/crates/common/src/lib.rs +++ b/crates/common/src/lib.rs @@ -424,7 +424,7 @@ impl ChainId { pub fn as_str(&self) -> &str { std::str::from_utf8(self.0.as_be_bytes()) .expect("valid utf8") - .trim_start_matches(|c| c == '\0') + .trim_start_matches('\0') } pub const MAINNET: Self = Self::from_slice_unwrap(b"SN_MAIN"); diff --git a/crates/executor/src/simulate.rs b/crates/executor/src/simulate.rs index c87e8aac08..4cec1283d2 100644 --- a/crates/executor/src/simulate.rs +++ b/crates/executor/src/simulate.rs @@ -198,12 +198,11 @@ pub fn trace( cache.cache_set(block_hash, CacheItem::CachedErr(err.clone())); err })?; - let state_diff = - to_state_diff(&mut tx_state, tx_declared_deprecated_class_hash).map_err(|e| { + let state_diff = to_state_diff(&mut tx_state, tx_declared_deprecated_class_hash) + .inspect_err(|_| { // Remove the cache entry so it's no longer inflight. let mut cache = cache.0.lock().unwrap(); cache.cache_remove(&block_hash); - e })?; tx_state.commit(); diff --git a/crates/gateway-client/src/metrics.rs b/crates/gateway-client/src/metrics.rs index 98b71a4eb7..58c3a633da 100644 --- a/crates/gateway-client/src/metrics.rs +++ b/crates/gateway-client/src/metrics.rs @@ -166,7 +166,7 @@ pub async fn with_metrics( metrics::histogram!(METRIC_REQUESTS_LATENCY, elapsed, "method" => meta.method); - result.map_err(|e| { + result.inspect_err(|e| { increment(METRIC_FAILED_REQUESTS, meta); match &e { @@ -191,7 +191,5 @@ pub async fn with_metrics( } SequencerError::ReqwestError(_) => {} } - - e }) } From c52eb5d587d2acbc9bd2d1d520a418b103b1e951 Mon Sep 17 00:00:00 2001 From: Krisztian Kovacs Date: Mon, 23 Sep 2024 16:42:33 +0200 Subject: [PATCH 5/5] chore: bump version to 0.14.3 --- CHANGELOG.md | 2 +- Cargo.lock | 42 ++++++++++++++++++------------------- Cargo.toml | 2 +- crates/load-test/Cargo.lock | 2 +- 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ce151b69bc..4a0b7d1d01 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ More expansive patch notes and explanations may be found in the specific [pathfi The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## Unreleased +## [0.14.3] - 2024-09-23 ### Fixed diff --git a/Cargo.lock b/Cargo.lock index 668e771e17..07329da511 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3922,7 +3922,7 @@ dependencies = [ [[package]] name = "gateway-test-utils" -version = "0.14.2" +version = "0.14.3" dependencies = [ "http 0.2.12", "reqwest", @@ -6149,7 +6149,7 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" [[package]] name = "p2p" -version = "0.14.2" +version = "0.14.3" dependencies = [ "anyhow", "async-stream", @@ -6190,7 +6190,7 @@ dependencies = [ [[package]] name = "p2p_proto" -version = "0.14.2" +version = "0.14.3" dependencies = [ "anyhow", "fake", @@ -6211,7 +6211,7 @@ dependencies = [ [[package]] name = "p2p_proto_derive" -version = "0.14.2" +version = "0.14.3" dependencies = [ "proc-macro2", "quote", @@ -6220,7 +6220,7 @@ dependencies = [ [[package]] name = "p2p_stream" -version = "0.14.2" +version = "0.14.3" dependencies = [ "anyhow", "async-trait", @@ -6348,7 +6348,7 @@ checksum = "17359afc20d7ab31fdb42bb844c8b3bb1dabd7dcf7e68428492da7f16966fcef" [[package]] name = "pathfinder" -version = "0.14.2" +version = "0.14.3" dependencies = [ "anyhow", "assert_matches", @@ -6415,7 +6415,7 @@ dependencies = [ [[package]] name = "pathfinder-common" -version = "0.14.2" +version = "0.14.3" dependencies = [ "anyhow", "bitvec", @@ -6439,7 +6439,7 @@ dependencies = [ [[package]] name = "pathfinder-compiler" -version = "0.14.2" +version = "0.14.3" dependencies = [ "anyhow", "cairo-lang-starknet 1.0.0-alpha.6", @@ -6460,7 +6460,7 @@ dependencies = [ [[package]] name = "pathfinder-crypto" -version = "0.14.2" +version = "0.14.3" dependencies = [ "ark-ff", "assert_matches", @@ -6477,7 +6477,7 @@ dependencies = [ [[package]] name = "pathfinder-ethereum" -version = "0.14.2" +version = "0.14.3" dependencies = [ "anyhow", "async-trait", @@ -6497,7 +6497,7 @@ dependencies = [ [[package]] name = "pathfinder-executor" -version = "0.14.2" +version = "0.14.3" dependencies = [ "anyhow", "blockifier", @@ -6518,7 +6518,7 @@ dependencies = [ [[package]] name = "pathfinder-merkle-tree" -version = "0.14.2" +version = "0.14.3" dependencies = [ "anyhow", "bitvec", @@ -6534,7 +6534,7 @@ dependencies = [ [[package]] name = "pathfinder-retry" -version = "0.14.2" +version = "0.14.3" dependencies = [ "tokio", "tokio-retry", @@ -6542,7 +6542,7 @@ dependencies = [ [[package]] name = "pathfinder-rpc" -version = "0.14.2" +version = "0.14.3" dependencies = [ "anyhow", "assert_matches", @@ -6593,7 +6593,7 @@ dependencies = [ [[package]] name = "pathfinder-serde" -version = "0.14.2" +version = "0.14.3" dependencies = [ "anyhow", "num-bigint 0.4.5", @@ -6608,7 +6608,7 @@ dependencies = [ [[package]] name = "pathfinder-storage" -version = "0.14.2" +version = "0.14.3" dependencies = [ "anyhow", "assert_matches", @@ -8279,7 +8279,7 @@ dependencies = [ [[package]] name = "starknet-gateway-client" -version = "0.14.2" +version = "0.14.3" dependencies = [ "anyhow", "assert_matches", @@ -8314,7 +8314,7 @@ dependencies = [ [[package]] name = "starknet-gateway-test-fixtures" -version = "0.14.2" +version = "0.14.3" dependencies = [ "pathfinder-common", "pathfinder-crypto", @@ -8322,7 +8322,7 @@ dependencies = [ [[package]] name = "starknet-gateway-types" -version = "0.14.2" +version = "0.14.3" dependencies = [ "anyhow", "assert_matches", @@ -8515,7 +8515,7 @@ dependencies = [ [[package]] name = "tagged" -version = "0.14.2" +version = "0.14.3" dependencies = [ "fake", "pretty_assertions_sorted", @@ -8524,7 +8524,7 @@ dependencies = [ [[package]] name = "tagged-debug-derive" -version = "0.14.2" +version = "0.14.3" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 4fbb0178cc..ac895253fd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,7 +38,7 @@ lto = true opt-level = 3 [workspace.package] -version = "0.14.2" +version = "0.14.3" edition = "2021" license = "MIT OR Apache-2.0" rust-version = "1.76" diff --git a/crates/load-test/Cargo.lock b/crates/load-test/Cargo.lock index aea085a038..c316e4d9dd 100644 --- a/crates/load-test/Cargo.lock +++ b/crates/load-test/Cargo.lock @@ -976,7 +976,7 @@ dependencies = [ [[package]] name = "pathfinder-crypto" -version = "0.14.2" +version = "0.14.3" dependencies = [ "bitvec", "fake",