diff --git a/arbitrator/stylus/src/cache.rs b/arbitrator/stylus/src/cache.rs
index c1fdaaccee..5df4c62f7a 100644
--- a/arbitrator/stylus/src/cache.rs
+++ b/arbitrator/stylus/src/cache.rs
@@ -29,8 +29,16 @@ pub struct LruCounters {
     pub does_not_fit: u32,
 }

+pub struct LongTermCounters {
+    pub hits: u32,
+    pub misses: u32,
+}
+
 pub struct InitCache {
     long_term: HashMap,
+    long_term_size_bytes: usize,
+    long_term_counters: LongTermCounters,
+
     lru: CLruCache,
     lru_counters: LruCounters,
 }
@@ -91,6 +99,20 @@ pub struct LruCacheMetrics {
     pub does_not_fit: u32,
 }

+#[repr(C)]
+pub struct LongTermCacheMetrics {
+    pub size_bytes: u64,
+    pub count: u32,
+    pub hits: u32,
+    pub misses: u32,
+}
+
+#[repr(C)]
+pub struct CacheMetrics {
+    pub lru: LruCacheMetrics,
+    pub long_term: LongTermCacheMetrics,
+}
+
 pub fn deserialize_module(
     module: &[u8],
     version: u16,
@@ -117,6 +139,9 @@ impl InitCache {
     fn new(size_bytes: usize) -> Self {
         Self {
             long_term: HashMap::new(),
+            long_term_size_bytes: 0,
+            long_term_counters: LongTermCounters { hits: 0, misses: 0 },
+
             lru: CLruCache::with_config(
                 CLruCacheConfig::new(NonZeroUsize::new(size_bytes).unwrap())
                     .with_scale(CustomWeightScale),
@@ -136,22 +161,41 @@ impl InitCache {
     }

     /// Retrieves a cached value, updating items as necessary.
-    pub fn get(module_hash: Bytes32, version: u16, debug: bool) -> Option<(Module, Store)> {
-        let mut cache = cache!();
+    /// If long_term_tag is 1 and the item is only in the LRU cache, it is also inserted into the long term cache.
+    pub fn get(
+        module_hash: Bytes32,
+        version: u16,
+        long_term_tag: u32,
+        debug: bool,
+    ) -> Option<(Module, Store)> {
         let key = CacheKey::new(module_hash, version, debug);
+        let mut cache = cache!();

         // See if the item is in the long term cache
         if let Some(item) = cache.long_term.get(&key) {
-            return Some(item.data());
+            let data = item.data();
+            cache.long_term_counters.hits += 1;
+            return Some(data);
+        }
+        if long_term_tag == Self::ARBOS_TAG {
+            // only count misses when we can expect to find the item in the long term cache
+            cache.long_term_counters.misses += 1;
         }

         // See if the item is in the LRU cache, promoting if so
-        if let Some(item) = cache.lru.get(&key) {
-            let data = item.data();
+        if let Some(item) = cache.lru.peek(&key).cloned() {
             cache.lru_counters.hits += 1;
-            return Some(data);
+            if long_term_tag == Self::ARBOS_TAG {
+                cache.long_term_size_bytes += item.entry_size_estimate_bytes;
+                cache.long_term.insert(key, item.clone());
+            } else {
+                // only calls get to move the key to the head of the LRU list
+                cache.lru.get(&key);
+            }
+            return Some((item.module, Store::new(item.engine)));
         }
         cache.lru_counters.misses += 1;
+
         None
     }

@@ -174,6 +218,7 @@ impl InitCache {
         if let Some(item) = cache.lru.peek(&key).cloned() {
             if long_term_tag == Self::ARBOS_TAG {
                 cache.long_term.insert(key, item.clone());
+                cache.long_term_size_bytes += item.entry_size_estimate_bytes;
             } else {
                 // only calls get to move the key to the head of the LRU list
                 cache.lru.get(&key);
             }
@@ -195,6 +240,7 @@ impl InitCache {
             };
         } else {
             cache.long_term.insert(key, item);
+            cache.long_term_size_bytes += entry_size_estimate_bytes;
         }
         Ok(data)
     }
@@ -207,6 +253,7 @@ impl InitCache {
         let key = CacheKey::new(module_hash, version, debug);
         let mut cache = cache!();
         if let Some(item) = cache.long_term.remove(&key) {
+            cache.long_term_size_bytes -= item.entry_size_estimate_bytes;
             if cache.lru.put_with_weight(key, item).is_err() {
                 eprintln!("{}", Self::DOES_NOT_FIT_MSG);
             }
@@ -225,23 +272,32 @@ impl InitCache {
                 eprintln!("{}", Self::DOES_NOT_FIT_MSG);
             }
         }
+        cache.long_term_size_bytes = 0;
     }

-    pub fn get_lru_metrics() -> LruCacheMetrics {
+    pub fn get_metrics() -> CacheMetrics {
         let mut cache = cache!();

-        let count = cache.lru.len();
-        let metrics = LruCacheMetrics {
-            // add 1 to each entry to account that we subtracted 1 in the weight calculation
-            size_bytes: (cache.lru.weight() + count).try_into().unwrap(),
+        let lru_count = cache.lru.len();
+        let lru_metrics = LruCacheMetrics {
+            // add 1 per entry to account for the 1 subtracted in the weight calculation
+            size_bytes: (cache.lru.weight() + lru_count).try_into().unwrap(),

-            count: count.try_into().unwrap(),
+            count: lru_count.try_into().unwrap(),

             hits: cache.lru_counters.hits,
             misses: cache.lru_counters.misses,
             does_not_fit: cache.lru_counters.does_not_fit,
         };

+        let long_term_metrics = LongTermCacheMetrics {
+            size_bytes: cache.long_term_size_bytes.try_into().unwrap(),
+            count: cache.long_term.len().try_into().unwrap(),
+
+            hits: cache.long_term_counters.hits,
+            misses: cache.long_term_counters.misses,
+        };
+
         // Empty counters.
         // go side, which is the only consumer of this function besides tests,
         // will read those counters and increment its own prometheus counters with them.
@@ -250,8 +306,12 @@ impl InitCache {
             misses: 0,
             does_not_fit: 0,
         };
+        cache.long_term_counters = LongTermCounters { hits: 0, misses: 0 };

-        metrics
+        CacheMetrics {
+            lru: lru_metrics,
+            long_term: long_term_metrics,
+        }
     }

     // only used for testing
diff --git a/arbitrator/stylus/src/lib.rs b/arbitrator/stylus/src/lib.rs
index abea428167..a6be21f7bf 100644
--- a/arbitrator/stylus/src/lib.rs
+++ b/arbitrator/stylus/src/lib.rs
@@ -11,7 +11,7 @@ use arbutil::{
     format::DebugBytes,
     Bytes32,
 };
-use cache::{deserialize_module, InitCache, LruCacheMetrics};
+use cache::{deserialize_module, CacheMetrics, InitCache};
 use evm_api::NativeRequestHandler;
 use eyre::ErrReport;
 use native::NativeInstance;
@@ -364,10 +364,10 @@ pub unsafe extern "C" fn stylus_drop_vec(vec: RustBytes) {
     }
 }

-/// Gets lru cache metrics.
+/// Gets cache metrics.
 #[no_mangle]
-pub extern "C" fn stylus_get_lru_cache_metrics() -> LruCacheMetrics {
-    InitCache::get_lru_metrics()
+pub extern "C" fn stylus_get_cache_metrics() -> CacheMetrics {
+    InitCache::get_metrics()
 }

 /// Clears lru cache.
@@ -377,18 +377,23 @@ pub extern "C" fn stylus_clear_lru_cache() {
     InitCache::clear_lru_cache()
 }

-/// Gets lru entry size in bytes.
+/// Clears long term cache (for arbos_tag = 1)
 /// Only used for testing purposes.
 #[no_mangle]
-pub extern "C" fn stylus_get_lru_entry_size_estimate_bytes(
+pub extern "C" fn stylus_clear_long_term_cache() {
+    InitCache::clear_long_term(1);
+}
+
+/// Gets entry size in bytes.
+/// Only used for testing purposes.
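The hit and miss counters that `get_metrics` returns are zeroed on every read, so the Go caller has to treat them as deltas, while `size_bytes` and `count` are absolute values. A self-contained Go sketch of that read-and-reset consumption pattern (the snapshot struct and collector below are illustrative stand-ins for the cgo call and for the go-ethereum gauges and counters used later in this diff):

```go
package main

import "fmt"

// cacheSnapshot mirrors the shape of the #[repr(C)] CacheMetrics struct; in
// the real code the values come from C.stylus_get_cache_metrics() via cgo.
type cacheSnapshot struct {
	SizeBytes uint64 // absolute value at the time of the read
	Count     uint32 // absolute value at the time of the read
	Hits      uint32 // delta since the previous read (the Rust side zeroes it on read)
	Misses    uint32 // delta since the previous read (the Rust side zeroes it on read)
}

// gauge-style metrics take the latest absolute value; counter-style metrics
// accumulate the per-read deltas, which is why the Rust side resets them.
type collector struct {
	sizeBytesGauge int64
	hitsCounter    int64
	missesCounter  int64
}

func (c *collector) publish(s cacheSnapshot) {
	c.sizeBytesGauge = int64(s.SizeBytes) // Update() semantics
	c.hitsCounter += int64(s.Hits)        // Inc() semantics
	c.missesCounter += int64(s.Misses)    // Inc() semantics
}

func main() {
	var c collector
	c.publish(cacheSnapshot{SizeBytes: 4096, Count: 2, Hits: 10, Misses: 1})
	c.publish(cacheSnapshot{SizeBytes: 8192, Count: 3, Hits: 5, Misses: 0})
	fmt.Println(c.sizeBytesGauge, c.hitsCounter, c.missesCounter) // 8192 15 1
}
```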
+#[no_mangle] +pub extern "C" fn stylus_get_entry_size_estimate_bytes( module: GoSliceData, version: u16, debug: bool, ) -> u64 { match deserialize_module(module.slice(), version, debug) { Err(error) => panic!("tried to get invalid asm!: {error}"), - Ok((_, _, lru_entry_size_estimate_bytes)) => { - lru_entry_size_estimate_bytes.try_into().unwrap() - } + Ok((_, _, entry_size_estimate_bytes)) => entry_size_estimate_bytes.try_into().unwrap(), } } diff --git a/arbitrator/stylus/src/native.rs b/arbitrator/stylus/src/native.rs index 516c6602e7..c751a670cc 100644 --- a/arbitrator/stylus/src/native.rs +++ b/arbitrator/stylus/src/native.rs @@ -121,13 +121,12 @@ impl> NativeInstance { let compile = CompileConfig::version(version, debug); let env = WasmEnv::new(compile, None, evm, evm_data); let module_hash = env.evm_data.module_hash; - - if let Some((module, store)) = InitCache::get(module_hash, version, debug) { - return Self::from_module(module, store, env); - } if !env.evm_data.cached { long_term_tag = 0; } + if let Some((module, store)) = InitCache::get(module_hash, version, long_term_tag, debug) { + return Self::from_module(module, store, env); + } let (module, store) = InitCache::insert(module_hash, module, version, long_term_tag, debug)?; Self::from_module(module, store, env) diff --git a/arbnode/dataposter/data_poster.go b/arbnode/dataposter/data_poster.go index acbf9c4cc8..373d247696 100644 --- a/arbnode/dataposter/data_poster.go +++ b/arbnode/dataposter/data_poster.go @@ -34,7 +34,6 @@ import ( "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/rpc" "github.com/ethereum/go-ethereum/signer/core/apitypes" - "github.com/go-redis/redis/v8" "github.com/holiman/uint256" "github.com/offchainlabs/nitro/arbnode/dataposter/dbstorage" "github.com/offchainlabs/nitro/arbnode/dataposter/noop" @@ -46,6 +45,7 @@ import ( "github.com/offchainlabs/nitro/util/rpcclient" "github.com/offchainlabs/nitro/util/signature" "github.com/offchainlabs/nitro/util/stopwaiter" + "github.com/redis/go-redis/v9" "github.com/spf13/pflag" redisstorage "github.com/offchainlabs/nitro/arbnode/dataposter/redis" diff --git a/arbnode/dataposter/redis/redisstorage.go b/arbnode/dataposter/redis/redisstorage.go index 8b6dcf65ac..b54abf618b 100644 --- a/arbnode/dataposter/redis/redisstorage.go +++ b/arbnode/dataposter/redis/redisstorage.go @@ -9,9 +9,9 @@ import ( "errors" "fmt" - "github.com/go-redis/redis/v8" "github.com/offchainlabs/nitro/arbnode/dataposter/storage" "github.com/offchainlabs/nitro/util/signature" + "github.com/redis/go-redis/v9" ) // Storage implements redis sorted set backed storage. 
It does not support @@ -196,7 +196,7 @@ func (s *Storage) Put(ctx context.Context, index uint64, prev, new *storage.Queu if err != nil { return err } - if err := pipe.ZAdd(ctx, s.key, &redis.Z{ + if err := pipe.ZAdd(ctx, s.key, redis.Z{ Score: float64(index), Member: string(signedItem), }).Err(); err != nil { diff --git a/arbnode/redislock/redis.go b/arbnode/redislock/redis.go index 7e26010cae..de9508323a 100644 --- a/arbnode/redislock/redis.go +++ b/arbnode/redislock/redis.go @@ -12,8 +12,8 @@ import ( "time" "github.com/ethereum/go-ethereum/log" - "github.com/go-redis/redis/v8" "github.com/offchainlabs/nitro/util/stopwaiter" + "github.com/redis/go-redis/v9" flag "github.com/spf13/pflag" ) diff --git a/arbnode/seq_coordinator.go b/arbnode/seq_coordinator.go index 176ace114b..d22b4f7491 100644 --- a/arbnode/seq_coordinator.go +++ b/arbnode/seq_coordinator.go @@ -14,7 +14,7 @@ import ( "sync/atomic" "time" - "github.com/go-redis/redis/v8" + "github.com/redis/go-redis/v9" flag "github.com/spf13/pflag" "github.com/ethereum/go-ethereum/log" diff --git a/arbos/block_processor.go b/arbos/block_processor.go index b180405c43..19fc36b351 100644 --- a/arbos/block_processor.go +++ b/arbos/block_processor.go @@ -144,6 +144,7 @@ func ProduceBlock( chainContext core.ChainContext, chainConfig *params.ChainConfig, isMsgForPrefetch bool, + runMode core.MessageRunMode, ) (*types.Block, types.Receipts, error) { txes, err := ParseL2Transactions(message, chainConfig.ChainID) if err != nil { @@ -153,7 +154,7 @@ func ProduceBlock( hooks := NoopSequencingHooks() return ProduceBlockAdvanced( - message.Header, txes, delayedMessagesRead, lastBlockHeader, statedb, chainContext, chainConfig, hooks, isMsgForPrefetch, + message.Header, txes, delayedMessagesRead, lastBlockHeader, statedb, chainContext, chainConfig, hooks, isMsgForPrefetch, runMode, ) } @@ -168,6 +169,7 @@ func ProduceBlockAdvanced( chainConfig *params.ChainConfig, sequencingHooks *SequencingHooks, isMsgForPrefetch bool, + runMode core.MessageRunMode, ) (*types.Block, types.Receipts, error) { state, err := arbosState.OpenSystemArbosState(statedb, nil, true) @@ -318,6 +320,7 @@ func ProduceBlockAdvanced( tx, &header.GasUsed, vm.Config{}, + runMode, func(result *core.ExecutionResult) error { return hooks.PostTxFilter(header, state, tx, sender, dataGas, result) }, diff --git a/arbos/programs/native.go b/arbos/programs/native.go index 5fbc512211..e5c2632667 100644 --- a/arbos/programs/native.go +++ b/arbos/programs/native.go @@ -47,11 +47,16 @@ type rustBytes = C.RustBytes type rustSlice = C.RustSlice var ( - stylusLRUCacheSizeBytesGauge = metrics.NewRegisteredGauge("arb/arbos/stylus/cache/lru/size_bytes", nil) - stylusLRUCacheSizeCountGauge = metrics.NewRegisteredGauge("arb/arbos/stylus/cache/lru/count", nil) - stylusLRUCacheSizeHitsCounter = metrics.NewRegisteredCounter("arb/arbos/stylus/cache/lru/hits", nil) - stylusLRUCacheSizeMissesCounter = metrics.NewRegisteredCounter("arb/arbos/stylus/cache/lru/misses", nil) - stylusLRUCacheSizeDoesNotFitCounter = metrics.NewRegisteredCounter("arb/arbos/stylus/cache/lru/does_not_fit", nil) + stylusLRUCacheSizeBytesGauge = metrics.NewRegisteredGauge("arb/arbos/stylus/cache/lru/size_bytes", nil) + stylusLRUCacheCountGauge = metrics.NewRegisteredGauge("arb/arbos/stylus/cache/lru/count", nil) + stylusLRUCacheHitsCounter = metrics.NewRegisteredCounter("arb/arbos/stylus/cache/lru/hits", nil) + stylusLRUCacheMissesCounter = metrics.NewRegisteredCounter("arb/arbos/stylus/cache/lru/misses", nil) + 
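The data poster's redis storage hunk earlier in this diff tracks the go-redis v8 to v9 migration: the import path moves to `github.com/redis/go-redis/v9` and `ZAdd` now takes `redis.Z` members by value rather than `*redis.Z`. A minimal sketch of the v9 call shape (the address and key are placeholders; in the repo the client comes from redisutil):

```go
package main

import (
	"context"
	"fmt"

	"github.com/redis/go-redis/v9"
)

func main() {
	ctx := context.Background()
	// Placeholder address; the data poster builds its client from configuration.
	client := redis.NewClient(&redis.Options{Addr: "localhost:6379"})

	// go-redis v9: ZAdd takes redis.Z values directly (v8 took *redis.Z).
	err := client.ZAdd(ctx, "example.sorted-set", redis.Z{
		Score:  42,
		Member: "payload",
	}).Err()
	fmt.Println("zadd error:", err)
}
```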
stylusLRUCacheDoesNotFitCounter = metrics.NewRegisteredCounter("arb/arbos/stylus/cache/lru/does_not_fit", nil) + + stylusLongTermCacheSizeBytesGauge = metrics.NewRegisteredGauge("arb/arbos/stylus/cache/long_term/size_bytes", nil) + stylusLongTermCacheCountGauge = metrics.NewRegisteredGauge("arb/arbos/stylus/cache/long_term/count", nil) + stylusLongTermCacheHitsCounter = metrics.NewRegisteredCounter("arb/arbos/stylus/cache/long_term/hits", nil) + stylusLongTermCacheMissesCounter = metrics.NewRegisteredCounter("arb/arbos/stylus/cache/long_term/misses", nil) ) func activateProgram( @@ -333,24 +338,52 @@ func SetWasmLruCacheCapacity(capacityBytes uint64) { C.stylus_set_cache_lru_capacity(u64(capacityBytes)) } -// exported for testing +func UpdateWasmCacheMetrics() { + metrics := C.stylus_get_cache_metrics() + + stylusLRUCacheSizeBytesGauge.Update(int64(metrics.lru.size_bytes)) + stylusLRUCacheCountGauge.Update(int64(metrics.lru.count)) + stylusLRUCacheHitsCounter.Inc(int64(metrics.lru.hits)) + stylusLRUCacheMissesCounter.Inc(int64(metrics.lru.misses)) + stylusLRUCacheDoesNotFitCounter.Inc(int64(metrics.lru.does_not_fit)) + + stylusLongTermCacheSizeBytesGauge.Update(int64(metrics.long_term.size_bytes)) + stylusLongTermCacheCountGauge.Update(int64(metrics.long_term.count)) + stylusLongTermCacheHitsCounter.Inc(int64(metrics.long_term.hits)) + stylusLongTermCacheMissesCounter.Inc(int64(metrics.long_term.misses)) +} + +// Used for testing type WasmLruCacheMetrics struct { SizeBytes uint64 Count uint32 } -func GetWasmLruCacheMetrics() *WasmLruCacheMetrics { - metrics := C.stylus_get_lru_cache_metrics() +// Used for testing +type WasmLongTermCacheMetrics struct { + SizeBytes uint64 + Count uint32 +} - stylusLRUCacheSizeBytesGauge.Update(int64(metrics.size_bytes)) - stylusLRUCacheSizeCountGauge.Update(int64(metrics.count)) - stylusLRUCacheSizeHitsCounter.Inc(int64(metrics.hits)) - stylusLRUCacheSizeMissesCounter.Inc(int64(metrics.misses)) - stylusLRUCacheSizeDoesNotFitCounter.Inc(int64(metrics.does_not_fit)) +// Used for testing +type WasmCacheMetrics struct { + Lru WasmLruCacheMetrics + LongTerm WasmLongTermCacheMetrics +} - return &WasmLruCacheMetrics{ - SizeBytes: uint64(metrics.size_bytes), - Count: uint32(metrics.count), +// Used for testing +func GetWasmCacheMetrics() *WasmCacheMetrics { + metrics := C.stylus_get_cache_metrics() + + return &WasmCacheMetrics{ + Lru: WasmLruCacheMetrics{ + SizeBytes: uint64(metrics.lru.size_bytes), + Count: uint32(metrics.lru.count), + }, + LongTerm: WasmLongTermCacheMetrics{ + SizeBytes: uint64(metrics.long_term.size_bytes), + Count: uint32(metrics.long_term.count), + }, } } @@ -360,8 +393,13 @@ func ClearWasmLruCache() { } // Used for testing -func GetLruEntrySizeEstimateBytes(module []byte, version uint16, debug bool) uint64 { - return uint64(C.stylus_get_lru_entry_size_estimate_bytes(goSlice(module), u16(version), cbool(debug))) +func ClearWasmLongTermCache() { + C.stylus_clear_long_term_cache() +} + +// Used for testing +func GetEntrySizeEstimateBytes(module []byte, version uint16, debug bool) uint64 { + return uint64(C.stylus_get_entry_size_estimate_bytes(goSlice(module), u16(version), cbool(debug))) } const DefaultTargetDescriptionArm = "arm64-linux-unknown+neon" diff --git a/broadcastclient/broadcastclient.go b/broadcastclient/broadcastclient.go index 7d27c57fe9..4e97ca8cd0 100644 --- a/broadcastclient/broadcastclient.go +++ b/broadcastclient/broadcastclient.go @@ -280,6 +280,18 @@ func (bc *BroadcastClient) connect(ctx context.Context, nextSeqNum 
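The testing helpers above (`GetWasmCacheMetrics`, `ClearWasmLruCache`, `ClearWasmLongTermCache`, `GetEntrySizeEstimateBytes`) lend themselves to assertions along the lines of the following sketch; it assumes a system test that has already activated and called a Stylus program with the ArbOS tag, and the package and function names around it are hypothetical:

```go
package arbtest

import (
	"testing"

	"github.com/offchainlabs/nitro/arbos/programs"
)

// checkLongTermCacheClears is a sketch: it assumes an earlier test step has
// activated and called a Stylus program with the ArbOS tag set, so the
// long-term cache holds at least one entry.
func checkLongTermCacheClears(t *testing.T) {
	t.Helper()

	before := programs.GetWasmCacheMetrics()
	if before.LongTerm.Count == 0 {
		t.Fatal("expected at least one long-term cache entry before clearing")
	}

	// Clearing the long-term cache drains its entries back into the LRU cache
	// (entries that do not fit are dropped), so the long-term side must be empty.
	programs.ClearWasmLongTermCache()

	after := programs.GetWasmCacheMetrics()
	if after.LongTerm.Count != 0 || after.LongTerm.SizeBytes != 0 {
		t.Fatalf("long-term cache not empty after clear: count=%d size=%d",
			after.LongTerm.Count, after.LongTerm.SizeBytes)
	}
}
```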
arbutil.Messa MinVersion: tls.VersionTLS12, }, Extensions: extensions, + NetDial: func(ctx context.Context, network, addr string) (net.Conn, error) { + var netDialer net.Dialer + // For tcp connections, prefer IPv4 over IPv6 to avoid rate limiting issues + if network == "tcp" { + conn, err := netDialer.DialContext(ctx, "tcp4", addr) + if err == nil { + return conn, nil + } + return netDialer.DialContext(ctx, "tcp6", addr) + } + return netDialer.DialContext(ctx, network, addr) + }, } if bc.isShuttingDown() { diff --git a/cmd/conf/chain.go b/cmd/conf/chain.go index b85f7727b1..28b06aad2b 100644 --- a/cmd/conf/chain.go +++ b/cmd/conf/chain.go @@ -52,23 +52,19 @@ func (c *ParentChainConfig) Validate() error { } type L2Config struct { - ID uint64 `koanf:"id"` - Name string `koanf:"name"` - InfoFiles []string `koanf:"info-files"` - InfoJson string `koanf:"info-json"` - DevWallet genericconf.WalletConfig `koanf:"dev-wallet"` - InfoIpfsUrl string `koanf:"info-ipfs-url"` - InfoIpfsDownloadPath string `koanf:"info-ipfs-download-path"` + ID uint64 `koanf:"id"` + Name string `koanf:"name"` + InfoFiles []string `koanf:"info-files"` + InfoJson string `koanf:"info-json"` + DevWallet genericconf.WalletConfig `koanf:"dev-wallet"` } var L2ConfigDefault = L2Config{ - ID: 0, - Name: "", - InfoFiles: []string{}, // Default file used is chaininfo/arbitrum_chain_info.json, stored in DefaultChainInfo in chain_info.go - InfoJson: "", - DevWallet: genericconf.WalletConfigDefault, - InfoIpfsUrl: "", - InfoIpfsDownloadPath: "/tmp/", + ID: 0, + Name: "", + InfoFiles: []string{}, // Default file used is chaininfo/arbitrum_chain_info.json, stored in DefaultChainInfo in chain_info.go + InfoJson: "", + DevWallet: genericconf.WalletConfigDefault, } func L2ConfigAddOptions(prefix string, f *flag.FlagSet) { @@ -79,9 +75,6 @@ func L2ConfigAddOptions(prefix string, f *flag.FlagSet) { // Dev wallet does not exist unless specified genericconf.WalletConfigAddOptions(prefix+".dev-wallet", f, "") - f.String(prefix+".info-ipfs-url", L2ConfigDefault.InfoIpfsUrl, "url to download chain info file") - f.String(prefix+".info-ipfs-download-path", L2ConfigDefault.InfoIpfsDownloadPath, "path to save temp downloaded file") - } func (c *L2Config) ResolveDirectoryNames(chain string) { diff --git a/cmd/ipfshelper/ipfshelper.bkup_go b/cmd/ipfshelper/ipfshelper.bkup_go deleted file mode 100644 index ccde492ca6..0000000000 --- a/cmd/ipfshelper/ipfshelper.bkup_go +++ /dev/null @@ -1,281 +0,0 @@ -//go:build ipfs -// +build ipfs - -package ipfshelper - -import ( - "context" - "fmt" - "io" - "math/rand" - "os" - "path/filepath" - "strings" - "sync" - - "github.com/ethereum/go-ethereum/log" - "github.com/ipfs/go-libipfs/files" - coreiface "github.com/ipfs/interface-go-ipfs-core" - "github.com/ipfs/interface-go-ipfs-core/options" - "github.com/ipfs/interface-go-ipfs-core/path" - "github.com/ipfs/kubo/config" - "github.com/ipfs/kubo/core" - "github.com/ipfs/kubo/core/coreapi" - "github.com/ipfs/kubo/core/node/libp2p" - "github.com/ipfs/kubo/plugin/loader" - "github.com/ipfs/kubo/repo" - "github.com/ipfs/kubo/repo/fsrepo" - "github.com/libp2p/go-libp2p/core/host" - "github.com/libp2p/go-libp2p/core/peer" - ma "github.com/multiformats/go-multiaddr" -) - -const DefaultIpfsProfiles = "" - -type IpfsHelper struct { - api coreiface.CoreAPI - node *core.IpfsNode - cfg *config.Config - repoPath string - repo repo.Repo -} - -func (h *IpfsHelper) createRepo(downloadPath string, profiles string) error { - fileInfo, err := os.Stat(downloadPath) - if err != nil { - 
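The broadcast client hunk above adds a `NetDial` hook that prefers IPv4 for plain TCP connections (to sidestep per-address rate limiting) and falls back to IPv6 only if the IPv4 dial fails. The same pattern in isolation, as a runnable sketch with a placeholder endpoint:

```go
package main

import (
	"context"
	"fmt"
	"net"
	"time"
)

// dialPreferIPv4 mirrors the NetDial hook added to the broadcast client:
// for "tcp" it tries an IPv4 connection first and only then falls back to IPv6.
func dialPreferIPv4(ctx context.Context, network, addr string) (net.Conn, error) {
	var d net.Dialer
	if network == "tcp" {
		if conn, err := d.DialContext(ctx, "tcp4", addr); err == nil {
			return conn, nil
		}
		return d.DialContext(ctx, "tcp6", addr)
	}
	return d.DialContext(ctx, network, addr)
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()

	// Placeholder endpoint; any reachable TCP address works for the demo.
	conn, err := dialPreferIPv4(ctx, "tcp", "example.com:443")
	if err != nil {
		fmt.Println("dial failed:", err)
		return
	}
	defer conn.Close()
	fmt.Println("connected via", conn.RemoteAddr())
}
```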
return fmt.Errorf("failed to stat ipfs repo directory: %w", err) - } - if !fileInfo.IsDir() { - return fmt.Errorf("%s is not a directory", downloadPath) - } - h.repoPath = filepath.Join(downloadPath, "ipfs-repo") - // Create a config with default options and a 2048 bit key - h.cfg, err = config.Init(io.Discard, 2048) - if err != nil { - return err - } - if len(profiles) > 0 { - for _, profile := range strings.Split(profiles, ",") { - transformer, ok := config.Profiles[profile] - if !ok { - return fmt.Errorf("invalid ipfs configuration profile: %s", profile) - } - - if err := transformer.Transform(h.cfg); err != nil { - return err - } - } - } - // Create the repo with the config - // fsrepo.Init initializes new repo only if it's not initialized yet - err = fsrepo.Init(h.repoPath, h.cfg) - if err != nil { - return fmt.Errorf("failed to init ipfs repo: %w", err) - } - h.repo, err = fsrepo.Open(h.repoPath) - if err != nil { - return fmt.Errorf("failed to open ipfs repo: %w", err) - } - return nil -} - -func (h *IpfsHelper) createNode(ctx context.Context, clientOnly bool) error { - var routing libp2p.RoutingOption - if clientOnly { - routing = libp2p.DHTClientOption - } else { - routing = libp2p.DHTOption - } - nodeOptions := &core.BuildCfg{ - Online: true, - Routing: routing, - Repo: h.repo, - } - var err error - h.node, err = core.NewNode(ctx, nodeOptions) - if err != nil { - return err - } - h.api, err = coreapi.NewCoreAPI(h.node) - return err -} - -func (h *IpfsHelper) connectToPeers(ctx context.Context, peers []string) error { - peerInfos := make(map[peer.ID]*peer.AddrInfo, len(peers)) - for _, addressString := range peers { - address, err := ma.NewMultiaddr(addressString) - if err != nil { - return err - } - addressInfo, err := peer.AddrInfoFromP2pAddr(address) - if err != nil { - return err - } - peerInfo, ok := peerInfos[addressInfo.ID] - if !ok { - peerInfo = &peer.AddrInfo{ID: addressInfo.ID} - peerInfos[peerInfo.ID] = peerInfo - } - peerInfo.Addrs = append(peerInfo.Addrs, addressInfo.Addrs...) 
- } - var wg sync.WaitGroup - wg.Add(len(peerInfos)) - for _, peerInfo := range peerInfos { - go func(peerInfo *peer.AddrInfo) { - defer wg.Done() - err := h.api.Swarm().Connect(ctx, *peerInfo) - if err != nil { - log.Warn("failed to connect to peer", "peerId", peerInfo.ID, "err", err) - return - } - }(peerInfo) - } - wg.Wait() - return nil -} - -func (h *IpfsHelper) GetPeerHostAddresses() ([]string, error) { - addresses, err := peer.AddrInfoToP2pAddrs(host.InfoFromHost(h.node.PeerHost)) - if err != nil { - return []string{}, err - } - addressesStrings := make([]string, len(addresses)) - for i, a := range addresses { - addressesStrings[i] = a.String() - } - return addressesStrings, nil -} - -func normalizeCidString(cidString string) string { - if strings.HasPrefix(cidString, "ipfs://") { - return "/ipfs/" + cidString[7:] - } - if strings.HasPrefix(cidString, "ipns://") { - return "/ipns/" + cidString[7:] - } - return cidString -} - -func (h *IpfsHelper) DownloadFile(ctx context.Context, cidString string, destinationDir string) (string, error) { - cidString = normalizeCidString(cidString) - cidPath := path.New(cidString) - resolvedPath, err := h.api.ResolvePath(ctx, cidPath) - if err != nil { - return "", fmt.Errorf("failed to resolve path: %w", err) - } - // first pin the root node, then all its children nodes in random order to improve sharing with peers started at the same time - if err := h.api.Pin().Add(ctx, resolvedPath, options.Pin.Recursive(false)); err != nil { - return "", fmt.Errorf("failed to pin root path: %w", err) - } - links, err := h.api.Object().Links(ctx, resolvedPath) - if err != nil { - return "", fmt.Errorf("failed to get root links: %w", err) - } - log.Info("Pinning ipfs subtrees...") - printProgress := func(done int, all int) { - if all == 0 { - all = 1 // avoid division by 0 - done = 1 - } - fmt.Printf("\033[2K\rPinned %d / %d subtrees (%.2f%%)", done, all, float32(done)/float32(all)*100) - } - permutation := rand.Perm(len(links)) - printProgress(0, len(links)) - for i, j := range permutation { - link := links[j] - if err := h.api.Pin().Add(ctx, path.IpfsPath(link.Cid), options.Pin.Recursive(true)); err != nil { - return "", fmt.Errorf("failed to pin child path: %w", err) - } - printProgress(i+1, len(links)) - } - fmt.Printf("\n") - rootNodeDirectory, err := h.api.Unixfs().Get(ctx, cidPath) - if err != nil { - return "", fmt.Errorf("could not get file with CID: %w", err) - } - log.Info("Writing file...") - outputFilePath := filepath.Join(destinationDir, resolvedPath.Cid().String()) - _ = os.Remove(outputFilePath) - err = files.WriteTo(rootNodeDirectory, outputFilePath) - if err != nil { - return "", fmt.Errorf("could not write out the fetched CID: %w", err) - } - log.Info("Download done.") - return outputFilePath, nil -} - -func (h *IpfsHelper) AddFile(ctx context.Context, filePath string, includeHidden bool) (path.Resolved, error) { - fileInfo, err := os.Stat(filePath) - if err != nil { - return nil, err - } - fileNode, err := files.NewSerialFile(filePath, includeHidden, fileInfo) - if err != nil { - return nil, err - } - return h.api.Unixfs().Add(ctx, fileNode) -} - -func CreateIpfsHelper(ctx context.Context, downloadPath string, clientOnly bool, peerList []string, profiles string) (*IpfsHelper, error) { - return createIpfsHelperImpl(ctx, downloadPath, clientOnly, peerList, profiles) -} - -func (h *IpfsHelper) Close() error { - return h.node.Close() -} - -func setupPlugins() error { - plugins, err := loader.NewPluginLoader("") - if err != nil { - return 
fmt.Errorf("error loading plugins: %w", err) - } - // Load preloaded and external plugins - if err := plugins.Initialize(); err != nil { - return fmt.Errorf("error initializing plugins: %w", err) - } - if err := plugins.Inject(); err != nil { - return fmt.Errorf("error initializing plugins: %w", err) - } - return nil -} - -var loadPluginsOnce sync.Once - -func createIpfsHelperImpl(ctx context.Context, downloadPath string, clientOnly bool, peerList []string, profiles string) (*IpfsHelper, error) { - var onceErr error - loadPluginsOnce.Do(func() { - onceErr = setupPlugins() - }) - if onceErr != nil { - return nil, onceErr - } - client := IpfsHelper{} - err := client.createRepo(downloadPath, profiles) - if err != nil { - return nil, err - } - err = client.createNode(ctx, clientOnly) - if err != nil { - return nil, err - } - err = client.connectToPeers(ctx, peerList) - if err != nil { - return nil, err - } - return &client, nil -} - -func CanBeIpfsPath(pathString string) bool { - path := path.New(pathString) - return path.IsValid() == nil || - strings.HasPrefix(pathString, "/ipfs/") || - strings.HasPrefix(pathString, "/ipld/") || - strings.HasPrefix(pathString, "/ipns/") || - strings.HasPrefix(pathString, "ipfs://") || - strings.HasPrefix(pathString, "ipns://") -} - -// TODO break abstraction for now til we figure out what fns are needed -func (h *IpfsHelper) GetAPI() coreiface.CoreAPI { - return h.api -} diff --git a/cmd/ipfshelper/ipfshelper_stub.go b/cmd/ipfshelper/ipfshelper_stub.go deleted file mode 100644 index fa6a451927..0000000000 --- a/cmd/ipfshelper/ipfshelper_stub.go +++ /dev/null @@ -1,31 +0,0 @@ -//go:build !ipfs -// +build !ipfs - -package ipfshelper - -import ( - "context" - "errors" -) - -type IpfsHelper struct{} - -var ErrIpfsNotSupported = errors.New("ipfs not supported") - -var DefaultIpfsProfiles = "default ipfs profiles stub" - -func CanBeIpfsPath(pathString string) bool { - return false -} - -func CreateIpfsHelper(ctx context.Context, downloadPath string, clientOnly bool, peerList []string, profiles string) (*IpfsHelper, error) { - return nil, ErrIpfsNotSupported -} - -func (h *IpfsHelper) DownloadFile(ctx context.Context, cidString string, destinationDir string) (string, error) { - return "", ErrIpfsNotSupported -} - -func (h *IpfsHelper) Close() error { - return ErrIpfsNotSupported -} diff --git a/cmd/ipfshelper/ipfshelper_test.go b/cmd/ipfshelper/ipfshelper_test.go deleted file mode 100644 index 80f10c21f6..0000000000 --- a/cmd/ipfshelper/ipfshelper_test.go +++ /dev/null @@ -1,123 +0,0 @@ -//go:build ipfs -// +build ipfs - -package ipfshelper - -import ( - "bytes" - "context" - "math/rand" - "os" - "path/filepath" - "testing" - "time" - - "github.com/offchainlabs/nitro/util/testhelpers" -) - -func getTempFileWithData(t *testing.T, data []byte) string { - path := filepath.Join(t.TempDir(), "config.json") - err := os.WriteFile(path, []byte(data), 0600) - testhelpers.RequireImpl(t, err) - return path -} - -func fileDataEqual(t *testing.T, path string, expected []byte) bool { - data, err := os.ReadFile(path) - testhelpers.RequireImpl(t, err) - return bytes.Equal(data, expected) -} - -func TestIpfsHelper(t *testing.T) { - ctx := context.Background() - ipfsA, err := createIpfsHelperImpl(ctx, t.TempDir(), false, []string{}, "test") - testhelpers.RequireImpl(t, err) - // add a test file to node A - testData := make([]byte, 1024*1024) - _, err = rand.Read(testData) - testhelpers.RequireImpl(t, err) - testFile := getTempFileWithData(t, testData) - ipfsTestFilePath, err := 
ipfsA.AddFile(ctx, testFile, false) - testhelpers.RequireImpl(t, err) - testFileCid := ipfsTestFilePath.Cid().String() - addrsA, err := ipfsA.GetPeerHostAddresses() - testhelpers.RequireImpl(t, err) - // create node B connected to node A - ipfsB, err := createIpfsHelperImpl(ctx, t.TempDir(), false, addrsA, "test") - testhelpers.RequireImpl(t, err) - // download the test file with node B - downloadedFile, err := ipfsB.DownloadFile(ctx, testFileCid, t.TempDir()) - testhelpers.RequireImpl(t, err) - if !fileDataEqual(t, downloadedFile, testData) { - testhelpers.FailImpl(t, "Downloaded file does not contain expected data") - } - // clean up node A and test downloading the file from yet another node C - err = ipfsA.Close() - os.RemoveAll(ipfsA.repoPath) - testhelpers.RequireImpl(t, err) - addrsB, err := ipfsB.GetPeerHostAddresses() - testhelpers.RequireImpl(t, err) - ipfsC, err := createIpfsHelperImpl(ctx, t.TempDir(), false, addrsB, "test") - testhelpers.RequireImpl(t, err) - downloadedFile, err = ipfsC.DownloadFile(ctx, testFileCid, t.TempDir()) - testhelpers.RequireImpl(t, err) - if !fileDataEqual(t, downloadedFile, testData) { - testhelpers.FailImpl(t, "Downloaded file does not contain expected data") - } - // make sure closing B and C nodes (A already closed) will make it impossible to download the test file from new node D - ipfsD, err := createIpfsHelperImpl(ctx, t.TempDir(), false, addrsB, "test") - testhelpers.RequireImpl(t, err) - err = ipfsB.Close() - testhelpers.RequireImpl(t, err) - err = ipfsC.Close() - testhelpers.RequireImpl(t, err) - testTimeout := 300 * time.Millisecond - timeoutCtx, cancel := context.WithTimeout(ctx, testTimeout) - defer cancel() - _, err = ipfsD.DownloadFile(timeoutCtx, testFileCid, t.TempDir()) - if err == nil { - testhelpers.FailImpl(t, "Download attempt did not fail as expected") - } - err = ipfsD.Close() - testhelpers.RequireImpl(t, err) -} - -func TestNormalizeCidString(t *testing.T) { - for _, test := range []struct { - input string - expected string - }{ - {"ipfs://QmaCpDMGvV2BGHeYERUEnRQAwe3N8SzbUtfsmvsqQLuvuJ", "/ipfs/QmaCpDMGvV2BGHeYERUEnRQAwe3N8SzbUtfsmvsqQLuvuJ"}, - {"ipns://k51qzi5uqu5dlvj2baxnqndepeb86cbk3ng7n3i46uzyxzyqj2xjonzllnv0v8", "/ipns/k51qzi5uqu5dlvj2baxnqndepeb86cbk3ng7n3i46uzyxzyqj2xjonzllnv0v8"}, - {"ipns://docs.ipfs.tech/introduction/", "/ipns/docs.ipfs.tech/introduction/"}, - {"/ipfs/QmaCpDMGvV2BGHeYERUEnRQAwe3N8SzbUtfsmvsqQLuvuJ", "/ipfs/QmaCpDMGvV2BGHeYERUEnRQAwe3N8SzbUtfsmvsqQLuvuJ"}, - {"/ipns/k51qzi5uqu5dlvj2baxnqndepeb86cbk3ng7n3i46uzyxzyqj2xjonzllnv0v8", "/ipns/k51qzi5uqu5dlvj2baxnqndepeb86cbk3ng7n3i46uzyxzyqj2xjonzllnv0v8"}, - {"QmaCpDMGvV2BGHeYERUEnRQAwe3N8SzbUtfsmvsqQLuvuJ", "QmaCpDMGvV2BGHeYERUEnRQAwe3N8SzbUtfsmvsqQLuvuJ"}, - } { - if res := normalizeCidString(test.input); res != test.expected { - testhelpers.FailImpl(t, "Failed to normalize cid string, input: ", test.input, " got: ", res, " expected: ", test.expected) - } - } -} - -func TestCanBeIpfsPath(t *testing.T) { - correctPaths := []string{ - "QmaCpDMGvV2BGHeYERUEnRQAwe3N8SzbUtfsmvsqQLuvuJ", - "/ipfs/QmaCpDMGvV2BGHeYERUEnRQAwe3N8SzbUtfsmvsqQLuvuJ", - "/ipns/k51qzi5uqu5dlvj2baxnqndepeb86cbk3ng7n3i46uzyxzyqj2xjonzllnv0v8", - "/ipns/docs.ipfs.tech/introduction/", - "ipfs://QmaCpDMGvV2BGHeYERUEnRQAwe3N8SzbUtfsmvsqQLuvuJ", - "ipns://k51qzi5uqu5dlvj2baxnqndepeb86cbk3ng7n3i46uzyxzyqj2xjonzllnv0v8", - } - for _, path := range correctPaths { - if !CanBeIpfsPath(path) { - testhelpers.FailImpl(t, "false negative result for path:", path) - } - } - incorrectPaths := 
[]string{"www.ipfs.tech", "https://www.ipfs.tech", "QmIncorrect"} - for _, path := range incorrectPaths { - if CanBeIpfsPath(path) { - testhelpers.FailImpl(t, "false positive result for path:", path) - } - } -} diff --git a/cmd/nitro/init.go b/cmd/nitro/init.go index 9e3ecec747..f0b303817c 100644 --- a/cmd/nitro/init.go +++ b/cmd/nitro/init.go @@ -40,7 +40,6 @@ import ( "github.com/offchainlabs/nitro/arbos/arbostypes" "github.com/offchainlabs/nitro/cmd/chaininfo" "github.com/offchainlabs/nitro/cmd/conf" - "github.com/offchainlabs/nitro/cmd/ipfshelper" "github.com/offchainlabs/nitro/cmd/pruning" "github.com/offchainlabs/nitro/cmd/staterecovery" "github.com/offchainlabs/nitro/execution/gethexec" @@ -58,25 +57,6 @@ func downloadInit(ctx context.Context, initConfig *conf.InitConfig) (string, err if strings.HasPrefix(initConfig.Url, "file:") { return initConfig.Url[5:], nil } - if ipfshelper.CanBeIpfsPath(initConfig.Url) { - ipfsNode, err := ipfshelper.CreateIpfsHelper(ctx, initConfig.DownloadPath, false, []string{}, ipfshelper.DefaultIpfsProfiles) - if err != nil { - return "", err - } - log.Info("Downloading initial database via IPFS", "url", initConfig.Url) - initFile, downloadErr := ipfsNode.DownloadFile(ctx, initConfig.Url, initConfig.DownloadPath) - closeErr := ipfsNode.Close() - if downloadErr != nil { - if closeErr != nil { - log.Error("Failed to close IPFS node after download error", "err", closeErr) - } - return "", fmt.Errorf("Failed to download file from IPFS: %w", downloadErr) - } - if closeErr != nil { - return "", fmt.Errorf("Failed to close IPFS node: %w", err) - } - return initFile, nil - } log.Info("Downloading initial database", "url", initConfig.Url) if !initConfig.ValidateChecksum { file, err := downloadFile(ctx, initConfig, initConfig.Url, nil) @@ -732,8 +712,7 @@ func openInitializeChainDb(ctx context.Context, stack *node.Node, config *NodeCo if err != nil { return chainDb, nil, err } - combinedL2ChainInfoFiles := aggregateL2ChainInfoFiles(ctx, config.Chain.InfoFiles, config.Chain.InfoIpfsUrl, config.Chain.InfoIpfsDownloadPath) - chainConfig, err = chaininfo.GetChainConfig(new(big.Int).SetUint64(config.Chain.ID), config.Chain.Name, genesisBlockNr, combinedL2ChainInfoFiles, config.Chain.InfoJson) + chainConfig, err = chaininfo.GetChainConfig(new(big.Int).SetUint64(config.Chain.ID), config.Chain.Name, genesisBlockNr, config.Chain.InfoFiles, config.Chain.InfoJson) if err != nil { return chainDb, nil, err } diff --git a/cmd/nitro/nitro.go b/cmd/nitro/nitro.go index bc2155a475..076ac66ec4 100644 --- a/cmd/nitro/nitro.go +++ b/cmd/nitro/nitro.go @@ -285,8 +285,6 @@ func mainImpl() int { } } - combinedL2ChainInfoFile := aggregateL2ChainInfoFiles(ctx, nodeConfig.Chain.InfoFiles, nodeConfig.Chain.InfoIpfsUrl, nodeConfig.Chain.InfoIpfsDownloadPath) - if nodeConfig.Node.Staker.Enable { if !nodeConfig.Node.ParentChainReader.Enable { flag.Usage() @@ -335,7 +333,7 @@ func mainImpl() int { log.Info("connected to l1 chain", "l1url", nodeConfig.ParentChain.Connection.URL, "l1chainid", nodeConfig.ParentChain.ID) - rollupAddrs, err = chaininfo.GetRollupAddressesConfig(nodeConfig.Chain.ID, nodeConfig.Chain.Name, combinedL2ChainInfoFile, nodeConfig.Chain.InfoJson) + rollupAddrs, err = chaininfo.GetRollupAddressesConfig(nodeConfig.Chain.ID, nodeConfig.Chain.Name, nodeConfig.Chain.InfoFiles, nodeConfig.Chain.InfoJson) if err != nil { log.Crit("error getting rollup addresses", "err", err) } @@ -367,7 +365,7 @@ func mainImpl() int { log.Crit("--node.validator.only-create-wallet-contract conflicts 
with --node.dangerous.no-l1-listener") } // Just create validator smart wallet if needed then exit - deployInfo, err := chaininfo.GetRollupAddressesConfig(nodeConfig.Chain.ID, nodeConfig.Chain.Name, combinedL2ChainInfoFile, nodeConfig.Chain.InfoJson) + deployInfo, err := chaininfo.GetRollupAddressesConfig(nodeConfig.Chain.ID, nodeConfig.Chain.Name, nodeConfig.Chain.InfoFiles, nodeConfig.Chain.InfoJson) if err != nil { log.Crit("error getting rollup addresses config", "err", err) } @@ -493,7 +491,7 @@ func mainImpl() int { return 0 } - chainInfo, err := chaininfo.ProcessChainInfo(nodeConfig.Chain.ID, nodeConfig.Chain.Name, combinedL2ChainInfoFile, nodeConfig.Chain.InfoJson) + chainInfo, err := chaininfo.ProcessChainInfo(nodeConfig.Chain.ID, nodeConfig.Chain.Name, nodeConfig.Chain.InfoFiles, nodeConfig.Chain.InfoJson) if err != nil { log.Error("error processing l2 chain info", "err", err) return 1 @@ -840,12 +838,10 @@ func ParseNode(ctx context.Context, args []string) (*NodeConfig, *genericconf.Wa l2ChainId := k.Int64("chain.id") l2ChainName := k.String("chain.name") - l2ChainInfoIpfsUrl := k.String("chain.info-ipfs-url") - l2ChainInfoIpfsDownloadPath := k.String("chain.info-ipfs-download-path") l2ChainInfoFiles := k.Strings("chain.info-files") l2ChainInfoJson := k.String("chain.info-json") // #nosec G115 - err = applyChainParameters(ctx, k, uint64(l2ChainId), l2ChainName, l2ChainInfoFiles, l2ChainInfoJson, l2ChainInfoIpfsUrl, l2ChainInfoIpfsDownloadPath) + err = applyChainParameters(k, uint64(l2ChainId), l2ChainName, l2ChainInfoFiles, l2ChainInfoJson) if err != nil { return nil, nil, err } @@ -908,20 +904,8 @@ func ParseNode(ctx context.Context, args []string) (*NodeConfig, *genericconf.Wa return &nodeConfig, &l2DevWallet, nil } -func aggregateL2ChainInfoFiles(ctx context.Context, l2ChainInfoFiles []string, l2ChainInfoIpfsUrl string, l2ChainInfoIpfsDownloadPath string) []string { - if l2ChainInfoIpfsUrl != "" { - l2ChainInfoIpfsFile, err := util.GetL2ChainInfoIpfsFile(ctx, l2ChainInfoIpfsUrl, l2ChainInfoIpfsDownloadPath) - if err != nil { - log.Error("error getting l2 chain info file from ipfs", "err", err) - } - l2ChainInfoFiles = append(l2ChainInfoFiles, l2ChainInfoIpfsFile) - } - return l2ChainInfoFiles -} - -func applyChainParameters(ctx context.Context, k *koanf.Koanf, chainId uint64, chainName string, l2ChainInfoFiles []string, l2ChainInfoJson string, l2ChainInfoIpfsUrl string, l2ChainInfoIpfsDownloadPath string) error { - combinedL2ChainInfoFiles := aggregateL2ChainInfoFiles(ctx, l2ChainInfoFiles, l2ChainInfoIpfsUrl, l2ChainInfoIpfsDownloadPath) - chainInfo, err := chaininfo.ProcessChainInfo(chainId, chainName, combinedL2ChainInfoFiles, l2ChainInfoJson) +func applyChainParameters(k *koanf.Koanf, chainId uint64, chainName string, l2ChainInfoFiles []string, l2ChainInfoJson string) error { + chainInfo, err := chaininfo.ProcessChainInfo(chainId, chainName, l2ChainInfoFiles, l2ChainInfoJson) if err != nil { return err } @@ -930,7 +914,7 @@ func applyChainParameters(ctx context.Context, k *koanf.Koanf, chainId uint64, c parentChainIsArbitrum = *chainInfo.ParentChainIsArbitrum } else { log.Warn("Chain info field parent-chain-is-arbitrum is missing, in the future this will be required", "chainId", chainInfo.ChainConfig.ChainID, "parentChainId", chainInfo.ParentChainId) - _, err := chaininfo.ProcessChainInfo(chainInfo.ParentChainId, "", combinedL2ChainInfoFiles, "") + _, err := chaininfo.ProcessChainInfo(chainInfo.ParentChainId, "", l2ChainInfoFiles, "") if err == nil { parentChainIsArbitrum 
= true } diff --git a/cmd/replay/main.go b/cmd/replay/main.go index 0fe56eb4c9..661040ea10 100644 --- a/cmd/replay/main.go +++ b/cmd/replay/main.go @@ -14,6 +14,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/consensus" + "github.com/ethereum/go-ethereum/core" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/state" "github.com/ethereum/go-ethereum/core/types" @@ -291,7 +292,7 @@ func main() { message := readMessage(chainConfig.ArbitrumChainParams.DataAvailabilityCommittee) chainContext := WavmChainContext{} - newBlock, _, err = arbos.ProduceBlock(message.Message, message.DelayedMessagesRead, lastBlockHeader, statedb, chainContext, chainConfig, false) + newBlock, _, err = arbos.ProduceBlock(message.Message, message.DelayedMessagesRead, lastBlockHeader, statedb, chainContext, chainConfig, false, core.MessageReplayMode) if err != nil { panic(err) } diff --git a/cmd/seq-coordinator-manager/rediscoordinator/redis_coordinator.go b/cmd/seq-coordinator-manager/rediscoordinator/redis_coordinator.go index e963c0e96c..b897b23252 100644 --- a/cmd/seq-coordinator-manager/rediscoordinator/redis_coordinator.go +++ b/cmd/seq-coordinator-manager/rediscoordinator/redis_coordinator.go @@ -5,8 +5,8 @@ import ( "errors" "strings" - "github.com/go-redis/redis/v8" "github.com/offchainlabs/nitro/util/redisutil" + "github.com/redis/go-redis/v9" ) // RedisCoordinator builds upon RedisCoordinator of redisutil with additional functionality diff --git a/cmd/util/chaininfoutil.go b/cmd/util/chaininfoutil.go deleted file mode 100644 index 906aa234ed..0000000000 --- a/cmd/util/chaininfoutil.go +++ /dev/null @@ -1,29 +0,0 @@ -package util - -import ( - "context" - "fmt" - - "github.com/ethereum/go-ethereum/log" - "github.com/offchainlabs/nitro/cmd/ipfshelper" -) - -func GetL2ChainInfoIpfsFile(ctx context.Context, l2ChainInfoIpfsUrl string, l2ChainInfoIpfsDownloadPath string) (string, error) { - ipfsNode, err := ipfshelper.CreateIpfsHelper(ctx, l2ChainInfoIpfsDownloadPath, false, []string{}, ipfshelper.DefaultIpfsProfiles) - if err != nil { - return "", err - } - log.Info("Downloading l2 info file via IPFS", "url", l2ChainInfoIpfsDownloadPath) - l2ChainInfoFile, downloadErr := ipfsNode.DownloadFile(ctx, l2ChainInfoIpfsUrl, l2ChainInfoIpfsDownloadPath) - closeErr := ipfsNode.Close() - if downloadErr != nil { - if closeErr != nil { - log.Error("Failed to close IPFS node after download error", "err", closeErr) - } - return "", fmt.Errorf("failed to download file from IPFS: %w", downloadErr) - } - if closeErr != nil { - return "", fmt.Errorf("failed to close IPFS node: %w", err) - } - return l2ChainInfoFile, nil -} diff --git a/das/redis_storage_service.go b/das/redis_storage_service.go index 210d5cb2d4..e57240992c 100644 --- a/das/redis_storage_service.go +++ b/das/redis_storage_service.go @@ -12,11 +12,11 @@ import ( "golang.org/x/crypto/sha3" - "github.com/go-redis/redis/v8" "github.com/offchainlabs/nitro/arbstate/daprovider" "github.com/offchainlabs/nitro/das/dastree" "github.com/offchainlabs/nitro/util/pretty" "github.com/offchainlabs/nitro/util/redisutil" + "github.com/redis/go-redis/v9" flag "github.com/spf13/pflag" "github.com/ethereum/go-ethereum/common" diff --git a/execution/gethexec/block_recorder.go b/execution/gethexec/block_recorder.go index a31b6b3736..a3af7876a8 100644 --- a/execution/gethexec/block_recorder.go +++ b/execution/gethexec/block_recorder.go @@ -8,6 +8,7 @@ import ( "github.com/ethereum/go-ethereum/arbitrum" 
"github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" @@ -154,6 +155,7 @@ func (r *BlockRecorder) RecordBlockCreation( chaincontext, chainConfig, false, + core.MessageReplayMode, ) if err != nil { return nil, err diff --git a/execution/gethexec/blockchain.go b/execution/gethexec/blockchain.go index 9b0c1a6f2f..fda8f49093 100644 --- a/execution/gethexec/blockchain.go +++ b/execution/gethexec/blockchain.go @@ -26,20 +26,21 @@ import ( ) type CachingConfig struct { - Archive bool `koanf:"archive"` - BlockCount uint64 `koanf:"block-count"` - BlockAge time.Duration `koanf:"block-age"` - TrieTimeLimit time.Duration `koanf:"trie-time-limit"` - TrieDirtyCache int `koanf:"trie-dirty-cache"` - TrieCleanCache int `koanf:"trie-clean-cache"` - SnapshotCache int `koanf:"snapshot-cache"` - DatabaseCache int `koanf:"database-cache"` - SnapshotRestoreGasLimit uint64 `koanf:"snapshot-restore-gas-limit"` - MaxNumberOfBlocksToSkipStateSaving uint32 `koanf:"max-number-of-blocks-to-skip-state-saving"` - MaxAmountOfGasToSkipStateSaving uint64 `koanf:"max-amount-of-gas-to-skip-state-saving"` - StylusLRUCacheCapacity uint32 `koanf:"stylus-lru-cache-capacity"` - StateScheme string `koanf:"state-scheme"` - StateHistory uint64 `koanf:"state-history"` + Archive bool `koanf:"archive"` + BlockCount uint64 `koanf:"block-count"` + BlockAge time.Duration `koanf:"block-age"` + TrieTimeLimit time.Duration `koanf:"trie-time-limit"` + TrieDirtyCache int `koanf:"trie-dirty-cache"` + TrieCleanCache int `koanf:"trie-clean-cache"` + SnapshotCache int `koanf:"snapshot-cache"` + DatabaseCache int `koanf:"database-cache"` + SnapshotRestoreGasLimit uint64 `koanf:"snapshot-restore-gas-limit"` + MaxNumberOfBlocksToSkipStateSaving uint32 `koanf:"max-number-of-blocks-to-skip-state-saving"` + MaxAmountOfGasToSkipStateSaving uint64 `koanf:"max-amount-of-gas-to-skip-state-saving"` + StylusLRUCacheCapacity uint32 `koanf:"stylus-lru-cache-capacity"` + DisableStylusCacheMetricsCollection bool `koanf:"disable-stylus-cache-metrics-collection"` + StateScheme string `koanf:"state-scheme"` + StateHistory uint64 `koanf:"state-history"` } func CachingConfigAddOptions(prefix string, f *flag.FlagSet) { @@ -55,6 +56,7 @@ func CachingConfigAddOptions(prefix string, f *flag.FlagSet) { f.Uint32(prefix+".max-number-of-blocks-to-skip-state-saving", DefaultCachingConfig.MaxNumberOfBlocksToSkipStateSaving, "maximum number of blocks to skip state saving to persistent storage (archive node only) -- warning: this option seems to cause issues") f.Uint64(prefix+".max-amount-of-gas-to-skip-state-saving", DefaultCachingConfig.MaxAmountOfGasToSkipStateSaving, "maximum amount of gas in blocks to skip saving state to Persistent storage (archive node only) -- warning: this option seems to cause issues") f.Uint32(prefix+".stylus-lru-cache-capacity", DefaultCachingConfig.StylusLRUCacheCapacity, "capacity, in megabytes, of the LRU cache that keeps initialized stylus programs") + f.Bool(prefix+".disable-stylus-cache-metrics-collection", DefaultCachingConfig.DisableStylusCacheMetricsCollection, "disable metrics collection for the stylus cache") f.String(prefix+".state-scheme", DefaultCachingConfig.StateScheme, "scheme to use for state trie storage (hash, path)") f.Uint64(prefix+".state-history", DefaultCachingConfig.StateHistory, "number of recent blocks to retain state history for (path state-scheme only)") } diff --git 
a/execution/gethexec/executionengine.go b/execution/gethexec/executionengine.go index a0f3a2f59a..6571672b71 100644 --- a/execution/gethexec/executionengine.go +++ b/execution/gethexec/executionengine.go @@ -87,6 +87,8 @@ type ExecutionEngine struct { reorgSequencing bool + disableStylusCacheMetricsCollection bool + prefetchBlock bool cachedL1PriceData *L1PriceData @@ -212,6 +214,16 @@ func (s *ExecutionEngine) EnableReorgSequencing() { s.reorgSequencing = true } +func (s *ExecutionEngine) DisableStylusCacheMetricsCollection() { + if s.Started() { + panic("trying to disable stylus cache metrics collection after start") + } + if s.disableStylusCacheMetricsCollection { + panic("trying to disable stylus cache metrics collection when already set") + } + s.disableStylusCacheMetricsCollection = true +} + func (s *ExecutionEngine) EnablePrefetchBlock() { if s.Started() { panic("trying to enable prefetch block after start") @@ -505,6 +517,7 @@ func (s *ExecutionEngine) sequenceTransactionsWithBlockMutex(header *arbostypes. s.bc.Config(), hooks, false, + core.MessageCommitMode, ) if err != nil { return nil, err @@ -661,6 +674,10 @@ func (s *ExecutionEngine) createBlockFromNextMessage(msg *arbostypes.MessageWith statedb.StartPrefetcher("TransactionStreamer") defer statedb.StopPrefetcher() + runMode := core.MessageCommitMode + if isMsgForPrefetch { + runMode = core.MessageReplayMode + } block, receipts, err := arbos.ProduceBlock( msg.Message, msg.DelayedMessagesRead, @@ -669,6 +686,7 @@ func (s *ExecutionEngine) createBlockFromNextMessage(msg *arbostypes.MessageWith s.bc, s.bc.Config(), isMsgForPrefetch, + runMode, ) return block, statedb, receipts, err @@ -963,15 +981,17 @@ func (s *ExecutionEngine) Start(ctx_in context.Context) { } } }) - // periodically update stylus lru cache metrics - s.LaunchThread(func(ctx context.Context) { - for { - select { - case <-ctx.Done(): - return - case <-time.After(time.Minute): - programs.GetWasmLruCacheMetrics() + if !s.disableStylusCacheMetricsCollection { + // periodically update stylus cache metrics + s.LaunchThread(func(ctx context.Context) { + for { + select { + case <-ctx.Done(): + return + case <-time.After(time.Minute): + programs.UpdateWasmCacheMetrics() + } } - } - }) + }) + } } diff --git a/execution/gethexec/node.go b/execution/gethexec/node.go index cb06a58e74..499a13164e 100644 --- a/execution/gethexec/node.go +++ b/execution/gethexec/node.go @@ -188,6 +188,9 @@ func CreateExecutionNode( if config.EnablePrefetchBlock { execEngine.EnablePrefetchBlock() } + if config.Caching.DisableStylusCacheMetricsCollection { + execEngine.DisableStylusCacheMetricsCollection() + } if err != nil { return nil, err } diff --git a/go-ethereum b/go-ethereum index b068464bf5..b1075d3786 160000 --- a/go-ethereum +++ b/go-ethereum @@ -1 +1 @@ -Subproject commit b068464bf59ab5414f72c2d4aba855b8af5edc17 +Subproject commit b1075d3786b28a6a3a06fe0e0ab8d1cdecc72f55 diff --git a/go.mod b/go.mod index 18e3a8b02a..488d455f44 100644 --- a/go.mod +++ b/go.mod @@ -25,7 +25,6 @@ require ( github.com/ethereum/go-ethereum v1.10.26 github.com/fatih/structtag v1.2.0 github.com/gdamore/tcell/v2 v2.7.1 - github.com/go-redis/redis/v8 v8.11.5 github.com/gobwas/httphead v0.1.0 github.com/gobwas/ws v1.2.1 github.com/gobwas/ws-examples v0.0.0-20190625122829-a9e8908d9484 @@ -39,6 +38,7 @@ require ( github.com/mitchellh/mapstructure v1.4.1 github.com/pkg/errors v0.9.1 github.com/r3labs/diff/v3 v3.0.1 + github.com/redis/go-redis/v9 v9.6.1 github.com/rivo/tview v0.0.0-20240307173318-e804876934a1 
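`ProduceBlock` and `ProduceBlockAdvanced` now take a `core.MessageRunMode`, and `createBlockFromNextMessage` above chooses the mode based on whether the message is only being prefetched. A condensed sketch of that selection, assuming the `MessageCommitMode` and `MessageReplayMode` constants exported by the go-ethereum fork referenced in this diff:

```go
package main

import (
	"fmt"

	"github.com/ethereum/go-ethereum/core"
)

// chooseRunMode mirrors the selection in createBlockFromNextMessage: blocks
// built from prefetched messages run in replay mode, while blocks the node
// actually commits run in commit mode.
func chooseRunMode(isMsgForPrefetch bool) core.MessageRunMode {
	if isMsgForPrefetch {
		return core.MessageReplayMode
	}
	return core.MessageCommitMode
}

func main() {
	fmt.Println(chooseRunMode(true) == core.MessageReplayMode)  // true
	fmt.Println(chooseRunMode(false) == core.MessageCommitMode) // true
}
```

The replay binary and the block recorder pass `core.MessageReplayMode` directly, as the earlier hunks in this diff show.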
github.com/spf13/pflag v1.0.5 github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7 @@ -99,7 +99,7 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/bits-and-blooms/bitset v1.10.0 // indirect github.com/btcsuite/btcd/btcec/v2 v2.2.0 // indirect - github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cockroachdb/errors v1.9.1 // indirect github.com/cockroachdb/logtags v0.0.0-20211118104740-dabe8e521a4f // indirect github.com/cockroachdb/redact v1.1.3 // indirect diff --git a/go.sum b/go.sum index f848c2aa2d..d11610724e 100644 --- a/go.sum +++ b/go.sum @@ -139,6 +139,10 @@ github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6r github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/bits-and-blooms/bitset v1.10.0 h1:ePXTeiPEazB5+opbv5fr8umg2R/1NlzgDsyepwsSr88= github.com/bits-and-blooms/bitset v1.10.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= +github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs= +github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c= +github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA= +github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0= github.com/btcsuite/btcd/btcec/v2 v2.2.0 h1:fzn1qaOt32TuLjFlkzYSsBC35Q3KUjT1SwPxiMSCF5k= github.com/btcsuite/btcd/btcec/v2 v2.2.0/go.mod h1:U7MHm051Al6XmscBQ0BoNydpOTsFAn707034b5nY8zU= github.com/btcsuite/btcd/chaincfg/chainhash v1.0.1 h1:q0rUy8C/TYNBQS1+CGKw68tLOFYSNEs0TFnxxnS9+4U= @@ -150,8 +154,8 @@ github.com/cespare/cp v0.1.0 h1:SE+dxFebS7Iik5LK0tsi1k9ZCxEaFX4AjQmoyA+1dJk= github.com/cespare/cp v0.1.0/go.mod h1:SOGHArjBr4JWaSDEVpWpo/hNg6RoKrls6Oh40hiwW+s= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= -github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/logex v1.2.0/go.mod h1:9+9sk7u7pGNWYMkh0hdiL++6OeibzJccyQU4p4MedaY= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= @@ -281,8 +285,6 @@ github.com/go-martini/martini v0.0.0-20170121215854-22fa46961aab/go.mod h1:/P9AE github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE= github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78= -github.com/go-redis/redis/v8 v8.11.5 h1:AcZZR7igkdvfVmQTPnu9WE37LRrO/YrBH5zWyjDC0oI= -github.com/go-redis/redis/v8 v8.11.5/go.mod h1:gREzHqY1hg6oD9ngVRbLStwAWKhA0FEgq8Jd4h5lpwo= github.com/go-sourcemap/sourcemap v2.1.3+incompatible h1:W1iEw64niKVGogNgBN3ePyLFfuisuzeidWPMPWmECqU= github.com/go-sourcemap/sourcemap v2.1.3+incompatible/go.mod h1:F8jJfvm2KbVjc5NqelyYJmf/v5J0dwNLS2mL4sNA1Jg= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= @@ -582,22 +584,19 @@ github.com/nats-io/nkeys v0.1.0/go.mod 
h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxzi github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/npillmayer/nestext v0.1.3/go.mod h1:h2lrijH8jpicr25dFY+oAJLyzlya6jhnuG+zWp9L0Uk= +github.com/nxadm/tail v1.4.4 h1:DQuhQpB1tVlglWS2hLQ5OV6B5r8aGxSrPc5Qo6uTN78= github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= -github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= -github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= github.com/oklog/run v1.0.0/go.mod h1:dlhp/R75TPv97u0XWUtDeV/lRKWPKSdTuV0TZvrmrQA= github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.10.3/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= +github.com/onsi/ginkgo v1.14.0 h1:2mOpI4JVVPBN+WQRa0WKH2eXR+Ey+uK4n7Zj0aYpIQA= github.com/onsi/ginkgo v1.14.0/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY= -github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= -github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU= github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= +github.com/onsi/gomega v1.10.1 h1:o0+MgICZLuZ7xjH7Vx6zS/zcu93/BEp1VwkIW1mEXCE= github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= -github.com/onsi/gomega v1.18.1 h1:M1GfJqGRrBrrGGsbxzV5dqM2U2ApXefZCQpkukxYRLE= -github.com/onsi/gomega v1.18.1/go.mod h1:0q+aL8jAiMXy9hbwj2mr5GziHiwhAIQpFmmtT5hitRs= github.com/opentracing/opentracing-go v1.1.0 h1:pWlfV3Bxv7k65HYwkikxat0+s3pV4bsqf19k25Ur8rU= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= @@ -643,6 +642,8 @@ github.com/prometheus/procfs v0.8.0 h1:ODq8ZFEaYeCaZOJlZZdJA2AbQR98dSHSM1KW/You5 github.com/prometheus/procfs v0.8.0/go.mod h1:z7EfXMXOkbkqb9IINtpCn86r/to3BnA0uaxHdg830/4= github.com/r3labs/diff/v3 v3.0.1 h1:CBKqf3XmNRHXKmdU7mZP1w7TV0pDyVCis1AUHtA4Xtg= github.com/r3labs/diff/v3 v3.0.1/go.mod h1:f1S9bourRbiM66NskseyUdo0fTmEE0qKrikYJX63dgo= +github.com/redis/go-redis/v9 v9.6.1 h1:HHDteefn6ZkTtY5fGUE8tj8uy85AHk6zP7CpzIAM0y4= +github.com/redis/go-redis/v9 v9.6.1/go.mod h1:0C0c6ycQsdpVNQpxb1njEQIqkx5UcsM8FJCQLgE9+RA= github.com/rhnvrm/simples3 v0.6.1 h1:H0DJwybR6ryQE+Odi9eqkHuzjYAeJgtGcGtuBwOhsH8= github.com/rhnvrm/simples3 v0.6.1/go.mod h1:Y+3vYm2V7Y4VijFoJHHTrja6OgPrJ2cBti8dPGkC3sA= github.com/rivo/tview v0.0.0-20240307173318-e804876934a1 h1:bWLHTRekAy497pE7+nXSuzXwwFHI0XauRzz6roUvY+s= @@ -1042,6 +1043,7 @@ golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxb golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 
h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= diff --git a/pubsub/common.go b/pubsub/common.go index d7f041af15..ad36b6e622 100644 --- a/pubsub/common.go +++ b/pubsub/common.go @@ -2,12 +2,15 @@ package pubsub import ( "context" + "fmt" "strings" "github.com/ethereum/go-ethereum/log" - "github.com/go-redis/redis/v8" + "github.com/redis/go-redis/v9" ) +func ResultKeyFor(streamName, id string) string { return fmt.Sprintf("%s.%s", streamName, id) } + // CreateStream tries to create stream with given name, if it already exists // does not return an error. func CreateStream(ctx context.Context, streamName string, client redis.UniversalClient) error { diff --git a/pubsub/consumer.go b/pubsub/consumer.go index bd73e729e7..391042bd7e 100644 --- a/pubsub/consumer.go +++ b/pubsub/consumer.go @@ -5,36 +5,38 @@ import ( "encoding/json" "errors" "fmt" + "math" + "math/rand" + "strconv" "time" "github.com/ethereum/go-ethereum/log" - "github.com/go-redis/redis/v8" "github.com/google/uuid" "github.com/offchainlabs/nitro/util/stopwaiter" + "github.com/redis/go-redis/v9" "github.com/spf13/pflag" ) type ConsumerConfig struct { // Timeout of result entry in Redis. ResponseEntryTimeout time.Duration `koanf:"response-entry-timeout"` - // Duration after which consumer is considered to be dead if heartbeat - // is not updated. - KeepAliveTimeout time.Duration `koanf:"keepalive-timeout"` + // Minimum idle time after which messages will be autoclaimed + IdletimeToAutoclaim time.Duration `koanf:"idletime-to-autoclaim"` } var DefaultConsumerConfig = ConsumerConfig{ ResponseEntryTimeout: time.Hour, - KeepAliveTimeout: 5 * time.Minute, + IdletimeToAutoclaim: 5 * time.Minute, } var TestConsumerConfig = ConsumerConfig{ ResponseEntryTimeout: time.Minute, - KeepAliveTimeout: 30 * time.Millisecond, + IdletimeToAutoclaim: 30 * time.Millisecond, } func ConsumerConfigAddOptions(prefix string, f *pflag.FlagSet) { f.Duration(prefix+".response-entry-timeout", DefaultConsumerConfig.ResponseEntryTimeout, "timeout for response entry") - f.Duration(prefix+".keepalive-timeout", DefaultConsumerConfig.KeepAliveTimeout, "timeout after which consumer is considered inactive if heartbeat wasn't performed") + f.Duration(prefix+".idletime-to-autoclaim", DefaultConsumerConfig.IdletimeToAutoclaim, "After a message spends this amount of time in PEL (Pending Entries List i.e claimed by another consumer but not Acknowledged) it will be allowed to be autoclaimed by other consumers") } // Consumer implements a consumer for redis stream provides heartbeat to @@ -51,6 +53,7 @@ type Consumer[Request any, Response any] struct { type Message[Request any] struct { ID string Value Request + Ack func() } func NewConsumer[Request any, Response any](client redis.UniversalClient, streamName string, cfg *ConsumerConfig) (*Consumer[Request, Response], error) { @@ -69,12 +72,6 @@ func NewConsumer[Request any, Response any](client redis.UniversalClient, stream // Start starts the consumer to iteratively perform heartbeat in configured intervals. 
func (c *Consumer[Request, Response]) Start(ctx context.Context) { c.StopWaiter.Start(ctx, c) - c.StopWaiter.CallIteratively( - func(ctx context.Context) time.Duration { - c.heartBeat(ctx) - return c.cfg.KeepAliveTimeout / 10 - }, - ) } func (c *Consumer[Request, Response]) Id() string { @@ -83,11 +80,6 @@ func (c *Consumer[Request, Response]) Id() string { func (c *Consumer[Request, Response]) StopAndWait() { c.StopWaiter.StopAndWait() - c.deleteHeartBeat(c.GetParentContext()) -} - -func heartBeatKey(id string) string { - return fmt.Sprintf("consumer:%s:heartbeat", id) } func (c *Consumer[Request, Response]) RedisClient() redis.UniversalClient { @@ -98,68 +90,124 @@ func (c *Consumer[Request, Response]) StreamName() string { return c.redisStream } -func (c *Consumer[Request, Response]) heartBeatKey() string { - return heartBeatKey(c.id) -} - -// deleteHeartBeat deletes the heartbeat to indicate it is being shut down. -func (c *Consumer[Request, Response]) deleteHeartBeat(ctx context.Context) { - if err := c.client.Del(ctx, c.heartBeatKey()).Err(); err != nil { - l := log.Info - if ctx.Err() != nil { - l = log.Error - } - l("Deleting heardbeat", "consumer", c.id, "error", err) +func decrementMsgIdByOne(msgId string) string { + id, err := getUintParts(msgId) + if err != nil { + log.Error("Error decrementing start of XAutoClaim by one, defaulting to 0", "err", err) + return "0" } -} - -// heartBeat updates the heartBeat key indicating aliveness. -func (c *Consumer[Request, Response]) heartBeat(ctx context.Context) { - if err := c.client.Set(ctx, c.heartBeatKey(), time.Now().UnixMilli(), 2*c.cfg.KeepAliveTimeout).Err(); err != nil { - l := log.Info - if ctx.Err() != nil { - l = log.Error - } - l("Updating heardbeat", "consumer", c.id, "error", err) + if id[1] > 0 { + return strconv.FormatUint(id[0], 10) + "-" + strconv.FormatUint(id[1]-1, 10) + } else if id[0] > 0 { + return strconv.FormatUint(id[0]-1, 10) + "-" + strconv.FormatUint(math.MaxUint64, 10) } + return "0" } // Consumer first checks it there exists pending message that is claimed by // unresponsive consumer, if not then reads from the stream. func (c *Consumer[Request, Response]) Consume(ctx context.Context) (*Message[Request], error) { - res, err := c.client.XReadGroup(ctx, &redis.XReadGroupArgs{ - Group: c.redisGroup, - Consumer: c.id, - // Receive only messages that were never delivered to any other consumer, - // that is, only new messages. 
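For context on the ID arithmetic introduced above: Redis stream IDs are <milliseconds>-<sequence> pairs, and decrementMsgIdByOne returns the immediately preceding ID, borrowing from the timestamp part when the sequence part is zero. The following table-driven test is an illustrative sketch only, not part of this diff; it assumes it would live in package pubsub next to the helper.

func TestDecrementMsgIdByOne(t *testing.T) {
	// Hypothetical IDs; the expected values follow directly from the helper's logic.
	cases := map[string]string{
		"1700000000000-1": "1700000000000-0",                    // decrement the sequence part
		"1700000000000-0": "1699999999999-18446744073709551615", // borrow from the timestamp part
		"0-0":             "0",                                  // lowest possible ID
	}
	for in, want := range cases {
		if got := decrementMsgIdByOne(in); got != want {
			t.Errorf("decrementMsgIdByOne(%q) = %q, want %q", in, got, want)
		}
	}
}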
- Streams: []string{c.redisStream, ">"}, - Count: 1, - Block: time.Millisecond, // 0 seems to block the read instead of immediately returning - }).Result() - if errors.Is(err, redis.Nil) { - return nil, nil - } - if err != nil { - return nil, fmt.Errorf("reading message for consumer: %q: %w", c.id, err) + // First try to XAUTOCLAIM, with start as a random messageID from PEL with MinIdle as IdletimeToAutoclaim. + // This prioritizes processing PEL messages that have been waiting for more than the IdletimeToAutoclaim duration + var messages []redis.XMessage + if pendingMsgs, err := c.client.XPendingExt(ctx, &redis.XPendingExtArgs{ + Stream: c.redisStream, + Group: c.redisGroup, + Start: "-", + End: "+", + Count: 50, + Idle: c.cfg.IdletimeToAutoclaim, + }).Result(); err != nil { + if !errors.Is(err, redis.Nil) { + log.Error("Error from XPendingExt in getting PEL for auto claim", "err", err, "pendingLen", len(pendingMsgs)) + } + } else if len(pendingMsgs) > 0 { + idx := rand.Intn(len(pendingMsgs)) + messages, _, err = c.client.XAutoClaim(ctx, &redis.XAutoClaimArgs{ + Group: c.redisGroup, + Consumer: c.id, + MinIdle: c.cfg.IdletimeToAutoclaim, // Minimum idle time for messages to claim (in milliseconds) + Stream: c.redisStream, + Start: decrementMsgIdByOne(pendingMsgs[idx].ID), + Count: 1, + }).Result() + if err != nil { + log.Info("error from xautoclaim", "err", err) + } } - if len(res) != 1 || len(res[0].Messages) != 1 { - return nil, fmt.Errorf("redis returned entries: %+v, for querying single message", res) + if len(messages) == 0 { + // If we fail to autoclaim then we do not retry but instead fall back to reading new messages + res, err := c.client.XReadGroup(ctx, &redis.XReadGroupArgs{ + Group: c.redisGroup, + Consumer: c.id, + // Receive only messages that were never delivered to any other consumer, + // that is, only new messages.
+ Streams: []string{c.redisStream, ">"}, + Count: 1, + Block: time.Millisecond, // 0 seems to block the read instead of immediately returning + }).Result() + if errors.Is(err, redis.Nil) { + return nil, nil + } + if err != nil { + return nil, fmt.Errorf("reading message for consumer: %q: %w", c.id, err) + } + if len(res) != 1 || len(res[0].Messages) != 1 { + return nil, fmt.Errorf("redis returned entries: %+v, for querying single message", res) + } + messages = res[0].Messages } + var ( - value = res[0].Messages[0].Values[messageKey] + value = messages[0].Values[messageKey] data, ok = (value).(string) ) if !ok { - return nil, fmt.Errorf("casting request to string: %w", err) + return nil, errors.New("error casting request to string") } var req Request if err := json.Unmarshal([]byte(data), &req); err != nil { return nil, fmt.Errorf("unmarshaling value: %v, error: %w", value, err) } - log.Debug("Redis stream consuming", "consumer_id", c.id, "message_id", res[0].Messages[0].ID) + ackNotifier := make(chan struct{}) + c.StopWaiter.LaunchThread(func(ctx context.Context) { + for { + // Use XClaimJustID so that we have a clear difference between invalid requests that are claimed multiple times due to xautoclaim and + // valid requests that are just being claimed at regular intervals to indicate heartbeat + if ids, err := c.client.XClaimJustID(ctx, &redis.XClaimArgs{ + Stream: c.redisStream, + Group: c.redisGroup, + Consumer: c.id, + MinIdle: 0, + Messages: []string{messages[0].ID}, + }).Result(); err != nil { + log.Error("Error claiming message, other consumers might pick up this request", "msgID", messages[0].ID) + } else if len(ids) == 0 { + log.Warn("XClaimJustID returned empty response when indicating heartbeat", "msgID", messages[0].ID) + } else if len(ids) > 1 { + log.Error("XClaimJustID returned response with more than one entry", "msgIDs", ids) + } + select { + case <-ackNotifier: + return + case <-ctx.Done(): + log.Info("Context done while claiming message to indicate heartbeat", "messageID", messages[0].ID, "error", ctx.Err().Error()) + if c.StopWaiter.GetParentContext().Err() == nil { + // Proceeding to set the Idle time of message to IdletimeToAutoclaim to allow it to be picked by other consumers + if err := c.client.Do(c.StopWaiter.GetParentContext(), "XCLAIM", c.redisStream, c.redisGroup, c.id, 0, messages[0].ID, "IDLE", c.cfg.IdletimeToAutoclaim.Milliseconds()).Err(); err != nil { + log.Error("error when trying to set the idle time of currently worked on message to IdletimeToAutoclaim", "messageID", messages[0].ID, "err", err) + } + } + return + case <-time.After(c.cfg.IdletimeToAutoclaim / 10): + } + } + }) + log.Debug("Redis stream consuming", "consumer_id", c.id, "message_id", messages[0].ID) return &Message[Request]{ - ID: res[0].Messages[0].ID, + ID: messages[0].ID, Value: req, + Ack: func() { close(ackNotifier) }, }, nil } @@ -168,14 +216,18 @@ func (c *Consumer[Request, Response]) SetResult(ctx context.Context, messageID s if err != nil { return fmt.Errorf("marshaling result: %w", err) } - log.Debug("consumer: setting result", "cid", c.id, "messageId", messageID) - acquired, err := c.client.SetNX(ctx, messageID, resp, c.cfg.ResponseEntryTimeout).Result() + resultKey := ResultKeyFor(c.StreamName(), messageID) + log.Debug("consumer: setting result", "cid", c.id, "msgIdInStream", messageID, "resultKeyInRedis", resultKey) + acquired, err := c.client.SetNX(ctx, resultKey, resp, c.cfg.ResponseEntryTimeout).Result() if err != nil || !acquired { - return
fmt.Errorf("setting result for message: %v, error: %w", messageID, err) + return fmt.Errorf("setting result for message with message-id in stream: %v, error: %w", messageID, err) } log.Debug("consumer: xack", "cid", c.id, "messageId", messageID) if _, err := c.client.XAck(ctx, c.redisStream, c.redisGroup, messageID).Result(); err != nil { return fmt.Errorf("acking message: %v, error: %w", messageID, err) } + if _, err := c.client.XDel(ctx, c.redisStream, messageID).Result(); err != nil { + return fmt.Errorf("deleting message: %v, error: %w", messageID, err) + } return nil } diff --git a/pubsub/producer.go b/pubsub/producer.go index 5eec3a4b52..dacaeba7d0 100644 --- a/pubsub/producer.go +++ b/pubsub/producer.go @@ -13,17 +13,16 @@ import ( "encoding/json" "errors" "fmt" - "math" "strconv" "strings" "sync" "time" "github.com/ethereum/go-ethereum/log" - "github.com/go-redis/redis/v8" "github.com/google/uuid" "github.com/offchainlabs/nitro/util/containers" "github.com/offchainlabs/nitro/util/stopwaiter" + "github.com/redis/go-redis/v9" "github.com/spf13/pflag" ) @@ -43,50 +42,31 @@ type Producer[Request any, Response any] struct { promisesLock sync.RWMutex promises map[string]*containers.Promise[Response] - // Used for running checks for pending messages with inactive consumers - // and checking responses from consumers iteratively for the first time when - // Produce is called. + // Used for checking responses from consumers iteratively + // For the first time when Produce is called. once sync.Once } type ProducerConfig struct { - // When enabled, messages that are sent to consumers that later die before - // processing them, will be re-inserted into the stream to be proceesed by - // another consumer - EnableReproduce bool `koanf:"enable-reproduce"` - // Interval duration in which producer checks for pending messages delivered - // to the consumers that are currently inactive. - CheckPendingInterval time.Duration `koanf:"check-pending-interval"` - // Duration after which consumer is considered to be dead if heartbeat - // is not updated. - KeepAliveTimeout time.Duration `koanf:"keepalive-timeout"` // Interval duration for checking the result set by consumers. 
CheckResultInterval time.Duration `koanf:"check-result-interval"` - CheckPendingItems int64 `koanf:"check-pending-items"` + // RequestTimeout is a TTL for any message sent to the redis stream + RequestTimeout time.Duration `koanf:"request-timeout"` } var DefaultProducerConfig = ProducerConfig{ - EnableReproduce: true, - CheckPendingInterval: time.Second, - KeepAliveTimeout: 5 * time.Minute, - CheckResultInterval: 5 * time.Second, - CheckPendingItems: 256, + CheckResultInterval: 5 * time.Second, + RequestTimeout: 3 * time.Hour, } var TestProducerConfig = ProducerConfig{ - EnableReproduce: false, - CheckPendingInterval: 10 * time.Millisecond, - KeepAliveTimeout: 100 * time.Millisecond, - CheckResultInterval: 5 * time.Millisecond, - CheckPendingItems: 256, + CheckResultInterval: 5 * time.Millisecond, + RequestTimeout: time.Minute, } func ProducerAddConfigAddOptions(prefix string, f *pflag.FlagSet) { - f.Bool(prefix+".enable-reproduce", DefaultProducerConfig.EnableReproduce, "when enabled, messages with dead consumer will be re-inserted into the stream") - f.Duration(prefix+".check-pending-interval", DefaultProducerConfig.CheckPendingInterval, "interval in which producer checks pending messages whether consumer processing them is inactive") f.Duration(prefix+".check-result-interval", DefaultProducerConfig.CheckResultInterval, "interval in which producer checks pending messages whether consumer processing them is inactive") - f.Duration(prefix+".keepalive-timeout", DefaultProducerConfig.KeepAliveTimeout, "timeout after which consumer is considered inactive if heartbeat wasn't performed") - f.Int64(prefix+".check-pending-items", DefaultProducerConfig.CheckPendingItems, "items to screen during check-pending") + f.Duration(prefix+".request-timeout", DefaultProducerConfig.RequestTimeout, "timeout after which the message in redis stream is considered as errored, this prevents workers from working on wrong requests indefinitely") } func NewProducer[Request any, Response any](client redis.UniversalClient, streamName string, cfg *ProducerConfig) (*Producer[Request, Response], error) { @@ -106,125 +86,72 @@ func NewProducer[Request any, Response any](client redis.UniversalClient, stream }, nil } -func (p *Producer[Request, Response]) errorPromisesFor(msgIds []string) { - p.promisesLock.Lock() - defer p.promisesLock.Unlock() - for _, msg := range msgIds { - if promise, found := p.promises[msg]; found { - promise.ProduceError(fmt.Errorf("internal error, consumer died while serving the request")) - delete(p.promises, msg) - } +func getUintParts(msgId string) ([2]uint64, error) { + idParts := strings.Split(msgId, "-") + if len(idParts) != 2 { + return [2]uint64{}, fmt.Errorf("invalid i.d: %v", msgId) } -} - -// checkAndReproduce reproduce pending messages that were sent to consumers -// that are currently inactive. 
-func (p *Producer[Request, Response]) checkAndReproduce(ctx context.Context) time.Duration { - staleIds, err := p.checkPending(ctx) + idTimeStamp, err := strconv.ParseUint(idParts[0], 10, 64) if err != nil { - log.Error("Checking pending messages", "error", err) - return p.cfg.CheckPendingInterval - } - if len(staleIds) == 0 { - return p.cfg.CheckPendingInterval + return [2]uint64{}, fmt.Errorf("invalid i.d: %v err: %w", msgId, err) } - if p.cfg.EnableReproduce { - err = p.reproduceIds(ctx, staleIds) - if err != nil { - log.Warn("filed reproducing messages", "err", err) - } - } else { - p.errorPromisesFor(staleIds) - } - return p.cfg.CheckPendingInterval -} - -func (p *Producer[Request, Response]) reproduceIds(ctx context.Context, staleIds []string) error { - log.Info("Attempting to claim", "messages", staleIds) - claimedMsgs, err := p.client.XClaim(ctx, &redis.XClaimArgs{ - Stream: p.redisStream, - Group: p.redisGroup, - Consumer: p.id, - MinIdle: p.cfg.KeepAliveTimeout, - Messages: staleIds, - }).Result() + idSerial, err := strconv.ParseUint(idParts[1], 10, 64) if err != nil { - return fmt.Errorf("claiming ownership on messages: %v, error: %w", staleIds, err) + return [2]uint64{}, fmt.Errorf("invalid i.d serial: %v err: %w", msgId, err) } - for _, msg := range claimedMsgs { - data, ok := (msg.Values[messageKey]).(string) - if !ok { - log.Error("redis producer reproduce: message not string", "id", msg.ID, "value", msg.Values[messageKey]) - continue - } - var req Request - if err := json.Unmarshal([]byte(data), &req); err != nil { - log.Error("redis producer reproduce: message not a request", "id", msg.ID, "err", err, "value", msg.Values[messageKey]) - continue - } - if _, err := p.client.XAck(ctx, p.redisStream, p.redisGroup, msg.ID).Result(); err != nil { - log.Error("redis producer reproduce: could not ACK", "id", msg.ID, "err", err) - continue - } - // Only re-insert messages that were removed the the pending list first. - if _, err := p.reproduce(ctx, req, msg.ID); err != nil { - log.Error("redis producer reproduce: error", "err", err) - } - } - return nil + return [2]uint64{idTimeStamp, idSerial}, nil } -func setMinIdInt(min *[2]uint64, id string) error { - idParts := strings.Split(id, "-") - if len(idParts) != 2 { - return fmt.Errorf("invalid i.d: %v", id) - } - idTimeStamp, err := strconv.ParseUint(idParts[0], 10, 64) +// cmpMsgId compares two msgid's and returns (0) if equal, (-1) if msgId1 < msgId2, (1) if msgId1 > msgId2, (-2) if not comparable (or error) +func cmpMsgId(msgId1, msgId2 string) int { + id1, err := getUintParts(msgId1) if err != nil { - return fmt.Errorf("invalid i.d: %v err: %w", id, err) + log.Trace("error comparing msgIds", "msgId1", msgId1, "msgId2", msgId2) + return -2 } - if idTimeStamp > min[0] { - return nil - } - idSerial, err := strconv.ParseUint(idParts[1], 10, 64) + id2, err := getUintParts(msgId2) if err != nil { - return fmt.Errorf("invalid i.d serial: %v err: %w", id, err) + log.Trace("error comparing msgIds", "msgId1", msgId1, "msgId2", msgId2) + return -2 } - if idTimeStamp < min[0] { - min[0] = idTimeStamp - min[1] = idSerial - return nil + if id1[0] < id2[0] { + return -1 + } else if id1[0] > id2[0] { + return 1 + } else if id1[1] < id2[1] { + return -1 + } else if id1[1] > id2[1] { + return 1 } - // idTimeStamp == min[0] - if idSerial < min[1] { - min[1] = idSerial - } - return nil + return 0 } // checkResponses checks iteratively whether response for the promise is ready. 
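The two helpers above feed the producer-side timeout handling in checkResponses below: a cutoff stream ID is built from RequestTimeout, and any tracked message ID that compares lower than that cutoff is treated as expired. A condensed, illustrative restatement of that check (the function name isPastRequestTimeout is hypothetical and not part of this diff; it assumes package pubsub):

func isPastRequestTimeout(msgId string, requestTimeout time.Duration) bool {
	// A stream ID encodes its creation time in milliseconds, so an ID that sorts
	// below this cutoff was added to the stream more than requestTimeout ago.
	allowedOldestID := fmt.Sprintf("%d-0", time.Now().Add(-requestTimeout).UnixMilli())
	return cmpMsgId(msgId, allowedOldestID) == -1
}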
func (p *Producer[Request, Response]) checkResponses(ctx context.Context) time.Duration { - minIdInt := [2]uint64{math.MaxUint64, math.MaxUint64} log.Debug("redis producer: check responses starting") p.promisesLock.Lock() defer p.promisesLock.Unlock() responded := 0 errored := 0 checked := 0 + allowedOldestID := fmt.Sprintf("%d-0", time.Now().Add(-p.cfg.RequestTimeout).UnixMilli()) for id, promise := range p.promises { if ctx.Err() != nil { return 0 } checked++ - res, err := p.client.Get(ctx, id).Result() + resultKey := ResultKeyFor(p.redisStream, id) + res, err := p.client.Get(ctx, resultKey).Result() if err != nil { - errSetId := setMinIdInt(&minIdInt, id) - if errSetId != nil { - log.Error("redis producer: error setting minId", "err", err) - return p.cfg.CheckResultInterval - } if !errors.Is(err, redis.Nil) { - log.Error("redis producer: Error reading value in redis", "key", id, "error", err) + log.Error("Error reading value in redis", "key", resultKey, "error", err) + } else if cmpMsgId(id, allowedOldestID) == -1 { + // The request this producer is waiting for is past its TTL or is older than the current PEL's lower bound, + // so it is safe to error and stop tracking this promise + promise.ProduceError(errors.New("error getting response, request has been waiting for too long")) + log.Error("error getting response, request has been waiting past its TTL") + errored++ + delete(p.promises, id) } continue } @@ -237,22 +164,50 @@ func (p *Producer[Request, Response]) checkResponses(ctx context.Context) time.D promise.Produce(resp) responded++ } - p.client.Del(ctx, id) + p.client.Del(ctx, resultKey) delete(p.promises, id) } - var trimmed int64 - var trimErr error - minId := "+" - if minIdInt[0] < math.MaxUint64 { - minId = fmt.Sprintf("%d-%d", minIdInt[0], minIdInt[1]) - trimmed, trimErr = p.client.XTrimMinID(ctx, p.redisStream, minId).Result() - } else { - trimmed, trimErr = p.client.XTrimMaxLen(ctx, p.redisStream, 0).Result() - } - log.Debug("trimming", "id", minId, "trimmed", trimmed, "responded", responded, "errored", errored, "trim-err", trimErr, "checked", checked) + log.Debug("checkResponses", "responded", responded, "errored", errored, "checked", checked) return p.cfg.CheckResultInterval } +func (p *Producer[Request, Response]) clearMessages(ctx context.Context) time.Duration { + pelData, err := p.client.XPending(ctx, p.redisStream, p.redisGroup).Result() + if err != nil { + log.Error("error getting PEL data from xpending, xtrimming is disabled", "err", err) + } + // XDEL on the consumer side already deletes acked messages (marks them as deleted) but doesn't claim the memory back; XTRIM helps in claiming this memory in normal conditions + // pelData might be outdated when we do the xtrim, but that's ok as the messages are also being trimmed by other producers + if pelData != nil && pelData.Lower != "" { + trimmed, trimErr := p.client.XTrimMinID(ctx, p.redisStream, pelData.Lower).Result() + log.Debug("trimming", "xTrimMinID", pelData.Lower, "trimmed", trimmed, "trim-err", trimErr) + // Check if pelData.Lower is past its TTL and if it is then ack it to remove it from the PEL and delete it; once + // it's taken out of the PEL the producer that sent this request will handle the corresponding promise accordingly (as it's past its TTL) + allowedOldestID := fmt.Sprintf("%d-0", time.Now().Add(-p.cfg.RequestTimeout).UnixMilli()) + if cmpMsgId(pelData.Lower, allowedOldestID) == -1 { + if err := p.client.XClaim(ctx, &redis.XClaimArgs{ + Stream: p.redisStream, + Group: p.redisGroup, + Consumer: p.id, + MinIdle: 0, + Messages:
[]string{pelData.Lower}, + }).Err(); err != nil { + log.Error("error claiming PEL's lower message that's past its TTL", "msgID", pelData.Lower, "err", err) + return 5 * p.cfg.CheckResultInterval + } + if _, err := p.client.XAck(ctx, p.redisStream, p.redisGroup, pelData.Lower).Result(); err != nil { + log.Error("error acking PEL's lower message that's past its TTL", "msgID", pelData.Lower, "err", err) + return 5 * p.cfg.CheckResultInterval + } + if _, err := p.client.XDel(ctx, p.redisStream, pelData.Lower).Result(); err != nil { + log.Error("error deleting PEL's lower message that's past its TTL", "msgID", pelData.Lower, "err", err) + return 0 + } + } + } + return 5 * p.cfg.CheckResultInterval +} + func (p *Producer[Request, Response]) Start(ctx context.Context) { p.StopWaiter.Start(ctx, p) } @@ -263,101 +218,31 @@ func (p *Producer[Request, Response]) promisesLen() int { return len(p.promises) } -// reproduce is used when Producer claims ownership on the pending -// message that was sent to inactive consumer and reinserts it into the stream, -// so that seamlessly return the answer in the same promise. -func (p *Producer[Request, Response]) reproduce(ctx context.Context, value Request, oldKey string) (*containers.Promise[Response], error) { +func (p *Producer[Request, Response]) produce(ctx context.Context, value Request) (*containers.Promise[Response], error) { val, err := json.Marshal(value) if err != nil { return nil, fmt.Errorf("marshaling value: %w", err) } - // catching the promiseLock before we sendXadd makes sure promise ids will - // be always ascending + // catching the promisesLock before we send XAdd makes sure promise ids will always be ascending p.promisesLock.Lock() defer p.promisesLock.Unlock() - id, err := p.client.XAdd(ctx, &redis.XAddArgs{ + msgId, err := p.client.XAdd(ctx, &redis.XAddArgs{ Stream: p.redisStream, Values: map[string]any{messageKey: val}, }).Result() if err != nil { return nil, fmt.Errorf("adding values to redis: %w", err) } - promise := p.promises[oldKey] - if oldKey != "" && promise == nil { - // This will happen if the old consumer became inactive but then ack_d - // the message afterwards. - // don't error - log.Warn("tried reproducing a message but it wasn't found - probably got response", "oldKey", oldKey) - } - if oldKey == "" || promise == nil { - pr := containers.NewPromise[Response](nil) - promise = &pr - } - delete(p.promises, oldKey) - p.promises[id] = promise - return promise, nil + promise := containers.NewPromise[Response](nil) + p.promises[msgId] = &promise + return &promise, nil } func (p *Producer[Request, Response]) Produce(ctx context.Context, value Request) (*containers.Promise[Response], error) { log.Debug("Redis stream producing", "value", value) p.once.Do(func() { - p.StopWaiter.CallIteratively(p.checkAndReproduce) p.StopWaiter.CallIteratively(p.checkResponses) + p.StopWaiter.CallIteratively(p.clearMessages) }) - return p.reproduce(ctx, value, "") -} - -// Check if a consumer is with specified ID is alive.
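To make the produce / SetResult / checkResponses handshake concrete: the producer keys its promise map by the stream ID returned from XADD, the consumer publishes its response under ResultKeyFor(streamName, id), and the producer later polls that same key. The sketch below is illustrative only and not part of this diff; the stream name, the "msg" field name (standing in for the package's messageKey constant), and the function itself are placeholders, while the go-redis v9 calls are the ones already used here.

func resultKeyHandshake(ctx context.Context, client redis.UniversalClient, payload, response string) (string, error) {
	// Producer side (produce): enqueue the request; the returned stream ID keys the pending promise.
	msgId, err := client.XAdd(ctx, &redis.XAddArgs{
		Stream: "stream:demo",
		Values: map[string]any{"msg": payload},
	}).Result()
	if err != nil {
		return "", err
	}
	// Consumer side (SetResult): publish the response under the per-stream result key with a TTL.
	if err := client.SetNX(ctx, ResultKeyFor("stream:demo", msgId), response, time.Hour).Err(); err != nil {
		return "", err
	}
	// Producer side (checkResponses): poll the same key to resolve the pending promise.
	return client.Get(ctx, ResultKeyFor("stream:demo", msgId)).Result()
}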
-func (p *Producer[Request, Response]) isConsumerAlive(ctx context.Context, consumerID string) bool { - if _, err := p.client.Get(ctx, heartBeatKey(consumerID)).Int64(); err != nil { - return false - } - return true -} - -func (p *Producer[Request, Response]) havePromiseFor(messageID string) bool { - p.promisesLock.Lock() - defer p.promisesLock.Unlock() - _, found := p.promises[messageID] - return found -} - -// returns ids of pending messages that's worker doesn't appear alive -func (p *Producer[Request, Response]) checkPending(ctx context.Context) ([]string, error) { - pendingMessages, err := p.client.XPendingExt(ctx, &redis.XPendingExtArgs{ - Stream: p.redisStream, - Group: p.redisGroup, - Start: "-", - End: "+", - Count: p.cfg.CheckPendingItems, - }).Result() - - if err != nil && !errors.Is(err, redis.Nil) { - return nil, fmt.Errorf("querying pending messages: %w", err) - } - if len(pendingMessages) == 0 { - return nil, nil - } - if len(pendingMessages) >= int(p.cfg.CheckPendingItems) { - log.Warn("redis producer: many pending items found", "stream", p.redisStream, "check-pending-items", p.cfg.CheckPendingItems) - } - // IDs of the pending messages with inactive consumers. - var ids []string - active := make(map[string]bool) - for _, msg := range pendingMessages { - // Ignore messages not produced by this producer. - if !p.havePromiseFor(msg.ID) { - continue - } - alive, found := active[msg.Consumer] - if !found { - alive = p.isConsumerAlive(ctx, msg.Consumer) - active[msg.Consumer] = alive - } - if alive { - continue - } - ids = append(ids, msg.ID) - } - return ids, nil + return p.produce(ctx, value) } diff --git a/pubsub/pubsub_test.go b/pubsub/pubsub_test.go index 9f774b6372..8bd1aed25d 100644 --- a/pubsub/pubsub_test.go +++ b/pubsub/pubsub_test.go @@ -10,11 +10,11 @@ import ( "time" "github.com/ethereum/go-ethereum/log" - "github.com/go-redis/redis/v8" "github.com/google/go-cmp/cmp" "github.com/google/uuid" "github.com/offchainlabs/nitro/util/containers" "github.com/offchainlabs/nitro/util/redisutil" + "github.com/redis/go-redis/v9" ) var ( @@ -23,7 +23,8 @@ var ( ) type testRequest struct { - Request string + Request string + IsInvalid bool } type testResponse struct { @@ -45,36 +46,21 @@ func destroyRedisGroup(ctx context.Context, t *testing.T, streamName string, cli } } -type configOpt interface { - apply(consCfg *ConsumerConfig, prodCfg *ProducerConfig) -} - -type withReproduce struct { - reproduce bool -} - -func (e *withReproduce) apply(_ *ConsumerConfig, prodCfg *ProducerConfig) { - prodCfg.EnableReproduce = e.reproduce -} - func producerCfg() *ProducerConfig { return &ProducerConfig{ - EnableReproduce: TestProducerConfig.EnableReproduce, - CheckPendingInterval: TestProducerConfig.CheckPendingInterval, - KeepAliveTimeout: TestProducerConfig.KeepAliveTimeout, - CheckResultInterval: TestProducerConfig.CheckResultInterval, - CheckPendingItems: TestProducerConfig.CheckPendingItems, + CheckResultInterval: TestProducerConfig.CheckResultInterval, + RequestTimeout: 2 * time.Second, } } func consumerCfg() *ConsumerConfig { return &ConsumerConfig{ ResponseEntryTimeout: TestConsumerConfig.ResponseEntryTimeout, - KeepAliveTimeout: TestConsumerConfig.KeepAliveTimeout, + IdletimeToAutoclaim: TestConsumerConfig.IdletimeToAutoclaim, } } -func newProducerConsumers(ctx context.Context, t *testing.T, opts ...configOpt) (redis.UniversalClient, string, *Producer[testRequest, testResponse], []*Consumer[testRequest, testResponse]) { +func newProducerConsumers(ctx context.Context, t *testing.T) 
(redis.UniversalClient, string, *Producer[testRequest, testResponse], []*Consumer[testRequest, testResponse]) { t.Helper() redisClient, err := redisutil.RedisClientFromURL(redisutil.CreateTestRedis(ctx, t)) if err != nil { @@ -82,9 +68,7 @@ func newProducerConsumers(ctx context.Context, t *testing.T, opts ...configOpt) } prodCfg, consCfg := producerCfg(), consumerCfg() streamName := fmt.Sprintf("stream:%s", uuid.NewString()) - for _, o := range opts { - o.apply(consCfg, prodCfg) - } + producer, err := NewProducer[testRequest, testResponse](redisClient, streamName, prodCfg) if err != nil { t.Fatalf("Error creating new producer: %v", err) @@ -102,13 +86,6 @@ func newProducerConsumers(ctx context.Context, t *testing.T, opts ...configOpt) t.Cleanup(func() { ctx := context.Background() destroyRedisGroup(ctx, t, streamName, producer.client) - var keys []string - for _, c := range consumers { - keys = append(keys, c.heartBeatKey()) - } - if _, err := producer.client.Del(ctx, keys...).Result(); err != nil { - log.Debug("Error deleting heartbeat keys", "error", err) - } }) return redisClient, streamName, producer, consumers } @@ -125,10 +102,10 @@ func msgForIndex(idx int) string { return fmt.Sprintf("msg: %d", idx) } -func wantMessages(n int) []string { +func wantMessages(n int, group string) []string { var ret []string for i := 0; i < n; i++ { - ret = append(ret, msgForIndex(i)) + ret = append(ret, group+msgForIndex(i)) } sort.Strings(ret) return ret @@ -143,10 +120,14 @@ func flatten(responses [][]string) []string { return ret } -func produceMessages(ctx context.Context, msgs []string, producer *Producer[testRequest, testResponse]) ([]*containers.Promise[testResponse], error) { +func produceMessages(ctx context.Context, msgs []string, producer *Producer[testRequest, testResponse], withInvalidEntries bool) ([]*containers.Promise[testResponse], error) { var promises []*containers.Promise[testResponse] - for i := 0; i < messagesCount; i++ { - promise, err := producer.Produce(ctx, testRequest{Request: msgs[i]}) + for i := 0; i < len(msgs); i++ { + req := testRequest{Request: msgs[i]} + if withInvalidEntries && i%50 == 0 { + req.IsInvalid = true + } + promise, err := producer.Produce(ctx, req) if err != nil { return nil, err } @@ -197,51 +178,97 @@ func consume(ctx context.Context, t *testing.T, consumers []*Consumer[testReques continue } gotMessages[idx][res.ID] = res.Value.Request - resp := fmt.Sprintf("result for: %v", res.ID) - if err := c.SetResult(ctx, res.ID, testResponse{Response: resp}); err != nil { - t.Errorf("Error setting a result: %v", err) + if !res.Value.IsInvalid { + resp := fmt.Sprintf("result for: %v", res.ID) + if err := c.SetResult(ctx, res.ID, testResponse{Response: resp}); err != nil { + t.Errorf("Error setting a result: %v", err) + } + wantResponses[idx] = append(wantResponses[idx], resp) } - wantResponses[idx] = append(wantResponses[idx], resp) + res.Ack() } }) } return wantResponses } -func TestRedisProduce(t *testing.T) { +func TestRedisProduceComplex(t *testing.T) { log.SetDefault(log.NewLogger(log.NewTerminalHandlerWithLevel(os.Stderr, log.LevelTrace, true))) t.Parallel() for _, tc := range []struct { - name string - killConsumers bool - autoRecover bool + name string + entriesCount []int + numProducers int + killConsumers bool + withInvalidEntries bool // If this is set, then every 50th entry is invalid (requests that can't be solved by any consumer) }{ { - name: "all consumers are active", - killConsumers: false, - autoRecover: false, + name: "one producer, all 
consumers are active", + entriesCount: []int{messagesCount}, + numProducers: 1, + }, + { + name: "two producers, all consumers are active", + entriesCount: []int{20, 20}, + numProducers: 2, }, { - name: "some consumers killed, others should take over their work", + name: "one producer, some consumers killed, others should take over their work", + entriesCount: []int{messagesCount}, + numProducers: 1, killConsumers: true, - autoRecover: true, }, + { - name: "some consumers killed, should return failure", + name: "two producers, some consumers killed, others should take over their work, unequal number of requests from producers", + entriesCount: []int{messagesCount, 2 * messagesCount}, + numProducers: 2, killConsumers: true, - autoRecover: false, + }, + { + name: "two producers, some consumers killed, others should take over their work, some invalid entries, unequal number of requests from producers", + entriesCount: []int{messagesCount, 2 * messagesCount}, + numProducers: 2, + killConsumers: true, + withInvalidEntries: true, }, } { t.Run(tc.name, func(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - redisClient, streamName, producer, consumers := newProducerConsumers(ctx, t, &withReproduce{tc.autoRecover}) - producer.Start(ctx) - wantMsgs := wantMessages(messagesCount) - promises, err := produceMessages(ctx, wantMsgs, producer) - if err != nil { - t.Fatalf("Error producing messages: %v", err) + + var producers []*Producer[testRequest, testResponse] + redisClient, streamName, producer, consumers := newProducerConsumers(ctx, t) + producers = append(producers, producer) + if tc.numProducers == 2 { + producer, err := NewProducer[testRequest, testResponse](redisClient, streamName, producerCfg()) + if err != nil { + t.Fatalf("Error creating second producer: %v", err) + } + producers = append(producers, producer) + } + + for _, producer := range producers { + producer.Start(ctx) + } + + var entries [][]string + if tc.numProducers == 2 { + entries = append(entries, wantMessages(tc.entriesCount[0], "1.")) + entries = append(entries, wantMessages(tc.entriesCount[1], "2.")) + } else { + entries = append(entries, wantMessages(tc.entriesCount[0], "")) } + + var promises [][]*containers.Promise[testResponse] + for i := 0; i < tc.numProducers; i++ { + prs, err := produceMessages(ctx, entries[i], producers[i], tc.withInvalidEntries) + if err != nil { + t.Fatalf("Error producing messages from producer%d: %v", i, err) + } + promises = append(promises, prs) + } + gotMessages := messagesMaps(len(consumers)) if tc.killConsumers { // Consumer messages in every third consumer but don't ack them to check @@ -252,40 +279,79 @@ func TestRedisProduce(t *testing.T) { if err != nil { t.Errorf("Error consuming message: %v", err) } - if !tc.autoRecover { - gotMessages[i][req.ID] = req.Value.Request + if req == nil { + t.Error("Didn't consume any message") } + // Kills the actnotifier hence allowing XAUTOCLAIM consumers[i].StopAndWait() } } + time.Sleep(time.Second) wantResponses := consume(ctx, t, consumers, gotMessages) - gotResponses, errIndexes := awaitResponses(ctx, promises) - if len(errIndexes) != 0 && tc.autoRecover { - t.Fatalf("Error awaiting responses: %v", errIndexes) + + var gotResponses []string + for i := 0; i < tc.numProducers; i++ { + grs, errIndexes := awaitResponses(ctx, promises[i]) + if tc.withInvalidEntries { + if errIndexes[len(errIndexes)-1]+50 < len(entries[i]) { + t.Fatalf("Unexpected number of invalid requests while awaiting responses") + } + for j, idx := 
range errIndexes { + if idx != j*50 { + t.Fatalf("Invalid request's index mismatch want: %d got %d", j*50, idx) + } + } + } else if len(errIndexes) != 0 { + t.Fatalf("Error awaiting responses from promises %d: %v", i, errIndexes) + } + gotResponses = append(gotResponses, grs...) } - producer.StopAndWait() + for _, c := range consumers { c.StopAndWait() } - got, err := mergeValues(gotMessages) + + got, err := mergeValues(gotMessages, tc.withInvalidEntries) if err != nil { t.Fatalf("mergeMaps() unexpected error: %v", err) } + // got will have duplicates only when there are invalid entries + if tc.withInvalidEntries { + got = removeDuplicates(got) + } + + var combinedEntries []string + for i := 0; i < tc.numProducers; i++ { + combinedEntries = append(combinedEntries, entries[i]...) + } + wantMsgs := combinedEntries if diff := cmp.Diff(wantMsgs, got); diff != "" { t.Errorf("Unexpected diff (-want +got):\n%s\n", diff) } - wantResp := flatten(wantResponses) + sort.Strings(gotResponses) + wantResp := flatten(wantResponses) if diff := cmp.Diff(wantResp, gotResponses); diff != "" { t.Errorf("Unexpected diff in responses:\n%s\n", diff) } - if cnt := producer.promisesLen(); cnt != 0 { - t.Errorf("Producer still has %d unfullfilled promises", cnt) + + // Check that all of each producer's promises were responded to + for i := 0; i < tc.numProducers; i++ { + if cnt := producers[i].promisesLen(); cnt != 0 { + t.Errorf("Producer%d still has %d unfulfilled promises", i, cnt) + } } + // Trigger a trim - producer.checkResponses(ctx) + time.Sleep(time.Second) + for i := 0; i < tc.numProducers; i++ { + producers[i].checkResponses(ctx) + producers[i].StopAndWait() + } + + // Check that no messages remain in the stream msgs, err := redisClient.XRange(ctx, streamName, "-", "+").Result() if err != nil { t.Errorf("XRange failed: %v", err) @@ -297,14 +363,27 @@ func TestRedisProduce(t *testing.T) { } } +func removeDuplicates(list []string) []string { + capture := map[string]bool{} + var ret []string + for _, elem := range list { + if _, found := capture[elem]; !found { + ret = append(ret, elem) + capture[elem] = true + } + } + sort.Strings(ret) + return ret +} + // mergeValues merges maps from the slice and returns their values. // Returns an error if there exists a duplicate key.
-func mergeValues(messages []map[string]string) ([]string, error) { +func mergeValues(messages []map[string]string, withInvalidEntries bool) ([]string, error) { res := make(map[string]any) var ret []string for _, m := range messages { for k, v := range m { - if _, found := res[k]; found { + if _, found := res[k]; found && !withInvalidEntries { return nil, fmt.Errorf("duplicate key: %v", k) } res[k] = v diff --git a/system_tests/common_test.go b/system_tests/common_test.go index 93c38b5eae..dbb2b86f13 100644 --- a/system_tests/common_test.go +++ b/system_tests/common_test.go @@ -20,7 +20,6 @@ import ( "testing" "time" - "github.com/go-redis/redis/v8" "github.com/offchainlabs/nitro/arbos" "github.com/offchainlabs/nitro/arbos/arbostypes" "github.com/offchainlabs/nitro/arbos/util" @@ -42,6 +41,7 @@ import ( "github.com/offchainlabs/nitro/validator/server_common" "github.com/offchainlabs/nitro/validator/valnode" rediscons "github.com/offchainlabs/nitro/validator/valnode/redis" + "github.com/redis/go-redis/v9" "github.com/ethereum/go-ethereum" "github.com/ethereum/go-ethereum/accounts/abi" @@ -86,12 +86,13 @@ import ( type info = *BlockchainTestInfo type SecondNodeParams struct { - nodeConfig *arbnode.Config - execConfig *gethexec.Config - stackConfig *node.Config - dasConfig *das.DataAvailabilityConfig - initData *statetransfer.ArbosInitializationInfo - addresses *chaininfo.RollupAddresses + nodeConfig *arbnode.Config + execConfig *gethexec.Config + stackConfig *node.Config + dasConfig *das.DataAvailabilityConfig + initData *statetransfer.ArbosInitializationInfo + addresses *chaininfo.RollupAddresses + wasmCacheTag uint32 } type TestClient struct { @@ -155,19 +156,20 @@ func (tc *TestClient) EnsureTxSucceededWithTimeout(transaction *types.Transactio } var TestCachingConfig = gethexec.CachingConfig{ - Archive: false, - BlockCount: 128, - BlockAge: 30 * time.Minute, - TrieTimeLimit: time.Hour, - TrieDirtyCache: 1024, - TrieCleanCache: 600, - SnapshotCache: 400, - DatabaseCache: 2048, - SnapshotRestoreGasLimit: 300_000_000_000, - MaxNumberOfBlocksToSkipStateSaving: 0, - MaxAmountOfGasToSkipStateSaving: 0, - StylusLRUCacheCapacity: 0, - StateScheme: env.GetTestStateScheme(), + Archive: false, + BlockCount: 128, + BlockAge: 30 * time.Minute, + TrieTimeLimit: time.Hour, + TrieDirtyCache: 1024, + TrieCleanCache: 600, + SnapshotCache: 400, + DatabaseCache: 2048, + SnapshotRestoreGasLimit: 300_000_000_000, + MaxNumberOfBlocksToSkipStateSaving: 0, + MaxAmountOfGasToSkipStateSaving: 0, + StylusLRUCacheCapacity: 0, + DisableStylusCacheMetricsCollection: true, + StateScheme: env.GetTestStateScheme(), } var DefaultTestForwarderConfig = gethexec.ForwarderConfig{ @@ -248,6 +250,7 @@ type NodeBuilder struct { initMessage *arbostypes.ParsedInitMessage l3InitMessage *arbostypes.ParsedInitMessage withProdConfirmPeriodBlocks bool + wasmCacheTag uint32 // Created nodes L1 *TestClient @@ -351,6 +354,15 @@ func (b *NodeBuilder) WithExtraArchs(targets []string) *NodeBuilder { return b } +func (b *NodeBuilder) WithStylusLongTermCache(enabled bool) *NodeBuilder { + if enabled { + b.wasmCacheTag = 1 + } else { + b.wasmCacheTag = 0 + } + return b +} + func (b *NodeBuilder) Build(t *testing.T) func() { b.CheckConfig(t) if b.withL1 { @@ -424,6 +436,8 @@ func buildOnParentChain( initMessage *arbostypes.ParsedInitMessage, addresses *chaininfo.RollupAddresses, + + wasmCacheTag uint32, ) *TestClient { if parentChainTestClient == nil { t.Fatal("must build parent chain before building chain") @@ -435,7 +449,7 @@ func 
buildOnParentChain( var arbDb ethdb.Database var blockchain *core.BlockChain _, chainTestClient.Stack, chainDb, arbDb, blockchain = createNonL1BlockChainWithStackConfig( - t, chainInfo, dataDir, chainConfig, initMessage, stackConfig, execConfig) + t, chainInfo, dataDir, chainConfig, initMessage, stackConfig, execConfig, wasmCacheTag) var sequencerTxOptsPtr *bind.TransactOpts var dataSigner signature.DataSignerFunc @@ -523,6 +537,8 @@ func (b *NodeBuilder) BuildL3OnL2(t *testing.T) func() { b.l3InitMessage, b.l3Addresses, + + b.wasmCacheTag, ) return func() { @@ -551,6 +567,8 @@ func (b *NodeBuilder) BuildL2OnL1(t *testing.T) func() { b.initMessage, b.addresses, + + b.wasmCacheTag, ) return func() { @@ -572,7 +590,7 @@ func (b *NodeBuilder) BuildL2(t *testing.T) func() { var arbDb ethdb.Database var blockchain *core.BlockChain b.L2Info, b.L2.Stack, chainDb, arbDb, blockchain = createL2BlockChain( - t, b.L2Info, b.dataDir, b.chainConfig, b.execConfig) + t, b.L2Info, b.dataDir, b.chainConfig, b.execConfig, b.wasmCacheTag) Require(t, b.execConfig.Validate()) execConfig := b.execConfig @@ -623,7 +641,7 @@ func (b *NodeBuilder) RestartL2Node(t *testing.T) { } b.L2.cleanup() - l2info, stack, chainDb, arbDb, blockchain := createNonL1BlockChainWithStackConfig(t, b.L2Info, b.dataDir, b.chainConfig, b.initMessage, b.l2StackConfig, b.execConfig) + l2info, stack, chainDb, arbDb, blockchain := createNonL1BlockChainWithStackConfig(t, b.L2Info, b.dataDir, b.chainConfig, b.initMessage, b.l2StackConfig, b.execConfig, b.wasmCacheTag) execConfigFetcher := func() *gethexec.Config { return b.execConfig } execNode, err := gethexec.CreateExecutionNode(b.ctx, stack, chainDb, blockchain, nil, execConfigFetcher) @@ -700,7 +718,7 @@ func build2ndNode( testClient := NewTestClient(ctx) testClient.Client, testClient.ConsensusNode = - Create2ndNodeWithConfig(t, ctx, firstNodeTestClient.ConsensusNode, parentChainTestClient.Stack, parentChainInfo, params.initData, params.nodeConfig, params.execConfig, params.stackConfig, valnodeConfig, params.addresses, initMessage) + Create2ndNodeWithConfig(t, ctx, firstNodeTestClient.ConsensusNode, parentChainTestClient.Stack, parentChainInfo, params.initData, params.nodeConfig, params.execConfig, params.stackConfig, valnodeConfig, params.addresses, initMessage, params.wasmCacheTag) testClient.ExecNode = getExecNode(t, testClient.ConsensusNode) testClient.cleanup = func() { testClient.ConsensusNode.StopAndWait() } return testClient, func() { testClient.cleanup() } @@ -1286,13 +1304,13 @@ func deployOnParentChain( } func createL2BlockChain( - t *testing.T, l2info *BlockchainTestInfo, dataDir string, chainConfig *params.ChainConfig, execConfig *gethexec.Config, + t *testing.T, l2info *BlockchainTestInfo, dataDir string, chainConfig *params.ChainConfig, execConfig *gethexec.Config, wasmCacheTag uint32, ) (*BlockchainTestInfo, *node.Node, ethdb.Database, ethdb.Database, *core.BlockChain) { - return createNonL1BlockChainWithStackConfig(t, l2info, dataDir, chainConfig, nil, nil, execConfig) + return createNonL1BlockChainWithStackConfig(t, l2info, dataDir, chainConfig, nil, nil, execConfig, wasmCacheTag) } func createNonL1BlockChainWithStackConfig( - t *testing.T, info *BlockchainTestInfo, dataDir string, chainConfig *params.ChainConfig, initMessage *arbostypes.ParsedInitMessage, stackConfig *node.Config, execConfig *gethexec.Config, + t *testing.T, info *BlockchainTestInfo, dataDir string, chainConfig *params.ChainConfig, initMessage *arbostypes.ParsedInitMessage, stackConfig *node.Config, 
execConfig *gethexec.Config, wasmCacheTag uint32, ) (*BlockchainTestInfo, *node.Node, ethdb.Database, ethdb.Database, *core.BlockChain) { if info == nil { info = NewArbTestInfo(t, chainConfig.ChainID) @@ -1311,7 +1329,7 @@ func createNonL1BlockChainWithStackConfig( Require(t, err) wasmData, err := stack.OpenDatabaseWithExtraOptions("wasm", 0, 0, "wasm/", false, conf.PersistentConfigDefault.Pebble.ExtraOptions("wasm")) Require(t, err) - chainDb := rawdb.WrapDatabaseWithWasm(chainData, wasmData, 0, execConfig.StylusTarget.WasmTargets()) + chainDb := rawdb.WrapDatabaseWithWasm(chainData, wasmData, wasmCacheTag, execConfig.StylusTarget.WasmTargets()) arbDb, err := stack.OpenDatabaseWithExtraOptions("arbitrumdata", 0, 0, "arbitrumdata/", false, conf.PersistentConfigDefault.Pebble.ExtraOptions("arbitrumdata")) Require(t, err) @@ -1382,6 +1400,7 @@ func Create2ndNodeWithConfig( valnodeConfig *valnode.Config, addresses *chaininfo.RollupAddresses, initMessage *arbostypes.ParsedInitMessage, + wasmCacheTag uint32, ) (*ethclient.Client, *arbnode.Node) { if nodeConfig == nil { nodeConfig = arbnode.ConfigDefaultL1NonSequencerTest() @@ -1403,7 +1422,7 @@ func Create2ndNodeWithConfig( Require(t, err) wasmData, err := chainStack.OpenDatabaseWithExtraOptions("wasm", 0, 0, "wasm/", false, conf.PersistentConfigDefault.Pebble.ExtraOptions("wasm")) Require(t, err) - chainDb := rawdb.WrapDatabaseWithWasm(chainData, wasmData, 0, execConfig.StylusTarget.WasmTargets()) + chainDb := rawdb.WrapDatabaseWithWasm(chainData, wasmData, wasmCacheTag, execConfig.StylusTarget.WasmTargets()) arbDb, err := chainStack.OpenDatabaseWithExtraOptions("arbitrumdata", 0, 0, "arbitrumdata/", false, conf.PersistentConfigDefault.Pebble.ExtraOptions("arbitrumdata")) Require(t, err) diff --git a/system_tests/das_test.go b/system_tests/das_test.go index 69872b3e62..ed3844d528 100644 --- a/system_tests/das_test.go +++ b/system_tests/das_test.go @@ -6,6 +6,7 @@ package arbtest import ( "context" "encoding/base64" + "errors" "io" "math/big" "net" @@ -43,18 +44,13 @@ func startLocalDASServer( pubkey, _, err := das.GenerateAndStoreKeys(keyDir) Require(t, err) - config := das.DataAvailabilityConfig{ - Enable: true, - Key: das.KeyConfig{ - KeyDir: keyDir, - }, - LocalFileStorage: das.LocalFileStorageConfig{ - Enable: true, - DataDir: dataDir, - }, - ParentChainNodeURL: "none", - RequestTimeout: 5 * time.Second, - } + config := das.DefaultDataAvailabilityConfig + config.Enable = true + config.Key = das.KeyConfig{KeyDir: keyDir} + config.ParentChainNodeURL = "none" + config.LocalFileStorage = das.DefaultLocalFileStorageConfig + config.LocalFileStorage.Enable = true + config.LocalFileStorage.DataDir = dataDir storageService, lifecycleManager, err := das.CreatePersistentStorageService(ctx, &config) defer lifecycleManager.StopAndWaitUntil(time.Second) @@ -326,3 +322,80 @@ func initTest(t *testing.T) { enableLogging(logLvl) } } + +func TestDASBatchPosterFallback(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Setup L1 + builder := NewNodeBuilder(ctx).DefaultConfig(t, true) + builder.chainConfig = params.ArbitrumDevTestDASChainConfig() + builder.BuildL1(t) + l1client := builder.L1.Client + l1info := builder.L1Info + + // Setup DAS server + dasDataDir := t.TempDir() + dasRpcServer, pubkey, backendConfig, _, restServerUrl := startLocalDASServer( + t, ctx, dasDataDir, l1client, builder.addresses.SequencerInbox) + authorizeDASKeyset(t, ctx, pubkey, l1info, l1client) + + // Setup sequence/batch-poster L2 node 
+ builder.nodeConfig.DataAvailability.Enable = true + builder.nodeConfig.DataAvailability.RPCAggregator = aggConfigForBackend(backendConfig) + builder.nodeConfig.DataAvailability.RestAggregator = das.DefaultRestfulClientAggregatorConfig + builder.nodeConfig.DataAvailability.RestAggregator.Enable = true + builder.nodeConfig.DataAvailability.RestAggregator.Urls = []string{restServerUrl} + builder.nodeConfig.DataAvailability.ParentChainNodeURL = "none" + builder.nodeConfig.BatchPoster.DisableDapFallbackStoreDataOnChain = true // Disable DAS fallback + builder.nodeConfig.BatchPoster.ErrorDelay = time.Millisecond * 250 // Increase error delay because we expect errors + builder.L2Info = NewArbTestInfo(t, builder.chainConfig.ChainID) + builder.L2Info.GenerateAccount("User2") + cleanup := builder.BuildL2OnL1(t) + defer cleanup() + l2client := builder.L2.Client + l2info := builder.L2Info + + // Setup secondary L2 node + nodeConfigB := arbnode.ConfigDefaultL1NonSequencerTest() + nodeConfigB.BlockValidator.Enable = false + nodeConfigB.DataAvailability.Enable = true + nodeConfigB.DataAvailability.RestAggregator = das.DefaultRestfulClientAggregatorConfig + nodeConfigB.DataAvailability.RestAggregator.Enable = true + nodeConfigB.DataAvailability.RestAggregator.Urls = []string{restServerUrl} + nodeConfigB.DataAvailability.ParentChainNodeURL = "none" + nodeBParams := SecondNodeParams{ + nodeConfig: nodeConfigB, + initData: &l2info.ArbInitData, + } + l2B, cleanupB := builder.Build2ndNode(t, &nodeBParams) + defer cleanupB() + + // Check batch posting using the DAS + checkBatchPosting(t, ctx, l1client, l2client, l1info, l2info, big.NewInt(1e12), l2B.Client) + + // Shutdown the DAS + err := dasRpcServer.Shutdown(ctx) + Require(t, err) + + // Send 2nd transaction and check it doesn't arrive on second node + tx, _ := TransferBalanceTo(t, "Owner", l2info.GetAddress("User2"), big.NewInt(1e12), l2info, l2client, ctx) + _, err = WaitForTx(ctx, l2B.Client, tx.Hash(), time.Second*3) + if err == nil || !errors.Is(err, context.DeadlineExceeded) { + Fatal(t, "expected context-deadline exceeded error, but got:", err) + } + + // Enable the DAP fallback and check the transaction on the second node. + // (We don't need to restart the node because of the hot-reload.) 
+ builder.nodeConfig.BatchPoster.DisableDapFallbackStoreDataOnChain = false + _, err = WaitForTx(ctx, l2B.Client, tx.Hash(), time.Second*3) + Require(t, err) + l2balance, err := l2B.Client.BalanceAt(ctx, l2info.GetAddress("User2"), nil) + Require(t, err) + if l2balance.Cmp(big.NewInt(2e12)) != 0 { + Fatal(t, "Unexpected balance:", l2balance) + } + + // Send another transaction with fallback on + checkBatchPosting(t, ctx, l1client, l2client, l1info, l2info, big.NewInt(3e12), l2B.Client) +} diff --git a/system_tests/program_test.go b/system_tests/program_test.go index cf8cd72559..4755096b26 100644 --- a/system_tests/program_test.go +++ b/system_tests/program_test.go @@ -1385,7 +1385,7 @@ func TestProgramCacheManager(t *testing.T) { isManager, err := arbWasmCache.IsCacheManager(nil, manager) assert(!isManager, err) - // athorize the manager + // authorize the manager ensure(arbOwner.AddWasmCacheManager(&ownerAuth, manager)) assert(arbWasmCache.IsCacheManager(nil, manager)) all, err := arbWasmCache.AllCacheManagers(nil) @@ -2013,7 +2013,7 @@ func checkWasmStoreContent(t *testing.T, wasmDb ethdb.KeyValueStore, targets []s } } -func deployWasmAndGetLruEntrySizeEstimateBytes( +func deployWasmAndGetEntrySizeEstimateBytes( t *testing.T, builder *NodeBuilder, auth bind.TransactOpts, @@ -2044,12 +2044,12 @@ func deployWasmAndGetLruEntrySizeEstimateBytes( module, err := statedb.TryGetActivatedAsm(rawdb.LocalTarget(), log.ModuleHash) Require(t, err, ", wasmName:", wasmName) - lruEntrySizeEstimateBytes := programs.GetLruEntrySizeEstimateBytes(module, log.Version, true) + entrySizeEstimateBytes := programs.GetEntrySizeEstimateBytes(module, log.Version, true) // just a sanity check - if lruEntrySizeEstimateBytes == 0 { - Fatal(t, "lruEntrySizeEstimateBytes is 0, wasmName:", wasmName) + if entrySizeEstimateBytes == 0 { + Fatal(t, "entrySizeEstimateBytes is 0, wasmName:", wasmName) } - return programAddress, lruEntrySizeEstimateBytes + return programAddress, entrySizeEstimateBytes } func TestWasmLruCache(t *testing.T) { @@ -2059,81 +2059,356 @@ func TestWasmLruCache(t *testing.T) { l2client := builder.L2.Client defer cleanup() + ensure := func(tx *types.Transaction, err error) *types.Receipt { + t.Helper() + Require(t, err) + receipt, err := EnsureTxSucceeded(ctx, l2client, tx) + Require(t, err) + return receipt + } + auth.GasLimit = 32000000 auth.Value = oneEth - fallibleProgramAddress, fallibleLruEntrySizeEstimateBytes := deployWasmAndGetLruEntrySizeEstimateBytes(t, builder, auth, "fallible") - keccakProgramAddress, keccakLruEntrySizeEstimateBytes := deployWasmAndGetLruEntrySizeEstimateBytes(t, builder, auth, "keccak") - mathProgramAddress, mathLruEntrySizeEstimateBytes := deployWasmAndGetLruEntrySizeEstimateBytes(t, builder, auth, "math") + fallibleProgramAddress, fallibleEntrySize := deployWasmAndGetEntrySizeEstimateBytes(t, builder, auth, "fallible") + keccakProgramAddress, keccakEntrySize := deployWasmAndGetEntrySizeEstimateBytes(t, builder, auth, "keccak") + mathProgramAddress, mathEntrySize := deployWasmAndGetEntrySizeEstimateBytes(t, builder, auth, "math") t.Log( - "lruEntrySizeEstimateBytes, ", - "fallible:", fallibleLruEntrySizeEstimateBytes, - "keccak:", keccakLruEntrySizeEstimateBytes, - "math:", mathLruEntrySizeEstimateBytes, + "entrySizeEstimateBytes, ", + "fallible:", fallibleEntrySize, + "keccak:", keccakEntrySize, + "math:", mathEntrySize, ) + if fallibleEntrySize == keccakEntrySize || fallibleEntrySize == mathEntrySize || keccakEntrySize == mathEntrySize { + Fatal(t, "at least two 
programs have the same entry size") + } programs.ClearWasmLruCache() - lruMetrics := programs.GetWasmLruCacheMetrics() - if lruMetrics.Count != 0 { - t.Fatalf("lruMetrics.Count, expected: %v, actual: %v", 0, lruMetrics.Count) - } - if lruMetrics.SizeBytes != 0 { - t.Fatalf("lruMetrics.SizeBytes, expected: %v, actual: %v", 0, lruMetrics.SizeBytes) - } + checkLruCacheMetrics(t, programs.WasmLruCacheMetrics{ + Count: 0, + SizeBytes: 0, + }) - programs.SetWasmLruCacheCapacity(fallibleLruEntrySizeEstimateBytes - 1) + programs.SetWasmLruCacheCapacity(fallibleEntrySize - 1) // fallible wasm program will not be cached since its size is greater than lru cache capacity tx := l2info.PrepareTxTo("Owner", &fallibleProgramAddress, l2info.TransferGas, nil, []byte{0x01}) - Require(t, l2client.SendTransaction(ctx, tx)) - _, err := EnsureTxSucceeded(ctx, l2client, tx) - Require(t, err) - lruMetrics = programs.GetWasmLruCacheMetrics() - if lruMetrics.Count != 0 { - t.Fatalf("lruMetrics.Count, expected: %v, actual: %v", 0, lruMetrics.Count) - } - if lruMetrics.SizeBytes != 0 { - t.Fatalf("lruMetrics.SizeBytes, expected: %v, actual: %v", 0, lruMetrics.SizeBytes) - } + ensure(tx, l2client.SendTransaction(ctx, tx)) + checkLruCacheMetrics(t, programs.WasmLruCacheMetrics{ + Count: 0, + SizeBytes: 0, + }) programs.SetWasmLruCacheCapacity( - fallibleLruEntrySizeEstimateBytes + keccakLruEntrySizeEstimateBytes + mathLruEntrySizeEstimateBytes - 1, + fallibleEntrySize + keccakEntrySize + mathEntrySize - 1, ) // fallible wasm program will be cached tx = l2info.PrepareTxTo("Owner", &fallibleProgramAddress, l2info.TransferGas, nil, []byte{0x01}) - Require(t, l2client.SendTransaction(ctx, tx)) - _, err = EnsureTxSucceeded(ctx, l2client, tx) - Require(t, err) - lruMetrics = programs.GetWasmLruCacheMetrics() - if lruMetrics.Count != 1 { - t.Fatalf("lruMetrics.Count, expected: %v, actual: %v", 1, lruMetrics.Count) - } - if lruMetrics.SizeBytes != fallibleLruEntrySizeEstimateBytes { - t.Fatalf("lruMetrics.SizeBytes, expected: %v, actual: %v", fallibleLruEntrySizeEstimateBytes, lruMetrics.SizeBytes) - } + ensure(tx, l2client.SendTransaction(ctx, tx)) + checkLruCacheMetrics(t, programs.WasmLruCacheMetrics{ + Count: 1, + SizeBytes: fallibleEntrySize, + }) // keccak wasm program will be cached tx = l2info.PrepareTxTo("Owner", &keccakProgramAddress, l2info.TransferGas, nil, []byte{0x01}) - Require(t, l2client.SendTransaction(ctx, tx)) - _, err = EnsureTxSucceeded(ctx, l2client, tx) - Require(t, err) - lruMetrics = programs.GetWasmLruCacheMetrics() - if lruMetrics.Count != 2 { - t.Fatalf("lruMetrics.Count, expected: %v, actual: %v", 2, lruMetrics.Count) + ensure(tx, l2client.SendTransaction(ctx, tx)) + checkLruCacheMetrics(t, programs.WasmLruCacheMetrics{ + Count: 2, + SizeBytes: fallibleEntrySize + keccakEntrySize, + }) + + // math wasm program will be cached, but fallible will be evicted since (fallible + keccak + math) > lruCacheCapacity + tx = l2info.PrepareTxTo("Owner", &mathProgramAddress, l2info.TransferGas, nil, []byte{0x01}) + ensure(tx, l2client.SendTransaction(ctx, tx)) + checkLruCacheMetrics(t, programs.WasmLruCacheMetrics{ + Count: 2, + SizeBytes: keccakEntrySize + mathEntrySize, + }) +} + +func checkLongTermCacheMetrics(t *testing.T, expected programs.WasmLongTermCacheMetrics) { + t.Helper() + longTermMetrics := programs.GetWasmCacheMetrics().LongTerm + if longTermMetrics.Count != expected.Count { + t.Fatalf("longTermMetrics.Count, expected: %v, actual: %v", expected.Count, longTermMetrics.Count) } - if 
lruMetrics.SizeBytes != fallibleLruEntrySizeEstimateBytes+keccakLruEntrySizeEstimateBytes { - t.Fatalf("lruMetrics.SizeBytes, expected: %v, actual: %v", fallibleLruEntrySizeEstimateBytes+keccakLruEntrySizeEstimateBytes, lruMetrics.SizeBytes) + if longTermMetrics.SizeBytes != expected.SizeBytes { + t.Fatalf("longTermMetrics.SizeBytes, expected: %v, actual: %v", expected.SizeBytes, longTermMetrics.SizeBytes) } +} - // math wasm program will be cached, but fallible will be evicted since (fallible + keccak + math) > lruCacheCapacity +func checkLruCacheMetrics(t *testing.T, expected programs.WasmLruCacheMetrics) { + t.Helper() + lruMetrics := programs.GetWasmCacheMetrics().Lru + if lruMetrics.Count != expected.Count { + t.Fatalf("lruMetrics.Count, expected: %v, actual: %v", expected.Count, lruMetrics.Count) + } + if lruMetrics.SizeBytes != expected.SizeBytes { + t.Fatalf("lruMetrics.SizeBytes, expected: %v, actual: %v", expected.SizeBytes, lruMetrics.SizeBytes) + } +} + +func TestWasmLongTermCache(t *testing.T) { + builder, ownerAuth, cleanup := setupProgramTest(t, true, func(builder *NodeBuilder) { + builder.WithStylusLongTermCache(true) + }) + ctx := builder.ctx + l2info := builder.L2Info + l2client := builder.L2.Client + defer cleanup() + + ensure := func(tx *types.Transaction, err error) *types.Receipt { + t.Helper() + Require(t, err) + receipt, err := EnsureTxSucceeded(ctx, l2client, tx) + Require(t, err) + return receipt + } + + arbWasmCache, err := pgen.NewArbWasmCache(types.ArbWasmCacheAddress, builder.L2.Client) + Require(t, err) + arbOwner, err := pgen.NewArbOwner(types.ArbOwnerAddress, builder.L2.Client) + Require(t, err) + ensure(arbOwner.SetInkPrice(&ownerAuth, 10_000)) + + ownerAuth.GasLimit = 32000000 + ownerAuth.Value = oneEth + + fallibleProgramAddress, fallibleEntrySize := deployWasmAndGetEntrySizeEstimateBytes(t, builder, ownerAuth, "fallible") + keccakProgramAddress, keccakEntrySize := deployWasmAndGetEntrySizeEstimateBytes(t, builder, ownerAuth, "keccak") + mathProgramAddress, mathEntrySize := deployWasmAndGetEntrySizeEstimateBytes(t, builder, ownerAuth, "math") + t.Log( + "entrySizeEstimateBytes, ", + "fallible:", fallibleEntrySize, + "keccak:", keccakEntrySize, + "math:", mathEntrySize, + ) + if fallibleEntrySize == keccakEntrySize || fallibleEntrySize == mathEntrySize || keccakEntrySize == mathEntrySize { + Fatal(t, "at least two programs have the same entry size") + } + + ownerAuth.Value = common.Big0 + + programs.ClearWasmLongTermCache() + checkLongTermCacheMetrics(t, programs.WasmLongTermCacheMetrics{ + Count: 0, + SizeBytes: 0, + }) + + // fallible wasm program will not be cached since caching is not set for this program + tx := l2info.PrepareTxTo("Owner", &fallibleProgramAddress, l2info.TransferGas, nil, []byte{0x01}) + ensure(tx, l2client.SendTransaction(ctx, tx)) + checkLongTermCacheMetrics(t, programs.WasmLongTermCacheMetrics{ + Count: 0, + SizeBytes: 0, + }) + + ensure(arbWasmCache.CacheProgram(&ownerAuth, fallibleProgramAddress)) + // fallible wasm program will be cached + tx = l2info.PrepareTxTo("Owner", &fallibleProgramAddress, l2info.TransferGas, nil, []byte{0x01}) + ensure(tx, l2client.SendTransaction(ctx, tx)) + checkLongTermCacheMetrics(t, programs.WasmLongTermCacheMetrics{ + Count: 1, + SizeBytes: fallibleEntrySize, + }) + + // keccak wasm program will be cached + ensure(arbWasmCache.CacheProgram(&ownerAuth, keccakProgramAddress)) + tx = l2info.PrepareTxTo("Owner", &keccakProgramAddress, l2info.TransferGas, nil, []byte{0x01}) + ensure(tx, 
l2client.SendTransaction(ctx, tx)) + checkLongTermCacheMetrics(t, programs.WasmLongTermCacheMetrics{ + Count: 2, + SizeBytes: fallibleEntrySize + keccakEntrySize, + }) + + // math wasm program will not be cached tx = l2info.PrepareTxTo("Owner", &mathProgramAddress, l2info.TransferGas, nil, []byte{0x01}) - Require(t, l2client.SendTransaction(ctx, tx)) - _, err = EnsureTxSucceeded(ctx, l2client, tx) + ensure(tx, l2client.SendTransaction(ctx, tx)) + checkLongTermCacheMetrics(t, programs.WasmLongTermCacheMetrics{ + Count: 2, + SizeBytes: fallibleEntrySize + keccakEntrySize, + }) + + // math wasm program will be cached + ensure(arbWasmCache.CacheProgram(&ownerAuth, mathProgramAddress)) + tx = l2info.PrepareTxTo("Owner", &mathProgramAddress, l2info.TransferGas, nil, []byte{0x01}) + ensure(tx, l2client.SendTransaction(ctx, tx)) + checkLongTermCacheMetrics(t, programs.WasmLongTermCacheMetrics{ + Count: 3, + SizeBytes: fallibleEntrySize + keccakEntrySize + mathEntrySize, + }) + + statedb, err := builder.L2.ExecNode.Backend.ArbInterface().BlockChain().State() Require(t, err) - lruMetrics = programs.GetWasmLruCacheMetrics() - if lruMetrics.Count != 2 { - t.Fatalf("lruMetrics.Count, expected: %v, actual: %v", 2, lruMetrics.Count) + fallibleProgramHash := statedb.GetCodeHash(fallibleProgramAddress) + keccakProgramHash := statedb.GetCodeHash(keccakProgramAddress) + mathProgramHash := statedb.GetCodeHash(mathProgramAddress) + + ensure(arbWasmCache.EvictCodehash(&ownerAuth, keccakProgramHash)) + checkLongTermCacheMetrics(t, programs.WasmLongTermCacheMetrics{ + Count: 2, + SizeBytes: fallibleEntrySize + mathEntrySize, + }) + + // keccak wasm program will not be cached + tx = l2info.PrepareTxTo("Owner", &keccakProgramAddress, l2info.TransferGas, nil, []byte{0x01}) + ensure(tx, l2client.SendTransaction(ctx, tx)) + checkLongTermCacheMetrics(t, programs.WasmLongTermCacheMetrics{ + Count: 2, + SizeBytes: fallibleEntrySize + mathEntrySize, + }) + + // keccak wasm program will be cached + ensure(arbWasmCache.CacheProgram(&ownerAuth, keccakProgramAddress)) + tx = l2info.PrepareTxTo("Owner", &mathProgramAddress, l2info.TransferGas, nil, []byte{0x01}) + ensure(tx, l2client.SendTransaction(ctx, tx)) + checkLongTermCacheMetrics(t, programs.WasmLongTermCacheMetrics{ + Count: 3, + SizeBytes: fallibleEntrySize + keccakEntrySize + mathEntrySize, + }) + + ensure(arbWasmCache.EvictCodehash(&ownerAuth, fallibleProgramHash)) + checkLongTermCacheMetrics(t, programs.WasmLongTermCacheMetrics{ + Count: 2, + SizeBytes: keccakEntrySize + mathEntrySize, + }) + + ensure(arbWasmCache.EvictCodehash(&ownerAuth, mathProgramHash)) + checkLongTermCacheMetrics(t, programs.WasmLongTermCacheMetrics{ + Count: 1, + SizeBytes: keccakEntrySize, + }) + + ensure(arbWasmCache.EvictCodehash(&ownerAuth, keccakProgramHash)) + checkLongTermCacheMetrics(t, programs.WasmLongTermCacheMetrics{ + Count: 0, + SizeBytes: 0, + }) +} + +func TestRepopulateWasmLongTermCacheFromLru(t *testing.T) { + builder, ownerAuth, cleanup := setupProgramTest(t, true, func(builder *NodeBuilder) { + builder.WithStylusLongTermCache(true) + }) + ctx := builder.ctx + l2info := builder.L2Info + l2client := builder.L2.Client + defer cleanup() + + ensure := func(tx *types.Transaction, err error) *types.Receipt { + t.Helper() + Require(t, err) + receipt, err := EnsureTxSucceeded(ctx, l2client, tx) + Require(t, err) + return receipt } - if lruMetrics.SizeBytes != keccakLruEntrySizeEstimateBytes+mathLruEntrySizeEstimateBytes { - t.Fatalf("lruMetrics.SizeBytes, expected: %v, actual: %v", 
keccakLruEntrySizeEstimateBytes+mathLruEntrySizeEstimateBytes, lruMetrics.SizeBytes) + + arbWasmCache, err := pgen.NewArbWasmCache(types.ArbWasmCacheAddress, builder.L2.Client) + Require(t, err) + arbOwner, err := pgen.NewArbOwner(types.ArbOwnerAddress, builder.L2.Client) + Require(t, err) + ensure(arbOwner.SetInkPrice(&ownerAuth, 10_000)) + + ownerAuth.GasLimit = 32000000 + ownerAuth.Value = oneEth + + fallibleProgramAddress, fallibleEntrySize := deployWasmAndGetEntrySizeEstimateBytes(t, builder, ownerAuth, "fallible") + keccakProgramAddress, keccakEntrySize := deployWasmAndGetEntrySizeEstimateBytes(t, builder, ownerAuth, "keccak") + mathProgramAddress, mathEntrySize := deployWasmAndGetEntrySizeEstimateBytes(t, builder, ownerAuth, "math") + if fallibleEntrySize == keccakEntrySize || fallibleEntrySize == mathEntrySize || keccakEntrySize == mathEntrySize { + Fatal(t, "at least two programs have the same entry size") + } + + ownerAuth.Value = common.Big0 + + programs.ClearWasmLongTermCache() + programs.ClearWasmLruCache() + // only 2 out of 3 programs should fit lru + programs.SetWasmLruCacheCapacity( + fallibleEntrySize + keccakEntrySize + mathEntrySize - 1, + ) + + checkLongTermCacheMetrics(t, programs.WasmLongTermCacheMetrics{ + Count: 0, + SizeBytes: 0, + }) + checkLruCacheMetrics(t, programs.WasmLruCacheMetrics{ + Count: 0, + SizeBytes: 0, + }) + + ensure(arbWasmCache.CacheProgram(&ownerAuth, fallibleProgramAddress)) + checkLruCacheMetrics(t, programs.WasmLruCacheMetrics{ + Count: 0, + SizeBytes: 0, + }) + checkLongTermCacheMetrics(t, programs.WasmLongTermCacheMetrics{ + Count: 1, + SizeBytes: fallibleEntrySize, + }) + + // clear long term cache to emulate restart + programs.ClearWasmLongTermCache() + programs.ClearWasmLruCache() + + checkLruCacheMetrics(t, programs.WasmLruCacheMetrics{ + Count: 0, + SizeBytes: 0, + }) + checkLongTermCacheMetrics(t, programs.WasmLongTermCacheMetrics{ + Count: 0, + SizeBytes: 0, + }) + + nonce := builder.L2Info.GetInfoWithPrivKey("Owner").Nonce.Load() + tx := l2info.PrepareTxTo("Owner", &fallibleProgramAddress, l2info.TransferGas, nil, []byte{0x01}) + _, err = arbutil.SendTxAsCall(ctx, l2client, tx, l2info.GetAddress("Owner"), nil, true) + Require(t, err) + // restore nonce in L2Info + builder.L2Info.GetInfoWithPrivKey("Owner").Nonce.Store(nonce) + // fallibleProgram should be added only to lru cache as the api call should be processed with wasm cache tag = 0 + checkLruCacheMetrics(t, programs.WasmLruCacheMetrics{ + Count: 1, + SizeBytes: fallibleEntrySize, + }) + checkLongTermCacheMetrics(t, programs.WasmLongTermCacheMetrics{ + Count: 0, + SizeBytes: 0, + }) + + tx = l2info.PrepareTxTo("Owner", &keccakProgramAddress, l2info.TransferGas, nil, []byte{0x01}) + ensure(tx, l2client.SendTransaction(ctx, tx)) + checkLruCacheMetrics(t, programs.WasmLruCacheMetrics{ + Count: 2, + SizeBytes: fallibleEntrySize + keccakEntrySize, + }) + checkLongTermCacheMetrics(t, programs.WasmLongTermCacheMetrics{ + Count: 0, + SizeBytes: 0, + }) + + tx = l2info.PrepareTxTo("Owner", &fallibleProgramAddress, l2info.TransferGas, nil, []byte{0x01}) + ensure(tx, l2client.SendTransaction(ctx, tx)) + checkLruCacheMetrics(t, programs.WasmLruCacheMetrics{ + Count: 2, + SizeBytes: fallibleEntrySize + keccakEntrySize, + }) + checkLongTermCacheMetrics(t, programs.WasmLongTermCacheMetrics{ + Count: 1, + SizeBytes: fallibleEntrySize, + }) + + // mathProgram should end up in lru cache and + // as a result fallibleProgram should be evicted as the least recently used item (tx that restores the program 
back to long term cache shouldn't promote the lru item); + // fallibleProgram should remain in long term cache + tx = l2info.PrepareTxTo("Owner", &mathProgramAddress, l2info.TransferGas, nil, []byte{0x01}) + ensure(tx, l2client.SendTransaction(ctx, tx)) + checkLruCacheMetrics(t, programs.WasmLruCacheMetrics{ + Count: 2, + SizeBytes: keccakEntrySize + mathEntrySize, + }) + checkLongTermCacheMetrics(t, programs.WasmLongTermCacheMetrics{ + Count: 1, + SizeBytes: fallibleEntrySize, + }) } diff --git a/system_tests/recreatestate_rpc_test.go b/system_tests/recreatestate_rpc_test.go index 4833d35536..22329a1be5 100644 --- a/system_tests/recreatestate_rpc_test.go +++ b/system_tests/recreatestate_rpc_test.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "math/big" + "runtime" "strings" "sync" "testing" @@ -338,6 +339,7 @@ func TestRecreateStateForRPCBlockNotFoundWhileRecreating(t *testing.T) { } func testSkippingSavingStateAndRecreatingAfterRestart(t *testing.T, cacheConfig *gethexec.CachingConfig, txCount int) { + t.Parallel() maxRecreateStateDepth := int64(30 * 1000 * 1000) ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -451,20 +453,26 @@ func TestSkippingSavingStateAndRecreatingAfterRestart(t *testing.T) { cacheConfig.SnapshotCache = 0 // disable snapshots cacheConfig.BlockAge = 0 // use only Caching.BlockCount to keep only last N blocks in dirties cache, no matter how new they are + runTestCase := func(t *testing.T, cacheConfig gethexec.CachingConfig, txes int) { + t.Run(fmt.Sprintf("skip-blocks-%d-skip-gas-%d-txes-%d", cacheConfig.MaxNumberOfBlocksToSkipStateSaving, cacheConfig.MaxAmountOfGasToSkipStateSaving, txes), func(t *testing.T) { + testSkippingSavingStateAndRecreatingAfterRestart(t, &cacheConfig, txes) + }) + } + // test defaults - testSkippingSavingStateAndRecreatingAfterRestart(t, &cacheConfig, 512) + runTestCase(t, cacheConfig, 512) cacheConfig.MaxNumberOfBlocksToSkipStateSaving = 127 cacheConfig.MaxAmountOfGasToSkipStateSaving = 0 - testSkippingSavingStateAndRecreatingAfterRestart(t, &cacheConfig, 512) + runTestCase(t, cacheConfig, 512) cacheConfig.MaxNumberOfBlocksToSkipStateSaving = 0 cacheConfig.MaxAmountOfGasToSkipStateSaving = 15 * 1000 * 1000 - testSkippingSavingStateAndRecreatingAfterRestart(t, &cacheConfig, 512) + runTestCase(t, cacheConfig, 512) cacheConfig.MaxNumberOfBlocksToSkipStateSaving = 127 cacheConfig.MaxAmountOfGasToSkipStateSaving = 15 * 1000 * 1000 - testSkippingSavingStateAndRecreatingAfterRestart(t, &cacheConfig, 512) + runTestCase(t, cacheConfig, 512) // lower number of blocks in triegc below 100 blocks, to be able to check for nonexistence in testSkippingSavingStateAndRecreatingAfterRestart (it doesn't check last BlockCount blocks as some of them may be persisted on node shutdown) cacheConfig.BlockCount = 16 @@ -480,21 +488,16 @@ func TestSkippingSavingStateAndRecreatingAfterRestart(t *testing.T) { cacheConfig.MaxAmountOfGasToSkipStateSaving = skipGas // #nosec G115 cacheConfig.MaxNumberOfBlocksToSkipStateSaving = uint32(skipBlocks) - testSkippingSavingStateAndRecreatingAfterRestart(t, &cacheConfig, 100) + runTestCase(t, cacheConfig, 100) } } } -func TestGettingStateForRPCFullNode(t *testing.T) { +func testGettingState(t *testing.T, execConfig *gethexec.Config) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - execConfig := ExecConfigDefaultTest(t) - execConfig.Caching.SnapshotCache = 0 // disable snapshots - execConfig.Caching.BlockAge = 0 // use only Caching.BlockCount to keep only last N blocks in dirties 
cache, no matter how new they are - execConfig.Sequencer.MaxBlockSpeed = 0 - execConfig.Sequencer.MaxTxDataSize = 150 // 1 test tx ~= 110 builder, cancelNode := prepareNodeWithHistory(t, ctx, execConfig, 16) - execNode, _ := builder.L2.ExecNode, builder.L2.Client + execNode := builder.L2.ExecNode defer cancelNode() bc := execNode.Backend.ArbInterface().BlockChain() api := execNode.Backend.APIBackend() @@ -521,18 +524,40 @@ blockCountRequiredToFlushDirties := builder.execConfig.Caching.BlockCount makeSomeTransfers(t, ctx, builder, blockCountRequiredToFlushDirties) + // force garbage collection to check that it doesn't break anything + runtime.GC() + exists = state.Exist(addr) err = state.Error() Require(t, err) if !exists { Fatal(t, "User2 address does not exist in the state") } + + // force garbage collection of StateDB object, which should cause the state finalizer to run + state = nil + runtime.GC() + _, err = bc.StateAt(header.Root) + if err == nil { + Fatal(t, "StateAndHeaderByNumber didn't fail as expected") + } + expectedErr := &trie.MissingNodeError{} + if !errors.As(err, &expectedErr) { + Fatal(t, "StateAndHeaderByNumber failed with unexpected error:", err) + } } -func TestGettingStateForRPCHybridArchiveNode(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() +func TestGettingState(t *testing.T) { execConfig := ExecConfigDefaultTest(t) + execConfig.Caching.SnapshotCache = 0 // disable snapshots + execConfig.Caching.BlockAge = 0 // use only Caching.BlockCount to keep only last N blocks in dirties cache, no matter how new they are + execConfig.Sequencer.MaxBlockSpeed = 0 + execConfig.Sequencer.MaxTxDataSize = 150 // 1 test tx ~= 110 + t.Run("full-node", func(t *testing.T) { + testGettingState(t, execConfig) + }) + + execConfig = ExecConfigDefaultTest(t) execConfig.Caching.Archive = true // For now Archive node should use HashScheme execConfig.Caching.StateScheme = rawdb.HashScheme @@ -542,42 +567,13 @@ execConfig.Caching.BlockAge = 0 // use only Caching.BlockCount to keep only last N blocks in dirties cache, no matter how new they are execConfig.Sequencer.MaxBlockSpeed = 0 execConfig.Sequencer.MaxTxDataSize = 150 // 1 test tx ~= 110 - builder, cancelNode := prepareNodeWithHistory(t, ctx, execConfig, 16) - execNode, _ := builder.L2.ExecNode, builder.L2.Client - defer cancelNode() - bc := execNode.Backend.ArbInterface().BlockChain() - api := execNode.Backend.APIBackend() - - header := bc.CurrentBlock() - if header == nil { - Fatal(t, "failed to get current block header") - } - // #nosec G115 - state, _, err := api.StateAndHeaderByNumber(ctx, rpc.BlockNumber(header.Number.Uint64())) - Require(t, err) - addr := builder.L2Info.GetAddress("User2") - exists := state.Exist(addr) - err = state.Error() - Require(t, err) - if !exists { - Fatal(t, "User2 address does not exist in the state") - } - // Get the state again to avoid caching - // #nosec G115 - state, _, err = api.StateAndHeaderByNumber(ctx, rpc.BlockNumber(header.Number.Uint64())) - Require(t, err) - - blockCountRequiredToFlushDirties := builder.execConfig.Caching.BlockCount - makeSomeTransfers(t, ctx, builder, blockCountRequiredToFlushDirties) - - exists = state.Exist(addr) - err = state.Error() - Require(t, err) - if !exists { - Fatal(t, "User2 address does not exist in the state") - } + t.Run("archive-node", func(t *testing.T) { + testGettingState(t, execConfig) + }) } +// regression 
test for an issue caused by accessing block state that has just been committed to TrieDB but not yet referenced in core.BlockChain.writeBlockWithState (here called state of "recent" block) +// before the corresponding fix, access to the recent block state caused premature garbage collection of the head block state func TestStateAndHeaderForRecentBlock(t *testing.T) { threads := 32 ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -618,15 +614,22 @@ }() api := builder.L2.ExecNode.Backend.APIBackend() db := builder.L2.ExecNode.Backend.ChainDb() - i := 1 + + recentBlock := 1 var mtx sync.RWMutex var wgCallers sync.WaitGroup for j := 0; j < threads && ctx.Err() == nil; j++ { wgCallers.Add(1) + // each thread attempts to get state for a block that is just being created (here called recent): + // 1. Before state trie node is referenced in core.BlockChain.writeBlockWithState, block body is written to database with key prefix `b` followed by block number and then block hash (see: rawdb.blockBodyKey) + // 2. Each thread tries to read the block body entry to: a. extract recent block hash b. congest resource usage to slow down execution of core.BlockChain.writeBlockWithState + // 3. After extracting the hash from block body entry key, StateAndHeaderByNumberOrHash is called for the hash. It is expected that it will: + // a. either fail with "ahead of current block" if we made it before rawdb.WriteCanonicalHash is called in core.BlockChain.writeHeadBlock, which is called after writeBlockWithState finishes, + // b. or it will succeed if the canonical hash was written for the block meaning that writeBlockWithState was fully executed (i.a. state root trie node correctly referenced) - then the recentBlock is advanced go func() { defer wgCallers.Done() mtx.RLock() - blockNumber := i + blockNumber := recentBlock mtx.RUnlock() for blockNumber < 300 && ctx.Err() == nil { prefix := make([]byte, 8) @@ -645,8 +648,8 @@ _, _, err := api.StateAndHeaderByNumberOrHash(ctx, rpc.BlockNumberOrHash{BlockHash: &blockHash}) if err == nil { mtx.Lock() - if blockNumber == i { - i++ + if blockNumber == recentBlock { + recentBlock++ } mtx.Unlock() break @@ -666,7 +669,7 @@ } it.Release() mtx.RLock() - blockNumber = i + blockNumber = recentBlock mtx.RUnlock() } }() diff --git a/system_tests/seq_coordinator_test.go b/system_tests/seq_coordinator_test.go index 1b8926a1b9..e9b2adabe8 100644 --- a/system_tests/seq_coordinator_test.go +++ b/system_tests/seq_coordinator_test.go @@ -12,7 +12,7 @@ import ( "testing" "time" - "github.com/go-redis/redis/v8" + "github.com/redis/go-redis/v9" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/log" diff --git a/system_tests/state_fuzz_test.go b/system_tests/state_fuzz_test.go index 24140e480d..c8312350e6 100644 --- a/system_tests/state_fuzz_test.go +++ b/system_tests/state_fuzz_test.go @@ -38,6 +38,7 @@ func BuildBlock( chainConfig *params.ChainConfig, inbox arbstate.InboxBackend, seqBatch []byte, + runMode core.MessageRunMode, ) (*types.Block, error) { var delayedMessagesRead uint64 if lastBlockHeader != nil { @@ -59,11 +60,13 @@ } err = l1Message.FillInBatchGasCost(batchFetcher) if err != nil { - return nil, err + // skip malformed batch posting report + // nolint:nilerr + return nil, nil } block, _, err := arbos.ProduceBlock( - l1Message, delayedMessagesRead, lastBlockHeader, statedb, 
chainContext, chainConfig, false, + l1Message, delayedMessagesRead, lastBlockHeader, statedb, chainContext, chainConfig, false, runMode, ) return block, err } @@ -127,7 +130,7 @@ func (c noopChainContext) GetHeader(common.Hash, uint64) *types.Header { } func FuzzStateTransition(f *testing.F) { - f.Fuzz(func(t *testing.T, compressSeqMsg bool, seqMsg []byte, delayedMsg []byte) { + f.Fuzz(func(t *testing.T, compressSeqMsg bool, seqMsg []byte, delayedMsg []byte, runModeSeed uint8) { if len(seqMsg) > 0 && daprovider.IsL1AuthenticatedMessageHeaderByte(seqMsg[0]) { return } @@ -201,7 +204,9 @@ positionWithinMessage: 0, delayedMessages: delayedMessages, } - _, err = BuildBlock(statedb, genesis, noopChainContext{}, params.ArbitrumOneChainConfig(), inbox, seqBatch) + numberOfMessageRunModes := uint8(core.MessageReplayMode) + 1 // TODO update number of run modes when new mode is added + runMode := core.MessageRunMode(runModeSeed % numberOfMessageRunModes) + _, err = BuildBlock(statedb, genesis, noopChainContext{}, params.ArbitrumOneChainConfig(), inbox, seqBatch, runMode) if err != nil { // With the fixed header it shouldn't be possible to read a delayed message, // and no other type of error should be possible. diff --git a/util/redisutil/redis_coordinator.go b/util/redisutil/redis_coordinator.go index 2c12ffec50..62cf76c0bd 100644 --- a/util/redisutil/redis_coordinator.go +++ b/util/redisutil/redis_coordinator.go @@ -6,7 +6,7 @@ import ( "fmt" "strings" - "github.com/go-redis/redis/v8" + "github.com/redis/go-redis/v9" "github.com/ethereum/go-ethereum/log" diff --git a/util/redisutil/redisutil.go b/util/redisutil/redisutil.go index f89c250e9a..01ba836d5b 100644 --- a/util/redisutil/redisutil.go +++ b/util/redisutil/redisutil.go @@ -1,6 +1,6 @@ package redisutil -import "github.com/go-redis/redis/v8" +import "github.com/redis/go-redis/v9" func RedisClientFromURL(url string) (redis.UniversalClient, error) { if url == "" { diff --git a/validator/client/redis/producer.go b/validator/client/redis/producer.go index adc2f34af5..c5726ffe8b 100644 --- a/validator/client/redis/producer.go +++ b/validator/client/redis/producer.go @@ -9,7 +9,6 @@ import ( "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" - "github.com/go-redis/redis/v8" "github.com/offchainlabs/nitro/pubsub" "github.com/offchainlabs/nitro/util/containers" "github.com/offchainlabs/nitro/util/redisutil" @@ -17,6 +16,7 @@ import ( "github.com/offchainlabs/nitro/validator" "github.com/offchainlabs/nitro/validator/server_api" "github.com/offchainlabs/nitro/validator/server_common" + "github.com/redis/go-redis/v9" "github.com/spf13/pflag" ) diff --git a/validator/valnode/redis/consumer.go b/validator/valnode/redis/consumer.go index e0d53ffb2e..4392a3c91e 100644 --- a/validator/valnode/redis/consumer.go +++ b/validator/valnode/redis/consumer.go @@ -161,9 +161,13 @@ func (s *ValidationServer) Start(ctx_in context.Context) { res, err := valRun.Await(ctx) if err != nil { log.Error("Error validating", "request value", work.req.Value, "error", err) + work.req.Ack() } else { log.Debug("done work", "thread", i, "workid", work.req.ID) - if err := s.consumers[work.moduleRoot].SetResult(ctx, work.req.ID, res); err != nil { + err := s.consumers[work.moduleRoot].SetResult(ctx, work.req.ID, res) + // Even on error we close ackNotifier as there's no retry mechanism here and closing it will allow other consumers to autoclaim + work.req.Ack() + if err != 
nil { log.Error("Error setting result for request", "id", work.req.ID, "result", res, "error", err) } log.Debug("set result", "thread", i, "workid", work.req.ID)
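
For reference, a minimal sketch of a combined assertion helper built on programs.GetWasmCacheMetrics(), in the style of the checkLruCacheMetrics and checkLongTermCacheMetrics helpers above; the package name and import path are assumptions, and only the Count and SizeBytes fields exercised by the tests above are compared:

package arbtest // assumed package of the system tests above

import (
	"testing"

	"github.com/offchainlabs/nitro/arbos/programs" // assumed import path of the programs package
)

// checkWasmCacheMetrics is a hypothetical helper that reads both metric
// structs in one call and fails the test on the first mismatch.
func checkWasmCacheMetrics(t *testing.T, expectedLru programs.WasmLruCacheMetrics, expectedLongTerm programs.WasmLongTermCacheMetrics) {
	t.Helper()
	metrics := programs.GetWasmCacheMetrics()
	if metrics.Lru.Count != expectedLru.Count || metrics.Lru.SizeBytes != expectedLru.SizeBytes {
		t.Fatalf("lru metrics, expected: count=%v size=%v, actual: count=%v size=%v",
			expectedLru.Count, expectedLru.SizeBytes, metrics.Lru.Count, metrics.Lru.SizeBytes)
	}
	if metrics.LongTerm.Count != expectedLongTerm.Count || metrics.LongTerm.SizeBytes != expectedLongTerm.SizeBytes {
		t.Fatalf("long term metrics, expected: count=%v size=%v, actual: count=%v size=%v",
			expectedLongTerm.Count, expectedLongTerm.SizeBytes, metrics.LongTerm.Count, metrics.LongTerm.SizeBytes)
	}
}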