Skip to content

Commit

Permalink
Dynamic catchain delays, state serialization improvements (ton-blockchain#1140)
Browse files Browse the repository at this point in the history

* Validator improvements

* Fix cancelling state serialization
* Disable state serializer on all mainnet validators
* Flag --catchain-max-block-delay-slow

* Set default catchain-max-block-delay to 0.4, delay-slow to 1.0

---------

Co-authored-by: SpyCheese <[email protected]>
  • Loading branch information
EmelyanenkoK and SpyCheese authored Aug 30, 2024
1 parent 97c57c3 commit e081111
Show file tree
Hide file tree
Showing 15 changed files with 84 additions and 27 deletions.
6 changes: 6 additions & 0 deletions catchain/catchain-receiver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,12 @@ void CatChainReceiverImpl::add_block(td::BufferSlice payload, std::vector<CatCha
}

int height = prev->height_ + 1;
auto max_block_height = get_max_block_height(opts_, sources_.size());
if (height > max_block_height) {
VLOG(CATCHAIN_WARNING) << this << ": cannot create block: max height exceeded (" << max_block_height << ")";
active_send_ = false;
return;
}
auto block_data = create_tl_object<ton_api::catchain_block_data>(std::move(prev), std::move(deps_arr));
auto block = create_tl_object<ton_api::catchain_block>(incarnation_, local_idx_, height, std::move(block_data),
td::BufferSlice());
Expand Down
17 changes: 15 additions & 2 deletions validator-engine/validator-engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1448,6 +1448,9 @@ td::Status ValidatorEngine::load_global_config() {
if (catchain_max_block_delay_) {
validator_options_.write().set_catchain_max_block_delay(catchain_max_block_delay_.value());
}
if (catchain_max_block_delay_slow_) {
validator_options_.write().set_catchain_max_block_delay_slow(catchain_max_block_delay_slow_.value());
}

std::vector<ton::BlockIdExt> h;
for (auto &x : conf.validator_->hardforks_) {
Expand Down Expand Up @@ -4072,7 +4075,7 @@ int main(int argc, char *argv[]) {
logger_ = td::TsFileLog::create(fname.str()).move_as_ok();
td::log_interface = logger_.get();
});
p.add_checked_option('s', "state-ttl", "state will be gc'd after this time (in seconds) default=3600",
p.add_checked_option('s', "state-ttl", "state will be gc'd after this time (in seconds) default=86400",
[&](td::Slice fname) {
auto v = td::to_double(fname);
if (v <= 0) {
Expand Down Expand Up @@ -4233,7 +4236,7 @@ int main(int argc, char *argv[]) {
"preload all cells from CellDb on startup (recommended to use with big enough celldb-cache-size and celldb-direct-io)",
[&]() { acts.push_back([&x]() { td::actor::send_closure(x, &ValidatorEngine::set_celldb_preload_all, true); }); });
p.add_checked_option(
'\0', "catchain-max-block-delay", "delay before creating a new catchain block, in seconds (default: 0.5)",
'\0', "catchain-max-block-delay", "delay before creating a new catchain block, in seconds (default: 0.4)",
[&](td::Slice s) -> td::Status {
auto v = td::to_double(s);
if (v < 0) {
Expand All @@ -4242,6 +4245,16 @@ int main(int argc, char *argv[]) {
acts.push_back([&x, v]() { td::actor::send_closure(x, &ValidatorEngine::set_catchain_max_block_delay, v); });
return td::Status::OK();
});
p.add_checked_option(
'\0', "catchain-max-block-delay-slow", "max extended catchain block delay (for too long rounds), (default: 1.0)",
[&](td::Slice s) -> td::Status {
auto v = td::to_double(s);
if (v < 0) {
return td::Status::Error("catchain-max-block-delay-slow should be non-negative");
}
acts.push_back([&x, v]() { td::actor::send_closure(x, &ValidatorEngine::set_catchain_max_block_delay_slow, v); });
return td::Status::OK();
});
p.add_option(
'\0', "fast-state-serializer",
"faster persistent state serializer, but requires more RAM (enabled automatically on machines with >= 90GB RAM)",
Expand Down
5 changes: 4 additions & 1 deletion validator-engine/validator-engine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ class ValidatorEngine : public td::actor::Actor {
td::optional<td::uint64> celldb_cache_size_ = 1LL << 30;
bool celldb_direct_io_ = false;
bool celldb_preload_all_ = false;
td::optional<double> catchain_max_block_delay_;
td::optional<double> catchain_max_block_delay_, catchain_max_block_delay_slow_;
bool read_config_ = false;
bool started_keyring_ = false;
bool started_ = false;
Expand Down Expand Up @@ -300,6 +300,9 @@ class ValidatorEngine : public td::actor::Actor {
// Records the value supplied for --catchain-max-block-delay (seconds).
// load_global_config() copies it into the validator options when it is set.
void set_catchain_max_block_delay(double value) {
catchain_max_block_delay_ = value;
}
// Records the value supplied for --catchain-max-block-delay-slow (seconds).
// load_global_config() copies it into the validator options when it is set.
void set_catchain_max_block_delay_slow(double value) {
catchain_max_block_delay_slow_ = value;
}
// Stores the requested on/off state in fast_state_serializer_enabled_.
void set_fast_state_serializer_enabled(bool value) {
fast_state_serializer_enabled_ = value;
}
Expand Down
8 changes: 8 additions & 0 deletions validator-session/validator-session-state.h
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,14 @@ class ValidatorSessionState : public ValidatorSessionDescription::RootObject {
// Returns the element of att_ stored for source index src_idx.
// NOTE(review): presumably the latest attempt/timestamp seen from that source — confirm.
auto get_ts(td::uint32 src_idx) const {
return att_->at(src_idx);
}
// Returns how many attempts separate the current attempt (derived from
// desc.get_ts()) from the first attempt recorded for this node in the
// current round.
//
// Yields 0 when the recorded first attempt is 0 or when the current attempt
// number is smaller than the recorded first attempt.
// NOTE(review): a first attempt of 0 presumably means "none recorded yet" — confirm.
td::uint32 cur_attempt_in_round(const ValidatorSessionDescription& desc) const {
  const td::uint32 round_start = cur_round_->get_first_attempt(desc.get_self_idx());
  const td::uint32 attempt_now = desc.get_attempt_seqno(desc.get_ts());
  const bool has_valid_start = round_start != 0 && attempt_now >= round_start;
  return has_valid_start ? attempt_now - round_start : 0;
}

const SentBlock* choose_block_to_sign(ValidatorSessionDescription& desc, td::uint32 src_idx, bool& found) const;
const SentBlock* get_committed_block(ValidatorSessionDescription& desc, td::uint32 seqno) const;
Expand Down
18 changes: 15 additions & 3 deletions validator-session/validator-session.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -813,13 +813,25 @@ void ValidatorSessionImpl::request_new_block(bool now) {
} else {
double lambda = 10.0 / description().get_total_nodes();
double x = -1 / lambda * log(td::Random::fast(1, 999) * 0.001);
if (x > catchain_max_block_delay_) { // default = 0.5
x = catchain_max_block_delay_;
}
x = std::min(x, get_current_max_block_delay()); // default = 0.4
td::actor::send_closure(catchain_, &catchain::CatChain::need_new_block, td::Timestamp::in(x));
}
}

double ValidatorSessionImpl::get_current_max_block_delay() const {
td::uint32 att = real_state_->cur_attempt_in_round(*description_);
td::uint32 att1 = description_->opts().max_round_attempts;
if (att <= att1) {
return catchain_max_block_delay_;
}
td::uint32 att2 = att1 + 4;
if (att >= att2) {
return catchain_max_block_delay_slow_;
}
return catchain_max_block_delay_ +
(catchain_max_block_delay_slow_ - catchain_max_block_delay_) * (double)(att - att1) / (double)(att2 - att1);
}

void ValidatorSessionImpl::on_new_round(td::uint32 round) {
if (round != 0) {
CHECK(cur_round_ < round);
Expand Down
2 changes: 1 addition & 1 deletion validator-session/validator-session.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ class ValidatorSession : public td::actor::Actor {
virtual void get_validator_group_info_for_litequery(
td::uint32 cur_round,
td::Promise<std::vector<tl_object_ptr<lite_api::liteServer_nonfinal_candidateInfo>>> promise) = 0;
virtual void set_catchain_max_block_delay(double value) = 0;
virtual void set_catchain_max_block_delay(double delay, double delay_slow) = 0;

static td::actor::ActorOwn<ValidatorSession> create(
catchain::CatChainSessionId session_id, ValidatorSessionOptions opts, PublicKeyHash local_id,
Expand Down
8 changes: 6 additions & 2 deletions validator-session/validator-session.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ class ValidatorSessionImpl : public ValidatorSession {
std::unique_ptr<ValidatorSessionDescription> description_;

double catchain_max_block_delay_ = 0.4;
double catchain_max_block_delay_slow_ = 1.0;

void on_new_round(td::uint32 round);
void on_catchain_started();
Expand Down Expand Up @@ -150,6 +151,7 @@ class ValidatorSessionImpl : public ValidatorSession {
}

void request_new_block(bool now);
double get_current_max_block_delay() const;
void get_broadcast_p2p(PublicKeyHash node, ValidatorSessionFileHash file_hash,
ValidatorSessionCollatedDataFileHash collated_data_file_hash, PublicKeyHash src,
td::uint32 round, ValidatorSessionRootHash root_hash, td::Promise<td::BufferSlice> promise,
Expand Down Expand Up @@ -191,8 +193,10 @@ class ValidatorSessionImpl : public ValidatorSession {
void get_validator_group_info_for_litequery(
td::uint32 cur_round,
td::Promise<std::vector<tl_object_ptr<lite_api::liteServer_nonfinal_candidateInfo>>> promise) override;
void set_catchain_max_block_delay(double value) override {
catchain_max_block_delay_ = value;

// Updates both block-delay limits (seconds): `delay` is the regular limit and
// `delay_slow` the extended one; both are stored as-is, without validation.
void set_catchain_max_block_delay(double delay, double delay_slow) override {
catchain_max_block_delay_ = delay;
catchain_max_block_delay_slow_ = delay_slow;
}

void process_blocks(std::vector<catchain::CatChainBlock *> blocks);
Expand Down
2 changes: 2 additions & 0 deletions validator/impl/shard.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ ShardStateQ::ShardStateQ(const ShardStateQ& other)
, root(other.root)
, lt(other.lt)
, utime(other.utime)
, global_id_(other.global_id_)
, before_split_(other.before_split_)
, fake_split_(other.fake_split_)
, fake_merge_(other.fake_merge_) {
Expand Down Expand Up @@ -121,6 +122,7 @@ td::Status ShardStateQ::init() {
}
lt = info.gen_lt;
utime = info.gen_utime;
global_id_ = info.global_id;
before_split_ = info.before_split;
block::ShardId id{info.shard_id};
ton::BlockId hdr_id{ton::ShardIdFull(id), info.seq_no};
Expand Down
4 changes: 4 additions & 0 deletions validator/impl/shard.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class ShardStateQ : virtual public ShardState {
Ref<vm::Cell> root;
LogicalTime lt{0};
UnixTime utime{0};
td::int32 global_id_{0};
bool before_split_{false};
bool fake_split_{false};
bool fake_merge_{false};
Expand Down Expand Up @@ -81,6 +82,9 @@ class ShardStateQ : virtual public ShardState {
// Returns the stored logical time `lt` (0 until it has been assigned).
LogicalTime get_logical_time() const override {
return lt;
}
// Returns the stored global id `global_id_` (0 until it has been assigned).
td::int32 get_global_id() const override {
return global_id_;
}
// Returns a copy of the optional `master_ref` member.
td::optional<BlockIdExt> get_master_ref() const override {
return master_ref;
}
Expand Down
1 change: 1 addition & 0 deletions validator/interfaces/shard.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class ShardState : public td::CntObject {

virtual UnixTime get_unix_time() const = 0;
virtual LogicalTime get_logical_time() const = 0;
virtual td::int32 get_global_id() const = 0;
virtual ShardIdFull get_shard() const = 0;
virtual BlockSeqno get_seqno() const = 0;
virtual BlockIdExt get_block_id() const = 0;
Expand Down
18 changes: 6 additions & 12 deletions validator/manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2057,7 +2057,6 @@ void ValidatorManagerImpl::update_shards() {
}
}

bool validating_masterchain = false;
if (allow_validate_) {
for (auto &desc : new_shards) {
auto shard = desc.first;
Expand All @@ -2074,9 +2073,6 @@ void ValidatorManagerImpl::update_shards() {
auto validator_id = get_validator(shard, val_set);

if (!validator_id.is_zero()) {
if (shard.is_masterchain()) {
validating_masterchain = true;
}
auto val_group_id = get_validator_set_id(shard, val_set, opts_hash, key_seqno, opts);

if (force_recover) {
Expand Down Expand Up @@ -2171,16 +2167,14 @@ void ValidatorManagerImpl::update_shards() {
td::actor::send_closure(SelfId, &ValidatorManagerImpl::written_destroyed_validator_sessions, std::move(gc));
});
td::actor::send_closure(db_, &Db::update_destroyed_validator_sessions, gc_list_, std::move(P));
}

if (!serializer_.empty()) {
td::actor::send_closure(
serializer_, &AsyncStateSerializer::auto_disable_serializer,
validating_masterchain &&
last_masterchain_state_->get_validator_set(ShardIdFull{masterchainId})->export_vector().size() * 2 <=
last_masterchain_state_->get_total_validator_set(0)->export_vector().size());
}
if (!serializer_.empty()) {
td::actor::send_closure(
serializer_, &AsyncStateSerializer::auto_disable_serializer,
!validator_groups_.empty() && last_masterchain_state_->get_global_id() == -239); // mainnet only
}
} // namespace validator
}

void ValidatorManagerImpl::written_destroyed_validator_sessions(std::vector<td::actor::ActorId<ValidatorGroup>> list) {
for (auto &v : list) {
Expand Down
2 changes: 1 addition & 1 deletion validator/state-serializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,7 @@ void AsyncStateSerializer::got_shard_handle(BlockHandle handle) {

void AsyncStateSerializer::got_shard_state(BlockHandle handle, td::Ref<ShardState> state,
std::shared_ptr<vm::CellDbReader> cell_db_reader) {
next_idx_++;
if (!opts_->get_state_serializer_enabled() || auto_disabled_) {
success_handler();
return;
Expand Down Expand Up @@ -406,7 +407,6 @@ void AsyncStateSerializer::got_shard_state(BlockHandle handle, td::Ref<ShardStat
});
td::actor::send_closure(manager_, &ValidatorManager::store_persistent_state_file_gen, handle->id(),
masterchain_handle_->id(), write_data, std::move(P));
next_idx_++;
}

void AsyncStateSerializer::fail_handler(td::Status reason) {
Expand Down
10 changes: 6 additions & 4 deletions validator/validator-group.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -348,10 +348,12 @@ void ValidatorGroup::create_session() {
<< ".",
allow_unsafe_self_blocks_resync_);
}
if (opts_->get_catchain_max_block_delay()) {
td::actor::send_closure(session_, &validatorsession::ValidatorSession::set_catchain_max_block_delay,
opts_->get_catchain_max_block_delay().value());
}
double catchain_delay = opts_->get_catchain_max_block_delay() ? opts_->get_catchain_max_block_delay().value() : 0.4;
double catchain_delay_slow =
std::max(catchain_delay,
opts_->get_catchain_max_block_delay_slow() ? opts_->get_catchain_max_block_delay_slow().value() : 1.0);
td::actor::send_closure(session_, &validatorsession::ValidatorSession::set_catchain_max_block_delay, catchain_delay,
catchain_delay_slow);
if (started_) {
td::actor::send_closure(session_, &validatorsession::ValidatorSession::start);
}
Expand Down
8 changes: 7 additions & 1 deletion validator/validator-options.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,9 @@ struct ValidatorManagerOptionsImpl : public ValidatorManagerOptions {
// Returns the configured catchain max block delay; the optional is empty when
// no value has been set, leaving the choice of default to the caller.
td::optional<double> get_catchain_max_block_delay() const override {
return catchain_max_block_delay_;
}
// Returns the configured "slow" catchain max block delay; the optional is empty
// when no value has been set, leaving the choice of default to the caller.
td::optional<double> get_catchain_max_block_delay_slow() const override {
return catchain_max_block_delay_slow_;
}
// Returns the current value of the state_serializer_enabled_ flag.
bool get_state_serializer_enabled() const override {
return state_serializer_enabled_;
}
Expand Down Expand Up @@ -230,6 +233,9 @@ struct ValidatorManagerOptionsImpl : public ValidatorManagerOptions {
// Sets the catchain max block delay option; afterwards the matching getter
// returns a non-empty optional holding `value`.
void set_catchain_max_block_delay(double value) override {
catchain_max_block_delay_ = value;
}
// Sets the "slow" catchain max block delay option; afterwards the matching
// getter returns a non-empty optional holding `value`.
void set_catchain_max_block_delay_slow(double value) override {
catchain_max_block_delay_slow_ = value;
}
// Overwrites the state_serializer_enabled_ flag with `value`.
void set_state_serializer_enabled(bool value) override {
state_serializer_enabled_ = value;
}
Expand Down Expand Up @@ -289,7 +295,7 @@ struct ValidatorManagerOptionsImpl : public ValidatorManagerOptions {
td::optional<td::uint64> celldb_cache_size_;
bool celldb_direct_io_ = false;
bool celldb_preload_all_ = false;
td::optional<double> catchain_max_block_delay_;
td::optional<double> catchain_max_block_delay_, catchain_max_block_delay_slow_;
bool state_serializer_enabled_ = true;
td::Ref<CollatorOptions> collator_options_{true};
bool fast_state_serializer_enabled_ = false;
Expand Down
2 changes: 2 additions & 0 deletions validator/validator.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ struct ValidatorManagerOptions : public td::CntObject {
virtual bool get_celldb_direct_io() const = 0;
virtual bool get_celldb_preload_all() const = 0;
virtual td::optional<double> get_catchain_max_block_delay() const = 0;
virtual td::optional<double> get_catchain_max_block_delay_slow() const = 0;
virtual bool get_state_serializer_enabled() const = 0;
virtual td::Ref<CollatorOptions> get_collator_options() const = 0;
virtual bool get_fast_state_serializer_enabled() const = 0;
Expand Down Expand Up @@ -136,6 +137,7 @@ struct ValidatorManagerOptions : public td::CntObject {
virtual void set_celldb_direct_io(bool value) = 0;
virtual void set_celldb_preload_all(bool value) = 0;
virtual void set_catchain_max_block_delay(double value) = 0;
virtual void set_catchain_max_block_delay_slow(double value) = 0;
virtual void set_state_serializer_enabled(bool value) = 0;
virtual void set_collator_options(td::Ref<CollatorOptions> value) = 0;
virtual void set_fast_state_serializer_enabled(bool value) = 0;
Expand Down

0 comments on commit e081111

Please sign in to comment.