From a551f709a35b75f5dfeb9f0e0b0644538313a293 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 1 Apr 2022 12:00:13 +1030 Subject: [PATCH] channeld: send error, not warning, if peer has old commitment number. This is the minimal change to meet the desired outcome of https://github.com/lightning/bolts/issues/934 which wants to give obsolete-db nodes a chance to fix things up, before we close the channel. We need to dance around a bit here, since we *will* close the channel if we receive an ERROR, so we suppress that. Signed-off-by: Rusty Russell --- channeld/channeld.c | 14 ++++---- tests/test_connection.py | 70 +++++++++++++++++++++++++++++++++++++--- 2 files changed, 74 insertions(+), 10 deletions(-) diff --git a/channeld/channeld.c b/channeld/channeld.c index 5e20d71e773d..a89f1a7bd220 100644 --- a/channeld/channeld.c +++ b/channeld/channeld.c @@ -3000,12 +3000,14 @@ static void peer_reconnect(struct peer *peer, } retransmit_revoke_and_ack = true; } else if (next_revocation_number < peer->next_index[LOCAL] - 1) { - peer_failed_err(peer->pps, - &peer->channel_id, - "bad reestablish revocation_number: %"PRIu64 - " vs %"PRIu64, - next_revocation_number, - peer->next_index[LOCAL]); + /* Send a warning here! Because this is what it looks like if peer is + * in the past, and they might still recover. */ + peer_failed_warn(peer->pps, + &peer->channel_id, + "bad reestablish revocation_number: %"PRIu64 + " vs %"PRIu64, + next_revocation_number, + peer->next_index[LOCAL]); } else if (next_revocation_number > peer->next_index[LOCAL] - 1) { if (!check_extra_fields) /* They don't support option_data_loss_protect or diff --git a/tests/test_connection.py b/tests/test_connection.py index b880e23640ec..dcc7c1464182 100644 --- a/tests/test_connection.py +++ b/tests/test_connection.py @@ -2926,11 +2926,11 @@ def test_opener_simple_reconnect(node_factory, bitcoind): @unittest.skipIf(os.getenv('TEST_DB_PROVIDER', 'sqlite3') != 'sqlite3', "sqlite3-specific DB rollback") -@pytest.mark.developer("needs LIGHTNINGD_DEV_LOG_IO") @pytest.mark.openchannel('v1') @pytest.mark.openchannel('v2') def test_dataloss_protection(node_factory, bitcoind): l1 = node_factory.get_node(may_reconnect=True, options={'log-level': 'io'}, + allow_warning=True, feerates=(7500, 7500, 7500, 7500)) l2 = node_factory.get_node(may_reconnect=True, options={'log-level': 'io'}, feerates=(7500, 7500, 7500, 7500), allow_broken_log=True) @@ -2998,14 +2998,16 @@ def test_dataloss_protection(node_factory, bitcoind): # l2 should freak out! l2.daemon.wait_for_log("Peer permanent failure in CHANNELD_NORMAL: Awaiting unilateral close") - # l1 should drop to chain. - l1.wait_for_channel_onchain(l2.info['id']) - # l2 must NOT drop to chain. l2.daemon.wait_for_log("Cannot broadcast our commitment tx: they have a future one") assert not l2.daemon.is_in_log('sendrawtx exit 0', start=l2.daemon.logsearch_start) + # l1 should drop to chain, but doesn't always receive ERROR before it sends warning. + # We have to reconnect once + l1.rpc.connect(l2.info['id'], 'localhost', l2.port) + l1.wait_for_channel_onchain(l2.info['id']) + closetxid = only_one(bitcoind.rpc.getrawmempool(False)) # l2 should still recover something! @@ -3024,6 +3026,66 @@ def test_dataloss_protection(node_factory, bitcoind): assert (closetxid, "confirmed") in set([(o['txid'], o['status']) for o in l2.rpc.listfunds()['outputs']]) +@unittest.skipIf(os.getenv('TEST_DB_PROVIDER', 'sqlite3') != 'sqlite3', "sqlite3-specific DB rollback") +@pytest.mark.openchannel('v1') +@pytest.mark.openchannel('v2') +@pytest.mark.developer("needs dev-disconnect, dev-no-reconnect") +def test_dataloss_protection_no_broadcast(node_factory, bitcoind): + # If l2 sends an old version, but *doesn't* send an error, l1 should not broadcast tx. + # (https://github.com/lightning/bolts/issues/934) + l1 = node_factory.get_node(may_reconnect=True, + feerates=(7500, 7500, 7500, 7500), + allow_warning=True, + options={'dev-no-reconnect': None}) + l2 = node_factory.get_node(may_reconnect=True, + feerates=(7500, 7500, 7500, 7500), allow_broken_log=True, + disconnect=['-WIRE_ERROR'], + options={'dev-no-reconnect': None}) + + l1.rpc.connect(l2.info['id'], 'localhost', l2.port) + l1.fundchannel(l2, 10**6) + l2.stop() + + # Save copy of the db. + dbpath = os.path.join(l2.daemon.lightning_dir, TEST_NETWORK, "lightningd.sqlite3") + orig_db = open(dbpath, "rb").read() + l2.start() + + l1.rpc.connect(l2.info['id'], 'localhost', l2.port) + # After an htlc, we should get different results (two more commits) + l1.pay(l2, 200000000) + + # Make sure both sides consider it completely settled (has received both + # REVOKE_AND_ACK) + l1.daemon.wait_for_logs(["peer_in WIRE_REVOKE_AND_ACK"] * 2) + l2.daemon.wait_for_logs(["peer_in WIRE_REVOKE_AND_ACK"] * 2) + + # Now, move l2 back in time. + l2.stop() + # Save new db + new_db = open(dbpath, "rb").read() + # Overwrite with OLD db. + open(dbpath, "wb").write(orig_db) + l2.start() + + l1.rpc.connect(l2.info['id'], 'localhost', l2.port) + # l2 should freak out! + l2.daemon.wait_for_logs(["Peer permanent failure in CHANNELD_NORMAL: Awaiting unilateral close"]) + + # l1 should NOT drop to chain, since it didn't receive an error. + time.sleep(5) + assert bitcoind.rpc.getrawmempool(False) == [] + + # fix up l2. + l2.stop() + open(dbpath, "wb").write(new_db) + l2.start() + + # All is forgiven + l1.rpc.connect(l2.info['id'], 'localhost', l2.port) + l1.pay(l2, 200000000) + + @pytest.mark.developer("needs dev_disconnect") def test_restart_multi_htlc_rexmit(node_factory, bitcoind, executor): # l1 disables commit timer once we send first htlc, dies on commit