From e4b827e638b582dea1696d2f9bbecf8aef1e5395 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Oct 2024 23:43:16 -0400 Subject: [PATCH] bcachefs: Reserve 8 bits in bch_reflink_p Better repair for reflink pointers, as well as propagating new inode options to indirect extents, are going to require a few extra bits bch_reflink_p: so claim a few from the high end of the destination index. Also add some missing bounds checking. Signed-off-by: Kent Overstreet --- fs/bcachefs/extent_update.c | 2 +- fs/bcachefs/extents.c | 2 +- fs/bcachefs/io_read.c | 14 ++++------- fs/bcachefs/reflink.c | 45 +++++++++++++++++++++++----------- fs/bcachefs/reflink_format.h | 4 ++- fs/bcachefs/sb-errors_format.h | 3 ++- 6 files changed, 43 insertions(+), 27 deletions(-) diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c index 45c87c019f6b7..6aac579a692ae 100644 --- a/fs/bcachefs/extent_update.c +++ b/fs/bcachefs/extent_update.c @@ -64,7 +64,7 @@ static int count_iters_for_insert(struct btree_trans *trans, break; case KEY_TYPE_reflink_p: { struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); - u64 idx = le64_to_cpu(p.v->idx); + u64 idx = REFLINK_P_IDX(p.v); unsigned sectors = bpos_min(*end, p.k->p).offset - bkey_start_offset(p.k); struct btree_iter iter; diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 58e4644d43c43..38cd9335df7dd 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -1492,7 +1492,7 @@ int bch2_cut_front_s(struct bpos where, struct bkey_s k) case KEY_TYPE_reflink_p: { struct bkey_s_reflink_p p = bkey_s_to_reflink_p(k); - le64_add_cpu(&p.v->idx, sub); + SET_REFLINK_P_IDX(p.v, REFLINK_P_IDX(p.v) + sub); break; } case KEY_TYPE_inline_data: diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 9b03421451cb3..8ec4314d11ab1 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -753,17 +753,13 @@ int __bch2_read_indirect_extent(struct btree_trans *trans, unsigned *offset_into_extent, struct bkey_buf *orig_k) { - struct btree_iter iter; - struct bkey_s_c k; - u64 reflink_offset; - int ret; + struct bkey_i_reflink_p *p = bkey_i_to_reflink_p(orig_k->k); + u64 reflink_offset = REFLINK_P_IDX(&p->v) + *offset_into_extent; - reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k->k)->v.idx) + - *offset_into_extent; - - k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_reflink, + struct btree_iter iter; + struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_reflink, POS(0, reflink_offset), 0); - ret = bkey_err(k); + int ret = bkey_err(k); if (ret) goto err; diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 96cf50f4705d2..addaf5f746249 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -35,10 +35,10 @@ int bch2_reflink_p_validate(struct bch_fs *c, struct bkey_s_c k, struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); int ret = 0; - bkey_fsck_err_on(le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad), + bkey_fsck_err_on(REFLINK_P_IDX(p.v) < le32_to_cpu(p.v->front_pad), c, reflink_p_front_pad_bad, "idx < front_pad (%llu < %u)", - le64_to_cpu(p.v->idx), le32_to_cpu(p.v->front_pad)); + REFLINK_P_IDX(p.v), le32_to_cpu(p.v->front_pad)); fsck_err: return ret; } @@ -49,7 +49,7 @@ void bch2_reflink_p_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); prt_printf(out, "idx %llu front_pad %u back_pad %u", - le64_to_cpu(p.v->idx), + REFLINK_P_IDX(p.v), le32_to_cpu(p.v->front_pad), le32_to_cpu(p.v->back_pad)); } @@ -65,7 +65,7 @@ bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r */ return false; - if (le64_to_cpu(l.v->idx) + l.k->size != le64_to_cpu(r.v->idx)) + if (REFLINK_P_IDX(l.v) + l.k->size != REFLINK_P_IDX(r.v)) return false; bch2_key_resize(l.k, l.k->size + r.k->size); @@ -115,12 +115,12 @@ static int trans_trigger_reflink_p_segment(struct btree_trans *trans, u64 pad; pad = max_t(s64, le32_to_cpu(v->front_pad), - le64_to_cpu(v->idx) - bkey_start_offset(&k->k)); + REFLINK_P_IDX(v) - bkey_start_offset(&k->k)); BUG_ON(pad > U32_MAX); v->front_pad = cpu_to_le32(pad); pad = max_t(s64, le32_to_cpu(v->back_pad), - k->k.p.offset - p.k->size - le64_to_cpu(v->idx)); + k->k.p.offset - p.k->size - REFLINK_P_IDX(v)); BUG_ON(pad > U32_MAX); v->back_pad = cpu_to_le32(pad); } @@ -147,8 +147,8 @@ static s64 gc_trigger_reflink_p_segment(struct btree_trans *trans, struct bch_fs *c = trans->c; struct reflink_gc *r; int add = !(flags & BTREE_TRIGGER_overwrite) ? 1 : -1; - u64 start = le64_to_cpu(p.v->idx); - u64 end = le64_to_cpu(p.v->idx) + p.k->size; + u64 start = REFLINK_P_IDX(p.v); + u64 end = start + p.k->size; u64 next_idx = end + le32_to_cpu(p.v->back_pad); s64 ret = 0; struct printbuf buf = PRINTBUF; @@ -210,8 +210,8 @@ static int __trigger_reflink_p(struct btree_trans *trans, struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); int ret = 0; - u64 idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad); - u64 end = le64_to_cpu(p.v->idx) + p.k->size + le32_to_cpu(p.v->back_pad); + u64 idx = REFLINK_P_IDX(p.v) - le32_to_cpu(p.v->front_pad); + u64 end = REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad); if (flags & BTREE_TRIGGER_transactional) { while (idx < end && !ret) @@ -258,7 +258,16 @@ int bch2_trigger_reflink_p(struct btree_trans *trans, int bch2_reflink_v_validate(struct bch_fs *c, struct bkey_s_c k, enum bch_validate_flags flags) { - return bch2_bkey_ptrs_validate(c, k, flags); + int ret = 0; + + bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, REFLINK_P_IDX_MAX)), + c, reflink_v_pos_bad, + "indirect extent above maximum position 0:%llu", + REFLINK_P_IDX_MAX); + + ret = bch2_bkey_ptrs_validate(c, k, flags); +fsck_err: + return ret; } void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c, @@ -358,6 +367,14 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, if (ret) goto err; + /* + * XXX: we're assuming that 56 bits will be enough for the life of the + * filesystem: we need to implement wraparound, with a cursor in the + * logged ops btree: + */ + if (bkey_ge(reflink_iter.pos, POS(0, REFLINK_P_IDX_MAX - orig->k.size))) + return -ENOSPC; + r_v = bch2_trans_kmalloc(trans, sizeof(__le64) + bkey_bytes(&orig->k)); ret = PTR_ERR_OR_ZERO(r_v); if (ret) @@ -394,7 +411,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, memset(&r_p->v, 0, sizeof(r_p->v)); #endif - r_p->v.idx = cpu_to_le64(bkey_start_offset(&r_v->k)); + SET_REFLINK_P_IDX(&r_p->v, bkey_start_offset(&r_v->k)); ret = bch2_trans_update(trans, extent_iter, &r_p->k_i, BTREE_UPDATE_internal_snapshot_node); @@ -533,11 +550,11 @@ s64 bch2_remap_range(struct bch_fs *c, struct bkey_i_reflink_p *dst_p = bkey_reflink_p_init(new_dst.k); - u64 offset = le64_to_cpu(src_p.v->idx) + + u64 offset = REFLINK_P_IDX(src_p.v) + (src_want.offset - bkey_start_offset(src_k.k)); - dst_p->v.idx = cpu_to_le64(offset); + SET_REFLINK_P_IDX(&dst_p->v, offset); } else { BUG(); } diff --git a/fs/bcachefs/reflink_format.h b/fs/bcachefs/reflink_format.h index 6772eebb1fc66..0d8de13b9ddf7 100644 --- a/fs/bcachefs/reflink_format.h +++ b/fs/bcachefs/reflink_format.h @@ -4,7 +4,7 @@ struct bch_reflink_p { struct bch_val v; - __le64 idx; + __le64 idx_flags; /* * A reflink pointer might point to an indirect extent which is then * later split (by copygc or rebalance). If we only pointed to part of @@ -17,6 +17,8 @@ struct bch_reflink_p { __le32 back_pad; } __packed __aligned(8); +LE64_BITMASK(REFLINK_P_IDX, struct bch_reflink_p, idx_flags, 0, 56); + struct bch_reflink_v { struct bch_val v; __le64 refcount; diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 3ada94a42a68c..45455d31aef7c 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -177,6 +177,7 @@ enum bch_fsck_flags { x(ptr_stripe_redundant, 163, 0) \ x(reservation_key_nr_replicas_invalid, 164, 0) \ x(reflink_v_refcount_wrong, 165, 0) \ + x(reflink_v_pos_bad, 292, 0) \ x(reflink_p_to_missing_reflink_v, 166, 0) \ x(stripe_pos_bad, 167, 0) \ x(stripe_val_size_bad, 168, 0) \ @@ -302,7 +303,7 @@ enum bch_fsck_flags { x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \ x(accounting_key_version_0, 282, FSCK_AUTOFIX) \ x(logged_op_but_clean, 283, FSCK_AUTOFIX) \ - x(MAX, 292, 0) + x(MAX, 293, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n,