From 58474f76a770bcc79d4b2d7232e4d6650e732b50 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Mon, 12 Aug 2024 02:27:36 -0400
Subject: [PATCH] bcachefs: bcachefs_metadata_version_disk_accounting_inum

This adds another disk accounting counter to track usage per inode
number (any snapshot ID).

This will be used for a couple things:

- It'll give us a way to tell the user how much space a given file ista
  consuming in all snapshots; i.e. how much extra space it's consuming
  due to snapshot versioning.

- It counts number of extents and total size of extents (both in btree
  keyspace sectors and actual disk usage), meaning it gives us average
  extent size: that is, it'll let us cheaply find fragmented files that
  should be defragmented.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/bcachefs_format.h        |  3 ++-
 fs/bcachefs/buckets.c                | 14 ++++++++++++++
 fs/bcachefs/disk_accounting.c        |  3 +++
 fs/bcachefs/disk_accounting.h        |  3 +++
 fs/bcachefs/disk_accounting_format.h |  8 +++++++-
 fs/bcachefs/sb-downgrade.c           |  5 ++++-
 6 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index b25f863567287..c75f2e0f32bb9 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -676,7 +676,8 @@ struct bch_sb_field_ext {
 	x(mi_btree_bitmap,		BCH_VERSION(1,  7))		\
 	x(bucket_stripe_sectors,	BCH_VERSION(1,  8))		\
 	x(disk_accounting_v2,		BCH_VERSION(1,  9))		\
-	x(disk_accounting_v3,		BCH_VERSION(1, 10))
+	x(disk_accounting_v3,		BCH_VERSION(1, 10))		\
+	x(disk_accounting_inum,		BCH_VERSION(1, 11))
 
 enum bcachefs_metadata_version {
 	bcachefs_metadata_version_min = 9,
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 9f7004e941ce4..b69ef4b3de6e2 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -810,6 +810,20 @@ static int __trigger_extent(struct btree_trans *trans,
 		ret = bch2_disk_accounting_mod(trans, &acc_btree_key, &replicas_sectors, 1, gc);
 		if (ret)
 			return ret;
+	} else {
+		bool insert = !(flags & BTREE_TRIGGER_overwrite);
+		struct disk_accounting_pos acc_inum_key = {
+			.type		= BCH_DISK_ACCOUNTING_inum,
+			.inum.inum	= k.k->p.inode,
+		};
+		s64 v[3] = {
+			insert ? 1 : -1,
+			insert ? k.k->size : -((s64) k.k->size),
+			replicas_sectors,
+		};
+		ret = bch2_disk_accounting_mod(trans, &acc_inum_key, v, ARRAY_SIZE(v), gc);
+		if (ret)
+			return ret;
 	}
 
 	if (bch2_bkey_rebalance_opts(k)) {
diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c
index e57b40623cd98..e972e2bca546a 100644
--- a/fs/bcachefs/disk_accounting.c
+++ b/fs/bcachefs/disk_accounting.c
@@ -768,6 +768,9 @@ void bch2_verify_accounting_clean(struct bch_fs *c)
 			if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR)
 				continue;
 
+			if (acc_k.type == BCH_DISK_ACCOUNTING_inum)
+				continue;
+
 			bch2_accounting_mem_read(c, k.k->p, v, nr);
 
 			if (memcmp(a.v->d, v, nr * sizeof(u64))) {
diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h
index 653090667aaa4..f29fd0dd9581f 100644
--- a/fs/bcachefs/disk_accounting.h
+++ b/fs/bcachefs/disk_accounting.h
@@ -116,6 +116,9 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, stru
 	struct disk_accounting_pos acc_k;
 	bpos_to_disk_accounting_pos(&acc_k, a.k->p);
 
+	if (acc_k.type == BCH_DISK_ACCOUNTING_inum)
+		return 0;
+
 	if (!gc && !read) {
 		switch (acc_k.type) {
 		case BCH_DISK_ACCOUNTING_persistent_reserved:
diff --git a/fs/bcachefs/disk_accounting_format.h b/fs/bcachefs/disk_accounting_format.h
index a93cf26ff4a94..7b6e6c97e6aa6 100644
--- a/fs/bcachefs/disk_accounting_format.h
+++ b/fs/bcachefs/disk_accounting_format.h
@@ -103,7 +103,8 @@ static inline bool data_type_is_hidden(enum bch_data_type type)
 	x(compression,		4)		\
 	x(snapshot,		5)		\
 	x(btree,		6)		\
-	x(rebalance_work,	7)
+	x(rebalance_work,	7)		\
+	x(inum,			8)
 
 enum disk_accounting_type {
 #define x(f, nr)	BCH_DISK_ACCOUNTING_##f	= nr,
@@ -136,6 +137,10 @@ struct bch_acct_btree {
 	__u32			id;
 } __packed;
 
+struct bch_acct_inum {
+	__u64			inum;
+} __packed;
+
 struct bch_acct_rebalance_work {
 };
 
@@ -152,6 +157,7 @@ struct disk_accounting_pos {
 		struct bch_acct_snapshot	snapshot;
 		struct bch_acct_btree		btree;
 		struct bch_acct_rebalance_work	rebalance_work;
+		struct bch_acct_inum		inum;
 		} __packed;
 	} __packed;
 		struct bpos			_pad;
diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c
index 9f82d497d9e05..650a1f77ca403 100644
--- a/fs/bcachefs/sb-downgrade.c
+++ b/fs/bcachefs/sb-downgrade.c
@@ -72,7 +72,10 @@
 	  BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0,	\
 	  BCH_FSCK_ERR_accounting_key_replicas_nr_required_bad,	\
 	  BCH_FSCK_ERR_accounting_key_replicas_devs_unsorted,	\
-	  BCH_FSCK_ERR_accounting_key_junk_at_end)
+	  BCH_FSCK_ERR_accounting_key_junk_at_end)		\
+	x(disk_accounting_inum,					\
+	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
+	  BCH_FSCK_ERR_accounting_mismatch)
 
 #define DOWNGRADE_TABLE()					\
 	x(bucket_stripe_sectors,				\
-- 
GitLab