From b1325fb142bd9de94cdcd666d54d2c6f11ac3131 Mon Sep 17 00:00:00 2001 From: Jie Yao Date: Wed, 21 Jan 2026 17:21:22 +0800 Subject: [PATCH 1/3] add blk metrics for gc --- conanfile.py | 2 +- src/lib/homestore_backend/gc_manager.cpp | 8 +++++--- src/lib/homestore_backend/gc_manager.hpp | 14 +++++++++++++- src/lib/homestore_backend/hs_cp_callbacks.cpp | 2 +- src/lib/homestore_backend/hs_homeobject.hpp | 4 ---- .../replication_state_machine.cpp | 4 ++-- 6 files changed, 22 insertions(+), 12 deletions(-) diff --git a/conanfile.py b/conanfile.py index 92cf4de05..5f137c4b3 100644 --- a/conanfile.py +++ b/conanfile.py @@ -10,7 +10,7 @@ class HomeObjectConan(ConanFile): name = "homeobject" - version = "3.0.16" + version = "3.0.17" homepage = "https://github.com/eBay/HomeObject" description = "Blob Store built on HomeStore" diff --git a/src/lib/homestore_backend/gc_manager.cpp b/src/lib/homestore_backend/gc_manager.cpp index 8c6956a0f..e6232f854 100644 --- a/src/lib/homestore_backend/gc_manager.cpp +++ b/src/lib/homestore_backend/gc_manager.cpp @@ -774,9 +774,6 @@ bool GCManager::pdev_gc_actor::copy_valid_data( */ } #endif - - // for emergent gc, we directly use the current shard header as the new header - // TODO::involve ratelimiter in the following code, where read/write are scheduled. or do we need a central // ratelimter shared by all components except client io? auto succeed_copying_shard = @@ -786,6 +783,8 @@ bool GCManager::pdev_gc_actor::copy_valid_data( &valid_blob_indexes, &data_service, task_id, &last_shard_state, &copied_blobs, pg_id, header_sgs = std::move(header_sgs)](auto&& err) { RELEASE_ASSERT(header_sgs.iovs.size() == 1, "header_sgs.iovs.size() should be 1, but not!"); + // shard header occupies one blk + metrics_.inc_gc_write_blk_count(1); iomanager.iobuf_free(reinterpret_cast< uint8_t* >(header_sgs.iovs[0].iov_base)); if (err) { GCLOGE(task_id, pg_id, shard_id, @@ -822,6 +821,7 @@ bool GCManager::pdev_gc_actor::copy_valid_data( data_service.async_read(pba, data_sgs, total_size) .thenValue([this, k, &hints, &move_from_chunk, &move_to_chunk, &data_service, task_id, pg_id, data_sgs = std::move(data_sgs), pba, &copied_blobs](auto&& err) { + metrics_.inc_gc_read_blk_count(pba.blk_count()); RELEASE_ASSERT(data_sgs.iovs.size() == 1, "data_sgs.iovs.size() should be 1, but not!"); @@ -865,6 +865,7 @@ bool GCManager::pdev_gc_actor::copy_valid_data( return data_service.async_alloc_write(data_sgs, hints, new_pba) .thenValue([this, shard_id, blob_id, new_pba, &move_to_chunk, task_id, pg_id, &copied_blobs, data_sgs = std::move(data_sgs)](auto&& err) { + metrics_.inc_gc_write_blk_count(new_pba.blk_count()); RELEASE_ASSERT(data_sgs.iovs.size() == 1, "data_sgs.iovs.size() should be 1, but not!"); iomanager.iobuf_free( @@ -939,6 +940,7 @@ bool GCManager::pdev_gc_actor::copy_valid_data( // write shard footer homestore::MultiBlkId out_blkids; + metrics_.inc_gc_write_blk_count(1); return data_service.async_alloc_write(footer_sgs, hints, out_blkids); }) .thenValue([this, &move_to_chunk, &shard_id, footer_sgs, task_id, pg_id](auto&& err) { diff --git a/src/lib/homestore_backend/gc_manager.hpp b/src/lib/homestore_backend/gc_manager.hpp index 06ee0876a..5506e01c6 100644 --- a/src/lib/homestore_backend/gc_manager.hpp +++ b/src/lib/homestore_backend/gc_manager.hpp @@ -132,6 +132,8 @@ class GCManager { REGISTER_GAUGE(failed_egc_task_count, "Number of failed emergent gc tasks"); REGISTER_GAUGE(total_reclaimed_space_by_gc, "Total reclaimed space by gc task"); REGISTER_GAUGE(total_reclaimed_space_by_egc, "Total reclaimed space by emergent gc task"); + REGISTER_GAUGE(gc_read_blk_count, "Total read blk count by gc in this pdev"); + REGISTER_GAUGE(gc_write_blk_count, "Total writted blk count by gc in this pdev"); // gc task level histogram metrics REGISTER_HISTOGRAM(reclaim_ratio_gc, "the ratio of reclaimed blks to total blks in a gc task", @@ -149,6 +151,7 @@ class GCManager { register_me_to_farm(); attach_gather_cb(std::bind(&pdev_gc_metrics::on_gather, this)); } + ~pdev_gc_metrics() { deregister_me_from_farm(); } pdev_gc_metrics(const pdev_gc_metrics&) = delete; pdev_gc_metrics(pdev_gc_metrics&&) noexcept = delete; @@ -172,11 +175,20 @@ class GCManager { *this, total_reclaimed_space_by_egc, gc_actor_.durable_entities().total_reclaimed_blk_count_by_egc.load(std::memory_order_relaxed) * blk_size_); + + GAUGE_UPDATE(*this, gc_read_blk_count, gc_read_blk_count.load(std::memory_order_relaxed)); + GAUGE_UPDATE(*this, gc_write_blk_count, gc_write_blk_count.load(std::memory_order_relaxed)); } + public: + void inc_gc_read_blk_count(uint64_t count) { gc_read_blk_count.fetch_add(count); } + void inc_gc_write_blk_count(uint64_t count) { gc_write_blk_count.fetch_add(count); } + private: pdev_gc_actor const& gc_actor_; uint32_t blk_size_; + atomic_uint64_t gc_read_blk_count{0ull}; + atomic_uint64_t gc_write_blk_count{0ull}; }; public: @@ -313,7 +325,7 @@ class GCManager { void drain_pg_pending_gc_task(const pg_id_t pg_id); void decr_pg_pending_gc_task(const pg_id_t pg_id); void incr_pg_pending_gc_task(const pg_id_t pg_id); - auto& get_gc_actore_superblks() { return m_gc_actor_sbs; } + auto& get_gc_actor_superblks() { return m_gc_actor_sbs; } std::shared_ptr< pdev_gc_actor > get_pdev_gc_actor(uint32_t pdev_id); private: diff --git a/src/lib/homestore_backend/hs_cp_callbacks.cpp b/src/lib/homestore_backend/hs_cp_callbacks.cpp index 07b91f6da..711af667e 100644 --- a/src/lib/homestore_backend/hs_cp_callbacks.cpp +++ b/src/lib/homestore_backend/hs_cp_callbacks.cpp @@ -56,7 +56,7 @@ folly::Future< bool > HSHomeObject::MyCPCallbacks::cp_flush(CP* cp) { // flush gc durable_entities auto gc_manager = home_obj_.gc_manager(); - auto& gc_actor_superblks = gc_manager->get_gc_actore_superblks(); + auto& gc_actor_superblks = gc_manager->get_gc_actor_superblks(); for (auto& gc_actor_sb : gc_actor_superblks) { const auto pdev_id = gc_actor_sb->pdev_id; const auto gc_actor = gc_manager->get_pdev_gc_actor(pdev_id); diff --git a/src/lib/homestore_backend/hs_homeobject.hpp b/src/lib/homestore_backend/hs_homeobject.hpp index dc77db198..e3ef772e4 100644 --- a/src/lib/homestore_backend/hs_homeobject.hpp +++ b/src/lib/homestore_backend/hs_homeobject.hpp @@ -479,10 +479,6 @@ class HSHomeObject : public HomeObjectImpl { homestore::MultiBlkId pbas; }; - struct BlobInfoData : public BlobInfo { - Blob blob; - }; - enum class BlobState : uint8_t { ALIVE = 0, TOMBSTONE = 1, diff --git a/src/lib/homestore_backend/replication_state_machine.cpp b/src/lib/homestore_backend/replication_state_machine.cpp index ff6546ec7..6641f9968 100644 --- a/src/lib/homestore_backend/replication_state_machine.cpp +++ b/src/lib/homestore_backend/replication_state_machine.cpp @@ -724,14 +724,14 @@ folly::Future< std::error_code > ReplicationStateMachine::on_fetch_data(const in auto rc = index_table->get(get_req); if (sisl_unlikely(homestore::btree_status_t::success != rc)) { // blob never exists or has been gc - LOGD("on_fetch_data failed to get from index table, blob never exists or has been gc, blob_id={}, " + LOGD("on_fetch_data: failed to get from index table, blob never exists or has been gc, blob_id={}, " "shardID=0x{:x}, pg={}", blob_id, shard_id, pg_id); should_return_delete_marker = true; } else { pbas = index_value.pbas(); if (sisl_unlikely(pbas == HSHomeObject::tombstone_pbas)) { - LOGD("on_fetch_data: blob has been deleted, blob_id={}, shardID=0x{:x}, pg={}", blob_id, + LOGD("on_fetch_data: got tombstone pba for blob_id={}, shardID=0x{:x}, pg={}", blob_id, shard_id, pg_id); should_return_delete_marker = true; } From 88df24b85da2372c8f1db41ac3e0d32013c5cabf Mon Sep 17 00:00:00 2001 From: Jie Yao Date: Wed, 21 Jan 2026 23:46:54 +0800 Subject: [PATCH 2/3] use REGISTER_COUNTER and COUNTER_INCREMENT --- src/lib/homestore_backend/gc_manager.cpp | 10 +++++----- src/lib/homestore_backend/gc_manager.hpp | 13 ++----------- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/src/lib/homestore_backend/gc_manager.cpp b/src/lib/homestore_backend/gc_manager.cpp index e6232f854..4b9ca70b9 100644 --- a/src/lib/homestore_backend/gc_manager.cpp +++ b/src/lib/homestore_backend/gc_manager.cpp @@ -784,7 +784,7 @@ bool GCManager::pdev_gc_actor::copy_valid_data( header_sgs = std::move(header_sgs)](auto&& err) { RELEASE_ASSERT(header_sgs.iovs.size() == 1, "header_sgs.iovs.size() should be 1, but not!"); // shard header occupies one blk - metrics_.inc_gc_write_blk_count(1); + COUNTER_INCREMENT(metrics_, gc_write_blk_count, 1); iomanager.iobuf_free(reinterpret_cast< uint8_t* >(header_sgs.iovs[0].iov_base)); if (err) { GCLOGE(task_id, pg_id, shard_id, @@ -821,7 +821,7 @@ bool GCManager::pdev_gc_actor::copy_valid_data( data_service.async_read(pba, data_sgs, total_size) .thenValue([this, k, &hints, &move_from_chunk, &move_to_chunk, &data_service, task_id, pg_id, data_sgs = std::move(data_sgs), pba, &copied_blobs](auto&& err) { - metrics_.inc_gc_read_blk_count(pba.blk_count()); + COUNTER_INCREMENT(metrics_, gc_read_blk_count, pba.blk_count()); RELEASE_ASSERT(data_sgs.iovs.size() == 1, "data_sgs.iovs.size() should be 1, but not!"); @@ -865,7 +865,7 @@ bool GCManager::pdev_gc_actor::copy_valid_data( return data_service.async_alloc_write(data_sgs, hints, new_pba) .thenValue([this, shard_id, blob_id, new_pba, &move_to_chunk, task_id, pg_id, &copied_blobs, data_sgs = std::move(data_sgs)](auto&& err) { - metrics_.inc_gc_write_blk_count(new_pba.blk_count()); + COUNTER_INCREMENT(metrics_, gc_write_blk_count, new_pba.blk_count()); RELEASE_ASSERT(data_sgs.iovs.size() == 1, "data_sgs.iovs.size() should be 1, but not!"); iomanager.iobuf_free( @@ -938,9 +938,9 @@ bool GCManager::pdev_gc_actor::copy_valid_data( return folly::makeFuture< std::error_code >(std::error_code{}); } - // write shard footer + // write shard footer, which occupies one blk homestore::MultiBlkId out_blkids; - metrics_.inc_gc_write_blk_count(1); + COUNTER_INCREMENT(metrics_, gc_write_blk_count, 1); return data_service.async_alloc_write(footer_sgs, hints, out_blkids); }) .thenValue([this, &move_to_chunk, &shard_id, footer_sgs, task_id, pg_id](auto&& err) { diff --git a/src/lib/homestore_backend/gc_manager.hpp b/src/lib/homestore_backend/gc_manager.hpp index 5506e01c6..2e1f03407 100644 --- a/src/lib/homestore_backend/gc_manager.hpp +++ b/src/lib/homestore_backend/gc_manager.hpp @@ -132,8 +132,8 @@ class GCManager { REGISTER_GAUGE(failed_egc_task_count, "Number of failed emergent gc tasks"); REGISTER_GAUGE(total_reclaimed_space_by_gc, "Total reclaimed space by gc task"); REGISTER_GAUGE(total_reclaimed_space_by_egc, "Total reclaimed space by emergent gc task"); - REGISTER_GAUGE(gc_read_blk_count, "Total read blk count by gc in this pdev"); - REGISTER_GAUGE(gc_write_blk_count, "Total writted blk count by gc in this pdev"); + REGISTER_COUNTER(gc_read_blk_count, "Total read blk count by gc in this pdev"); + REGISTER_COUNTER(gc_write_blk_count, "Total writted blk count by gc in this pdev"); // gc task level histogram metrics REGISTER_HISTOGRAM(reclaim_ratio_gc, "the ratio of reclaimed blks to total blks in a gc task", @@ -175,20 +175,11 @@ class GCManager { *this, total_reclaimed_space_by_egc, gc_actor_.durable_entities().total_reclaimed_blk_count_by_egc.load(std::memory_order_relaxed) * blk_size_); - - GAUGE_UPDATE(*this, gc_read_blk_count, gc_read_blk_count.load(std::memory_order_relaxed)); - GAUGE_UPDATE(*this, gc_write_blk_count, gc_write_blk_count.load(std::memory_order_relaxed)); } - public: - void inc_gc_read_blk_count(uint64_t count) { gc_read_blk_count.fetch_add(count); } - void inc_gc_write_blk_count(uint64_t count) { gc_write_blk_count.fetch_add(count); } - private: pdev_gc_actor const& gc_actor_; uint32_t blk_size_; - atomic_uint64_t gc_read_blk_count{0ull}; - atomic_uint64_t gc_write_blk_count{0ull}; }; public: From 43aae12ed91336378ed5a9c683e901ade7ea7bc4 Mon Sep 17 00:00:00 2001 From: Jie Yao Date: Thu, 22 Jan 2026 08:13:28 +0800 Subject: [PATCH 3/3] fix typo --- src/lib/homestore_backend/gc_manager.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/homestore_backend/gc_manager.hpp b/src/lib/homestore_backend/gc_manager.hpp index 2e1f03407..9e2ccfe78 100644 --- a/src/lib/homestore_backend/gc_manager.hpp +++ b/src/lib/homestore_backend/gc_manager.hpp @@ -133,7 +133,7 @@ class GCManager { REGISTER_GAUGE(total_reclaimed_space_by_gc, "Total reclaimed space by gc task"); REGISTER_GAUGE(total_reclaimed_space_by_egc, "Total reclaimed space by emergent gc task"); REGISTER_COUNTER(gc_read_blk_count, "Total read blk count by gc in this pdev"); - REGISTER_COUNTER(gc_write_blk_count, "Total writted blk count by gc in this pdev"); + REGISTER_COUNTER(gc_write_blk_count, "Total written blk count by gc in this pdev"); // gc task level histogram metrics REGISTER_HISTOGRAM(reclaim_ratio_gc, "the ratio of reclaimed blks to total blks in a gc task",