From 5c6e937359ca43204d702a64fde7c3d8bea964a0 Mon Sep 17 00:00:00 2001
From: Sanal P
Date: Fri, 31 Oct 2025 11:48:17 -0700
Subject: [PATCH] Check is_blk_alloced and use free_blk_now during log replay.

If there are multiple writes to the same LBA, we free the old blkids.
If those freed blkids are reused and freed again, the log ends up with
multiple entries that free the same blkid. So during log replay, check
whether the blkid is still alloced, and only then free the blk directly
from the blk allocator bitmap cache. (A standalone toy sketch of this
replay guard follows the diff below.)
---
 conanfile.py              |  2 +-
 src/lib/volume/volume.cpp | 50 ++++++++++++++++++++-------------------
 src/lib/volume_mgr.cpp    | 15 +++++++++---
 3 files changed, 39 insertions(+), 28 deletions(-)

diff --git a/conanfile.py b/conanfile.py
index beef9b5..aaeb036 100644
--- a/conanfile.py
+++ b/conanfile.py
@@ -9,7 +9,7 @@ class HomeBlocksConan(ConanFile):
     name = "homeblocks"
-    version = "4.0.1"
+    version = "4.0.2"
     homepage = "https://github.com/eBay/HomeBlocks"
     description = "Block Store built on HomeStore"
diff --git a/src/lib/volume/volume.cpp b/src/lib/volume/volume.cpp
index bfe6461..ac51c6e 100644
--- a/src/lib/volume/volume.cpp
+++ b/src/lib/volume/volume.cpp
@@ -239,6 +239,7 @@ VolumeManager::NullAsyncResult Volume::write(const vol_interface_req_ptr& vol_re
     lba_t start_lba = vol_req->lba;
     for (auto& blkid : new_blkids) {
         DEBUG_ASSERT_EQ(blkid.num_pieces(), 1, "Multiple blkid pieces");
+        LOGT("volume write blkid={} lba={}", blkid.to_string(), start_lba);

         // Split the large blkid to individual blkid having only one block because each LBA points
         // to a blkid containing single blk which is stored in index value. Calculate the checksum for each
@@ -263,11 +264,11 @@ VolumeManager::NullAsyncResult Volume::write(const vol_interface_req_ptr& vol_re
     }
     HISTOGRAM_OBSERVE(*metrics_, volume_map_write_latency, get_elapsed_time_us(vol_req->index_start_time));

-        vol_req->journal_start_time = Clock::now();
-        // Collect all old blocks to write to journal.
-        for (auto& [_, info] : blocks_info) {
-            if (info.old_blkid.is_valid()) { old_blkids.emplace_back(info.old_blkid); }
-        }
+    vol_req->journal_start_time = Clock::now();
+    // Collect all old blocks to write to journal.
+    for (auto& [_, info] : blocks_info) {
+        if (info.old_blkid.is_valid()) { old_blkids.emplace_back(info.old_blkid); }
+    }

     auto csum_size = sizeof(homestore::csum_t) * vol_req->nlbas;
     auto old_blkids_size = sizeof(BlkId) * old_blkids.size();
@@ -309,25 +310,26 @@ VolumeManager::NullAsyncResult Volume::write(const vol_interface_req_ptr& vol_re
     }
 #endif

-        rd()->async_write_journal(new_blkids, req->cheader_buf(), req->ckey_buf(), data_size, req);
-
-        return req->result()
-            .via(&folly::InlineExecutor::instance())
-            .thenValue([this, vol_req](const auto&& result) -> folly::Expected< folly::Unit, VolumeError > {
-                if (result.hasError()) {
-                    LOGE("Failed to write to journal for volume: {}, lba: {}, nlbas: {}, error: {}",
-                         vol_info_->name, vol_req->lba, vol_req->nlbas, result.error());
-                    auto err = result.error();
-                    return folly::makeUnexpected(err);
-                }
-                HISTOGRAM_OBSERVE(*metrics_, volume_journal_write_latency, get_elapsed_time_us(vol_req->journal_start_time));
-                auto write_size = vol_req->nlbas * rd()->get_blk_size();
-                COUNTER_INCREMENT(*metrics_, volume_write_size_total, write_size);
-                HISTOGRAM_OBSERVE(*metrics_, volume_write_size_distribution, write_size);
-                HISTOGRAM_OBSERVE(*metrics_, volume_write_latency, get_elapsed_time_us(vol_req->io_start_time));
-                return folly::Unit();
-            });
-        });
+    rd()->async_write_journal(new_blkids, req->cheader_buf(), req->ckey_buf(), data_size, req);
+
+    return req->result()
+        .via(&folly::InlineExecutor::instance())
+        .thenValue([this, vol_req](const auto&& result) -> folly::Expected< folly::Unit, VolumeError > {
+            if (result.hasError()) {
+                LOGE("Failed to write to journal for volume: {}, lba: {}, nlbas: {}, error: {}",
+                     vol_info_->name, vol_req->lba, vol_req->nlbas, result.error());
+                auto err = result.error();
+                return folly::makeUnexpected(err);
+            }
+            HISTOGRAM_OBSERVE(*metrics_, volume_journal_write_latency,
+                              get_elapsed_time_us(vol_req->journal_start_time));
+            auto write_size = vol_req->nlbas * rd()->get_blk_size();
+            COUNTER_INCREMENT(*metrics_, volume_write_size_total, write_size);
+            HISTOGRAM_OBSERVE(*metrics_, volume_write_size_distribution, write_size);
+            HISTOGRAM_OBSERVE(*metrics_, volume_write_latency, get_elapsed_time_us(vol_req->io_start_time));
+            return folly::Unit();
+        });
+    });
 }

 VolumeManager::NullAsyncResult Volume::read(const vol_interface_req_ptr& req) {
diff --git a/src/lib/volume_mgr.cpp b/src/lib/volume_mgr.cpp
index e41d52a..f4c7c76 100644
--- a/src/lib/volume_mgr.cpp
+++ b/src/lib/volume_mgr.cpp
@@ -357,11 +357,20 @@ void HomeBlocksImpl::on_write(int64_t lsn, const sisl::blob& header, const sisl:
     }

     // Free all the old blkids. This happens for both normal writes
-    // and crash recovery.
+    // and crash recovery. During recovery we also check whether the blk
+    // is still alloced, because stale log entries may reference old
+    // blkids that have already been freed.
     for (uint32_t i = 0; i < journal_entry->num_old_blks; i++) {
         BlkId old_blkid = *r_cast< const BlkId* >(key_buffer);
-        LOGT("on_write free blk {}", old_blkid);
-        vol_ptr->rd()->async_free_blks(lsn, old_blkid);
+        if (repl_ctx == nullptr) {
+            if (homestore::hs()->data_service().is_blk_alloced(old_blkid)) {
+                LOGT("on_write free blk {} start_lba {}", old_blkid, journal_entry->start_lba);
+                homestore::hs()->data_service().free_blk_now(old_blkid);
+            }
+        } else {
+            LOGT("on_write free blk {} start_lba {}", old_blkid, journal_entry->start_lba);
+            homestore::hs()->data_service().free_blk_now(old_blkid);
+        }
         key_buffer += sizeof(BlkId);
     }
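
Below is a minimal, self-contained sketch of the replay guard described
above: why each free during log replay is gated on is_blk_alloced()
before calling free_blk_now(). Everything here (ToyBlkAllocator and its
bitmap) is invented for illustration and is not HomeBlocks or HomeStore
code; only the two method names mirror the data_service calls in the
diff. Any C++17 compiler should build it.

#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

// Toy stand-in for a blk allocator bitmap cache. Purely illustrative.
class ToyBlkAllocator {
public:
    explicit ToyBlkAllocator(size_t nblks) : bitmap_(nblks, false) {}

    bool is_blk_alloced(uint64_t blkid) const { return bitmap_[blkid]; }
    void alloc_blk(uint64_t blkid) { bitmap_[blkid] = true; }

    // Frees immediately in the bitmap; freeing twice would corrupt state.
    void free_blk_now(uint64_t blkid) {
        assert(bitmap_[blkid] && "double free");
        bitmap_[blkid] = false;
    }

private:
    std::vector< bool > bitmap_;
};

int main() {
    ToyBlkAllocator allocator(8);

    // Two overwrites of the same LBA both freed blk 3 (it was freed,
    // reallocated for the second write, then freed again), so the journal
    // carries two "free blk 3" entries.
    std::vector< uint64_t > old_blk_log{3, 3};

    allocator.alloc_blk(3); // replay starts with blk 3 allocated once

    // Replay: only free entries whose blk is still allocated, mirroring
    // the repl_ctx == nullptr branch of on_write() in the patch.
    for (uint64_t blkid : old_blk_log) {
        if (allocator.is_blk_alloced(blkid)) {
            allocator.free_blk_now(blkid);
            std::cout << "replay: freed blk " << blkid << '\n';
        } else {
            std::cout << "replay: skipped stale entry for blk " << blkid << '\n';
        }
    }
    return 0;
}

Without the is_blk_alloced() check, replay would hit the second
"free blk 3" entry after blk 3 is already free, which is exactly the
double free the recovery path in on_write() now avoids.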