From 0ae990dfdf7f14dc76483ce4d99987b91698a508 Mon Sep 17 00:00:00 2001 From: "david.chen" Date: Fri, 25 Oct 2024 00:26:12 +0000 Subject: [PATCH] ZFS send should use spill block prefetched from send_reader_thread Currently, even though send_reader_thread prefetches spill block, do_dump() will not use it and issues its own blocking arc_read. This causes significant performance degradation when sending datasets with lots of spill blocks. Signed-off-by: Chunwei Chen --- module/zfs/dmu_send.c | 40 ++++++++++------------------------------ 1 file changed, 10 insertions(+), 30 deletions(-) diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index a174972e9b57..e5fb0bd3213b 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -939,34 +939,7 @@ do_dump(dmu_send_cookie_t *dscp, struct send_range *range) ASSERT3U(srdp->datablksz, ==, BP_GET_LSIZE(bp)); ASSERT3U(range->start_blkid + 1, ==, range->end_blkid); - if (BP_GET_TYPE(bp) == DMU_OT_SA) { - arc_flags_t aflags = ARC_FLAG_WAIT; - zio_flag_t zioflags = ZIO_FLAG_CANFAIL; - - if (dscp->dsc_featureflags & DMU_BACKUP_FEATURE_RAW) { - ASSERT(BP_IS_PROTECTED(bp)); - zioflags |= ZIO_FLAG_RAW; - } - zbookmark_phys_t zb; - ASSERT3U(range->start_blkid, ==, DMU_SPILL_BLKID); - zb.zb_objset = dmu_objset_id(dscp->dsc_os); - zb.zb_object = range->object; - zb.zb_level = 0; - zb.zb_blkid = range->start_blkid; - - arc_buf_t *abuf = NULL; - if (!dscp->dsc_dso->dso_dryrun && arc_read(NULL, spa, - bp, arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, - zioflags, &aflags, &zb) != 0) - return (SET_ERROR(EIO)); - - err = dump_spill(dscp, bp, zb.zb_object, - (abuf == NULL ? NULL : abuf->b_data)); - if (abuf != NULL) - arc_buf_destroy(abuf, &abuf); - return (err); - } if (send_do_embed(bp, dscp->dsc_featureflags)) { err = dump_write_embedded(dscp, range->object, range->start_blkid * srdp->datablksz, @@ -975,8 +948,9 @@ do_dump(dmu_send_cookie_t *dscp, struct send_range *range) } ASSERT(range->object > dscp->dsc_resume_object || (range->object == dscp->dsc_resume_object && + (range->start_blkid == DMU_SPILL_BLKID || range->start_blkid * srdp->datablksz >= - dscp->dsc_resume_offset)); + dscp->dsc_resume_offset))); /* it's a level-0 block of a regular object */ mutex_enter(&srdp->lock); @@ -1006,8 +980,6 @@ do_dump(dmu_send_cookie_t *dscp, struct send_range *range) ASSERT(dscp->dsc_dso->dso_dryrun || srdp->abuf != NULL || srdp->abd != NULL); - uint64_t offset = range->start_blkid * srdp->datablksz; - char *data = NULL; if (srdp->abd != NULL) { data = abd_to_buf(srdp->abd); @@ -1016,6 +988,14 @@ do_dump(dmu_send_cookie_t *dscp, struct send_range *range) data = srdp->abuf->b_data; } + if (BP_GET_TYPE(bp) == DMU_OT_SA) { + ASSERT3U(range->start_blkid, ==, DMU_SPILL_BLKID); + err = dump_spill(dscp, bp, range->object, data); + return (err); + } + + uint64_t offset = range->start_blkid * srdp->datablksz; + /* * If we have large blocks stored on disk but the send flags * don't allow us to send large blocks, we split the data from