Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Resilver restarts repeatedly when checkpoint is present #17109

Open
amarshall opened this issue Mar 3, 2025 · 0 comments
Open

Resilver restarts repeatedly when checkpoint is present #17109

amarshall opened this issue Mar 3, 2025 · 0 comments
Labels
Type: Defect Incorrect behavior (e.g. crash, hang)

Comments

@amarshall
Copy link

System information

Type Version/Name
Distribution Name NixOS
Distribution Version unstable
Kernel Version 6.6
Architecture x86_64
OpenZFS Version 2.2.7

Describe the problem you're observing

A resilver restarts repeatedly while a pool checkpoint is present. Discarding the checkpoint resolves the problem: the next resilver runs to completion.

Describe how to reproduce the problem

I do not have specific, repeatable reproduction steps at this time. Roughly, though:

  1. zpool checkpoint tank
  2. Cause a disk to need a resilver
  3. zpool clear the disk
  4. …discard the checkpoint (zpool checkpoint -d tank) to stop the resilver from constantly restarting

Include any warning/errors/backtraces from the system logs

zfs events

The event log below is truncated.

Mar  3 2025 00:08:23.950317618 sysevent.fs.zfs.history_event
        version = 0x0
        class = "sysevent.fs.zfs.history_event"
        pool = "mytank"
        pool_guid = 0xec15e7c2130c98d4
        pool_state = 0x0
        pool_context = 0x0
        history_hostname = "myhostname"
        history_internal_str = "errors=0"
        history_internal_name = "starting deferred resilver"
        history_txg = 0x9620f1
        history_time = 0x67c4f2f7
        time = 0x67c4f2f7 0x38a4b232 
        eid = 0x455

Mar  3 2025 00:08:29.172279886 sysevent.fs.zfs.resilver_start
        version = 0x0
        class = "sysevent.fs.zfs.resilver_start"
        pool = "mytank"
        pool_guid = 0xec15e7c2130c98d4
        pool_state = 0x0
        pool_context = 0x0
        resilver_type = "healing"
        time = 0x67c4f2fd 0xa44c84e 
        eid = 0x456

Mar  3 2025 00:08:29.172279886 sysevent.fs.zfs.history_event
        version = 0x0
        class = "sysevent.fs.zfs.history_event"
        pool = "mytank"
        pool_guid = 0xec15e7c2130c98d4
        pool_state = 0x0
        pool_context = 0x0
        history_hostname = "myhostname"
        history_internal_str = "func=2 mintxg=9782617 maxtxg=9838805"
        history_internal_name = "scan setup"
        history_txg = 0x9620f2
        history_time = 0x67c4f2fd
        time = 0x67c4f2fd 0xa44c84e 
        eid = 0x457

Mar  3 2025 00:09:02.500040402 sysevent.fs.zfs.history_event
        version = 0x0
        class = "sysevent.fs.zfs.history_event"
        pool = "mytank"
        pool_guid = 0xec15e7c2130c98d4
        pool_state = 0x0
        pool_context = 0x0
        history_hostname = "myhostname"
        history_internal_str = "errors=0"
        history_internal_name = "scan done"
        history_txg = 0x9620f9
        history_time = 0x67c4f31e
        time = 0x67c4f31e 0x1dce02d2 
        eid = 0x458

Mar  3 2025 00:09:02.501040395 sysevent.fs.zfs.history_event
        version = 0x0
        class = "sysevent.fs.zfs.history_event"
        pool = "mytank"
        pool_guid = 0xec15e7c2130c98d4
        pool_state = 0x0
        pool_context = 0x0
        history_hostname = "myhostname"
        history_internal_str = "errors=0"
        history_internal_name = "starting deferred resilver"
        history_txg = 0x9620f9
        history_time = 0x67c4f31e
        time = 0x67c4f31e 0x1ddd450b 
        eid = 0x459

Mar  3 2025 00:09:07.572004153 sysevent.fs.zfs.resilver_start
        version = 0x0
        class = "sysevent.fs.zfs.resilver_start"
        pool = "mytank"
        pool_guid = 0xec15e7c2130c98d4
        pool_state = 0x0
        pool_context = 0x0
        resilver_type = "healing"
        time = 0x67c4f323 0x22181739 
        eid = 0x45a

Mar  3 2025 00:09:07.572004153 sysevent.fs.zfs.history_event
        version = 0x0
        class = "sysevent.fs.zfs.history_event"
        pool = "mytank"
        pool_guid = 0xec15e7c2130c98d4
        pool_state = 0x0
        pool_context = 0x0
        history_hostname = "myhostname"
        history_internal_str = "func=2 mintxg=9782617 maxtxg=9838805"
        history_internal_name = "scan setup"
        history_txg = 0x9620fa
        history_time = 0x67c4f323
        time = 0x67c4f323 0x22181739 
        eid = 0x45b

Mar  3 2025 00:09:39.641776094 sysevent.fs.zfs.history_event
        version = 0x0
        class = "sysevent.fs.zfs.history_event"
        pool = "mytank"
        pool_guid = 0xec15e7c2130c98d4
        pool_state = 0x0
        pool_context = 0x0
        history_hostname = "myhostname"
        history_internal_str = "errors=0"
        history_internal_name = "scan done"
        history_txg = 0x962101
        history_time = 0x67c4f343
        time = 0x67c4f343 0x2640b9de 
        eid = 0x45c

Mar  3 2025 00:09:39.642776087 sysevent.fs.zfs.history_event
        version = 0x0
        class = "sysevent.fs.zfs.history_event"
        pool = "mytank"
        pool_guid = 0xec15e7c2130c98d4
        pool_state = 0x0
        pool_context = 0x0
        history_hostname = "myhostname"
        history_internal_str = "errors=0"
        history_internal_name = "starting deferred resilver"
        history_txg = 0x962101
        history_time = 0x67c4f343
        time = 0x67c4f343 0x264ffc17 
        eid = 0x45d

Mar  3 2025 00:09:44.947738547 sysevent.fs.zfs.resilver_start
        version = 0x0
        class = "sysevent.fs.zfs.resilver_start"
        pool = "mytank"
        pool_guid = 0xec15e7c2130c98d4
        pool_state = 0x0
        pool_context = 0x0
        resilver_type = "healing"
        time = 0x67c4f348 0x387d57b3 
        eid = 0x45e

Mar  3 2025 00:09:44.947738547 sysevent.fs.zfs.history_event
        version = 0x0
        class = "sysevent.fs.zfs.history_event"
        pool = "mytank"
        pool_guid = 0xec15e7c2130c98d4
        pool_state = 0x0
        pool_context = 0x0
        history_hostname = "myhostname"
        history_internal_str = "func=2 mintxg=9782617 maxtxg=9838805"
        history_internal_name = "scan setup"
        history_txg = 0x962102
        history_time = 0x67c4f348
        time = 0x67c4f348 0x387d57b3 
        eid = 0x45f

Mar  3 2025 00:10:16.871513707 sysevent.fs.zfs.history_event
        version = 0x0
        class = "sysevent.fs.zfs.history_event"
        pool = "mytank"
        pool_guid = 0xec15e7c2130c98d4
        pool_state = 0x0
        pool_context = 0x0
        history_hostname = "myhostname"
        history_internal_str = "errors=0"
        history_internal_name = "scan done"
        history_txg = 0x962109
        history_time = 0x67c4f368
        time = 0x67c4f368 0x33f23e6b 
        eid = 0x460

Mar  3 2025 00:10:16.872513700 sysevent.fs.zfs.history_event
        version = 0x0
        class = "sysevent.fs.zfs.history_event"
        pool = "mytank"
        pool_guid = 0xec15e7c2130c98d4
        pool_state = 0x0
        pool_context = 0x0
        history_hostname = "myhostname"
        history_internal_str = "errors=0"
        history_internal_name = "starting deferred resilver"
        history_txg = 0x962109
        history_time = 0x67c4f368
        time = 0x67c4f368 0x340180a4 
        eid = 0x461

Mar  3 2025 00:10:22.323475486 sysevent.fs.zfs.resilver_start
        version = 0x0
        class = "sysevent.fs.zfs.resilver_start"
        pool = "mytank"
        pool_guid = 0xec15e7c2130c98d4
        pool_state = 0x0
        pool_context = 0x0
        resilver_type = "healing"
        time = 0x67c4f36e 0x1347d81e 
        eid = 0x462

Mar  3 2025 00:10:22.323475486 sysevent.fs.zfs.history_event
        version = 0x0
        class = "sysevent.fs.zfs.history_event"
        pool = "mytank"
        pool_guid = 0xec15e7c2130c98d4
        pool_state = 0x0
        pool_context = 0x0
        history_hostname = "myhostname"
        history_internal_str = "func=2 mintxg=9782617 maxtxg=9838805"
        history_internal_name = "scan setup"
        history_txg = 0x96210a
        history_time = 0x67c4f36e
        time = 0x67c4f36e 0x1347d81e 
        eid = 0x463

Mar  3 2025 00:10:40.365349358 resource.fs.zfs.statechange
        version = 0x0
        class = "resource.fs.zfs.statechange"
        pool = "mytank"
        pool_guid = 0xec15e7c2130c98d4
        pool_state = 0x0
        pool_context = 0x0
        vdev_guid = 0xb4ec113378d257d1
        vdev_state = "FAULTED" (0x5)
        vdev_path = "/dev/mydisk"
        vdev_laststate = "ONLINE" (0x7)
        time = 0x67c4f380 0x15c6c9ee 
        eid = 0x464

Mar  3 2025 00:10:50.011282144 sysevent.fs.zfs.config_sync
        version = 0x0
        class = "sysevent.fs.zfs.config_sync"
        pool = "mytank"
        pool_guid = 0xec15e7c2130c98d4
        pool_state = 0x0
        pool_context = 0x0
        time = 0x67c4f38a 0xac26e0 
        eid = 0x465

Mar  3 2025 00:10:52.673263622 sysevent.fs.zfs.history_event
        version = 0x0
        class = "sysevent.fs.zfs.history_event"
        pool = "mytank"
        pool_guid = 0xec15e7c2130c98d4
        pool_state = 0x0
        pool_context = 0x0
        history_hostname = "myhostname"
        history_internal_str = "errors=0"
        history_internal_name = "scan done"
        history_txg = 0x962112
        history_time = 0x67c4f38c
        time = 0x67c4f38c 0x28213006 
        eid = 0x466

Mar  3 2025 00:11:25.640035151 sysevent.fs.zfs.history_event
        version = 0x0
        class = "sysevent.fs.zfs.history_event"
        pool = "mytank"
        pool_guid = 0xec15e7c2130c98d4
        pool_state = 0x0
        pool_context = 0x0
        history_hostname = "myhostname"
        history_internal_str = "started discarding checkpointed state from the pool"
        history_internal_name = "spa discard checkpoint"
        history_txg = 0x962119
        history_time = 0x67c4f3ad
        time = 0x67c4f3ad 0x2626294f 
        eid = 0x467

Mar  3 2025 00:11:26.058032265 sysevent.fs.zfs.history_event
        version = 0x0
        class = "sysevent.fs.zfs.history_event"
        pool = "mytank"
        pool_guid = 0xec15e7c2130c98d4
        pool_state = 0x0
        pool_context = 0x0
        history_hostname = "myhostname"
        history_internal_str = "finished discarding checkpointed state from the pool"
        history_internal_name = "spa discard checkpoint"
        history_txg = 0x96211b
        history_time = 0x67c4f3ae
        time = 0x67c4f3ae 0x3758089 
        eid = 0x468

Mar  3 2025 00:11:32.473987996 resource.fs.zfs.statechange
        version = 0x0
        class = "resource.fs.zfs.statechange"
        pool = "mytank"
        pool_guid = 0xec15e7c2130c98d4
        pool_state = 0x0
        pool_context = 0x0
        vdev_guid = 0xb4ec113378d257d1
        vdev_state = "ONLINE" (0x7)
        vdev_path = "/dev/mydisk"
        vdev_laststate = "FAULTED" (0x5)
        time = 0x67c4f3b4 0x1c407b9c 
        eid = 0x469

Mar  3 2025 00:11:32.580987259 sysevent.fs.zfs.vdev_clear
        version = 0x0
        class = "sysevent.fs.zfs.vdev_clear"
        pool = "mytank"
        pool_guid = 0xec15e7c2130c98d4
        pool_state = 0x0
        pool_context = 0x0
        vdev_guid = 0xb4ec113378d257d1
        vdev_state = "ONLINE" (0x7)
        vdev_path = "/dev/mydisk"
        time = 0x67c4f3b4 0x22a1297b 
        eid = 0x46a

Mar  3 2025 00:11:32.864985301 sysevent.fs.zfs.resilver_start
        version = 0x0
        class = "sysevent.fs.zfs.resilver_start"
        pool = "mytank"
        pool_guid = 0xec15e7c2130c98d4
        pool_state = 0x0
        pool_context = 0x0
        resilver_type = "healing"
        time = 0x67c4f3b4 0x338ea0d5 
        eid = 0x46b

Mar  3 2025 00:11:32.864985301 sysevent.fs.zfs.history_event
        version = 0x0
        class = "sysevent.fs.zfs.history_event"
        pool = "mytank"
        pool_guid = 0xec15e7c2130c98d4
        pool_state = 0x0
        pool_context = 0x0
        history_hostname = "myhostname"
        history_internal_str = "func=2 mintxg=9782617 maxtxg=9838877"
        history_internal_name = "scan setup"
        history_txg = 0x96211e
        history_time = 0x67c4f3b4
        time = 0x67c4f3b4 0x338ea0d5 
        eid = 0x46c

Mar  3 2025 00:11:39.313940869 sysevent.fs.zfs.config_sync
        version = 0x0
        class = "sysevent.fs.zfs.config_sync"
        pool = "mytank"
        pool_guid = 0xec15e7c2130c98d4
        pool_state = 0x0
        pool_context = 0x0
        time = 0x67c4f3bb 0x12b65b85 
        eid = 0x46d

Mar  3 2025 00:12:05.063764056 sysevent.fs.zfs.history_event
        version = 0x0
        class = "sysevent.fs.zfs.history_event"
        pool = "mytank"
        pool_guid = 0xec15e7c2130c98d4
        pool_state = 0x0
        pool_context = 0x0
        history_hostname = "myhostname"
        history_internal_str = "errors=0"
        history_internal_name = "scan done"
        history_txg = 0x962126
        history_time = 0x67c4f3d5
        time = 0x67c4f3d5 0x3ccf658 
        eid = 0x46e

Mar  3 2025 00:12:05.063764056 sysevent.fs.zfs.resilver_finish
        version = 0x0
        class = "sysevent.fs.zfs.resilver_finish"
        pool = "mytank"
        pool_guid = 0xec15e7c2130c98d4
        pool_state = 0x0
        pool_context = 0x0
        resilver_type = "healing"
        time = 0x67c4f3d5 0x3ccf658 
        eid = 0x46f
@amarshall amarshall added the Type: Defect Incorrect behavior (e.g. crash, hang) label Mar 3, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Type: Defect Incorrect behavior (e.g. crash, hang)
Projects
None yet
Development

No branches or pull requests

1 participant