Skip to content

Commit 5cca586

Browse files
committed
Add support for autoexpand property
While the autoexpand property may seem like a small feature, it depends on a significant amount of system infrastructure. Enough of that infrastructure is now in place that, with a few customizations for Linux, the autoexpand property for whole disk configurations can be supported. Autoexpand works as follows: when a block device is resized, a change event is generated by udev with the DISK_MEDIA_CHANGE key. The ZED, which is monitoring udev events, detects the event for disks (but not partitions) and hands it off to zfs_deliver_dle(). The zfs_deliver_dle() function appends the expected whole disk partition suffix, and if the partition can be matched against a known pool vdev it re-opens it. Re-opening the vdev will trigger a re-reading of the partition table so the maximum possible expansion size can be reported. Next, if the property autoexpand is set to "on", a vdev expansion will be attempted. After performing some sanity checks on the disk to verify it's safe to expand the ZFS partition (-part1), it will be expanded and the partition table updated. The partition is then re-opened again to detect the updated size, which allows the new capacity to be used. Added PHYS_PATH="/dev/zvol/dataset" to vdev configuration for ZFS volumes. This was required for the test cases which test expansion by layering a new pool on top of ZFS volumes. Enable the zpool_expand_001_pos and zpool_expand_003_pos test cases which exercise the autoexpand property. Fixed zfs_zevent_wait() signal handling which could result in the ZED spinning when a signal was not handled. Removed vdev_disk_rrpart() functionality, which can be abandoned in favour of re-opening the device, which triggers a re-read of the partition table as long as no other partitions are in use. This will be true as long as we're working with whole disks. As a bonus this allows us to remove two Linux kernel API checks. Signed-off-by: Brian Behlendorf <[email protected]> Issue openzfs#120 Issue openzfs#2437 Issue openzfs#5771 Issue openzfs#7582
1 parent 6d464db commit 5cca586

File tree

11 files changed

+179
-211
lines changed

11 files changed

+179
-211
lines changed

cmd/zed/agents/zfs_mod.c

+11-4
Original file line numberDiff line numberDiff line change
@@ -751,23 +751,30 @@ zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
751751
}
752752

753753
/*
754-
* This function handles the ESC_DEV_DLE event.
754+
* This function handles the ESC_DEV_DLE (DISK_MEDIA_CHANGE) event which
755+
* is only delivered for the disk itself, not for each partition. Presume
756+
* that a 'wholedisk' partition exists and append the expected partition
757+
* suffix in order to attempt a match.
755758
*/
756759
static int
757760
zfs_deliver_dle(nvlist_t *nvl)
758761
{
759-
char *devname;
762+
char *devname, pname[MAXPATHLEN];
760763

761764
if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devname) != 0) {
762765
zed_log_msg(LOG_INFO, "zfs_deliver_dle: no physpath");
763766
return (-1);
764767
}
765768

766-
if (zpool_iter(g_zfshdl, zfsdle_vdev_online, devname) != 1) {
769+
strlcpy(pname, devname, MAXPATHLEN);
770+
zfs_append_partition(pname, MAXPATHLEN);
771+
772+
if (zpool_iter(g_zfshdl, zfsdle_vdev_online, pname) != 1) {
767773
zed_log_msg(LOG_INFO, "zfs_deliver_dle: device '%s' not "
768-
"found", devname);
774+
"found", pname);
769775
return (1);
770776
}
777+
771778
return (0);
772779
}
773780

cmd/zed/zed_disk_event.c

+23-9
Original file line numberDiff line numberDiff line change
@@ -165,11 +165,12 @@ zed_udev_monitor(void *arg)
165165

166166
while (1) {
167167
struct udev_device *dev;
168-
const char *action, *type, *part, *sectors;
168+
const char *action, *type, *part, *sectors, *change;
169169
const char *bus, *uuid;
170170
const char *class, *subclass;
171171
nvlist_t *nvl;
172172
boolean_t is_zfs = B_FALSE;
173+
boolean_t is_disk_media_change = B_FALSE;
173174

174175
/* allow a cancellation while blocked (recvmsg) */
175176
pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
@@ -202,14 +203,26 @@ zed_udev_monitor(void *arg)
202203
}
203204

204205
/*
205-
* if this is a disk and it is partitioned, then the
206+
* Disk media change events are allowed for auto-expand.
207+
* Whether the device contains a zfs_member is determined
208+
* at the time of the attempted expansion.
209+
*/
210+
change = udev_device_get_property_value(dev,
211+
"DISK_MEDIA_CHANGE");
212+
if (change != NULL && change[0] == '1')
213+
is_disk_media_change = B_TRUE;
214+
215+
/*
216+
* If this is a disk and it is partitioned, then the
206217
* zfs label will reside in a DEVTYPE=partition and
207-
* we can skip passing this event
218+
* we can skip passing this event, unless it's a disk
219+
* media change event, which is expected for auto-expand.
208220
*/
209221
type = udev_device_get_property_value(dev, "DEVTYPE");
210222
part = udev_device_get_property_value(dev,
211223
"ID_PART_TABLE_TYPE");
212-
if (type != NULL && type[0] != '\0' &&
224+
if (!is_disk_media_change &&
225+
type != NULL && type[0] != '\0' &&
213226
strcmp(type, "disk") == 0 &&
214227
part != NULL && part[0] != '\0') {
215228
/* skip and wait for partition event */
@@ -231,14 +244,15 @@ zed_udev_monitor(void *arg)
231244
}
232245

233246
/*
234-
* If the blkid probe didn't find ZFS, then a persistent
235-
* device id string is required in the message schema
236-
* for matching with vdevs. Preflight here for expected
237-
* udev information.
247+
* If the blkid probe didn't find ZFS and this is not a
248+
* disk media change event, then a persistent device id
249+
* string is required in the message schema for matching
250+
* with vdevs. Preflight here for expected udev information.
238251
*/
239252
bus = udev_device_get_property_value(dev, "ID_BUS");
240253
uuid = udev_device_get_property_value(dev, "DM_UUID");
241-
if (!is_zfs && (bus == NULL && uuid == NULL)) {
254+
if (!is_zfs && !is_disk_media_change &&
255+
bus == NULL && uuid == NULL) {
242256
zed_log_msg(LOG_INFO, "zed_udev_monitor: %s no devid "
243257
"source", udev_device_get_devnode(dev));
244258
udev_device_unref(dev);

config/kernel-blkdev-get.m4

-19
This file was deleted.

config/kernel-get-gendisk.m4

-17
This file was deleted.

config/kernel.m4

-2
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
4343
ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS
4444
ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID
4545
ZFS_AC_KERNEL_TYPE_FMODE_T
46-
ZFS_AC_KERNEL_3ARG_BLKDEV_GET
4746
ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH
4847
ZFS_AC_KERNEL_OPEN_BDEV_EXCLUSIVE
4948
ZFS_AC_KERNEL_LOOKUP_BDEV
@@ -72,7 +71,6 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
7271
ZFS_AC_KERNEL_BLK_QUEUE_HAVE_BLK_PLUG
7372
ZFS_AC_KERNEL_GET_DISK_AND_MODULE
7473
ZFS_AC_KERNEL_GET_DISK_RO
75-
ZFS_AC_KERNEL_GET_GENDISK
7674
ZFS_AC_KERNEL_HAVE_BIO_SET_OP_ATTRS
7775
ZFS_AC_KERNEL_GENERIC_READLINK_GLOBAL
7876
ZFS_AC_KERNEL_DISCARD_GRANULARITY

lib/libzfs/libzfs_import.c

+47-14
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,21 @@ zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
145145
return (0);
146146
}
147147

148+
/*
149+
* For volumes use the persistent /dev/zvol/dataset identifier
150+
*/
151+
entry = udev_device_get_devlinks_list_entry(dev);
152+
while (entry != NULL) {
153+
const char *name;
154+
155+
name = udev_list_entry_get_name(entry);
156+
if (strncmp(name, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
157+
(void) strlcpy(bufptr, name, buflen);
158+
return (0);
159+
}
160+
entry = udev_list_entry_get_next(entry);
161+
}
162+
148163
/*
149164
* NVME 'by-id' symlinks are similar to bus case
150165
*/
@@ -187,26 +202,44 @@ int
187202
zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
188203
{
189204
const char *physpath = NULL;
205+
struct udev_list_entry *entry;
190206

191207
/*
192-
* Normal disks use ID_PATH for their physical path. Device mapper
193-
* devices are virtual and don't have a physical path. For them we
194-
* use ID_VDEV instead, which is setup via the /etc/vdev_id.conf file.
195-
* ID_VDEV provides a persistent path to a virtual device. If you
196-
* don't have vdev_id.conf setup, you cannot use multipath autoreplace.
208+
* Normal disks use ID_PATH for their physical path.
197209
*/
198-
if (!((physpath = udev_device_get_property_value(dev, "ID_PATH")) &&
199-
physpath[0])) {
200-
if (!((physpath =
201-
udev_device_get_property_value(dev, "ID_VDEV")) &&
202-
physpath[0])) {
203-
return (ENODATA);
204-
}
210+
physpath = udev_device_get_property_value(dev, "ID_PATH");
211+
if (physpath != NULL && strlen(physpath) > 0) {
212+
(void) strlcpy(bufptr, physpath, buflen);
213+
return (0);
214+
}
215+
216+
/*
217+
* Device mapper devices are virtual and don't have a physical
218+
* path. For them we use ID_VDEV instead, which is setup via the
219+
* /etc/vdev_id.conf file. ID_VDEV provides a persistent path
220+
* to a virtual device. If you don't have vdev_id.conf setup,
221+
* you cannot use multipath autoreplace with device mapper.
222+
*/
223+
physpath = udev_device_get_property_value(dev, "ID_VDEV");
224+
if (physpath != NULL && strlen(physpath) > 0) {
225+
(void) strlcpy(bufptr, physpath, buflen);
226+
return (0);
205227
}
206228

207-
(void) strlcpy(bufptr, physpath, buflen);
229+
/*
230+
* For volumes use the persistent /dev/zvol/dataset identifier
231+
*/
232+
entry = udev_device_get_devlinks_list_entry(dev);
233+
while (entry != NULL) {
234+
physpath = udev_list_entry_get_name(entry);
235+
if (strncmp(physpath, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
236+
(void) strlcpy(bufptr, physpath, buflen);
237+
return (0);
238+
}
239+
entry = udev_list_entry_get_next(entry);
240+
}
208241

209-
return (0);
242+
return (ENODATA);
210243
}
211244

212245
boolean_t

module/zfs/fm.c

+21-9
Original file line numberDiff line numberDiff line change
@@ -671,19 +671,31 @@ zfs_zevent_wait(zfs_zevent_t *ze)
671671
int error = 0;
672672

673673
mutex_enter(&zevent_lock);
674+
zevent_waiters++;
674675

675-
if (zevent_flags & ZEVENT_SHUTDOWN) {
676-
error = ESHUTDOWN;
677-
goto out;
678-
}
676+
while (error == 0) {
677+
if (zevent_flags & ZEVENT_SHUTDOWN) {
678+
error = SET_ERROR(ESHUTDOWN);
679+
break;
680+
}
679681

680-
zevent_waiters++;
681-
cv_wait_sig(&zevent_cv, &zevent_lock);
682-
if (issig(JUSTLOOKING))
683-
error = EINTR;
682+
error = cv_timedwait_sig(&zevent_cv, &zevent_lock,
683+
ddi_get_lbolt() + hz);
684+
if (signal_pending(current) || fatal_signal_pending(current)) {
685+
error = SET_ERROR(EINTR);
686+
break;
687+
} else {
688+
if (error == -1) {
689+
error = 0;
690+
continue;
691+
} else {
692+
error = 0;
693+
break;
694+
}
695+
}
696+
}
684697

685698
zevent_waiters--;
686-
out:
687699
mutex_exit(&zevent_lock);
688700

689701
return (error);

module/zfs/vdev.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -3097,7 +3097,8 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
30973097
/* XXX - L2ARC 1.0 does not support expansion */
30983098
if (!vd->vdev_aux) {
30993099
for (pvd = vd; pvd != rvd; pvd = pvd->vdev_parent)
3100-
pvd->vdev_expanding = !!(flags & ZFS_ONLINE_EXPAND);
3100+
pvd->vdev_expanding = !!((flags & ZFS_ONLINE_EXPAND) ||
3101+
spa->spa_autoexpand);
31013102
}
31023103

31033104
vdev_reopen(tvd);

0 commit comments

Comments
 (0)