Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Wait in libzfs_init() for the /dev/zfs device #3431

Closed
wants to merge 2 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion cmd/mount_zfs/mount_zfs.c
Original file line number Diff line number Diff line change
@@ -473,8 +473,10 @@ main(int argc, char **argv)
if (zfsflags & ZS_ZFSUTIL)
zfsutil = 1;

if ((g_zfs = libzfs_init()) == NULL)
if ((g_zfs = libzfs_init()) == NULL) {
(void) fprintf(stderr, "%s", libzfs_error_init(errno));
return (MOUNT_SYSERR);
}

/* try to open the dataset to access the mount point */
if ((zhp = zfs_open(g_zfs, dataset,
4 changes: 3 additions & 1 deletion cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
@@ -3699,8 +3699,10 @@ main(int argc, char **argv)
zfs_vdev_async_read_max_active = 10;

kernel_init(FREAD);
if ((g_zfs = libzfs_init()) == NULL)
if ((g_zfs = libzfs_init()) == NULL) {
(void) fprintf(stderr, "%s", libzfs_error_init(errno));
return (1);
}

if (dump_all)
verbose = MAX(verbose, 1);
4 changes: 3 additions & 1 deletion cmd/zfs/zfs_main.c
Original file line number Diff line number Diff line change
@@ -6708,8 +6708,10 @@ main(int argc, char **argv)
(strcmp(cmdname, "--help") == 0))
usage(B_TRUE);

if ((g_zfs = libzfs_init()) == NULL)
if ((g_zfs = libzfs_init()) == NULL) {
(void) fprintf(stderr, "%s", libzfs_error_init(errno));
return (1);
}

mnttab_file = g_zfs->libzfs_mnttab;

4 changes: 3 additions & 1 deletion cmd/zinject/zinject.c
Original file line number Diff line number Diff line change
@@ -572,8 +572,10 @@ main(int argc, char **argv)
int ret;
int flags = 0;

if ((g_zfs = libzfs_init()) == NULL)
if ((g_zfs = libzfs_init()) == NULL) {
(void) fprintf(stderr, "%s", libzfs_error_init(errno));
return (1);
}

libzfs_print_on_error(g_zfs, B_TRUE);

4 changes: 3 additions & 1 deletion cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
@@ -5919,8 +5919,10 @@ main(int argc, char **argv)
if ((strcmp(cmdname, "-?") == 0) || strcmp(cmdname, "--help") == 0)
usage(B_TRUE);

if ((g_zfs = libzfs_init()) == NULL)
if ((g_zfs = libzfs_init()) == NULL) {
(void) fprintf(stderr, "%s", libzfs_error_init(errno));
return (1);
}

libzfs_print_on_error(g_zfs, B_TRUE);

2 changes: 1 addition & 1 deletion etc/systemd/system/zfs-import-cache.service.in
Original file line number Diff line number Diff line change
@@ -9,4 +9,4 @@ ConditionPathExists=@sysconfdir@/zfs/zpool.cache
[Service]
Type=oneshot
RemainAfterExit=yes
ExecStart=@sbindir@/zpool import -c @sysconfdir@/zfs/zpool.cache -aN
ExecStart=/sbin/modprobe zfs && @sbindir@/zpool import -c @sysconfdir@/zfs/zpool.cache -aN
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest adding a -F here to match the behavior when the module auto-imports the pool itself to fix #3434.

2 changes: 1 addition & 1 deletion etc/systemd/system/zfs-import-scan.service.in
Original file line number Diff line number Diff line change
@@ -9,4 +9,4 @@ ConditionPathExists=!@sysconfdir@/zfs/zpool.cache
[Service]
Type=oneshot
RemainAfterExit=yes
ExecStart=@sbindir@/zpool import -d /dev/disk/by-id -aN
ExecStart=/sbin/modprobe zfs && @sbindir@/zpool import -d /dev/disk/by-id -aN
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest adding a -F here to match the behavior when the module auto-imports the pool itself to fix #3434.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than sneak that in here. Let's tackle that in a separate patch because why that's desirable is going to need a fair bit of explanation and it would be a change in existing behavior.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is fine.

2 changes: 1 addition & 1 deletion include/libzfs.h
Original file line number Diff line number Diff line change
@@ -203,6 +203,7 @@ extern void zfs_save_arguments(int argc, char **, char *, int);
extern int zpool_log_history(libzfs_handle_t *, const char *);

extern int libzfs_errno(libzfs_handle_t *);
extern const char *libzfs_error_init(int);
extern const char *libzfs_error_action(libzfs_handle_t *);
extern const char *libzfs_error_description(libzfs_handle_t *);
extern int zfs_standard_error(libzfs_handle_t *, int, const char *);
@@ -749,7 +750,6 @@ extern int zfs_nicestrtonum(libzfs_handle_t *, const char *, uint64_t *);
#define STDERR_VERBOSE 0x02

int libzfs_run_process(const char *, char **, int flags);
int libzfs_load_module(const char *);

/*
* Given a device or file, determine if it is part of a pool.
115 changes: 95 additions & 20 deletions lib/libzfs/libzfs_util.c
Original file line number Diff line number Diff line change
@@ -58,6 +58,31 @@ libzfs_errno(libzfs_handle_t *hdl)
return (hdl->libzfs_error);
}

const char *
libzfs_error_init(int error)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The name libzfs_error_init makes me think you're initializing some libzfs_error subsystem.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I oped for this to be consistent with the existing libzfs_error_action() and libzfs_error_description() functions. Consistency is good.

{
switch (error) {
case ENXIO:
return (dgettext(TEXT_DOMAIN, "The ZFS modules are not "
"loaded.\nTry running '/sbin/modprobe zfs' as root "
"to load them.\n"));
case ENOENT:
return (dgettext(TEXT_DOMAIN, "The /dev/zfs device is "
"missing and must be created.\nTry running 'udevadm "
"trigger' as root to create it.\n"));
case ENOEXEC:
return (dgettext(TEXT_DOMAIN, "The ZFS modules cannot be "
"auto-loaded.\nTry running '/sbin/modprobe zfs' as "
"root to manually load them.\n"));
case EACCES:
return (dgettext(TEXT_DOMAIN, "Permission denied the "
"ZFS utilities must be run as root.\n"));
default:
return (dgettext(TEXT_DOMAIN, "Failed to initialize the "
"libzfs library.\n"));
}
}

const char *
libzfs_error_action(libzfs_handle_t *hdl)
{
@@ -628,7 +653,7 @@ int
libzfs_run_process(const char *path, char *argv[], int flags)
{
pid_t pid;
int rc, devnull_fd;
int error, devnull_fd;

pid = vfork();
if (pid == 0) {
@@ -650,9 +675,9 @@ libzfs_run_process(const char *path, char *argv[], int flags)
} else if (pid > 0) {
int status;

while ((rc = waitpid(pid, &status, 0)) == -1 &&
while ((error = waitpid(pid, &status, 0)) == -1 &&
errno == EINTR);
if (rc < 0 || !WIFEXITED(status))
if (error < 0 || !WIFEXITED(status))
return (-1);

return (WEXITSTATUS(status));
@@ -661,26 +686,85 @@ libzfs_run_process(const char *path, char *argv[], int flags)
return (-1);
}

int
/*
* Verify the required ZFS_DEV device is available and optionally attempt
* to load the ZFS modules. Under normal circumstances the modules
* should already have been loaded by some external mechanism.
*
* Environment variables:
* - ZFS_MODULE_LOADING="YES|yes|ON|on" - Attempt to load modules.
* - ZFS_MODULE_TIMEOUT="<seconds>" - Seconds to wait for ZFS_DEV
*/
static int
libzfs_load_module(const char *module)
{
char *argv[4] = {"/sbin/modprobe", "-q", (char *)module, (char *)0};
char *load_str, *timeout_str;
long timeout = 10; /* seconds */
long busy_timeout = 10; /* milliseconds */
int load = 0, fd;
hrtime_t start;

/* Optionally request module loading */
if (!libzfs_module_loaded(module)) {
load_str = getenv("ZFS_MODULE_LOADING");
if (load_str) {
if (!strncasecmp(load_str, "YES", strlen("YES")) ||
!strncasecmp(load_str, "ON", strlen("ON")))
load = 1;
else
load = 0;
}

if (libzfs_module_loaded(module))
return (0);
if (load && libzfs_run_process("/sbin/modprobe", argv, 0))
return (ENOEXEC);
}

/* Module loading is synchronous it must be available */
if (!libzfs_module_loaded(module))
return (ENXIO);

/*
* Device creation by udev is asynchronous and waiting may be
* required. Busy wait for 10ms and then fall back to polling every
* 10ms for the allowed timeout (default 10s, max 10m). This is
* done to optimize for the common case where the device is
* immediately available and to avoid penalizing the possible
* case where udev is slow or unable to create the device.
*/
timeout_str = getenv("ZFS_MODULE_TIMEOUT");
if (timeout_str) {
timeout = strtol(timeout_str, NULL, 0);
timeout = MAX(MIN(timeout, (10 * 60)), 0); /* 0 <= N <= 600 */
}

return (libzfs_run_process("/sbin/modprobe", argv, 0));
start = gethrtime();
do {
fd = open(ZFS_DEV, O_RDWR);
if (fd >= 0) {
(void) close(fd);
return (0);
} else if (errno != ENOENT) {
return (errno);
} else if (NSEC2MSEC(gethrtime() - start) < busy_timeout) {
sched_yield();
} else {
usleep(10 * MILLISEC);
}
} while (NSEC2MSEC(gethrtime() - start) < (timeout * MILLISEC));

return (ENOENT);
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you think of structuring the loop like this?

start = gethrtime();
do {
        fd = open(ZFS_DEV, O_RDWR);
        if (fd >= 0) {
                (void) close(fd);
                return (0);
        } else if (errno != ENOENT) {
                return (errno);
        } else if (NSEC2MSEC(gethrtime() - start) < busy_timeout) {
                sched_yield();
        } else {
                usleep(10 * MILLISEC);
        }
} while (NSEC2MSEC(gethrtime() - start) < (timeout * MILLISEC));

return (ENOENT);

The do-while ensures the open() will be attempted at least once even if timeout is 0.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can get on board with this. Originally @ryao has structured this as a do-while which normally I like to avoid since I think it hurts readability. But in this case it does makes good sense.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A do-while loop is more readable than a while loop when you want the condition to only be checked after the first iteration.


libzfs_handle_t *
libzfs_init(void)
{
libzfs_handle_t *hdl;
int error;

if (libzfs_load_module("zfs") != 0) {
(void) fprintf(stderr, gettext("Failed to load ZFS module "
"stack.\nLoad the module manually by running "
"'insmod <location>/zfs.ko' as root.\n"));
error = libzfs_load_module(ZFS_DRIVER);
if (error) {
errno = error;
return (NULL);
}

@@ -689,13 +773,6 @@ libzfs_init(void)
}

if ((hdl->libzfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
(void) fprintf(stderr, gettext("Unable to open %s: %s.\n"),
ZFS_DEV, strerror(errno));
if (errno == ENOENT)
(void) fprintf(stderr,
gettext("Verify the ZFS module stack is "
"loaded by running '/sbin/modprobe zfs'.\n"));

free(hdl);
return (NULL);
}
@@ -706,8 +783,6 @@ libzfs_init(void)
if ((hdl->libzfs_mnttab = fopen(MNTTAB, "r")) == NULL) {
#endif
(void) close(hdl->libzfs_fd);
(void) fprintf(stderr,
gettext("mtab is not present at %s.\n"), MNTTAB);
free(hdl);
return (NULL);
}