Skip to content

Commit c03185f

Browse files
dhowellsgregkh
authored andcommitted
vfs: Fix potential circular locking through setxattr() and removexattr()
[ Upstream commit c3a5e3e ] When using cachefiles, lockdep may emit something similar to the circular locking dependency notice below. The problem appears to stem from the following: (1) Cachefiles manipulates xattrs on the files in its cache when called from ->writepages(). (2) The setxattr() and removexattr() system call handlers get the name (and value) from userspace after taking the sb_writers lock, putting accesses of the vma->vm_lock and mm->mmap_lock inside of that. (3) The afs filesystem uses a per-inode lock to prevent multiple revalidation RPCs and in writeback vs truncate to prevent parallel operations from deadlocking against the server on one side and local page locks on the other. Fix this by moving the getting of the name and value in {get,remove}xattr() outside of the sb_writers lock. This also has the minor benefits that we don't need to reget these in the event of a retry and we never try to take the sb_writers lock in the event we can't pull the name and value into the kernel. Alternative approaches that might fix this include moving the dispatch of a write to the cache off to a workqueue or trying to do without the validation lock in afs. Note that this might also affect other filesystems that use netfslib and/or cachefiles. ====================================================== WARNING: possible circular locking dependency detected 6.10.0-build2+ torvalds#956 Not tainted ------------------------------------------------------ fsstress/6050 is trying to acquire lock: ffff888138fd82f0 (mapping.invalidate_lock#3){++++}-{3:3}, at: filemap_fault+0x26e/0x8b0 but task is already holding lock: ffff888113f26d18 (&vma->vm_lock->lock){++++}-{3:3}, at: lock_vma_under_rcu+0x165/0x250 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> Freescale#4 (&vma->vm_lock->lock){++++}-{3:3}: __lock_acquire+0xaf0/0xd80 lock_acquire.part.0+0x103/0x280 down_write+0x3b/0x50 vma_start_write+0x6b/0xa0 vma_link+0xcc/0x140 insert_vm_struct+0xb7/0xf0 alloc_bprm+0x2c1/0x390 kernel_execve+0x65/0x1a0 call_usermodehelper_exec_async+0x14d/0x190 ret_from_fork+0x24/0x40 ret_from_fork_asm+0x1a/0x30 -> Freescale#3 (&mm->mmap_lock){++++}-{3:3}: __lock_acquire+0xaf0/0xd80 lock_acquire.part.0+0x103/0x280 __might_fault+0x7c/0xb0 strncpy_from_user+0x25/0x160 removexattr+0x7f/0x100 __do_sys_fremovexattr+0x7e/0xb0 do_syscall_64+0x9f/0x100 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> Freescale#2 (sb_writers#14){.+.+}-{0:0}: __lock_acquire+0xaf0/0xd80 lock_acquire.part.0+0x103/0x280 percpu_down_read+0x3c/0x90 vfs_iocb_iter_write+0xe9/0x1d0 __cachefiles_write+0x367/0x430 cachefiles_issue_write+0x299/0x2f0 netfs_advance_write+0x117/0x140 netfs_write_folio.isra.0+0x5ca/0x6e0 netfs_writepages+0x230/0x2f0 afs_writepages+0x4d/0x70 do_writepages+0x1e8/0x3e0 filemap_fdatawrite_wbc+0x84/0xa0 __filemap_fdatawrite_range+0xa8/0xf0 file_write_and_wait_range+0x59/0x90 afs_release+0x10f/0x270 __fput+0x25f/0x3d0 __do_sys_close+0x43/0x70 do_syscall_64+0x9f/0x100 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> Freescale#1 (&vnode->validate_lock){++++}-{3:3}: __lock_acquire+0xaf0/0xd80 lock_acquire.part.0+0x103/0x280 down_read+0x95/0x200 afs_writepages+0x37/0x70 do_writepages+0x1e8/0x3e0 filemap_fdatawrite_wbc+0x84/0xa0 filemap_invalidate_inode+0x167/0x1e0 netfs_unbuffered_write_iter+0x1bd/0x2d0 vfs_write+0x22e/0x320 ksys_write+0xbc/0x130 do_syscall_64+0x9f/0x100 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> #0 (mapping.invalidate_lock#3){++++}-{3:3}: check_noncircular+0x119/0x160 check_prev_add+0x195/0x430 __lock_acquire+0xaf0/0xd80 lock_acquire.part.0+0x103/0x280 down_read+0x95/0x200 filemap_fault+0x26e/0x8b0 __do_fault+0x57/0xd0 do_pte_missing+0x23b/0x320 __handle_mm_fault+0x2d4/0x320 handle_mm_fault+0x14f/0x260 do_user_addr_fault+0x2a2/0x500 exc_page_fault+0x71/0x90 asm_exc_page_fault+0x22/0x30 other info that might help us debug this: Chain exists of: mapping.invalidate_lock#3 --> &mm->mmap_lock --> &vma->vm_lock->lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- rlock(&vma->vm_lock->lock); lock(&mm->mmap_lock); lock(&vma->vm_lock->lock); rlock(mapping.invalidate_lock#3); *** DEADLOCK *** 1 lock held by fsstress/6050: #0: ffff888113f26d18 (&vma->vm_lock->lock){++++}-{3:3}, at: lock_vma_under_rcu+0x165/0x250 stack backtrace: CPU: 0 PID: 6050 Comm: fsstress Not tainted 6.10.0-build2+ torvalds#956 Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 Call Trace: <TASK> dump_stack_lvl+0x57/0x80 check_noncircular+0x119/0x160 ? queued_spin_lock_slowpath+0x4be/0x510 ? __pfx_check_noncircular+0x10/0x10 ? __pfx_queued_spin_lock_slowpath+0x10/0x10 ? mark_lock+0x47/0x160 ? init_chain_block+0x9c/0xc0 ? add_chain_block+0x84/0xf0 check_prev_add+0x195/0x430 __lock_acquire+0xaf0/0xd80 ? __pfx___lock_acquire+0x10/0x10 ? __lock_release.isra.0+0x13b/0x230 lock_acquire.part.0+0x103/0x280 ? filemap_fault+0x26e/0x8b0 ? __pfx_lock_acquire.part.0+0x10/0x10 ? rcu_is_watching+0x34/0x60 ? lock_acquire+0xd7/0x120 down_read+0x95/0x200 ? filemap_fault+0x26e/0x8b0 ? __pfx_down_read+0x10/0x10 ? __filemap_get_folio+0x25/0x1a0 filemap_fault+0x26e/0x8b0 ? __pfx_filemap_fault+0x10/0x10 ? find_held_lock+0x7c/0x90 ? __pfx___lock_release.isra.0+0x10/0x10 ? __pte_offset_map+0x99/0x110 __do_fault+0x57/0xd0 do_pte_missing+0x23b/0x320 __handle_mm_fault+0x2d4/0x320 ? __pfx___handle_mm_fault+0x10/0x10 handle_mm_fault+0x14f/0x260 do_user_addr_fault+0x2a2/0x500 exc_page_fault+0x71/0x90 asm_exc_page_fault+0x22/0x30 Signed-off-by: David Howells <[email protected]> Link: https://lore.kernel.org/r/[email protected] cc: Alexander Viro <[email protected]> cc: Christian Brauner <[email protected]> cc: Jan Kara <[email protected]> cc: Jeff Layton <[email protected]> cc: Gao Xiang <[email protected]> cc: Matthew Wilcox <[email protected]> cc: [email protected] cc: [email protected] cc: [email protected] [brauner: fix minor issues] Signed-off-by: Christian Brauner <[email protected]> Signed-off-by: Sasha Levin <[email protected]>
1 parent e42ea96 commit c03185f

File tree

1 file changed

+48
-43
lines changed

1 file changed

+48
-43
lines changed

fs/xattr.c

+48-43
Original file line numberDiff line numberDiff line change
@@ -631,10 +631,9 @@ int do_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
631631
ctx->kvalue, ctx->size, ctx->flags);
632632
}
633633

634-
static long
635-
setxattr(struct mnt_idmap *idmap, struct dentry *d,
636-
const char __user *name, const void __user *value, size_t size,
637-
int flags)
634+
static int path_setxattr(const char __user *pathname,
635+
const char __user *name, const void __user *value,
636+
size_t size, int flags, unsigned int lookup_flags)
638637
{
639638
struct xattr_name kname;
640639
struct xattr_ctx ctx = {
@@ -644,40 +643,30 @@ setxattr(struct mnt_idmap *idmap, struct dentry *d,
644643
.kname = &kname,
645644
.flags = flags,
646645
};
646+
struct path path;
647647
int error;
648648

649649
error = setxattr_copy(name, &ctx);
650650
if (error)
651651
return error;
652652

653-
error = do_setxattr(idmap, d, &ctx);
654-
655-
kvfree(ctx.kvalue);
656-
return error;
657-
}
658-
659-
static int path_setxattr(const char __user *pathname,
660-
const char __user *name, const void __user *value,
661-
size_t size, int flags, unsigned int lookup_flags)
662-
{
663-
struct path path;
664-
int error;
665-
666653
retry:
667654
error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
668655
if (error)
669-
return error;
656+
goto out;
670657
error = mnt_want_write(path.mnt);
671658
if (!error) {
672-
error = setxattr(mnt_idmap(path.mnt), path.dentry, name,
673-
value, size, flags);
659+
error = do_setxattr(mnt_idmap(path.mnt), path.dentry, &ctx);
674660
mnt_drop_write(path.mnt);
675661
}
676662
path_put(&path);
677663
if (retry_estale(error, lookup_flags)) {
678664
lookup_flags |= LOOKUP_REVAL;
679665
goto retry;
680666
}
667+
668+
out:
669+
kvfree(ctx.kvalue);
681670
return error;
682671
}
683672

@@ -698,20 +687,32 @@ SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname,
698687
SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
699688
const void __user *,value, size_t, size, int, flags)
700689
{
701-
struct fd f = fdget(fd);
702-
int error = -EBADF;
690+
struct xattr_name kname;
691+
struct xattr_ctx ctx = {
692+
.cvalue = value,
693+
.kvalue = NULL,
694+
.size = size,
695+
.kname = &kname,
696+
.flags = flags,
697+
};
698+
int error;
703699

700+
CLASS(fd, f)(fd);
704701
if (!f.file)
705-
return error;
702+
return -EBADF;
703+
706704
audit_file(f.file);
705+
error = setxattr_copy(name, &ctx);
706+
if (error)
707+
return error;
708+
707709
error = mnt_want_write_file(f.file);
708710
if (!error) {
709-
error = setxattr(file_mnt_idmap(f.file),
710-
f.file->f_path.dentry, name,
711-
value, size, flags);
711+
error = do_setxattr(file_mnt_idmap(f.file),
712+
f.file->f_path.dentry, &ctx);
712713
mnt_drop_write_file(f.file);
713714
}
714-
fdput(f);
715+
kvfree(ctx.kvalue);
715716
return error;
716717
}
717718

@@ -900,9 +901,17 @@ SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size)
900901
* Extended attribute REMOVE operations
901902
*/
902903
static long
903-
removexattr(struct mnt_idmap *idmap, struct dentry *d,
904-
const char __user *name)
904+
removexattr(struct mnt_idmap *idmap, struct dentry *d, const char *name)
905905
{
906+
if (is_posix_acl_xattr(name))
907+
return vfs_remove_acl(idmap, d, name);
908+
return vfs_removexattr(idmap, d, name);
909+
}
910+
911+
static int path_removexattr(const char __user *pathname,
912+
const char __user *name, unsigned int lookup_flags)
913+
{
914+
struct path path;
906915
int error;
907916
char kname[XATTR_NAME_MAX + 1];
908917

@@ -911,25 +920,13 @@ removexattr(struct mnt_idmap *idmap, struct dentry *d,
911920
error = -ERANGE;
912921
if (error < 0)
913922
return error;
914-
915-
if (is_posix_acl_xattr(kname))
916-
return vfs_remove_acl(idmap, d, kname);
917-
918-
return vfs_removexattr(idmap, d, kname);
919-
}
920-
921-
static int path_removexattr(const char __user *pathname,
922-
const char __user *name, unsigned int lookup_flags)
923-
{
924-
struct path path;
925-
int error;
926923
retry:
927924
error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
928925
if (error)
929926
return error;
930927
error = mnt_want_write(path.mnt);
931928
if (!error) {
932-
error = removexattr(mnt_idmap(path.mnt), path.dentry, name);
929+
error = removexattr(mnt_idmap(path.mnt), path.dentry, kname);
933930
mnt_drop_write(path.mnt);
934931
}
935932
path_put(&path);
@@ -955,15 +952,23 @@ SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname,
955952
SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
956953
{
957954
struct fd f = fdget(fd);
955+
char kname[XATTR_NAME_MAX + 1];
958956
int error = -EBADF;
959957

960958
if (!f.file)
961959
return error;
962960
audit_file(f.file);
961+
962+
error = strncpy_from_user(kname, name, sizeof(kname));
963+
if (error == 0 || error == sizeof(kname))
964+
error = -ERANGE;
965+
if (error < 0)
966+
return error;
967+
963968
error = mnt_want_write_file(f.file);
964969
if (!error) {
965970
error = removexattr(file_mnt_idmap(f.file),
966-
f.file->f_path.dentry, name);
971+
f.file->f_path.dentry, kname);
967972
mnt_drop_write_file(f.file);
968973
}
969974
fdput(f);

0 commit comments

Comments
 (0)