Skip to content

Commit 19d632a

Browse files
Alexey Dobriyan, sfrothwell
Alexey Dobriyan
authored and committed
proc: faster open/read/close with "permanent" files
Now that "struct proc_ops" exists we can start putting stuff there which could not fly with VFS "struct file_operations"...

Most of the fs/proc/inode.c file is dedicated to making open/read/.../close reliable in the event of disappearing /proc entries, which usually happens if a module is getting removed. Files like /proc/cpuinfo which never disappear simply do not need such protection.

Save 2 atomic ops, 1 allocation, 1 free per open/read/close sequence for such "permanent" files.

Enable "permanent" flag for

    /proc/cpuinfo
    /proc/kmsg
    /proc/modules
    /proc/slabinfo
    /proc/stat
    /proc/sysvipc/*
    /proc/swaps

More will come once I figure out a foolproof way to prevent out-of-tree module authors from marking their stuff "permanent" for performance reasons when it is not.

This should help with scalability: benchmark is "read /proc/cpuinfo R times by N threads scattered over the system".

    N       R       t, s (before)   t, s (after)
    -----------------------------------------------------
    64      4096    1.582458        1.530502        -3.2%
    256     4096    6.371926        6.125168        -3.9%
    1024    4096    25.64888        24.47528        -4.6%

Benchmark source:

    #include <chrono>
    #include <iostream>
    #include <thread>
    #include <vector>

    #include <sys/types.h>
    #include <sys/stat.h>
    #include <fcntl.h>
    #include <unistd.h>

    const int NR_CPUS = sysconf(_SC_NPROCESSORS_ONLN);
    int N;
    const char *filename;
    int R;

    int xxx = 0;

    int glue(int n)
    {
        cpu_set_t m;
        CPU_ZERO(&m);
        CPU_SET(n, &m);
        return sched_setaffinity(0, sizeof(cpu_set_t), &m);
    }

    void f(int n)
    {
        glue(n % NR_CPUS);

        while (*(volatile int *)&xxx == 0) {
        }

        for (int i = 0; i < R; i++) {
            int fd = open(filename, O_RDONLY);
            char buf[4096];
            ssize_t rv = read(fd, buf, sizeof(buf));
            asm volatile ("" :: "g" (rv));
            close(fd);
        }
    }

    int main(int argc, char *argv[])
    {
        if (argc < 4) {
            std::cerr << "usage: " << argv[0] << ' ' << "N /proc/filename R\n";
            return 1;
        }

        N = atoi(argv[1]);
        filename = argv[2];
        R = atoi(argv[3]);

        for (int i = 0; i < NR_CPUS; i++) {
            if (glue(i) == 0)
                break;
        }

        std::vector<std::thread> T;
        T.reserve(N);
        for (int i = 0; i < N; i++) {
            T.emplace_back(f, i);
        }

        auto t0 = std::chrono::system_clock::now();
        {
            *(volatile int *)&xxx = 1;
            for (auto& t: T) {
                t.join();
            }
        }
        auto t1 = std::chrono::system_clock::now();
        std::chrono::duration<double> dt = t1 - t0;
        std::cout << dt.count() << '\n';

        return 0;
    }

P.S.: Explicit randomization marker is added because adding a non-function pointer will silently disable structure layout randomization.

Link: http://lkml.kernel.org/r/20200222201539.GA22576@avx2
Signed-off-by: Alexey Dobriyan <[email protected]>
Reported-by: kbuild test robot <[email protected]>
Reported-by: Dan Carpenter <[email protected]>
Cc: Al Viro <[email protected]>
Cc: Joe Perches <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Stephen Rothwell <[email protected]>
1 parent 9984fb7 commit 19d632a

File tree

11 files changed

+196
-54
lines changed

11 files changed

+196
-54
lines changed

fs/proc/cpuinfo.c

+1
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@ static int cpuinfo_open(struct inode *inode, struct file *file)
1717
}
1818

1919
static const struct proc_ops cpuinfo_proc_ops = {
20+
.proc_flags = PROC_ENTRY_PERMANENT,
2021
.proc_open = cpuinfo_open,
2122
.proc_read = seq_read,
2223
.proc_lseek = seq_lseek,

fs/proc/generic.c

+30-3
Original file line number | Diff line number | Diff line change
@@ -531,6 +531,13 @@ struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode,
531531
return p;
532532
}
533533

534+
static inline void pde_set_flags(struct proc_dir_entry *pde)
535+
{
536+
if (pde->proc_ops->proc_flags & PROC_ENTRY_PERMANENT) {
537+
pde->flags |= PROC_ENTRY_PERMANENT;
538+
}
539+
}
540+
534541
struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
535542
struct proc_dir_entry *parent,
536543
const struct proc_ops *proc_ops, void *data)
@@ -541,6 +548,7 @@ struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
541548
if (!p)
542549
return NULL;
543550
p->proc_ops = proc_ops;
551+
pde_set_flags(p);
544552
return proc_register(parent, p);
545553
}
546554
EXPORT_SYMBOL(proc_create_data);
@@ -572,6 +580,7 @@ static int proc_seq_release(struct inode *inode, struct file *file)
572580
}
573581

574582
static const struct proc_ops proc_seq_ops = {
583+
/* not permanent -- can call into arbitrary seq_operations */
575584
.proc_open = proc_seq_open,
576585
.proc_read = seq_read,
577586
.proc_lseek = seq_lseek,
@@ -602,6 +611,7 @@ static int proc_single_open(struct inode *inode, struct file *file)
602611
}
603612

604613
static const struct proc_ops proc_single_ops = {
614+
/* not permanent -- can call into arbitrary ->single_show */
605615
.proc_open = proc_single_open,
606616
.proc_read = seq_read,
607617
.proc_lseek = seq_lseek,
@@ -662,9 +672,14 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
662672

663673
de = pde_subdir_find(parent, fn, len);
664674
if (de) {
665-
rb_erase(&de->subdir_node, &parent->subdir);
666-
if (S_ISDIR(de->mode)) {
667-
parent->nlink--;
675+
if (unlikely(pde_is_permanent(de))) {
676+
WARN(1, "removing permanent /proc entry '%s'", de->name);
677+
de = NULL;
678+
} else {
679+
rb_erase(&de->subdir_node, &parent->subdir);
680+
if (S_ISDIR(de->mode)) {
681+
parent->nlink--;
682+
}
668683
}
669684
}
670685
write_unlock(&proc_subdir_lock);
@@ -700,12 +715,24 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
700715
write_unlock(&proc_subdir_lock);
701716
return -ENOENT;
702717
}
718+
if (unlikely(pde_is_permanent(root))) {
719+
write_unlock(&proc_subdir_lock);
720+
WARN(1, "removing permanent /proc entry '%s/%s'",
721+
root->parent->name, root->name);
722+
return -EINVAL;
723+
}
703724
rb_erase(&root->subdir_node, &parent->subdir);
704725

705726
de = root;
706727
while (1) {
707728
next = pde_subdir_first(de);
708729
if (next) {
730+
if (unlikely(pde_is_permanent(root))) {
731+
write_unlock(&proc_subdir_lock);
732+
WARN(1, "removing permanent /proc entry '%s/%s'",
733+
next->parent->name, next->name);
734+
return -EINVAL;
735+
}
709736
rb_erase(&next->subdir_node, &de->subdir);
710737
de = next;
711738
continue;

fs/proc/inode.c

+137-50
Original file line number | Diff line number | Diff line change
@@ -196,135 +196,204 @@ void proc_entry_rundown(struct proc_dir_entry *de)
196196
spin_unlock(&de->pde_unload_lock);
197197
}
198198

199+
static loff_t pde_lseek(struct proc_dir_entry *pde, struct file *file, loff_t offset, int whence)
200+
{
201+
typeof_member(struct proc_ops, proc_lseek) lseek;
202+
203+
lseek = pde->proc_ops->proc_lseek;
204+
if (!lseek)
205+
lseek = default_llseek;
206+
return lseek(file, offset, whence);
207+
}
208+
199209
static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
200210
{
201211
struct proc_dir_entry *pde = PDE(file_inode(file));
202212
loff_t rv = -EINVAL;
203-
if (use_pde(pde)) {
204-
typeof_member(struct proc_ops, proc_lseek) lseek;
205213

206-
lseek = pde->proc_ops->proc_lseek;
207-
if (!lseek)
208-
lseek = default_llseek;
209-
rv = lseek(file, offset, whence);
214+
if (pde_is_permanent(pde)) {
215+
return pde_lseek(pde, file, offset, whence);
216+
} else if (use_pde(pde)) {
217+
rv = pde_lseek(pde, file, offset, whence);
210218
unuse_pde(pde);
211219
}
212220
return rv;
213221
}
214222

223+
static ssize_t pde_read(struct proc_dir_entry *pde, struct file *file, char __user *buf, size_t count, loff_t *ppos)
224+
{
225+
typeof_member(struct proc_ops, proc_read) read;
226+
227+
read = pde->proc_ops->proc_read;
228+
if (read)
229+
return read(file, buf, count, ppos);
230+
return -EIO;
231+
}
232+
215233
static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
216234
{
217235
struct proc_dir_entry *pde = PDE(file_inode(file));
218236
ssize_t rv = -EIO;
219-
if (use_pde(pde)) {
220-
typeof_member(struct proc_ops, proc_read) read;
221237

222-
read = pde->proc_ops->proc_read;
223-
if (read)
224-
rv = read(file, buf, count, ppos);
238+
if (pde_is_permanent(pde)) {
239+
return pde_read(pde, file, buf, count, ppos);
240+
} else if (use_pde(pde)) {
241+
rv = pde_read(pde, file, buf, count, ppos);
225242
unuse_pde(pde);
226243
}
227244
return rv;
228245
}
229246

247+
static ssize_t pde_write(struct proc_dir_entry *pde, struct file *file, const char __user *buf, size_t count, loff_t *ppos)
248+
{
249+
typeof_member(struct proc_ops, proc_write) write;
250+
251+
write = pde->proc_ops->proc_write;
252+
if (write)
253+
return write(file, buf, count, ppos);
254+
return -EIO;
255+
}
256+
230257
static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
231258
{
232259
struct proc_dir_entry *pde = PDE(file_inode(file));
233260
ssize_t rv = -EIO;
234-
if (use_pde(pde)) {
235-
typeof_member(struct proc_ops, proc_write) write;
236261

237-
write = pde->proc_ops->proc_write;
238-
if (write)
239-
rv = write(file, buf, count, ppos);
262+
if (pde_is_permanent(pde)) {
263+
return pde_write(pde, file, buf, count, ppos);
264+
} else if (use_pde(pde)) {
265+
rv = pde_write(pde, file, buf, count, ppos);
240266
unuse_pde(pde);
241267
}
242268
return rv;
243269
}
244270

271+
static __poll_t pde_poll(struct proc_dir_entry *pde, struct file *file, struct poll_table_struct *pts)
272+
{
273+
typeof_member(struct proc_ops, proc_poll) poll;
274+
275+
poll = pde->proc_ops->proc_poll;
276+
if (poll)
277+
return poll(file, pts);
278+
return DEFAULT_POLLMASK;
279+
}
280+
245281
static __poll_t proc_reg_poll(struct file *file, struct poll_table_struct *pts)
246282
{
247283
struct proc_dir_entry *pde = PDE(file_inode(file));
248284
__poll_t rv = DEFAULT_POLLMASK;
249-
if (use_pde(pde)) {
250-
typeof_member(struct proc_ops, proc_poll) poll;
251285

252-
poll = pde->proc_ops->proc_poll;
253-
if (poll)
254-
rv = poll(file, pts);
286+
if (pde_is_permanent(pde)) {
287+
return pde_poll(pde, file, pts);
288+
} else if (use_pde(pde)) {
289+
rv = pde_poll(pde, file, pts);
255290
unuse_pde(pde);
256291
}
257292
return rv;
258293
}
259294

295+
static long pde_ioctl(struct proc_dir_entry *pde, struct file *file, unsigned int cmd, unsigned long arg)
296+
{
297+
typeof_member(struct proc_ops, proc_ioctl) ioctl;
298+
299+
ioctl = pde->proc_ops->proc_ioctl;
300+
if (ioctl)
301+
return ioctl(file, cmd, arg);
302+
return -ENOTTY;
303+
}
304+
260305
static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
261306
{
262307
struct proc_dir_entry *pde = PDE(file_inode(file));
263308
long rv = -ENOTTY;
264-
if (use_pde(pde)) {
265-
typeof_member(struct proc_ops, proc_ioctl) ioctl;
266309

267-
ioctl = pde->proc_ops->proc_ioctl;
268-
if (ioctl)
269-
rv = ioctl(file, cmd, arg);
310+
if (pde_is_permanent(pde)) {
311+
return pde_ioctl(pde, file, cmd, arg);
312+
} else if (use_pde(pde)) {
313+
rv = pde_ioctl(pde, file, cmd, arg);
270314
unuse_pde(pde);
271315
}
272316
return rv;
273317
}
274318

275319
#ifdef CONFIG_COMPAT
320+
static long pde_compat_ioctl(struct proc_dir_entry *pde, struct file *file, unsigned int cmd, unsigned long arg)
321+
{
322+
typeof_member(struct proc_ops, proc_compat_ioctl) compat_ioctl;
323+
324+
compat_ioctl = pde->proc_ops->proc_compat_ioctl;
325+
if (compat_ioctl)
326+
return compat_ioctl(file, cmd, arg);
327+
return -ENOTTY;
328+
}
329+
276330
static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
277331
{
278332
struct proc_dir_entry *pde = PDE(file_inode(file));
279333
long rv = -ENOTTY;
280-
if (use_pde(pde)) {
281-
typeof_member(struct proc_ops, proc_compat_ioctl) compat_ioctl;
282-
283-
compat_ioctl = pde->proc_ops->proc_compat_ioctl;
284-
if (compat_ioctl)
285-
rv = compat_ioctl(file, cmd, arg);
334+
if (pde_is_permanent(pde)) {
335+
return pde_compat_ioctl(pde, file, cmd, arg);
336+
} else if (use_pde(pde)) {
337+
rv = pde_compat_ioctl(pde, file, cmd, arg);
286338
unuse_pde(pde);
287339
}
288340
return rv;
289341
}
290342
#endif
291343

344+
static int pde_mmap(struct proc_dir_entry *pde, struct file *file, struct vm_area_struct *vma)
345+
{
346+
typeof_member(struct proc_ops, proc_mmap) mmap;
347+
348+
mmap = pde->proc_ops->proc_mmap;
349+
if (mmap)
350+
return mmap(file, vma);
351+
return -EIO;
352+
}
353+
292354
static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
293355
{
294356
struct proc_dir_entry *pde = PDE(file_inode(file));
295357
int rv = -EIO;
296-
if (use_pde(pde)) {
297-
typeof_member(struct proc_ops, proc_mmap) mmap;
298358

299-
mmap = pde->proc_ops->proc_mmap;
300-
if (mmap)
301-
rv = mmap(file, vma);
359+
if (pde_is_permanent(pde)) {
360+
return pde_mmap(pde, file, vma);
361+
} else if (use_pde(pde)) {
362+
rv = pde_mmap(pde, file, vma);
302363
unuse_pde(pde);
303364
}
304365
return rv;
305366
}
306367

307368
static unsigned long
308-
proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr,
369+
pde_get_unmapped_area(struct proc_dir_entry *pde, struct file *file, unsigned long orig_addr,
309370
unsigned long len, unsigned long pgoff,
310371
unsigned long flags)
311372
{
312-
struct proc_dir_entry *pde = PDE(file_inode(file));
313-
unsigned long rv = -EIO;
314-
315-
if (use_pde(pde)) {
316-
typeof_member(struct proc_ops, proc_get_unmapped_area) get_area;
373+
typeof_member(struct proc_ops, proc_get_unmapped_area) get_area;
317374

318-
get_area = pde->proc_ops->proc_get_unmapped_area;
375+
get_area = pde->proc_ops->proc_get_unmapped_area;
319376
#ifdef CONFIG_MMU
320-
if (!get_area)
321-
get_area = current->mm->get_unmapped_area;
377+
if (!get_area)
378+
get_area = current->mm->get_unmapped_area;
322379
#endif
380+
if (get_area)
381+
return get_area(file, orig_addr, len, pgoff, flags);
382+
return orig_addr;
383+
}
384+
385+
static unsigned long
386+
proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr,
387+
unsigned long len, unsigned long pgoff,
388+
unsigned long flags)
389+
{
390+
struct proc_dir_entry *pde = PDE(file_inode(file));
391+
unsigned long rv = -EIO;
323392

324-
if (get_area)
325-
rv = get_area(file, orig_addr, len, pgoff, flags);
326-
else
327-
rv = orig_addr;
393+
if (pde_is_permanent(pde)) {
394+
return pde_get_unmapped_area(pde, file, orig_addr, len, pgoff, flags);
395+
} else if (use_pde(pde)) {
396+
rv = pde_get_unmapped_area(pde, file, orig_addr, len, pgoff, flags);
328397
unuse_pde(pde);
329398
}
330399
return rv;
@@ -338,6 +407,13 @@ static int proc_reg_open(struct inode *inode, struct file *file)
338407
typeof_member(struct proc_ops, proc_release) release;
339408
struct pde_opener *pdeo;
340409

410+
if (pde_is_permanent(pde)) {
411+
open = pde->proc_ops->proc_open;
412+
if (open)
413+
rv = open(inode, file);
414+
return rv;
415+
}
416+
341417
/*
342418
* Ensure that
343419
* 1) PDE's ->release hook will be called no matter what
@@ -387,6 +463,17 @@ static int proc_reg_release(struct inode *inode, struct file *file)
387463
{
388464
struct proc_dir_entry *pde = PDE(inode);
389465
struct pde_opener *pdeo;
466+
467+
if (pde_is_permanent(pde)) {
468+
typeof_member(struct proc_ops, proc_release) release;
469+
470+
release = pde->proc_ops->proc_release;
471+
if (release) {
472+
return release(inode, file);
473+
}
474+
return 0;
475+
}
476+
390477
spin_lock(&pde->pde_unload_lock);
391478
list_for_each_entry(pdeo, &pde->pde_openers, lh) {
392479
if (pdeo->file == file) {

fs/proc/internal.h

+6
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,7 @@ struct proc_dir_entry {
6161
struct rb_node subdir_node;
6262
char *name;
6363
umode_t mode;
64+
u8 flags;
6465
u8 namelen;
6566
char inline_name[];
6667
} __randomize_layout;
@@ -73,6 +74,11 @@ struct proc_dir_entry {
7374
0)
7475
#define SIZEOF_PDE_INLINE_NAME (SIZEOF_PDE - sizeof(struct proc_dir_entry))
7576

77+
static inline bool pde_is_permanent(const struct proc_dir_entry *pde)
78+
{
79+
return pde->flags & PROC_ENTRY_PERMANENT;
80+
}
81+
7682
extern struct kmem_cache *proc_dir_entry_cache;
7783
void pde_free(struct proc_dir_entry *pde);
7884

0 commit comments

Comments
 (0)