Skip to content

Commit a137e1c

Browse files
Andi Kleen authored and torvalds committed
hugetlbfs: per mount huge page sizes
Add the ability to configure the hugetlb hstate used on a per mount basis.

- Add a new pagesize= option to the hugetlbfs mount that allows setting the page size
- This option causes the mount code to find the hstate corresponding to the specified size, and sets up a pointer to the hstate in the mount's superblock.
- Change the hstate accessors to use this information rather than the global_hstate they were using (requires a slight change in mm/memory.c so we don't NULL deref in the error-unmap path -- see comments).

[np: take hstate out of hugetlbfs inode and vma->vm_private_data]

Acked-by: Adam Litke <[email protected]>
Acked-by: Nishanth Aravamudan <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>
Signed-off-by: Nick Piggin <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent e5ff215 commit a137e1c

File tree

4 files changed: +64 -29 lines changed

fs/hugetlbfs/inode.c

+36 -9
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,7 @@ int sysctl_hugetlb_shm_group;
5353
enum {
5454
Opt_size, Opt_nr_inodes,
5555
Opt_mode, Opt_uid, Opt_gid,
56+
Opt_pagesize,
5657
Opt_err,
5758
};
5859

@@ -62,6 +63,7 @@ static match_table_t tokens = {
6263
{Opt_mode, "mode=%o"},
6364
{Opt_uid, "uid=%u"},
6465
{Opt_gid, "gid=%u"},
66+
{Opt_pagesize, "pagesize=%s"},
6567
{Opt_err, NULL},
6668
};
6769

@@ -750,6 +752,8 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
750752
char *p, *rest;
751753
substring_t args[MAX_OPT_ARGS];
752754
int option;
755+
unsigned long long size = 0;
756+
enum { NO_SIZE, SIZE_STD, SIZE_PERCENT } setsize = NO_SIZE;
753757

754758
if (!options)
755759
return 0;
@@ -780,17 +784,13 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
780784
break;
781785

782786
case Opt_size: {
783-
unsigned long long size;
784787
/* memparse() will accept a K/M/G without a digit */
785788
if (!isdigit(*args[0].from))
786789
goto bad_val;
787790
size = memparse(args[0].from, &rest);
788-
if (*rest == '%') {
789-
size <<= HPAGE_SHIFT;
790-
size *= max_huge_pages;
791-
do_div(size, 100);
792-
}
793-
pconfig->nr_blocks = (size >> HPAGE_SHIFT);
791+
setsize = SIZE_STD;
792+
if (*rest == '%')
793+
setsize = SIZE_PERCENT;
794794
break;
795795
}
796796

@@ -801,13 +801,38 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
801801
pconfig->nr_inodes = memparse(args[0].from, &rest);
802802
break;
803803

804+
case Opt_pagesize: {
805+
unsigned long ps;
806+
ps = memparse(args[0].from, &rest);
807+
pconfig->hstate = size_to_hstate(ps);
808+
if (!pconfig->hstate) {
809+
printk(KERN_ERR
810+
"hugetlbfs: Unsupported page size %lu MB\n",
811+
ps >> 20);
812+
return -EINVAL;
813+
}
814+
break;
815+
}
816+
804817
default:
805818
printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n",
806819
p);
807820
return -EINVAL;
808821
break;
809822
}
810823
}
824+
825+
/* Do size after hstate is set up */
826+
if (setsize > NO_SIZE) {
827+
struct hstate *h = pconfig->hstate;
828+
if (setsize == SIZE_PERCENT) {
829+
size <<= huge_page_shift(h);
830+
size *= h->max_huge_pages;
831+
do_div(size, 100);
832+
}
833+
pconfig->nr_blocks = (size >> huge_page_shift(h));
834+
}
835+
811836
return 0;
812837

813838
bad_val:
@@ -832,6 +857,7 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
832857
config.uid = current->fsuid;
833858
config.gid = current->fsgid;
834859
config.mode = 0755;
860+
config.hstate = &default_hstate;
835861
ret = hugetlbfs_parse_options(data, &config);
836862
if (ret)
837863
return ret;
@@ -840,14 +866,15 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
840866
if (!sbinfo)
841867
return -ENOMEM;
842868
sb->s_fs_info = sbinfo;
869+
sbinfo->hstate = config.hstate;
843870
spin_lock_init(&sbinfo->stat_lock);
844871
sbinfo->max_blocks = config.nr_blocks;
845872
sbinfo->free_blocks = config.nr_blocks;
846873
sbinfo->max_inodes = config.nr_inodes;
847874
sbinfo->free_inodes = config.nr_inodes;
848875
sb->s_maxbytes = MAX_LFS_FILESIZE;
849-
sb->s_blocksize = HPAGE_SIZE;
850-
sb->s_blocksize_bits = HPAGE_SHIFT;
876+
sb->s_blocksize = huge_page_size(config.hstate);
877+
sb->s_blocksize_bits = huge_page_shift(config.hstate);
851878
sb->s_magic = HUGETLBFS_MAGIC;
852879
sb->s_op = &hugetlbfs_ops;
853880
sb->s_time_gran = 1;

include/linux/hugetlb.h

+9 -5
Original file line number | Diff line number | Diff line change
@@ -100,6 +100,7 @@ struct hugetlbfs_config {
100100
umode_t mode;
101101
long nr_blocks;
102102
long nr_inodes;
103+
struct hstate *hstate;
103104
};
104105

105106
struct hugetlbfs_sb_info {
@@ -108,6 +109,7 @@ struct hugetlbfs_sb_info {
108109
long max_inodes; /* inodes allowed */
109110
long free_inodes; /* inodes free */
110111
spinlock_t stat_lock;
112+
struct hstate *hstate;
111113
};
112114

113115

@@ -191,19 +193,21 @@ extern unsigned int default_hstate_idx;
191193

192194
#define default_hstate (hstates[default_hstate_idx])
193195

194-
static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
196+
static inline struct hstate *hstate_inode(struct inode *i)
195197
{
196-
return &default_hstate;
198+
struct hugetlbfs_sb_info *hsb;
199+
hsb = HUGETLBFS_SB(i->i_sb);
200+
return hsb->hstate;
197201
}
198202

199203
static inline struct hstate *hstate_file(struct file *f)
200204
{
201-
return &default_hstate;
205+
return hstate_inode(f->f_dentry->d_inode);
202206
}
203207

204-
static inline struct hstate *hstate_inode(struct inode *i)
208+
static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
205209
{
206-
return &default_hstate;
210+
return hstate_file(vma->vm_file);
207211
}
208212

209213
static inline unsigned long huge_page_size(struct hstate *h)

mm/hugetlb.c

+3 -13
Original file line number | Diff line number | Diff line change
@@ -1439,19 +1439,9 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
14391439
void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
14401440
unsigned long end, struct page *ref_page)
14411441
{
1442-
/*
1443-
* It is undesirable to test vma->vm_file as it should be non-null
1444-
* for valid hugetlb area. However, vm_file will be NULL in the error
1445-
* cleanup path of do_mmap_pgoff. When hugetlbfs ->mmap method fails,
1446-
* do_mmap_pgoff() nullifies vma->vm_file before calling this function
1447-
* to clean up. Since no pte has actually been setup, it is safe to
1448-
* do nothing in this case.
1449-
*/
1450-
if (vma->vm_file) {
1451-
spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
1452-
__unmap_hugepage_range(vma, start, end, ref_page);
1453-
spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
1454-
}
1442+
spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
1443+
__unmap_hugepage_range(vma, start, end, ref_page);
1444+
spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
14551445
}
14561446

14571447
/*

mm/memory.c

+16-2
Original file line numberDiff line numberDiff line change
@@ -901,9 +901,23 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
901901
}
902902

903903
if (unlikely(is_vm_hugetlb_page(vma))) {
904-
unmap_hugepage_range(vma, start, end, NULL);
905-
zap_work -= (end - start) /
904+
/*
905+
* It is undesirable to test vma->vm_file as it
906+
* should be non-null for valid hugetlb area.
907+
* However, vm_file will be NULL in the error
908+
* cleanup path of do_mmap_pgoff. When
909+
* hugetlbfs ->mmap method fails,
910+
* do_mmap_pgoff() nullifies vma->vm_file
911+
* before calling this function to clean up.
912+
* Since no pte has actually been setup, it is
913+
* safe to do nothing in this case.
914+
*/
915+
if (vma->vm_file) {
916+
unmap_hugepage_range(vma, start, end, NULL);
917+
zap_work -= (end - start) /
906918
pages_per_huge_page(hstate_vma(vma));
919+
}
920+
907921
start = end;
908922
} else
909923
start = unmap_page_range(*tlbp, vma,

0 commit comments

Comments
 (0)