@@ -1132,7 +1132,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 		if (css == &root->css)
 			break;
 
-		if (css_tryget_online(css)) {
+		if (css_tryget(css)) {
 			/*
 			 * Make sure the memcg is initialized:
 			 * mem_cgroup_css_online() orders the
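The one-line change above widens the iterator's grip: css_tryget() pins any group whose reference count is still positive, even after it has been taken offline, whereas css_tryget_online() additionally fails once the group is offline. Since this patch stops reparenting charges, offline groups can still own pages, so the iterator must be able to return them. Below is a minimal userspace model of the two primitives, not kernel code; the type and function names are illustrative only.

    /* Hypothetical model of css_tryget() vs. css_tryget_online(). */
    #include <stdatomic.h>
    #include <stdbool.h>

    struct css_model {
            atomic_int refcnt;   /* > 0 while the css can still be pinned */
            atomic_bool online;  /* cleared when the group is offlined */
    };

    static bool tryget(struct css_model *css)
    {
            int old = atomic_load(&css->refcnt);

            /* Pin the css unless its refcount already hit zero. */
            while (old > 0)
                    if (atomic_compare_exchange_weak(&css->refcnt, &old, old + 1))
                            return true;
            return false;
    }

    static bool tryget_online(struct css_model *css)
    {
            /* Like tryget(), but additionally refuses offline groups. */
            if (!tryget(css))
                    return false;
            if (!atomic_load(&css->online)) {
                    atomic_fetch_sub(&css->refcnt, 1);
                    return false;
            }
            return true;
    }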
@@ -3316,79 +3316,6 @@ static int mem_cgroup_move_account(struct page *page,
 	return ret;
 }
 
-/**
- * mem_cgroup_move_parent - moves page to the parent group
- * @page: the page to move
- * @pc: page_cgroup of the page
- * @child: page's cgroup
- *
- * Move charges to its parent or the root cgroup if the group has no
- * parent (aka use_hierarchy==0).
- * Although this might fail (get_page_unless_zero, isolate_lru_page or
- * mem_cgroup_move_account fails) the failure is always temporary and
- * it signals a race with a page removal/uncharge or migration. In the
- * first case the page is on the way out and it will vanish from the LRU
- * on the next attempt and the call should be retried later.
- * Isolation from the LRU fails only if page has been isolated from
- * the LRU since we looked at it and that usually means either global
- * reclaim or migration going on. The page will either get back to the
- * LRU or vanish.
- * Finally, mem_cgroup_move_account fails only if the page got uncharged
- * (!PageCgroupUsed) or moved to a different group. The page will
- * disappear in the next attempt.
- */
-static int mem_cgroup_move_parent(struct page *page,
-				  struct page_cgroup *pc,
-				  struct mem_cgroup *child)
-{
-	struct mem_cgroup *parent;
-	unsigned int nr_pages;
-	unsigned long uninitialized_var(flags);
-	int ret;
-
-	VM_BUG_ON(mem_cgroup_is_root(child));
-
-	ret = -EBUSY;
-	if (!get_page_unless_zero(page))
-		goto out;
-	if (isolate_lru_page(page))
-		goto put;
-
-	nr_pages = hpage_nr_pages(page);
-
-	parent = parent_mem_cgroup(child);
-	/*
-	 * If no parent, move charges to root cgroup.
-	 */
-	if (!parent)
-		parent = root_mem_cgroup;
-
-	if (nr_pages > 1) {
-		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
-		flags = compound_lock_irqsave(page);
-	}
-
-	ret = mem_cgroup_move_account(page, nr_pages,
-				pc, child, parent);
-	if (!ret) {
-		if (!mem_cgroup_is_root(parent))
-			css_get_many(&parent->css, nr_pages);
-		/* Take charge off the local counters */
-		page_counter_cancel(&child->memory, nr_pages);
-		if (do_swap_account)
-			page_counter_cancel(&child->memsw, nr_pages);
-		css_put_many(&child->css, nr_pages);
-	}
-
-	if (nr_pages > 1)
-		compound_unlock_irqrestore(page, flags);
-	putback_lru_page(page);
-put:
-	put_page(page);
-out:
-	return ret;
-}
-
 #ifdef CONFIG_MEMCG_SWAP
 static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
 				       bool charge)
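mem_cgroup_move_parent() and its retry semantics can go away because the charge lifetime model changes: instead of moving charges up the hierarchy at offline time, each charged page keeps a reference on its memcg (the css_get_many()/css_put_many() calls visible in the removed code), so an offline group simply stays allocated until its last page is uncharged. A rough userspace sketch of that ownership model, with illustrative names only:

    /* Hypothetical model: every charged page pins its group, so no
     * reparenting pass is needed when the group goes offline. */
    #include <stdatomic.h>
    #include <stdlib.h>

    struct group_model {
            atomic_long refcnt;  /* one reference per charged page + base */
            atomic_long usage;   /* number of charged pages */
    };

    static void charge(struct group_model *g, long nr_pages)
    {
            atomic_fetch_add(&g->usage, nr_pages);
            atomic_fetch_add(&g->refcnt, nr_pages);  /* cf. css_get_many() */
    }

    static void uncharge(struct group_model *g, long nr_pages)
    {
            atomic_fetch_sub(&g->usage, nr_pages);
            /* cf. css_put_many(): the final put frees the group. */
            if (atomic_fetch_sub(&g->refcnt, nr_pages) == nr_pages)
                    free(g);
    }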
@@ -3682,105 +3609,6 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 	return nr_reclaimed;
 }
 
-/**
- * mem_cgroup_force_empty_list - clears LRU of a group
- * @memcg: group to clear
- * @node: NUMA node
- * @zid: zone id
- * @lru: lru to clear
- *
- * Traverse a specified page_cgroup list and try to drop them all. This doesn't
- * reclaim the pages themselves - pages are moved to the parent (or root)
- * group.
- */
-static void mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
-				int node, int zid, enum lru_list lru)
-{
-	struct lruvec *lruvec;
-	unsigned long flags;
-	struct list_head *list;
-	struct page *busy;
-	struct zone *zone;
-
-	zone = &NODE_DATA(node)->node_zones[zid];
-	lruvec = mem_cgroup_zone_lruvec(zone, memcg);
-	list = &lruvec->lists[lru];
-
-	busy = NULL;
-	do {
-		struct page_cgroup *pc;
-		struct page *page;
-
-		spin_lock_irqsave(&zone->lru_lock, flags);
-		if (list_empty(list)) {
-			spin_unlock_irqrestore(&zone->lru_lock, flags);
-			break;
-		}
-		page = list_entry(list->prev, struct page, lru);
-		if (busy == page) {
-			list_move(&page->lru, list);
-			busy = NULL;
-			spin_unlock_irqrestore(&zone->lru_lock, flags);
-			continue;
-		}
-		spin_unlock_irqrestore(&zone->lru_lock, flags);
-
-		pc = lookup_page_cgroup(page);
-
-		if (mem_cgroup_move_parent(page, pc, memcg)) {
-			/* found lock contention or "pc" is obsolete. */
-			busy = page;
-		} else
-			busy = NULL;
-		cond_resched();
-	} while (!list_empty(list));
-}
-
-/*
- * Make the mem_cgroup's charge 0 if there is no task, by moving
- * all the charges and pages to the parent.
- * This enables deleting this mem_cgroup.
- *
- * Caller is responsible for holding css reference on the memcg.
- */
-static void mem_cgroup_reparent_charges(struct mem_cgroup *memcg)
-{
-	int node, zid;
-
-	do {
-		/* This is for making all *used* pages to be on LRU. */
-		lru_add_drain_all();
-		drain_all_stock_sync(memcg);
-		mem_cgroup_start_move(memcg);
-		for_each_node_state(node, N_MEMORY) {
-			for (zid = 0; zid < MAX_NR_ZONES; zid++) {
-				enum lru_list lru;
-				for_each_lru(lru) {
-					mem_cgroup_force_empty_list(memcg,
-							node, zid, lru);
-				}
-			}
-		}
-		mem_cgroup_end_move(memcg);
-		memcg_oom_recover(memcg);
-		cond_resched();
-
-		/*
-		 * Kernel memory may not necessarily be trackable to a
-		 * specific process, so such pages are not migrated and
-		 * we can't expect their value to drop to 0 here.
-		 * Having res filled up with kmem only is enough.
-		 *
-		 * This is a safety check because mem_cgroup_force_empty_list
-		 * could have raced with mem_cgroup_replace_page_cache callers
-		 * so the lru seemed empty but the page could have been added
-		 * right after the check. RES_USAGE should be safe as we always
-		 * charge before adding to the LRU.
-		 */
-	} while (page_counter_read(&memcg->memory) -
-		 page_counter_read(&memcg->kmem) > 0);
-}
-
 /*
  * Test whether @memcg has children, dead or alive. Note that this
  * function doesn't care whether @memcg has use_hierarchy enabled and
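The deleted mem_cgroup_force_empty_list() used a small but subtle scan pattern: take pages from the tail of the LRU, and when the same page is still stuck at the tail on the next pass, rotate it to the head so the scan can make progress behind it. A generic userspace sketch of that pattern follows; the list type and callback are stand-ins, not kernel APIs.

    #include <stdbool.h>
    #include <stddef.h>

    struct node { struct node *prev, *next; };

    /* Move n to the front of the circular list headed by head. */
    static void rotate_to_head(struct node *head, struct node *n)
    {
            n->prev->next = n->next;
            n->next->prev = n->prev;
            n->next = head->next;
            n->prev = head;
            head->next->prev = n;
            head->next = n;
    }

    /* try_move() unlinks n on success, like mem_cgroup_move_parent()
     * moving a page away; failures are assumed to be transient. */
    static void drain_from_tail(struct node *head,
                                bool (*try_move)(struct node *))
    {
            struct node *busy = NULL;

            while (head->prev != head) {         /* list is non-empty */
                    struct node *n = head->prev; /* scan from the tail */

                    if (n == busy) {
                            /* Still stuck at the tail: rotate it away. */
                            rotate_to_head(head, n);
                            busy = NULL;
                            continue;
                    }
                    busy = try_move(n) ? NULL : n;
            }
    }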
@@ -5323,7 +5151,6 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	struct mem_cgroup_event *event, *tmp;
-	struct cgroup_subsys_state *iter;
 
 	/*
 	 * Unregister events and notify userspace.
@@ -5337,56 +5164,13 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
 	}
 	spin_unlock(&memcg->event_list_lock);
 
-	/*
-	 * This requires that offlining is serialized. Right now that is
-	 * guaranteed because css_killed_work_fn() holds the cgroup_mutex.
-	 */
-	css_for_each_descendant_post(iter, css)
-		mem_cgroup_reparent_charges(mem_cgroup_from_css(iter));
-
 	memcg_unregister_all_caches(memcg);
 	vmpressure_cleanup(&memcg->vmpressure);
 }
 
 static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
-	/*
-	 * XXX: css_offline() would be where we should reparent all
-	 * memory to prepare the cgroup for destruction. However,
-	 * memcg does not do css_tryget_online() and page_counter charging
-	 * under the same RCU lock region, which means that charging
-	 * could race with offlining. Offlining only happens to
-	 * cgroups with no tasks in them but charges can show up
-	 * without any tasks from the swapin path when the target
-	 * memcg is looked up from the swapout record and not from the
-	 * current task as it usually is. A race like this can leak
-	 * charges and put pages with stale cgroup pointers into
-	 * circulation:
-	 *
-	 * #0                        #1
-	 *                           lookup_swap_cgroup_id()
-	 *                           rcu_read_lock()
-	 *                           mem_cgroup_lookup()
-	 *                           css_tryget_online()
-	 *                           rcu_read_unlock()
-	 * disable css_tryget_online()
-	 * call_rcu()
-	 *   offline_css()
-	 *     reparent_charges()
-	 *                           page_counter_try_charge()
-	 *                           css_put()
-	 *                             css_free()
-	 *                           pc->mem_cgroup = dead memcg
-	 *                           add page to lru
-	 *
-	 * The bulk of the charges are still moved in offline_css() to
-	 * avoid pinning a lot of pages in case a long-term reference
-	 * like a swapout record is deferring the css_free() to long
-	 * after offlining. But this makes sure we catch any charges
-	 * made after offlining:
-	 */
-	mem_cgroup_reparent_charges(memcg);
 
 	memcg_destroy_kmem(memcg);
 	__mem_cgroup_free(memcg);
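The long XXX comment deleted above documents the race this patch resolves: a swapin charge could look up a memcg from a swapout record and charge it while it was concurrently being offlined and reparented. With reparenting gone and plain css_tryget()-style pinning, an offline group is simply charged like any other; its pages pin it until they are uncharged. A hedged userspace model of that swapin path, with all names illustrative:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stddef.h>

    struct group_model {
            atomic_int refcnt;   /* > 0 while the group may be pinned */
            atomic_long usage;   /* charged pages; each one holds a ref */
    };

    static bool tryget(struct group_model *g)
    {
            int old = atomic_load(&g->refcnt);

            while (old > 0)
                    if (atomic_compare_exchange_weak(&g->refcnt, &old, old + 1))
                            return true;
            return false;
    }

    /* Charge one page against the group found in a swapout record.
     * Offline groups are fine; only fully-freed groups are refused. */
    static bool charge_swapin(struct group_model *g)
    {
            if (g == NULL || !tryget(g))
                    return false;   /* caller falls back, e.g. to root */
            atomic_fetch_add(&g->usage, 1);  /* the page keeps the ref */
            return true;
    }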