Skip to content

Commit d62241c

Browse files
htejunaxboe
authored andcommitted
writeback, memcg: Implement cgroup_writeback_by_id()
Implement cgroup_writeback_by_id() which initiates cgroup writeback from bdi and memcg IDs. This will be used by memcg foreign inode flushing. v2: Use wb_get_lookup() instead of wb_get_create() to avoid creating spurious wbs. v3: Interpret 0 @nr as 1.25 * nr_dirty to implement best-effort flushing while avoiding possible livelocks. Reviewed-by: Jan Kara <[email protected]> Signed-off-by: Tejun Heo <[email protected]> Signed-off-by: Jens Axboe <[email protected]>
1 parent ed288dc commit d62241c

File tree

2 files changed

+85
-0
lines changed

2 files changed

+85
-0
lines changed

fs/fs-writeback.c

+83
Original file line numberDiff line numberDiff line change
@@ -891,6 +891,89 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
891891
wb_put(last_wb);
892892
}
893893

894+
/**
895+
* cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs
896+
* @bdi_id: target bdi id
897+
* @memcg_id: target memcg css id
898+
* @nr: number of pages to write, 0 for best-effort dirty flushing
899+
* @reason: reason why some writeback work was initiated
900+
* @done: target wb_completion
901+
*
902+
* Initiate flush of the bdi_writeback identified by @bdi_id and @memcg_id
903+
* with the specified parameters. Returns 0 once the work has been queued, -ENOENT if the bdi, the memcg css or the wb cannot be found, or -ENOMEM if the work item cannot be allocated.
904+
*/
905+
int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr,
906+
enum wb_reason reason, struct wb_completion *done)
907+
{
908+
struct backing_dev_info *bdi;
909+
struct cgroup_subsys_state *memcg_css;
910+
struct bdi_writeback *wb;
911+
struct wb_writeback_work *work;
912+
int ret;
913+
914+
/* look up the bdi and the memcg css; both are released again at the out_* labels below */
915+
bdi = bdi_get_by_id(bdi_id);
916+
if (!bdi)
917+
return -ENOENT;
918+
919+
rcu_read_lock();
920+
memcg_css = css_from_id(memcg_id, &memory_cgrp_subsys);
921+
if (memcg_css && !css_tryget(memcg_css)) /* treat a css we cannot get a reference on as not found */
922+
memcg_css = NULL;
923+
rcu_read_unlock();
924+
if (!memcg_css) {
925+
ret = -ENOENT;
926+
goto out_bdi_put;
927+
}
928+
929+
/*
930+
* And find the associated wb. If the wb isn't there already
931+
* there's nothing to flush, don't create one.
932+
*/
933+
wb = wb_get_lookup(bdi, memcg_css);
934+
if (!wb) {
935+
ret = -ENOENT;
936+
goto out_css_put;
937+
}
938+
939+
/*
940+
* If @nr is zero, the caller is attempting to write out most of
941+
* the currently dirty pages. Let's take the current dirty page
942+
* count and inflate it by 25% which should be large enough to
943+
* flush out most dirty pages while avoiding getting livelocked by
944+
* concurrent dirtiers.
945+
*/
946+
if (!nr) {
947+
unsigned long filepages, headroom, dirty, writeback;
948+
949+
mem_cgroup_wb_stats(wb, &filepages, &headroom, &dirty,
950+
&writeback);
951+
nr = dirty * 10 / 8; /* dirty count inflated by 25%; only @dirty is used here */
952+
}
953+
954+
/* issue the writeback work; a failed allocation is reported as -ENOMEM */
955+
work = kzalloc(sizeof(*work), GFP_NOWAIT | __GFP_NOWARN);
956+
if (work) {
957+
work->nr_pages = nr;
958+
work->sync_mode = WB_SYNC_NONE;
959+
work->range_cyclic = 1;
960+
work->reason = reason;
961+
work->done = done;
962+
work->auto_free = 1; /* NOTE(review): presumably the writeback path frees @work once processed - confirm */
963+
wb_queue_work(wb, work);
964+
ret = 0;
965+
} else {
966+
ret = -ENOMEM;
967+
}
968+
969+
wb_put(wb);
970+
out_css_put:
971+
css_put(memcg_css);
972+
out_bdi_put:
973+
bdi_put(bdi);
974+
return ret;
975+
}
976+
894977
/**
895978
* cgroup_writeback_umount - flush inode wb switches for umount
896979
*

include/linux/writeback.h

+2
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,8 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
217217
void wbc_detach_inode(struct writeback_control *wbc);
218218
void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
219219
size_t bytes);
220+
int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr_pages,
221+
enum wb_reason reason, struct wb_completion *done);
220222
void cgroup_writeback_umount(void);
221223

222224
/**

0 commit comments

Comments
 (0)