Skip to content

Commit aa75f4d

Browse files
harshadjs authored and tytso committed
ext4: main fast-commit commit path
This patch adds the main fast-commit commit path handlers. The overall patch can be divided into two inter-related parts:

(A) Metadata updates tracking

This part consists of helper functions to track changes that need to be committed during a commit operation. These updates are maintained by Ext4 in different in-memory queues. Following are the APIs, with short descriptions, that are implemented in this patch:

- ext4_fc_track_link/unlink/creat() - Track unlink, link and creat operations
- ext4_fc_track_range() - Track changed logical block offsets of inodes
- ext4_fc_track_inode() - Track inodes
- ext4_fc_mark_ineligible() - Mark file system fast commit ineligible
- ext4_fc_start_update() / ext4_fc_stop_update() / ext4_fc_start_ineligible() / ext4_fc_stop_ineligible() - These functions are useful for co-ordinating inode updates with commits.

(B) Main commit path

This part consists of functions to convert updates tracked in in-memory data structures into on-disk commits. Function ext4_fc_commit() is the main entry point to the commit path.

Reported-by: kernel test robot <[email protected]>
Signed-off-by: Harshad Shirwadkar <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Theodore Ts'o <[email protected]>
1 parent ff780b9 commit aa75f4d

File tree

13 files changed

+1707
-29
lines changed

13 files changed

+1707
-29
lines changed

fs/ext4/acl.c

+2
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@ ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type)
242242
handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
243243
if (IS_ERR(handle))
244244
return PTR_ERR(handle);
245+
ext4_fc_start_update(inode);
245246

246247
if ((type == ACL_TYPE_ACCESS) && acl) {
247248
error = posix_acl_update_mode(inode, &mode, &acl);
@@ -259,6 +260,7 @@ ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type)
259260
}
260261
out_stop:
261262
ext4_journal_stop(handle);
263+
ext4_fc_stop_update(inode);
262264
if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
263265
goto retry;
264266
return error;

fs/ext4/ext4.h

+70
Original file line numberDiff line numberDiff line change
@@ -1021,6 +1021,31 @@ struct ext4_inode_info {
10211021

10221022
struct list_head i_orphan; /* unlinked but open inodes */
10231023

1024+
/* Fast commit related info */
1025+
1026+
struct list_head i_fc_list; /*
1027+
* inodes that need fast commit
1028+
* protected by sbi->s_fc_lock.
1029+
*/
1030+
1031+
/* Fast commit subtid when this inode was committed */
1032+
unsigned int i_fc_committed_subtid;
1033+
1034+
/* Start of lblk range that needs to be committed in this fast commit */
1035+
ext4_lblk_t i_fc_lblk_start;
1036+
1037+
/* End of lblk range that needs to be committed in this fast commit */
1038+
ext4_lblk_t i_fc_lblk_len;
1039+
1040+
/* Number of ongoing updates on this inode */
1041+
atomic_t i_fc_updates;
1042+
1043+
/* Fast commit wait queue for this inode */
1044+
wait_queue_head_t i_fc_wait;
1045+
1046+
/* Protect concurrent accesses on i_fc_lblk_start, i_fc_lblk_len */
1047+
struct mutex i_fc_lock;
1048+
10241049
/*
10251050
* i_disksize keeps track of what the inode size is ON DISK, not
10261051
* in memory. During truncate, i_size is set to the new size by
@@ -1141,6 +1166,10 @@ struct ext4_inode_info {
11411166
#define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */
11421167
#define EXT4_ERROR_FS 0x0002 /* Errors detected */
11431168
#define EXT4_ORPHAN_FS 0x0004 /* Orphans being recovered */
1169+
#define EXT4_FC_INELIGIBLE 0x0008 /* Fast commit ineligible */
1170+
#define EXT4_FC_COMMITTING 0x0010 /* File system underoing a fast
1171+
* commit.
1172+
*/
11441173

11451174
/*
11461175
* Misc. filesystem flags
@@ -1613,6 +1642,30 @@ struct ext4_sb_info {
16131642
/* Record the errseq of the backing block device */
16141643
errseq_t s_bdev_wb_err;
16151644
spinlock_t s_bdev_wb_lock;
1645+
1646+
/* Ext4 fast commit stuff */
1647+
atomic_t s_fc_subtid;
1648+
atomic_t s_fc_ineligible_updates;
1649+
/*
1650+
* After commit starts, the main queue gets locked, and the further
1651+
* updates get added in the staging queue.
1652+
*/
1653+
#define FC_Q_MAIN 0
1654+
#define FC_Q_STAGING 1
1655+
struct list_head s_fc_q[2]; /* Inodes staged for fast commit
1656+
* that have data changes in them.
1657+
*/
1658+
struct list_head s_fc_dentry_q[2]; /* directory entry updates */
1659+
unsigned int s_fc_bytes;
1660+
/*
1661+
* Main fast commit lock. This lock protects accesses to the
1662+
* following fields:
1663+
* ei->i_fc_list, s_fc_dentry_q, s_fc_q, s_fc_bytes, s_fc_bh.
1664+
*/
1665+
spinlock_t s_fc_lock;
1666+
struct buffer_head *s_fc_bh;
1667+
struct ext4_fc_stats s_fc_stats;
1668+
u64 s_fc_avg_commit_time;
16161669
};
16171670

16181671
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1723,6 +1776,7 @@ enum {
17231776
EXT4_STATE_EXT_PRECACHED, /* extents have been precached */
17241777
EXT4_STATE_LUSTRE_EA_INODE, /* Lustre-style ea_inode */
17251778
EXT4_STATE_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */
1779+
EXT4_STATE_FC_COMMITTING, /* Fast commit ongoing */
17261780
};
17271781

17281782
#define EXT4_INODE_BIT_FNS(name, field, offset) \
@@ -2682,6 +2736,22 @@ extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate);
26822736
/* fast_commit.c */
26832737

26842738
void ext4_fc_init(struct super_block *sb, journal_t *journal);
2739+
void ext4_fc_init_inode(struct inode *inode);
2740+
void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start,
2741+
ext4_lblk_t end);
2742+
void ext4_fc_track_unlink(struct inode *inode, struct dentry *dentry);
2743+
void ext4_fc_track_link(struct inode *inode, struct dentry *dentry);
2744+
void ext4_fc_track_create(struct inode *inode, struct dentry *dentry);
2745+
void ext4_fc_track_inode(struct inode *inode);
2746+
void ext4_fc_mark_ineligible(struct super_block *sb, int reason);
2747+
void ext4_fc_start_ineligible(struct super_block *sb, int reason);
2748+
void ext4_fc_stop_ineligible(struct super_block *sb);
2749+
void ext4_fc_start_update(struct inode *inode);
2750+
void ext4_fc_stop_update(struct inode *inode);
2751+
void ext4_fc_del(struct inode *inode);
2752+
int ext4_fc_commit(journal_t *journal, tid_t commit_tid);
2753+
int __init ext4_fc_init_dentry_cache(void);
2754+
26852755
/* mballoc.c */
26862756
extern const struct seq_operations ext4_mb_seq_groups_ops;
26872757
extern long ext4_mb_stats;

fs/ext4/extents.c

+34-14
Original file line numberDiff line numberDiff line change
@@ -3723,6 +3723,7 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
37233723
err = ext4_ext_dirty(handle, inode, path + path->p_depth);
37243724
out:
37253725
ext4_ext_show_leaf(inode, path);
3726+
ext4_fc_track_range(inode, ee_block, ee_block + ee_len - 1);
37263727
return err;
37273728
}
37283729

@@ -3794,6 +3795,7 @@ convert_initialized_extent(handle_t *handle, struct inode *inode,
37943795
if (*allocated > map->m_len)
37953796
*allocated = map->m_len;
37963797
map->m_len = *allocated;
3798+
ext4_fc_track_range(inode, ee_block, ee_block + ee_len - 1);
37973799
return 0;
37983800
}
37993801

@@ -4327,7 +4329,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
43274329
map->m_len = ar.len;
43284330
allocated = map->m_len;
43294331
ext4_ext_show_leaf(inode, path);
4330-
4332+
ext4_fc_track_range(inode, map->m_lblk, map->m_lblk + map->m_len - 1);
43314333
out:
43324334
ext4_ext_drop_refs(path);
43334335
kfree(path);
@@ -4600,7 +4602,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
46004602
ret = ext4_mark_inode_dirty(handle, inode);
46014603
if (unlikely(ret))
46024604
goto out_handle;
4603-
4605+
ext4_fc_track_range(inode, offset >> inode->i_sb->s_blocksize_bits,
4606+
(offset + len - 1) >> inode->i_sb->s_blocksize_bits);
46044607
/* Zero out partial block at the edges of the range */
46054608
ret = ext4_zero_partial_blocks(handle, inode, offset, len);
46064609
if (ret >= 0)
@@ -4648,23 +4651,34 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
46484651
FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
46494652
FALLOC_FL_INSERT_RANGE))
46504653
return -EOPNOTSUPP;
4654+
ext4_fc_track_range(inode, offset >> blkbits,
4655+
(offset + len - 1) >> blkbits);
46514656

4652-
if (mode & FALLOC_FL_PUNCH_HOLE)
4653-
return ext4_punch_hole(inode, offset, len);
4657+
ext4_fc_start_update(inode);
4658+
4659+
if (mode & FALLOC_FL_PUNCH_HOLE) {
4660+
ret = ext4_punch_hole(inode, offset, len);
4661+
goto exit;
4662+
}
46544663

46554664
ret = ext4_convert_inline_data(inode);
46564665
if (ret)
4657-
return ret;
4666+
goto exit;
46584667

4659-
if (mode & FALLOC_FL_COLLAPSE_RANGE)
4660-
return ext4_collapse_range(inode, offset, len);
4661-
4662-
if (mode & FALLOC_FL_INSERT_RANGE)
4663-
return ext4_insert_range(inode, offset, len);
4668+
if (mode & FALLOC_FL_COLLAPSE_RANGE) {
4669+
ret = ext4_collapse_range(inode, offset, len);
4670+
goto exit;
4671+
}
46644672

4665-
if (mode & FALLOC_FL_ZERO_RANGE)
4666-
return ext4_zero_range(file, offset, len, mode);
4673+
if (mode & FALLOC_FL_INSERT_RANGE) {
4674+
ret = ext4_insert_range(inode, offset, len);
4675+
goto exit;
4676+
}
46674677

4678+
if (mode & FALLOC_FL_ZERO_RANGE) {
4679+
ret = ext4_zero_range(file, offset, len, mode);
4680+
goto exit;
4681+
}
46684682
trace_ext4_fallocate_enter(inode, offset, len, mode);
46694683
lblk = offset >> blkbits;
46704684

@@ -4698,12 +4712,14 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
46984712
goto out;
46994713

47004714
if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
4701-
ret = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal,
4702-
EXT4_I(inode)->i_sync_tid);
4715+
ret = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal,
4716+
EXT4_I(inode)->i_sync_tid);
47034717
}
47044718
out:
47054719
inode_unlock(inode);
47064720
trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
4721+
exit:
4722+
ext4_fc_stop_update(inode);
47074723
return ret;
47084724
}
47094725

@@ -5291,6 +5307,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
52915307
ret = PTR_ERR(handle);
52925308
goto out_mmap;
52935309
}
5310+
ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
52945311

52955312
down_write(&EXT4_I(inode)->i_data_sem);
52965313
ext4_discard_preallocations(inode, 0);
@@ -5329,6 +5346,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
53295346

53305347
out_stop:
53315348
ext4_journal_stop(handle);
5349+
ext4_fc_stop_ineligible(sb);
53325350
out_mmap:
53335351
up_write(&EXT4_I(inode)->i_mmap_sem);
53345352
out_mutex:
@@ -5429,6 +5447,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
54295447
ret = PTR_ERR(handle);
54305448
goto out_mmap;
54315449
}
5450+
ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
54325451

54335452
/* Expand file to avoid data loss if there is error while shifting */
54345453
inode->i_size += len;
@@ -5503,6 +5522,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
55035522

55045523
out_stop:
55055524
ext4_journal_stop(handle);
5525+
ext4_fc_stop_ineligible(sb);
55065526
out_mmap:
55075527
up_write(&EXT4_I(inode)->i_mmap_sem);
55085528
out_mutex:

0 commit comments

Comments
 (0)