GFS2: Reduce file fragmentation
This patch reduces GFS2 file fragmentation by pre-reserving blocks. The
resulting improved on disk layout greatly speeds up operations in cases
which would have resulted in interlaced allocation of blocks previously.
A typical example of this is 10 parallel dd processes, each writing to a
file in a common dirctory.
The implementation uses an rbtree of reservations attached to each
resource group (and each inode).
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 2b035e0..c53c67e 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -521,6 +521,9 @@
int error;
munge_mode_uid_gid(dip, &mode, &uid, &gid);
+ error = gfs2_rindex_update(sdp);
+ if (error)
+ return error;
error = gfs2_quota_lock(dip, uid, gid);
if (error)
@@ -551,6 +554,10 @@
struct buffer_head *dibh;
int error;
+ error = gfs2_rindex_update(sdp);
+ if (error)
+ return error;
+
error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
if (error)
goto fail;
@@ -596,7 +603,8 @@
gfs2_trans_end(sdp);
fail_ipreserv:
- gfs2_inplace_release(dip);
+ if (alloc_required)
+ gfs2_inplace_release(dip);
fail_quota_locks:
gfs2_quota_unlock(dip);
@@ -647,7 +655,7 @@
const struct qstr *name = &dentry->d_name;
struct gfs2_holder ghs[2];
struct inode *inode = NULL;
- struct gfs2_inode *dip = GFS2_I(dir);
+ struct gfs2_inode *dip = GFS2_I(dir), *ip;
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
int error;
@@ -657,6 +665,11 @@
if (!name->len || name->len > GFS2_FNAMESIZE)
return -ENAMETOOLONG;
+ /* We need a reservation to allocate the new dinode block. The
+ directory ip temporarily points to the reservation, but this is
+ being done to get a set of contiguous blocks for the new dinode.
+ Since this is a create, we don't have a sizehint yet, so it will
+ have to use the minimum reservation size. */
error = gfs2_rs_alloc(dip);
if (error)
return error;
@@ -694,24 +707,29 @@
if (IS_ERR(inode))
goto fail_gunlock2;
- error = gfs2_inode_refresh(GFS2_I(inode));
+ ip = GFS2_I(inode);
+ error = gfs2_inode_refresh(ip);
if (error)
goto fail_gunlock2;
- /* the new inode needs a reservation so it can allocate xattrs. */
- error = gfs2_rs_alloc(GFS2_I(inode));
- if (error)
- goto fail_gunlock2;
+ /* The newly created inode needs a reservation so it can allocate
+ xattrs. At the same time, we want new blocks allocated to the new
+ dinode to be as contiguous as possible. Since we allocated the
+ dinode block under the directory's reservation, we transfer
+ ownership of that reservation to the new inode. The directory
+ doesn't need a reservation unless it needs a new allocation. */
+ ip->i_res = dip->i_res;
+ dip->i_res = NULL;
error = gfs2_acl_create(dip, inode);
if (error)
goto fail_gunlock2;
- error = gfs2_security_init(dip, GFS2_I(inode), name);
+ error = gfs2_security_init(dip, ip, name);
if (error)
goto fail_gunlock2;
- error = link_dinode(dip, name, GFS2_I(inode));
+ error = link_dinode(dip, name, ip);
if (error)
goto fail_gunlock2;
@@ -738,6 +756,7 @@
iput(inode);
}
fail:
+ gfs2_rs_delete(dip);
if (bh)
brelse(bh);
return error;