GFS2: Reduce file fragmentation

This patch reduces GFS2 file fragmentation by pre-reserving blocks. The
resulting improved on disk layout greatly speeds up operations in cases
which would have resulted in interlaced allocation of blocks previously.
A typical example of this is 10 parallel dd processes, each writing to a
file in a common dirctory.

The implementation uses an rbtree of reservations attached to each
resource group (and each inode).

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 2b035e0..c53c67e 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -521,6 +521,9 @@
 	int error;
 
 	munge_mode_uid_gid(dip, &mode, &uid, &gid);
+	error = gfs2_rindex_update(sdp);
+	if (error)
+		return error;
 
 	error = gfs2_quota_lock(dip, uid, gid);
 	if (error)
@@ -551,6 +554,10 @@
 	struct buffer_head *dibh;
 	int error;
 
+	error = gfs2_rindex_update(sdp);
+	if (error)
+		return error;
+
 	error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
 	if (error)
 		goto fail;
@@ -596,7 +603,8 @@
 	gfs2_trans_end(sdp);
 
 fail_ipreserv:
-	gfs2_inplace_release(dip);
+	if (alloc_required)
+		gfs2_inplace_release(dip);
 
 fail_quota_locks:
 	gfs2_quota_unlock(dip);
@@ -647,7 +655,7 @@
 	const struct qstr *name = &dentry->d_name;
 	struct gfs2_holder ghs[2];
 	struct inode *inode = NULL;
-	struct gfs2_inode *dip = GFS2_I(dir);
+	struct gfs2_inode *dip = GFS2_I(dir), *ip;
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
 	struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
 	int error;
@@ -657,6 +665,11 @@
 	if (!name->len || name->len > GFS2_FNAMESIZE)
 		return -ENAMETOOLONG;
 
+	/* We need a reservation to allocate the new dinode block. The
+	   directory ip temporarily points to the reservation, but this is
+	   being done to get a set of contiguous blocks for the new dinode.
+	   Since this is a create, we don't have a sizehint yet, so it will
+	   have to use the minimum reservation size. */
 	error = gfs2_rs_alloc(dip);
 	if (error)
 		return error;
@@ -694,24 +707,29 @@
 	if (IS_ERR(inode))
 		goto fail_gunlock2;
 
-	error = gfs2_inode_refresh(GFS2_I(inode));
+	ip = GFS2_I(inode);
+	error = gfs2_inode_refresh(ip);
 	if (error)
 		goto fail_gunlock2;
 
-	/* the new inode needs a reservation so it can allocate xattrs. */
-	error = gfs2_rs_alloc(GFS2_I(inode));
-	if (error)
-		goto fail_gunlock2;
+	/* The newly created inode needs a reservation so it can allocate
+	   xattrs. At the same time, we want new blocks allocated to the new
+	   dinode to be as contiguous as possible. Since we allocated the
+	   dinode block under the directory's reservation, we transfer
+	   ownership of that reservation to the new inode. The directory
+	   doesn't need a reservation unless it needs a new allocation. */
+	ip->i_res = dip->i_res;
+	dip->i_res = NULL;
 
 	error = gfs2_acl_create(dip, inode);
 	if (error)
 		goto fail_gunlock2;
 
-	error = gfs2_security_init(dip, GFS2_I(inode), name);
+	error = gfs2_security_init(dip, ip, name);
 	if (error)
 		goto fail_gunlock2;
 
-	error = link_dinode(dip, name, GFS2_I(inode));
+	error = link_dinode(dip, name, ip);
 	if (error)
 		goto fail_gunlock2;
 
@@ -738,6 +756,7 @@
 		iput(inode);
 	}
 fail:
+	gfs2_rs_delete(dip);
 	if (bh)
 		brelse(bh);
 	return error;