xfs: add support for large btree blocks

Add support for larger btree blocks that contains a CRC32C checksum,
a filesystem uuid and block number for detecting filesystem
consistency and out of place writes.

[dchinner@redhat.com] Also include an owner field to allow reverse
mappings to be implemented for improved repairability and a LSN
field to so that log recovery can easily determine the last
modification that made it to disk for each buffer.

[dchinner@redhat.com] Add buffer log format flags to indicate the
type of buffer to recovery so that we don't have to do blind magic
number tests to determine what the buffer is.

[dchinner@redhat.com] Modified to fit into the verifier structure.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Ben Myers <bpm@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index bec344b..c82ac88 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -34,6 +34,7 @@
 #include "xfs_alloc.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
+#include "xfs_cksum.h"
 
 
 STATIC int
@@ -182,52 +183,88 @@
 			  cur->bc_rec.i.ir_startino;
 }
 
-void
+static int
 xfs_inobt_verify(
 	struct xfs_buf		*bp)
 {
 	struct xfs_mount	*mp = bp->b_target->bt_mount;
 	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
+	struct xfs_perag	*pag = bp->b_pag;
 	unsigned int		level;
-	int			sblock_ok; /* block passes checks */
 
-	/* magic number and level verification */
+	/*
+	 * During growfs operations, we can't verify the exact owner as the
+	 * perag is not fully initialised and hence not attached to the buffer.
+	 *
+	 * Similarly, during log recovery we will have a perag structure
+	 * attached, but the agi information will not yet have been initialised
+	 * from the on disk AGI. We don't currently use any of this information,
+	 * but beware of the landmine (i.e. need to check pag->pagi_init) if we
+	 * ever do.
+	 */
+	switch (block->bb_magic) {
+	case cpu_to_be32(XFS_IBT_CRC_MAGIC):
+		if (!xfs_sb_version_hascrc(&mp->m_sb))
+			return false;
+		if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
+			return false;
+		if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
+			return false;
+		if (pag &&
+		    be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+			return false;
+		/* fall through */
+	case cpu_to_be32(XFS_IBT_MAGIC):
+		break;
+	default:
+		return 0;
+	}
+
+	/* numrecs and level verification */
 	level = be16_to_cpu(block->bb_level);
-	sblock_ok = block->bb_magic == cpu_to_be32(XFS_IBT_MAGIC) &&
-		    level < mp->m_in_maxlevels;
-
-	/* numrecs verification */
-	sblock_ok = sblock_ok &&
-		be16_to_cpu(block->bb_numrecs) <= mp->m_inobt_mxr[level != 0];
+	if (level >= mp->m_in_maxlevels)
+		return false;
+	if (be16_to_cpu(block->bb_numrecs) > mp->m_inobt_mxr[level != 0])
+		return false;
 
 	/* sibling pointer verification */
-	sblock_ok = sblock_ok &&
-		(block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
-		 be32_to_cpu(block->bb_u.s.bb_leftsib) < mp->m_sb.sb_agblocks) &&
-		block->bb_u.s.bb_leftsib &&
-		(block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
-		 be32_to_cpu(block->bb_u.s.bb_rightsib) < mp->m_sb.sb_agblocks) &&
-		block->bb_u.s.bb_rightsib;
+	if (!block->bb_u.s.bb_leftsib ||
+	    (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
+	     block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
+		return false;
+	if (!block->bb_u.s.bb_rightsib ||
+	    (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
+	     block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
+		return false;
 
-	if (!sblock_ok) {
-		trace_xfs_btree_corrupt(bp, _RET_IP_);
-		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block);
-		xfs_buf_ioerror(bp, EFSCORRUPTED);
-	}
+	return true;
 }
 
 static void
 xfs_inobt_read_verify(
 	struct xfs_buf	*bp)
 {
-	xfs_inobt_verify(bp);
+	if (!(xfs_btree_sblock_verify_crc(bp) &&
+	      xfs_inobt_verify(bp))) {
+		trace_xfs_btree_corrupt(bp, _RET_IP_);
+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
+				     bp->b_target->bt_mount, bp->b_addr);
+		xfs_buf_ioerror(bp, EFSCORRUPTED);
+	}
 }
 
 static void
 xfs_inobt_write_verify(
 	struct xfs_buf	*bp)
 {
-	xfs_inobt_verify(bp);
+	if (!xfs_inobt_verify(bp)) {
+		trace_xfs_btree_corrupt(bp, _RET_IP_);
+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
+				     bp->b_target->bt_mount, bp->b_addr);
+		xfs_buf_ioerror(bp, EFSCORRUPTED);
+	}
+	xfs_btree_sblock_calc_crc(bp);
+
 }
 
 const struct xfs_buf_ops xfs_inobt_buf_ops = {
@@ -301,6 +338,8 @@
 	cur->bc_blocklog = mp->m_sb.sb_blocklog;
 
 	cur->bc_ops = &xfs_inobt_ops;
+	if (xfs_sb_version_hascrc(&mp->m_sb))
+		cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
 
 	cur->bc_private.a.agbp = agbp;
 	cur->bc_private.a.agno = agno;