| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* | 
 | 2 |  *   Copyright (C) International Business Machines Corp., 2000-2004 | 
 | 3 |  * | 
 | 4 |  *   This program is free software;  you can redistribute it and/or modify | 
 | 5 |  *   it under the terms of the GNU General Public License as published by | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 6 |  *   the Free Software Foundation; either version 2 of the License, or | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 7 |  *   (at your option) any later version. | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 8 |  * | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 9 |  *   This program is distributed in the hope that it will be useful, | 
 | 10 |  *   but WITHOUT ANY WARRANTY;  without even the implied warranty of | 
 | 11 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See | 
 | 12 |  *   the GNU General Public License for more details. | 
 | 13 |  * | 
 | 14 |  *   You should have received a copy of the GNU General Public License | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 15 |  *   along with this program;  if not, write to the Free Software | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 16 |  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 
 | 17 |  */ | 
 | 18 |  | 
 | 19 | #include <linux/fs.h> | 
 | 20 | #include "jfs_incore.h" | 
 | 21 | #include "jfs_superblock.h" | 
 | 22 | #include "jfs_dmap.h" | 
 | 23 | #include "jfs_imap.h" | 
 | 24 | #include "jfs_lock.h" | 
 | 25 | #include "jfs_metapage.h" | 
 | 26 | #include "jfs_debug.h" | 
 | 27 |  | 
 | 28 | /* | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 29 |  *	SERIALIZATION of the Block Allocation Map. | 
 | 30 |  * | 
 | 31 |  *	the working state of the block allocation map is accessed in | 
 | 32 |  *	two directions: | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 33 |  * | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 34 |  *	1) allocation and free requests that start at the dmap | 
 | 35 |  *	   level and move up through the dmap control pages (i.e. | 
 | 36 |  *	   the vast majority of requests). | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 37 |  * | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 38 |  *	2) allocation requests that start at dmap control page | 
 | 39 |  *	   level and work down towards the dmaps. | 
 | 40 |  * | 
 | 41 |  *	the serialization scheme used here is as follows. | 
 | 42 |  * | 
 | 43 |  *	requests which start at the bottom are serialized against each | 
 | 44 |  *	other through buffers and each requests holds onto its buffers | 
 | 45 |  *	as it works it way up from a single dmap to the required level | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 46 |  *	of dmap control page. | 
 | 47 |  *	requests that start at the top are serialized against each other | 
 | 48 |  *	and request that start from the bottom by the multiple read/single | 
 | 49 |  *	write inode lock of the bmap inode. requests starting at the top | 
 | 50 |  *	take this lock in write mode while request starting at the bottom | 
 | 51 |  *	take the lock in read mode.  a single top-down request may proceed | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 52 |  *	exclusively while multiple bottoms-up requests may proceed | 
 | 53 |  *	simultaneously (under the protection of busy buffers). | 
 | 54 |  * | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 55 |  *	in addition to information found in dmaps and dmap control pages, | 
 | 56 |  *	the working state of the block allocation map also includes read/ | 
 | 57 |  *	write information maintained in the bmap descriptor (i.e. total | 
 | 58 |  *	free block count, allocation group level free block counts). | 
 | 59 |  *	a single exclusive lock (BMAP_LOCK) is used to guard this information | 
 | 60 |  *	in the face of multiple-bottoms up requests. | 
 | 61 |  *	(lock ordering: IREAD_LOCK, BMAP_LOCK); | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 62 |  * | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 63 |  *	accesses to the persistent state of the block allocation map (limited | 
 | 64 |  *	to the persistent bitmaps in dmaps) is guarded by (busy) buffers. | 
 | 65 |  */ | 
 | 66 |  | 
| Ingo Molnar | 1de8744 | 2006-01-24 15:22:50 -0600 | [diff] [blame] | 67 | #define BMAP_LOCK_INIT(bmp)	mutex_init(&bmp->db_bmaplock) | 
 | 68 | #define BMAP_LOCK(bmp)		mutex_lock(&bmp->db_bmaplock) | 
 | 69 | #define BMAP_UNLOCK(bmp)	mutex_unlock(&bmp->db_bmaplock) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 70 |  | 
 | 71 | /* | 
 | 72 |  * forward references | 
 | 73 |  */ | 
 | 74 | static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, | 
 | 75 | 			int nblocks); | 
 | 76 | static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval); | 
| Dave Kleikamp | b6a47fd | 2005-10-11 09:06:59 -0500 | [diff] [blame] | 77 | static int dbBackSplit(dmtree_t * tp, int leafno); | 
| Dave Kleikamp | 56d1254 | 2005-07-15 09:43:36 -0500 | [diff] [blame] | 78 | static int dbJoin(dmtree_t * tp, int leafno, int newval); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 79 | static void dbAdjTree(dmtree_t * tp, int leafno, int newval); | 
 | 80 | static int dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, | 
 | 81 | 		    int level); | 
 | 82 | static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results); | 
 | 83 | static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno, | 
 | 84 | 		       int nblocks); | 
 | 85 | static int dbAllocNear(struct bmap * bmp, struct dmap * dp, s64 blkno, | 
 | 86 | 		       int nblocks, | 
 | 87 | 		       int l2nb, s64 * results); | 
 | 88 | static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, | 
 | 89 | 		       int nblocks); | 
 | 90 | static int dbAllocDmapLev(struct bmap * bmp, struct dmap * dp, int nblocks, | 
 | 91 | 			  int l2nb, | 
 | 92 | 			  s64 * results); | 
 | 93 | static int dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, | 
 | 94 | 		     s64 * results); | 
 | 95 | static int dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, | 
 | 96 | 		      s64 * results); | 
 | 97 | static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks); | 
 | 98 | static int dbFindBits(u32 word, int l2nb); | 
 | 99 | static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno); | 
 | 100 | static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx); | 
| Dave Kleikamp | 56d1254 | 2005-07-15 09:43:36 -0500 | [diff] [blame] | 101 | static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, | 
 | 102 | 		      int nblocks); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 103 | static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, | 
 | 104 | 		      int nblocks); | 
 | 105 | static int dbMaxBud(u8 * cp); | 
 | 106 | s64 dbMapFileSizeToMapSize(struct inode *ipbmap); | 
 | 107 | static int blkstol2(s64 nb); | 
 | 108 |  | 
 | 109 | static int cntlz(u32 value); | 
 | 110 | static int cnttz(u32 word); | 
 | 111 |  | 
 | 112 | static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, | 
 | 113 | 			 int nblocks); | 
 | 114 | static int dbInitDmap(struct dmap * dp, s64 blkno, int nblocks); | 
 | 115 | static int dbInitDmapTree(struct dmap * dp); | 
 | 116 | static int dbInitTree(struct dmaptree * dtp); | 
 | 117 | static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i); | 
 | 118 | static int dbGetL2AGSize(s64 nblocks); | 
 | 119 |  | 
 | 120 | /* | 
 | 121 |  *	buddy table | 
 | 122 |  * | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 123 |  * table used for determining buddy sizes within characters of | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 124 |  * dmap bitmap words.  the characters themselves serve as indexes | 
 | 125 |  * into the table, with the table elements yielding the maximum | 
 | 126 |  * binary buddy of free bits within the character. | 
 | 127 |  */ | 
| Arjan van de Ven | 4d5dbd0 | 2005-11-29 08:28:58 -0600 | [diff] [blame] | 128 | static const s8 budtab[256] = { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 129 | 	3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | 
 | 130 | 	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 
 | 131 | 	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 
 | 132 | 	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 
 | 133 | 	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 
 | 134 | 	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | 
 | 135 | 	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | 
 | 136 | 	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | 
 | 137 | 	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 
 | 138 | 	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | 
 | 139 | 	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | 
 | 140 | 	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | 
 | 141 | 	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 
 | 142 | 	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | 
 | 143 | 	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | 
 | 144 | 	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, -1 | 
 | 145 | }; | 
 | 146 |  | 
 | 147 |  | 
 | 148 | /* | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 149 |  * NAME:	dbMount() | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 150 |  * | 
 | 151 |  * FUNCTION:	initializate the block allocation map. | 
 | 152 |  * | 
 | 153 |  *		memory is allocated for the in-core bmap descriptor and | 
 | 154 |  *		the in-core descriptor is initialized from disk. | 
 | 155 |  * | 
 | 156 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 157 |  *	ipbmap	- pointer to in-core inode for the block map. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 158 |  * | 
 | 159 |  * RETURN VALUES: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 160 |  *	0	- success | 
 | 161 |  *	-ENOMEM	- insufficient memory | 
 | 162 |  *	-EIO	- i/o error | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 163 |  */ | 
 | 164 | int dbMount(struct inode *ipbmap) | 
 | 165 | { | 
 | 166 | 	struct bmap *bmp; | 
 | 167 | 	struct dbmap_disk *dbmp_le; | 
 | 168 | 	struct metapage *mp; | 
 | 169 | 	int i; | 
 | 170 |  | 
 | 171 | 	/* | 
 | 172 | 	 * allocate/initialize the in-memory bmap descriptor | 
 | 173 | 	 */ | 
 | 174 | 	/* allocate memory for the in-memory bmap descriptor */ | 
 | 175 | 	bmp = kmalloc(sizeof(struct bmap), GFP_KERNEL); | 
 | 176 | 	if (bmp == NULL) | 
 | 177 | 		return -ENOMEM; | 
 | 178 |  | 
 | 179 | 	/* read the on-disk bmap descriptor. */ | 
 | 180 | 	mp = read_metapage(ipbmap, | 
 | 181 | 			   BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage, | 
 | 182 | 			   PSIZE, 0); | 
 | 183 | 	if (mp == NULL) { | 
 | 184 | 		kfree(bmp); | 
 | 185 | 		return -EIO; | 
 | 186 | 	} | 
 | 187 |  | 
 | 188 | 	/* copy the on-disk bmap descriptor to its in-memory version. */ | 
 | 189 | 	dbmp_le = (struct dbmap_disk *) mp->data; | 
 | 190 | 	bmp->db_mapsize = le64_to_cpu(dbmp_le->dn_mapsize); | 
 | 191 | 	bmp->db_nfree = le64_to_cpu(dbmp_le->dn_nfree); | 
 | 192 | 	bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage); | 
 | 193 | 	bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag); | 
 | 194 | 	bmp->db_maxlevel = le32_to_cpu(dbmp_le->dn_maxlevel); | 
 | 195 | 	bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag); | 
 | 196 | 	bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref); | 
 | 197 | 	bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel); | 
 | 198 | 	bmp->db_agheigth = le32_to_cpu(dbmp_le->dn_agheigth); | 
 | 199 | 	bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth); | 
 | 200 | 	bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart); | 
 | 201 | 	bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size); | 
 | 202 | 	for (i = 0; i < MAXAG; i++) | 
 | 203 | 		bmp->db_agfree[i] = le64_to_cpu(dbmp_le->dn_agfree[i]); | 
 | 204 | 	bmp->db_agsize = le64_to_cpu(dbmp_le->dn_agsize); | 
 | 205 | 	bmp->db_maxfreebud = dbmp_le->dn_maxfreebud; | 
 | 206 |  | 
 | 207 | 	/* release the buffer. */ | 
 | 208 | 	release_metapage(mp); | 
 | 209 |  | 
 | 210 | 	/* bind the bmap inode and the bmap descriptor to each other. */ | 
 | 211 | 	bmp->db_ipbmap = ipbmap; | 
 | 212 | 	JFS_SBI(ipbmap->i_sb)->bmap = bmp; | 
 | 213 |  | 
 | 214 | 	memset(bmp->db_active, 0, sizeof(bmp->db_active)); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 215 |  | 
 | 216 | 	/* | 
 | 217 | 	 * allocate/initialize the bmap lock | 
 | 218 | 	 */ | 
 | 219 | 	BMAP_LOCK_INIT(bmp); | 
 | 220 |  | 
 | 221 | 	return (0); | 
 | 222 | } | 
 | 223 |  | 
 | 224 |  | 
 | 225 | /* | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 226 |  * NAME:	dbUnmount() | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 227 |  * | 
 | 228 |  * FUNCTION:	terminate the block allocation map in preparation for | 
 | 229 |  *		file system unmount. | 
 | 230 |  * | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 231 |  *		the in-core bmap descriptor is written to disk and | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 232 |  *		the memory for this descriptor is freed. | 
 | 233 |  * | 
 | 234 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 235 |  *	ipbmap	- pointer to in-core inode for the block map. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 236 |  * | 
 | 237 |  * RETURN VALUES: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 238 |  *	0	- success | 
 | 239 |  *	-EIO	- i/o error | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 240 |  */ | 
 | 241 | int dbUnmount(struct inode *ipbmap, int mounterror) | 
 | 242 | { | 
 | 243 | 	struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 244 |  | 
 | 245 | 	if (!(mounterror || isReadOnly(ipbmap))) | 
 | 246 | 		dbSync(ipbmap); | 
 | 247 |  | 
 | 248 | 	/* | 
 | 249 | 	 * Invalidate the page cache buffers | 
 | 250 | 	 */ | 
 | 251 | 	truncate_inode_pages(ipbmap->i_mapping, 0); | 
 | 252 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 253 | 	/* free the memory for the in-memory bmap. */ | 
 | 254 | 	kfree(bmp); | 
 | 255 |  | 
 | 256 | 	return (0); | 
 | 257 | } | 
 | 258 |  | 
 | 259 | /* | 
 | 260 |  *	dbSync() | 
 | 261 |  */ | 
 | 262 | int dbSync(struct inode *ipbmap) | 
 | 263 | { | 
 | 264 | 	struct dbmap_disk *dbmp_le; | 
 | 265 | 	struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; | 
 | 266 | 	struct metapage *mp; | 
 | 267 | 	int i; | 
 | 268 |  | 
 | 269 | 	/* | 
 | 270 | 	 * write bmap global control page | 
 | 271 | 	 */ | 
 | 272 | 	/* get the buffer for the on-disk bmap descriptor. */ | 
 | 273 | 	mp = read_metapage(ipbmap, | 
 | 274 | 			   BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage, | 
 | 275 | 			   PSIZE, 0); | 
 | 276 | 	if (mp == NULL) { | 
 | 277 | 		jfs_err("dbSync: read_metapage failed!"); | 
 | 278 | 		return -EIO; | 
 | 279 | 	} | 
 | 280 | 	/* copy the in-memory version of the bmap to the on-disk version */ | 
 | 281 | 	dbmp_le = (struct dbmap_disk *) mp->data; | 
 | 282 | 	dbmp_le->dn_mapsize = cpu_to_le64(bmp->db_mapsize); | 
 | 283 | 	dbmp_le->dn_nfree = cpu_to_le64(bmp->db_nfree); | 
 | 284 | 	dbmp_le->dn_l2nbperpage = cpu_to_le32(bmp->db_l2nbperpage); | 
 | 285 | 	dbmp_le->dn_numag = cpu_to_le32(bmp->db_numag); | 
 | 286 | 	dbmp_le->dn_maxlevel = cpu_to_le32(bmp->db_maxlevel); | 
 | 287 | 	dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag); | 
 | 288 | 	dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref); | 
 | 289 | 	dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel); | 
 | 290 | 	dbmp_le->dn_agheigth = cpu_to_le32(bmp->db_agheigth); | 
 | 291 | 	dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth); | 
 | 292 | 	dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart); | 
 | 293 | 	dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size); | 
 | 294 | 	for (i = 0; i < MAXAG; i++) | 
 | 295 | 		dbmp_le->dn_agfree[i] = cpu_to_le64(bmp->db_agfree[i]); | 
 | 296 | 	dbmp_le->dn_agsize = cpu_to_le64(bmp->db_agsize); | 
 | 297 | 	dbmp_le->dn_maxfreebud = bmp->db_maxfreebud; | 
 | 298 |  | 
 | 299 | 	/* write the buffer */ | 
 | 300 | 	write_metapage(mp); | 
 | 301 |  | 
 | 302 | 	/* | 
 | 303 | 	 * write out dirty pages of bmap | 
 | 304 | 	 */ | 
| OGAWA Hirofumi | 28fd129 | 2006-01-08 01:02:14 -0800 | [diff] [blame] | 305 | 	filemap_write_and_wait(ipbmap->i_mapping); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 306 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 307 | 	diWriteSpecial(ipbmap, 0); | 
 | 308 |  | 
 | 309 | 	return (0); | 
 | 310 | } | 
 | 311 |  | 
 | 312 |  | 
 | 313 | /* | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 314 |  * NAME:	dbFree() | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 315 |  * | 
 | 316 |  * FUNCTION:	free the specified block range from the working block | 
 | 317 |  *		allocation map. | 
 | 318 |  * | 
 | 319 |  *		the blocks will be free from the working map one dmap | 
 | 320 |  *		at a time. | 
 | 321 |  * | 
 | 322 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 323 |  *	ip	- pointer to in-core inode; | 
 | 324 |  *	blkno	- starting block number to be freed. | 
 | 325 |  *	nblocks	- number of blocks to be freed. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 326 |  * | 
 | 327 |  * RETURN VALUES: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 328 |  *	0	- success | 
 | 329 |  *	-EIO	- i/o error | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 330 |  */ | 
 | 331 | int dbFree(struct inode *ip, s64 blkno, s64 nblocks) | 
 | 332 | { | 
 | 333 | 	struct metapage *mp; | 
 | 334 | 	struct dmap *dp; | 
 | 335 | 	int nb, rc; | 
 | 336 | 	s64 lblkno, rem; | 
 | 337 | 	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; | 
 | 338 | 	struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; | 
 | 339 |  | 
| Dave Kleikamp | 82d5b9a | 2007-01-09 14:14:48 -0600 | [diff] [blame] | 340 | 	IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 341 |  | 
 | 342 | 	/* block to be freed better be within the mapsize. */ | 
 | 343 | 	if (unlikely((blkno == 0) || (blkno + nblocks > bmp->db_mapsize))) { | 
 | 344 | 		IREAD_UNLOCK(ipbmap); | 
 | 345 | 		printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n", | 
 | 346 | 		       (unsigned long long) blkno, | 
 | 347 | 		       (unsigned long long) nblocks); | 
 | 348 | 		jfs_error(ip->i_sb, | 
 | 349 | 			  "dbFree: block to be freed is outside the map"); | 
 | 350 | 		return -EIO; | 
 | 351 | 	} | 
 | 352 |  | 
 | 353 | 	/* | 
 | 354 | 	 * free the blocks a dmap at a time. | 
 | 355 | 	 */ | 
 | 356 | 	mp = NULL; | 
 | 357 | 	for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) { | 
 | 358 | 		/* release previous dmap if any */ | 
 | 359 | 		if (mp) { | 
 | 360 | 			write_metapage(mp); | 
 | 361 | 		} | 
 | 362 |  | 
 | 363 | 		/* get the buffer for the current dmap. */ | 
 | 364 | 		lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); | 
 | 365 | 		mp = read_metapage(ipbmap, lblkno, PSIZE, 0); | 
 | 366 | 		if (mp == NULL) { | 
 | 367 | 			IREAD_UNLOCK(ipbmap); | 
 | 368 | 			return -EIO; | 
 | 369 | 		} | 
 | 370 | 		dp = (struct dmap *) mp->data; | 
 | 371 |  | 
 | 372 | 		/* determine the number of blocks to be freed from | 
 | 373 | 		 * this dmap. | 
 | 374 | 		 */ | 
 | 375 | 		nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1))); | 
 | 376 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 377 | 		/* free the blocks. */ | 
 | 378 | 		if ((rc = dbFreeDmap(bmp, dp, blkno, nb))) { | 
| Dave Kleikamp | 56d1254 | 2005-07-15 09:43:36 -0500 | [diff] [blame] | 379 | 			jfs_error(ip->i_sb, "dbFree: error in block map\n"); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 380 | 			release_metapage(mp); | 
 | 381 | 			IREAD_UNLOCK(ipbmap); | 
 | 382 | 			return (rc); | 
 | 383 | 		} | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 384 | 	} | 
 | 385 |  | 
 | 386 | 	/* write the last buffer. */ | 
 | 387 | 	write_metapage(mp); | 
 | 388 |  | 
 | 389 | 	IREAD_UNLOCK(ipbmap); | 
 | 390 |  | 
 | 391 | 	return (0); | 
 | 392 | } | 
 | 393 |  | 
 | 394 |  | 
 | 395 | /* | 
 | 396 |  * NAME:	dbUpdatePMap() | 
 | 397 |  * | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 398 |  * FUNCTION:	update the allocation state (free or allocate) of the | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 399 |  *		specified block range in the persistent block allocation map. | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 400 |  * | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 401 |  *		the blocks will be updated in the persistent map one | 
 | 402 |  *		dmap at a time. | 
 | 403 |  * | 
 | 404 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 405 |  *	ipbmap	- pointer to in-core inode for the block map. | 
 | 406 |  *	free	- 'true' if block range is to be freed from the persistent | 
 | 407 |  *		  map; 'false' if it is to be allocated. | 
 | 408 |  *	blkno	- starting block number of the range. | 
 | 409 |  *	nblocks	- number of contiguous blocks in the range. | 
 | 410 |  *	tblk	- transaction block; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 411 |  * | 
 | 412 |  * RETURN VALUES: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 413 |  *	0	- success | 
 | 414 |  *	-EIO	- i/o error | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 415 |  */ | 
 | 416 | int | 
 | 417 | dbUpdatePMap(struct inode *ipbmap, | 
 | 418 | 	     int free, s64 blkno, s64 nblocks, struct tblock * tblk) | 
 | 419 | { | 
 | 420 | 	int nblks, dbitno, wbitno, rbits; | 
 | 421 | 	int word, nbits, nwords; | 
 | 422 | 	struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; | 
 | 423 | 	s64 lblkno, rem, lastlblkno; | 
 | 424 | 	u32 mask; | 
 | 425 | 	struct dmap *dp; | 
 | 426 | 	struct metapage *mp; | 
 | 427 | 	struct jfs_log *log; | 
 | 428 | 	int lsn, difft, diffp; | 
| Dave Kleikamp | 7fab479 | 2005-05-02 12:25:02 -0600 | [diff] [blame] | 429 | 	unsigned long flags; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 430 |  | 
 | 431 | 	/* the blocks better be within the mapsize. */ | 
 | 432 | 	if (blkno + nblocks > bmp->db_mapsize) { | 
 | 433 | 		printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n", | 
 | 434 | 		       (unsigned long long) blkno, | 
 | 435 | 		       (unsigned long long) nblocks); | 
 | 436 | 		jfs_error(ipbmap->i_sb, | 
 | 437 | 			  "dbUpdatePMap: blocks are outside the map"); | 
 | 438 | 		return -EIO; | 
 | 439 | 	} | 
 | 440 |  | 
 | 441 | 	/* compute delta of transaction lsn from log syncpt */ | 
 | 442 | 	lsn = tblk->lsn; | 
 | 443 | 	log = (struct jfs_log *) JFS_SBI(tblk->sb)->log; | 
 | 444 | 	logdiff(difft, lsn, log); | 
 | 445 |  | 
 | 446 | 	/* | 
 | 447 | 	 * update the block state a dmap at a time. | 
 | 448 | 	 */ | 
 | 449 | 	mp = NULL; | 
 | 450 | 	lastlblkno = 0; | 
 | 451 | 	for (rem = nblocks; rem > 0; rem -= nblks, blkno += nblks) { | 
 | 452 | 		/* get the buffer for the current dmap. */ | 
 | 453 | 		lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); | 
 | 454 | 		if (lblkno != lastlblkno) { | 
 | 455 | 			if (mp) { | 
 | 456 | 				write_metapage(mp); | 
 | 457 | 			} | 
 | 458 |  | 
 | 459 | 			mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, | 
 | 460 | 					   0); | 
 | 461 | 			if (mp == NULL) | 
 | 462 | 				return -EIO; | 
| Dave Kleikamp | 7fab479 | 2005-05-02 12:25:02 -0600 | [diff] [blame] | 463 | 			metapage_wait_for_io(mp); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 464 | 		} | 
 | 465 | 		dp = (struct dmap *) mp->data; | 
 | 466 |  | 
 | 467 | 		/* determine the bit number and word within the dmap of | 
 | 468 | 		 * the starting block.  also determine how many blocks | 
 | 469 | 		 * are to be updated within this dmap. | 
 | 470 | 		 */ | 
 | 471 | 		dbitno = blkno & (BPERDMAP - 1); | 
 | 472 | 		word = dbitno >> L2DBWORD; | 
 | 473 | 		nblks = min(rem, (s64)BPERDMAP - dbitno); | 
 | 474 |  | 
 | 475 | 		/* update the bits of the dmap words. the first and last | 
 | 476 | 		 * words may only have a subset of their bits updated. if | 
 | 477 | 		 * this is the case, we'll work against that word (i.e. | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 478 | 		 * partial first and/or last) only in a single pass.  a | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 479 | 		 * single pass will also be used to update all words that | 
 | 480 | 		 * are to have all their bits updated. | 
 | 481 | 		 */ | 
 | 482 | 		for (rbits = nblks; rbits > 0; | 
 | 483 | 		     rbits -= nbits, dbitno += nbits) { | 
 | 484 | 			/* determine the bit number within the word and | 
 | 485 | 			 * the number of bits within the word. | 
 | 486 | 			 */ | 
 | 487 | 			wbitno = dbitno & (DBWORD - 1); | 
 | 488 | 			nbits = min(rbits, DBWORD - wbitno); | 
 | 489 |  | 
 | 490 | 			/* check if only part of the word is to be updated. */ | 
 | 491 | 			if (nbits < DBWORD) { | 
 | 492 | 				/* update (free or allocate) the bits | 
 | 493 | 				 * in this word. | 
 | 494 | 				 */ | 
 | 495 | 				mask = | 
 | 496 | 				    (ONES << (DBWORD - nbits) >> wbitno); | 
 | 497 | 				if (free) | 
 | 498 | 					dp->pmap[word] &= | 
 | 499 | 					    cpu_to_le32(~mask); | 
 | 500 | 				else | 
 | 501 | 					dp->pmap[word] |= | 
 | 502 | 					    cpu_to_le32(mask); | 
 | 503 |  | 
 | 504 | 				word += 1; | 
 | 505 | 			} else { | 
 | 506 | 				/* one or more words are to have all | 
 | 507 | 				 * their bits updated.  determine how | 
 | 508 | 				 * many words and how many bits. | 
 | 509 | 				 */ | 
 | 510 | 				nwords = rbits >> L2DBWORD; | 
 | 511 | 				nbits = nwords << L2DBWORD; | 
 | 512 |  | 
 | 513 | 				/* update (free or allocate) the bits | 
 | 514 | 				 * in these words. | 
 | 515 | 				 */ | 
 | 516 | 				if (free) | 
 | 517 | 					memset(&dp->pmap[word], 0, | 
 | 518 | 					       nwords * 4); | 
 | 519 | 				else | 
 | 520 | 					memset(&dp->pmap[word], (int) ONES, | 
 | 521 | 					       nwords * 4); | 
 | 522 |  | 
 | 523 | 				word += nwords; | 
 | 524 | 			} | 
 | 525 | 		} | 
 | 526 |  | 
 | 527 | 		/* | 
 | 528 | 		 * update dmap lsn | 
 | 529 | 		 */ | 
 | 530 | 		if (lblkno == lastlblkno) | 
 | 531 | 			continue; | 
 | 532 |  | 
 | 533 | 		lastlblkno = lblkno; | 
 | 534 |  | 
| Dave Kleikamp | be0bf7d | 2006-03-08 10:59:15 -0600 | [diff] [blame] | 535 | 		LOGSYNC_LOCK(log, flags); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 536 | 		if (mp->lsn != 0) { | 
 | 537 | 			/* inherit older/smaller lsn */ | 
 | 538 | 			logdiff(diffp, mp->lsn, log); | 
 | 539 | 			if (difft < diffp) { | 
 | 540 | 				mp->lsn = lsn; | 
 | 541 |  | 
 | 542 | 				/* move bp after tblock in logsync list */ | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 543 | 				list_move(&mp->synclist, &tblk->synclist); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 544 | 			} | 
 | 545 |  | 
 | 546 | 			/* inherit younger/larger clsn */ | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 547 | 			logdiff(difft, tblk->clsn, log); | 
 | 548 | 			logdiff(diffp, mp->clsn, log); | 
 | 549 | 			if (difft > diffp) | 
 | 550 | 				mp->clsn = tblk->clsn; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 551 | 		} else { | 
 | 552 | 			mp->log = log; | 
 | 553 | 			mp->lsn = lsn; | 
 | 554 |  | 
 | 555 | 			/* insert bp after tblock in logsync list */ | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 556 | 			log->count++; | 
 | 557 | 			list_add(&mp->synclist, &tblk->synclist); | 
 | 558 |  | 
 | 559 | 			mp->clsn = tblk->clsn; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 560 | 		} | 
| Dave Kleikamp | be0bf7d | 2006-03-08 10:59:15 -0600 | [diff] [blame] | 561 | 		LOGSYNC_UNLOCK(log, flags); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 562 | 	} | 
 | 563 |  | 
 | 564 | 	/* write the last buffer. */ | 
 | 565 | 	if (mp) { | 
 | 566 | 		write_metapage(mp); | 
 | 567 | 	} | 
 | 568 |  | 
 | 569 | 	return (0); | 
 | 570 | } | 
 | 571 |  | 
 | 572 |  | 
 | 573 | /* | 
 | 574 |  * NAME:	dbNextAG() | 
 | 575 |  * | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 576 |  * FUNCTION:	find the preferred allocation group for new allocations. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 577 |  * | 
 | 578 |  *		Within the allocation groups, we maintain a preferred | 
 | 579 |  *		allocation group which consists of a group with at least | 
 | 580 |  *		average free space.  It is the preferred group that we target | 
 | 581 |  *		new inode allocation towards.  The tie-in between inode | 
 | 582 |  *		allocation and block allocation occurs as we allocate the | 
 | 583 |  *		first (data) block of an inode and specify the inode (block) | 
 | 584 |  *		as the allocation hint for this block. | 
 | 585 |  * | 
 | 586 |  *		We try to avoid having more than one open file growing in | 
 | 587 |  *		an allocation group, as this will lead to fragmentation. | 
 | 588 |  *		This differs from the old OS/2 method of trying to keep | 
 | 589 |  *		empty ags around for large allocations. | 
 | 590 |  * | 
 | 591 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 592 |  *	ipbmap	- pointer to in-core inode for the block map. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 593 |  * | 
 | 594 |  * RETURN VALUES: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 595 |  *	the preferred allocation group number. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 596 |  */ | 
 | 597 | int dbNextAG(struct inode *ipbmap) | 
 | 598 | { | 
 | 599 | 	s64 avgfree; | 
 | 600 | 	int agpref; | 
 | 601 | 	s64 hwm = 0; | 
 | 602 | 	int i; | 
 | 603 | 	int next_best = -1; | 
 | 604 | 	struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; | 
 | 605 |  | 
 | 606 | 	BMAP_LOCK(bmp); | 
 | 607 |  | 
 | 608 | 	/* determine the average number of free blocks within the ags. */ | 
 | 609 | 	avgfree = (u32)bmp->db_nfree / bmp->db_numag; | 
 | 610 |  | 
 | 611 | 	/* | 
 | 612 | 	 * if the current preferred ag does not have an active allocator | 
 | 613 | 	 * and has at least average freespace, return it | 
 | 614 | 	 */ | 
 | 615 | 	agpref = bmp->db_agpref; | 
 | 616 | 	if ((atomic_read(&bmp->db_active[agpref]) == 0) && | 
 | 617 | 	    (bmp->db_agfree[agpref] >= avgfree)) | 
 | 618 | 		goto unlock; | 
 | 619 |  | 
 | 620 | 	/* From the last preferred ag, find the next one with at least | 
 | 621 | 	 * average free space. | 
 | 622 | 	 */ | 
 | 623 | 	for (i = 0 ; i < bmp->db_numag; i++, agpref++) { | 
 | 624 | 		if (agpref == bmp->db_numag) | 
 | 625 | 			agpref = 0; | 
 | 626 |  | 
 | 627 | 		if (atomic_read(&bmp->db_active[agpref])) | 
 | 628 | 			/* open file is currently growing in this ag */ | 
 | 629 | 			continue; | 
 | 630 | 		if (bmp->db_agfree[agpref] >= avgfree) { | 
 | 631 | 			/* Return this one */ | 
 | 632 | 			bmp->db_agpref = agpref; | 
 | 633 | 			goto unlock; | 
 | 634 | 		} else if (bmp->db_agfree[agpref] > hwm) { | 
 | 635 | 			/* Less than avg. freespace, but best so far */ | 
 | 636 | 			hwm = bmp->db_agfree[agpref]; | 
 | 637 | 			next_best = agpref; | 
 | 638 | 		} | 
 | 639 | 	} | 
 | 640 |  | 
 | 641 | 	/* | 
 | 642 | 	 * If no inactive ag was found with average freespace, use the | 
 | 643 | 	 * next best | 
 | 644 | 	 */ | 
 | 645 | 	if (next_best != -1) | 
 | 646 | 		bmp->db_agpref = next_best; | 
 | 647 | 	/* else leave db_agpref unchanged */ | 
 | 648 | unlock: | 
 | 649 | 	BMAP_UNLOCK(bmp); | 
 | 650 |  | 
 | 651 | 	/* return the preferred group. | 
 | 652 | 	 */ | 
 | 653 | 	return (bmp->db_agpref); | 
 | 654 | } | 
 | 655 |  | 
 | 656 | /* | 
 | 657 |  * NAME:	dbAlloc() | 
 | 658 |  * | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 659 |  * FUNCTION:	attempt to allocate a specified number of contiguous free | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 660 |  *		blocks from the working allocation block map. | 
 | 661 |  * | 
 | 662 |  *		the block allocation policy uses hints and a multi-step | 
 | 663 |  *		approach. | 
 | 664 |  * | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 665 |  *		for allocation requests smaller than the number of blocks | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 666 |  *		per dmap, we first try to allocate the new blocks | 
 | 667 |  *		immediately following the hint.  if these blocks are not | 
 | 668 |  *		available, we try to allocate blocks near the hint.  if | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 669 |  *		no blocks near the hint are available, we next try to | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 670 |  *		allocate within the same dmap as contains the hint. | 
 | 671 |  * | 
 | 672 |  *		if no blocks are available in the dmap or the allocation | 
 | 673 |  *		request is larger than the dmap size, we try to allocate | 
 | 674 |  *		within the same allocation group as contains the hint. if | 
 | 675 |  *		this does not succeed, we finally try to allocate anywhere | 
 | 676 |  *		within the aggregate. | 
 | 677 |  * | 
 | 678 |  *		we also try to allocate anywhere within the aggregate for | 
 | 679 |  *		for allocation requests larger than the allocation group | 
 | 680 |  *		size or requests that specify no hint value. | 
 | 681 |  * | 
 | 682 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 683 |  *	ip	- pointer to in-core inode; | 
 | 684 |  *	hint	- allocation hint. | 
 | 685 |  *	nblocks	- number of contiguous blocks in the range. | 
 | 686 |  *	results	- on successful return, set to the starting block number | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 687 |  *		  of the newly allocated contiguous range. | 
 | 688 |  * | 
 | 689 |  * RETURN VALUES: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 690 |  *	0	- success | 
 | 691 |  *	-ENOSPC	- insufficient disk resources | 
 | 692 |  *	-EIO	- i/o error | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 693 |  */ | 
 | 694 | int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) | 
 | 695 | { | 
 | 696 | 	int rc, agno; | 
 | 697 | 	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; | 
 | 698 | 	struct bmap *bmp; | 
 | 699 | 	struct metapage *mp; | 
 | 700 | 	s64 lblkno, blkno; | 
 | 701 | 	struct dmap *dp; | 
 | 702 | 	int l2nb; | 
 | 703 | 	s64 mapSize; | 
 | 704 | 	int writers; | 
 | 705 |  | 
 | 706 | 	/* assert that nblocks is valid */ | 
 | 707 | 	assert(nblocks > 0); | 
 | 708 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 709 | 	/* get the log2 number of blocks to be allocated. | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 710 | 	 * if the number of blocks is not a log2 multiple, | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 711 | 	 * it will be rounded up to the next log2 multiple. | 
 | 712 | 	 */ | 
 | 713 | 	l2nb = BLKSTOL2(nblocks); | 
 | 714 |  | 
 | 715 | 	bmp = JFS_SBI(ip->i_sb)->bmap; | 
 | 716 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 717 | 	mapSize = bmp->db_mapsize; | 
 | 718 |  | 
 | 719 | 	/* the hint should be within the map */ | 
 | 720 | 	if (hint >= mapSize) { | 
 | 721 | 		jfs_error(ip->i_sb, "dbAlloc: the hint is outside the map"); | 
 | 722 | 		return -EIO; | 
 | 723 | 	} | 
 | 724 |  | 
 | 725 | 	/* if the number of blocks to be allocated is greater than the | 
 | 726 | 	 * allocation group size, try to allocate anywhere. | 
 | 727 | 	 */ | 
 | 728 | 	if (l2nb > bmp->db_agl2size) { | 
| Dave Kleikamp | 82d5b9a | 2007-01-09 14:14:48 -0600 | [diff] [blame] | 729 | 		IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 730 |  | 
 | 731 | 		rc = dbAllocAny(bmp, nblocks, l2nb, results); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 732 |  | 
 | 733 | 		goto write_unlock; | 
 | 734 | 	} | 
 | 735 |  | 
 | 736 | 	/* | 
 | 737 | 	 * If no hint, let dbNextAG recommend an allocation group | 
 | 738 | 	 */ | 
 | 739 | 	if (hint == 0) | 
 | 740 | 		goto pref_ag; | 
 | 741 |  | 
 | 742 | 	/* we would like to allocate close to the hint.  adjust the | 
 | 743 | 	 * hint to the block following the hint since the allocators | 
 | 744 | 	 * will start looking for free space starting at this point. | 
 | 745 | 	 */ | 
 | 746 | 	blkno = hint + 1; | 
 | 747 |  | 
 | 748 | 	if (blkno >= bmp->db_mapsize) | 
 | 749 | 		goto pref_ag; | 
 | 750 |  | 
 | 751 | 	agno = blkno >> bmp->db_agl2size; | 
 | 752 |  | 
 | 753 | 	/* check if blkno crosses over into a new allocation group. | 
 | 754 | 	 * if so, check if we should allow allocations within this | 
 | 755 | 	 * allocation group. | 
 | 756 | 	 */ | 
 | 757 | 	if ((blkno & (bmp->db_agsize - 1)) == 0) | 
 | 758 | 		/* check if the AG is currenly being written to. | 
 | 759 | 		 * if so, call dbNextAG() to find a non-busy | 
 | 760 | 		 * AG with sufficient free space. | 
 | 761 | 		 */ | 
 | 762 | 		if (atomic_read(&bmp->db_active[agno])) | 
 | 763 | 			goto pref_ag; | 
 | 764 |  | 
 | 765 | 	/* check if the allocation request size can be satisfied from a | 
 | 766 | 	 * single dmap.  if so, try to allocate from the dmap containing | 
 | 767 | 	 * the hint using a tiered strategy. | 
 | 768 | 	 */ | 
 | 769 | 	if (nblocks <= BPERDMAP) { | 
| Dave Kleikamp | 82d5b9a | 2007-01-09 14:14:48 -0600 | [diff] [blame] | 770 | 		IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 771 |  | 
 | 772 | 		/* get the buffer for the dmap containing the hint. | 
 | 773 | 		 */ | 
 | 774 | 		rc = -EIO; | 
 | 775 | 		lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); | 
 | 776 | 		mp = read_metapage(ipbmap, lblkno, PSIZE, 0); | 
 | 777 | 		if (mp == NULL) | 
 | 778 | 			goto read_unlock; | 
 | 779 |  | 
 | 780 | 		dp = (struct dmap *) mp->data; | 
 | 781 |  | 
 | 782 | 		/* first, try to satisfy the allocation request with the | 
 | 783 | 		 * blocks beginning at the hint. | 
 | 784 | 		 */ | 
 | 785 | 		if ((rc = dbAllocNext(bmp, dp, blkno, (int) nblocks)) | 
 | 786 | 		    != -ENOSPC) { | 
 | 787 | 			if (rc == 0) { | 
 | 788 | 				*results = blkno; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 789 | 				mark_metapage_dirty(mp); | 
 | 790 | 			} | 
 | 791 |  | 
 | 792 | 			release_metapage(mp); | 
 | 793 | 			goto read_unlock; | 
 | 794 | 		} | 
 | 795 |  | 
 | 796 | 		writers = atomic_read(&bmp->db_active[agno]); | 
 | 797 | 		if ((writers > 1) || | 
 | 798 | 		    ((writers == 1) && (JFS_IP(ip)->active_ag != agno))) { | 
 | 799 | 			/* | 
 | 800 | 			 * Someone else is writing in this allocation | 
 | 801 | 			 * group.  To avoid fragmenting, try another ag | 
 | 802 | 			 */ | 
 | 803 | 			release_metapage(mp); | 
 | 804 | 			IREAD_UNLOCK(ipbmap); | 
 | 805 | 			goto pref_ag; | 
 | 806 | 		} | 
 | 807 |  | 
 | 808 | 		/* next, try to satisfy the allocation request with blocks | 
 | 809 | 		 * near the hint. | 
 | 810 | 		 */ | 
 | 811 | 		if ((rc = | 
 | 812 | 		     dbAllocNear(bmp, dp, blkno, (int) nblocks, l2nb, results)) | 
 | 813 | 		    != -ENOSPC) { | 
| Dave Kleikamp | b38a3ab | 2005-06-27 15:35:37 -0500 | [diff] [blame] | 814 | 			if (rc == 0) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 815 | 				mark_metapage_dirty(mp); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 816 |  | 
 | 817 | 			release_metapage(mp); | 
 | 818 | 			goto read_unlock; | 
 | 819 | 		} | 
 | 820 |  | 
 | 821 | 		/* try to satisfy the allocation request with blocks within | 
 | 822 | 		 * the same dmap as the hint. | 
 | 823 | 		 */ | 
 | 824 | 		if ((rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results)) | 
 | 825 | 		    != -ENOSPC) { | 
| Dave Kleikamp | b38a3ab | 2005-06-27 15:35:37 -0500 | [diff] [blame] | 826 | 			if (rc == 0) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 827 | 				mark_metapage_dirty(mp); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 828 |  | 
 | 829 | 			release_metapage(mp); | 
 | 830 | 			goto read_unlock; | 
 | 831 | 		} | 
 | 832 |  | 
 | 833 | 		release_metapage(mp); | 
 | 834 | 		IREAD_UNLOCK(ipbmap); | 
 | 835 | 	} | 
 | 836 |  | 
 | 837 | 	/* try to satisfy the allocation request with blocks within | 
 | 838 | 	 * the same allocation group as the hint. | 
 | 839 | 	 */ | 
| Dave Kleikamp | 82d5b9a | 2007-01-09 14:14:48 -0600 | [diff] [blame] | 840 | 	IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP); | 
| Dave Kleikamp | b38a3ab | 2005-06-27 15:35:37 -0500 | [diff] [blame] | 841 | 	if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) != -ENOSPC) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 842 | 		goto write_unlock; | 
| Dave Kleikamp | b38a3ab | 2005-06-27 15:35:37 -0500 | [diff] [blame] | 843 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 844 | 	IWRITE_UNLOCK(ipbmap); | 
 | 845 |  | 
 | 846 |  | 
 | 847 |       pref_ag: | 
 | 848 | 	/* | 
 | 849 | 	 * Let dbNextAG recommend a preferred allocation group | 
 | 850 | 	 */ | 
 | 851 | 	agno = dbNextAG(ipbmap); | 
| Dave Kleikamp | 82d5b9a | 2007-01-09 14:14:48 -0600 | [diff] [blame] | 852 | 	IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 853 |  | 
 | 854 | 	/* Try to allocate within this allocation group.  if that fails, try to | 
 | 855 | 	 * allocate anywhere in the map. | 
 | 856 | 	 */ | 
 | 857 | 	if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) == -ENOSPC) | 
 | 858 | 		rc = dbAllocAny(bmp, nblocks, l2nb, results); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 859 |  | 
 | 860 |       write_unlock: | 
 | 861 | 	IWRITE_UNLOCK(ipbmap); | 
 | 862 |  | 
 | 863 | 	return (rc); | 
 | 864 |  | 
 | 865 |       read_unlock: | 
 | 866 | 	IREAD_UNLOCK(ipbmap); | 
 | 867 |  | 
 | 868 | 	return (rc); | 
 | 869 | } | 
 | 870 |  | 
 | 871 | #ifdef _NOTYET | 
 | 872 | /* | 
 | 873 |  * NAME:	dbAllocExact() | 
 | 874 |  * | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 875 |  * FUNCTION:	try to allocate the requested extent; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 876 |  * | 
 | 877 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 878 |  *	ip	- pointer to in-core inode; | 
 | 879 |  *	blkno	- extent address; | 
 | 880 |  *	nblocks	- extent length; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 881 |  * | 
 | 882 |  * RETURN VALUES: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 883 |  *	0	- success | 
 | 884 |  *	-ENOSPC	- insufficient disk resources | 
 | 885 |  *	-EIO	- i/o error | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 886 |  */ | 
 | 887 | int dbAllocExact(struct inode *ip, s64 blkno, int nblocks) | 
 | 888 | { | 
 | 889 | 	int rc; | 
 | 890 | 	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; | 
 | 891 | 	struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; | 
 | 892 | 	struct dmap *dp; | 
 | 893 | 	s64 lblkno; | 
 | 894 | 	struct metapage *mp; | 
 | 895 |  | 
| Dave Kleikamp | 82d5b9a | 2007-01-09 14:14:48 -0600 | [diff] [blame] | 896 | 	IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 897 |  | 
 | 898 | 	/* | 
 | 899 | 	 * validate extent request: | 
 | 900 | 	 * | 
 | 901 | 	 * note: defragfs policy: | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 902 | 	 *  max 64 blocks will be moved. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 903 | 	 *  allocation request size must be satisfied from a single dmap. | 
 | 904 | 	 */ | 
 | 905 | 	if (nblocks <= 0 || nblocks > BPERDMAP || blkno >= bmp->db_mapsize) { | 
 | 906 | 		IREAD_UNLOCK(ipbmap); | 
 | 907 | 		return -EINVAL; | 
 | 908 | 	} | 
 | 909 |  | 
 | 910 | 	if (nblocks > ((s64) 1 << bmp->db_maxfreebud)) { | 
 | 911 | 		/* the free space is no longer available */ | 
 | 912 | 		IREAD_UNLOCK(ipbmap); | 
 | 913 | 		return -ENOSPC; | 
 | 914 | 	} | 
 | 915 |  | 
 | 916 | 	/* read in the dmap covering the extent */ | 
 | 917 | 	lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); | 
 | 918 | 	mp = read_metapage(ipbmap, lblkno, PSIZE, 0); | 
 | 919 | 	if (mp == NULL) { | 
 | 920 | 		IREAD_UNLOCK(ipbmap); | 
 | 921 | 		return -EIO; | 
 | 922 | 	} | 
 | 923 | 	dp = (struct dmap *) mp->data; | 
 | 924 |  | 
 | 925 | 	/* try to allocate the requested extent */ | 
 | 926 | 	rc = dbAllocNext(bmp, dp, blkno, nblocks); | 
 | 927 |  | 
 | 928 | 	IREAD_UNLOCK(ipbmap); | 
 | 929 |  | 
| Dave Kleikamp | b38a3ab | 2005-06-27 15:35:37 -0500 | [diff] [blame] | 930 | 	if (rc == 0) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 931 | 		mark_metapage_dirty(mp); | 
| Dave Kleikamp | b38a3ab | 2005-06-27 15:35:37 -0500 | [diff] [blame] | 932 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 933 | 	release_metapage(mp); | 
 | 934 |  | 
 | 935 | 	return (rc); | 
 | 936 | } | 
 | 937 | #endif /* _NOTYET */ | 
 | 938 |  | 
 | 939 | /* | 
 | 940 |  * NAME:	dbReAlloc() | 
 | 941 |  * | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 942 |  * FUNCTION:	attempt to extend a current allocation by a specified | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 943 |  *		number of blocks. | 
 | 944 |  * | 
 | 945 |  *		this routine attempts to satisfy the allocation request | 
 | 946 |  *		by first trying to extend the existing allocation in | 
 | 947 |  *		place by allocating the additional blocks as the blocks | 
 | 948 |  *		immediately following the current allocation.  if these | 
 | 949 |  *		blocks are not available, this routine will attempt to | 
 | 950 |  *		allocate a new set of contiguous blocks large enough | 
 | 951 |  *		to cover the existing allocation plus the additional | 
 | 952 |  *		number of blocks required. | 
 | 953 |  * | 
 | 954 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 955 |  *	ip	    -  pointer to in-core inode requiring allocation. | 
 | 956 |  *	blkno	    -  starting block of the current allocation. | 
 | 957 |  *	nblocks	    -  number of contiguous blocks within the current | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 958 |  *		       allocation. | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 959 |  *	addnblocks  -  number of blocks to add to the allocation. | 
 | 960 |  *	results	-      on successful return, set to the starting block number | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 961 |  *		       of the existing allocation if the existing allocation | 
 | 962 |  *		       was extended in place or to a newly allocated contiguous | 
 | 963 |  *		       range if the existing allocation could not be extended | 
 | 964 |  *		       in place. | 
 | 965 |  * | 
 | 966 |  * RETURN VALUES: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 967 |  *	0	- success | 
 | 968 |  *	-ENOSPC	- insufficient disk resources | 
 | 969 |  *	-EIO	- i/o error | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 970 |  */ | 
 | 971 | int | 
 | 972 | dbReAlloc(struct inode *ip, | 
 | 973 | 	  s64 blkno, s64 nblocks, s64 addnblocks, s64 * results) | 
 | 974 | { | 
 | 975 | 	int rc; | 
 | 976 |  | 
 | 977 | 	/* try to extend the allocation in place. | 
 | 978 | 	 */ | 
 | 979 | 	if ((rc = dbExtend(ip, blkno, nblocks, addnblocks)) == 0) { | 
 | 980 | 		*results = blkno; | 
 | 981 | 		return (0); | 
 | 982 | 	} else { | 
 | 983 | 		if (rc != -ENOSPC) | 
 | 984 | 			return (rc); | 
 | 985 | 	} | 
 | 986 |  | 
 | 987 | 	/* could not extend the allocation in place, so allocate a | 
 | 988 | 	 * new set of blocks for the entire request (i.e. try to get | 
 | 989 | 	 * a range of contiguous blocks large enough to cover the | 
 | 990 | 	 * existing allocation plus the additional blocks.) | 
 | 991 | 	 */ | 
 | 992 | 	return (dbAlloc | 
 | 993 | 		(ip, blkno + nblocks - 1, addnblocks + nblocks, results)); | 
 | 994 | } | 
 | 995 |  | 
 | 996 |  | 
 | 997 | /* | 
 | 998 |  * NAME:	dbExtend() | 
 | 999 |  * | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1000 |  * FUNCTION:	attempt to extend a current allocation by a specified | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1001 |  *		number of blocks. | 
 | 1002 |  * | 
 | 1003 |  *		this routine attempts to satisfy the allocation request | 
 | 1004 |  *		by first trying to extend the existing allocation in | 
 | 1005 |  *		place by allocating the additional blocks as the blocks | 
 | 1006 |  *		immediately following the current allocation. | 
 | 1007 |  * | 
 | 1008 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1009 |  *	ip	    -  pointer to in-core inode requiring allocation. | 
 | 1010 |  *	blkno	    -  starting block of the current allocation. | 
 | 1011 |  *	nblocks	    -  number of contiguous blocks within the current | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1012 |  *		       allocation. | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1013 |  *	addnblocks  -  number of blocks to add to the allocation. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1014 |  * | 
 | 1015 |  * RETURN VALUES: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1016 |  *	0	- success | 
 | 1017 |  *	-ENOSPC	- insufficient disk resources | 
 | 1018 |  *	-EIO	- i/o error | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1019 |  */ | 
 | 1020 | static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks) | 
 | 1021 | { | 
 | 1022 | 	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); | 
 | 1023 | 	s64 lblkno, lastblkno, extblkno; | 
 | 1024 | 	uint rel_block; | 
 | 1025 | 	struct metapage *mp; | 
 | 1026 | 	struct dmap *dp; | 
 | 1027 | 	int rc; | 
 | 1028 | 	struct inode *ipbmap = sbi->ipbmap; | 
 | 1029 | 	struct bmap *bmp; | 
 | 1030 |  | 
 | 1031 | 	/* | 
 | 1032 | 	 * We don't want a non-aligned extent to cross a page boundary | 
 | 1033 | 	 */ | 
 | 1034 | 	if (((rel_block = blkno & (sbi->nbperpage - 1))) && | 
 | 1035 | 	    (rel_block + nblocks + addnblocks > sbi->nbperpage)) | 
 | 1036 | 		return -ENOSPC; | 
 | 1037 |  | 
 | 1038 | 	/* get the last block of the current allocation */ | 
 | 1039 | 	lastblkno = blkno + nblocks - 1; | 
 | 1040 |  | 
 | 1041 | 	/* determine the block number of the block following | 
 | 1042 | 	 * the existing allocation. | 
 | 1043 | 	 */ | 
 | 1044 | 	extblkno = lastblkno + 1; | 
 | 1045 |  | 
| Dave Kleikamp | 82d5b9a | 2007-01-09 14:14:48 -0600 | [diff] [blame] | 1046 | 	IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1047 |  | 
 | 1048 | 	/* better be within the file system */ | 
 | 1049 | 	bmp = sbi->bmap; | 
 | 1050 | 	if (lastblkno < 0 || lastblkno >= bmp->db_mapsize) { | 
 | 1051 | 		IREAD_UNLOCK(ipbmap); | 
 | 1052 | 		jfs_error(ip->i_sb, | 
 | 1053 | 			  "dbExtend: the block is outside the filesystem"); | 
 | 1054 | 		return -EIO; | 
 | 1055 | 	} | 
 | 1056 |  | 
 | 1057 | 	/* we'll attempt to extend the current allocation in place by | 
 | 1058 | 	 * allocating the additional blocks as the blocks immediately | 
 | 1059 | 	 * following the current allocation.  we only try to extend the | 
 | 1060 | 	 * current allocation in place if the number of additional blocks | 
 | 1061 | 	 * can fit into a dmap, the last block of the current allocation | 
 | 1062 | 	 * is not the last block of the file system, and the start of the | 
 | 1063 | 	 * inplace extension is not on an allocation group boundary. | 
 | 1064 | 	 */ | 
 | 1065 | 	if (addnblocks > BPERDMAP || extblkno >= bmp->db_mapsize || | 
 | 1066 | 	    (extblkno & (bmp->db_agsize - 1)) == 0) { | 
 | 1067 | 		IREAD_UNLOCK(ipbmap); | 
 | 1068 | 		return -ENOSPC; | 
 | 1069 | 	} | 
 | 1070 |  | 
 | 1071 | 	/* get the buffer for the dmap containing the first block | 
 | 1072 | 	 * of the extension. | 
 | 1073 | 	 */ | 
 | 1074 | 	lblkno = BLKTODMAP(extblkno, bmp->db_l2nbperpage); | 
 | 1075 | 	mp = read_metapage(ipbmap, lblkno, PSIZE, 0); | 
 | 1076 | 	if (mp == NULL) { | 
 | 1077 | 		IREAD_UNLOCK(ipbmap); | 
 | 1078 | 		return -EIO; | 
 | 1079 | 	} | 
 | 1080 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1081 | 	dp = (struct dmap *) mp->data; | 
 | 1082 |  | 
 | 1083 | 	/* try to allocate the blocks immediately following the | 
 | 1084 | 	 * current allocation. | 
 | 1085 | 	 */ | 
 | 1086 | 	rc = dbAllocNext(bmp, dp, extblkno, (int) addnblocks); | 
 | 1087 |  | 
 | 1088 | 	IREAD_UNLOCK(ipbmap); | 
 | 1089 |  | 
 | 1090 | 	/* were we successful ? */ | 
| Dave Kleikamp | b38a3ab | 2005-06-27 15:35:37 -0500 | [diff] [blame] | 1091 | 	if (rc == 0) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1092 | 		write_metapage(mp); | 
| Dave Kleikamp | b38a3ab | 2005-06-27 15:35:37 -0500 | [diff] [blame] | 1093 | 	else | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1094 | 		/* we were not successful */ | 
 | 1095 | 		release_metapage(mp); | 
 | 1096 |  | 
 | 1097 |  | 
 | 1098 | 	return (rc); | 
 | 1099 | } | 
 | 1100 |  | 
 | 1101 |  | 
 | 1102 | /* | 
 | 1103 |  * NAME:	dbAllocNext() | 
 | 1104 |  * | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1105 |  * FUNCTION:	attempt to allocate the blocks of the specified block | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1106 |  *		range within a dmap. | 
 | 1107 |  * | 
 | 1108 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1109 |  *	bmp	-  pointer to bmap descriptor | 
 | 1110 |  *	dp	-  pointer to dmap. | 
 | 1111 |  *	blkno	-  starting block number of the range. | 
 | 1112 |  *	nblocks	-  number of contiguous free blocks of the range. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1113 |  * | 
 | 1114 |  * RETURN VALUES: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1115 |  *	0	- success | 
 | 1116 |  *	-ENOSPC	- insufficient disk resources | 
 | 1117 |  *	-EIO	- i/o error | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1118 |  * | 
 | 1119 |  * serialization: IREAD_LOCK(ipbmap) held on entry/exit; | 
 | 1120 |  */ | 
 | 1121 | static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno, | 
 | 1122 | 		       int nblocks) | 
 | 1123 | { | 
 | 1124 | 	int dbitno, word, rembits, nb, nwords, wbitno, nw; | 
 | 1125 | 	int l2size; | 
 | 1126 | 	s8 *leaf; | 
 | 1127 | 	u32 mask; | 
 | 1128 |  | 
 | 1129 | 	if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) { | 
 | 1130 | 		jfs_error(bmp->db_ipbmap->i_sb, | 
 | 1131 | 			  "dbAllocNext: Corrupt dmap page"); | 
 | 1132 | 		return -EIO; | 
 | 1133 | 	} | 
 | 1134 |  | 
 | 1135 | 	/* pick up a pointer to the leaves of the dmap tree. | 
 | 1136 | 	 */ | 
 | 1137 | 	leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx); | 
 | 1138 |  | 
 | 1139 | 	/* determine the bit number and word within the dmap of the | 
 | 1140 | 	 * starting block. | 
 | 1141 | 	 */ | 
 | 1142 | 	dbitno = blkno & (BPERDMAP - 1); | 
 | 1143 | 	word = dbitno >> L2DBWORD; | 
 | 1144 |  | 
 | 1145 | 	/* check if the specified block range is contained within | 
 | 1146 | 	 * this dmap. | 
 | 1147 | 	 */ | 
 | 1148 | 	if (dbitno + nblocks > BPERDMAP) | 
 | 1149 | 		return -ENOSPC; | 
 | 1150 |  | 
 | 1151 | 	/* check if the starting leaf indicates that anything | 
 | 1152 | 	 * is free. | 
 | 1153 | 	 */ | 
 | 1154 | 	if (leaf[word] == NOFREE) | 
 | 1155 | 		return -ENOSPC; | 
 | 1156 |  | 
 | 1157 | 	/* check the dmaps words corresponding to block range to see | 
 | 1158 | 	 * if the block range is free.  not all bits of the first and | 
 | 1159 | 	 * last words may be contained within the block range.  if this | 
 | 1160 | 	 * is the case, we'll work against those words (i.e. partial first | 
 | 1161 | 	 * and/or last) on an individual basis (a single pass) and examine | 
 | 1162 | 	 * the actual bits to determine if they are free.  a single pass | 
 | 1163 | 	 * will be used for all dmap words fully contained within the | 
 | 1164 | 	 * specified range.  within this pass, the leaves of the dmap | 
 | 1165 | 	 * tree will be examined to determine if the blocks are free. a | 
 | 1166 | 	 * single leaf may describe the free space of multiple dmap | 
 | 1167 | 	 * words, so we may visit only a subset of the actual leaves | 
 | 1168 | 	 * corresponding to the dmap words of the block range. | 
 | 1169 | 	 */ | 
 | 1170 | 	for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) { | 
 | 1171 | 		/* determine the bit number within the word and | 
 | 1172 | 		 * the number of bits within the word. | 
 | 1173 | 		 */ | 
 | 1174 | 		wbitno = dbitno & (DBWORD - 1); | 
 | 1175 | 		nb = min(rembits, DBWORD - wbitno); | 
 | 1176 |  | 
 | 1177 | 		/* check if only part of the word is to be examined. | 
 | 1178 | 		 */ | 
 | 1179 | 		if (nb < DBWORD) { | 
 | 1180 | 			/* check if the bits are free. | 
 | 1181 | 			 */ | 
 | 1182 | 			mask = (ONES << (DBWORD - nb) >> wbitno); | 
 | 1183 | 			if ((mask & ~le32_to_cpu(dp->wmap[word])) != mask) | 
 | 1184 | 				return -ENOSPC; | 
 | 1185 |  | 
 | 1186 | 			word += 1; | 
 | 1187 | 		} else { | 
 | 1188 | 			/* one or more dmap words are fully contained | 
 | 1189 | 			 * within the block range.  determine how many | 
 | 1190 | 			 * words and how many bits. | 
 | 1191 | 			 */ | 
 | 1192 | 			nwords = rembits >> L2DBWORD; | 
 | 1193 | 			nb = nwords << L2DBWORD; | 
 | 1194 |  | 
 | 1195 | 			/* now examine the appropriate leaves to determine | 
 | 1196 | 			 * if the blocks are free. | 
 | 1197 | 			 */ | 
 | 1198 | 			while (nwords > 0) { | 
 | 1199 | 				/* does the leaf describe any free space ? | 
 | 1200 | 				 */ | 
 | 1201 | 				if (leaf[word] < BUDMIN) | 
 | 1202 | 					return -ENOSPC; | 
 | 1203 |  | 
 | 1204 | 				/* determine the l2 number of bits provided | 
 | 1205 | 				 * by this leaf. | 
 | 1206 | 				 */ | 
 | 1207 | 				l2size = | 
 | 1208 | 				    min((int)leaf[word], NLSTOL2BSZ(nwords)); | 
 | 1209 |  | 
 | 1210 | 				/* determine how many words were handled. | 
 | 1211 | 				 */ | 
 | 1212 | 				nw = BUDSIZE(l2size, BUDMIN); | 
 | 1213 |  | 
 | 1214 | 				nwords -= nw; | 
 | 1215 | 				word += nw; | 
 | 1216 | 			} | 
 | 1217 | 		} | 
 | 1218 | 	} | 
 | 1219 |  | 
 | 1220 | 	/* allocate the blocks. | 
 | 1221 | 	 */ | 
 | 1222 | 	return (dbAllocDmap(bmp, dp, blkno, nblocks)); | 
 | 1223 | } | 
 | 1224 |  | 
 | 1225 |  | 
 | 1226 | /* | 
 | 1227 |  * NAME:	dbAllocNear() | 
 | 1228 |  * | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1229 |  * FUNCTION:	attempt to allocate a number of contiguous free blocks near | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1230 |  *		a specified block (hint) within a dmap. | 
 | 1231 |  * | 
 | 1232 |  *		starting with the dmap leaf that covers the hint, we'll | 
 | 1233 |  *		check the next four contiguous leaves for sufficient free | 
 | 1234 |  *		space.  if sufficient free space is found, we'll allocate | 
 | 1235 |  *		the desired free space. | 
 | 1236 |  * | 
 | 1237 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1238 |  *	bmp	-  pointer to bmap descriptor | 
 | 1239 |  *	dp	-  pointer to dmap. | 
 | 1240 |  *	blkno	-  block number to allocate near. | 
 | 1241 |  *	nblocks	-  actual number of contiguous free blocks desired. | 
 | 1242 |  *	l2nb	-  log2 number of contiguous free blocks desired. | 
 | 1243 |  *	results	-  on successful return, set to the starting block number | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1244 |  *		   of the newly allocated range. | 
 | 1245 |  * | 
 | 1246 |  * RETURN VALUES: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1247 |  *	0	- success | 
 | 1248 |  *	-ENOSPC	- insufficient disk resources | 
 | 1249 |  *	-EIO	- i/o error | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1250 |  * | 
 | 1251 |  * serialization: IREAD_LOCK(ipbmap) held on entry/exit; | 
 | 1252 |  */ | 
 | 1253 | static int | 
 | 1254 | dbAllocNear(struct bmap * bmp, | 
 | 1255 | 	    struct dmap * dp, s64 blkno, int nblocks, int l2nb, s64 * results) | 
 | 1256 | { | 
 | 1257 | 	int word, lword, rc; | 
 | 1258 | 	s8 *leaf; | 
 | 1259 |  | 
 | 1260 | 	if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) { | 
 | 1261 | 		jfs_error(bmp->db_ipbmap->i_sb, | 
 | 1262 | 			  "dbAllocNear: Corrupt dmap page"); | 
 | 1263 | 		return -EIO; | 
 | 1264 | 	} | 
 | 1265 |  | 
 | 1266 | 	leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx); | 
 | 1267 |  | 
 | 1268 | 	/* determine the word within the dmap that holds the hint | 
 | 1269 | 	 * (i.e. blkno).  also, determine the last word in the dmap | 
 | 1270 | 	 * that we'll include in our examination. | 
 | 1271 | 	 */ | 
 | 1272 | 	word = (blkno & (BPERDMAP - 1)) >> L2DBWORD; | 
 | 1273 | 	lword = min(word + 4, LPERDMAP); | 
 | 1274 |  | 
 | 1275 | 	/* examine the leaves for sufficient free space. | 
 | 1276 | 	 */ | 
 | 1277 | 	for (; word < lword; word++) { | 
 | 1278 | 		/* does the leaf describe sufficient free space ? | 
 | 1279 | 		 */ | 
 | 1280 | 		if (leaf[word] < l2nb) | 
 | 1281 | 			continue; | 
 | 1282 |  | 
 | 1283 | 		/* determine the block number within the file system | 
 | 1284 | 		 * of the first block described by this dmap word. | 
 | 1285 | 		 */ | 
 | 1286 | 		blkno = le64_to_cpu(dp->start) + (word << L2DBWORD); | 
 | 1287 |  | 
 | 1288 | 		/* if not all bits of the dmap word are free, get the | 
 | 1289 | 		 * starting bit number within the dmap word of the required | 
 | 1290 | 		 * string of free bits and adjust the block number with the | 
 | 1291 | 		 * value. | 
 | 1292 | 		 */ | 
 | 1293 | 		if (leaf[word] < BUDMIN) | 
 | 1294 | 			blkno += | 
 | 1295 | 			    dbFindBits(le32_to_cpu(dp->wmap[word]), l2nb); | 
 | 1296 |  | 
 | 1297 | 		/* allocate the blocks. | 
 | 1298 | 		 */ | 
 | 1299 | 		if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0) | 
 | 1300 | 			*results = blkno; | 
 | 1301 |  | 
 | 1302 | 		return (rc); | 
 | 1303 | 	} | 
 | 1304 |  | 
 | 1305 | 	return -ENOSPC; | 
 | 1306 | } | 
 | 1307 |  | 
 | 1308 |  | 
 | 1309 | /* | 
 | 1310 |  * NAME:	dbAllocAG() | 
 | 1311 |  * | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1312 |  * FUNCTION:	attempt to allocate the specified number of contiguous | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1313 |  *		free blocks within the specified allocation group. | 
 | 1314 |  * | 
 | 1315 |  *		unless the allocation group size is equal to the number | 
 | 1316 |  *		of blocks per dmap, the dmap control pages will be used to | 
 | 1317 |  *		find the required free space, if available.  we start the | 
 | 1318 |  *		search at the highest dmap control page level which | 
 | 1319 |  *		distinctly describes the allocation group's free space | 
 | 1320 |  *		(i.e. the highest level at which the allocation group's | 
 | 1321 |  *		free space is not mixed in with that of any other group). | 
 | 1322 |  *		in addition, we start the search within this level at a | 
 | 1323 |  *		height of the dmapctl dmtree at which the nodes distinctly | 
 | 1324 |  *		describe the allocation group's free space.  at this height, | 
 | 1325 |  *		the allocation group's free space may be represented by 1 | 
 | 1326 |  *		or two sub-trees, depending on the allocation group size. | 
 | 1327 |  *		we search the top nodes of these subtrees left to right for | 
 | 1328 |  *		sufficient free space.  if sufficient free space is found, | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 1329 |  *		the subtree is searched to find the leftmost leaf that | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1330 |  *		has free space.  once we have made it to the leaf, we | 
 | 1331 |  *		move the search to the next lower level dmap control page | 
 | 1332 |  *		corresponding to this leaf.  we continue down the dmap control | 
 | 1333 |  *		pages until we find the dmap that contains or starts the | 
 | 1334 |  *		sufficient free space and we allocate at this dmap. | 
 | 1335 |  * | 
 | 1336 |  *		if the allocation group size is equal to the dmap size, | 
 | 1337 |  *		we'll start at the dmap corresponding to the allocation | 
 | 1338 |  *		group and attempt the allocation at this level. | 
 | 1339 |  * | 
 | 1340 |  *		the dmap control page search is also not performed if the | 
 | 1341 |  *		allocation group is completely free and we go to the first | 
 | 1342 |  *		dmap of the allocation group to do the allocation.  this is | 
 | 1343 |  *		done because the allocation group may be part (not the first | 
 | 1344 |  *		part) of a larger binary buddy system, causing the dmap | 
 | 1345 |  *		control pages to indicate no free space (NOFREE) within | 
 | 1346 |  *		the allocation group. | 
 | 1347 |  * | 
 | 1348 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1349 |  *	bmp	-  pointer to bmap descriptor | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1350 |  *	agno	- allocation group number. | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1351 |  *	nblocks	-  actual number of contiguous free blocks desired. | 
 | 1352 |  *	l2nb	-  log2 number of contiguous free blocks desired. | 
 | 1353 |  *	results	-  on successful return, set to the starting block number | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1354 |  *		   of the newly allocated range. | 
 | 1355 |  * | 
 | 1356 |  * RETURN VALUES: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1357 |  *	0	- success | 
 | 1358 |  *	-ENOSPC	- insufficient disk resources | 
 | 1359 |  *	-EIO	- i/o error | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1360 |  * | 
 | 1361 |  * note: IWRITE_LOCK(ipmap) held on entry/exit; | 
 | 1362 |  */ | 
 | 1363 | static int | 
 | 1364 | dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) | 
 | 1365 | { | 
 | 1366 | 	struct metapage *mp; | 
 | 1367 | 	struct dmapctl *dcp; | 
 | 1368 | 	int rc, ti, i, k, m, n, agperlev; | 
 | 1369 | 	s64 blkno, lblkno; | 
 | 1370 | 	int budmin; | 
 | 1371 |  | 
 | 1372 | 	/* allocation request should not be for more than the | 
 | 1373 | 	 * allocation group size. | 
 | 1374 | 	 */ | 
 | 1375 | 	if (l2nb > bmp->db_agl2size) { | 
 | 1376 | 		jfs_error(bmp->db_ipbmap->i_sb, | 
 | 1377 | 			  "dbAllocAG: allocation request is larger than the " | 
 | 1378 | 			  "allocation group size"); | 
 | 1379 | 		return -EIO; | 
 | 1380 | 	} | 
 | 1381 |  | 
 | 1382 | 	/* determine the starting block number of the allocation | 
 | 1383 | 	 * group. | 
 | 1384 | 	 */ | 
 | 1385 | 	blkno = (s64) agno << bmp->db_agl2size; | 
 | 1386 |  | 
 | 1387 | 	/* check if the allocation group size is the minimum allocation | 
 | 1388 | 	 * group size or if the allocation group is completely free. if | 
 | 1389 | 	 * the allocation group size is the minimum size of BPERDMAP (i.e. | 
 | 1390 | 	 * 1 dmap), there is no need to search the dmap control page (below) | 
 | 1391 | 	 * that fully describes the allocation group since the allocation | 
 | 1392 | 	 * group is already fully described by a dmap.  in this case, we | 
 | 1393 | 	 * just call dbAllocCtl() to search the dmap tree and allocate the | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 1394 | 	 * required space if available. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1395 | 	 * | 
 | 1396 | 	 * if the allocation group is completely free, dbAllocCtl() is | 
 | 1397 | 	 * also called to allocate the required space.  this is done for | 
 | 1398 | 	 * two reasons.  first, it makes no sense searching the dmap control | 
 | 1399 | 	 * pages for free space when we know that free space exists.  second, | 
 | 1400 | 	 * the dmap control pages may indicate that the allocation group | 
 | 1401 | 	 * has no free space if the allocation group is part (not the first | 
 | 1402 | 	 * part) of a larger binary buddy system. | 
 | 1403 | 	 */ | 
 | 1404 | 	if (bmp->db_agsize == BPERDMAP | 
 | 1405 | 	    || bmp->db_agfree[agno] == bmp->db_agsize) { | 
 | 1406 | 		rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); | 
 | 1407 | 		if ((rc == -ENOSPC) && | 
 | 1408 | 		    (bmp->db_agfree[agno] == bmp->db_agsize)) { | 
 | 1409 | 			printk(KERN_ERR "blkno = %Lx, blocks = %Lx\n", | 
 | 1410 | 			       (unsigned long long) blkno, | 
 | 1411 | 			       (unsigned long long) nblocks); | 
 | 1412 | 			jfs_error(bmp->db_ipbmap->i_sb, | 
 | 1413 | 				  "dbAllocAG: dbAllocCtl failed in free AG"); | 
 | 1414 | 		} | 
 | 1415 | 		return (rc); | 
 | 1416 | 	} | 
 | 1417 |  | 
 | 1418 | 	/* the buffer for the dmap control page that fully describes the | 
 | 1419 | 	 * allocation group. | 
 | 1420 | 	 */ | 
 | 1421 | 	lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, bmp->db_aglevel); | 
 | 1422 | 	mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); | 
 | 1423 | 	if (mp == NULL) | 
 | 1424 | 		return -EIO; | 
 | 1425 | 	dcp = (struct dmapctl *) mp->data; | 
 | 1426 | 	budmin = dcp->budmin; | 
 | 1427 |  | 
 | 1428 | 	if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { | 
 | 1429 | 		jfs_error(bmp->db_ipbmap->i_sb, | 
 | 1430 | 			  "dbAllocAG: Corrupt dmapctl page"); | 
 | 1431 | 		release_metapage(mp); | 
 | 1432 | 		return -EIO; | 
 | 1433 | 	} | 
 | 1434 |  | 
 | 1435 | 	/* search the subtree(s) of the dmap control page that describes | 
 | 1436 | 	 * the allocation group, looking for sufficient free space.  to begin, | 
 | 1437 | 	 * determine how many allocation groups are represented in a dmap | 
 | 1438 | 	 * control page at the control page level (i.e. L0, L1, L2) that | 
 | 1439 | 	 * fully describes an allocation group. next, determine the starting | 
 | 1440 | 	 * tree index of this allocation group within the control page. | 
 | 1441 | 	 */ | 
 | 1442 | 	agperlev = | 
 | 1443 | 	    (1 << (L2LPERCTL - (bmp->db_agheigth << 1))) / bmp->db_agwidth; | 
 | 1444 | 	ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1)); | 
 | 1445 |  | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 1446 | 	/* dmap control page trees fan-out by 4 and a single allocation | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1447 | 	 * group may be described by 1 or 2 subtrees within the ag level | 
 | 1448 | 	 * dmap control page, depending upon the ag size. examine the ag's | 
 | 1449 | 	 * subtrees for sufficient free space, starting with the leftmost | 
 | 1450 | 	 * subtree. | 
 | 1451 | 	 */ | 
 | 1452 | 	for (i = 0; i < bmp->db_agwidth; i++, ti++) { | 
 | 1453 | 		/* is there sufficient free space ? | 
 | 1454 | 		 */ | 
 | 1455 | 		if (l2nb > dcp->stree[ti]) | 
 | 1456 | 			continue; | 
 | 1457 |  | 
 | 1458 | 		/* sufficient free space found in a subtree. now search down | 
 | 1459 | 		 * the subtree to find the leftmost leaf that describes this | 
 | 1460 | 		 * free space. | 
 | 1461 | 		 */ | 
 | 1462 | 		for (k = bmp->db_agheigth; k > 0; k--) { | 
 | 1463 | 			for (n = 0, m = (ti << 2) + 1; n < 4; n++) { | 
 | 1464 | 				if (l2nb <= dcp->stree[m + n]) { | 
 | 1465 | 					ti = m + n; | 
 | 1466 | 					break; | 
 | 1467 | 				} | 
 | 1468 | 			} | 
 | 1469 | 			if (n == 4) { | 
 | 1470 | 				jfs_error(bmp->db_ipbmap->i_sb, | 
 | 1471 | 					  "dbAllocAG: failed descending stree"); | 
 | 1472 | 				release_metapage(mp); | 
 | 1473 | 				return -EIO; | 
 | 1474 | 			} | 
 | 1475 | 		} | 
 | 1476 |  | 
 | 1477 | 		/* determine the block number within the file system | 
 | 1478 | 		 * that corresponds to this leaf. | 
 | 1479 | 		 */ | 
 | 1480 | 		if (bmp->db_aglevel == 2) | 
 | 1481 | 			blkno = 0; | 
 | 1482 | 		else if (bmp->db_aglevel == 1) | 
 | 1483 | 			blkno &= ~(MAXL1SIZE - 1); | 
 | 1484 | 		else		/* bmp->db_aglevel == 0 */ | 
 | 1485 | 			blkno &= ~(MAXL0SIZE - 1); | 
 | 1486 |  | 
 | 1487 | 		blkno += | 
 | 1488 | 		    ((s64) (ti - le32_to_cpu(dcp->leafidx))) << budmin; | 
 | 1489 |  | 
 | 1490 | 		/* release the buffer in preparation for going down | 
 | 1491 | 		 * the next level of dmap control pages. | 
 | 1492 | 		 */ | 
 | 1493 | 		release_metapage(mp); | 
 | 1494 |  | 
 | 1495 | 		/* check if we need to continue to search down the lower | 
 | 1496 | 		 * level dmap control pages.  we need to if the number of | 
 | 1497 | 		 * blocks required is less than maximum number of blocks | 
 | 1498 | 		 * described at the next lower level. | 
 | 1499 | 		 */ | 
 | 1500 | 		if (l2nb < budmin) { | 
 | 1501 |  | 
 | 1502 | 			/* search the lower level dmap control pages to get | 
| Michael Opdenacker | 59c5159 | 2007-05-09 08:57:56 +0200 | [diff] [blame] | 1503 | 			 * the starting block number of the dmap that | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1504 | 			 * contains or starts off the free space. | 
 | 1505 | 			 */ | 
 | 1506 | 			if ((rc = | 
 | 1507 | 			     dbFindCtl(bmp, l2nb, bmp->db_aglevel - 1, | 
 | 1508 | 				       &blkno))) { | 
 | 1509 | 				if (rc == -ENOSPC) { | 
 | 1510 | 					jfs_error(bmp->db_ipbmap->i_sb, | 
 | 1511 | 						  "dbAllocAG: control page " | 
 | 1512 | 						  "inconsistent"); | 
 | 1513 | 					return -EIO; | 
 | 1514 | 				} | 
 | 1515 | 				return (rc); | 
 | 1516 | 			} | 
 | 1517 | 		} | 
 | 1518 |  | 
 | 1519 | 		/* allocate the blocks. | 
 | 1520 | 		 */ | 
 | 1521 | 		rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); | 
 | 1522 | 		if (rc == -ENOSPC) { | 
 | 1523 | 			jfs_error(bmp->db_ipbmap->i_sb, | 
 | 1524 | 				  "dbAllocAG: unable to allocate blocks"); | 
 | 1525 | 			rc = -EIO; | 
 | 1526 | 		} | 
 | 1527 | 		return (rc); | 
 | 1528 | 	} | 
 | 1529 |  | 
 | 1530 | 	/* no space in the allocation group.  release the buffer and | 
 | 1531 | 	 * return -ENOSPC. | 
 | 1532 | 	 */ | 
 | 1533 | 	release_metapage(mp); | 
 | 1534 |  | 
 | 1535 | 	return -ENOSPC; | 
 | 1536 | } | 
 | 1537 |  | 
 | 1538 |  | 
 | 1539 | /* | 
 | 1540 |  * NAME:	dbAllocAny() | 
 | 1541 |  * | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1542 |  * FUNCTION:	attempt to allocate the specified number of contiguous | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1543 |  *		free blocks anywhere in the file system. | 
 | 1544 |  * | 
 | 1545 |  *		dbAllocAny() attempts to find the sufficient free space by | 
 | 1546 |  *		searching down the dmap control pages, starting with the | 
 | 1547 |  *		highest level (i.e. L0, L1, L2) control page.  if free space | 
 | 1548 |  *		large enough to satisfy the desired free space is found, the | 
 | 1549 |  *		desired free space is allocated. | 
 | 1550 |  * | 
 | 1551 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1552 |  *	bmp	-  pointer to bmap descriptor | 
 | 1553 |  *	nblocks	 -  actual number of contiguous free blocks desired. | 
 | 1554 |  *	l2nb	 -  log2 number of contiguous free blocks desired. | 
 | 1555 |  *	results	-  on successful return, set to the starting block number | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1556 |  *		   of the newly allocated range. | 
 | 1557 |  * | 
 | 1558 |  * RETURN VALUES: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1559 |  *	0	- success | 
 | 1560 |  *	-ENOSPC	- insufficient disk resources | 
 | 1561 |  *	-EIO	- i/o error | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1562 |  * | 
 | 1563 |  * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; | 
 | 1564 |  */ | 
 | 1565 | static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results) | 
 | 1566 | { | 
 | 1567 | 	int rc; | 
 | 1568 | 	s64 blkno = 0; | 
 | 1569 |  | 
 | 1570 | 	/* starting with the top level dmap control page, search | 
 | 1571 | 	 * down the dmap control levels for sufficient free space. | 
 | 1572 | 	 * if free space is found, dbFindCtl() returns the starting | 
 | 1573 | 	 * block number of the dmap that contains or starts off the | 
 | 1574 | 	 * range of free space. | 
 | 1575 | 	 */ | 
 | 1576 | 	if ((rc = dbFindCtl(bmp, l2nb, bmp->db_maxlevel, &blkno))) | 
 | 1577 | 		return (rc); | 
 | 1578 |  | 
 | 1579 | 	/* allocate the blocks. | 
 | 1580 | 	 */ | 
 | 1581 | 	rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); | 
 | 1582 | 	if (rc == -ENOSPC) { | 
 | 1583 | 		jfs_error(bmp->db_ipbmap->i_sb, | 
 | 1584 | 			  "dbAllocAny: unable to allocate blocks"); | 
 | 1585 | 		return -EIO; | 
 | 1586 | 	} | 
 | 1587 | 	return (rc); | 
 | 1588 | } | 
 | 1589 |  | 
 | 1590 |  | 
 | 1591 | /* | 
 | 1592 |  * NAME:	dbFindCtl() | 
 | 1593 |  * | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1594 |  * FUNCTION:	starting at a specified dmap control page level and block | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1595 |  *		number, search down the dmap control levels for a range of | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1596 |  *		contiguous free blocks large enough to satisfy an allocation | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1597 |  *		request for the specified number of free blocks. | 
 | 1598 |  * | 
 | 1599 |  *		if sufficient contiguous free blocks are found, this routine | 
 | 1600 |  *		returns the starting block number within a dmap page that | 
 | 1601 |  *		contains or starts a range of contiqious free blocks that | 
 | 1602 |  *		is sufficient in size. | 
 | 1603 |  * | 
 | 1604 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1605 |  *	bmp	-  pointer to bmap descriptor | 
 | 1606 |  *	level	-  starting dmap control page level. | 
 | 1607 |  *	l2nb	-  log2 number of contiguous free blocks desired. | 
 | 1608 |  *	*blkno	-  on entry, starting block number for conducting the search. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1609 |  *		   on successful return, the first block within a dmap page | 
 | 1610 |  *		   that contains or starts a range of contiguous free blocks. | 
 | 1611 |  * | 
 | 1612 |  * RETURN VALUES: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1613 |  *	0	- success | 
 | 1614 |  *	-ENOSPC	- insufficient disk resources | 
 | 1615 |  *	-EIO	- i/o error | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1616 |  * | 
 | 1617 |  * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; | 
 | 1618 |  */ | 
 | 1619 | static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno) | 
 | 1620 | { | 
 | 1621 | 	int rc, leafidx, lev; | 
 | 1622 | 	s64 b, lblkno; | 
 | 1623 | 	struct dmapctl *dcp; | 
 | 1624 | 	int budmin; | 
 | 1625 | 	struct metapage *mp; | 
 | 1626 |  | 
 | 1627 | 	/* starting at the specified dmap control page level and block | 
 | 1628 | 	 * number, search down the dmap control levels for the starting | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 1629 | 	 * block number of a dmap page that contains or starts off | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1630 | 	 * sufficient free blocks. | 
 | 1631 | 	 */ | 
 | 1632 | 	for (lev = level, b = *blkno; lev >= 0; lev--) { | 
 | 1633 | 		/* get the buffer of the dmap control page for the block | 
 | 1634 | 		 * number and level (i.e. L0, L1, L2). | 
 | 1635 | 		 */ | 
 | 1636 | 		lblkno = BLKTOCTL(b, bmp->db_l2nbperpage, lev); | 
 | 1637 | 		mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); | 
 | 1638 | 		if (mp == NULL) | 
 | 1639 | 			return -EIO; | 
 | 1640 | 		dcp = (struct dmapctl *) mp->data; | 
 | 1641 | 		budmin = dcp->budmin; | 
 | 1642 |  | 
 | 1643 | 		if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { | 
 | 1644 | 			jfs_error(bmp->db_ipbmap->i_sb, | 
 | 1645 | 				  "dbFindCtl: Corrupt dmapctl page"); | 
 | 1646 | 			release_metapage(mp); | 
 | 1647 | 			return -EIO; | 
 | 1648 | 		} | 
 | 1649 |  | 
 | 1650 | 		/* search the tree within the dmap control page for | 
 | 1651 | 		 * sufficent free space.  if sufficient free space is found, | 
 | 1652 | 		 * dbFindLeaf() returns the index of the leaf at which | 
 | 1653 | 		 * free space was found. | 
 | 1654 | 		 */ | 
 | 1655 | 		rc = dbFindLeaf((dmtree_t *) dcp, l2nb, &leafidx); | 
 | 1656 |  | 
 | 1657 | 		/* release the buffer. | 
 | 1658 | 		 */ | 
 | 1659 | 		release_metapage(mp); | 
 | 1660 |  | 
 | 1661 | 		/* space found ? | 
 | 1662 | 		 */ | 
 | 1663 | 		if (rc) { | 
 | 1664 | 			if (lev != level) { | 
 | 1665 | 				jfs_error(bmp->db_ipbmap->i_sb, | 
 | 1666 | 					  "dbFindCtl: dmap inconsistent"); | 
 | 1667 | 				return -EIO; | 
 | 1668 | 			} | 
 | 1669 | 			return -ENOSPC; | 
 | 1670 | 		} | 
 | 1671 |  | 
 | 1672 | 		/* adjust the block number to reflect the location within | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 1673 | 		 * the dmap control page (i.e. the leaf) at which free | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1674 | 		 * space was found. | 
 | 1675 | 		 */ | 
 | 1676 | 		b += (((s64) leafidx) << budmin); | 
 | 1677 |  | 
 | 1678 | 		/* we stop the search at this dmap control page level if | 
 | 1679 | 		 * the number of blocks required is greater than or equal | 
 | 1680 | 		 * to the maximum number of blocks described at the next | 
 | 1681 | 		 * (lower) level. | 
 | 1682 | 		 */ | 
 | 1683 | 		if (l2nb >= budmin) | 
 | 1684 | 			break; | 
 | 1685 | 	} | 
 | 1686 |  | 
 | 1687 | 	*blkno = b; | 
 | 1688 | 	return (0); | 
 | 1689 | } | 
 | 1690 |  | 
 | 1691 |  | 
 | 1692 | /* | 
 | 1693 |  * NAME:	dbAllocCtl() | 
 | 1694 |  * | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1695 |  * FUNCTION:	attempt to allocate a specified number of contiguous | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 1696 |  *		blocks starting within a specific dmap. | 
 | 1697 |  * | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1698 |  *		this routine is called by higher level routines that search | 
 | 1699 |  *		the dmap control pages above the actual dmaps for contiguous | 
 | 1700 |  *		free space.  the result of successful searches by these | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 1701 |  *		routines are the starting block numbers within dmaps, with | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1702 |  *		the dmaps themselves containing the desired contiguous free | 
 | 1703 |  *		space or starting a contiguous free space of desired size | 
 | 1704 |  *		that is made up of the blocks of one or more dmaps. these | 
 | 1705 |  *		calls should not fail due to insufficent resources. | 
 | 1706 |  * | 
 | 1707 |  *		this routine is called in some cases where it is not known | 
 | 1708 |  *		whether it will fail due to insufficient resources.  more | 
 | 1709 |  *		specifically, this occurs when allocating from an allocation | 
 | 1710 |  *		group whose size is equal to the number of blocks per dmap. | 
 | 1711 |  *		in this case, the dmap control pages are not examined prior | 
 | 1712 |  *		to calling this routine (to save pathlength) and the call | 
 | 1713 |  *		might fail. | 
 | 1714 |  * | 
 | 1715 |  *		for a request size that fits within a dmap, this routine relies | 
 | 1716 |  *		upon the dmap's dmtree to find the requested contiguous free | 
 | 1717 |  *		space.  for request sizes that are larger than a dmap, the | 
 | 1718 |  *		requested free space will start at the first block of the | 
 | 1719 |  *		first dmap (i.e. blkno). | 
 | 1720 |  * | 
 | 1721 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1722 |  *	bmp	-  pointer to bmap descriptor | 
 | 1723 |  *	nblocks	 -  actual number of contiguous free blocks to allocate. | 
 | 1724 |  *	l2nb	 -  log2 number of contiguous free blocks to allocate. | 
 | 1725 |  *	blkno	 -  starting block number of the dmap to start the allocation | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1726 |  *		    from. | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1727 |  *	results	-  on successful return, set to the starting block number | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1728 |  *		   of the newly allocated range. | 
 | 1729 |  * | 
 | 1730 |  * RETURN VALUES: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1731 |  *	0	- success | 
 | 1732 |  *	-ENOSPC	- insufficient disk resources | 
 | 1733 |  *	-EIO	- i/o error | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1734 |  * | 
 | 1735 |  * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; | 
 | 1736 |  */ | 
 | 1737 | static int | 
 | 1738 | dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) | 
 | 1739 | { | 
 | 1740 | 	int rc, nb; | 
 | 1741 | 	s64 b, lblkno, n; | 
 | 1742 | 	struct metapage *mp; | 
 | 1743 | 	struct dmap *dp; | 
 | 1744 |  | 
 | 1745 | 	/* check if the allocation request is confined to a single dmap. | 
 | 1746 | 	 */ | 
 | 1747 | 	if (l2nb <= L2BPERDMAP) { | 
 | 1748 | 		/* get the buffer for the dmap. | 
 | 1749 | 		 */ | 
 | 1750 | 		lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); | 
 | 1751 | 		mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); | 
 | 1752 | 		if (mp == NULL) | 
 | 1753 | 			return -EIO; | 
 | 1754 | 		dp = (struct dmap *) mp->data; | 
 | 1755 |  | 
 | 1756 | 		/* try to allocate the blocks. | 
 | 1757 | 		 */ | 
 | 1758 | 		rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results); | 
 | 1759 | 		if (rc == 0) | 
 | 1760 | 			mark_metapage_dirty(mp); | 
 | 1761 |  | 
 | 1762 | 		release_metapage(mp); | 
 | 1763 |  | 
 | 1764 | 		return (rc); | 
 | 1765 | 	} | 
 | 1766 |  | 
 | 1767 | 	/* allocation request involving multiple dmaps. it must start on | 
 | 1768 | 	 * a dmap boundary. | 
 | 1769 | 	 */ | 
 | 1770 | 	assert((blkno & (BPERDMAP - 1)) == 0); | 
 | 1771 |  | 
 | 1772 | 	/* allocate the blocks dmap by dmap. | 
 | 1773 | 	 */ | 
 | 1774 | 	for (n = nblocks, b = blkno; n > 0; n -= nb, b += nb) { | 
 | 1775 | 		/* get the buffer for the dmap. | 
 | 1776 | 		 */ | 
 | 1777 | 		lblkno = BLKTODMAP(b, bmp->db_l2nbperpage); | 
 | 1778 | 		mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); | 
 | 1779 | 		if (mp == NULL) { | 
 | 1780 | 			rc = -EIO; | 
 | 1781 | 			goto backout; | 
 | 1782 | 		} | 
 | 1783 | 		dp = (struct dmap *) mp->data; | 
 | 1784 |  | 
 | 1785 | 		/* the dmap better be all free. | 
 | 1786 | 		 */ | 
 | 1787 | 		if (dp->tree.stree[ROOT] != L2BPERDMAP) { | 
 | 1788 | 			release_metapage(mp); | 
 | 1789 | 			jfs_error(bmp->db_ipbmap->i_sb, | 
 | 1790 | 				  "dbAllocCtl: the dmap is not all free"); | 
 | 1791 | 			rc = -EIO; | 
 | 1792 | 			goto backout; | 
 | 1793 | 		} | 
 | 1794 |  | 
 | 1795 | 		/* determine how many blocks to allocate from this dmap. | 
 | 1796 | 		 */ | 
 | 1797 | 		nb = min(n, (s64)BPERDMAP); | 
 | 1798 |  | 
 | 1799 | 		/* allocate the blocks from the dmap. | 
 | 1800 | 		 */ | 
 | 1801 | 		if ((rc = dbAllocDmap(bmp, dp, b, nb))) { | 
 | 1802 | 			release_metapage(mp); | 
 | 1803 | 			goto backout; | 
 | 1804 | 		} | 
 | 1805 |  | 
 | 1806 | 		/* write the buffer. | 
 | 1807 | 		 */ | 
 | 1808 | 		write_metapage(mp); | 
 | 1809 | 	} | 
 | 1810 |  | 
 | 1811 | 	/* set the results (starting block number) and return. | 
 | 1812 | 	 */ | 
 | 1813 | 	*results = blkno; | 
 | 1814 | 	return (0); | 
 | 1815 |  | 
 | 1816 | 	/* something failed in handling an allocation request involving | 
 | 1817 | 	 * multiple dmaps.  we'll try to clean up by backing out any | 
 | 1818 | 	 * allocation that has already happened for this request.  if | 
 | 1819 | 	 * we fail in backing out the allocation, we'll mark the file | 
 | 1820 | 	 * system to indicate that blocks have been leaked. | 
 | 1821 | 	 */ | 
 | 1822 |       backout: | 
 | 1823 |  | 
 | 1824 | 	/* try to backout the allocations dmap by dmap. | 
 | 1825 | 	 */ | 
 | 1826 | 	for (n = nblocks - n, b = blkno; n > 0; | 
 | 1827 | 	     n -= BPERDMAP, b += BPERDMAP) { | 
 | 1828 | 		/* get the buffer for this dmap. | 
 | 1829 | 		 */ | 
 | 1830 | 		lblkno = BLKTODMAP(b, bmp->db_l2nbperpage); | 
 | 1831 | 		mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); | 
 | 1832 | 		if (mp == NULL) { | 
 | 1833 | 			/* could not back out.  mark the file system | 
 | 1834 | 			 * to indicate that we have leaked blocks. | 
 | 1835 | 			 */ | 
 | 1836 | 			jfs_error(bmp->db_ipbmap->i_sb, | 
 | 1837 | 				  "dbAllocCtl: I/O Error: Block Leakage."); | 
 | 1838 | 			continue; | 
 | 1839 | 		} | 
 | 1840 | 		dp = (struct dmap *) mp->data; | 
 | 1841 |  | 
 | 1842 | 		/* free the blocks is this dmap. | 
 | 1843 | 		 */ | 
 | 1844 | 		if (dbFreeDmap(bmp, dp, b, BPERDMAP)) { | 
 | 1845 | 			/* could not back out.  mark the file system | 
 | 1846 | 			 * to indicate that we have leaked blocks. | 
 | 1847 | 			 */ | 
 | 1848 | 			release_metapage(mp); | 
 | 1849 | 			jfs_error(bmp->db_ipbmap->i_sb, | 
 | 1850 | 				  "dbAllocCtl: Block Leakage."); | 
 | 1851 | 			continue; | 
 | 1852 | 		} | 
 | 1853 |  | 
 | 1854 | 		/* write the buffer. | 
 | 1855 | 		 */ | 
 | 1856 | 		write_metapage(mp); | 
 | 1857 | 	} | 
 | 1858 |  | 
 | 1859 | 	return (rc); | 
 | 1860 | } | 
 | 1861 |  | 
 | 1862 |  | 
 | 1863 | /* | 
 | 1864 |  * NAME:	dbAllocDmapLev() | 
 | 1865 |  * | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1866 |  * FUNCTION:	attempt to allocate a specified number of contiguous blocks | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1867 |  *		from a specified dmap. | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 1868 |  * | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1869 |  *		this routine checks if the contiguous blocks are available. | 
 | 1870 |  *		if so, nblocks of blocks are allocated; otherwise, ENOSPC is | 
 | 1871 |  *		returned. | 
 | 1872 |  * | 
 | 1873 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1874 |  *	mp	-  pointer to bmap descriptor | 
 | 1875 |  *	dp	-  pointer to dmap to attempt to allocate blocks from. | 
 | 1876 |  *	l2nb	-  log2 number of contiguous block desired. | 
 | 1877 |  *	nblocks	-  actual number of contiguous block desired. | 
 | 1878 |  *	results	-  on successful return, set to the starting block number | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1879 |  *		   of the newly allocated range. | 
 | 1880 |  * | 
 | 1881 |  * RETURN VALUES: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1882 |  *	0	- success | 
 | 1883 |  *	-ENOSPC	- insufficient disk resources | 
 | 1884 |  *	-EIO	- i/o error | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1885 |  * | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 1886 |  * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1887 |  *	IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit; | 
 | 1888 |  */ | 
 | 1889 | static int | 
 | 1890 | dbAllocDmapLev(struct bmap * bmp, | 
 | 1891 | 	       struct dmap * dp, int nblocks, int l2nb, s64 * results) | 
 | 1892 | { | 
 | 1893 | 	s64 blkno; | 
 | 1894 | 	int leafidx, rc; | 
 | 1895 |  | 
 | 1896 | 	/* can't be more than a dmaps worth of blocks */ | 
 | 1897 | 	assert(l2nb <= L2BPERDMAP); | 
 | 1898 |  | 
 | 1899 | 	/* search the tree within the dmap page for sufficient | 
 | 1900 | 	 * free space.  if sufficient free space is found, dbFindLeaf() | 
 | 1901 | 	 * returns the index of the leaf at which free space was found. | 
 | 1902 | 	 */ | 
 | 1903 | 	if (dbFindLeaf((dmtree_t *) & dp->tree, l2nb, &leafidx)) | 
 | 1904 | 		return -ENOSPC; | 
 | 1905 |  | 
 | 1906 | 	/* determine the block number within the file system corresponding | 
 | 1907 | 	 * to the leaf at which free space was found. | 
 | 1908 | 	 */ | 
 | 1909 | 	blkno = le64_to_cpu(dp->start) + (leafidx << L2DBWORD); | 
 | 1910 |  | 
 | 1911 | 	/* if not all bits of the dmap word are free, get the starting | 
 | 1912 | 	 * bit number within the dmap word of the required string of free | 
 | 1913 | 	 * bits and adjust the block number with this value. | 
 | 1914 | 	 */ | 
 | 1915 | 	if (dp->tree.stree[leafidx + LEAFIND] < BUDMIN) | 
 | 1916 | 		blkno += dbFindBits(le32_to_cpu(dp->wmap[leafidx]), l2nb); | 
 | 1917 |  | 
 | 1918 | 	/* allocate the blocks */ | 
 | 1919 | 	if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0) | 
 | 1920 | 		*results = blkno; | 
 | 1921 |  | 
 | 1922 | 	return (rc); | 
 | 1923 | } | 
 | 1924 |  | 
 | 1925 |  | 
 | 1926 | /* | 
 | 1927 |  * NAME:	dbAllocDmap() | 
 | 1928 |  * | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1929 |  * FUNCTION:	adjust the disk allocation map to reflect the allocation | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1930 |  *		of a specified block range within a dmap. | 
 | 1931 |  * | 
 | 1932 |  *		this routine allocates the specified blocks from the dmap | 
 | 1933 |  *		through a call to dbAllocBits(). if the allocation of the | 
 | 1934 |  *		block range causes the maximum string of free blocks within | 
 | 1935 |  *		the dmap to change (i.e. the value of the root of the dmap's | 
 | 1936 |  *		dmtree), this routine will cause this change to be reflected | 
 | 1937 |  *		up through the appropriate levels of the dmap control pages | 
 | 1938 |  *		by a call to dbAdjCtl() for the L0 dmap control page that | 
 | 1939 |  *		covers this dmap. | 
 | 1940 |  * | 
 | 1941 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1942 |  *	bmp	-  pointer to bmap descriptor | 
 | 1943 |  *	dp	-  pointer to dmap to allocate the block range from. | 
 | 1944 |  *	blkno	-  starting block number of the block to be allocated. | 
 | 1945 |  *	nblocks	-  number of blocks to be allocated. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1946 |  * | 
 | 1947 |  * RETURN VALUES: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1948 |  *	0	- success | 
 | 1949 |  *	-EIO	- i/o error | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1950 |  * | 
 | 1951 |  * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; | 
 | 1952 |  */ | 
 | 1953 | static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, | 
 | 1954 | 		       int nblocks) | 
 | 1955 | { | 
 | 1956 | 	s8 oldroot; | 
 | 1957 | 	int rc; | 
 | 1958 |  | 
 | 1959 | 	/* save the current value of the root (i.e. maximum free string) | 
 | 1960 | 	 * of the dmap tree. | 
 | 1961 | 	 */ | 
 | 1962 | 	oldroot = dp->tree.stree[ROOT]; | 
 | 1963 |  | 
 | 1964 | 	/* allocate the specified (blocks) bits */ | 
 | 1965 | 	dbAllocBits(bmp, dp, blkno, nblocks); | 
 | 1966 |  | 
 | 1967 | 	/* if the root has not changed, done. */ | 
 | 1968 | 	if (dp->tree.stree[ROOT] == oldroot) | 
 | 1969 | 		return (0); | 
 | 1970 |  | 
 | 1971 | 	/* root changed. bubble the change up to the dmap control pages. | 
 | 1972 | 	 * if the adjustment of the upper level control pages fails, | 
 | 1973 | 	 * backout the bit allocation (thus making everything consistent). | 
 | 1974 | 	 */ | 
 | 1975 | 	if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 1, 0))) | 
 | 1976 | 		dbFreeBits(bmp, dp, blkno, nblocks); | 
 | 1977 |  | 
 | 1978 | 	return (rc); | 
 | 1979 | } | 
 | 1980 |  | 
 | 1981 |  | 
 | 1982 | /* | 
 | 1983 |  * NAME:	dbFreeDmap() | 
 | 1984 |  * | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1985 |  * FUNCTION:	adjust the disk allocation map to reflect the allocation | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1986 |  *		of a specified block range within a dmap. | 
 | 1987 |  * | 
 | 1988 |  *		this routine frees the specified blocks from the dmap through | 
 | 1989 |  *		a call to dbFreeBits(). if the deallocation of the block range | 
 | 1990 |  *		causes the maximum string of free blocks within the dmap to | 
 | 1991 |  *		change (i.e. the value of the root of the dmap's dmtree), this | 
 | 1992 |  *		routine will cause this change to be reflected up through the | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1993 |  *		appropriate levels of the dmap control pages by a call to | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1994 |  *		dbAdjCtl() for the L0 dmap control page that covers this dmap. | 
 | 1995 |  * | 
 | 1996 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 1997 |  *	bmp	-  pointer to bmap descriptor | 
 | 1998 |  *	dp	-  pointer to dmap to free the block range from. | 
 | 1999 |  *	blkno	-  starting block number of the block to be freed. | 
 | 2000 |  *	nblocks	-  number of blocks to be freed. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2001 |  * | 
 | 2002 |  * RETURN VALUES: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2003 |  *	0	- success | 
 | 2004 |  *	-EIO	- i/o error | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2005 |  * | 
 | 2006 |  * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; | 
 | 2007 |  */ | 
 | 2008 | static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, | 
 | 2009 | 		      int nblocks) | 
 | 2010 | { | 
 | 2011 | 	s8 oldroot; | 
| Dave Kleikamp | 56d1254 | 2005-07-15 09:43:36 -0500 | [diff] [blame] | 2012 | 	int rc = 0, word; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2013 |  | 
 | 2014 | 	/* save the current value of the root (i.e. maximum free string) | 
 | 2015 | 	 * of the dmap tree. | 
 | 2016 | 	 */ | 
 | 2017 | 	oldroot = dp->tree.stree[ROOT]; | 
 | 2018 |  | 
 | 2019 | 	/* free the specified (blocks) bits */ | 
| Dave Kleikamp | 56d1254 | 2005-07-15 09:43:36 -0500 | [diff] [blame] | 2020 | 	rc = dbFreeBits(bmp, dp, blkno, nblocks); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2021 |  | 
| Dave Kleikamp | 56d1254 | 2005-07-15 09:43:36 -0500 | [diff] [blame] | 2022 | 	/* if error or the root has not changed, done. */ | 
 | 2023 | 	if (rc || (dp->tree.stree[ROOT] == oldroot)) | 
 | 2024 | 		return (rc); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2025 |  | 
 | 2026 | 	/* root changed. bubble the change up to the dmap control pages. | 
 | 2027 | 	 * if the adjustment of the upper level control pages fails, | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 2028 | 	 * backout the deallocation. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2029 | 	 */ | 
 | 2030 | 	if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 0, 0))) { | 
 | 2031 | 		word = (blkno & (BPERDMAP - 1)) >> L2DBWORD; | 
 | 2032 |  | 
 | 2033 | 		/* as part of backing out the deallocation, we will have | 
 | 2034 | 		 * to back split the dmap tree if the deallocation caused | 
 | 2035 | 		 * the freed blocks to become part of a larger binary buddy | 
 | 2036 | 		 * system. | 
 | 2037 | 		 */ | 
 | 2038 | 		if (dp->tree.stree[word] == NOFREE) | 
 | 2039 | 			dbBackSplit((dmtree_t *) & dp->tree, word); | 
 | 2040 |  | 
 | 2041 | 		dbAllocBits(bmp, dp, blkno, nblocks); | 
 | 2042 | 	} | 
 | 2043 |  | 
 | 2044 | 	return (rc); | 
 | 2045 | } | 
 | 2046 |  | 
 | 2047 |  | 
 | 2048 | /* | 
 | 2049 |  * NAME:	dbAllocBits() | 
 | 2050 |  * | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2051 |  * FUNCTION:	allocate a specified block range from a dmap. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2052 |  * | 
 | 2053 |  *		this routine updates the dmap to reflect the working | 
 | 2054 |  *		state allocation of the specified block range. it directly | 
 | 2055 |  *		updates the bits of the working map and causes the adjustment | 
 | 2056 |  *		of the binary buddy system described by the dmap's dmtree | 
 | 2057 |  *		leaves to reflect the bits allocated.  it also causes the | 
 | 2058 |  *		dmap's dmtree, as a whole, to reflect the allocated range. | 
 | 2059 |  * | 
 | 2060 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2061 |  *	bmp	-  pointer to bmap descriptor | 
 | 2062 |  *	dp	-  pointer to dmap to allocate bits from. | 
 | 2063 |  *	blkno	-  starting block number of the bits to be allocated. | 
 | 2064 |  *	nblocks	-  number of bits to be allocated. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2065 |  * | 
 | 2066 |  * RETURN VALUES: none | 
 | 2067 |  * | 
 | 2068 |  * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; | 
 | 2069 |  */ | 
 | 2070 | static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, | 
 | 2071 | 			int nblocks) | 
 | 2072 | { | 
 | 2073 | 	int dbitno, word, rembits, nb, nwords, wbitno, nw, agno; | 
 | 2074 | 	dmtree_t *tp = (dmtree_t *) & dp->tree; | 
 | 2075 | 	int size; | 
 | 2076 | 	s8 *leaf; | 
 | 2077 |  | 
 | 2078 | 	/* pick up a pointer to the leaves of the dmap tree */ | 
 | 2079 | 	leaf = dp->tree.stree + LEAFIND; | 
 | 2080 |  | 
 | 2081 | 	/* determine the bit number and word within the dmap of the | 
 | 2082 | 	 * starting block. | 
 | 2083 | 	 */ | 
 | 2084 | 	dbitno = blkno & (BPERDMAP - 1); | 
 | 2085 | 	word = dbitno >> L2DBWORD; | 
 | 2086 |  | 
 | 2087 | 	/* block range better be within the dmap */ | 
 | 2088 | 	assert(dbitno + nblocks <= BPERDMAP); | 
 | 2089 |  | 
 | 2090 | 	/* allocate the bits of the dmap's words corresponding to the block | 
 | 2091 | 	 * range. not all bits of the first and last words may be contained | 
 | 2092 | 	 * within the block range.  if this is the case, we'll work against | 
 | 2093 | 	 * those words (i.e. partial first and/or last) on an individual basis | 
 | 2094 | 	 * (a single pass), allocating the bits of interest by hand and | 
 | 2095 | 	 * updating the leaf corresponding to the dmap word. a single pass | 
 | 2096 | 	 * will be used for all dmap words fully contained within the | 
 | 2097 | 	 * specified range.  within this pass, the bits of all fully contained | 
 | 2098 | 	 * dmap words will be marked as free in a single shot and the leaves | 
 | 2099 | 	 * will be updated. a single leaf may describe the free space of | 
 | 2100 | 	 * multiple dmap words, so we may update only a subset of the actual | 
 | 2101 | 	 * leaves corresponding to the dmap words of the block range. | 
 | 2102 | 	 */ | 
 | 2103 | 	for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) { | 
 | 2104 | 		/* determine the bit number within the word and | 
 | 2105 | 		 * the number of bits within the word. | 
 | 2106 | 		 */ | 
 | 2107 | 		wbitno = dbitno & (DBWORD - 1); | 
 | 2108 | 		nb = min(rembits, DBWORD - wbitno); | 
 | 2109 |  | 
 | 2110 | 		/* check if only part of a word is to be allocated. | 
 | 2111 | 		 */ | 
 | 2112 | 		if (nb < DBWORD) { | 
 | 2113 | 			/* allocate (set to 1) the appropriate bits within | 
 | 2114 | 			 * this dmap word. | 
 | 2115 | 			 */ | 
 | 2116 | 			dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb) | 
 | 2117 | 						      >> wbitno); | 
 | 2118 |  | 
 | 2119 | 			/* update the leaf for this dmap word. in addition | 
 | 2120 | 			 * to setting the leaf value to the binary buddy max | 
 | 2121 | 			 * of the updated dmap word, dbSplit() will split | 
 | 2122 | 			 * the binary system of the leaves if need be. | 
 | 2123 | 			 */ | 
 | 2124 | 			dbSplit(tp, word, BUDMIN, | 
 | 2125 | 				dbMaxBud((u8 *) & dp->wmap[word])); | 
 | 2126 |  | 
 | 2127 | 			word += 1; | 
 | 2128 | 		} else { | 
 | 2129 | 			/* one or more dmap words are fully contained | 
 | 2130 | 			 * within the block range.  determine how many | 
 | 2131 | 			 * words and allocate (set to 1) the bits of these | 
 | 2132 | 			 * words. | 
 | 2133 | 			 */ | 
 | 2134 | 			nwords = rembits >> L2DBWORD; | 
 | 2135 | 			memset(&dp->wmap[word], (int) ONES, nwords * 4); | 
 | 2136 |  | 
 | 2137 | 			/* determine how many bits. | 
 | 2138 | 			 */ | 
 | 2139 | 			nb = nwords << L2DBWORD; | 
 | 2140 |  | 
 | 2141 | 			/* now update the appropriate leaves to reflect | 
 | 2142 | 			 * the allocated words. | 
 | 2143 | 			 */ | 
 | 2144 | 			for (; nwords > 0; nwords -= nw) { | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2145 | 				if (leaf[word] < BUDMIN) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2146 | 					jfs_error(bmp->db_ipbmap->i_sb, | 
 | 2147 | 						  "dbAllocBits: leaf page " | 
 | 2148 | 						  "corrupt"); | 
 | 2149 | 					break; | 
 | 2150 | 				} | 
 | 2151 |  | 
 | 2152 | 				/* determine what the leaf value should be | 
 | 2153 | 				 * updated to as the minimum of the l2 number | 
 | 2154 | 				 * of bits being allocated and the l2 number | 
 | 2155 | 				 * of bits currently described by this leaf. | 
 | 2156 | 				 */ | 
 | 2157 | 				size = min((int)leaf[word], NLSTOL2BSZ(nwords)); | 
 | 2158 |  | 
 | 2159 | 				/* update the leaf to reflect the allocation. | 
 | 2160 | 				 * in addition to setting the leaf value to | 
 | 2161 | 				 * NOFREE, dbSplit() will split the binary | 
 | 2162 | 				 * system of the leaves to reflect the current | 
 | 2163 | 				 * allocation (size). | 
 | 2164 | 				 */ | 
 | 2165 | 				dbSplit(tp, word, size, NOFREE); | 
 | 2166 |  | 
 | 2167 | 				/* get the number of dmap words handled */ | 
 | 2168 | 				nw = BUDSIZE(size, BUDMIN); | 
 | 2169 | 				word += nw; | 
 | 2170 | 			} | 
 | 2171 | 		} | 
 | 2172 | 	} | 
 | 2173 |  | 
 | 2174 | 	/* update the free count for this dmap */ | 
| Marcin Slusarz | 8914562 | 2008-02-13 15:34:20 -0600 | [diff] [blame] | 2175 | 	le32_add_cpu(&dp->nfree, -nblocks); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2176 |  | 
 | 2177 | 	BMAP_LOCK(bmp); | 
 | 2178 |  | 
 | 2179 | 	/* if this allocation group is completely free, | 
 | 2180 | 	 * update the maximum allocation group number if this allocation | 
 | 2181 | 	 * group is the new max. | 
 | 2182 | 	 */ | 
 | 2183 | 	agno = blkno >> bmp->db_agl2size; | 
 | 2184 | 	if (agno > bmp->db_maxag) | 
 | 2185 | 		bmp->db_maxag = agno; | 
 | 2186 |  | 
 | 2187 | 	/* update the free count for the allocation group and map */ | 
 | 2188 | 	bmp->db_agfree[agno] -= nblocks; | 
 | 2189 | 	bmp->db_nfree -= nblocks; | 
 | 2190 |  | 
 | 2191 | 	BMAP_UNLOCK(bmp); | 
 | 2192 | } | 
 | 2193 |  | 
 | 2194 |  | 
 | 2195 | /* | 
 | 2196 |  * NAME:	dbFreeBits() | 
 | 2197 |  * | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2198 |  * FUNCTION:	free a specified block range from a dmap. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2199 |  * | 
 | 2200 |  *		this routine updates the dmap to reflect the working | 
 | 2201 |  *		state allocation of the specified block range. it directly | 
 | 2202 |  *		updates the bits of the working map and causes the adjustment | 
 | 2203 |  *		of the binary buddy system described by the dmap's dmtree | 
 | 2204 |  *		leaves to reflect the bits freed.  it also causes the dmap's | 
 | 2205 |  *		dmtree, as a whole, to reflect the deallocated range. | 
 | 2206 |  * | 
 | 2207 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2208 |  *	bmp	-  pointer to bmap descriptor | 
 | 2209 |  *	dp	-  pointer to dmap to free bits from. | 
 | 2210 |  *	blkno	-  starting block number of the bits to be freed. | 
 | 2211 |  *	nblocks	-  number of bits to be freed. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2212 |  * | 
| Dave Kleikamp | 56d1254 | 2005-07-15 09:43:36 -0500 | [diff] [blame] | 2213 |  * RETURN VALUES: 0 for success | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2214 |  * | 
 | 2215 |  * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; | 
 | 2216 |  */ | 
| Dave Kleikamp | 56d1254 | 2005-07-15 09:43:36 -0500 | [diff] [blame] | 2217 | static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2218 | 		       int nblocks) | 
 | 2219 | { | 
 | 2220 | 	int dbitno, word, rembits, nb, nwords, wbitno, nw, agno; | 
 | 2221 | 	dmtree_t *tp = (dmtree_t *) & dp->tree; | 
| Dave Kleikamp | 56d1254 | 2005-07-15 09:43:36 -0500 | [diff] [blame] | 2222 | 	int rc = 0; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2223 | 	int size; | 
 | 2224 |  | 
 | 2225 | 	/* determine the bit number and word within the dmap of the | 
 | 2226 | 	 * starting block. | 
 | 2227 | 	 */ | 
 | 2228 | 	dbitno = blkno & (BPERDMAP - 1); | 
 | 2229 | 	word = dbitno >> L2DBWORD; | 
 | 2230 |  | 
 | 2231 | 	/* block range better be within the dmap. | 
 | 2232 | 	 */ | 
 | 2233 | 	assert(dbitno + nblocks <= BPERDMAP); | 
 | 2234 |  | 
 | 2235 | 	/* free the bits of the dmaps words corresponding to the block range. | 
 | 2236 | 	 * not all bits of the first and last words may be contained within | 
 | 2237 | 	 * the block range.  if this is the case, we'll work against those | 
 | 2238 | 	 * words (i.e. partial first and/or last) on an individual basis | 
 | 2239 | 	 * (a single pass), freeing the bits of interest by hand and updating | 
 | 2240 | 	 * the leaf corresponding to the dmap word. a single pass will be used | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 2241 | 	 * for all dmap words fully contained within the specified range. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2242 | 	 * within this pass, the bits of all fully contained dmap words will | 
 | 2243 | 	 * be marked as free in a single shot and the leaves will be updated. a | 
 | 2244 | 	 * single leaf may describe the free space of multiple dmap words, | 
 | 2245 | 	 * so we may update only a subset of the actual leaves corresponding | 
 | 2246 | 	 * to the dmap words of the block range. | 
 | 2247 | 	 * | 
 | 2248 | 	 * dbJoin() is used to update leaf values and will join the binary | 
 | 2249 | 	 * buddy system of the leaves if the new leaf values indicate this | 
 | 2250 | 	 * should be done. | 
 | 2251 | 	 */ | 
 | 2252 | 	for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) { | 
 | 2253 | 		/* determine the bit number within the word and | 
 | 2254 | 		 * the number of bits within the word. | 
 | 2255 | 		 */ | 
 | 2256 | 		wbitno = dbitno & (DBWORD - 1); | 
 | 2257 | 		nb = min(rembits, DBWORD - wbitno); | 
 | 2258 |  | 
 | 2259 | 		/* check if only part of a word is to be freed. | 
 | 2260 | 		 */ | 
 | 2261 | 		if (nb < DBWORD) { | 
 | 2262 | 			/* free (zero) the appropriate bits within this | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 2263 | 			 * dmap word. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2264 | 			 */ | 
 | 2265 | 			dp->wmap[word] &= | 
 | 2266 | 			    cpu_to_le32(~(ONES << (DBWORD - nb) | 
 | 2267 | 					  >> wbitno)); | 
 | 2268 |  | 
 | 2269 | 			/* update the leaf for this dmap word. | 
 | 2270 | 			 */ | 
| Dave Kleikamp | 56d1254 | 2005-07-15 09:43:36 -0500 | [diff] [blame] | 2271 | 			rc = dbJoin(tp, word, | 
 | 2272 | 				    dbMaxBud((u8 *) & dp->wmap[word])); | 
 | 2273 | 			if (rc) | 
 | 2274 | 				return rc; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2275 |  | 
 | 2276 | 			word += 1; | 
 | 2277 | 		} else { | 
 | 2278 | 			/* one or more dmap words are fully contained | 
 | 2279 | 			 * within the block range.  determine how many | 
 | 2280 | 			 * words and free (zero) the bits of these words. | 
 | 2281 | 			 */ | 
 | 2282 | 			nwords = rembits >> L2DBWORD; | 
 | 2283 | 			memset(&dp->wmap[word], 0, nwords * 4); | 
 | 2284 |  | 
 | 2285 | 			/* determine how many bits. | 
 | 2286 | 			 */ | 
 | 2287 | 			nb = nwords << L2DBWORD; | 
 | 2288 |  | 
 | 2289 | 			/* now update the appropriate leaves to reflect | 
 | 2290 | 			 * the freed words. | 
 | 2291 | 			 */ | 
 | 2292 | 			for (; nwords > 0; nwords -= nw) { | 
 | 2293 | 				/* determine what the leaf value should be | 
 | 2294 | 				 * updated to as the minimum of the l2 number | 
 | 2295 | 				 * of bits being freed and the l2 (max) number | 
 | 2296 | 				 * of bits that can be described by this leaf. | 
 | 2297 | 				 */ | 
 | 2298 | 				size = | 
 | 2299 | 				    min(LITOL2BSZ | 
 | 2300 | 					(word, L2LPERDMAP, BUDMIN), | 
 | 2301 | 					NLSTOL2BSZ(nwords)); | 
 | 2302 |  | 
 | 2303 | 				/* update the leaf. | 
 | 2304 | 				 */ | 
| Dave Kleikamp | 56d1254 | 2005-07-15 09:43:36 -0500 | [diff] [blame] | 2305 | 				rc = dbJoin(tp, word, size); | 
 | 2306 | 				if (rc) | 
 | 2307 | 					return rc; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2308 |  | 
 | 2309 | 				/* get the number of dmap words handled. | 
 | 2310 | 				 */ | 
 | 2311 | 				nw = BUDSIZE(size, BUDMIN); | 
 | 2312 | 				word += nw; | 
 | 2313 | 			} | 
 | 2314 | 		} | 
 | 2315 | 	} | 
 | 2316 |  | 
 | 2317 | 	/* update the free count for this dmap. | 
 | 2318 | 	 */ | 
| Marcin Slusarz | 8914562 | 2008-02-13 15:34:20 -0600 | [diff] [blame] | 2319 | 	le32_add_cpu(&dp->nfree, nblocks); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2320 |  | 
 | 2321 | 	BMAP_LOCK(bmp); | 
 | 2322 |  | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 2323 | 	/* update the free count for the allocation group and | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2324 | 	 * map. | 
 | 2325 | 	 */ | 
 | 2326 | 	agno = blkno >> bmp->db_agl2size; | 
 | 2327 | 	bmp->db_nfree += nblocks; | 
 | 2328 | 	bmp->db_agfree[agno] += nblocks; | 
 | 2329 |  | 
 | 2330 | 	/* check if this allocation group is not completely free and | 
 | 2331 | 	 * if it is currently the maximum (rightmost) allocation group. | 
 | 2332 | 	 * if so, establish the new maximum allocation group number by | 
 | 2333 | 	 * searching left for the first allocation group with allocation. | 
 | 2334 | 	 */ | 
 | 2335 | 	if ((bmp->db_agfree[agno] == bmp->db_agsize && agno == bmp->db_maxag) || | 
 | 2336 | 	    (agno == bmp->db_numag - 1 && | 
 | 2337 | 	     bmp->db_agfree[agno] == (bmp-> db_mapsize & (BPERDMAP - 1)))) { | 
 | 2338 | 		while (bmp->db_maxag > 0) { | 
 | 2339 | 			bmp->db_maxag -= 1; | 
 | 2340 | 			if (bmp->db_agfree[bmp->db_maxag] != | 
 | 2341 | 			    bmp->db_agsize) | 
 | 2342 | 				break; | 
 | 2343 | 		} | 
 | 2344 |  | 
 | 2345 | 		/* re-establish the allocation group preference if the | 
 | 2346 | 		 * current preference is right of the maximum allocation | 
 | 2347 | 		 * group. | 
 | 2348 | 		 */ | 
 | 2349 | 		if (bmp->db_agpref > bmp->db_maxag) | 
 | 2350 | 			bmp->db_agpref = bmp->db_maxag; | 
 | 2351 | 	} | 
 | 2352 |  | 
 | 2353 | 	BMAP_UNLOCK(bmp); | 
| Dave Kleikamp | 56d1254 | 2005-07-15 09:43:36 -0500 | [diff] [blame] | 2354 |  | 
 | 2355 | 	return 0; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2356 | } | 
 | 2357 |  | 
 | 2358 |  | 
 | 2359 | /* | 
 | 2360 |  * NAME:	dbAdjCtl() | 
 | 2361 |  * | 
 | 2362 |  * FUNCTION:	adjust a dmap control page at a specified level to reflect | 
 | 2363 |  *		the change in a lower level dmap or dmap control page's | 
 | 2364 |  *		maximum string of free blocks (i.e. a change in the root | 
 | 2365 |  *		of the lower level object's dmtree) due to the allocation | 
 | 2366 |  *		or deallocation of a range of blocks with a single dmap. | 
 | 2367 |  * | 
 | 2368 |  *		on entry, this routine is provided with the new value of | 
 | 2369 |  *		the lower level dmap or dmap control page root and the | 
 | 2370 |  *		starting block number of the block range whose allocation | 
 | 2371 |  *		or deallocation resulted in the root change.  this range | 
 | 2372 |  *		is respresented by a single leaf of the current dmapctl | 
 | 2373 |  *		and the leaf will be updated with this value, possibly | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 2374 |  *		causing a binary buddy system within the leaves to be | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2375 |  *		split or joined.  the update may also cause the dmapctl's | 
 | 2376 |  *		dmtree to be updated. | 
 | 2377 |  * | 
 | 2378 |  *		if the adjustment of the dmap control page, itself, causes its | 
 | 2379 |  *		root to change, this change will be bubbled up to the next dmap | 
 | 2380 |  *		control level by a recursive call to this routine, specifying | 
 | 2381 |  *		the new root value and the next dmap control page level to | 
 | 2382 |  *		be adjusted. | 
 | 2383 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2384 |  *	bmp	-  pointer to bmap descriptor | 
 | 2385 |  *	blkno	-  the first block of a block range within a dmap.  it is | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2386 |  *		   the allocation or deallocation of this block range that | 
 | 2387 |  *		   requires the dmap control page to be adjusted. | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2388 |  *	newval	-  the new value of the lower level dmap or dmap control | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2389 |  *		   page root. | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2390 |  *	alloc	-  'true' if adjustment is due to an allocation. | 
 | 2391 |  *	level	-  current level of dmap control page (i.e. L0, L1, L2) to | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2392 |  *		   be adjusted. | 
 | 2393 |  * | 
 | 2394 |  * RETURN VALUES: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2395 |  *	0	- success | 
 | 2396 |  *	-EIO	- i/o error | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2397 |  * | 
 | 2398 |  * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; | 
 | 2399 |  */ | 
 | 2400 | static int | 
 | 2401 | dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) | 
 | 2402 | { | 
 | 2403 | 	struct metapage *mp; | 
 | 2404 | 	s8 oldroot; | 
 | 2405 | 	int oldval; | 
 | 2406 | 	s64 lblkno; | 
 | 2407 | 	struct dmapctl *dcp; | 
 | 2408 | 	int rc, leafno, ti; | 
 | 2409 |  | 
 | 2410 | 	/* get the buffer for the dmap control page for the specified | 
 | 2411 | 	 * block number and control page level. | 
 | 2412 | 	 */ | 
 | 2413 | 	lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, level); | 
 | 2414 | 	mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); | 
 | 2415 | 	if (mp == NULL) | 
 | 2416 | 		return -EIO; | 
 | 2417 | 	dcp = (struct dmapctl *) mp->data; | 
 | 2418 |  | 
 | 2419 | 	if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { | 
 | 2420 | 		jfs_error(bmp->db_ipbmap->i_sb, | 
 | 2421 | 			  "dbAdjCtl: Corrupt dmapctl page"); | 
 | 2422 | 		release_metapage(mp); | 
 | 2423 | 		return -EIO; | 
 | 2424 | 	} | 
 | 2425 |  | 
 | 2426 | 	/* determine the leaf number corresponding to the block and | 
 | 2427 | 	 * the index within the dmap control tree. | 
 | 2428 | 	 */ | 
 | 2429 | 	leafno = BLKTOCTLLEAF(blkno, dcp->budmin); | 
 | 2430 | 	ti = leafno + le32_to_cpu(dcp->leafidx); | 
 | 2431 |  | 
 | 2432 | 	/* save the current leaf value and the current root level (i.e. | 
 | 2433 | 	 * maximum l2 free string described by this dmapctl). | 
 | 2434 | 	 */ | 
 | 2435 | 	oldval = dcp->stree[ti]; | 
 | 2436 | 	oldroot = dcp->stree[ROOT]; | 
 | 2437 |  | 
 | 2438 | 	/* check if this is a control page update for an allocation. | 
 | 2439 | 	 * if so, update the leaf to reflect the new leaf value using | 
 | 2440 | 	 * dbSplit(); otherwise (deallocation), use dbJoin() to udpate | 
 | 2441 | 	 * the leaf with the new value.  in addition to updating the | 
 | 2442 | 	 * leaf, dbSplit() will also split the binary buddy system of | 
 | 2443 | 	 * the leaves, if required, and bubble new values within the | 
 | 2444 | 	 * dmapctl tree, if required.  similarly, dbJoin() will join | 
 | 2445 | 	 * the binary buddy system of leaves and bubble new values up | 
 | 2446 | 	 * the dmapctl tree as required by the new leaf value. | 
 | 2447 | 	 */ | 
 | 2448 | 	if (alloc) { | 
 | 2449 | 		/* check if we are in the middle of a binary buddy | 
 | 2450 | 		 * system.  this happens when we are performing the | 
 | 2451 | 		 * first allocation out of an allocation group that | 
 | 2452 | 		 * is part (not the first part) of a larger binary | 
 | 2453 | 		 * buddy system.  if we are in the middle, back split | 
 | 2454 | 		 * the system prior to calling dbSplit() which assumes | 
 | 2455 | 		 * that it is at the front of a binary buddy system. | 
 | 2456 | 		 */ | 
 | 2457 | 		if (oldval == NOFREE) { | 
| Dave Kleikamp | b6a47fd | 2005-10-11 09:06:59 -0500 | [diff] [blame] | 2458 | 			rc = dbBackSplit((dmtree_t *) dcp, leafno); | 
 | 2459 | 			if (rc) | 
 | 2460 | 				return rc; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2461 | 			oldval = dcp->stree[ti]; | 
 | 2462 | 		} | 
 | 2463 | 		dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval); | 
 | 2464 | 	} else { | 
| Dave Kleikamp | 56d1254 | 2005-07-15 09:43:36 -0500 | [diff] [blame] | 2465 | 		rc = dbJoin((dmtree_t *) dcp, leafno, newval); | 
 | 2466 | 		if (rc) | 
 | 2467 | 			return rc; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2468 | 	} | 
 | 2469 |  | 
 | 2470 | 	/* check if the root of the current dmap control page changed due | 
 | 2471 | 	 * to the update and if the current dmap control page is not at | 
 | 2472 | 	 * the current top level (i.e. L0, L1, L2) of the map.  if so (i.e. | 
 | 2473 | 	 * root changed and this is not the top level), call this routine | 
 | 2474 | 	 * again (recursion) for the next higher level of the mapping to | 
 | 2475 | 	 * reflect the change in root for the current dmap control page. | 
 | 2476 | 	 */ | 
 | 2477 | 	if (dcp->stree[ROOT] != oldroot) { | 
 | 2478 | 		/* are we below the top level of the map.  if so, | 
 | 2479 | 		 * bubble the root up to the next higher level. | 
 | 2480 | 		 */ | 
 | 2481 | 		if (level < bmp->db_maxlevel) { | 
 | 2482 | 			/* bubble up the new root of this dmap control page to | 
 | 2483 | 			 * the next level. | 
 | 2484 | 			 */ | 
 | 2485 | 			if ((rc = | 
 | 2486 | 			     dbAdjCtl(bmp, blkno, dcp->stree[ROOT], alloc, | 
 | 2487 | 				      level + 1))) { | 
 | 2488 | 				/* something went wrong in bubbling up the new | 
 | 2489 | 				 * root value, so backout the changes to the | 
 | 2490 | 				 * current dmap control page. | 
 | 2491 | 				 */ | 
 | 2492 | 				if (alloc) { | 
 | 2493 | 					dbJoin((dmtree_t *) dcp, leafno, | 
 | 2494 | 					       oldval); | 
 | 2495 | 				} else { | 
 | 2496 | 					/* the dbJoin() above might have | 
 | 2497 | 					 * caused a larger binary buddy system | 
 | 2498 | 					 * to form and we may now be in the | 
 | 2499 | 					 * middle of it.  if this is the case, | 
 | 2500 | 					 * back split the buddies. | 
 | 2501 | 					 */ | 
 | 2502 | 					if (dcp->stree[ti] == NOFREE) | 
 | 2503 | 						dbBackSplit((dmtree_t *) | 
 | 2504 | 							    dcp, leafno); | 
 | 2505 | 					dbSplit((dmtree_t *) dcp, leafno, | 
 | 2506 | 						dcp->budmin, oldval); | 
 | 2507 | 				} | 
 | 2508 |  | 
 | 2509 | 				/* release the buffer and return the error. | 
 | 2510 | 				 */ | 
 | 2511 | 				release_metapage(mp); | 
 | 2512 | 				return (rc); | 
 | 2513 | 			} | 
 | 2514 | 		} else { | 
 | 2515 | 			/* we're at the top level of the map. update | 
 | 2516 | 			 * the bmap control page to reflect the size | 
 | 2517 | 			 * of the maximum free buddy system. | 
 | 2518 | 			 */ | 
 | 2519 | 			assert(level == bmp->db_maxlevel); | 
 | 2520 | 			if (bmp->db_maxfreebud != oldroot) { | 
 | 2521 | 				jfs_error(bmp->db_ipbmap->i_sb, | 
 | 2522 | 					  "dbAdjCtl: the maximum free buddy is " | 
 | 2523 | 					  "not the old root"); | 
 | 2524 | 			} | 
 | 2525 | 			bmp->db_maxfreebud = dcp->stree[ROOT]; | 
 | 2526 | 		} | 
 | 2527 | 	} | 
 | 2528 |  | 
 | 2529 | 	/* write the buffer. | 
 | 2530 | 	 */ | 
 | 2531 | 	write_metapage(mp); | 
 | 2532 |  | 
 | 2533 | 	return (0); | 
 | 2534 | } | 
 | 2535 |  | 
 | 2536 |  | 
 | 2537 | /* | 
 | 2538 |  * NAME:	dbSplit() | 
 | 2539 |  * | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2540 |  * FUNCTION:	update the leaf of a dmtree with a new value, splitting | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2541 |  *		the leaf from the binary buddy system of the dmtree's | 
 | 2542 |  *		leaves, as required. | 
 | 2543 |  * | 
 | 2544 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2545 |  *	tp	- pointer to the tree containing the leaf. | 
 | 2546 |  *	leafno	- the number of the leaf to be updated. | 
 | 2547 |  *	splitsz	- the size the binary buddy system starting at the leaf | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2548 |  *		  must be split to, specified as the log2 number of blocks. | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2549 |  *	newval	- the new value for the leaf. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2550 |  * | 
 | 2551 |  * RETURN VALUES: none | 
 | 2552 |  * | 
 | 2553 |  * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; | 
 | 2554 |  */ | 
 | 2555 | static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval) | 
 | 2556 | { | 
 | 2557 | 	int budsz; | 
 | 2558 | 	int cursz; | 
 | 2559 | 	s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx); | 
 | 2560 |  | 
 | 2561 | 	/* check if the leaf needs to be split. | 
 | 2562 | 	 */ | 
 | 2563 | 	if (leaf[leafno] > tp->dmt_budmin) { | 
 | 2564 | 		/* the split occurs by cutting the buddy system in half | 
 | 2565 | 		 * at the specified leaf until we reach the specified | 
 | 2566 | 		 * size.  pick up the starting split size (current size | 
 | 2567 | 		 * - 1 in l2) and the corresponding buddy size. | 
 | 2568 | 		 */ | 
 | 2569 | 		cursz = leaf[leafno] - 1; | 
 | 2570 | 		budsz = BUDSIZE(cursz, tp->dmt_budmin); | 
 | 2571 |  | 
 | 2572 | 		/* split until we reach the specified size. | 
 | 2573 | 		 */ | 
 | 2574 | 		while (cursz >= splitsz) { | 
 | 2575 | 			/* update the buddy's leaf with its new value. | 
 | 2576 | 			 */ | 
 | 2577 | 			dbAdjTree(tp, leafno ^ budsz, cursz); | 
 | 2578 |  | 
 | 2579 | 			/* on to the next size and buddy. | 
 | 2580 | 			 */ | 
 | 2581 | 			cursz -= 1; | 
 | 2582 | 			budsz >>= 1; | 
 | 2583 | 		} | 
 | 2584 | 	} | 
 | 2585 |  | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 2586 | 	/* adjust the dmap tree to reflect the specified leaf's new | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2587 | 	 * value. | 
 | 2588 | 	 */ | 
 | 2589 | 	dbAdjTree(tp, leafno, newval); | 
 | 2590 | } | 
 | 2591 |  | 
 | 2592 |  | 
 | 2593 | /* | 
 | 2594 |  * NAME:	dbBackSplit() | 
 | 2595 |  * | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2596 |  * FUNCTION:	back split the binary buddy system of dmtree leaves | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2597 |  *		that hold a specified leaf until the specified leaf | 
 | 2598 |  *		starts its own binary buddy system. | 
 | 2599 |  * | 
 | 2600 |  *		the allocators typically perform allocations at the start | 
 | 2601 |  *		of binary buddy systems and dbSplit() is used to accomplish | 
 | 2602 |  *		any required splits.  in some cases, however, allocation | 
 | 2603 |  *		may occur in the middle of a binary system and requires a | 
 | 2604 |  *		back split, with the split proceeding out from the middle of | 
 | 2605 |  *		the system (less efficient) rather than the start of the | 
 | 2606 |  *		system (more efficient).  the cases in which a back split | 
 | 2607 |  *		is required are rare and are limited to the first allocation | 
 | 2608 |  *		within an allocation group which is a part (not first part) | 
 | 2609 |  *		of a larger binary buddy system and a few exception cases | 
 | 2610 |  *		in which a previous join operation must be backed out. | 
 | 2611 |  * | 
 | 2612 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2613 |  *	tp	- pointer to the tree containing the leaf. | 
 | 2614 |  *	leafno	- the number of the leaf to be updated. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2615 |  * | 
 | 2616 |  * RETURN VALUES: none | 
 | 2617 |  * | 
 | 2618 |  * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; | 
 | 2619 |  */ | 
| Dave Kleikamp | b6a47fd | 2005-10-11 09:06:59 -0500 | [diff] [blame] | 2620 | static int dbBackSplit(dmtree_t * tp, int leafno) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2621 | { | 
 | 2622 | 	int budsz, bud, w, bsz, size; | 
 | 2623 | 	int cursz; | 
 | 2624 | 	s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx); | 
 | 2625 |  | 
 | 2626 | 	/* leaf should be part (not first part) of a binary | 
 | 2627 | 	 * buddy system. | 
 | 2628 | 	 */ | 
 | 2629 | 	assert(leaf[leafno] == NOFREE); | 
 | 2630 |  | 
 | 2631 | 	/* the back split is accomplished by iteratively finding the leaf | 
 | 2632 | 	 * that starts the buddy system that contains the specified leaf and | 
 | 2633 | 	 * splitting that system in two.  this iteration continues until | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 2634 | 	 * the specified leaf becomes the start of a buddy system. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2635 | 	 * | 
 | 2636 | 	 * determine maximum possible l2 size for the specified leaf. | 
 | 2637 | 	 */ | 
 | 2638 | 	size = | 
 | 2639 | 	    LITOL2BSZ(leafno, le32_to_cpu(tp->dmt_l2nleafs), | 
 | 2640 | 		      tp->dmt_budmin); | 
 | 2641 |  | 
 | 2642 | 	/* determine the number of leaves covered by this size.  this | 
 | 2643 | 	 * is the buddy size that we will start with as we search for | 
 | 2644 | 	 * the buddy system that contains the specified leaf. | 
 | 2645 | 	 */ | 
 | 2646 | 	budsz = BUDSIZE(size, tp->dmt_budmin); | 
 | 2647 |  | 
 | 2648 | 	/* back split. | 
 | 2649 | 	 */ | 
 | 2650 | 	while (leaf[leafno] == NOFREE) { | 
 | 2651 | 		/* find the leftmost buddy leaf. | 
 | 2652 | 		 */ | 
 | 2653 | 		for (w = leafno, bsz = budsz;; bsz <<= 1, | 
 | 2654 | 		     w = (w < bud) ? w : bud) { | 
| Dave Kleikamp | b6a47fd | 2005-10-11 09:06:59 -0500 | [diff] [blame] | 2655 | 			if (bsz >= le32_to_cpu(tp->dmt_nleafs)) { | 
 | 2656 | 				jfs_err("JFS: block map error in dbBackSplit"); | 
 | 2657 | 				return -EIO; | 
 | 2658 | 			} | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2659 |  | 
 | 2660 | 			/* determine the buddy. | 
 | 2661 | 			 */ | 
 | 2662 | 			bud = w ^ bsz; | 
 | 2663 |  | 
 | 2664 | 			/* check if this buddy is the start of the system. | 
 | 2665 | 			 */ | 
 | 2666 | 			if (leaf[bud] != NOFREE) { | 
 | 2667 | 				/* split the leaf at the start of the | 
 | 2668 | 				 * system in two. | 
 | 2669 | 				 */ | 
 | 2670 | 				cursz = leaf[bud] - 1; | 
 | 2671 | 				dbSplit(tp, bud, cursz, cursz); | 
 | 2672 | 				break; | 
 | 2673 | 			} | 
 | 2674 | 		} | 
 | 2675 | 	} | 
 | 2676 |  | 
| Dave Kleikamp | b6a47fd | 2005-10-11 09:06:59 -0500 | [diff] [blame] | 2677 | 	if (leaf[leafno] != size) { | 
 | 2678 | 		jfs_err("JFS: wrong leaf value in dbBackSplit"); | 
 | 2679 | 		return -EIO; | 
 | 2680 | 	} | 
 | 2681 | 	return 0; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2682 | } | 
 | 2683 |  | 
 | 2684 |  | 
 | 2685 | /* | 
 | 2686 |  * NAME:	dbJoin() | 
 | 2687 |  * | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2688 |  * FUNCTION:	update the leaf of a dmtree with a new value, joining | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2689 |  *		the leaf with other leaves of the dmtree into a multi-leaf | 
 | 2690 |  *		binary buddy system, as required. | 
 | 2691 |  * | 
 | 2692 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2693 |  *	tp	- pointer to the tree containing the leaf. | 
 | 2694 |  *	leafno	- the number of the leaf to be updated. | 
 | 2695 |  *	newval	- the new value for the leaf. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2696 |  * | 
 | 2697 |  * RETURN VALUES: none | 
 | 2698 |  */ | 
| Dave Kleikamp | 56d1254 | 2005-07-15 09:43:36 -0500 | [diff] [blame] | 2699 | static int dbJoin(dmtree_t * tp, int leafno, int newval) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2700 | { | 
 | 2701 | 	int budsz, buddy; | 
 | 2702 | 	s8 *leaf; | 
 | 2703 |  | 
 | 2704 | 	/* can the new leaf value require a join with other leaves ? | 
 | 2705 | 	 */ | 
 | 2706 | 	if (newval >= tp->dmt_budmin) { | 
 | 2707 | 		/* pickup a pointer to the leaves of the tree. | 
 | 2708 | 		 */ | 
 | 2709 | 		leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx); | 
 | 2710 |  | 
 | 2711 | 		/* try to join the specified leaf into a large binary | 
 | 2712 | 		 * buddy system.  the join proceeds by attempting to join | 
 | 2713 | 		 * the specified leafno with its buddy (leaf) at new value. | 
 | 2714 | 		 * if the join occurs, we attempt to join the left leaf | 
 | 2715 | 		 * of the joined buddies with its buddy at new value + 1. | 
 | 2716 | 		 * we continue to join until we find a buddy that cannot be | 
 | 2717 | 		 * joined (does not have a value equal to the size of the | 
 | 2718 | 		 * last join) or until all leaves have been joined into a | 
 | 2719 | 		 * single system. | 
 | 2720 | 		 * | 
 | 2721 | 		 * get the buddy size (number of words covered) of | 
 | 2722 | 		 * the new value. | 
 | 2723 | 		 */ | 
 | 2724 | 		budsz = BUDSIZE(newval, tp->dmt_budmin); | 
 | 2725 |  | 
 | 2726 | 		/* try to join. | 
 | 2727 | 		 */ | 
 | 2728 | 		while (budsz < le32_to_cpu(tp->dmt_nleafs)) { | 
 | 2729 | 			/* get the buddy leaf. | 
 | 2730 | 			 */ | 
 | 2731 | 			buddy = leafno ^ budsz; | 
 | 2732 |  | 
 | 2733 | 			/* if the leaf's new value is greater than its | 
 | 2734 | 			 * buddy's value, we join no more. | 
 | 2735 | 			 */ | 
 | 2736 | 			if (newval > leaf[buddy]) | 
 | 2737 | 				break; | 
 | 2738 |  | 
| Dave Kleikamp | 56d1254 | 2005-07-15 09:43:36 -0500 | [diff] [blame] | 2739 | 			/* It shouldn't be less */ | 
 | 2740 | 			if (newval < leaf[buddy]) | 
 | 2741 | 				return -EIO; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2742 |  | 
 | 2743 | 			/* check which (leafno or buddy) is the left buddy. | 
 | 2744 | 			 * the left buddy gets to claim the blocks resulting | 
 | 2745 | 			 * from the join while the right gets to claim none. | 
 | 2746 | 			 * the left buddy is also eligable to participate in | 
 | 2747 | 			 * a join at the next higher level while the right | 
 | 2748 | 			 * is not. | 
 | 2749 | 			 * | 
 | 2750 | 			 */ | 
 | 2751 | 			if (leafno < buddy) { | 
 | 2752 | 				/* leafno is the left buddy. | 
 | 2753 | 				 */ | 
 | 2754 | 				dbAdjTree(tp, buddy, NOFREE); | 
 | 2755 | 			} else { | 
 | 2756 | 				/* buddy is the left buddy and becomes | 
 | 2757 | 				 * leafno. | 
 | 2758 | 				 */ | 
 | 2759 | 				dbAdjTree(tp, leafno, NOFREE); | 
 | 2760 | 				leafno = buddy; | 
 | 2761 | 			} | 
 | 2762 |  | 
 | 2763 | 			/* on to try the next join. | 
 | 2764 | 			 */ | 
 | 2765 | 			newval += 1; | 
 | 2766 | 			budsz <<= 1; | 
 | 2767 | 		} | 
 | 2768 | 	} | 
 | 2769 |  | 
 | 2770 | 	/* update the leaf value. | 
 | 2771 | 	 */ | 
 | 2772 | 	dbAdjTree(tp, leafno, newval); | 
| Dave Kleikamp | 56d1254 | 2005-07-15 09:43:36 -0500 | [diff] [blame] | 2773 |  | 
 | 2774 | 	return 0; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2775 | } | 
 | 2776 |  | 
 | 2777 |  | 
 | 2778 | /* | 
 | 2779 |  * NAME:	dbAdjTree() | 
 | 2780 |  * | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2781 |  * FUNCTION:	update a leaf of a dmtree with a new value, adjusting | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2782 |  *		the dmtree, as required, to reflect the new leaf value. | 
 | 2783 |  *		the combination of any buddies must already be done before | 
 | 2784 |  *		this is called. | 
 | 2785 |  * | 
 | 2786 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2787 |  *	tp	- pointer to the tree to be adjusted. | 
 | 2788 |  *	leafno	- the number of the leaf to be updated. | 
 | 2789 |  *	newval	- the new value for the leaf. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2790 |  * | 
 | 2791 |  * RETURN VALUES: none | 
 | 2792 |  */ | 
 | 2793 | static void dbAdjTree(dmtree_t * tp, int leafno, int newval) | 
 | 2794 | { | 
 | 2795 | 	int lp, pp, k; | 
 | 2796 | 	int max; | 
 | 2797 |  | 
 | 2798 | 	/* pick up the index of the leaf for this leafno. | 
 | 2799 | 	 */ | 
 | 2800 | 	lp = leafno + le32_to_cpu(tp->dmt_leafidx); | 
 | 2801 |  | 
 | 2802 | 	/* is the current value the same as the old value ?  if so, | 
 | 2803 | 	 * there is nothing to do. | 
 | 2804 | 	 */ | 
 | 2805 | 	if (tp->dmt_stree[lp] == newval) | 
 | 2806 | 		return; | 
 | 2807 |  | 
 | 2808 | 	/* set the new value. | 
 | 2809 | 	 */ | 
 | 2810 | 	tp->dmt_stree[lp] = newval; | 
 | 2811 |  | 
 | 2812 | 	/* bubble the new value up the tree as required. | 
 | 2813 | 	 */ | 
 | 2814 | 	for (k = 0; k < le32_to_cpu(tp->dmt_height); k++) { | 
 | 2815 | 		/* get the index of the first leaf of the 4 leaf | 
 | 2816 | 		 * group containing the specified leaf (leafno). | 
 | 2817 | 		 */ | 
 | 2818 | 		lp = ((lp - 1) & ~0x03) + 1; | 
 | 2819 |  | 
 | 2820 | 		/* get the index of the parent of this 4 leaf group. | 
 | 2821 | 		 */ | 
 | 2822 | 		pp = (lp - 1) >> 2; | 
 | 2823 |  | 
 | 2824 | 		/* determine the maximum of the 4 leaves. | 
 | 2825 | 		 */ | 
 | 2826 | 		max = TREEMAX(&tp->dmt_stree[lp]); | 
 | 2827 |  | 
 | 2828 | 		/* if the maximum of the 4 is the same as the | 
 | 2829 | 		 * parent's value, we're done. | 
 | 2830 | 		 */ | 
 | 2831 | 		if (tp->dmt_stree[pp] == max) | 
 | 2832 | 			break; | 
 | 2833 |  | 
 | 2834 | 		/* parent gets new value. | 
 | 2835 | 		 */ | 
 | 2836 | 		tp->dmt_stree[pp] = max; | 
 | 2837 |  | 
 | 2838 | 		/* parent becomes leaf for next go-round. | 
 | 2839 | 		 */ | 
 | 2840 | 		lp = pp; | 
 | 2841 | 	} | 
 | 2842 | } | 
 | 2843 |  | 
 | 2844 |  | 
 | 2845 | /* | 
 | 2846 |  * NAME:	dbFindLeaf() | 
 | 2847 |  * | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2848 |  * FUNCTION:	search a dmtree_t for sufficient free blocks, returning | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 2849 |  *		the index of a leaf describing the free blocks if | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2850 |  *		sufficient free blocks are found. | 
 | 2851 |  * | 
 | 2852 |  *		the search starts at the top of the dmtree_t tree and | 
 | 2853 |  *		proceeds down the tree to the leftmost leaf with sufficient | 
 | 2854 |  *		free space. | 
 | 2855 |  * | 
 | 2856 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2857 |  *	tp	- pointer to the tree to be searched. | 
 | 2858 |  *	l2nb	- log2 number of free blocks to search for. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2859 |  *	leafidx	- return pointer to be set to the index of the leaf | 
 | 2860 |  *		  describing at least l2nb free blocks if sufficient | 
 | 2861 |  *		  free blocks are found. | 
 | 2862 |  * | 
 | 2863 |  * RETURN VALUES: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2864 |  *	0	- success | 
 | 2865 |  *	-ENOSPC	- insufficient free blocks. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2866 |  */ | 
 | 2867 | static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx) | 
 | 2868 | { | 
 | 2869 | 	int ti, n = 0, k, x = 0; | 
 | 2870 |  | 
 | 2871 | 	/* first check the root of the tree to see if there is | 
 | 2872 | 	 * sufficient free space. | 
 | 2873 | 	 */ | 
 | 2874 | 	if (l2nb > tp->dmt_stree[ROOT]) | 
 | 2875 | 		return -ENOSPC; | 
 | 2876 |  | 
 | 2877 | 	/* sufficient free space available. now search down the tree | 
 | 2878 | 	 * starting at the next level for the leftmost leaf that | 
 | 2879 | 	 * describes sufficient free space. | 
 | 2880 | 	 */ | 
 | 2881 | 	for (k = le32_to_cpu(tp->dmt_height), ti = 1; | 
 | 2882 | 	     k > 0; k--, ti = ((ti + n) << 2) + 1) { | 
 | 2883 | 		/* search the four nodes at this level, starting from | 
 | 2884 | 		 * the left. | 
 | 2885 | 		 */ | 
 | 2886 | 		for (x = ti, n = 0; n < 4; n++) { | 
 | 2887 | 			/* sufficient free space found.  move to the next | 
 | 2888 | 			 * level (or quit if this is the last level). | 
 | 2889 | 			 */ | 
 | 2890 | 			if (l2nb <= tp->dmt_stree[x + n]) | 
 | 2891 | 				break; | 
 | 2892 | 		} | 
 | 2893 |  | 
 | 2894 | 		/* better have found something since the higher | 
 | 2895 | 		 * levels of the tree said it was here. | 
 | 2896 | 		 */ | 
 | 2897 | 		assert(n < 4); | 
 | 2898 | 	} | 
 | 2899 |  | 
 | 2900 | 	/* set the return to the leftmost leaf describing sufficient | 
 | 2901 | 	 * free space. | 
 | 2902 | 	 */ | 
 | 2903 | 	*leafidx = x + n - le32_to_cpu(tp->dmt_leafidx); | 
 | 2904 |  | 
 | 2905 | 	return (0); | 
 | 2906 | } | 
 | 2907 |  | 
 | 2908 |  | 
 | 2909 | /* | 
 | 2910 |  * NAME:	dbFindBits() | 
 | 2911 |  * | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2912 |  * FUNCTION:	find a specified number of binary buddy free bits within a | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2913 |  *		dmap bitmap word value. | 
 | 2914 |  * | 
 | 2915 |  *		this routine searches the bitmap value for (1 << l2nb) free | 
 | 2916 |  *		bits at (1 << l2nb) alignments within the value. | 
 | 2917 |  * | 
 | 2918 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2919 |  *	word	-  dmap bitmap word value. | 
 | 2920 |  *	l2nb	-  number of free bits specified as a log2 number. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2921 |  * | 
 | 2922 |  * RETURN VALUES: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2923 |  *	starting bit number of free bits. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2924 |  */ | 
 | 2925 | static int dbFindBits(u32 word, int l2nb) | 
 | 2926 | { | 
 | 2927 | 	int bitno, nb; | 
 | 2928 | 	u32 mask; | 
 | 2929 |  | 
 | 2930 | 	/* get the number of bits. | 
 | 2931 | 	 */ | 
 | 2932 | 	nb = 1 << l2nb; | 
 | 2933 | 	assert(nb <= DBWORD); | 
 | 2934 |  | 
 | 2935 | 	/* complement the word so we can use a mask (i.e. 0s represent | 
 | 2936 | 	 * free bits) and compute the mask. | 
 | 2937 | 	 */ | 
 | 2938 | 	word = ~word; | 
 | 2939 | 	mask = ONES << (DBWORD - nb); | 
 | 2940 |  | 
 | 2941 | 	/* scan the word for nb free bits at nb alignments. | 
 | 2942 | 	 */ | 
 | 2943 | 	for (bitno = 0; mask != 0; bitno += nb, mask >>= nb) { | 
 | 2944 | 		if ((mask & word) == mask) | 
 | 2945 | 			break; | 
 | 2946 | 	} | 
 | 2947 |  | 
 | 2948 | 	ASSERT(bitno < 32); | 
 | 2949 |  | 
 | 2950 | 	/* return the bit number. | 
 | 2951 | 	 */ | 
 | 2952 | 	return (bitno); | 
 | 2953 | } | 
 | 2954 |  | 
 | 2955 |  | 
 | 2956 | /* | 
 | 2957 |  * NAME:	dbMaxBud(u8 *cp) | 
 | 2958 |  * | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2959 |  * FUNCTION:	determine the largest binary buddy string of free | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2960 |  *		bits within 32-bits of the map. | 
 | 2961 |  * | 
 | 2962 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2963 |  *	cp	-  pointer to the 32-bit value. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2964 |  * | 
 | 2965 |  * RETURN VALUES: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2966 |  *	largest binary buddy of free bits within a dmap word. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2967 |  */ | 
 | 2968 | static int dbMaxBud(u8 * cp) | 
 | 2969 | { | 
 | 2970 | 	signed char tmp1, tmp2; | 
 | 2971 |  | 
 | 2972 | 	/* check if the wmap word is all free. if so, the | 
 | 2973 | 	 * free buddy size is BUDMIN. | 
 | 2974 | 	 */ | 
 | 2975 | 	if (*((uint *) cp) == 0) | 
 | 2976 | 		return (BUDMIN); | 
 | 2977 |  | 
 | 2978 | 	/* check if the wmap word is half free. if so, the | 
 | 2979 | 	 * free buddy size is BUDMIN-1. | 
 | 2980 | 	 */ | 
 | 2981 | 	if (*((u16 *) cp) == 0 || *((u16 *) cp + 1) == 0) | 
 | 2982 | 		return (BUDMIN - 1); | 
 | 2983 |  | 
 | 2984 | 	/* not all free or half free. determine the free buddy | 
 | 2985 | 	 * size thru table lookup using quarters of the wmap word. | 
 | 2986 | 	 */ | 
 | 2987 | 	tmp1 = max(budtab[cp[2]], budtab[cp[3]]); | 
 | 2988 | 	tmp2 = max(budtab[cp[0]], budtab[cp[1]]); | 
 | 2989 | 	return (max(tmp1, tmp2)); | 
 | 2990 | } | 
 | 2991 |  | 
 | 2992 |  | 
 | 2993 | /* | 
 | 2994 |  * NAME:	cnttz(uint word) | 
 | 2995 |  * | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 2996 |  * FUNCTION:	determine the number of trailing zeros within a 32-bit | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2997 |  *		value. | 
 | 2998 |  * | 
 | 2999 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 3000 |  *	value	-  32-bit value to be examined. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3001 |  * | 
 | 3002 |  * RETURN VALUES: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 3003 |  *	count of trailing zeros | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3004 |  */ | 
 | 3005 | static int cnttz(u32 word) | 
 | 3006 | { | 
 | 3007 | 	int n; | 
 | 3008 |  | 
 | 3009 | 	for (n = 0; n < 32; n++, word >>= 1) { | 
 | 3010 | 		if (word & 0x01) | 
 | 3011 | 			break; | 
 | 3012 | 	} | 
 | 3013 |  | 
 | 3014 | 	return (n); | 
 | 3015 | } | 
 | 3016 |  | 
 | 3017 |  | 
 | 3018 | /* | 
 | 3019 |  * NAME:	cntlz(u32 value) | 
 | 3020 |  * | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 3021 |  * FUNCTION:	determine the number of leading zeros within a 32-bit | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3022 |  *		value. | 
 | 3023 |  * | 
 | 3024 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 3025 |  *	value	-  32-bit value to be examined. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3026 |  * | 
 | 3027 |  * RETURN VALUES: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 3028 |  *	count of leading zeros | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3029 |  */ | 
 | 3030 | static int cntlz(u32 value) | 
 | 3031 | { | 
 | 3032 | 	int n; | 
 | 3033 |  | 
 | 3034 | 	for (n = 0; n < 32; n++, value <<= 1) { | 
 | 3035 | 		if (value & HIGHORDER) | 
 | 3036 | 			break; | 
 | 3037 | 	} | 
 | 3038 | 	return (n); | 
 | 3039 | } | 
 | 3040 |  | 
 | 3041 |  | 
 | 3042 | /* | 
 | 3043 |  * NAME:	blkstol2(s64 nb) | 
 | 3044 |  * | 
 | 3045 |  * FUNCTION:	convert a block count to its log2 value. if the block | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 3046 |  *		count is not a l2 multiple, it is rounded up to the next | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3047 |  *		larger l2 multiple. | 
 | 3048 |  * | 
 | 3049 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 3050 |  *	nb	-  number of blocks | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3051 |  * | 
 | 3052 |  * RETURN VALUES: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 3053 |  *	log2 number of blocks | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3054 |  */ | 
| Dave Kleikamp | 6cb1269 | 2005-09-15 23:25:41 -0500 | [diff] [blame] | 3055 | static int blkstol2(s64 nb) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3056 | { | 
 | 3057 | 	int l2nb; | 
 | 3058 | 	s64 mask;		/* meant to be signed */ | 
 | 3059 |  | 
 | 3060 | 	mask = (s64) 1 << (64 - 1); | 
 | 3061 |  | 
 | 3062 | 	/* count the leading bits. | 
 | 3063 | 	 */ | 
 | 3064 | 	for (l2nb = 0; l2nb < 64; l2nb++, mask >>= 1) { | 
 | 3065 | 		/* leading bit found. | 
 | 3066 | 		 */ | 
 | 3067 | 		if (nb & mask) { | 
 | 3068 | 			/* determine the l2 value. | 
 | 3069 | 			 */ | 
 | 3070 | 			l2nb = (64 - 1) - l2nb; | 
 | 3071 |  | 
 | 3072 | 			/* check if we need to round up. | 
 | 3073 | 			 */ | 
 | 3074 | 			if (~mask & nb) | 
 | 3075 | 				l2nb++; | 
 | 3076 |  | 
 | 3077 | 			return (l2nb); | 
 | 3078 | 		} | 
 | 3079 | 	} | 
 | 3080 | 	assert(0); | 
 | 3081 | 	return 0;		/* fix compiler warning */ | 
 | 3082 | } | 
 | 3083 |  | 
 | 3084 |  | 
 | 3085 | /* | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 3086 |  * NAME:	dbAllocBottomUp() | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3087 |  * | 
 | 3088 |  * FUNCTION:	alloc the specified block range from the working block | 
 | 3089 |  *		allocation map. | 
 | 3090 |  * | 
 | 3091 |  *		the blocks will be alloc from the working map one dmap | 
 | 3092 |  *		at a time. | 
 | 3093 |  * | 
 | 3094 |  * PARAMETERS: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 3095 |  *	ip	-  pointer to in-core inode; | 
 | 3096 |  *	blkno	-  starting block number to be freed. | 
 | 3097 |  *	nblocks	-  number of blocks to be freed. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3098 |  * | 
 | 3099 |  * RETURN VALUES: | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 3100 |  *	0	- success | 
 | 3101 |  *	-EIO	- i/o error | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3102 |  */ | 
 | 3103 | int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks) | 
 | 3104 | { | 
 | 3105 | 	struct metapage *mp; | 
 | 3106 | 	struct dmap *dp; | 
 | 3107 | 	int nb, rc; | 
 | 3108 | 	s64 lblkno, rem; | 
 | 3109 | 	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; | 
 | 3110 | 	struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; | 
 | 3111 |  | 
| Dave Kleikamp | 82d5b9a | 2007-01-09 14:14:48 -0600 | [diff] [blame] | 3112 | 	IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3113 |  | 
 | 3114 | 	/* block to be allocated better be within the mapsize. */ | 
 | 3115 | 	ASSERT(nblocks <= bmp->db_mapsize - blkno); | 
 | 3116 |  | 
 | 3117 | 	/* | 
 | 3118 | 	 * allocate the blocks a dmap at a time. | 
 | 3119 | 	 */ | 
 | 3120 | 	mp = NULL; | 
 | 3121 | 	for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) { | 
 | 3122 | 		/* release previous dmap if any */ | 
 | 3123 | 		if (mp) { | 
 | 3124 | 			write_metapage(mp); | 
 | 3125 | 		} | 
 | 3126 |  | 
 | 3127 | 		/* get the buffer for the current dmap. */ | 
 | 3128 | 		lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); | 
 | 3129 | 		mp = read_metapage(ipbmap, lblkno, PSIZE, 0); | 
 | 3130 | 		if (mp == NULL) { | 
 | 3131 | 			IREAD_UNLOCK(ipbmap); | 
 | 3132 | 			return -EIO; | 
 | 3133 | 		} | 
 | 3134 | 		dp = (struct dmap *) mp->data; | 
 | 3135 |  | 
 | 3136 | 		/* determine the number of blocks to be allocated from | 
 | 3137 | 		 * this dmap. | 
 | 3138 | 		 */ | 
 | 3139 | 		nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1))); | 
 | 3140 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3141 | 		/* allocate the blocks. */ | 
 | 3142 | 		if ((rc = dbAllocDmapBU(bmp, dp, blkno, nb))) { | 
 | 3143 | 			release_metapage(mp); | 
 | 3144 | 			IREAD_UNLOCK(ipbmap); | 
 | 3145 | 			return (rc); | 
 | 3146 | 		} | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3147 | 	} | 
 | 3148 |  | 
 | 3149 | 	/* write the last buffer. */ | 
 | 3150 | 	write_metapage(mp); | 
 | 3151 |  | 
 | 3152 | 	IREAD_UNLOCK(ipbmap); | 
 | 3153 |  | 
 | 3154 | 	return (0); | 
 | 3155 | } | 
 | 3156 |  | 
 | 3157 |  | 
 | 3158 | static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, | 
 | 3159 | 			 int nblocks) | 
 | 3160 | { | 
 | 3161 | 	int rc; | 
 | 3162 | 	int dbitno, word, rembits, nb, nwords, wbitno, agno; | 
 | 3163 | 	s8 oldroot, *leaf; | 
 | 3164 | 	struct dmaptree *tp = (struct dmaptree *) & dp->tree; | 
 | 3165 |  | 
 | 3166 | 	/* save the current value of the root (i.e. maximum free string) | 
 | 3167 | 	 * of the dmap tree. | 
 | 3168 | 	 */ | 
 | 3169 | 	oldroot = tp->stree[ROOT]; | 
 | 3170 |  | 
 | 3171 | 	/* pick up a pointer to the leaves of the dmap tree */ | 
 | 3172 | 	leaf = tp->stree + LEAFIND; | 
 | 3173 |  | 
 | 3174 | 	/* determine the bit number and word within the dmap of the | 
 | 3175 | 	 * starting block. | 
 | 3176 | 	 */ | 
 | 3177 | 	dbitno = blkno & (BPERDMAP - 1); | 
 | 3178 | 	word = dbitno >> L2DBWORD; | 
 | 3179 |  | 
 | 3180 | 	/* block range better be within the dmap */ | 
 | 3181 | 	assert(dbitno + nblocks <= BPERDMAP); | 
 | 3182 |  | 
 | 3183 | 	/* allocate the bits of the dmap's words corresponding to the block | 
 | 3184 | 	 * range. not all bits of the first and last words may be contained | 
 | 3185 | 	 * within the block range.  if this is the case, we'll work against | 
 | 3186 | 	 * those words (i.e. partial first and/or last) on an individual basis | 
 | 3187 | 	 * (a single pass), allocating the bits of interest by hand and | 
 | 3188 | 	 * updating the leaf corresponding to the dmap word. a single pass | 
 | 3189 | 	 * will be used for all dmap words fully contained within the | 
 | 3190 | 	 * specified range.  within this pass, the bits of all fully contained | 
 | 3191 | 	 * dmap words will be marked as free in a single shot and the leaves | 
 | 3192 | 	 * will be updated. a single leaf may describe the free space of | 
 | 3193 | 	 * multiple dmap words, so we may update only a subset of the actual | 
 | 3194 | 	 * leaves corresponding to the dmap words of the block range. | 
 | 3195 | 	 */ | 
 | 3196 | 	for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) { | 
 | 3197 | 		/* determine the bit number within the word and | 
 | 3198 | 		 * the number of bits within the word. | 
 | 3199 | 		 */ | 
 | 3200 | 		wbitno = dbitno & (DBWORD - 1); | 
 | 3201 | 		nb = min(rembits, DBWORD - wbitno); | 
 | 3202 |  | 
 | 3203 | 		/* check if only part of a word is to be allocated. | 
 | 3204 | 		 */ | 
 | 3205 | 		if (nb < DBWORD) { | 
 | 3206 | 			/* allocate (set to 1) the appropriate bits within | 
 | 3207 | 			 * this dmap word. | 
 | 3208 | 			 */ | 
 | 3209 | 			dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb) | 
 | 3210 | 						      >> wbitno); | 
 | 3211 |  | 
 | 3212 | 			word++; | 
 | 3213 | 		} else { | 
 | 3214 | 			/* one or more dmap words are fully contained | 
 | 3215 | 			 * within the block range.  determine how many | 
 | 3216 | 			 * words and allocate (set to 1) the bits of these | 
 | 3217 | 			 * words. | 
 | 3218 | 			 */ | 
 | 3219 | 			nwords = rembits >> L2DBWORD; | 
 | 3220 | 			memset(&dp->wmap[word], (int) ONES, nwords * 4); | 
 | 3221 |  | 
 | 3222 | 			/* determine how many bits */ | 
 | 3223 | 			nb = nwords << L2DBWORD; | 
 | 3224 | 			word += nwords; | 
 | 3225 | 		} | 
 | 3226 | 	} | 
 | 3227 |  | 
 | 3228 | 	/* update the free count for this dmap */ | 
| Marcin Slusarz | 8914562 | 2008-02-13 15:34:20 -0600 | [diff] [blame] | 3229 | 	le32_add_cpu(&dp->nfree, -nblocks); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3230 |  | 
 | 3231 | 	/* reconstruct summary tree */ | 
 | 3232 | 	dbInitDmapTree(dp); | 
 | 3233 |  | 
 | 3234 | 	BMAP_LOCK(bmp); | 
 | 3235 |  | 
 | 3236 | 	/* if this allocation group is completely free, | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 3237 | 	 * update the highest active allocation group number | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3238 | 	 * if this allocation group is the new max. | 
 | 3239 | 	 */ | 
 | 3240 | 	agno = blkno >> bmp->db_agl2size; | 
 | 3241 | 	if (agno > bmp->db_maxag) | 
 | 3242 | 		bmp->db_maxag = agno; | 
 | 3243 |  | 
 | 3244 | 	/* update the free count for the allocation group and map */ | 
 | 3245 | 	bmp->db_agfree[agno] -= nblocks; | 
 | 3246 | 	bmp->db_nfree -= nblocks; | 
 | 3247 |  | 
 | 3248 | 	BMAP_UNLOCK(bmp); | 
 | 3249 |  | 
 | 3250 | 	/* if the root has not changed, done. */ | 
 | 3251 | 	if (tp->stree[ROOT] == oldroot) | 
 | 3252 | 		return (0); | 
 | 3253 |  | 
 | 3254 | 	/* root changed. bubble the change up to the dmap control pages. | 
 | 3255 | 	 * if the adjustment of the upper level control pages fails, | 
 | 3256 | 	 * backout the bit allocation (thus making everything consistent). | 
 | 3257 | 	 */ | 
 | 3258 | 	if ((rc = dbAdjCtl(bmp, blkno, tp->stree[ROOT], 1, 0))) | 
 | 3259 | 		dbFreeBits(bmp, dp, blkno, nblocks); | 
 | 3260 |  | 
 | 3261 | 	return (rc); | 
 | 3262 | } | 
 | 3263 |  | 
 | 3264 |  | 
 | 3265 | /* | 
 | 3266 |  * NAME:	dbExtendFS() | 
 | 3267 |  * | 
 | 3268 |  * FUNCTION:	extend bmap from blkno for nblocks; | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 3269 |  *		dbExtendFS() updates bmap ready for dbAllocBottomUp(); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3270 |  * | 
 | 3271 |  * L2 | 
 | 3272 |  *  | | 
 | 3273 |  *   L1---------------------------------L1 | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 3274 |  *    |					 | | 
 | 3275 |  *     L0---------L0---------L0		  L0---------L0---------L0 | 
 | 3276 |  *      |	   |	      |		   |	      |		 | | 
 | 3277 |  *	 d0,...,dn  d0,...,dn  d0,...,dn    d0,...,dn  d0,...,dn  d0,.,dm; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3278 |  * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm | 
 | 3279 |  * | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 3280 |  * <---old---><----------------------------extend-----------------------> | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3281 |  */ | 
 | 3282 | int dbExtendFS(struct inode *ipbmap, s64 blkno,	s64 nblocks) | 
 | 3283 | { | 
 | 3284 | 	struct jfs_sb_info *sbi = JFS_SBI(ipbmap->i_sb); | 
 | 3285 | 	int nbperpage = sbi->nbperpage; | 
| Richard Knutsson | 4d81715 | 2006-09-30 23:27:14 -0700 | [diff] [blame] | 3286 | 	int i, i0 = true, j, j0 = true, k, n; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3287 | 	s64 newsize; | 
 | 3288 | 	s64 p; | 
 | 3289 | 	struct metapage *mp, *l2mp, *l1mp = NULL, *l0mp = NULL; | 
 | 3290 | 	struct dmapctl *l2dcp, *l1dcp, *l0dcp; | 
 | 3291 | 	struct dmap *dp; | 
 | 3292 | 	s8 *l0leaf, *l1leaf, *l2leaf; | 
 | 3293 | 	struct bmap *bmp = sbi->bmap; | 
 | 3294 | 	int agno, l2agsize, oldl2agsize; | 
 | 3295 | 	s64 ag_rem; | 
 | 3296 |  | 
 | 3297 | 	newsize = blkno + nblocks; | 
 | 3298 |  | 
 | 3299 | 	jfs_info("dbExtendFS: blkno:%Ld nblocks:%Ld newsize:%Ld", | 
 | 3300 | 		 (long long) blkno, (long long) nblocks, (long long) newsize); | 
 | 3301 |  | 
 | 3302 | 	/* | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 3303 | 	 *	initialize bmap control page. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3304 | 	 * | 
 | 3305 | 	 * all the data in bmap control page should exclude | 
 | 3306 | 	 * the mkfs hidden dmap page. | 
 | 3307 | 	 */ | 
 | 3308 |  | 
 | 3309 | 	/* update mapsize */ | 
 | 3310 | 	bmp->db_mapsize = newsize; | 
 | 3311 | 	bmp->db_maxlevel = BMAPSZTOLEV(bmp->db_mapsize); | 
 | 3312 |  | 
 | 3313 | 	/* compute new AG size */ | 
 | 3314 | 	l2agsize = dbGetL2AGSize(newsize); | 
 | 3315 | 	oldl2agsize = bmp->db_agl2size; | 
 | 3316 |  | 
 | 3317 | 	bmp->db_agl2size = l2agsize; | 
 | 3318 | 	bmp->db_agsize = 1 << l2agsize; | 
 | 3319 |  | 
 | 3320 | 	/* compute new number of AG */ | 
 | 3321 | 	agno = bmp->db_numag; | 
 | 3322 | 	bmp->db_numag = newsize >> l2agsize; | 
 | 3323 | 	bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 1 : 0; | 
 | 3324 |  | 
 | 3325 | 	/* | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 3326 | 	 *	reconfigure db_agfree[] | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3327 | 	 * from old AG configuration to new AG configuration; | 
 | 3328 | 	 * | 
 | 3329 | 	 * coalesce contiguous k (newAGSize/oldAGSize) AGs; | 
 | 3330 | 	 * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn; | 
 | 3331 | 	 * note: new AG size = old AG size * (2**x). | 
 | 3332 | 	 */ | 
 | 3333 | 	if (l2agsize == oldl2agsize) | 
 | 3334 | 		goto extend; | 
 | 3335 | 	k = 1 << (l2agsize - oldl2agsize); | 
 | 3336 | 	ag_rem = bmp->db_agfree[0];	/* save agfree[0] */ | 
 | 3337 | 	for (i = 0, n = 0; i < agno; n++) { | 
 | 3338 | 		bmp->db_agfree[n] = 0;	/* init collection point */ | 
 | 3339 |  | 
 | 3340 | 		/* coalesce cotiguous k AGs; */ | 
 | 3341 | 		for (j = 0; j < k && i < agno; j++, i++) { | 
 | 3342 | 			/* merge AGi to AGn */ | 
 | 3343 | 			bmp->db_agfree[n] += bmp->db_agfree[i]; | 
 | 3344 | 		} | 
 | 3345 | 	} | 
 | 3346 | 	bmp->db_agfree[0] += ag_rem;	/* restore agfree[0] */ | 
 | 3347 |  | 
 | 3348 | 	for (; n < MAXAG; n++) | 
 | 3349 | 		bmp->db_agfree[n] = 0; | 
 | 3350 |  | 
 | 3351 | 	/* | 
 | 3352 | 	 * update highest active ag number | 
 | 3353 | 	 */ | 
 | 3354 |  | 
 | 3355 | 	bmp->db_maxag = bmp->db_maxag / k; | 
 | 3356 |  | 
 | 3357 | 	/* | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 3358 | 	 *	extend bmap | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3359 | 	 * | 
 | 3360 | 	 * update bit maps and corresponding level control pages; | 
 | 3361 | 	 * global control page db_nfree, db_agfree[agno], db_maxfreebud; | 
 | 3362 | 	 */ | 
 | 3363 |       extend: | 
 | 3364 | 	/* get L2 page */ | 
 | 3365 | 	p = BMAPBLKNO + nbperpage;	/* L2 page */ | 
 | 3366 | 	l2mp = read_metapage(ipbmap, p, PSIZE, 0); | 
 | 3367 | 	if (!l2mp) { | 
 | 3368 | 		jfs_error(ipbmap->i_sb, "dbExtendFS: L2 page could not be read"); | 
 | 3369 | 		return -EIO; | 
 | 3370 | 	} | 
 | 3371 | 	l2dcp = (struct dmapctl *) l2mp->data; | 
 | 3372 |  | 
 | 3373 | 	/* compute start L1 */ | 
 | 3374 | 	k = blkno >> L2MAXL1SIZE; | 
 | 3375 | 	l2leaf = l2dcp->stree + CTLLEAFIND + k; | 
 | 3376 | 	p = BLKTOL1(blkno, sbi->l2nbperpage);	/* L1 page */ | 
 | 3377 |  | 
 | 3378 | 	/* | 
 | 3379 | 	 * extend each L1 in L2 | 
 | 3380 | 	 */ | 
 | 3381 | 	for (; k < LPERCTL; k++, p += nbperpage) { | 
 | 3382 | 		/* get L1 page */ | 
 | 3383 | 		if (j0) { | 
 | 3384 | 			/* read in L1 page: (blkno & (MAXL1SIZE - 1)) */ | 
 | 3385 | 			l1mp = read_metapage(ipbmap, p, PSIZE, 0); | 
 | 3386 | 			if (l1mp == NULL) | 
 | 3387 | 				goto errout; | 
 | 3388 | 			l1dcp = (struct dmapctl *) l1mp->data; | 
 | 3389 |  | 
 | 3390 | 			/* compute start L0 */ | 
 | 3391 | 			j = (blkno & (MAXL1SIZE - 1)) >> L2MAXL0SIZE; | 
 | 3392 | 			l1leaf = l1dcp->stree + CTLLEAFIND + j; | 
 | 3393 | 			p = BLKTOL0(blkno, sbi->l2nbperpage); | 
| Richard Knutsson | 4d81715 | 2006-09-30 23:27:14 -0700 | [diff] [blame] | 3394 | 			j0 = false; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3395 | 		} else { | 
 | 3396 | 			/* assign/init L1 page */ | 
 | 3397 | 			l1mp = get_metapage(ipbmap, p, PSIZE, 0); | 
 | 3398 | 			if (l1mp == NULL) | 
 | 3399 | 				goto errout; | 
 | 3400 |  | 
 | 3401 | 			l1dcp = (struct dmapctl *) l1mp->data; | 
 | 3402 |  | 
 | 3403 | 			/* compute start L0 */ | 
 | 3404 | 			j = 0; | 
 | 3405 | 			l1leaf = l1dcp->stree + CTLLEAFIND; | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 3406 | 			p += nbperpage;	/* 1st L0 of L1.k */ | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3407 | 		} | 
 | 3408 |  | 
 | 3409 | 		/* | 
 | 3410 | 		 * extend each L0 in L1 | 
 | 3411 | 		 */ | 
 | 3412 | 		for (; j < LPERCTL; j++) { | 
 | 3413 | 			/* get L0 page */ | 
 | 3414 | 			if (i0) { | 
 | 3415 | 				/* read in L0 page: (blkno & (MAXL0SIZE - 1)) */ | 
 | 3416 |  | 
 | 3417 | 				l0mp = read_metapage(ipbmap, p, PSIZE, 0); | 
 | 3418 | 				if (l0mp == NULL) | 
 | 3419 | 					goto errout; | 
 | 3420 | 				l0dcp = (struct dmapctl *) l0mp->data; | 
 | 3421 |  | 
 | 3422 | 				/* compute start dmap */ | 
 | 3423 | 				i = (blkno & (MAXL0SIZE - 1)) >> | 
 | 3424 | 				    L2BPERDMAP; | 
 | 3425 | 				l0leaf = l0dcp->stree + CTLLEAFIND + i; | 
 | 3426 | 				p = BLKTODMAP(blkno, | 
 | 3427 | 					      sbi->l2nbperpage); | 
| Richard Knutsson | 4d81715 | 2006-09-30 23:27:14 -0700 | [diff] [blame] | 3428 | 				i0 = false; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3429 | 			} else { | 
 | 3430 | 				/* assign/init L0 page */ | 
 | 3431 | 				l0mp = get_metapage(ipbmap, p, PSIZE, 0); | 
 | 3432 | 				if (l0mp == NULL) | 
 | 3433 | 					goto errout; | 
 | 3434 |  | 
 | 3435 | 				l0dcp = (struct dmapctl *) l0mp->data; | 
 | 3436 |  | 
 | 3437 | 				/* compute start dmap */ | 
 | 3438 | 				i = 0; | 
 | 3439 | 				l0leaf = l0dcp->stree + CTLLEAFIND; | 
 | 3440 | 				p += nbperpage;	/* 1st dmap of L0.j */ | 
 | 3441 | 			} | 
 | 3442 |  | 
 | 3443 | 			/* | 
 | 3444 | 			 * extend each dmap in L0 | 
 | 3445 | 			 */ | 
 | 3446 | 			for (; i < LPERCTL; i++) { | 
 | 3447 | 				/* | 
 | 3448 | 				 * reconstruct the dmap page, and | 
 | 3449 | 				 * initialize corresponding parent L0 leaf | 
 | 3450 | 				 */ | 
 | 3451 | 				if ((n = blkno & (BPERDMAP - 1))) { | 
 | 3452 | 					/* read in dmap page: */ | 
 | 3453 | 					mp = read_metapage(ipbmap, p, | 
 | 3454 | 							   PSIZE, 0); | 
 | 3455 | 					if (mp == NULL) | 
 | 3456 | 						goto errout; | 
 | 3457 | 					n = min(nblocks, (s64)BPERDMAP - n); | 
 | 3458 | 				} else { | 
 | 3459 | 					/* assign/init dmap page */ | 
 | 3460 | 					mp = read_metapage(ipbmap, p, | 
 | 3461 | 							   PSIZE, 0); | 
 | 3462 | 					if (mp == NULL) | 
 | 3463 | 						goto errout; | 
 | 3464 |  | 
 | 3465 | 					n = min(nblocks, (s64)BPERDMAP); | 
 | 3466 | 				} | 
 | 3467 |  | 
 | 3468 | 				dp = (struct dmap *) mp->data; | 
 | 3469 | 				*l0leaf = dbInitDmap(dp, blkno, n); | 
 | 3470 |  | 
 | 3471 | 				bmp->db_nfree += n; | 
 | 3472 | 				agno = le64_to_cpu(dp->start) >> l2agsize; | 
 | 3473 | 				bmp->db_agfree[agno] += n; | 
 | 3474 |  | 
 | 3475 | 				write_metapage(mp); | 
 | 3476 |  | 
 | 3477 | 				l0leaf++; | 
 | 3478 | 				p += nbperpage; | 
 | 3479 |  | 
 | 3480 | 				blkno += n; | 
 | 3481 | 				nblocks -= n; | 
 | 3482 | 				if (nblocks == 0) | 
 | 3483 | 					break; | 
 | 3484 | 			}	/* for each dmap in a L0 */ | 
 | 3485 |  | 
 | 3486 | 			/* | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 3487 | 			 * build current L0 page from its leaves, and | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3488 | 			 * initialize corresponding parent L1 leaf | 
 | 3489 | 			 */ | 
 | 3490 | 			*l1leaf = dbInitDmapCtl(l0dcp, 0, ++i); | 
 | 3491 | 			write_metapage(l0mp); | 
 | 3492 | 			l0mp = NULL; | 
 | 3493 |  | 
 | 3494 | 			if (nblocks) | 
 | 3495 | 				l1leaf++;	/* continue for next L0 */ | 
 | 3496 | 			else { | 
 | 3497 | 				/* more than 1 L0 ? */ | 
 | 3498 | 				if (j > 0) | 
 | 3499 | 					break;	/* build L1 page */ | 
 | 3500 | 				else { | 
 | 3501 | 					/* summarize in global bmap page */ | 
 | 3502 | 					bmp->db_maxfreebud = *l1leaf; | 
 | 3503 | 					release_metapage(l1mp); | 
 | 3504 | 					release_metapage(l2mp); | 
 | 3505 | 					goto finalize; | 
 | 3506 | 				} | 
 | 3507 | 			} | 
 | 3508 | 		}		/* for each L0 in a L1 */ | 
 | 3509 |  | 
 | 3510 | 		/* | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 3511 | 		 * build current L1 page from its leaves, and | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3512 | 		 * initialize corresponding parent L2 leaf | 
 | 3513 | 		 */ | 
 | 3514 | 		*l2leaf = dbInitDmapCtl(l1dcp, 1, ++j); | 
 | 3515 | 		write_metapage(l1mp); | 
 | 3516 | 		l1mp = NULL; | 
 | 3517 |  | 
 | 3518 | 		if (nblocks) | 
 | 3519 | 			l2leaf++;	/* continue for next L1 */ | 
 | 3520 | 		else { | 
 | 3521 | 			/* more than 1 L1 ? */ | 
 | 3522 | 			if (k > 0) | 
 | 3523 | 				break;	/* build L2 page */ | 
 | 3524 | 			else { | 
 | 3525 | 				/* summarize in global bmap page */ | 
 | 3526 | 				bmp->db_maxfreebud = *l2leaf; | 
 | 3527 | 				release_metapage(l2mp); | 
 | 3528 | 				goto finalize; | 
 | 3529 | 			} | 
 | 3530 | 		} | 
 | 3531 | 	}			/* for each L1 in a L2 */ | 
 | 3532 |  | 
 | 3533 | 	jfs_error(ipbmap->i_sb, | 
 | 3534 | 		  "dbExtendFS: function has not returned as expected"); | 
 | 3535 | errout: | 
 | 3536 | 	if (l0mp) | 
 | 3537 | 		release_metapage(l0mp); | 
 | 3538 | 	if (l1mp) | 
 | 3539 | 		release_metapage(l1mp); | 
 | 3540 | 	release_metapage(l2mp); | 
 | 3541 | 	return -EIO; | 
 | 3542 |  | 
 | 3543 | 	/* | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 3544 | 	 *	finalize bmap control page | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3545 | 	 */ | 
 | 3546 | finalize: | 
 | 3547 |  | 
 | 3548 | 	return 0; | 
 | 3549 | } | 
 | 3550 |  | 
 | 3551 |  | 
 | 3552 | /* | 
 | 3553 |  *	dbFinalizeBmap() | 
 | 3554 |  */ | 
 | 3555 | void dbFinalizeBmap(struct inode *ipbmap) | 
 | 3556 | { | 
 | 3557 | 	struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; | 
 | 3558 | 	int actags, inactags, l2nl; | 
 | 3559 | 	s64 ag_rem, actfree, inactfree, avgfree; | 
 | 3560 | 	int i, n; | 
 | 3561 |  | 
 | 3562 | 	/* | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 3563 | 	 *	finalize bmap control page | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3564 | 	 */ | 
 | 3565 | //finalize: | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 3566 | 	/* | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3567 | 	 * compute db_agpref: preferred ag to allocate from | 
 | 3568 | 	 * (the leftmost ag with average free space in it); | 
 | 3569 | 	 */ | 
 | 3570 | //agpref: | 
 | 3571 | 	/* get the number of active ags and inacitve ags */ | 
 | 3572 | 	actags = bmp->db_maxag + 1; | 
 | 3573 | 	inactags = bmp->db_numag - actags; | 
 | 3574 | 	ag_rem = bmp->db_mapsize & (bmp->db_agsize - 1);	/* ??? */ | 
 | 3575 |  | 
 | 3576 | 	/* determine how many blocks are in the inactive allocation | 
 | 3577 | 	 * groups. in doing this, we must account for the fact that | 
 | 3578 | 	 * the rightmost group might be a partial group (i.e. file | 
 | 3579 | 	 * system size is not a multiple of the group size). | 
 | 3580 | 	 */ | 
 | 3581 | 	inactfree = (inactags && ag_rem) ? | 
 | 3582 | 	    ((inactags - 1) << bmp->db_agl2size) + ag_rem | 
 | 3583 | 	    : inactags << bmp->db_agl2size; | 
 | 3584 |  | 
 | 3585 | 	/* determine how many free blocks are in the active | 
 | 3586 | 	 * allocation groups plus the average number of free blocks | 
 | 3587 | 	 * within the active ags. | 
 | 3588 | 	 */ | 
 | 3589 | 	actfree = bmp->db_nfree - inactfree; | 
 | 3590 | 	avgfree = (u32) actfree / (u32) actags; | 
 | 3591 |  | 
 | 3592 | 	/* if the preferred allocation group has not average free space. | 
 | 3593 | 	 * re-establish the preferred group as the leftmost | 
 | 3594 | 	 * group with average free space. | 
 | 3595 | 	 */ | 
 | 3596 | 	if (bmp->db_agfree[bmp->db_agpref] < avgfree) { | 
 | 3597 | 		for (bmp->db_agpref = 0; bmp->db_agpref < actags; | 
 | 3598 | 		     bmp->db_agpref++) { | 
 | 3599 | 			if (bmp->db_agfree[bmp->db_agpref] >= avgfree) | 
 | 3600 | 				break; | 
 | 3601 | 		} | 
 | 3602 | 		if (bmp->db_agpref >= bmp->db_numag) { | 
 | 3603 | 			jfs_error(ipbmap->i_sb, | 
 | 3604 | 				  "cannot find ag with average freespace"); | 
 | 3605 | 		} | 
 | 3606 | 	} | 
 | 3607 |  | 
 | 3608 | 	/* | 
 | 3609 | 	 * compute db_aglevel, db_agheigth, db_width, db_agstart: | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 3610 | 	 * an ag is covered in aglevel dmapctl summary tree, | 
 | 3611 | 	 * at agheight level height (from leaf) with agwidth number of nodes | 
 | 3612 | 	 * each, which starts at agstart index node of the smmary tree node | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3613 | 	 * array; | 
 | 3614 | 	 */ | 
 | 3615 | 	bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize); | 
 | 3616 | 	l2nl = | 
 | 3617 | 	    bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL); | 
 | 3618 | 	bmp->db_agheigth = l2nl >> 1; | 
 | 3619 | 	bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheigth << 1)); | 
 | 3620 | 	for (i = 5 - bmp->db_agheigth, bmp->db_agstart = 0, n = 1; i > 0; | 
 | 3621 | 	     i--) { | 
 | 3622 | 		bmp->db_agstart += n; | 
 | 3623 | 		n <<= 2; | 
 | 3624 | 	} | 
 | 3625 |  | 
 | 3626 | } | 
 | 3627 |  | 
 | 3628 |  | 
 | 3629 | /* | 
 | 3630 |  * NAME:	dbInitDmap()/ujfs_idmap_page() | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 3631 |  * | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3632 |  * FUNCTION:	initialize working/persistent bitmap of the dmap page | 
 | 3633 |  *		for the specified number of blocks: | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 3634 |  * | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3635 |  *		at entry, the bitmaps had been initialized as free (ZEROS); | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 3636 |  *		The number of blocks will only account for the actually | 
 | 3637 |  *		existing blocks. Blocks which don't actually exist in | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3638 |  *		the aggregate will be marked as allocated (ONES); | 
 | 3639 |  * | 
 | 3640 |  * PARAMETERS: | 
 | 3641 |  *	dp	- pointer to page of map | 
 | 3642 |  *	nblocks	- number of blocks this page | 
 | 3643 |  * | 
 | 3644 |  * RETURNS: NONE | 
 | 3645 |  */ | 
 | 3646 | static int dbInitDmap(struct dmap * dp, s64 Blkno, int nblocks) | 
 | 3647 | { | 
 | 3648 | 	int blkno, w, b, r, nw, nb, i; | 
 | 3649 |  | 
 | 3650 | 	/* starting block number within the dmap */ | 
 | 3651 | 	blkno = Blkno & (BPERDMAP - 1); | 
 | 3652 |  | 
 | 3653 | 	if (blkno == 0) { | 
 | 3654 | 		dp->nblocks = dp->nfree = cpu_to_le32(nblocks); | 
 | 3655 | 		dp->start = cpu_to_le64(Blkno); | 
 | 3656 |  | 
 | 3657 | 		if (nblocks == BPERDMAP) { | 
 | 3658 | 			memset(&dp->wmap[0], 0, LPERDMAP * 4); | 
 | 3659 | 			memset(&dp->pmap[0], 0, LPERDMAP * 4); | 
 | 3660 | 			goto initTree; | 
 | 3661 | 		} | 
 | 3662 | 	} else { | 
| Marcin Slusarz | 8914562 | 2008-02-13 15:34:20 -0600 | [diff] [blame] | 3663 | 		le32_add_cpu(&dp->nblocks, nblocks); | 
 | 3664 | 		le32_add_cpu(&dp->nfree, nblocks); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3665 | 	} | 
 | 3666 |  | 
 | 3667 | 	/* word number containing start block number */ | 
 | 3668 | 	w = blkno >> L2DBWORD; | 
 | 3669 |  | 
 | 3670 | 	/* | 
 | 3671 | 	 * free the bits corresponding to the block range (ZEROS): | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 3672 | 	 * note: not all bits of the first and last words may be contained | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3673 | 	 * within the block range. | 
 | 3674 | 	 */ | 
 | 3675 | 	for (r = nblocks; r > 0; r -= nb, blkno += nb) { | 
 | 3676 | 		/* number of bits preceding range to be freed in the word */ | 
 | 3677 | 		b = blkno & (DBWORD - 1); | 
 | 3678 | 		/* number of bits to free in the word */ | 
 | 3679 | 		nb = min(r, DBWORD - b); | 
 | 3680 |  | 
 | 3681 | 		/* is partial word to be freed ? */ | 
 | 3682 | 		if (nb < DBWORD) { | 
 | 3683 | 			/* free (set to 0) from the bitmap word */ | 
 | 3684 | 			dp->wmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb) | 
 | 3685 | 						     >> b)); | 
 | 3686 | 			dp->pmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb) | 
 | 3687 | 						     >> b)); | 
 | 3688 |  | 
 | 3689 | 			/* skip the word freed */ | 
 | 3690 | 			w++; | 
 | 3691 | 		} else { | 
 | 3692 | 			/* free (set to 0) contiguous bitmap words */ | 
 | 3693 | 			nw = r >> L2DBWORD; | 
 | 3694 | 			memset(&dp->wmap[w], 0, nw * 4); | 
 | 3695 | 			memset(&dp->pmap[w], 0, nw * 4); | 
 | 3696 |  | 
 | 3697 | 			/* skip the words freed */ | 
 | 3698 | 			nb = nw << L2DBWORD; | 
 | 3699 | 			w += nw; | 
 | 3700 | 		} | 
 | 3701 | 	} | 
 | 3702 |  | 
 | 3703 | 	/* | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 3704 | 	 * mark bits following the range to be freed (non-existing | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3705 | 	 * blocks) as allocated (ONES) | 
 | 3706 | 	 */ | 
 | 3707 |  | 
 | 3708 | 	if (blkno == BPERDMAP) | 
 | 3709 | 		goto initTree; | 
 | 3710 |  | 
 | 3711 | 	/* the first word beyond the end of existing blocks */ | 
 | 3712 | 	w = blkno >> L2DBWORD; | 
 | 3713 |  | 
 | 3714 | 	/* does nblocks fall on a 32-bit boundary ? */ | 
 | 3715 | 	b = blkno & (DBWORD - 1); | 
 | 3716 | 	if (b) { | 
 | 3717 | 		/* mark a partial word allocated */ | 
 | 3718 | 		dp->wmap[w] = dp->pmap[w] = cpu_to_le32(ONES >> b); | 
 | 3719 | 		w++; | 
 | 3720 | 	} | 
 | 3721 |  | 
 | 3722 | 	/* set the rest of the words in the page to allocated (ONES) */ | 
 | 3723 | 	for (i = w; i < LPERDMAP; i++) | 
 | 3724 | 		dp->pmap[i] = dp->wmap[i] = cpu_to_le32(ONES); | 
 | 3725 |  | 
 | 3726 | 	/* | 
 | 3727 | 	 * init tree | 
 | 3728 | 	 */ | 
 | 3729 |       initTree: | 
 | 3730 | 	return (dbInitDmapTree(dp)); | 
 | 3731 | } | 
 | 3732 |  | 
 | 3733 |  | 
 | 3734 | /* | 
 | 3735 |  * NAME:	dbInitDmapTree()/ujfs_complete_dmap() | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 3736 |  * | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3737 |  * FUNCTION:	initialize summary tree of the specified dmap: | 
 | 3738 |  * | 
 | 3739 |  *		at entry, bitmap of the dmap has been initialized; | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 3740 |  * | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3741 |  * PARAMETERS: | 
 | 3742 |  *	dp	- dmap to complete | 
 | 3743 |  *	blkno	- starting block number for this dmap | 
 | 3744 |  *	treemax	- will be filled in with max free for this dmap | 
 | 3745 |  * | 
 | 3746 |  * RETURNS:	max free string at the root of the tree | 
 | 3747 |  */ | 
 | 3748 | static int dbInitDmapTree(struct dmap * dp) | 
 | 3749 | { | 
 | 3750 | 	struct dmaptree *tp; | 
 | 3751 | 	s8 *cp; | 
 | 3752 | 	int i; | 
 | 3753 |  | 
 | 3754 | 	/* init fixed info of tree */ | 
 | 3755 | 	tp = &dp->tree; | 
 | 3756 | 	tp->nleafs = cpu_to_le32(LPERDMAP); | 
 | 3757 | 	tp->l2nleafs = cpu_to_le32(L2LPERDMAP); | 
 | 3758 | 	tp->leafidx = cpu_to_le32(LEAFIND); | 
 | 3759 | 	tp->height = cpu_to_le32(4); | 
 | 3760 | 	tp->budmin = BUDMIN; | 
 | 3761 |  | 
 | 3762 | 	/* init each leaf from corresponding wmap word: | 
 | 3763 | 	 * note: leaf is set to NOFREE(-1) if all blocks of corresponding | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 3764 | 	 * bitmap word are allocated. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3765 | 	 */ | 
 | 3766 | 	cp = tp->stree + le32_to_cpu(tp->leafidx); | 
 | 3767 | 	for (i = 0; i < LPERDMAP; i++) | 
 | 3768 | 		*cp++ = dbMaxBud((u8 *) & dp->wmap[i]); | 
 | 3769 |  | 
 | 3770 | 	/* build the dmap's binary buddy summary tree */ | 
 | 3771 | 	return (dbInitTree(tp)); | 
 | 3772 | } | 
 | 3773 |  | 
 | 3774 |  | 
 | 3775 | /* | 
 | 3776 |  * NAME:	dbInitTree()/ujfs_adjtree() | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 3777 |  * | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3778 |  * FUNCTION:	initialize binary buddy summary tree of a dmap or dmapctl. | 
 | 3779 |  * | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 3780 |  *		at entry, the leaves of the tree has been initialized | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3781 |  *		from corresponding bitmap word or root of summary tree | 
 | 3782 |  *		of the child control page; | 
 | 3783 |  *		configure binary buddy system at the leaf level, then | 
 | 3784 |  *		bubble up the values of the leaf nodes up the tree. | 
 | 3785 |  * | 
 | 3786 |  * PARAMETERS: | 
 | 3787 |  *	cp	- Pointer to the root of the tree | 
 | 3788 |  *	l2leaves- Number of leaf nodes as a power of 2 | 
 | 3789 |  *	l2min	- Number of blocks that can be covered by a leaf | 
 | 3790 |  *		  as a power of 2 | 
 | 3791 |  * | 
 | 3792 |  * RETURNS: max free string at the root of the tree | 
 | 3793 |  */ | 
 | 3794 | static int dbInitTree(struct dmaptree * dtp) | 
 | 3795 | { | 
 | 3796 | 	int l2max, l2free, bsize, nextb, i; | 
 | 3797 | 	int child, parent, nparent; | 
 | 3798 | 	s8 *tp, *cp, *cp1; | 
 | 3799 |  | 
 | 3800 | 	tp = dtp->stree; | 
 | 3801 |  | 
 | 3802 | 	/* Determine the maximum free string possible for the leaves */ | 
 | 3803 | 	l2max = le32_to_cpu(dtp->l2nleafs) + dtp->budmin; | 
 | 3804 |  | 
 | 3805 | 	/* | 
 | 3806 | 	 * configure the leaf levevl into binary buddy system | 
 | 3807 | 	 * | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 3808 | 	 * Try to combine buddies starting with a buddy size of 1 | 
 | 3809 | 	 * (i.e. two leaves). At a buddy size of 1 two buddy leaves | 
 | 3810 | 	 * can be combined if both buddies have a maximum free of l2min; | 
 | 3811 | 	 * the combination will result in the left-most buddy leaf having | 
 | 3812 | 	 * a maximum free of l2min+1. | 
 | 3813 | 	 * After processing all buddies for a given size, process buddies | 
 | 3814 | 	 * at the next higher buddy size (i.e. current size * 2) and | 
 | 3815 | 	 * the next maximum free (current free + 1). | 
 | 3816 | 	 * This continues until the maximum possible buddy combination | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3817 | 	 * yields maximum free. | 
 | 3818 | 	 */ | 
 | 3819 | 	for (l2free = dtp->budmin, bsize = 1; l2free < l2max; | 
 | 3820 | 	     l2free++, bsize = nextb) { | 
 | 3821 | 		/* get next buddy size == current buddy pair size */ | 
 | 3822 | 		nextb = bsize << 1; | 
 | 3823 |  | 
 | 3824 | 		/* scan each adjacent buddy pair at current buddy size */ | 
 | 3825 | 		for (i = 0, cp = tp + le32_to_cpu(dtp->leafidx); | 
 | 3826 | 		     i < le32_to_cpu(dtp->nleafs); | 
 | 3827 | 		     i += nextb, cp += nextb) { | 
 | 3828 | 			/* coalesce if both adjacent buddies are max free */ | 
 | 3829 | 			if (*cp == l2free && *(cp + bsize) == l2free) { | 
 | 3830 | 				*cp = l2free + 1;	/* left take right */ | 
 | 3831 | 				*(cp + bsize) = -1;	/* right give left */ | 
 | 3832 | 			} | 
 | 3833 | 		} | 
 | 3834 | 	} | 
 | 3835 |  | 
 | 3836 | 	/* | 
 | 3837 | 	 * bubble summary information of leaves up the tree. | 
 | 3838 | 	 * | 
 | 3839 | 	 * Starting at the leaf node level, the four nodes described by | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 3840 | 	 * the higher level parent node are compared for a maximum free and | 
 | 3841 | 	 * this maximum becomes the value of the parent node. | 
 | 3842 | 	 * when all lower level nodes are processed in this fashion then | 
 | 3843 | 	 * move up to the next level (parent becomes a lower level node) and | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3844 | 	 * continue the process for that level. | 
 | 3845 | 	 */ | 
 | 3846 | 	for (child = le32_to_cpu(dtp->leafidx), | 
 | 3847 | 	     nparent = le32_to_cpu(dtp->nleafs) >> 2; | 
 | 3848 | 	     nparent > 0; nparent >>= 2, child = parent) { | 
 | 3849 | 		/* get index of 1st node of parent level */ | 
 | 3850 | 		parent = (child - 1) >> 2; | 
 | 3851 |  | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 3852 | 		/* set the value of the parent node as the maximum | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3853 | 		 * of the four nodes of the current level. | 
 | 3854 | 		 */ | 
 | 3855 | 		for (i = 0, cp = tp + child, cp1 = tp + parent; | 
 | 3856 | 		     i < nparent; i++, cp += 4, cp1++) | 
 | 3857 | 			*cp1 = TREEMAX(cp); | 
 | 3858 | 	} | 
 | 3859 |  | 
 | 3860 | 	return (*tp); | 
 | 3861 | } | 
 | 3862 |  | 
 | 3863 |  | 
 | 3864 | /* | 
 | 3865 |  *	dbInitDmapCtl() | 
 | 3866 |  * | 
 | 3867 |  * function: initialize dmapctl page | 
 | 3868 |  */ | 
 | 3869 | static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i) | 
 | 3870 | {				/* start leaf index not covered by range */ | 
 | 3871 | 	s8 *cp; | 
 | 3872 |  | 
 | 3873 | 	dcp->nleafs = cpu_to_le32(LPERCTL); | 
 | 3874 | 	dcp->l2nleafs = cpu_to_le32(L2LPERCTL); | 
 | 3875 | 	dcp->leafidx = cpu_to_le32(CTLLEAFIND); | 
 | 3876 | 	dcp->height = cpu_to_le32(5); | 
 | 3877 | 	dcp->budmin = L2BPERDMAP + L2LPERCTL * level; | 
 | 3878 |  | 
 | 3879 | 	/* | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 3880 | 	 * initialize the leaves of current level that were not covered | 
 | 3881 | 	 * by the specified input block range (i.e. the leaves have no | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3882 | 	 * low level dmapctl or dmap). | 
 | 3883 | 	 */ | 
 | 3884 | 	cp = &dcp->stree[CTLLEAFIND + i]; | 
 | 3885 | 	for (; i < LPERCTL; i++) | 
 | 3886 | 		*cp++ = NOFREE; | 
 | 3887 |  | 
 | 3888 | 	/* build the dmap's binary buddy summary tree */ | 
 | 3889 | 	return (dbInitTree((struct dmaptree *) dcp)); | 
 | 3890 | } | 
 | 3891 |  | 
 | 3892 |  | 
 | 3893 | /* | 
 | 3894 |  * NAME:	dbGetL2AGSize()/ujfs_getagl2size() | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 3895 |  * | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3896 |  * FUNCTION:	Determine log2(allocation group size) from aggregate size | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 3897 |  * | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3898 |  * PARAMETERS: | 
 | 3899 |  *	nblocks	- Number of blocks in aggregate | 
 | 3900 |  * | 
 | 3901 |  * RETURNS: log2(allocation group size) in aggregate blocks | 
 | 3902 |  */ | 
 | 3903 | static int dbGetL2AGSize(s64 nblocks) | 
 | 3904 | { | 
 | 3905 | 	s64 sz; | 
 | 3906 | 	s64 m; | 
 | 3907 | 	int l2sz; | 
 | 3908 |  | 
 | 3909 | 	if (nblocks < BPERDMAP * MAXAG) | 
 | 3910 | 		return (L2BPERDMAP); | 
 | 3911 |  | 
 | 3912 | 	/* round up aggregate size to power of 2 */ | 
 | 3913 | 	m = ((u64) 1 << (64 - 1)); | 
 | 3914 | 	for (l2sz = 64; l2sz >= 0; l2sz--, m >>= 1) { | 
 | 3915 | 		if (m & nblocks) | 
 | 3916 | 			break; | 
 | 3917 | 	} | 
 | 3918 |  | 
 | 3919 | 	sz = (s64) 1 << l2sz; | 
 | 3920 | 	if (sz < nblocks) | 
 | 3921 | 		l2sz += 1; | 
 | 3922 |  | 
 | 3923 | 	/* agsize = roundupSize/max_number_of_ag */ | 
 | 3924 | 	return (l2sz - L2MAXAG); | 
 | 3925 | } | 
 | 3926 |  | 
 | 3927 |  | 
 | 3928 | /* | 
 | 3929 |  * NAME:	dbMapFileSizeToMapSize() | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 3930 |  * | 
 | 3931 |  * FUNCTION:	compute number of blocks the block allocation map file | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3932 |  *		can cover from the map file size; | 
 | 3933 |  * | 
 | 3934 |  * RETURNS:	Number of blocks which can be covered by this block map file; | 
 | 3935 |  */ | 
 | 3936 |  | 
 | 3937 | /* | 
 | 3938 |  * maximum number of map pages at each level including control pages | 
 | 3939 |  */ | 
 | 3940 | #define MAXL0PAGES	(1 + LPERCTL) | 
 | 3941 | #define MAXL1PAGES	(1 + LPERCTL * MAXL0PAGES) | 
 | 3942 | #define MAXL2PAGES	(1 + LPERCTL * MAXL1PAGES) | 
 | 3943 |  | 
 | 3944 | /* | 
 | 3945 |  * convert number of map pages to the zero origin top dmapctl level | 
 | 3946 |  */ | 
 | 3947 | #define BMAPPGTOLEV(npages)	\ | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 3948 | 	(((npages) <= 3 + MAXL0PAGES) ? 0 : \ | 
 | 3949 | 	 ((npages) <= 2 + MAXL1PAGES) ? 1 : 2) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3950 |  | 
 | 3951 | s64 dbMapFileSizeToMapSize(struct inode * ipbmap) | 
 | 3952 | { | 
 | 3953 | 	struct super_block *sb = ipbmap->i_sb; | 
 | 3954 | 	s64 nblocks; | 
 | 3955 | 	s64 npages, ndmaps; | 
 | 3956 | 	int level, i; | 
 | 3957 | 	int complete, factor; | 
 | 3958 |  | 
 | 3959 | 	nblocks = ipbmap->i_size >> JFS_SBI(sb)->l2bsize; | 
 | 3960 | 	npages = nblocks >> JFS_SBI(sb)->l2nbperpage; | 
 | 3961 | 	level = BMAPPGTOLEV(npages); | 
 | 3962 |  | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 3963 | 	/* At each level, accumulate the number of dmap pages covered by | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3964 | 	 * the number of full child levels below it; | 
 | 3965 | 	 * repeat for the last incomplete child level. | 
 | 3966 | 	 */ | 
 | 3967 | 	ndmaps = 0; | 
 | 3968 | 	npages--;		/* skip the first global control page */ | 
 | 3969 | 	/* skip higher level control pages above top level covered by map */ | 
 | 3970 | 	npages -= (2 - level); | 
 | 3971 | 	npages--;		/* skip top level's control page */ | 
 | 3972 | 	for (i = level; i >= 0; i--) { | 
 | 3973 | 		factor = | 
 | 3974 | 		    (i == 2) ? MAXL1PAGES : ((i == 1) ? MAXL0PAGES : 1); | 
 | 3975 | 		complete = (u32) npages / factor; | 
| Dave Kleikamp | f720e3b | 2007-06-06 15:28:35 -0500 | [diff] [blame] | 3976 | 		ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL : | 
 | 3977 | 				      ((i == 1) ? LPERCTL : 1)); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3978 |  | 
 | 3979 | 		/* pages in last/incomplete child */ | 
 | 3980 | 		npages = (u32) npages % factor; | 
 | 3981 | 		/* skip incomplete child's level control page */ | 
 | 3982 | 		npages--; | 
 | 3983 | 	} | 
 | 3984 |  | 
| Dave Kleikamp | 63f83c9 | 2006-10-02 09:55:27 -0500 | [diff] [blame] | 3985 | 	/* convert the number of dmaps into the number of blocks | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3986 | 	 * which can be covered by the dmaps; | 
 | 3987 | 	 */ | 
 | 3988 | 	nblocks = ndmaps << L2BPERDMAP; | 
 | 3989 |  | 
 | 3990 | 	return (nblocks); | 
 | 3991 | } |