| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 1 | /* | 
 | 2 |  *  fs/ext4/extents_status.c | 
 | 3 |  * | 
 | 4 |  * Written by Yongqiang Yang <xiaoqiangnk@gmail.com> | 
 | 5 |  * Modified by | 
 | 6 |  *	Allison Henderson <achender@linux.vnet.ibm.com> | 
 | 7 |  *	Hugh Dickins <hughd@google.com> | 
 | 8 |  *	Zheng Liu <wenqing.lz@taobao.com> | 
 | 9 |  * | 
 | 10 |  * Ext4 extents status tree core functions. | 
 | 11 |  */ | 
 | 12 | #include <linux/rbtree.h> | 
| Zheng Liu | d3922a7 | 2013-07-01 08:12:37 -0400 | [diff] [blame] | 13 | #include <linux/list_sort.h> | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 14 | #include "ext4.h" | 
 | 15 | #include "extents_status.h" | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 16 |  | 
| Zheng Liu | 992e9fd | 2012-11-08 21:57:33 -0500 | [diff] [blame] | 17 | #include <trace/events/ext4.h> | 
 | 18 |  | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 19 | /* | 
 | 20 |  * According to previous discussion in Ext4 Developer Workshop, we | 
 | 21 |  * will introduce a new structure called io tree to track all extent | 
 | 22 |  * status in order to solve some problems that we have met | 
 | 23 |  * (e.g. Reservation space warning), and provide extent-level locking. | 
 | 24 |  * Delay extent tree is the first step to achieve this goal.  It is | 
 | 25 |  * original built by Yongqiang Yang.  At that time it is called delay | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 26 |  * extent tree, whose goal is only track delayed extents in memory to | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 27 |  * simplify the implementation of fiemap and bigalloc, and introduce | 
 | 28 |  * lseek SEEK_DATA/SEEK_HOLE support.  That is why it is still called | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 29 |  * delay extent tree at the first commit.  But for better understand | 
 | 30 |  * what it does, it has been rename to extent status tree. | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 31 |  * | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 32 |  * Step1: | 
 | 33 |  * Currently the first step has been done.  All delayed extents are | 
 | 34 |  * tracked in the tree.  It maintains the delayed extent when a delayed | 
 | 35 |  * allocation is issued, and the delayed extent is written out or | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 36 |  * invalidated.  Therefore the implementation of fiemap and bigalloc | 
 | 37 |  * are simplified, and SEEK_DATA/SEEK_HOLE are introduced. | 
 | 38 |  * | 
 | 39 |  * The following comment describes the implemenmtation of extent | 
 | 40 |  * status tree and future works. | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 41 |  * | 
 | 42 |  * Step2: | 
 | 43 |  * In this step all extent status are tracked by extent status tree. | 
 | 44 |  * Thus, we can first try to lookup a block mapping in this tree before | 
 | 45 |  * finding it in extent tree.  Hence, single extent cache can be removed | 
 | 46 |  * because extent status tree can do a better job.  Extents in status | 
 | 47 |  * tree are loaded on-demand.  Therefore, the extent status tree may not | 
 | 48 |  * contain all of the extents in a file.  Meanwhile we define a shrinker | 
 | 49 |  * to reclaim memory from extent status tree because fragmented extent | 
 | 50 |  * tree will make status tree cost too much memory.  written/unwritten/- | 
 | 51 |  * hole extents in the tree will be reclaimed by this shrinker when we | 
 | 52 |  * are under high memory pressure.  Delayed extents will not be | 
 | 53 |  * reclimed because fiemap, bigalloc, and seek_data/hole need it. | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 54 |  */ | 
 | 55 |  | 
 | 56 | /* | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 57 |  * Extent status tree implementation for ext4. | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 58 |  * | 
 | 59 |  * | 
 | 60 |  * ========================================================================== | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 61 |  * Extent status tree tracks all extent status. | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 62 |  * | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 63 |  * 1. Why we need to implement extent status tree? | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 64 |  * | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 65 |  * Without extent status tree, ext4 identifies a delayed extent by looking | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 66 |  * up page cache, this has several deficiencies - complicated, buggy, | 
 | 67 |  * and inefficient code. | 
 | 68 |  * | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 69 |  * FIEMAP, SEEK_HOLE/DATA, bigalloc, and writeout all need to know if a | 
 | 70 |  * block or a range of blocks are belonged to a delayed extent. | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 71 |  * | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 72 |  * Let us have a look at how they do without extent status tree. | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 73 |  *   --	FIEMAP | 
 | 74 |  *	FIEMAP looks up page cache to identify delayed allocations from holes. | 
 | 75 |  * | 
 | 76 |  *   --	SEEK_HOLE/DATA | 
 | 77 |  *	SEEK_HOLE/DATA has the same problem as FIEMAP. | 
 | 78 |  * | 
 | 79 |  *   --	bigalloc | 
 | 80 |  *	bigalloc looks up page cache to figure out if a block is | 
 | 81 |  *	already under delayed allocation or not to determine whether | 
 | 82 |  *	quota reserving is needed for the cluster. | 
 | 83 |  * | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 84 |  *   --	writeout | 
 | 85 |  *	Writeout looks up whole page cache to see if a buffer is | 
 | 86 |  *	mapped, If there are not very many delayed buffers, then it is | 
 | 87 |  *	time comsuming. | 
 | 88 |  * | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 89 |  * With extent status tree implementation, FIEMAP, SEEK_HOLE/DATA, | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 90 |  * bigalloc and writeout can figure out if a block or a range of | 
 | 91 |  * blocks is under delayed allocation(belonged to a delayed extent) or | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 92 |  * not by searching the extent tree. | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 93 |  * | 
 | 94 |  * | 
 | 95 |  * ========================================================================== | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 96 |  * 2. Ext4 extent status tree impelmentation | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 97 |  * | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 98 |  *   --	extent | 
 | 99 |  *	A extent is a range of blocks which are contiguous logically and | 
 | 100 |  *	physically.  Unlike extent in extent tree, this extent in ext4 is | 
 | 101 |  *	a in-memory struct, there is no corresponding on-disk data.  There | 
 | 102 |  *	is no limit on length of extent, so an extent can contain as many | 
 | 103 |  *	blocks as they are contiguous logically and physically. | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 104 |  * | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 105 |  *   --	extent status tree | 
 | 106 |  *	Every inode has an extent status tree and all allocation blocks | 
 | 107 |  *	are added to the tree with different status.  The extent in the | 
 | 108 |  *	tree are ordered by logical block no. | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 109 |  * | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 110 |  *   --	operations on a extent status tree | 
 | 111 |  *	There are three important operations on a delayed extent tree: find | 
 | 112 |  *	next extent, adding a extent(a range of blocks) and removing a extent. | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 113 |  * | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 114 |  *   --	race on a extent status tree | 
 | 115 |  *	Extent status tree is protected by inode->i_es_lock. | 
 | 116 |  * | 
 | 117 |  *   --	memory consumption | 
 | 118 |  *      Fragmented extent tree will make extent status tree cost too much | 
 | 119 |  *      memory.  Hence, we will reclaim written/unwritten/hole extents from | 
 | 120 |  *      the tree under a heavy memory pressure. | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 121 |  * | 
 | 122 |  * | 
 | 123 |  * ========================================================================== | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 124 |  * 3. Performance analysis | 
 | 125 |  * | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 126 |  *   --	overhead | 
 | 127 |  *	1. There is a cache extent for write access, so if writes are | 
 | 128 |  *	not very random, adding space operaions are in O(1) time. | 
 | 129 |  * | 
 | 130 |  *   --	gain | 
 | 131 |  *	2. Code is much simpler, more readable, more maintainable and | 
 | 132 |  *	more efficient. | 
 | 133 |  * | 
 | 134 |  * | 
 | 135 |  * ========================================================================== | 
 | 136 |  * 4. TODO list | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 137 |  * | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 138 |  *   -- Refactor delayed space reservation | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 139 |  * | 
 | 140 |  *   -- Extent-level locking | 
 | 141 |  */ | 
 | 142 |  | 
 | 143 | static struct kmem_cache *ext4_es_cachep; | 
 | 144 |  | 
| Zheng Liu | bdedbb7 | 2013-02-18 00:32:02 -0500 | [diff] [blame] | 145 | static int __es_insert_extent(struct inode *inode, struct extent_status *newes); | 
 | 146 | static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 147 | 			      ext4_lblk_t end); | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 148 | static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, | 
 | 149 | 				       int nr_to_scan); | 
| Theodore Ts'o | e15f742 | 2013-07-15 00:12:14 -0400 | [diff] [blame] | 150 | static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, | 
 | 151 | 			    struct ext4_inode_info *locked_ei); | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 152 |  | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 153 | int __init ext4_init_es(void) | 
 | 154 | { | 
| Theodore Ts'o | 2463077 | 2013-02-28 23:58:56 -0500 | [diff] [blame] | 155 | 	ext4_es_cachep = kmem_cache_create("ext4_extent_status", | 
 | 156 | 					   sizeof(struct extent_status), | 
 | 157 | 					   0, (SLAB_RECLAIM_ACCOUNT), NULL); | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 158 | 	if (ext4_es_cachep == NULL) | 
 | 159 | 		return -ENOMEM; | 
 | 160 | 	return 0; | 
 | 161 | } | 
 | 162 |  | 
 | 163 | void ext4_exit_es(void) | 
 | 164 | { | 
 | 165 | 	if (ext4_es_cachep) | 
 | 166 | 		kmem_cache_destroy(ext4_es_cachep); | 
 | 167 | } | 
 | 168 |  | 
 | 169 | void ext4_es_init_tree(struct ext4_es_tree *tree) | 
 | 170 | { | 
 | 171 | 	tree->root = RB_ROOT; | 
 | 172 | 	tree->cache_es = NULL; | 
 | 173 | } | 
 | 174 |  | 
 | 175 | #ifdef ES_DEBUG__ | 
 | 176 | static void ext4_es_print_tree(struct inode *inode) | 
 | 177 | { | 
 | 178 | 	struct ext4_es_tree *tree; | 
 | 179 | 	struct rb_node *node; | 
 | 180 |  | 
 | 181 | 	printk(KERN_DEBUG "status extents for inode %lu:", inode->i_ino); | 
 | 182 | 	tree = &EXT4_I(inode)->i_es_tree; | 
 | 183 | 	node = rb_first(&tree->root); | 
 | 184 | 	while (node) { | 
 | 185 | 		struct extent_status *es; | 
 | 186 | 		es = rb_entry(node, struct extent_status, rb_node); | 
| Zheng Liu | fdc0212 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 187 | 		printk(KERN_DEBUG " [%u/%u) %llu %llx", | 
 | 188 | 		       es->es_lblk, es->es_len, | 
 | 189 | 		       ext4_es_pblock(es), ext4_es_status(es)); | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 190 | 		node = rb_next(node); | 
 | 191 | 	} | 
 | 192 | 	printk(KERN_DEBUG "\n"); | 
 | 193 | } | 
 | 194 | #else | 
 | 195 | #define ext4_es_print_tree(inode) | 
 | 196 | #endif | 
 | 197 |  | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 198 | static inline ext4_lblk_t ext4_es_end(struct extent_status *es) | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 199 | { | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 200 | 	BUG_ON(es->es_lblk + es->es_len < es->es_lblk); | 
 | 201 | 	return es->es_lblk + es->es_len - 1; | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 202 | } | 
 | 203 |  | 
 | 204 | /* | 
 | 205 |  * search through the tree for an delayed extent with a given offset.  If | 
 | 206 |  * it can't be found, try to find next extent. | 
 | 207 |  */ | 
 | 208 | static struct extent_status *__es_tree_search(struct rb_root *root, | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 209 | 					      ext4_lblk_t lblk) | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 210 | { | 
 | 211 | 	struct rb_node *node = root->rb_node; | 
 | 212 | 	struct extent_status *es = NULL; | 
 | 213 |  | 
 | 214 | 	while (node) { | 
 | 215 | 		es = rb_entry(node, struct extent_status, rb_node); | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 216 | 		if (lblk < es->es_lblk) | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 217 | 			node = node->rb_left; | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 218 | 		else if (lblk > ext4_es_end(es)) | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 219 | 			node = node->rb_right; | 
 | 220 | 		else | 
 | 221 | 			return es; | 
 | 222 | 	} | 
 | 223 |  | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 224 | 	if (es && lblk < es->es_lblk) | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 225 | 		return es; | 
 | 226 |  | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 227 | 	if (es && lblk > ext4_es_end(es)) { | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 228 | 		node = rb_next(&es->rb_node); | 
 | 229 | 		return node ? rb_entry(node, struct extent_status, rb_node) : | 
 | 230 | 			      NULL; | 
 | 231 | 	} | 
 | 232 |  | 
 | 233 | 	return NULL; | 
 | 234 | } | 
 | 235 |  | 
 | 236 | /* | 
| Yan, Zheng | e30b5dc | 2013-05-03 02:15:52 -0400 | [diff] [blame] | 237 |  * ext4_es_find_delayed_extent_range: find the 1st delayed extent covering | 
 | 238 |  * @es->lblk if it exists, otherwise, the next extent after @es->lblk. | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 239 |  * | 
 | 240 |  * @inode: the inode which owns delayed extents | 
| Zheng Liu | be40136 | 2013-02-18 00:27:26 -0500 | [diff] [blame] | 241 |  * @lblk: the offset where we start to search | 
| Yan, Zheng | e30b5dc | 2013-05-03 02:15:52 -0400 | [diff] [blame] | 242 |  * @end: the offset where we stop to search | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 243 |  * @es: delayed extent that we found | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 244 |  */ | 
| Yan, Zheng | e30b5dc | 2013-05-03 02:15:52 -0400 | [diff] [blame] | 245 | void ext4_es_find_delayed_extent_range(struct inode *inode, | 
 | 246 | 				 ext4_lblk_t lblk, ext4_lblk_t end, | 
| Zheng Liu | be40136 | 2013-02-18 00:27:26 -0500 | [diff] [blame] | 247 | 				 struct extent_status *es) | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 248 | { | 
 | 249 | 	struct ext4_es_tree *tree = NULL; | 
 | 250 | 	struct extent_status *es1 = NULL; | 
 | 251 | 	struct rb_node *node; | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 252 |  | 
| Zheng Liu | be40136 | 2013-02-18 00:27:26 -0500 | [diff] [blame] | 253 | 	BUG_ON(es == NULL); | 
| Yan, Zheng | e30b5dc | 2013-05-03 02:15:52 -0400 | [diff] [blame] | 254 | 	BUG_ON(end < lblk); | 
 | 255 | 	trace_ext4_es_find_delayed_extent_range_enter(inode, lblk); | 
| Zheng Liu | 992e9fd | 2012-11-08 21:57:33 -0500 | [diff] [blame] | 256 |  | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 257 | 	read_lock(&EXT4_I(inode)->i_es_lock); | 
 | 258 | 	tree = &EXT4_I(inode)->i_es_tree; | 
 | 259 |  | 
| Zheng Liu | fdc0212 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 260 | 	/* find extent in cache firstly */ | 
| Zheng Liu | be40136 | 2013-02-18 00:27:26 -0500 | [diff] [blame] | 261 | 	es->es_lblk = es->es_len = es->es_pblk = 0; | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 262 | 	if (tree->cache_es) { | 
 | 263 | 		es1 = tree->cache_es; | 
| Zheng Liu | be40136 | 2013-02-18 00:27:26 -0500 | [diff] [blame] | 264 | 		if (in_range(lblk, es1->es_lblk, es1->es_len)) { | 
| Theodore Ts'o | 3be78c7 | 2013-08-16 21:22:41 -0400 | [diff] [blame] | 265 | 			es_debug("%u cached by [%u/%u) %llu %x\n", | 
| Zheng Liu | be40136 | 2013-02-18 00:27:26 -0500 | [diff] [blame] | 266 | 				 lblk, es1->es_lblk, es1->es_len, | 
| Zheng Liu | fdc0212 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 267 | 				 ext4_es_pblock(es1), ext4_es_status(es1)); | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 268 | 			goto out; | 
 | 269 | 		} | 
 | 270 | 	} | 
 | 271 |  | 
| Zheng Liu | be40136 | 2013-02-18 00:27:26 -0500 | [diff] [blame] | 272 | 	es1 = __es_tree_search(&tree->root, lblk); | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 273 |  | 
 | 274 | out: | 
| Zheng Liu | be40136 | 2013-02-18 00:27:26 -0500 | [diff] [blame] | 275 | 	if (es1 && !ext4_es_is_delayed(es1)) { | 
 | 276 | 		while ((node = rb_next(&es1->rb_node)) != NULL) { | 
 | 277 | 			es1 = rb_entry(node, struct extent_status, rb_node); | 
| Yan, Zheng | e30b5dc | 2013-05-03 02:15:52 -0400 | [diff] [blame] | 278 | 			if (es1->es_lblk > end) { | 
 | 279 | 				es1 = NULL; | 
 | 280 | 				break; | 
 | 281 | 			} | 
| Zheng Liu | be40136 | 2013-02-18 00:27:26 -0500 | [diff] [blame] | 282 | 			if (ext4_es_is_delayed(es1)) | 
 | 283 | 				break; | 
 | 284 | 		} | 
 | 285 | 	} | 
 | 286 |  | 
 | 287 | 	if (es1 && ext4_es_is_delayed(es1)) { | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 288 | 		tree->cache_es = es1; | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 289 | 		es->es_lblk = es1->es_lblk; | 
 | 290 | 		es->es_len = es1->es_len; | 
| Zheng Liu | fdc0212 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 291 | 		es->es_pblk = es1->es_pblk; | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 292 | 	} | 
 | 293 |  | 
 | 294 | 	read_unlock(&EXT4_I(inode)->i_es_lock); | 
| Zheng Liu | 992e9fd | 2012-11-08 21:57:33 -0500 | [diff] [blame] | 295 |  | 
| Yan, Zheng | e30b5dc | 2013-05-03 02:15:52 -0400 | [diff] [blame] | 296 | 	trace_ext4_es_find_delayed_extent_range_exit(inode, es); | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 297 | } | 
 | 298 |  | 
 | 299 | static struct extent_status * | 
| Zheng Liu | bdedbb7 | 2013-02-18 00:32:02 -0500 | [diff] [blame] | 300 | ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, | 
 | 301 | 		     ext4_fsblk_t pblk) | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 302 | { | 
 | 303 | 	struct extent_status *es; | 
 | 304 | 	es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC); | 
 | 305 | 	if (es == NULL) | 
 | 306 | 		return NULL; | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 307 | 	es->es_lblk = lblk; | 
 | 308 | 	es->es_len = len; | 
| Zheng Liu | fdc0212 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 309 | 	es->es_pblk = pblk; | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 310 |  | 
 | 311 | 	/* | 
 | 312 | 	 * We don't count delayed extent because we never try to reclaim them | 
 | 313 | 	 */ | 
| Theodore Ts'o | 2463077 | 2013-02-28 23:58:56 -0500 | [diff] [blame] | 314 | 	if (!ext4_es_is_delayed(es)) { | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 315 | 		EXT4_I(inode)->i_es_lru_nr++; | 
| Theodore Ts'o | 1ac6466 | 2013-03-02 10:27:46 -0500 | [diff] [blame] | 316 | 		percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt); | 
| Theodore Ts'o | 2463077 | 2013-02-28 23:58:56 -0500 | [diff] [blame] | 317 | 	} | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 318 |  | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 319 | 	return es; | 
 | 320 | } | 
 | 321 |  | 
| Zheng Liu | bdedbb7 | 2013-02-18 00:32:02 -0500 | [diff] [blame] | 322 | static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 323 | { | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 324 | 	/* Decrease the lru counter when this es is not delayed */ | 
 | 325 | 	if (!ext4_es_is_delayed(es)) { | 
 | 326 | 		BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0); | 
 | 327 | 		EXT4_I(inode)->i_es_lru_nr--; | 
| Theodore Ts'o | 1ac6466 | 2013-03-02 10:27:46 -0500 | [diff] [blame] | 328 | 		percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt); | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 329 | 	} | 
 | 330 |  | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 331 | 	kmem_cache_free(ext4_es_cachep, es); | 
 | 332 | } | 
 | 333 |  | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 334 | /* | 
 | 335 |  * Check whether or not two extents can be merged | 
 | 336 |  * Condition: | 
 | 337 |  *  - logical block number is contiguous | 
| Zheng Liu | fdc0212 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 338 |  *  - physical block number is contiguous | 
 | 339 |  *  - status is equal | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 340 |  */ | 
 | 341 | static int ext4_es_can_be_merged(struct extent_status *es1, | 
 | 342 | 				 struct extent_status *es2) | 
 | 343 | { | 
| Zheng Liu | fdc0212 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 344 | 	if (ext4_es_status(es1) != ext4_es_status(es2)) | 
 | 345 | 		return 0; | 
 | 346 |  | 
| Zheng Liu | bd38436 | 2013-03-10 20:48:59 -0400 | [diff] [blame] | 347 | 	if (((__u64) es1->es_len) + es2->es_len > 0xFFFFFFFFULL) | 
| Zheng Liu | fdc0212 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 348 | 		return 0; | 
 | 349 |  | 
| Zheng Liu | bd38436 | 2013-03-10 20:48:59 -0400 | [diff] [blame] | 350 | 	if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk) | 
 | 351 | 		return 0; | 
 | 352 |  | 
 | 353 | 	if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) && | 
 | 354 | 	    (ext4_es_pblock(es1) + es1->es_len == ext4_es_pblock(es2))) | 
 | 355 | 		return 1; | 
 | 356 |  | 
 | 357 | 	if (ext4_es_is_hole(es1)) | 
 | 358 | 		return 1; | 
 | 359 |  | 
 | 360 | 	/* we need to check delayed extent is without unwritten status */ | 
 | 361 | 	if (ext4_es_is_delayed(es1) && !ext4_es_is_unwritten(es1)) | 
 | 362 | 		return 1; | 
 | 363 |  | 
 | 364 | 	return 0; | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 365 | } | 
 | 366 |  | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 367 | static struct extent_status * | 
| Zheng Liu | bdedbb7 | 2013-02-18 00:32:02 -0500 | [diff] [blame] | 368 | ext4_es_try_to_merge_left(struct inode *inode, struct extent_status *es) | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 369 | { | 
| Zheng Liu | bdedbb7 | 2013-02-18 00:32:02 -0500 | [diff] [blame] | 370 | 	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree; | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 371 | 	struct extent_status *es1; | 
 | 372 | 	struct rb_node *node; | 
 | 373 |  | 
 | 374 | 	node = rb_prev(&es->rb_node); | 
 | 375 | 	if (!node) | 
 | 376 | 		return es; | 
 | 377 |  | 
 | 378 | 	es1 = rb_entry(node, struct extent_status, rb_node); | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 379 | 	if (ext4_es_can_be_merged(es1, es)) { | 
 | 380 | 		es1->es_len += es->es_len; | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 381 | 		rb_erase(&es->rb_node, &tree->root); | 
| Zheng Liu | bdedbb7 | 2013-02-18 00:32:02 -0500 | [diff] [blame] | 382 | 		ext4_es_free_extent(inode, es); | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 383 | 		es = es1; | 
 | 384 | 	} | 
 | 385 |  | 
 | 386 | 	return es; | 
 | 387 | } | 
 | 388 |  | 
 | 389 | static struct extent_status * | 
| Zheng Liu | bdedbb7 | 2013-02-18 00:32:02 -0500 | [diff] [blame] | 390 | ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es) | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 391 | { | 
| Zheng Liu | bdedbb7 | 2013-02-18 00:32:02 -0500 | [diff] [blame] | 392 | 	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree; | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 393 | 	struct extent_status *es1; | 
 | 394 | 	struct rb_node *node; | 
 | 395 |  | 
 | 396 | 	node = rb_next(&es->rb_node); | 
 | 397 | 	if (!node) | 
 | 398 | 		return es; | 
 | 399 |  | 
 | 400 | 	es1 = rb_entry(node, struct extent_status, rb_node); | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 401 | 	if (ext4_es_can_be_merged(es, es1)) { | 
 | 402 | 		es->es_len += es1->es_len; | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 403 | 		rb_erase(node, &tree->root); | 
| Zheng Liu | bdedbb7 | 2013-02-18 00:32:02 -0500 | [diff] [blame] | 404 | 		ext4_es_free_extent(inode, es1); | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 405 | 	} | 
 | 406 |  | 
 | 407 | 	return es; | 
 | 408 | } | 
 | 409 |  | 
| Dmitry Monakhov | 921f266 | 2013-03-10 21:01:03 -0400 | [diff] [blame] | 410 | #ifdef ES_AGGRESSIVE_TEST | 
| Zheng Liu | d7b2a00 | 2013-08-28 14:47:06 -0400 | [diff] [blame] | 411 | #include "ext4_extents.h"	/* Needed when ES_AGGRESSIVE_TEST is defined */ | 
 | 412 |  | 
| Dmitry Monakhov | 921f266 | 2013-03-10 21:01:03 -0400 | [diff] [blame] | 413 | static void ext4_es_insert_extent_ext_check(struct inode *inode, | 
 | 414 | 					    struct extent_status *es) | 
 | 415 | { | 
 | 416 | 	struct ext4_ext_path *path = NULL; | 
 | 417 | 	struct ext4_extent *ex; | 
 | 418 | 	ext4_lblk_t ee_block; | 
 | 419 | 	ext4_fsblk_t ee_start; | 
 | 420 | 	unsigned short ee_len; | 
 | 421 | 	int depth, ee_status, es_status; | 
 | 422 |  | 
| Theodore Ts'o | 107a7bd | 2013-08-16 21:23:41 -0400 | [diff] [blame] | 423 | 	path = ext4_ext_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE); | 
| Dmitry Monakhov | 921f266 | 2013-03-10 21:01:03 -0400 | [diff] [blame] | 424 | 	if (IS_ERR(path)) | 
 | 425 | 		return; | 
 | 426 |  | 
 | 427 | 	depth = ext_depth(inode); | 
 | 428 | 	ex = path[depth].p_ext; | 
 | 429 |  | 
 | 430 | 	if (ex) { | 
 | 431 |  | 
 | 432 | 		ee_block = le32_to_cpu(ex->ee_block); | 
 | 433 | 		ee_start = ext4_ext_pblock(ex); | 
 | 434 | 		ee_len = ext4_ext_get_actual_len(ex); | 
 | 435 |  | 
 | 436 | 		ee_status = ext4_ext_is_uninitialized(ex) ? 1 : 0; | 
 | 437 | 		es_status = ext4_es_is_unwritten(es) ? 1 : 0; | 
 | 438 |  | 
 | 439 | 		/* | 
 | 440 | 		 * Make sure ex and es are not overlap when we try to insert | 
 | 441 | 		 * a delayed/hole extent. | 
 | 442 | 		 */ | 
 | 443 | 		if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) { | 
 | 444 | 			if (in_range(es->es_lblk, ee_block, ee_len)) { | 
| Theodore Ts'o | bdafe42 | 2013-07-13 00:40:31 -0400 | [diff] [blame] | 445 | 				pr_warn("ES insert assertion failed for " | 
| Dmitry Monakhov | 921f266 | 2013-03-10 21:01:03 -0400 | [diff] [blame] | 446 | 					"inode: %lu we can find an extent " | 
 | 447 | 					"at block [%d/%d/%llu/%c], but we " | 
 | 448 | 					"want to add an delayed/hole extent " | 
 | 449 | 					"[%d/%d/%llu/%llx]\n", | 
 | 450 | 					inode->i_ino, ee_block, ee_len, | 
 | 451 | 					ee_start, ee_status ? 'u' : 'w', | 
 | 452 | 					es->es_lblk, es->es_len, | 
 | 453 | 					ext4_es_pblock(es), ext4_es_status(es)); | 
 | 454 | 			} | 
 | 455 | 			goto out; | 
 | 456 | 		} | 
 | 457 |  | 
 | 458 | 		/* | 
 | 459 | 		 * We don't check ee_block == es->es_lblk, etc. because es | 
 | 460 | 		 * might be a part of whole extent, vice versa. | 
 | 461 | 		 */ | 
 | 462 | 		if (es->es_lblk < ee_block || | 
 | 463 | 		    ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) { | 
| Theodore Ts'o | bdafe42 | 2013-07-13 00:40:31 -0400 | [diff] [blame] | 464 | 			pr_warn("ES insert assertion failed for inode: %lu " | 
| Dmitry Monakhov | 921f266 | 2013-03-10 21:01:03 -0400 | [diff] [blame] | 465 | 				"ex_status [%d/%d/%llu/%c] != " | 
 | 466 | 				"es_status [%d/%d/%llu/%c]\n", inode->i_ino, | 
 | 467 | 				ee_block, ee_len, ee_start, | 
 | 468 | 				ee_status ? 'u' : 'w', es->es_lblk, es->es_len, | 
 | 469 | 				ext4_es_pblock(es), es_status ? 'u' : 'w'); | 
 | 470 | 			goto out; | 
 | 471 | 		} | 
 | 472 |  | 
 | 473 | 		if (ee_status ^ es_status) { | 
| Theodore Ts'o | bdafe42 | 2013-07-13 00:40:31 -0400 | [diff] [blame] | 474 | 			pr_warn("ES insert assertion failed for inode: %lu " | 
| Dmitry Monakhov | 921f266 | 2013-03-10 21:01:03 -0400 | [diff] [blame] | 475 | 				"ex_status [%d/%d/%llu/%c] != " | 
 | 476 | 				"es_status [%d/%d/%llu/%c]\n", inode->i_ino, | 
 | 477 | 				ee_block, ee_len, ee_start, | 
 | 478 | 				ee_status ? 'u' : 'w', es->es_lblk, es->es_len, | 
 | 479 | 				ext4_es_pblock(es), es_status ? 'u' : 'w'); | 
 | 480 | 		} | 
 | 481 | 	} else { | 
 | 482 | 		/* | 
 | 483 | 		 * We can't find an extent on disk.  So we need to make sure | 
 | 484 | 		 * that we don't want to add an written/unwritten extent. | 
 | 485 | 		 */ | 
 | 486 | 		if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) { | 
| Theodore Ts'o | bdafe42 | 2013-07-13 00:40:31 -0400 | [diff] [blame] | 487 | 			pr_warn("ES insert assertion failed for inode: %lu " | 
| Dmitry Monakhov | 921f266 | 2013-03-10 21:01:03 -0400 | [diff] [blame] | 488 | 				"can't find an extent at block %d but we want " | 
 | 489 | 				"to add an written/unwritten extent " | 
 | 490 | 				"[%d/%d/%llu/%llx]\n", inode->i_ino, | 
 | 491 | 				es->es_lblk, es->es_lblk, es->es_len, | 
 | 492 | 				ext4_es_pblock(es), ext4_es_status(es)); | 
 | 493 | 		} | 
 | 494 | 	} | 
 | 495 | out: | 
 | 496 | 	if (path) { | 
 | 497 | 		ext4_ext_drop_refs(path); | 
 | 498 | 		kfree(path); | 
 | 499 | 	} | 
 | 500 | } | 
 | 501 |  | 
 | 502 | static void ext4_es_insert_extent_ind_check(struct inode *inode, | 
 | 503 | 					    struct extent_status *es) | 
 | 504 | { | 
 | 505 | 	struct ext4_map_blocks map; | 
 | 506 | 	int retval; | 
 | 507 |  | 
 | 508 | 	/* | 
 | 509 | 	 * Here we call ext4_ind_map_blocks to lookup a block mapping because | 
 | 510 | 	 * 'Indirect' structure is defined in indirect.c.  So we couldn't | 
 | 511 | 	 * access direct/indirect tree from outside.  It is too dirty to define | 
 | 512 | 	 * this function in indirect.c file. | 
 | 513 | 	 */ | 
 | 514 |  | 
 | 515 | 	map.m_lblk = es->es_lblk; | 
 | 516 | 	map.m_len = es->es_len; | 
 | 517 |  | 
 | 518 | 	retval = ext4_ind_map_blocks(NULL, inode, &map, 0); | 
 | 519 | 	if (retval > 0) { | 
 | 520 | 		if (ext4_es_is_delayed(es) || ext4_es_is_hole(es)) { | 
 | 521 | 			/* | 
 | 522 | 			 * We want to add a delayed/hole extent but this | 
 | 523 | 			 * block has been allocated. | 
 | 524 | 			 */ | 
| Theodore Ts'o | bdafe42 | 2013-07-13 00:40:31 -0400 | [diff] [blame] | 525 | 			pr_warn("ES insert assertion failed for inode: %lu " | 
| Dmitry Monakhov | 921f266 | 2013-03-10 21:01:03 -0400 | [diff] [blame] | 526 | 				"We can find blocks but we want to add a " | 
 | 527 | 				"delayed/hole extent [%d/%d/%llu/%llx]\n", | 
 | 528 | 				inode->i_ino, es->es_lblk, es->es_len, | 
 | 529 | 				ext4_es_pblock(es), ext4_es_status(es)); | 
 | 530 | 			return; | 
 | 531 | 		} else if (ext4_es_is_written(es)) { | 
 | 532 | 			if (retval != es->es_len) { | 
| Theodore Ts'o | bdafe42 | 2013-07-13 00:40:31 -0400 | [diff] [blame] | 533 | 				pr_warn("ES insert assertion failed for " | 
| Dmitry Monakhov | 921f266 | 2013-03-10 21:01:03 -0400 | [diff] [blame] | 534 | 					"inode: %lu retval %d != es_len %d\n", | 
 | 535 | 					inode->i_ino, retval, es->es_len); | 
 | 536 | 				return; | 
 | 537 | 			} | 
 | 538 | 			if (map.m_pblk != ext4_es_pblock(es)) { | 
| Theodore Ts'o | bdafe42 | 2013-07-13 00:40:31 -0400 | [diff] [blame] | 539 | 				pr_warn("ES insert assertion failed for " | 
| Dmitry Monakhov | 921f266 | 2013-03-10 21:01:03 -0400 | [diff] [blame] | 540 | 					"inode: %lu m_pblk %llu != " | 
 | 541 | 					"es_pblk %llu\n", | 
 | 542 | 					inode->i_ino, map.m_pblk, | 
 | 543 | 					ext4_es_pblock(es)); | 
 | 544 | 				return; | 
 | 545 | 			} | 
 | 546 | 		} else { | 
 | 547 | 			/* | 
 | 548 | 			 * We don't need to check unwritten extent because | 
 | 549 | 			 * indirect-based file doesn't have it. | 
 | 550 | 			 */ | 
 | 551 | 			BUG_ON(1); | 
 | 552 | 		} | 
 | 553 | 	} else if (retval == 0) { | 
 | 554 | 		if (ext4_es_is_written(es)) { | 
| Theodore Ts'o | bdafe42 | 2013-07-13 00:40:31 -0400 | [diff] [blame] | 555 | 			pr_warn("ES insert assertion failed for inode: %lu " | 
| Dmitry Monakhov | 921f266 | 2013-03-10 21:01:03 -0400 | [diff] [blame] | 556 | 				"We can't find the block but we want to add " | 
 | 557 | 				"an written extent [%d/%d/%llu/%llx]\n", | 
 | 558 | 				inode->i_ino, es->es_lblk, es->es_len, | 
 | 559 | 				ext4_es_pblock(es), ext4_es_status(es)); | 
 | 560 | 			return; | 
 | 561 | 		} | 
 | 562 | 	} | 
 | 563 | } | 
 | 564 |  | 
 | 565 | static inline void ext4_es_insert_extent_check(struct inode *inode, | 
 | 566 | 					       struct extent_status *es) | 
 | 567 | { | 
 | 568 | 	/* | 
 | 569 | 	 * We don't need to worry about the race condition because | 
 | 570 | 	 * caller takes i_data_sem locking. | 
 | 571 | 	 */ | 
 | 572 | 	BUG_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem)); | 
 | 573 | 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 
 | 574 | 		ext4_es_insert_extent_ext_check(inode, es); | 
 | 575 | 	else | 
 | 576 | 		ext4_es_insert_extent_ind_check(inode, es); | 
 | 577 | } | 
 | 578 | #else | 
 | 579 | static inline void ext4_es_insert_extent_check(struct inode *inode, | 
 | 580 | 					       struct extent_status *es) | 
 | 581 | { | 
 | 582 | } | 
 | 583 | #endif | 
 | 584 |  | 
| Zheng Liu | bdedbb7 | 2013-02-18 00:32:02 -0500 | [diff] [blame] | 585 | static int __es_insert_extent(struct inode *inode, struct extent_status *newes) | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 586 | { | 
| Zheng Liu | bdedbb7 | 2013-02-18 00:32:02 -0500 | [diff] [blame] | 587 | 	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree; | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 588 | 	struct rb_node **p = &tree->root.rb_node; | 
 | 589 | 	struct rb_node *parent = NULL; | 
 | 590 | 	struct extent_status *es; | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 591 |  | 
 | 592 | 	while (*p) { | 
 | 593 | 		parent = *p; | 
 | 594 | 		es = rb_entry(parent, struct extent_status, rb_node); | 
 | 595 |  | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 596 | 		if (newes->es_lblk < es->es_lblk) { | 
 | 597 | 			if (ext4_es_can_be_merged(newes, es)) { | 
 | 598 | 				/* | 
 | 599 | 				 * Here we can modify es_lblk directly | 
 | 600 | 				 * because it isn't overlapped. | 
 | 601 | 				 */ | 
 | 602 | 				es->es_lblk = newes->es_lblk; | 
 | 603 | 				es->es_len += newes->es_len; | 
| Zheng Liu | fdc0212 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 604 | 				if (ext4_es_is_written(es) || | 
 | 605 | 				    ext4_es_is_unwritten(es)) | 
 | 606 | 					ext4_es_store_pblock(es, | 
 | 607 | 							     newes->es_pblk); | 
| Zheng Liu | bdedbb7 | 2013-02-18 00:32:02 -0500 | [diff] [blame] | 608 | 				es = ext4_es_try_to_merge_left(inode, es); | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 609 | 				goto out; | 
 | 610 | 			} | 
 | 611 | 			p = &(*p)->rb_left; | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 612 | 		} else if (newes->es_lblk > ext4_es_end(es)) { | 
 | 613 | 			if (ext4_es_can_be_merged(es, newes)) { | 
 | 614 | 				es->es_len += newes->es_len; | 
| Zheng Liu | bdedbb7 | 2013-02-18 00:32:02 -0500 | [diff] [blame] | 615 | 				es = ext4_es_try_to_merge_right(inode, es); | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 616 | 				goto out; | 
 | 617 | 			} | 
 | 618 | 			p = &(*p)->rb_right; | 
 | 619 | 		} else { | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 620 | 			BUG_ON(1); | 
 | 621 | 			return -EINVAL; | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 622 | 		} | 
 | 623 | 	} | 
 | 624 |  | 
| Zheng Liu | bdedbb7 | 2013-02-18 00:32:02 -0500 | [diff] [blame] | 625 | 	es = ext4_es_alloc_extent(inode, newes->es_lblk, newes->es_len, | 
| Zheng Liu | fdc0212 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 626 | 				  newes->es_pblk); | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 627 | 	if (!es) | 
 | 628 | 		return -ENOMEM; | 
 | 629 | 	rb_link_node(&es->rb_node, parent, p); | 
 | 630 | 	rb_insert_color(&es->rb_node, &tree->root); | 
 | 631 |  | 
 | 632 | out: | 
 | 633 | 	tree->cache_es = es; | 
 | 634 | 	return 0; | 
 | 635 | } | 
 | 636 |  | 
 | 637 | /* | 
| Theodore Ts'o | bdafe42 | 2013-07-13 00:40:31 -0400 | [diff] [blame] | 638 |  * ext4_es_insert_extent() adds information to an inode's extent | 
 | 639 |  * status tree. | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 640 |  * | 
 | 641 |  * Return 0 on success, error code on failure. | 
 | 642 |  */ | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 643 | int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, | 
| Zheng Liu | fdc0212 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 644 | 			  ext4_lblk_t len, ext4_fsblk_t pblk, | 
| Theodore Ts'o | 3be78c7 | 2013-08-16 21:22:41 -0400 | [diff] [blame] | 645 | 			  unsigned int status) | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 646 | { | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 647 | 	struct extent_status newes; | 
 | 648 | 	ext4_lblk_t end = lblk + len - 1; | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 649 | 	int err = 0; | 
 | 650 |  | 
| Theodore Ts'o | 3be78c7 | 2013-08-16 21:22:41 -0400 | [diff] [blame] | 651 | 	es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n", | 
| Zheng Liu | fdc0212 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 652 | 		 lblk, len, pblk, status, inode->i_ino); | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 653 |  | 
| Eryu Guan | d438147 | 2013-02-22 15:27:47 -0500 | [diff] [blame] | 654 | 	if (!len) | 
 | 655 | 		return 0; | 
 | 656 |  | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 657 | 	BUG_ON(end < lblk); | 
 | 658 |  | 
 | 659 | 	newes.es_lblk = lblk; | 
 | 660 | 	newes.es_len = len; | 
| Zheng Liu | fdc0212 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 661 | 	ext4_es_store_pblock(&newes, pblk); | 
 | 662 | 	ext4_es_store_status(&newes, status); | 
 | 663 | 	trace_ext4_es_insert_extent(inode, &newes); | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 664 |  | 
| Dmitry Monakhov | 921f266 | 2013-03-10 21:01:03 -0400 | [diff] [blame] | 665 | 	ext4_es_insert_extent_check(inode, &newes); | 
 | 666 |  | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 667 | 	write_lock(&EXT4_I(inode)->i_es_lock); | 
| Zheng Liu | bdedbb7 | 2013-02-18 00:32:02 -0500 | [diff] [blame] | 668 | 	err = __es_remove_extent(inode, lblk, end); | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 669 | 	if (err != 0) | 
 | 670 | 		goto error; | 
| Theodore Ts'o | e15f742 | 2013-07-15 00:12:14 -0400 | [diff] [blame] | 671 | retry: | 
| Zheng Liu | bdedbb7 | 2013-02-18 00:32:02 -0500 | [diff] [blame] | 672 | 	err = __es_insert_extent(inode, &newes); | 
| Theodore Ts'o | e15f742 | 2013-07-15 00:12:14 -0400 | [diff] [blame] | 673 | 	if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1, | 
 | 674 | 					       EXT4_I(inode))) | 
 | 675 | 		goto retry; | 
 | 676 | 	if (err == -ENOMEM && !ext4_es_is_delayed(&newes)) | 
 | 677 | 		err = 0; | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 678 |  | 
 | 679 | error: | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 680 | 	write_unlock(&EXT4_I(inode)->i_es_lock); | 
 | 681 |  | 
 | 682 | 	ext4_es_print_tree(inode); | 
 | 683 |  | 
 | 684 | 	return err; | 
 | 685 | } | 
 | 686 |  | 
| Zheng Liu | d100eef | 2013-02-18 00:29:59 -0500 | [diff] [blame] | 687 | /* | 
| Theodore Ts'o | 107a7bd | 2013-08-16 21:23:41 -0400 | [diff] [blame] | 688 |  * ext4_es_cache_extent() inserts information into the extent status | 
 | 689 |  * tree if and only if there isn't information about the range in | 
 | 690 |  * question already. | 
 | 691 |  */ | 
 | 692 | void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk, | 
 | 693 | 			  ext4_lblk_t len, ext4_fsblk_t pblk, | 
 | 694 | 			  unsigned int status) | 
 | 695 | { | 
 | 696 | 	struct extent_status *es; | 
 | 697 | 	struct extent_status newes; | 
 | 698 | 	ext4_lblk_t end = lblk + len - 1; | 
 | 699 |  | 
 | 700 | 	newes.es_lblk = lblk; | 
 | 701 | 	newes.es_len = len; | 
 | 702 | 	ext4_es_store_pblock(&newes, pblk); | 
 | 703 | 	ext4_es_store_status(&newes, status); | 
 | 704 | 	trace_ext4_es_cache_extent(inode, &newes); | 
 | 705 |  | 
 | 706 | 	if (!len) | 
 | 707 | 		return; | 
 | 708 |  | 
 | 709 | 	BUG_ON(end < lblk); | 
 | 710 |  | 
 | 711 | 	write_lock(&EXT4_I(inode)->i_es_lock); | 
 | 712 |  | 
 | 713 | 	es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk); | 
| Theodore Ts'o | 7869a4a | 2013-08-16 22:05:14 -0400 | [diff] [blame] | 714 | 	if (!es || es->es_lblk > end) | 
 | 715 | 		__es_insert_extent(inode, &newes); | 
| Theodore Ts'o | 107a7bd | 2013-08-16 21:23:41 -0400 | [diff] [blame] | 716 | 	write_unlock(&EXT4_I(inode)->i_es_lock); | 
 | 717 | } | 
 | 718 |  | 
 | 719 | /* | 
| Zheng Liu | d100eef | 2013-02-18 00:29:59 -0500 | [diff] [blame] | 720 |  * ext4_es_lookup_extent() looks up an extent in extent status tree. | 
 | 721 |  * | 
 | 722 |  * ext4_es_lookup_extent is called by ext4_map_blocks/ext4_da_map_blocks. | 
 | 723 |  * | 
 | 724 |  * Return: 1 on found, 0 on not | 
 | 725 |  */ | 
 | 726 | int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, | 
 | 727 | 			  struct extent_status *es) | 
 | 728 | { | 
 | 729 | 	struct ext4_es_tree *tree; | 
 | 730 | 	struct extent_status *es1 = NULL; | 
 | 731 | 	struct rb_node *node; | 
 | 732 | 	int found = 0; | 
 | 733 |  | 
 | 734 | 	trace_ext4_es_lookup_extent_enter(inode, lblk); | 
 | 735 | 	es_debug("lookup extent in block %u\n", lblk); | 
 | 736 |  | 
 | 737 | 	tree = &EXT4_I(inode)->i_es_tree; | 
 | 738 | 	read_lock(&EXT4_I(inode)->i_es_lock); | 
 | 739 |  | 
 | 740 | 	/* find extent in cache firstly */ | 
 | 741 | 	es->es_lblk = es->es_len = es->es_pblk = 0; | 
 | 742 | 	if (tree->cache_es) { | 
 | 743 | 		es1 = tree->cache_es; | 
 | 744 | 		if (in_range(lblk, es1->es_lblk, es1->es_len)) { | 
 | 745 | 			es_debug("%u cached by [%u/%u)\n", | 
 | 746 | 				 lblk, es1->es_lblk, es1->es_len); | 
 | 747 | 			found = 1; | 
 | 748 | 			goto out; | 
 | 749 | 		} | 
 | 750 | 	} | 
 | 751 |  | 
 | 752 | 	node = tree->root.rb_node; | 
 | 753 | 	while (node) { | 
 | 754 | 		es1 = rb_entry(node, struct extent_status, rb_node); | 
 | 755 | 		if (lblk < es1->es_lblk) | 
 | 756 | 			node = node->rb_left; | 
 | 757 | 		else if (lblk > ext4_es_end(es1)) | 
 | 758 | 			node = node->rb_right; | 
 | 759 | 		else { | 
 | 760 | 			found = 1; | 
 | 761 | 			break; | 
 | 762 | 		} | 
 | 763 | 	} | 
 | 764 |  | 
 | 765 | out: | 
 | 766 | 	if (found) { | 
 | 767 | 		BUG_ON(!es1); | 
 | 768 | 		es->es_lblk = es1->es_lblk; | 
 | 769 | 		es->es_len = es1->es_len; | 
 | 770 | 		es->es_pblk = es1->es_pblk; | 
 | 771 | 	} | 
 | 772 |  | 
 | 773 | 	read_unlock(&EXT4_I(inode)->i_es_lock); | 
 | 774 |  | 
 | 775 | 	trace_ext4_es_lookup_extent_exit(inode, es, found); | 
 | 776 | 	return found; | 
 | 777 | } | 
 | 778 |  | 
| Zheng Liu | bdedbb7 | 2013-02-18 00:32:02 -0500 | [diff] [blame] | 779 | static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | 
 | 780 | 			      ext4_lblk_t end) | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 781 | { | 
| Zheng Liu | bdedbb7 | 2013-02-18 00:32:02 -0500 | [diff] [blame] | 782 | 	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree; | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 783 | 	struct rb_node *node; | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 784 | 	struct extent_status *es; | 
 | 785 | 	struct extent_status orig_es; | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 786 | 	ext4_lblk_t len1, len2; | 
| Zheng Liu | fdc0212 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 787 | 	ext4_fsblk_t block; | 
| Theodore Ts'o | e15f742 | 2013-07-15 00:12:14 -0400 | [diff] [blame] | 788 | 	int err; | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 789 |  | 
| Theodore Ts'o | e15f742 | 2013-07-15 00:12:14 -0400 | [diff] [blame] | 790 | retry: | 
 | 791 | 	err = 0; | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 792 | 	es = __es_tree_search(&tree->root, lblk); | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 793 | 	if (!es) | 
 | 794 | 		goto out; | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 795 | 	if (es->es_lblk > end) | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 796 | 		goto out; | 
 | 797 |  | 
 | 798 | 	/* Simply invalidate cache_es. */ | 
 | 799 | 	tree->cache_es = NULL; | 
 | 800 |  | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 801 | 	orig_es.es_lblk = es->es_lblk; | 
 | 802 | 	orig_es.es_len = es->es_len; | 
| Zheng Liu | fdc0212 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 803 | 	orig_es.es_pblk = es->es_pblk; | 
 | 804 |  | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 805 | 	len1 = lblk > es->es_lblk ? lblk - es->es_lblk : 0; | 
 | 806 | 	len2 = ext4_es_end(es) > end ? ext4_es_end(es) - end : 0; | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 807 | 	if (len1 > 0) | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 808 | 		es->es_len = len1; | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 809 | 	if (len2 > 0) { | 
 | 810 | 		if (len1 > 0) { | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 811 | 			struct extent_status newes; | 
 | 812 |  | 
 | 813 | 			newes.es_lblk = end + 1; | 
 | 814 | 			newes.es_len = len2; | 
| Zheng Liu | fdc0212 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 815 | 			if (ext4_es_is_written(&orig_es) || | 
 | 816 | 			    ext4_es_is_unwritten(&orig_es)) { | 
 | 817 | 				block = ext4_es_pblock(&orig_es) + | 
 | 818 | 					orig_es.es_len - len2; | 
 | 819 | 				ext4_es_store_pblock(&newes, block); | 
 | 820 | 			} | 
 | 821 | 			ext4_es_store_status(&newes, ext4_es_status(&orig_es)); | 
| Zheng Liu | bdedbb7 | 2013-02-18 00:32:02 -0500 | [diff] [blame] | 822 | 			err = __es_insert_extent(inode, &newes); | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 823 | 			if (err) { | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 824 | 				es->es_lblk = orig_es.es_lblk; | 
 | 825 | 				es->es_len = orig_es.es_len; | 
| Theodore Ts'o | e15f742 | 2013-07-15 00:12:14 -0400 | [diff] [blame] | 826 | 				if ((err == -ENOMEM) && | 
 | 827 | 				    __ext4_es_shrink(EXT4_SB(inode->i_sb), 1, | 
 | 828 | 						     EXT4_I(inode))) | 
 | 829 | 					goto retry; | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 830 | 				goto out; | 
 | 831 | 			} | 
 | 832 | 		} else { | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 833 | 			es->es_lblk = end + 1; | 
 | 834 | 			es->es_len = len2; | 
| Zheng Liu | fdc0212 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 835 | 			if (ext4_es_is_written(es) || | 
 | 836 | 			    ext4_es_is_unwritten(es)) { | 
 | 837 | 				block = orig_es.es_pblk + orig_es.es_len - len2; | 
 | 838 | 				ext4_es_store_pblock(es, block); | 
 | 839 | 			} | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 840 | 		} | 
 | 841 | 		goto out; | 
 | 842 | 	} | 
 | 843 |  | 
 | 844 | 	if (len1 > 0) { | 
 | 845 | 		node = rb_next(&es->rb_node); | 
 | 846 | 		if (node) | 
 | 847 | 			es = rb_entry(node, struct extent_status, rb_node); | 
 | 848 | 		else | 
 | 849 | 			es = NULL; | 
 | 850 | 	} | 
 | 851 |  | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 852 | 	while (es && ext4_es_end(es) <= end) { | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 853 | 		node = rb_next(&es->rb_node); | 
 | 854 | 		rb_erase(&es->rb_node, &tree->root); | 
| Zheng Liu | bdedbb7 | 2013-02-18 00:32:02 -0500 | [diff] [blame] | 855 | 		ext4_es_free_extent(inode, es); | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 856 | 		if (!node) { | 
 | 857 | 			es = NULL; | 
 | 858 | 			break; | 
 | 859 | 		} | 
 | 860 | 		es = rb_entry(node, struct extent_status, rb_node); | 
 | 861 | 	} | 
 | 862 |  | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 863 | 	if (es && es->es_lblk < end + 1) { | 
| Zheng Liu | fdc0212 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 864 | 		ext4_lblk_t orig_len = es->es_len; | 
 | 865 |  | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 866 | 		len1 = ext4_es_end(es) - end; | 
 | 867 | 		es->es_lblk = end + 1; | 
 | 868 | 		es->es_len = len1; | 
| Zheng Liu | fdc0212 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 869 | 		if (ext4_es_is_written(es) || ext4_es_is_unwritten(es)) { | 
 | 870 | 			block = es->es_pblk + orig_len - len1; | 
 | 871 | 			ext4_es_store_pblock(es, block); | 
 | 872 | 		} | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 873 | 	} | 
 | 874 |  | 
 | 875 | out: | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 876 | 	return err; | 
 | 877 | } | 
 | 878 |  | 
 | 879 | /* | 
 | 880 |  * ext4_es_remove_extent() removes a space from a extent status tree. | 
 | 881 |  * | 
 | 882 |  * Return 0 on success, error code on failure. | 
 | 883 |  */ | 
 | 884 | int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | 
 | 885 | 			  ext4_lblk_t len) | 
 | 886 | { | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 887 | 	ext4_lblk_t end; | 
 | 888 | 	int err = 0; | 
 | 889 |  | 
 | 890 | 	trace_ext4_es_remove_extent(inode, lblk, len); | 
 | 891 | 	es_debug("remove [%u/%u) from extent status tree of inode %lu\n", | 
 | 892 | 		 lblk, len, inode->i_ino); | 
 | 893 |  | 
| Eryu Guan | d438147 | 2013-02-22 15:27:47 -0500 | [diff] [blame] | 894 | 	if (!len) | 
 | 895 | 		return err; | 
 | 896 |  | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 897 | 	end = lblk + len - 1; | 
 | 898 | 	BUG_ON(end < lblk); | 
 | 899 |  | 
| Zheng Liu | 06b0c88 | 2013-02-18 00:26:51 -0500 | [diff] [blame] | 900 | 	write_lock(&EXT4_I(inode)->i_es_lock); | 
| Zheng Liu | bdedbb7 | 2013-02-18 00:32:02 -0500 | [diff] [blame] | 901 | 	err = __es_remove_extent(inode, lblk, end); | 
| Zheng Liu | 654598b | 2012-11-08 21:57:20 -0500 | [diff] [blame] | 902 | 	write_unlock(&EXT4_I(inode)->i_es_lock); | 
 | 903 | 	ext4_es_print_tree(inode); | 
 | 904 | 	return err; | 
 | 905 | } | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 906 |  | 
| Zheng Liu | d3922a7 | 2013-07-01 08:12:37 -0400 | [diff] [blame] | 907 | static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a, | 
 | 908 | 				     struct list_head *b) | 
 | 909 | { | 
 | 910 | 	struct ext4_inode_info *eia, *eib; | 
 | 911 | 	eia = list_entry(a, struct ext4_inode_info, i_es_lru); | 
 | 912 | 	eib = list_entry(b, struct ext4_inode_info, i_es_lru); | 
 | 913 |  | 
| Theodore Ts'o | 7869a4a | 2013-08-16 22:05:14 -0400 | [diff] [blame] | 914 | 	if (ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) && | 
 | 915 | 	    !ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED)) | 
 | 916 | 		return 1; | 
 | 917 | 	if (!ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) && | 
 | 918 | 	    ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED)) | 
 | 919 | 		return -1; | 
| Zheng Liu | d3922a7 | 2013-07-01 08:12:37 -0400 | [diff] [blame] | 920 | 	if (eia->i_touch_when == eib->i_touch_when) | 
 | 921 | 		return 0; | 
 | 922 | 	if (time_after(eia->i_touch_when, eib->i_touch_when)) | 
 | 923 | 		return 1; | 
 | 924 | 	else | 
 | 925 | 		return -1; | 
 | 926 | } | 
 | 927 |  | 
| Theodore Ts'o | e15f742 | 2013-07-15 00:12:14 -0400 | [diff] [blame] | 928 | static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, | 
 | 929 | 			    struct ext4_inode_info *locked_ei) | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 930 | { | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 931 | 	struct ext4_inode_info *ei; | 
| Zheng Liu | d3922a7 | 2013-07-01 08:12:37 -0400 | [diff] [blame] | 932 | 	struct list_head *cur, *tmp; | 
| Theodore Ts'o | 7869a4a | 2013-08-16 22:05:14 -0400 | [diff] [blame] | 933 | 	LIST_HEAD(skipped); | 
| Dave Chinner | 1ab6c49 | 2013-08-28 10:18:09 +1000 | [diff] [blame] | 934 | 	int nr_shrunk = 0; | 
| Theodore Ts'o | 7869a4a | 2013-08-16 22:05:14 -0400 | [diff] [blame] | 935 | 	int retried = 0, skip_precached = 1, nr_skipped = 0; | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 936 |  | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 937 | 	spin_lock(&sbi->s_es_lru_lock); | 
| Zheng Liu | d3922a7 | 2013-07-01 08:12:37 -0400 | [diff] [blame] | 938 |  | 
| Theodore Ts'o | 7869a4a | 2013-08-16 22:05:14 -0400 | [diff] [blame] | 939 | retry: | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 940 | 	list_for_each_safe(cur, tmp, &sbi->s_es_lru) { | 
| Dave Chinner | 1ab6c49 | 2013-08-28 10:18:09 +1000 | [diff] [blame] | 941 | 		int shrunk; | 
 | 942 |  | 
| Zheng Liu | d3922a7 | 2013-07-01 08:12:37 -0400 | [diff] [blame] | 943 | 		/* | 
 | 944 | 		 * If we have already reclaimed all extents from extent | 
 | 945 | 		 * status tree, just stop the loop immediately. | 
 | 946 | 		 */ | 
 | 947 | 		if (percpu_counter_read_positive(&sbi->s_extent_cache_cnt) == 0) | 
 | 948 | 			break; | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 949 |  | 
 | 950 | 		ei = list_entry(cur, struct ext4_inode_info, i_es_lru); | 
 | 951 |  | 
| Theodore Ts'o | 7869a4a | 2013-08-16 22:05:14 -0400 | [diff] [blame] | 952 | 		/* | 
 | 953 | 		 * Skip the inode that is newer than the last_sorted | 
 | 954 | 		 * time.  Normally we try hard to avoid shrinking | 
 | 955 | 		 * precached inodes, but we will as a last resort. | 
 | 956 | 		 */ | 
 | 957 | 		if ((sbi->s_es_last_sorted < ei->i_touch_when) || | 
 | 958 | 		    (skip_precached && ext4_test_inode_state(&ei->vfs_inode, | 
 | 959 | 						EXT4_STATE_EXT_PRECACHED))) { | 
 | 960 | 			nr_skipped++; | 
 | 961 | 			list_move_tail(cur, &skipped); | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 962 | 			continue; | 
 | 963 | 		} | 
| Zheng Liu | d3922a7 | 2013-07-01 08:12:37 -0400 | [diff] [blame] | 964 |  | 
| Theodore Ts'o | e15f742 | 2013-07-15 00:12:14 -0400 | [diff] [blame] | 965 | 		if (ei->i_es_lru_nr == 0 || ei == locked_ei) | 
| Zheng Liu | d3922a7 | 2013-07-01 08:12:37 -0400 | [diff] [blame] | 966 | 			continue; | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 967 |  | 
 | 968 | 		write_lock(&ei->i_es_lock); | 
| Dave Chinner | 1ab6c49 | 2013-08-28 10:18:09 +1000 | [diff] [blame] | 969 | 		shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan); | 
| Zheng Liu | d3922a7 | 2013-07-01 08:12:37 -0400 | [diff] [blame] | 970 | 		if (ei->i_es_lru_nr == 0) | 
 | 971 | 			list_del_init(&ei->i_es_lru); | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 972 | 		write_unlock(&ei->i_es_lock); | 
 | 973 |  | 
| Dave Chinner | 1ab6c49 | 2013-08-28 10:18:09 +1000 | [diff] [blame] | 974 | 		nr_shrunk += shrunk; | 
 | 975 | 		nr_to_scan -= shrunk; | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 976 | 		if (nr_to_scan == 0) | 
 | 977 | 			break; | 
 | 978 | 	} | 
| Zheng Liu | d3922a7 | 2013-07-01 08:12:37 -0400 | [diff] [blame] | 979 |  | 
 | 980 | 	/* Move the newer inodes into the tail of the LRU list. */ | 
| Theodore Ts'o | 7869a4a | 2013-08-16 22:05:14 -0400 | [diff] [blame] | 981 | 	list_splice_tail(&skipped, &sbi->s_es_lru); | 
 | 982 | 	INIT_LIST_HEAD(&skipped); | 
 | 983 |  | 
 | 984 | 	/* | 
 | 985 | 	 * If we skipped any inodes, and we weren't able to make any | 
 | 986 | 	 * forward progress, sort the list and try again. | 
 | 987 | 	 */ | 
 | 988 | 	if ((nr_shrunk == 0) && nr_skipped && !retried) { | 
 | 989 | 		retried++; | 
 | 990 | 		list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp); | 
 | 991 | 		sbi->s_es_last_sorted = jiffies; | 
 | 992 | 		ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, | 
 | 993 | 				      i_es_lru); | 
 | 994 | 		/* | 
 | 995 | 		 * If there are no non-precached inodes left on the | 
 | 996 | 		 * list, start releasing precached extents. | 
 | 997 | 		 */ | 
 | 998 | 		if (ext4_test_inode_state(&ei->vfs_inode, | 
 | 999 | 					  EXT4_STATE_EXT_PRECACHED)) | 
 | 1000 | 			skip_precached = 0; | 
 | 1001 | 		goto retry; | 
 | 1002 | 	} | 
 | 1003 |  | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 1004 | 	spin_unlock(&sbi->s_es_lru_lock); | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 1005 |  | 
| Theodore Ts'o | e15f742 | 2013-07-15 00:12:14 -0400 | [diff] [blame] | 1006 | 	if (locked_ei && nr_shrunk == 0) | 
| Theodore Ts'o | 7869a4a | 2013-08-16 22:05:14 -0400 | [diff] [blame] | 1007 | 		nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan); | 
| Theodore Ts'o | e15f742 | 2013-07-15 00:12:14 -0400 | [diff] [blame] | 1008 |  | 
 | 1009 | 	return nr_shrunk; | 
 | 1010 | } | 
 | 1011 |  | 
| Dave Chinner | 1ab6c49 | 2013-08-28 10:18:09 +1000 | [diff] [blame] | 1012 | static unsigned long ext4_es_count(struct shrinker *shrink, | 
 | 1013 | 				   struct shrink_control *sc) | 
 | 1014 | { | 
 | 1015 | 	unsigned long nr; | 
 | 1016 | 	struct ext4_sb_info *sbi; | 
 | 1017 |  | 
 | 1018 | 	sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); | 
 | 1019 | 	nr = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); | 
 | 1020 | 	trace_ext4_es_shrink_enter(sbi->s_sb, sc->nr_to_scan, nr); | 
 | 1021 | 	return nr; | 
 | 1022 | } | 
 | 1023 |  | 
 | 1024 | static unsigned long ext4_es_scan(struct shrinker *shrink, | 
 | 1025 | 				  struct shrink_control *sc) | 
| Theodore Ts'o | e15f742 | 2013-07-15 00:12:14 -0400 | [diff] [blame] | 1026 | { | 
 | 1027 | 	struct ext4_sb_info *sbi = container_of(shrink, | 
 | 1028 | 					struct ext4_sb_info, s_es_shrinker); | 
 | 1029 | 	int nr_to_scan = sc->nr_to_scan; | 
 | 1030 | 	int ret, nr_shrunk; | 
 | 1031 |  | 
 | 1032 | 	ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); | 
 | 1033 | 	trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret); | 
 | 1034 |  | 
 | 1035 | 	if (!nr_to_scan) | 
 | 1036 | 		return ret; | 
 | 1037 |  | 
 | 1038 | 	nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); | 
 | 1039 |  | 
| Theodore Ts'o | 2463077 | 2013-02-28 23:58:56 -0500 | [diff] [blame] | 1040 | 	trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); | 
| Dave Chinner | 1ab6c49 | 2013-08-28 10:18:09 +1000 | [diff] [blame] | 1041 | 	return nr_shrunk; | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 1042 | } | 
 | 1043 |  | 
| Zheng Liu | d3922a7 | 2013-07-01 08:12:37 -0400 | [diff] [blame] | 1044 | void ext4_es_register_shrinker(struct ext4_sb_info *sbi) | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 1045 | { | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 1046 | 	INIT_LIST_HEAD(&sbi->s_es_lru); | 
 | 1047 | 	spin_lock_init(&sbi->s_es_lru_lock); | 
| Zheng Liu | d3922a7 | 2013-07-01 08:12:37 -0400 | [diff] [blame] | 1048 | 	sbi->s_es_last_sorted = 0; | 
| Dave Chinner | 1ab6c49 | 2013-08-28 10:18:09 +1000 | [diff] [blame] | 1049 | 	sbi->s_es_shrinker.scan_objects = ext4_es_scan; | 
 | 1050 | 	sbi->s_es_shrinker.count_objects = ext4_es_count; | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 1051 | 	sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; | 
 | 1052 | 	register_shrinker(&sbi->s_es_shrinker); | 
 | 1053 | } | 
 | 1054 |  | 
| Zheng Liu | d3922a7 | 2013-07-01 08:12:37 -0400 | [diff] [blame] | 1055 | void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 1056 | { | 
| Zheng Liu | d3922a7 | 2013-07-01 08:12:37 -0400 | [diff] [blame] | 1057 | 	unregister_shrinker(&sbi->s_es_shrinker); | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 1058 | } | 
 | 1059 |  | 
 | 1060 | void ext4_es_lru_add(struct inode *inode) | 
 | 1061 | { | 
 | 1062 | 	struct ext4_inode_info *ei = EXT4_I(inode); | 
 | 1063 | 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 
 | 1064 |  | 
| Zheng Liu | d3922a7 | 2013-07-01 08:12:37 -0400 | [diff] [blame] | 1065 | 	ei->i_touch_when = jiffies; | 
 | 1066 |  | 
 | 1067 | 	if (!list_empty(&ei->i_es_lru)) | 
 | 1068 | 		return; | 
 | 1069 |  | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 1070 | 	spin_lock(&sbi->s_es_lru_lock); | 
 | 1071 | 	if (list_empty(&ei->i_es_lru)) | 
 | 1072 | 		list_add_tail(&ei->i_es_lru, &sbi->s_es_lru); | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 1073 | 	spin_unlock(&sbi->s_es_lru_lock); | 
 | 1074 | } | 
 | 1075 |  | 
 | 1076 | void ext4_es_lru_del(struct inode *inode) | 
 | 1077 | { | 
 | 1078 | 	struct ext4_inode_info *ei = EXT4_I(inode); | 
 | 1079 | 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 
 | 1080 |  | 
 | 1081 | 	spin_lock(&sbi->s_es_lru_lock); | 
 | 1082 | 	if (!list_empty(&ei->i_es_lru)) | 
 | 1083 | 		list_del_init(&ei->i_es_lru); | 
 | 1084 | 	spin_unlock(&sbi->s_es_lru_lock); | 
 | 1085 | } | 
 | 1086 |  | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 1087 | static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, | 
 | 1088 | 				       int nr_to_scan) | 
 | 1089 | { | 
 | 1090 | 	struct inode *inode = &ei->vfs_inode; | 
 | 1091 | 	struct ext4_es_tree *tree = &ei->i_es_tree; | 
 | 1092 | 	struct rb_node *node; | 
 | 1093 | 	struct extent_status *es; | 
| Dave Chinner | 1ab6c49 | 2013-08-28 10:18:09 +1000 | [diff] [blame] | 1094 | 	unsigned long nr_shrunk = 0; | 
| Theodore Ts'o | 7869a4a | 2013-08-16 22:05:14 -0400 | [diff] [blame] | 1095 | 	static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, | 
 | 1096 | 				      DEFAULT_RATELIMIT_BURST); | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 1097 |  | 
 | 1098 | 	if (ei->i_es_lru_nr == 0) | 
 | 1099 | 		return 0; | 
 | 1100 |  | 
| Theodore Ts'o | 7869a4a | 2013-08-16 22:05:14 -0400 | [diff] [blame] | 1101 | 	if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) && | 
 | 1102 | 	    __ratelimit(&_rs)) | 
 | 1103 | 		ext4_warning(inode->i_sb, "forced shrink of precached extents"); | 
 | 1104 |  | 
| Zheng Liu | 74cd15c | 2013-02-18 00:32:55 -0500 | [diff] [blame] | 1105 | 	node = rb_first(&tree->root); | 
 | 1106 | 	while (node != NULL) { | 
 | 1107 | 		es = rb_entry(node, struct extent_status, rb_node); | 
 | 1108 | 		node = rb_next(&es->rb_node); | 
 | 1109 | 		/* | 
 | 1110 | 		 * We can't reclaim delayed extent from status tree because | 
 | 1111 | 		 * fiemap, bigallic, and seek_data/hole need to use it. | 
 | 1112 | 		 */ | 
 | 1113 | 		if (!ext4_es_is_delayed(es)) { | 
 | 1114 | 			rb_erase(&es->rb_node, &tree->root); | 
 | 1115 | 			ext4_es_free_extent(inode, es); | 
 | 1116 | 			nr_shrunk++; | 
 | 1117 | 			if (--nr_to_scan == 0) | 
 | 1118 | 				break; | 
 | 1119 | 		} | 
 | 1120 | 	} | 
 | 1121 | 	tree->cache_es = NULL; | 
 | 1122 | 	return nr_shrunk; | 
 | 1123 | } |