#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/bio.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/page-flags.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/prefetch.h>
#include <linux/cleancache.h>
#include "extent_io.h"
#include "extent_map.h"
#include "compat.h"
#include "ctree.h"
#include "btrfs_inode.h"
#include "volumes.h"
#include "check-integrity.h"

static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;

static LIST_HEAD(buffers);
static LIST_HEAD(states);

#define LEAK_DEBUG 0
#if LEAK_DEBUG
static DEFINE_SPINLOCK(leak_lock);
#endif

#define BUFFER_LRU_MAX 64

struct tree_entry {
	u64 start;
	u64 end;
	struct rb_node rb_node;
};

struct extent_page_data {
	struct bio *bio;
	struct extent_io_tree *tree;
	get_extent_t *get_extent;

	/* tells writepage not to lock the state bits for this range
	 * it still does the unlocking
	 */
	unsigned int extent_locked:1;

	/* tells the submit_bio code to use a WRITE_SYNC */
	unsigned int sync_io:1;
};

static inline struct btrfs_fs_info *
tree_fs_info(struct extent_io_tree *tree)
{
	return btrfs_sb(tree->mapping->host->i_sb);
}

int __init extent_io_init(void)
{
	extent_state_cache = kmem_cache_create("extent_state",
			sizeof(struct extent_state), 0,
			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
	if (!extent_state_cache)
		return -ENOMEM;

	extent_buffer_cache = kmem_cache_create("extent_buffers",
			sizeof(struct extent_buffer), 0,
			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
	if (!extent_buffer_cache)
		goto free_state_cache;
	return 0;

free_state_cache:
	kmem_cache_destroy(extent_state_cache);
	return -ENOMEM;
}

void extent_io_exit(void)
{
	struct extent_state *state;
	struct extent_buffer *eb;

	while (!list_empty(&states)) {
		state = list_entry(states.next, struct extent_state, leak_list);
		printk(KERN_ERR "btrfs state leak: start %llu end %llu "
		       "state %lu in tree %p refs %d\n",
		       (unsigned long long)state->start,
		       (unsigned long long)state->end,
		       state->state, state->tree, atomic_read(&state->refs));
		list_del(&state->leak_list);
		kmem_cache_free(extent_state_cache, state);

	}

	while (!list_empty(&buffers)) {
		eb = list_entry(buffers.next, struct extent_buffer, leak_list);
		printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
		       "refs %d\n", (unsigned long long)eb->start,
		       eb->len, atomic_read(&eb->refs));
		list_del(&eb->leak_list);
		kmem_cache_free(extent_buffer_cache, eb);
	}
	if (extent_state_cache)
		kmem_cache_destroy(extent_state_cache);
	if (extent_buffer_cache)
		kmem_cache_destroy(extent_buffer_cache);
}

void extent_io_tree_init(struct extent_io_tree *tree,
			 struct address_space *mapping)
{
	tree->state = RB_ROOT;
	INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
	tree->ops = NULL;
	tree->dirty_bytes = 0;
	spin_lock_init(&tree->lock);
	spin_lock_init(&tree->buffer_lock);
	tree->mapping = mapping;
}
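
/*
 * Illustrative sketch (hypothetical caller, not taken from this file):
 * a user of this API embeds an extent_io_tree and ties it to the
 * address_space it shadows; the 'io_tree' name below is made up.
 *
 *	struct extent_io_tree io_tree;
 *
 *	extent_io_tree_init(&io_tree, inode->i_mapping);
 */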

static struct extent_state *alloc_extent_state(gfp_t mask)
{
	struct extent_state *state;
#if LEAK_DEBUG
	unsigned long flags;
#endif

	state = kmem_cache_alloc(extent_state_cache, mask);
	if (!state)
		return state;
	state->state = 0;
	state->private = 0;
	state->tree = NULL;
#if LEAK_DEBUG
	spin_lock_irqsave(&leak_lock, flags);
	list_add(&state->leak_list, &states);
	spin_unlock_irqrestore(&leak_lock, flags);
#endif
	atomic_set(&state->refs, 1);
	init_waitqueue_head(&state->wq);
	trace_alloc_extent_state(state, mask, _RET_IP_);
	return state;
}

void free_extent_state(struct extent_state *state)
{
	if (!state)
		return;
	if (atomic_dec_and_test(&state->refs)) {
#if LEAK_DEBUG
		unsigned long flags;
#endif
		WARN_ON(state->tree);
#if LEAK_DEBUG
		spin_lock_irqsave(&leak_lock, flags);
		list_del(&state->leak_list);
		spin_unlock_irqrestore(&leak_lock, flags);
#endif
		trace_free_extent_state(state, _RET_IP_);
		kmem_cache_free(extent_state_cache, state);
	}
}

static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
				   struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct tree_entry *entry;

	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct tree_entry, rb_node);

		if (offset < entry->start)
			p = &(*p)->rb_left;
		else if (offset > entry->end)
			p = &(*p)->rb_right;
		else
			return parent;
	}

	entry = rb_entry(node, struct tree_entry, rb_node);
	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
	return NULL;
}

static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
				      struct rb_node **prev_ret,
				      struct rb_node **next_ret)
{
	struct rb_root *root = &tree->state;
	struct rb_node *n = root->rb_node;
	struct rb_node *prev = NULL;
	struct rb_node *orig_prev = NULL;
	struct tree_entry *entry;
	struct tree_entry *prev_entry = NULL;

	while (n) {
		entry = rb_entry(n, struct tree_entry, rb_node);
		prev = n;
		prev_entry = entry;

		if (offset < entry->start)
			n = n->rb_left;
		else if (offset > entry->end)
			n = n->rb_right;
		else
			return n;
	}

	if (prev_ret) {
		orig_prev = prev;
		while (prev && offset > prev_entry->end) {
			prev = rb_next(prev);
			prev_entry = rb_entry(prev, struct tree_entry, rb_node);
		}
		*prev_ret = prev;
		prev = orig_prev;
	}

	if (next_ret) {
		prev_entry = rb_entry(prev, struct tree_entry, rb_node);
		while (prev && offset < prev_entry->start) {
			prev = rb_prev(prev);
			prev_entry = rb_entry(prev, struct tree_entry, rb_node);
		}
		*next_ret = prev;
	}
	return NULL;
}

static inline struct rb_node *tree_search(struct extent_io_tree *tree,
					  u64 offset)
{
	struct rb_node *prev = NULL;
	struct rb_node *ret;

	ret = __etree_search(tree, offset, &prev, NULL);
	if (!ret)
		return prev;
	return ret;
}
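
/*
 * Illustrative sketch (hypothetical caller): tree_search() returns either
 * the node whose range contains 'offset' or the first node starting after
 * it, so callers re-check the bounds themselves, e.g.:
 *
 *	node = tree_search(tree, start);
 *	if (node) {
 *		state = rb_entry(node, struct extent_state, rb_node);
 *		if (state->start > end)
 *			return NULL;
 *	}
 */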
249
Josef Bacik9ed74f22009-09-11 16:12:44 -0400250static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
251 struct extent_state *other)
252{
253 if (tree->ops && tree->ops->merge_extent_hook)
254 tree->ops->merge_extent_hook(tree->mapping->host, new,
255 other);
256}
257
Chris Masond1310b22008-01-24 16:13:08 -0500258/*
259 * utility function to look for merge candidates inside a given range.
260 * Any extents with matching state are merged together into a single
261 * extent in the tree. Extents with EXTENT_IO in their state field
262 * are not merged because the end_io handlers need to be able to do
263 * operations on them without sleeping (or doing allocations/splits).
264 *
265 * This should be called with the tree lock held.
266 */
Jeff Mahoney1bf85042011-07-21 16:56:09 +0000267static void merge_state(struct extent_io_tree *tree,
268 struct extent_state *state)
Chris Masond1310b22008-01-24 16:13:08 -0500269{
270 struct extent_state *other;
271 struct rb_node *other_node;
272
Zheng Yan5b21f2e2008-09-26 10:05:38 -0400273 if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
Jeff Mahoney1bf85042011-07-21 16:56:09 +0000274 return;
Chris Masond1310b22008-01-24 16:13:08 -0500275
276 other_node = rb_prev(&state->rb_node);
277 if (other_node) {
278 other = rb_entry(other_node, struct extent_state, rb_node);
279 if (other->end == state->start - 1 &&
280 other->state == state->state) {
Josef Bacik9ed74f22009-09-11 16:12:44 -0400281 merge_cb(tree, state, other);
Chris Masond1310b22008-01-24 16:13:08 -0500282 state->start = other->start;
Chris Mason70dec802008-01-29 09:59:12 -0500283 other->tree = NULL;
Chris Masond1310b22008-01-24 16:13:08 -0500284 rb_erase(&other->rb_node, &tree->state);
285 free_extent_state(other);
286 }
287 }
288 other_node = rb_next(&state->rb_node);
289 if (other_node) {
290 other = rb_entry(other_node, struct extent_state, rb_node);
291 if (other->start == state->end + 1 &&
292 other->state == state->state) {
Josef Bacik9ed74f22009-09-11 16:12:44 -0400293 merge_cb(tree, state, other);
Josef Bacikdf98b6e2011-06-20 14:53:48 -0400294 state->end = other->end;
295 other->tree = NULL;
296 rb_erase(&other->rb_node, &tree->state);
297 free_extent_state(other);
Chris Masond1310b22008-01-24 16:13:08 -0500298 }
299 }
Chris Masond1310b22008-01-24 16:13:08 -0500300}
301
Jeff Mahoney1bf85042011-07-21 16:56:09 +0000302static void set_state_cb(struct extent_io_tree *tree,
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -0400303 struct extent_state *state, int *bits)
Chris Mason291d6732008-01-29 15:55:23 -0500304{
Jeff Mahoney1bf85042011-07-21 16:56:09 +0000305 if (tree->ops && tree->ops->set_bit_hook)
306 tree->ops->set_bit_hook(tree->mapping->host, state, bits);
Chris Mason291d6732008-01-29 15:55:23 -0500307}
308
309static void clear_state_cb(struct extent_io_tree *tree,
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -0400310 struct extent_state *state, int *bits)
Chris Mason291d6732008-01-29 15:55:23 -0500311{
Josef Bacik9ed74f22009-09-11 16:12:44 -0400312 if (tree->ops && tree->ops->clear_bit_hook)
313 tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
Chris Mason291d6732008-01-29 15:55:23 -0500314}
315
Xiao Guangrong3150b692011-07-14 03:19:08 +0000316static void set_state_bits(struct extent_io_tree *tree,
317 struct extent_state *state, int *bits);
318
Chris Masond1310b22008-01-24 16:13:08 -0500319/*
320 * insert an extent_state struct into the tree. 'bits' are set on the
321 * struct before it is inserted.
322 *
323 * This may return -EEXIST if the extent is already there, in which case the
324 * state struct is freed.
325 *
326 * The tree lock is not taken internally. This is a utility function and
327 * probably isn't what you want to call (see set/clear_extent_bit).
328 */
329static int insert_state(struct extent_io_tree *tree,
330 struct extent_state *state, u64 start, u64 end,
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -0400331 int *bits)
Chris Masond1310b22008-01-24 16:13:08 -0500332{
333 struct rb_node *node;
334
335 if (end < start) {
Chris Masond3977122009-01-05 21:25:51 -0500336 printk(KERN_ERR "btrfs end < start %llu %llu\n",
337 (unsigned long long)end,
338 (unsigned long long)start);
Chris Masond1310b22008-01-24 16:13:08 -0500339 WARN_ON(1);
340 }
Chris Masond1310b22008-01-24 16:13:08 -0500341 state->start = start;
342 state->end = end;
Josef Bacik9ed74f22009-09-11 16:12:44 -0400343
Xiao Guangrong3150b692011-07-14 03:19:08 +0000344 set_state_bits(tree, state, bits);
345
Chris Masond1310b22008-01-24 16:13:08 -0500346 node = tree_insert(&tree->state, end, &state->rb_node);
347 if (node) {
348 struct extent_state *found;
349 found = rb_entry(node, struct extent_state, rb_node);
Chris Masond3977122009-01-05 21:25:51 -0500350 printk(KERN_ERR "btrfs found node %llu %llu on insert of "
351 "%llu %llu\n", (unsigned long long)found->start,
352 (unsigned long long)found->end,
353 (unsigned long long)start, (unsigned long long)end);
Chris Masond1310b22008-01-24 16:13:08 -0500354 return -EEXIST;
355 }
Chris Mason70dec802008-01-29 09:59:12 -0500356 state->tree = tree;
Chris Masond1310b22008-01-24 16:13:08 -0500357 merge_state(tree, state);
358 return 0;
359}
360
Jeff Mahoney1bf85042011-07-21 16:56:09 +0000361static void split_cb(struct extent_io_tree *tree, struct extent_state *orig,
Josef Bacik9ed74f22009-09-11 16:12:44 -0400362 u64 split)
363{
364 if (tree->ops && tree->ops->split_extent_hook)
Jeff Mahoney1bf85042011-07-21 16:56:09 +0000365 tree->ops->split_extent_hook(tree->mapping->host, orig, split);
Josef Bacik9ed74f22009-09-11 16:12:44 -0400366}
367
Chris Masond1310b22008-01-24 16:13:08 -0500368/*
369 * split a given extent state struct in two, inserting the preallocated
370 * struct 'prealloc' as the newly created second half. 'split' indicates an
371 * offset inside 'orig' where it should be split.
372 *
373 * Before calling,
374 * the tree has 'orig' at [orig->start, orig->end]. After calling, there
375 * are two extent state structs in the tree:
376 * prealloc: [orig->start, split - 1]
377 * orig: [ split, orig->end ]
378 *
379 * The tree locks are not taken by this function. They need to be held
380 * by the caller.
381 */
382static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
383 struct extent_state *prealloc, u64 split)
384{
385 struct rb_node *node;
Josef Bacik9ed74f22009-09-11 16:12:44 -0400386
387 split_cb(tree, orig, split);
388
Chris Masond1310b22008-01-24 16:13:08 -0500389 prealloc->start = orig->start;
390 prealloc->end = split - 1;
391 prealloc->state = orig->state;
392 orig->start = split;
393
394 node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
395 if (node) {
Chris Masond1310b22008-01-24 16:13:08 -0500396 free_extent_state(prealloc);
397 return -EEXIST;
398 }
Chris Mason70dec802008-01-29 09:59:12 -0500399 prealloc->tree = tree;
Chris Masond1310b22008-01-24 16:13:08 -0500400 return 0;
401}
402
403/*
404 * utility function to clear some bits in an extent state struct.
405 * it will optionally wake up any one waiting on this state (wake == 1), or
406 * forcibly remove the state from the tree (delete == 1).
407 *
408 * If no bits are set on the state struct after clearing things, the
409 * struct is freed and removed from the tree
410 */
411static int clear_state_bit(struct extent_io_tree *tree,
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -0400412 struct extent_state *state,
413 int *bits, int wake)
Chris Masond1310b22008-01-24 16:13:08 -0500414{
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -0400415 int bits_to_clear = *bits & ~EXTENT_CTLBITS;
Josef Bacik32c00af2009-10-08 13:34:05 -0400416 int ret = state->state & bits_to_clear;
Chris Masond1310b22008-01-24 16:13:08 -0500417
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -0400418 if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
Chris Masond1310b22008-01-24 16:13:08 -0500419 u64 range = state->end - state->start + 1;
420 WARN_ON(range > tree->dirty_bytes);
421 tree->dirty_bytes -= range;
422 }
Chris Mason291d6732008-01-29 15:55:23 -0500423 clear_state_cb(tree, state, bits);
Josef Bacik32c00af2009-10-08 13:34:05 -0400424 state->state &= ~bits_to_clear;
Chris Masond1310b22008-01-24 16:13:08 -0500425 if (wake)
426 wake_up(&state->wq);
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -0400427 if (state->state == 0) {
Chris Mason70dec802008-01-29 09:59:12 -0500428 if (state->tree) {
Chris Masond1310b22008-01-24 16:13:08 -0500429 rb_erase(&state->rb_node, &tree->state);
Chris Mason70dec802008-01-29 09:59:12 -0500430 state->tree = NULL;
Chris Masond1310b22008-01-24 16:13:08 -0500431 free_extent_state(state);
432 } else {
433 WARN_ON(1);
434 }
435 } else {
436 merge_state(tree, state);
437 }
438 return ret;
439}
440
Xiao Guangrong82337672011-04-20 06:44:57 +0000441static struct extent_state *
442alloc_extent_state_atomic(struct extent_state *prealloc)
443{
444 if (!prealloc)
445 prealloc = alloc_extent_state(GFP_ATOMIC);
446
447 return prealloc;
448}
449
Jeff Mahoneyc2d904e2011-10-03 23:22:32 -0400450void extent_io_tree_panic(struct extent_io_tree *tree, int err)
451{
452 btrfs_panic(tree_fs_info(tree), err, "Locking error: "
453 "Extent tree was modified by another "
454 "thread while locked.");
455}
456
Chris Masond1310b22008-01-24 16:13:08 -0500457/*
458 * clear some bits on a range in the tree. This may require splitting
459 * or inserting elements in the tree, so the gfp mask is used to
460 * indicate which allocations or sleeping are allowed.
461 *
462 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
463 * the given range from the tree regardless of state (ie for truncate).
464 *
465 * the range [start, end] is inclusive.
466 *
Jeff Mahoney6763af82012-03-01 14:56:29 +0100467 * This takes the tree lock, and returns 0 on success and < 0 on error.
Chris Masond1310b22008-01-24 16:13:08 -0500468 */
469int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
Chris Mason2c64c532009-09-02 15:04:12 -0400470 int bits, int wake, int delete,
471 struct extent_state **cached_state,
472 gfp_t mask)
Chris Masond1310b22008-01-24 16:13:08 -0500473{
474 struct extent_state *state;
Chris Mason2c64c532009-09-02 15:04:12 -0400475 struct extent_state *cached;
Chris Masond1310b22008-01-24 16:13:08 -0500476 struct extent_state *prealloc = NULL;
Chris Mason2c64c532009-09-02 15:04:12 -0400477 struct rb_node *next_node;
Chris Masond1310b22008-01-24 16:13:08 -0500478 struct rb_node *node;
Yan Zheng5c939df2009-05-27 09:16:03 -0400479 u64 last_end;
Chris Masond1310b22008-01-24 16:13:08 -0500480 int err;
Josef Bacik2ac55d42010-02-03 19:33:23 +0000481 int clear = 0;
Chris Masond1310b22008-01-24 16:13:08 -0500482
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -0400483 if (delete)
484 bits |= ~EXTENT_CTLBITS;
485 bits |= EXTENT_FIRST_DELALLOC;
486
Josef Bacik2ac55d42010-02-03 19:33:23 +0000487 if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
488 clear = 1;
Chris Masond1310b22008-01-24 16:13:08 -0500489again:
490 if (!prealloc && (mask & __GFP_WAIT)) {
491 prealloc = alloc_extent_state(mask);
492 if (!prealloc)
493 return -ENOMEM;
494 }
495
Chris Masoncad321a2008-12-17 14:51:42 -0500496 spin_lock(&tree->lock);
Chris Mason2c64c532009-09-02 15:04:12 -0400497 if (cached_state) {
498 cached = *cached_state;
Josef Bacik2ac55d42010-02-03 19:33:23 +0000499
500 if (clear) {
501 *cached_state = NULL;
502 cached_state = NULL;
503 }
504
Josef Bacikdf98b6e2011-06-20 14:53:48 -0400505 if (cached && cached->tree && cached->start <= start &&
506 cached->end > start) {
Josef Bacik2ac55d42010-02-03 19:33:23 +0000507 if (clear)
508 atomic_dec(&cached->refs);
Chris Mason2c64c532009-09-02 15:04:12 -0400509 state = cached;
Chris Mason42daec22009-09-23 19:51:09 -0400510 goto hit_next;
Chris Mason2c64c532009-09-02 15:04:12 -0400511 }
Josef Bacik2ac55d42010-02-03 19:33:23 +0000512 if (clear)
513 free_extent_state(cached);
Chris Mason2c64c532009-09-02 15:04:12 -0400514 }
Chris Masond1310b22008-01-24 16:13:08 -0500515 /*
516 * this search will find the extents that end after
517 * our range starts
518 */
Chris Mason80ea96b2008-02-01 14:51:59 -0500519 node = tree_search(tree, start);
Chris Masond1310b22008-01-24 16:13:08 -0500520 if (!node)
521 goto out;
522 state = rb_entry(node, struct extent_state, rb_node);
Chris Mason2c64c532009-09-02 15:04:12 -0400523hit_next:
Chris Masond1310b22008-01-24 16:13:08 -0500524 if (state->start > end)
525 goto out;
526 WARN_ON(state->end < start);
Yan Zheng5c939df2009-05-27 09:16:03 -0400527 last_end = state->end;
Chris Masond1310b22008-01-24 16:13:08 -0500528
Liu Bo04493142012-02-16 18:34:37 +0800529 if (state->end < end && !need_resched())
530 next_node = rb_next(&state->rb_node);
531 else
532 next_node = NULL;
533
534 /* the state doesn't have the wanted bits, go ahead */
535 if (!(state->state & bits))
536 goto next;
537
Chris Masond1310b22008-01-24 16:13:08 -0500538 /*
539 * | ---- desired range ---- |
540 * | state | or
541 * | ------------- state -------------- |
542 *
543 * We need to split the extent we found, and may flip
544 * bits on second half.
545 *
546 * If the extent we found extends past our range, we
547 * just split and search again. It'll get split again
548 * the next time though.
549 *
550 * If the extent we found is inside our range, we clear
551 * the desired bit on it.
552 */
553
554 if (state->start < start) {
Xiao Guangrong82337672011-04-20 06:44:57 +0000555 prealloc = alloc_extent_state_atomic(prealloc);
556 BUG_ON(!prealloc);
Chris Masond1310b22008-01-24 16:13:08 -0500557 err = split_state(tree, state, prealloc, start);
Jeff Mahoneyc2d904e2011-10-03 23:22:32 -0400558 if (err)
559 extent_io_tree_panic(tree, err);
560
Chris Masond1310b22008-01-24 16:13:08 -0500561 prealloc = NULL;
562 if (err)
563 goto out;
564 if (state->end <= end) {
Jeff Mahoney6763af82012-03-01 14:56:29 +0100565 clear_state_bit(tree, state, &bits, wake);
Yan Zheng5c939df2009-05-27 09:16:03 -0400566 if (last_end == (u64)-1)
567 goto out;
568 start = last_end + 1;
Chris Masond1310b22008-01-24 16:13:08 -0500569 }
570 goto search_again;
571 }
572 /*
573 * | ---- desired range ---- |
574 * | state |
575 * We need to split the extent, and clear the bit
576 * on the first half
577 */
578 if (state->start <= end && state->end > end) {
Xiao Guangrong82337672011-04-20 06:44:57 +0000579 prealloc = alloc_extent_state_atomic(prealloc);
580 BUG_ON(!prealloc);
Chris Masond1310b22008-01-24 16:13:08 -0500581 err = split_state(tree, state, prealloc, end + 1);
Jeff Mahoneyc2d904e2011-10-03 23:22:32 -0400582 if (err)
583 extent_io_tree_panic(tree, err);
584
Chris Masond1310b22008-01-24 16:13:08 -0500585 if (wake)
586 wake_up(&state->wq);
Chris Mason42daec22009-09-23 19:51:09 -0400587
Jeff Mahoney6763af82012-03-01 14:56:29 +0100588 clear_state_bit(tree, prealloc, &bits, wake);
Josef Bacik9ed74f22009-09-11 16:12:44 -0400589
Chris Masond1310b22008-01-24 16:13:08 -0500590 prealloc = NULL;
591 goto out;
592 }
Chris Mason42daec22009-09-23 19:51:09 -0400593
Jeff Mahoney6763af82012-03-01 14:56:29 +0100594 clear_state_bit(tree, state, &bits, wake);
Liu Bo04493142012-02-16 18:34:37 +0800595next:
Yan Zheng5c939df2009-05-27 09:16:03 -0400596 if (last_end == (u64)-1)
597 goto out;
598 start = last_end + 1;
Chris Mason2c64c532009-09-02 15:04:12 -0400599 if (start <= end && next_node) {
600 state = rb_entry(next_node, struct extent_state,
601 rb_node);
Liu Bo692e5752012-02-16 18:34:36 +0800602 goto hit_next;
Chris Mason2c64c532009-09-02 15:04:12 -0400603 }
Chris Masond1310b22008-01-24 16:13:08 -0500604 goto search_again;
605
606out:
Chris Masoncad321a2008-12-17 14:51:42 -0500607 spin_unlock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -0500608 if (prealloc)
609 free_extent_state(prealloc);
610
Jeff Mahoney6763af82012-03-01 14:56:29 +0100611 return 0;
Chris Masond1310b22008-01-24 16:13:08 -0500612
613search_again:
614 if (start > end)
615 goto out;
Chris Masoncad321a2008-12-17 14:51:42 -0500616 spin_unlock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -0500617 if (mask & __GFP_WAIT)
618 cond_resched();
619 goto again;
620}
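
/*
 * Illustrative sketch (hypothetical caller): drop the dirty and delalloc
 * bits on a byte range and wake any waiters; 'tree', 'start' and 'end'
 * are assumed to exist in the caller.
 *
 *	clear_extent_bit(tree, start, end,
 *			 EXTENT_DIRTY | EXTENT_DELALLOC, 1, 0,
 *			 NULL, GFP_NOFS);
 */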

static void wait_on_state(struct extent_io_tree *tree,
			  struct extent_state *state)
		__releases(tree->lock)
		__acquires(tree->lock)
{
	DEFINE_WAIT(wait);
	prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
	spin_unlock(&tree->lock);
	schedule();
	spin_lock(&tree->lock);
	finish_wait(&state->wq, &wait);
}

/*
 * waits for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function
 */
void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
{
	struct extent_state *state;
	struct rb_node *node;

	spin_lock(&tree->lock);
again:
	while (1) {
		/*
		 * this search will find all the extents that end after
		 * our range starts
		 */
		node = tree_search(tree, start);
		if (!node)
			break;

		state = rb_entry(node, struct extent_state, rb_node);

		if (state->start > end)
			goto out;

		if (state->state & bits) {
			start = state->start;
			atomic_inc(&state->refs);
			wait_on_state(tree, state);
			free_extent_state(state);
			goto again;
		}
		start = state->end + 1;

		if (start > end)
			break;

		cond_resched_lock(&tree->lock);
	}
out:
	spin_unlock(&tree->lock);
}

static void set_state_bits(struct extent_io_tree *tree,
			   struct extent_state *state,
			   int *bits)
{
	int bits_to_set = *bits & ~EXTENT_CTLBITS;

	set_state_cb(tree, state, bits);
	if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
		u64 range = state->end - state->start + 1;
		tree->dirty_bytes += range;
	}
	state->state |= bits_to_set;
}

static void cache_state(struct extent_state *state,
			struct extent_state **cached_ptr)
{
	if (cached_ptr && !(*cached_ptr)) {
		if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) {
			*cached_ptr = state;
			atomic_inc(&state->refs);
		}
	}
}

static void uncache_state(struct extent_state **cached_ptr)
{
	if (cached_ptr && (*cached_ptr)) {
		struct extent_state *state = *cached_ptr;
		*cached_ptr = NULL;
		free_extent_state(state);
	}
}

/*
 * set some bits on a range in the tree.  This may require allocations or
 * sleeping, so the gfp mask is used to indicate what is allowed.
 *
 * If any of the exclusive bits are set, this will fail with -EEXIST if some
 * part of the range already has the desired bits set.  The start of the
 * existing range is returned in failed_start in this case.
 *
 * [start, end] is inclusive.  This takes the tree lock.
 */

int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   int bits, int exclusive_bits, u64 *failed_start,
		   struct extent_state **cached_state, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	int err = 0;
	u64 last_start;
	u64 last_end;

	bits |= EXTENT_FIRST_DELALLOC;
again:
	if (!prealloc && (mask & __GFP_WAIT)) {
		prealloc = alloc_extent_state(mask);
		BUG_ON(!prealloc);
	}

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->start <= start && state->end > start &&
		    state->tree) {
			node = &state->rb_node;
			goto hit_next;
		}
	}
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node) {
		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);
		err = insert_state(tree, prealloc, start, end, &bits);
		if (err)
			extent_io_tree_panic(tree, err);

		prealloc = NULL;
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
hit_next:
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		struct rb_node *next_node;
		if (state->state & exclusive_bits) {
			*failed_start = state->start;
			err = -EEXIST;
			goto out;
		}

		set_state_bits(tree, state, &bits);

		cache_state(state, cached_state);
		merge_state(tree, state);
		if (last_end == (u64)-1)
			goto out;

		start = last_end + 1;
		next_node = rb_next(&state->rb_node);
		if (next_node && start < end && prealloc && !need_resched()) {
			state = rb_entry(next_node, struct extent_state,
					 rb_node);
			if (state->start == start)
				goto hit_next;
		}
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again.  It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
	if (state->start < start) {
		if (state->state & exclusive_bits) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}

		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);
		err = split_state(tree, state, prealloc, start);
		if (err)
			extent_io_tree_panic(tree, err);

		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set_state_bits(tree, state, &bits);
			cache_state(state, cached_state);
			merge_state(tree, state);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;

		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);

		/*
		 * Avoid to free 'prealloc' if it can be merged with
		 * the later extent.
		 */
		err = insert_state(tree, prealloc, start, this_end,
				   &bits);
		if (err)
			extent_io_tree_panic(tree, err);

		cache_state(prealloc, cached_state);
		prealloc = NULL;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and set the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		if (state->state & exclusive_bits) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}

		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);
		err = split_state(tree, state, prealloc, end + 1);
		if (err)
			extent_io_tree_panic(tree, err);

		set_state_bits(tree, prealloc, &bits);
		cache_state(prealloc, cached_state);
		merge_state(tree, prealloc);
		prealloc = NULL;
		goto out;
	}

	goto search_again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return err;

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (mask & __GFP_WAIT)
		cond_resched();
	goto again;
}
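
/*
 * Illustrative sketch (hypothetical caller): mark a range dirty with no
 * exclusive bits, mirroring the set_extent_dirty() wrapper further down;
 * with no exclusive bits, failed_start is never written.
 *
 *	set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
 *		       NULL, GFP_NOFS);
 */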

/**
 * convert_extent - convert all bits in a given range from one bit to another
 * @tree:	the io tree to search
 * @start:	the start offset in bytes
 * @end:	the end offset in bytes (inclusive)
 * @bits:	the bits to set in this range
 * @clear_bits:	the bits to clear in this range
 * @mask:	the allocation mask
 *
 * This will go through and set bits for the given range.  If any states exist
 * already in this range they are set with the given bit and cleared of the
 * clear_bits.  This is only meant to be used by things that are mergeable, ie
 * converting from say DELALLOC to DIRTY.  This is not meant to be used with
 * boundary bits like LOCK.
 */
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		       int bits, int clear_bits, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	int err = 0;
	u64 last_start;
	u64 last_end;

again:
	if (!prealloc && (mask & __GFP_WAIT)) {
		prealloc = alloc_extent_state(mask);
		if (!prealloc)
			return -ENOMEM;
	}

	spin_lock(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}
		err = insert_state(tree, prealloc, start, end, &bits);
		prealloc = NULL;
		if (err)
			extent_io_tree_panic(tree, err);
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
hit_next:
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		struct rb_node *next_node;

		set_state_bits(tree, state, &bits);
		clear_state_bit(tree, state, &clear_bits, 0);
		if (last_end == (u64)-1)
			goto out;

		start = last_end + 1;
		next_node = rb_next(&state->rb_node);
		if (next_node && start < end && prealloc && !need_resched()) {
			state = rb_entry(next_node, struct extent_state,
					 rb_node);
			if (state->start == start)
				goto hit_next;
		}
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again.  It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
	if (state->start < start) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}
		err = split_state(tree, state, prealloc, start);
		if (err)
			extent_io_tree_panic(tree, err);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set_state_bits(tree, state, &bits);
			clear_state_bit(tree, state, &clear_bits, 0);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}

		/*
		 * Avoid to free 'prealloc' if it can be merged with
		 * the later extent.
		 */
		err = insert_state(tree, prealloc, start, this_end,
				   &bits);
		if (err)
			extent_io_tree_panic(tree, err);
		prealloc = NULL;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and set the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}

		err = split_state(tree, state, prealloc, end + 1);
		if (err)
			extent_io_tree_panic(tree, err);

		set_state_bits(tree, prealloc, &bits);
		clear_state_bit(tree, prealloc, &clear_bits, 0);
		prealloc = NULL;
		goto out;
	}

	goto search_again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return err;

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (mask & __GFP_WAIT)
		cond_resched();
	goto again;
}
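
/*
 * Illustrative sketch (hypothetical caller): convert a mergeable range
 * from DELALLOC to DIRTY in one pass, as described in the comment above.
 *
 *	convert_extent_bit(tree, start, end, EXTENT_DIRTY,
 *			   EXTENT_DELALLOC, GFP_NOFS);
 */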

/* wrappers around set/clear extent bit */
int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
		     gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
			      NULL, mask);
}

int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
		    int bits, gfp_t mask)
{
	return set_extent_bit(tree, start, end, bits, 0, NULL,
			      NULL, mask);
}

int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
		      int bits, gfp_t mask)
{
	return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
}

int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
			struct extent_state **cached_state, gfp_t mask)
{
	return set_extent_bit(tree, start, end,
			      EXTENT_DELALLOC | EXTENT_UPTODATE,
			      0, NULL, cached_state, mask);
}

int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
		       gfp_t mask)
{
	return clear_extent_bit(tree, start, end,
				EXTENT_DIRTY | EXTENT_DELALLOC |
				EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask);
}

int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
		   gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
			      NULL, mask);
}

int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
			struct extent_state **cached_state, gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0,
			      NULL, cached_state, mask);
}

static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
				 u64 end, struct extent_state **cached_state,
				 gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
				cached_state, mask);
}

/*
 * either insert or lock state struct between start and end use mask to tell
 * us if waiting is desired.
 */
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
		     int bits, struct extent_state **cached_state)
{
	int err;
	u64 failed_start;
	while (1) {
		err = set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
				     EXTENT_LOCKED, &failed_start,
				     cached_state, GFP_NOFS);
		if (err == -EEXIST) {
			wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
			start = failed_start;
		} else
			break;
		WARN_ON(start > end);
	}
	return err;
}

int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
{
	return lock_extent_bits(tree, start, end, 0, NULL);
}

int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
{
	int err;
	u64 failed_start;

	err = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
			     &failed_start, NULL, GFP_NOFS);
	if (err == -EEXIST) {
		if (failed_start > start)
			clear_extent_bit(tree, start, failed_start - 1,
					 EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS);
		return 0;
	}
	return 1;
}

int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
			 struct extent_state **cached, gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
				mask);
}

int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
{
	return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
				GFP_NOFS);
}
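
/*
 * Illustrative sketch (hypothetical caller): the lock/unlock helpers pair
 * up around an I/O section on an inclusive byte range.
 *
 *	lock_extent(tree, start, end);
 *	... read or write [start, end] ...
 *	unlock_extent(tree, start, end);
 */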

/*
 * helper function to set both pages and extents in the tree writeback
 */
static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		BUG_ON(!page);
		set_page_writeback(page);
		page_cache_release(page);
		index++;
	}
	return 0;
}

/* find the first state struct with 'bits' set after 'start', and
 * return it.  tree->lock must be held.  NULL will be returned if
 * nothing was found after 'start'
 */
struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
						 u64 start, int bits)
{
	struct rb_node *node;
	struct extent_state *state;

	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node)
		goto out;

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->end >= start && (state->state & bits))
			return state;

		node = rb_next(node);
		if (!node)
			break;
	}
out:
	return NULL;
}

/*
 * find the first offset in the io tree with 'bits' set. zero is
 * returned if we find something, and *start_ret and *end_ret are
 * set to reflect the state struct that was found.
 *
 * If nothing was found, 1 is returned, < 0 on error
 */
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
			  u64 *start_ret, u64 *end_ret, int bits)
{
	struct extent_state *state;
	int ret = 1;

	spin_lock(&tree->lock);
	state = find_first_extent_bit_state(tree, start, bits);
	if (state) {
		*start_ret = state->start;
		*end_ret = state->end;
		ret = 0;
	}
	spin_unlock(&tree->lock);
	return ret;
}
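
/*
 * Illustrative sketch (hypothetical caller): find the first range at or
 * after 'start' that has EXTENT_DIRTY set.
 *
 *	u64 found_start, found_end;
 *
 *	if (find_first_extent_bit(tree, start, &found_start,
 *				  &found_end, EXTENT_DIRTY) == 0) {
 *		... found_start/found_end describe the range found ...
 *	}
 */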
1299
1300/*
Chris Masond352ac62008-09-29 15:18:18 -04001301 * find a contiguous range of bytes in the file marked as delalloc, not
1302 * more than 'max_bytes'. start and end are used to return the range,
1303 *
1304 * 1 is returned if we find something, 0 if nothing was in the tree
1305 */
Chris Masonc8b97812008-10-29 14:49:59 -04001306static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
Josef Bacikc2a128d2010-02-02 21:19:11 +00001307 u64 *start, u64 *end, u64 max_bytes,
1308 struct extent_state **cached_state)
Chris Masond1310b22008-01-24 16:13:08 -05001309{
1310 struct rb_node *node;
1311 struct extent_state *state;
1312 u64 cur_start = *start;
1313 u64 found = 0;
1314 u64 total_bytes = 0;
1315
Chris Masoncad321a2008-12-17 14:51:42 -05001316 spin_lock(&tree->lock);
Chris Masonc8b97812008-10-29 14:49:59 -04001317
Chris Masond1310b22008-01-24 16:13:08 -05001318 /*
1319 * this search will find all the extents that end after
1320 * our range starts.
1321 */
Chris Mason80ea96b2008-02-01 14:51:59 -05001322 node = tree_search(tree, cur_start);
Peter2b114d12008-04-01 11:21:40 -04001323 if (!node) {
Chris Mason3b951512008-04-17 11:29:12 -04001324 if (!found)
1325 *end = (u64)-1;
Chris Masond1310b22008-01-24 16:13:08 -05001326 goto out;
1327 }
1328
Chris Masond3977122009-01-05 21:25:51 -05001329 while (1) {
Chris Masond1310b22008-01-24 16:13:08 -05001330 state = rb_entry(node, struct extent_state, rb_node);
Zheng Yan5b21f2e2008-09-26 10:05:38 -04001331 if (found && (state->start != cur_start ||
1332 (state->state & EXTENT_BOUNDARY))) {
Chris Masond1310b22008-01-24 16:13:08 -05001333 goto out;
1334 }
1335 if (!(state->state & EXTENT_DELALLOC)) {
1336 if (!found)
1337 *end = state->end;
1338 goto out;
1339 }
Josef Bacikc2a128d2010-02-02 21:19:11 +00001340 if (!found) {
Chris Masond1310b22008-01-24 16:13:08 -05001341 *start = state->start;
Josef Bacikc2a128d2010-02-02 21:19:11 +00001342 *cached_state = state;
1343 atomic_inc(&state->refs);
1344 }
Chris Masond1310b22008-01-24 16:13:08 -05001345 found++;
1346 *end = state->end;
1347 cur_start = state->end + 1;
1348 node = rb_next(node);
1349 if (!node)
1350 break;
1351 total_bytes += state->end - state->start + 1;
1352 if (total_bytes >= max_bytes)
1353 break;
1354 }
1355out:
Chris Masoncad321a2008-12-17 14:51:42 -05001356 spin_unlock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001357 return found;
1358}
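/*
 * Calling-convention sketch for the search above (hypothetical caller,
 * for illustration only): the search begins at *start, the delalloc
 * range comes back through *start/*end, and when something is found an
 * extra reference is taken on *cached_state that the caller must drop
 * with free_extent_state().
 */
static u64 example_find_delalloc(struct extent_io_tree *tree, u64 offset)
{
	struct extent_state *cached = NULL;
	u64 start = offset;
	u64 end = 0;
	u64 found;

	found = find_delalloc_range(tree, &start, &end,
				    64 * 1024 * 1024, &cached);
	/* free_extent_state() tolerates NULL, so this is safe either way */
	free_extent_state(cached);
	return found;
}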
1359
Jeff Mahoney143bede2012-03-01 14:56:26 +01001360static noinline void __unlock_for_delalloc(struct inode *inode,
1361 struct page *locked_page,
1362 u64 start, u64 end)
Chris Masonc8b97812008-10-29 14:49:59 -04001363{
1364 int ret;
1365 struct page *pages[16];
1366 unsigned long index = start >> PAGE_CACHE_SHIFT;
1367 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1368 unsigned long nr_pages = end_index - index + 1;
1369 int i;
1370
1371 if (index == locked_page->index && end_index == index)
Jeff Mahoney143bede2012-03-01 14:56:26 +01001372 return;
Chris Masonc8b97812008-10-29 14:49:59 -04001373
Chris Masond3977122009-01-05 21:25:51 -05001374 while (nr_pages > 0) {
Chris Masonc8b97812008-10-29 14:49:59 -04001375 ret = find_get_pages_contig(inode->i_mapping, index,
Chris Mason5b050f02008-11-11 09:34:41 -05001376 min_t(unsigned long, nr_pages,
1377 ARRAY_SIZE(pages)), pages);
Chris Masonc8b97812008-10-29 14:49:59 -04001378 for (i = 0; i < ret; i++) {
1379 if (pages[i] != locked_page)
1380 unlock_page(pages[i]);
1381 page_cache_release(pages[i]);
1382 }
1383 nr_pages -= ret;
1384 index += ret;
1385 cond_resched();
1386 }
Chris Masonc8b97812008-10-29 14:49:59 -04001387}
1388
1389static noinline int lock_delalloc_pages(struct inode *inode,
1390 struct page *locked_page,
1391 u64 delalloc_start,
1392 u64 delalloc_end)
1393{
1394 unsigned long index = delalloc_start >> PAGE_CACHE_SHIFT;
1395 unsigned long start_index = index;
1396 unsigned long end_index = delalloc_end >> PAGE_CACHE_SHIFT;
1397 unsigned long pages_locked = 0;
1398 struct page *pages[16];
1399 unsigned long nrpages;
1400 int ret;
1401 int i;
1402
1403 /* the caller is responsible for locking the start index */
1404 if (index == locked_page->index && index == end_index)
1405 return 0;
1406
1407 /* skip the page at the start index */
1408 nrpages = end_index - index + 1;
Chris Masond3977122009-01-05 21:25:51 -05001409 while (nrpages > 0) {
Chris Masonc8b97812008-10-29 14:49:59 -04001410 ret = find_get_pages_contig(inode->i_mapping, index,
Chris Mason5b050f02008-11-11 09:34:41 -05001411 min_t(unsigned long,
1412 nrpages, ARRAY_SIZE(pages)), pages);
Chris Masonc8b97812008-10-29 14:49:59 -04001413 if (ret == 0) {
1414 ret = -EAGAIN;
1415 goto done;
1416 }
1417 /* now we have an array of pages, lock them all */
1418 for (i = 0; i < ret; i++) {
1419 /*
1420 * the caller is taking responsibility for
1421 * locked_page
1422 */
Chris Mason771ed682008-11-06 22:02:51 -05001423 if (pages[i] != locked_page) {
Chris Masonc8b97812008-10-29 14:49:59 -04001424 lock_page(pages[i]);
Chris Masonf2b1c412008-11-10 07:31:30 -05001425 if (!PageDirty(pages[i]) ||
1426 pages[i]->mapping != inode->i_mapping) {
Chris Mason771ed682008-11-06 22:02:51 -05001427 ret = -EAGAIN;
1428 unlock_page(pages[i]);
1429 page_cache_release(pages[i]);
1430 goto done;
1431 }
1432 }
Chris Masonc8b97812008-10-29 14:49:59 -04001433 page_cache_release(pages[i]);
Chris Mason771ed682008-11-06 22:02:51 -05001434 pages_locked++;
Chris Masonc8b97812008-10-29 14:49:59 -04001435 }
Chris Masonc8b97812008-10-29 14:49:59 -04001436 nrpages -= ret;
1437 index += ret;
1438 cond_resched();
1439 }
1440 ret = 0;
1441done:
1442 if (ret && pages_locked) {
1443 __unlock_for_delalloc(inode, locked_page,
1444 delalloc_start,
1445 ((u64)(start_index + pages_locked - 1)) <<
1446 PAGE_CACHE_SHIFT);
1447 }
1448 return ret;
1449}
1450
1451/*
1452 * find a contiguous range of bytes in the file marked as delalloc, not
1453 * more than 'max_bytes'. start and end are used to return the range.
1454 *
1455 * 1 is returned if we find something, 0 if nothing was in the tree
1456 */
1457static noinline u64 find_lock_delalloc_range(struct inode *inode,
1458 struct extent_io_tree *tree,
1459 struct page *locked_page,
1460 u64 *start, u64 *end,
1461 u64 max_bytes)
1462{
1463 u64 delalloc_start;
1464 u64 delalloc_end;
1465 u64 found;
Chris Mason9655d292009-09-02 15:22:30 -04001466 struct extent_state *cached_state = NULL;
Chris Masonc8b97812008-10-29 14:49:59 -04001467 int ret;
1468 int loops = 0;
1469
1470again:
1471 /* step one, find a bunch of delalloc bytes starting at start */
1472 delalloc_start = *start;
1473 delalloc_end = 0;
1474 found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
Josef Bacikc2a128d2010-02-02 21:19:11 +00001475 max_bytes, &cached_state);
Chris Mason70b99e62008-10-31 12:46:39 -04001476 if (!found || delalloc_end <= *start) {
Chris Masonc8b97812008-10-29 14:49:59 -04001477 *start = delalloc_start;
1478 *end = delalloc_end;
Josef Bacikc2a128d2010-02-02 21:19:11 +00001479 free_extent_state(cached_state);
Chris Masonc8b97812008-10-29 14:49:59 -04001480 return found;
1481 }
1482
1483 /*
Chris Mason70b99e62008-10-31 12:46:39 -04001484 * start comes from the offset of locked_page. We have to lock
1485 * pages in order, so we can't process delalloc bytes before
1486 * locked_page
1487 */
Chris Masond3977122009-01-05 21:25:51 -05001488 if (delalloc_start < *start)
Chris Mason70b99e62008-10-31 12:46:39 -04001489 delalloc_start = *start;
Chris Mason70b99e62008-10-31 12:46:39 -04001490
1491 /*
Chris Masonc8b97812008-10-29 14:49:59 -04001492 * make sure to limit the number of pages we try to lock down
1493 * if we're looping.
1494 */
Chris Masond3977122009-01-05 21:25:51 -05001495 if (delalloc_end + 1 - delalloc_start > max_bytes && loops)
Chris Mason771ed682008-11-06 22:02:51 -05001496 delalloc_end = delalloc_start + PAGE_CACHE_SIZE - 1;
Chris Masond3977122009-01-05 21:25:51 -05001497
Chris Masonc8b97812008-10-29 14:49:59 -04001498 /* step two, lock all the pages after the page that has start */
1499 ret = lock_delalloc_pages(inode, locked_page,
1500 delalloc_start, delalloc_end);
1501 if (ret == -EAGAIN) {
1502 /* some of the pages are gone, let's avoid looping by
1503 * shortening the size of the delalloc range we're searching
1504 */
Chris Mason9655d292009-09-02 15:22:30 -04001505 free_extent_state(cached_state);
Chris Masonc8b97812008-10-29 14:49:59 -04001506 if (!loops) {
1507 unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
1508 max_bytes = PAGE_CACHE_SIZE - offset;
1509 loops = 1;
1510 goto again;
1511 } else {
1512 found = 0;
1513 goto out_failed;
1514 }
1515 }
1516 BUG_ON(ret);
1517
1518 /* step three, lock the state bits for the whole range */
Jeff Mahoneyd0082372012-03-01 14:57:19 +01001519 lock_extent_bits(tree, delalloc_start, delalloc_end, 0, &cached_state);
Chris Masonc8b97812008-10-29 14:49:59 -04001520
1521 /* then test to make sure it is all still delalloc */
1522 ret = test_range_bit(tree, delalloc_start, delalloc_end,
Chris Mason9655d292009-09-02 15:22:30 -04001523 EXTENT_DELALLOC, 1, cached_state);
Chris Masonc8b97812008-10-29 14:49:59 -04001524 if (!ret) {
Chris Mason9655d292009-09-02 15:22:30 -04001525 unlock_extent_cached(tree, delalloc_start, delalloc_end,
1526 &cached_state, GFP_NOFS);
Chris Masonc8b97812008-10-29 14:49:59 -04001527 __unlock_for_delalloc(inode, locked_page,
1528 delalloc_start, delalloc_end);
1529 cond_resched();
1530 goto again;
1531 }
Chris Mason9655d292009-09-02 15:22:30 -04001532 free_extent_state(cached_state);
Chris Masonc8b97812008-10-29 14:49:59 -04001533 *start = delalloc_start;
1534 *end = delalloc_end;
1535out_failed:
1536 return found;
1537}
1538
1539int extent_clear_unlock_delalloc(struct inode *inode,
1540 struct extent_io_tree *tree,
1541 u64 start, u64 end, struct page *locked_page,
Chris Masona791e352009-10-08 11:27:10 -04001542 unsigned long op)
Chris Masonc8b97812008-10-29 14:49:59 -04001543{
1544 int ret;
1545 struct page *pages[16];
1546 unsigned long index = start >> PAGE_CACHE_SHIFT;
1547 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1548 unsigned long nr_pages = end_index - index + 1;
1549 int i;
Chris Mason771ed682008-11-06 22:02:51 -05001550 int clear_bits = 0;
Chris Masonc8b97812008-10-29 14:49:59 -04001551
Chris Masona791e352009-10-08 11:27:10 -04001552 if (op & EXTENT_CLEAR_UNLOCK)
Chris Mason771ed682008-11-06 22:02:51 -05001553 clear_bits |= EXTENT_LOCKED;
Chris Masona791e352009-10-08 11:27:10 -04001554 if (op & EXTENT_CLEAR_DIRTY)
Chris Masonc8b97812008-10-29 14:49:59 -04001555 clear_bits |= EXTENT_DIRTY;
1556
Chris Masona791e352009-10-08 11:27:10 -04001557 if (op & EXTENT_CLEAR_DELALLOC)
Chris Mason771ed682008-11-06 22:02:51 -05001558 clear_bits |= EXTENT_DELALLOC;
1559
Chris Mason2c64c532009-09-02 15:04:12 -04001560 clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
Josef Bacik32c00af2009-10-08 13:34:05 -04001561 if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
1562 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK |
1563 EXTENT_SET_PRIVATE2)))
Chris Mason771ed682008-11-06 22:02:51 -05001564 return 0;
Chris Masonc8b97812008-10-29 14:49:59 -04001565
Chris Masond3977122009-01-05 21:25:51 -05001566 while (nr_pages > 0) {
Chris Masonc8b97812008-10-29 14:49:59 -04001567 ret = find_get_pages_contig(inode->i_mapping, index,
Chris Mason5b050f02008-11-11 09:34:41 -05001568 min_t(unsigned long,
1569 nr_pages, ARRAY_SIZE(pages)), pages);
Chris Masonc8b97812008-10-29 14:49:59 -04001570 for (i = 0; i < ret; i++) {
Chris Mason8b62b722009-09-02 16:53:46 -04001571
Chris Masona791e352009-10-08 11:27:10 -04001572 if (op & EXTENT_SET_PRIVATE2)
Chris Mason8b62b722009-09-02 16:53:46 -04001573 SetPagePrivate2(pages[i]);
1574
Chris Masonc8b97812008-10-29 14:49:59 -04001575 if (pages[i] == locked_page) {
1576 page_cache_release(pages[i]);
1577 continue;
1578 }
Chris Masona791e352009-10-08 11:27:10 -04001579 if (op & EXTENT_CLEAR_DIRTY)
Chris Masonc8b97812008-10-29 14:49:59 -04001580 clear_page_dirty_for_io(pages[i]);
Chris Masona791e352009-10-08 11:27:10 -04001581 if (op & EXTENT_SET_WRITEBACK)
Chris Masonc8b97812008-10-29 14:49:59 -04001582 set_page_writeback(pages[i]);
Chris Masona791e352009-10-08 11:27:10 -04001583 if (op & EXTENT_END_WRITEBACK)
Chris Masonc8b97812008-10-29 14:49:59 -04001584 end_page_writeback(pages[i]);
Chris Masona791e352009-10-08 11:27:10 -04001585 if (op & EXTENT_CLEAR_UNLOCK_PAGE)
Chris Mason771ed682008-11-06 22:02:51 -05001586 unlock_page(pages[i]);
Chris Masonc8b97812008-10-29 14:49:59 -04001587 page_cache_release(pages[i]);
1588 }
1589 nr_pages -= ret;
1590 index += ret;
1591 cond_resched();
1592 }
1593 return 0;
1594}
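/*
 * Illustrative sketch (hypothetical helper): 'op' is a bitmask, so a
 * caller finishing with a delalloc range typically combines several of
 * the EXTENT_CLEAR_* / EXTENT_SET_* / EXTENT_END_* flags in one call.
 */
static int example_release_delalloc_range(struct inode *inode,
					  struct page *locked_page,
					  u64 start, u64 end)
{
	return extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
					    start, end, locked_page,
					    EXTENT_CLEAR_UNLOCK_PAGE |
					    EXTENT_CLEAR_UNLOCK |
					    EXTENT_CLEAR_DELALLOC |
					    EXTENT_CLEAR_DIRTY |
					    EXTENT_SET_WRITEBACK |
					    EXTENT_END_WRITEBACK);
}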
Chris Masonc8b97812008-10-29 14:49:59 -04001595
Chris Masond352ac62008-09-29 15:18:18 -04001596/*
1597 * count the number of bytes in the tree that have a given bit(s)
1598 * set. This can be fairly slow, except for EXTENT_DIRTY which is
1599 * cached. The total number found is returned.
1600 */
Chris Masond1310b22008-01-24 16:13:08 -05001601u64 count_range_bits(struct extent_io_tree *tree,
1602 u64 *start, u64 search_end, u64 max_bytes,
Chris Masonec29ed52011-02-23 16:23:20 -05001603 unsigned long bits, int contig)
Chris Masond1310b22008-01-24 16:13:08 -05001604{
1605 struct rb_node *node;
1606 struct extent_state *state;
1607 u64 cur_start = *start;
1608 u64 total_bytes = 0;
Chris Masonec29ed52011-02-23 16:23:20 -05001609 u64 last = 0;
Chris Masond1310b22008-01-24 16:13:08 -05001610 int found = 0;
1611
1612 if (search_end <= cur_start) {
Chris Masond1310b22008-01-24 16:13:08 -05001613 WARN_ON(1);
1614 return 0;
1615 }
1616
Chris Masoncad321a2008-12-17 14:51:42 -05001617 spin_lock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001618 if (cur_start == 0 && bits == EXTENT_DIRTY) {
1619 total_bytes = tree->dirty_bytes;
1620 goto out;
1621 }
1622 /*
1623 * this search will find all the extents that end after
1624 * our range starts.
1625 */
Chris Mason80ea96b2008-02-01 14:51:59 -05001626 node = tree_search(tree, cur_start);
Chris Masond3977122009-01-05 21:25:51 -05001627 if (!node)
Chris Masond1310b22008-01-24 16:13:08 -05001628 goto out;
Chris Masond1310b22008-01-24 16:13:08 -05001629
Chris Masond3977122009-01-05 21:25:51 -05001630 while (1) {
Chris Masond1310b22008-01-24 16:13:08 -05001631 state = rb_entry(node, struct extent_state, rb_node);
1632 if (state->start > search_end)
1633 break;
Chris Masonec29ed52011-02-23 16:23:20 -05001634 if (contig && found && state->start > last + 1)
1635 break;
1636 if (state->end >= cur_start && (state->state & bits) == bits) {
Chris Masond1310b22008-01-24 16:13:08 -05001637 total_bytes += min(search_end, state->end) + 1 -
1638 max(cur_start, state->start);
1639 if (total_bytes >= max_bytes)
1640 break;
1641 if (!found) {
Josef Bacikaf60bed2011-05-04 11:11:17 -04001642 *start = max(cur_start, state->start);
Chris Masond1310b22008-01-24 16:13:08 -05001643 found = 1;
1644 }
Chris Masonec29ed52011-02-23 16:23:20 -05001645 last = state->end;
1646 } else if (contig && found) {
1647 break;
Chris Masond1310b22008-01-24 16:13:08 -05001648 }
1649 node = rb_next(node);
1650 if (!node)
1651 break;
1652 }
1653out:
Chris Masoncad321a2008-12-17 14:51:42 -05001654 spin_unlock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001655 return total_bytes;
1656}
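/*
 * Usage sketch (hypothetical helper): counting the dirty bytes in the
 * whole tree.  Starting at offset 0 with only EXTENT_DIRTY requested
 * takes the cached tree->dirty_bytes fast path noted above.
 */
static u64 example_count_dirty_bytes(struct extent_io_tree *tree)
{
	u64 start = 0;

	return count_range_bits(tree, &start, (u64)-1, (u64)-1,
				EXTENT_DIRTY, 0);
}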
Christoph Hellwigb2950862008-12-02 09:54:17 -05001657
Chris Masond352ac62008-09-29 15:18:18 -04001658/*
1659 * set the private field for a given byte offset in the tree. If there isn't
1660 * an extent_state starting at that offset already, -ENOENT is returned.
1661 */
Chris Masond1310b22008-01-24 16:13:08 -05001662int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
1663{
1664 struct rb_node *node;
1665 struct extent_state *state;
1666 int ret = 0;
1667
Chris Masoncad321a2008-12-17 14:51:42 -05001668 spin_lock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001669 /*
1670 * this search will find all the extents that end after
1671 * our range starts.
1672 */
Chris Mason80ea96b2008-02-01 14:51:59 -05001673 node = tree_search(tree, start);
Peter2b114d12008-04-01 11:21:40 -04001674 if (!node) {
Chris Masond1310b22008-01-24 16:13:08 -05001675 ret = -ENOENT;
1676 goto out;
1677 }
1678 state = rb_entry(node, struct extent_state, rb_node);
1679 if (state->start != start) {
1680 ret = -ENOENT;
1681 goto out;
1682 }
1683 state->private = private;
1684out:
Chris Masoncad321a2008-12-17 14:51:42 -05001685 spin_unlock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001686 return ret;
1687}
1688
1689int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
1690{
1691 struct rb_node *node;
1692 struct extent_state *state;
1693 int ret = 0;
1694
Chris Masoncad321a2008-12-17 14:51:42 -05001695 spin_lock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001696 /*
1697 * this search will find all the extents that end after
1698 * our range starts.
1699 */
Chris Mason80ea96b2008-02-01 14:51:59 -05001700 node = tree_search(tree, start);
Peter2b114d12008-04-01 11:21:40 -04001701 if (!node) {
Chris Masond1310b22008-01-24 16:13:08 -05001702 ret = -ENOENT;
1703 goto out;
1704 }
1705 state = rb_entry(node, struct extent_state, rb_node);
1706 if (state->start != start) {
1707 ret = -ENOENT;
1708 goto out;
1709 }
1710 *private = state->private;
1711out:
Chris Masoncad321a2008-12-17 14:51:42 -05001712 spin_unlock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001713 return ret;
1714}
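/*
 * Round-trip sketch for the private field (hypothetical helper): a u64
 * value (a checksum, or a pointer cast to u64 as the io failure code
 * below does) is stashed on the state that starts at 'start' and read
 * back later.  Both calls return -ENOENT if no state starts there.
 */
static int example_stash_private(struct extent_io_tree *tree, u64 start,
				 u64 value)
{
	u64 readback = 0;
	int ret;

	ret = set_state_private(tree, start, value);
	if (ret)
		return ret;
	ret = get_state_private(tree, start, &readback);
	if (!ret && readback != value)
		ret = -EIO;
	return ret;
}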
1715
1716/*
1717 * searches a range in the state tree for a given mask.
Chris Mason70dec802008-01-29 09:59:12 -05001718 * If 'filled' == 1, this returns 1 only if every extent covering the
Chris Masond1310b22008-01-24 16:13:08 -05001719 * range has the bits set. Otherwise, 1 is returned if any bit in the
1720 * range is found set.
1721 */
1722int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
Chris Mason9655d292009-09-02 15:22:30 -04001723 int bits, int filled, struct extent_state *cached)
Chris Masond1310b22008-01-24 16:13:08 -05001724{
1725 struct extent_state *state = NULL;
1726 struct rb_node *node;
1727 int bitset = 0;
Chris Masond1310b22008-01-24 16:13:08 -05001728
Chris Masoncad321a2008-12-17 14:51:42 -05001729 spin_lock(&tree->lock);
Josef Bacikdf98b6e2011-06-20 14:53:48 -04001730 if (cached && cached->tree && cached->start <= start &&
1731 cached->end > start)
Chris Mason9655d292009-09-02 15:22:30 -04001732 node = &cached->rb_node;
1733 else
1734 node = tree_search(tree, start);
Chris Masond1310b22008-01-24 16:13:08 -05001735 while (node && start <= end) {
1736 state = rb_entry(node, struct extent_state, rb_node);
1737
1738 if (filled && state->start > start) {
1739 bitset = 0;
1740 break;
1741 }
1742
1743 if (state->start > end)
1744 break;
1745
1746 if (state->state & bits) {
1747 bitset = 1;
1748 if (!filled)
1749 break;
1750 } else if (filled) {
1751 bitset = 0;
1752 break;
1753 }
Chris Mason46562ce2009-09-23 20:23:16 -04001754
1755 if (state->end == (u64)-1)
1756 break;
1757
Chris Masond1310b22008-01-24 16:13:08 -05001758 start = state->end + 1;
1759 if (start > end)
1760 break;
1761 node = rb_next(node);
1762 if (!node) {
1763 if (filled)
1764 bitset = 0;
1765 break;
1766 }
1767 }
Chris Masoncad321a2008-12-17 14:51:42 -05001768 spin_unlock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001769 return bitset;
1770}
Chris Masond1310b22008-01-24 16:13:08 -05001771
1772/*
1773 * helper function to set a given page up to date if all the
1774 * extents in the tree for that page are up to date
1775 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01001776static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
Chris Masond1310b22008-01-24 16:13:08 -05001777{
1778 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1779 u64 end = start + PAGE_CACHE_SIZE - 1;
Chris Mason9655d292009-09-02 15:22:30 -04001780 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
Chris Masond1310b22008-01-24 16:13:08 -05001781 SetPageUptodate(page);
Chris Masond1310b22008-01-24 16:13:08 -05001782}
1783
1784/*
1785 * helper function to unlock a page if all the extents in the tree
1786 * for that page are unlocked
1787 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01001788static void check_page_locked(struct extent_io_tree *tree, struct page *page)
Chris Masond1310b22008-01-24 16:13:08 -05001789{
1790 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1791 u64 end = start + PAGE_CACHE_SIZE - 1;
Chris Mason9655d292009-09-02 15:22:30 -04001792 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
Chris Masond1310b22008-01-24 16:13:08 -05001793 unlock_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05001794}
1795
1796/*
1797 * helper function to end page writeback if all the extents
1798 * in the tree for that page are done with writeback
1799 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01001800static void check_page_writeback(struct extent_io_tree *tree,
1801 struct page *page)
Chris Masond1310b22008-01-24 16:13:08 -05001802{
Chris Mason1edbb732009-09-02 13:24:36 -04001803 end_page_writeback(page);
Chris Masond1310b22008-01-24 16:13:08 -05001804}
1805
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02001806/*
1807 * When IO fails, either with EIO or csum verification fails, we
1808 * try other mirrors that might have a good copy of the data. This
1809 * io_failure_record is used to record state as we go through all the
1810 * mirrors. If another mirror has good data, the page is set up to date
1811 * and things continue. If a good mirror can't be found, the original
1812 * bio end_io callback is called to indicate things have failed.
1813 */
1814struct io_failure_record {
1815 struct page *page;
1816 u64 start;
1817 u64 len;
1818 u64 logical;
1819 unsigned long bio_flags;
1820 int this_mirror;
1821 int failed_mirror;
1822 int in_validation;
1823};
1824
1825static int free_io_failure(struct inode *inode, struct io_failure_record *rec,
1826 int did_repair)
1827{
1828 int ret;
1829 int err = 0;
1830 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
1831
1832 set_state_private(failure_tree, rec->start, 0);
1833 ret = clear_extent_bits(failure_tree, rec->start,
1834 rec->start + rec->len - 1,
1835 EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
1836 if (ret)
1837 err = ret;
1838
1839 if (did_repair) {
1840 ret = clear_extent_bits(&BTRFS_I(inode)->io_tree, rec->start,
1841 rec->start + rec->len - 1,
1842 EXTENT_DAMAGED, GFP_NOFS);
1843 if (ret && !err)
1844 err = ret;
1845 }
1846
1847 kfree(rec);
1848 return err;
1849}
1850
1851static void repair_io_failure_callback(struct bio *bio, int err)
1852{
1853 complete(bio->bi_private);
1854}
1855
1856/*
1857 * this bypasses the standard btrfs submit functions deliberately, as
1858 * the standard behavior is to write all copies in a raid setup. here we only
1859 * want to write the one bad copy. so we do the mapping for ourselves and issue
1860 * submit_bio directly.
1861 * to avoid any synchronization issues, wait for the data after writing, which
1862 * actually prevents the read that triggered the error from finishing.
1863 * currently, there can be no more than two copies of every data bit. thus,
1864 * exactly one rewrite is required.
1865 */
1866int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
1867 u64 length, u64 logical, struct page *page,
1868 int mirror_num)
1869{
1870 struct bio *bio;
1871 struct btrfs_device *dev;
1872 DECLARE_COMPLETION_ONSTACK(compl);
1873 u64 map_length = 0;
1874 u64 sector;
1875 struct btrfs_bio *bbio = NULL;
1876 int ret;
1877
1878 BUG_ON(!mirror_num);
1879
1880 bio = bio_alloc(GFP_NOFS, 1);
1881 if (!bio)
1882 return -EIO;
1883 bio->bi_private = &compl;
1884 bio->bi_end_io = repair_io_failure_callback;
1885 bio->bi_size = 0;
1886 map_length = length;
1887
1888 ret = btrfs_map_block(map_tree, WRITE, logical,
1889 &map_length, &bbio, mirror_num);
1890 if (ret) {
1891 bio_put(bio);
1892 return -EIO;
1893 }
1894 BUG_ON(mirror_num != bbio->mirror_num);
1895 sector = bbio->stripes[mirror_num-1].physical >> 9;
1896 bio->bi_sector = sector;
1897 dev = bbio->stripes[mirror_num-1].dev;
1898 kfree(bbio);
1899 if (!dev || !dev->bdev || !dev->writeable) {
1900 bio_put(bio);
1901 return -EIO;
1902 }
1903 bio->bi_bdev = dev->bdev;
1904 bio_add_page(bio, page, length, start-page_offset(page));
Stefan Behrens21adbd52011-11-09 13:44:05 +01001905 btrfsic_submit_bio(WRITE_SYNC, bio);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02001906 wait_for_completion(&compl);
1907
1908 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
1909 /* try to remap that extent elsewhere? */
1910 bio_put(bio);
1911 return -EIO;
1912 }
1913
1914 printk(KERN_INFO "btrfs read error corrected: ino %lu off %llu (dev %s "
1915 "sector %llu)\n", page->mapping->host->i_ino, start,
1916 dev->name, sector);
1917
1918 bio_put(bio);
1919 return 0;
1920}
1921
1922/*
1923 * each time an IO finishes, we do a fast check in the IO failure tree
1924 * to see if we need to process or clean up an io_failure_record
1925 */
1926static int clean_io_failure(u64 start, struct page *page)
1927{
1928 u64 private;
1929 u64 private_failure;
1930 struct io_failure_record *failrec;
1931 struct btrfs_mapping_tree *map_tree;
1932 struct extent_state *state;
1933 int num_copies;
1934 int did_repair = 0;
1935 int ret;
1936 struct inode *inode = page->mapping->host;
1937
1938 private = 0;
1939 ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
1940 (u64)-1, 1, EXTENT_DIRTY, 0);
1941 if (!ret)
1942 return 0;
1943
1944 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, start,
1945 &private_failure);
1946 if (ret)
1947 return 0;
1948
1949 failrec = (struct io_failure_record *)(unsigned long) private_failure;
1950 BUG_ON(!failrec->this_mirror);
1951
1952 if (failrec->in_validation) {
1953 /* there was no real error, just free the record */
1954 pr_debug("clean_io_failure: freeing dummy error at %llu\n",
1955 failrec->start);
1956 did_repair = 1;
1957 goto out;
1958 }
1959
1960 spin_lock(&BTRFS_I(inode)->io_tree.lock);
1961 state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
1962 failrec->start,
1963 EXTENT_LOCKED);
1964 spin_unlock(&BTRFS_I(inode)->io_tree.lock);
1965
1966 if (state && state->start == failrec->start) {
1967 map_tree = &BTRFS_I(inode)->root->fs_info->mapping_tree;
1968 num_copies = btrfs_num_copies(map_tree, failrec->logical,
1969 failrec->len);
1970 if (num_copies > 1) {
1971 ret = repair_io_failure(map_tree, start, failrec->len,
1972 failrec->logical, page,
1973 failrec->failed_mirror);
1974 did_repair = !ret;
1975 }
1976 }
1977
1978out:
1979 if (!ret)
1980 ret = free_io_failure(inode, failrec, did_repair);
1981
1982 return ret;
1983}
1984
1985/*
1986 * this is a generic handler for readpage errors (default
1987 * readpage_io_failed_hook). if other copies exist, read those and write back
1988 * good data to the failed position. it does not attempt to remap the failed
1989 * extent elsewhere, hoping the device will be smart enough to do this as
1990 * needed
1991 */
1992
1993static int bio_readpage_error(struct bio *failed_bio, struct page *page,
1994 u64 start, u64 end, int failed_mirror,
1995 struct extent_state *state)
1996{
1997 struct io_failure_record *failrec = NULL;
1998 u64 private;
1999 struct extent_map *em;
2000 struct inode *inode = page->mapping->host;
2001 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2002 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2003 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2004 struct bio *bio;
2005 int num_copies;
2006 int ret;
2007 int read_mode;
2008 u64 logical;
2009
2010 BUG_ON(failed_bio->bi_rw & REQ_WRITE);
2011
2012 ret = get_state_private(failure_tree, start, &private);
2013 if (ret) {
2014 failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
2015 if (!failrec)
2016 return -ENOMEM;
2017 failrec->start = start;
2018 failrec->len = end - start + 1;
2019 failrec->this_mirror = 0;
2020 failrec->bio_flags = 0;
2021 failrec->in_validation = 0;
2022
2023 read_lock(&em_tree->lock);
2024 em = lookup_extent_mapping(em_tree, start, failrec->len);
2025 if (!em) {
2026 read_unlock(&em_tree->lock);
2027 kfree(failrec);
2028 return -EIO;
2029 }
2030
2031 if (em->start > start || em->start + em->len < start) {
2032 free_extent_map(em);
2033 em = NULL;
2034 }
2035 read_unlock(&em_tree->lock);
2036
2037 if (!em || IS_ERR(em)) {
2038 kfree(failrec);
2039 return -EIO;
2040 }
2041 logical = start - em->start;
2042 logical = em->block_start + logical;
2043 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2044 logical = em->block_start;
2045 failrec->bio_flags = EXTENT_BIO_COMPRESSED;
2046 extent_set_compress_type(&failrec->bio_flags,
2047 em->compress_type);
2048 }
2049 pr_debug("bio_readpage_error: (new) logical=%llu, start=%llu, "
2050 "len=%llu\n", logical, start, failrec->len);
2051 failrec->logical = logical;
2052 free_extent_map(em);
2053
2054 /* set the bits in the private failure tree */
2055 ret = set_extent_bits(failure_tree, start, end,
2056 EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
2057 if (ret >= 0)
2058 ret = set_state_private(failure_tree, start,
2059 (u64)(unsigned long)failrec);
2060 /* set the bits in the inode's tree */
2061 if (ret >= 0)
2062 ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED,
2063 GFP_NOFS);
2064 if (ret < 0) {
2065 kfree(failrec);
2066 return ret;
2067 }
2068 } else {
2069 failrec = (struct io_failure_record *)(unsigned long)private;
2070 pr_debug("bio_readpage_error: (found) logical=%llu, "
2071 "start=%llu, len=%llu, validation=%d\n",
2072 failrec->logical, failrec->start, failrec->len,
2073 failrec->in_validation);
2074 /*
2075 * when data can be on disk more than twice, add to failrec here
2076 * (e.g. with a list for failed_mirror) to make
2077 * clean_io_failure() clean all those errors at once.
2078 */
2079 }
2080 num_copies = btrfs_num_copies(
2081 &BTRFS_I(inode)->root->fs_info->mapping_tree,
2082 failrec->logical, failrec->len);
2083 if (num_copies == 1) {
2084 /*
2085 * we only have a single copy of the data, so don't bother with
2086 * all the retry and error correction code that follows. no
2087 * matter what the error is, it is very likely to persist.
2088 */
2089 pr_debug("bio_readpage_error: cannot repair, num_copies == 1. "
2090 "state=%p, num_copies=%d, next_mirror %d, "
2091 "failed_mirror %d\n", state, num_copies,
2092 failrec->this_mirror, failed_mirror);
2093 free_io_failure(inode, failrec, 0);
2094 return -EIO;
2095 }
2096
2097 if (!state) {
2098 spin_lock(&tree->lock);
2099 state = find_first_extent_bit_state(tree, failrec->start,
2100 EXTENT_LOCKED);
2101 if (state && state->start != failrec->start)
2102 state = NULL;
2103 spin_unlock(&tree->lock);
2104 }
2105
2106 /*
2107 * there are two premises:
2108 * a) deliver good data to the caller
2109 * b) correct the bad sectors on disk
2110 */
2111 if (failed_bio->bi_vcnt > 1) {
2112 /*
2113 * to fulfill b), we need to know the exact failing sectors, as
2114 * we don't want to rewrite any more than the failed ones. thus,
2115 * we need separate read requests for the failed bio
2116 *
2117 * if the following BUG_ON triggers, our validation request got
2118 * merged. we need separate requests for our algorithm to work.
2119 */
2120 BUG_ON(failrec->in_validation);
2121 failrec->in_validation = 1;
2122 failrec->this_mirror = failed_mirror;
2123 read_mode = READ_SYNC | REQ_FAILFAST_DEV;
2124 } else {
2125 /*
2126 * we're ready to fulfill a) and b) at the same time. get a good copy
2127 * of the failed sector and if we succeed, we have set up
2128 * everything for repair_io_failure to do the rest for us.
2129 */
2130 if (failrec->in_validation) {
2131 BUG_ON(failrec->this_mirror != failed_mirror);
2132 failrec->in_validation = 0;
2133 failrec->this_mirror = 0;
2134 }
2135 failrec->failed_mirror = failed_mirror;
2136 failrec->this_mirror++;
2137 if (failrec->this_mirror == failed_mirror)
2138 failrec->this_mirror++;
2139 read_mode = READ_SYNC;
2140 }
2141
2142 if (!state || failrec->this_mirror > num_copies) {
2143 pr_debug("bio_readpage_error: (fail) state=%p, num_copies=%d, "
2144 "next_mirror %d, failed_mirror %d\n", state,
2145 num_copies, failrec->this_mirror, failed_mirror);
2146 free_io_failure(inode, failrec, 0);
2147 return -EIO;
2148 }
2149
2150 bio = bio_alloc(GFP_NOFS, 1);
2151 bio->bi_private = state;
2152 bio->bi_end_io = failed_bio->bi_end_io;
2153 bio->bi_sector = failrec->logical >> 9;
2154 bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
2155 bio->bi_size = 0;
2156
2157 bio_add_page(bio, page, failrec->len, start - page_offset(page));
2158
2159 pr_debug("bio_readpage_error: submitting new read[%#x] to "
2160 "this_mirror=%d, num_copies=%d, in_validation=%d\n", read_mode,
2161 failrec->this_mirror, num_copies, failrec->in_validation);
2162
Tsutomu Itoh013bd4c2012-02-16 10:11:40 +09002163 ret = tree->ops->submit_bio_hook(inode, read_mode, bio,
2164 failrec->this_mirror,
2165 failrec->bio_flags, 0);
2166 return ret;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002167}
2168
Chris Masond1310b22008-01-24 16:13:08 -05002169/* lots and lots of room for performance fixes in the end_bio funcs */
2170
Jeff Mahoney87826df2012-02-15 16:23:57 +01002171int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
2172{
2173 int uptodate = (err == 0);
2174 struct extent_io_tree *tree;
2175 int ret;
2176
2177 tree = &BTRFS_I(page->mapping->host)->io_tree;
2178
2179 if (tree->ops && tree->ops->writepage_end_io_hook) {
2180 ret = tree->ops->writepage_end_io_hook(page, start,
2181 end, NULL, uptodate);
2182 if (ret)
2183 uptodate = 0;
2184 }
2185
2186 if (!uptodate && tree->ops &&
2187 tree->ops->writepage_io_failed_hook) {
2188 ret = tree->ops->writepage_io_failed_hook(NULL, page,
2189 start, end, NULL);
2190 /* Writeback already completed */
2191 if (ret == 0)
2192 return 1;
Jeff Mahoney355808c2011-10-03 23:23:14 -04002193 BUG_ON(ret < 0);
Jeff Mahoney87826df2012-02-15 16:23:57 +01002194 }
2195
2196 if (!uptodate) {
2197 clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS);
2198 ClearPageUptodate(page);
2199 SetPageError(page);
2200 }
2201 return 0;
2202}
2203
Chris Masond1310b22008-01-24 16:13:08 -05002204/*
2205 * after a writepage IO is done, we need to:
2206 * clear the uptodate bits on error
2207 * clear the writeback bits in the extent tree for this IO
2208 * end_page_writeback if the page has no more pending IO
2209 *
2210 * Scheduling is not allowed, so the extent state tree is expected
2211 * to have one and only one object corresponding to this IO.
2212 */
Chris Masond1310b22008-01-24 16:13:08 -05002213static void end_bio_extent_writepage(struct bio *bio, int err)
Chris Masond1310b22008-01-24 16:13:08 -05002214{
Chris Masond1310b22008-01-24 16:13:08 -05002215 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
David Woodhouse902b22f2008-08-20 08:51:49 -04002216 struct extent_io_tree *tree;
Chris Masond1310b22008-01-24 16:13:08 -05002217 u64 start;
2218 u64 end;
2219 int whole_page;
2220
Chris Masond1310b22008-01-24 16:13:08 -05002221 do {
2222 struct page *page = bvec->bv_page;
David Woodhouse902b22f2008-08-20 08:51:49 -04002223 tree = &BTRFS_I(page->mapping->host)->io_tree;
2224
Chris Masond1310b22008-01-24 16:13:08 -05002225 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
2226 bvec->bv_offset;
2227 end = start + bvec->bv_len - 1;
2228
2229 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
2230 whole_page = 1;
2231 else
2232 whole_page = 0;
2233
2234 if (--bvec >= bio->bi_io_vec)
2235 prefetchw(&bvec->bv_page->flags);
Chris Mason1259ab72008-05-12 13:39:03 -04002236
Jeff Mahoney87826df2012-02-15 16:23:57 +01002237 if (end_extent_writepage(page, err, start, end))
2238 continue;
Chris Mason70dec802008-01-29 09:59:12 -05002239
Chris Masond1310b22008-01-24 16:13:08 -05002240 if (whole_page)
2241 end_page_writeback(page);
2242 else
2243 check_page_writeback(tree, page);
Chris Masond1310b22008-01-24 16:13:08 -05002244 } while (bvec >= bio->bi_io_vec);
Chris Mason2b1f55b2008-09-24 11:48:04 -04002245
Chris Masond1310b22008-01-24 16:13:08 -05002246 bio_put(bio);
Chris Masond1310b22008-01-24 16:13:08 -05002247}
2248
2249/*
2250 * after a readpage IO is done, we need to:
2251 * clear the uptodate bits on error
2252 * set the uptodate bits if things worked
2253 * set the page up to date if all extents in the tree are uptodate
2254 * clear the lock bit in the extent tree
2255 * unlock the page if there are no other extents locked for it
2256 *
2257 * Scheduling is not allowed, so the extent state tree is expected
2258 * to have one and only one object corresponding to this IO.
2259 */
Chris Masond1310b22008-01-24 16:13:08 -05002260static void end_bio_extent_readpage(struct bio *bio, int err)
Chris Masond1310b22008-01-24 16:13:08 -05002261{
2262 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
Chris Mason4125bf72010-02-03 18:18:45 +00002263 struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
2264 struct bio_vec *bvec = bio->bi_io_vec;
David Woodhouse902b22f2008-08-20 08:51:49 -04002265 struct extent_io_tree *tree;
Chris Masond1310b22008-01-24 16:13:08 -05002266 u64 start;
2267 u64 end;
2268 int whole_page;
2269 int ret;
2270
Chris Masond20f7042008-12-08 16:58:54 -05002271 if (err)
2272 uptodate = 0;
2273
Chris Masond1310b22008-01-24 16:13:08 -05002274 do {
2275 struct page *page = bvec->bv_page;
Arne Jansen507903b2011-04-06 10:02:20 +00002276 struct extent_state *cached = NULL;
2277 struct extent_state *state;
2278
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002279 pr_debug("end_bio_extent_readpage: bi_vcnt=%d, idx=%d, err=%d, "
2280 "mirror=%ld\n", bio->bi_vcnt, bio->bi_idx, err,
2281 (long int)bio->bi_bdev);
David Woodhouse902b22f2008-08-20 08:51:49 -04002282 tree = &BTRFS_I(page->mapping->host)->io_tree;
2283
Chris Masond1310b22008-01-24 16:13:08 -05002284 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
2285 bvec->bv_offset;
2286 end = start + bvec->bv_len - 1;
2287
2288 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
2289 whole_page = 1;
2290 else
2291 whole_page = 0;
2292
Chris Mason4125bf72010-02-03 18:18:45 +00002293 if (++bvec <= bvec_end)
Chris Masond1310b22008-01-24 16:13:08 -05002294 prefetchw(&bvec->bv_page->flags);
2295
Arne Jansen507903b2011-04-06 10:02:20 +00002296 spin_lock(&tree->lock);
Chris Mason0d399202011-04-16 06:55:39 -04002297 state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED);
Chris Mason109b36a2011-04-12 13:57:39 -04002298 if (state && state->start == start) {
Arne Jansen507903b2011-04-06 10:02:20 +00002299 /*
2300 * take a reference on the state, unlock will drop
2301 * the ref
2302 */
2303 cache_state(state, &cached);
2304 }
2305 spin_unlock(&tree->lock);
2306
Chris Masond1310b22008-01-24 16:13:08 -05002307 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
Chris Mason70dec802008-01-29 09:59:12 -05002308 ret = tree->ops->readpage_end_io_hook(page, start, end,
Arne Jansen507903b2011-04-06 10:02:20 +00002309 state);
Chris Masond1310b22008-01-24 16:13:08 -05002310 if (ret)
2311 uptodate = 0;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002312 else
2313 clean_io_failure(start, page);
Chris Masond1310b22008-01-24 16:13:08 -05002314 }
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002315 if (!uptodate) {
Jan Schmidt32240a92011-11-20 07:33:38 -05002316 int failed_mirror;
2317 failed_mirror = (int)(unsigned long)bio->bi_bdev;
Jan Schmidtf4a8e652011-12-01 09:30:36 -05002318 /*
2319 * The generic bio_readpage_error handles errors the
2320 * following way: If possible, new read requests are
2321 * created and submitted and will end up in
2322 * end_bio_extent_readpage as well (if we're lucky, not
2323 * in the !uptodate case). In that case it returns 0 and
2324 * we just go on with the next page in our bio. If it
2325 * can't handle the error it will return -EIO and we
2326 * remain responsible for that page.
2327 */
2328 ret = bio_readpage_error(bio, page, start, end,
2329 failed_mirror, NULL);
Chris Mason7e383262008-04-09 16:28:12 -04002330 if (ret == 0) {
Jan Schmidtf4a8e652011-12-01 09:30:36 -05002331error_handled:
Chris Mason3b951512008-04-17 11:29:12 -04002332 uptodate =
2333 test_bit(BIO_UPTODATE, &bio->bi_flags);
Chris Masond20f7042008-12-08 16:58:54 -05002334 if (err)
2335 uptodate = 0;
Arne Jansen507903b2011-04-06 10:02:20 +00002336 uncache_state(&cached);
Chris Mason7e383262008-04-09 16:28:12 -04002337 continue;
2338 }
Jan Schmidtf4a8e652011-12-01 09:30:36 -05002339 if (tree->ops && tree->ops->readpage_io_failed_hook) {
2340 ret = tree->ops->readpage_io_failed_hook(
2341 bio, page, start, end,
2342 failed_mirror, state);
2343 if (ret == 0)
2344 goto error_handled;
2345 }
Jeff Mahoney355808c2011-10-03 23:23:14 -04002346 BUG_ON(ret < 0);
Chris Mason7e383262008-04-09 16:28:12 -04002347 }
Chris Mason70dec802008-01-29 09:59:12 -05002348
Chris Mason771ed682008-11-06 22:02:51 -05002349 if (uptodate) {
Arne Jansen507903b2011-04-06 10:02:20 +00002350 set_extent_uptodate(tree, start, end, &cached,
David Woodhouse902b22f2008-08-20 08:51:49 -04002351 GFP_ATOMIC);
Chris Mason771ed682008-11-06 22:02:51 -05002352 }
Arne Jansen507903b2011-04-06 10:02:20 +00002353 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
Chris Masond1310b22008-01-24 16:13:08 -05002354
Chris Mason70dec802008-01-29 09:59:12 -05002355 if (whole_page) {
2356 if (uptodate) {
2357 SetPageUptodate(page);
2358 } else {
2359 ClearPageUptodate(page);
2360 SetPageError(page);
2361 }
Chris Masond1310b22008-01-24 16:13:08 -05002362 unlock_page(page);
Chris Mason70dec802008-01-29 09:59:12 -05002363 } else {
2364 if (uptodate) {
2365 check_page_uptodate(tree, page);
2366 } else {
2367 ClearPageUptodate(page);
2368 SetPageError(page);
2369 }
Chris Masond1310b22008-01-24 16:13:08 -05002370 check_page_locked(tree, page);
Chris Mason70dec802008-01-29 09:59:12 -05002371 }
Chris Mason4125bf72010-02-03 18:18:45 +00002372 } while (bvec <= bvec_end);
Chris Masond1310b22008-01-24 16:13:08 -05002373
2374 bio_put(bio);
Chris Masond1310b22008-01-24 16:13:08 -05002375}
2376
Miao Xie88f794e2010-11-22 03:02:55 +00002377struct bio *
2378btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
2379 gfp_t gfp_flags)
Chris Masond1310b22008-01-24 16:13:08 -05002380{
2381 struct bio *bio;
2382
2383 bio = bio_alloc(gfp_flags, nr_vecs);
2384
2385 if (bio == NULL && (current->flags & PF_MEMALLOC)) {
2386 while (!bio && (nr_vecs /= 2))
2387 bio = bio_alloc(gfp_flags, nr_vecs);
2388 }
2389
2390 if (bio) {
Chris Masone1c4b742008-04-22 13:26:46 -04002391 bio->bi_size = 0;
Chris Masond1310b22008-01-24 16:13:08 -05002392 bio->bi_bdev = bdev;
2393 bio->bi_sector = first_sector;
2394 }
2395 return bio;
2396}
2397
Jeff Mahoney355808c2011-10-03 23:23:14 -04002398static int __must_check submit_one_bio(int rw, struct bio *bio,
2399 int mirror_num, unsigned long bio_flags)
Chris Masond1310b22008-01-24 16:13:08 -05002400{
Chris Masond1310b22008-01-24 16:13:08 -05002401 int ret = 0;
Chris Mason70dec802008-01-29 09:59:12 -05002402 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
2403 struct page *page = bvec->bv_page;
2404 struct extent_io_tree *tree = bio->bi_private;
Chris Mason70dec802008-01-29 09:59:12 -05002405 u64 start;
Chris Mason70dec802008-01-29 09:59:12 -05002406
2407 start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
Chris Mason70dec802008-01-29 09:59:12 -05002408
David Woodhouse902b22f2008-08-20 08:51:49 -04002409 bio->bi_private = NULL;
Chris Masond1310b22008-01-24 16:13:08 -05002410
2411 bio_get(bio);
2412
Chris Mason065631f2008-02-20 12:07:25 -05002413 if (tree->ops && tree->ops->submit_bio_hook)
liubo6b82ce82011-01-26 06:21:39 +00002414 ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
Chris Masoneaf25d92010-05-25 09:48:28 -04002415 mirror_num, bio_flags, start);
Chris Mason0b86a832008-03-24 15:01:56 -04002416 else
Stefan Behrens21adbd52011-11-09 13:44:05 +01002417 btrfsic_submit_bio(rw, bio);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002418
Chris Masond1310b22008-01-24 16:13:08 -05002419 if (bio_flagged(bio, BIO_EOPNOTSUPP))
2420 ret = -EOPNOTSUPP;
2421 bio_put(bio);
2422 return ret;
2423}
2424
Jeff Mahoney3444a972011-10-03 23:23:13 -04002425static int merge_bio(struct extent_io_tree *tree, struct page *page,
2426 unsigned long offset, size_t size, struct bio *bio,
2427 unsigned long bio_flags)
2428{
2429 int ret = 0;
2430 if (tree->ops && tree->ops->merge_bio_hook)
2431 ret = tree->ops->merge_bio_hook(page, offset, size, bio,
2432 bio_flags);
2433 BUG_ON(ret < 0);
2434 return ret;
2435
2436}
2437
Chris Masond1310b22008-01-24 16:13:08 -05002438static int submit_extent_page(int rw, struct extent_io_tree *tree,
2439 struct page *page, sector_t sector,
2440 size_t size, unsigned long offset,
2441 struct block_device *bdev,
2442 struct bio **bio_ret,
2443 unsigned long max_pages,
Chris Masonf1885912008-04-09 16:28:12 -04002444 bio_end_io_t end_io_func,
Chris Masonc8b97812008-10-29 14:49:59 -04002445 int mirror_num,
2446 unsigned long prev_bio_flags,
2447 unsigned long bio_flags)
Chris Masond1310b22008-01-24 16:13:08 -05002448{
2449 int ret = 0;
2450 struct bio *bio;
2451 int nr;
Chris Masonc8b97812008-10-29 14:49:59 -04002452 int contig = 0;
2453 int this_compressed = bio_flags & EXTENT_BIO_COMPRESSED;
2454 int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
Chris Mason5b050f02008-11-11 09:34:41 -05002455 size_t page_size = min_t(size_t, size, PAGE_CACHE_SIZE);
Chris Masond1310b22008-01-24 16:13:08 -05002456
2457 if (bio_ret && *bio_ret) {
2458 bio = *bio_ret;
Chris Masonc8b97812008-10-29 14:49:59 -04002459 if (old_compressed)
2460 contig = bio->bi_sector == sector;
2461 else
2462 contig = bio->bi_sector + (bio->bi_size >> 9) ==
2463 sector;
2464
2465 if (prev_bio_flags != bio_flags || !contig ||
Jeff Mahoney3444a972011-10-03 23:23:13 -04002466 merge_bio(tree, page, offset, page_size, bio, bio_flags) ||
Chris Masonc8b97812008-10-29 14:49:59 -04002467 bio_add_page(bio, page, page_size, offset) < page_size) {
2468 ret = submit_one_bio(rw, bio, mirror_num,
2469 prev_bio_flags);
Jeff Mahoney355808c2011-10-03 23:23:14 -04002470 BUG_ON(ret < 0);
Chris Masond1310b22008-01-24 16:13:08 -05002471 bio = NULL;
2472 } else {
2473 return 0;
2474 }
2475 }
Chris Masonc8b97812008-10-29 14:49:59 -04002476 if (this_compressed)
2477 nr = BIO_MAX_PAGES;
2478 else
2479 nr = bio_get_nr_vecs(bdev);
2480
Miao Xie88f794e2010-11-22 03:02:55 +00002481 bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
Tsutomu Itoh5df67082011-02-01 09:17:35 +00002482 if (!bio)
2483 return -ENOMEM;
Chris Mason70dec802008-01-29 09:59:12 -05002484
Chris Masonc8b97812008-10-29 14:49:59 -04002485 bio_add_page(bio, page, page_size, offset);
Chris Masond1310b22008-01-24 16:13:08 -05002486 bio->bi_end_io = end_io_func;
2487 bio->bi_private = tree;
Chris Mason70dec802008-01-29 09:59:12 -05002488
Chris Masond3977122009-01-05 21:25:51 -05002489 if (bio_ret)
Chris Masond1310b22008-01-24 16:13:08 -05002490 *bio_ret = bio;
Jeff Mahoney355808c2011-10-03 23:23:14 -04002491 else {
Chris Masonc8b97812008-10-29 14:49:59 -04002492 ret = submit_one_bio(rw, bio, mirror_num, bio_flags);
Jeff Mahoney355808c2011-10-03 23:23:14 -04002493 BUG_ON(ret < 0);
2494 }
Chris Masond1310b22008-01-24 16:13:08 -05002495
2496 return ret;
2497}
2498
2499void set_page_extent_mapped(struct page *page)
2500{
2501 if (!PagePrivate(page)) {
2502 SetPagePrivate(page);
Chris Masond1310b22008-01-24 16:13:08 -05002503 page_cache_get(page);
Chris Mason6af118c2008-07-22 11:18:07 -04002504 set_page_private(page, EXTENT_PAGE_PRIVATE);
Chris Masond1310b22008-01-24 16:13:08 -05002505 }
2506}
2507
Christoph Hellwigb2950862008-12-02 09:54:17 -05002508static void set_page_extent_head(struct page *page, unsigned long len)
Chris Masond1310b22008-01-24 16:13:08 -05002509{
Chris Masoneb14ab82011-02-10 12:35:00 -05002510 WARN_ON(!PagePrivate(page));
Chris Masond1310b22008-01-24 16:13:08 -05002511 set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
2512}
2513
2514/*
2515 * basic readpage implementation. Locked extent state structs are inserted
2516 * into the tree and are removed when the IO is done (by the end_io
2517 * handlers)
2518 */
2519static int __extent_read_full_page(struct extent_io_tree *tree,
2520 struct page *page,
2521 get_extent_t *get_extent,
Chris Masonc8b97812008-10-29 14:49:59 -04002522 struct bio **bio, int mirror_num,
2523 unsigned long *bio_flags)
Chris Masond1310b22008-01-24 16:13:08 -05002524{
2525 struct inode *inode = page->mapping->host;
2526 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
2527 u64 page_end = start + PAGE_CACHE_SIZE - 1;
2528 u64 end;
2529 u64 cur = start;
2530 u64 extent_offset;
2531 u64 last_byte = i_size_read(inode);
2532 u64 block_start;
2533 u64 cur_end;
2534 sector_t sector;
2535 struct extent_map *em;
2536 struct block_device *bdev;
Josef Bacik11c65dc2010-05-23 11:07:21 -04002537 struct btrfs_ordered_extent *ordered;
Chris Masond1310b22008-01-24 16:13:08 -05002538 int ret;
2539 int nr = 0;
David Sterba306e16c2011-04-19 14:29:38 +02002540 size_t pg_offset = 0;
Chris Masond1310b22008-01-24 16:13:08 -05002541 size_t iosize;
Chris Masonc8b97812008-10-29 14:49:59 -04002542 size_t disk_io_size;
Chris Masond1310b22008-01-24 16:13:08 -05002543 size_t blocksize = inode->i_sb->s_blocksize;
Chris Masonc8b97812008-10-29 14:49:59 -04002544 unsigned long this_bio_flag = 0;
Chris Masond1310b22008-01-24 16:13:08 -05002545
2546 set_page_extent_mapped(page);
2547
Dan Magenheimer90a887c2011-05-26 10:01:56 -06002548 if (!PageUptodate(page)) {
2549 if (cleancache_get_page(page) == 0) {
2550 BUG_ON(blocksize != PAGE_SIZE);
2551 goto out;
2552 }
2553 }
2554
Chris Masond1310b22008-01-24 16:13:08 -05002555 end = page_end;
Josef Bacik11c65dc2010-05-23 11:07:21 -04002556 while (1) {
Jeff Mahoneyd0082372012-03-01 14:57:19 +01002557 lock_extent(tree, start, end);
Josef Bacik11c65dc2010-05-23 11:07:21 -04002558 ordered = btrfs_lookup_ordered_extent(inode, start);
2559 if (!ordered)
2560 break;
Jeff Mahoneyd0082372012-03-01 14:57:19 +01002561 unlock_extent(tree, start, end);
Josef Bacik11c65dc2010-05-23 11:07:21 -04002562 btrfs_start_ordered_extent(inode, ordered, 1);
2563 btrfs_put_ordered_extent(ordered);
2564 }
Chris Masond1310b22008-01-24 16:13:08 -05002565
Chris Masonc8b97812008-10-29 14:49:59 -04002566 if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
2567 char *userpage;
2568 size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1);
2569
2570 if (zero_offset) {
2571 iosize = PAGE_CACHE_SIZE - zero_offset;
2572 userpage = kmap_atomic(page, KM_USER0);
2573 memset(userpage + zero_offset, 0, iosize);
2574 flush_dcache_page(page);
2575 kunmap_atomic(userpage, KM_USER0);
2576 }
2577 }
Chris Masond1310b22008-01-24 16:13:08 -05002578 while (cur <= end) {
2579 if (cur >= last_byte) {
2580 char *userpage;
Arne Jansen507903b2011-04-06 10:02:20 +00002581 struct extent_state *cached = NULL;
2582
David Sterba306e16c2011-04-19 14:29:38 +02002583 iosize = PAGE_CACHE_SIZE - pg_offset;
Chris Masond1310b22008-01-24 16:13:08 -05002584 userpage = kmap_atomic(page, KM_USER0);
David Sterba306e16c2011-04-19 14:29:38 +02002585 memset(userpage + pg_offset, 0, iosize);
Chris Masond1310b22008-01-24 16:13:08 -05002586 flush_dcache_page(page);
2587 kunmap_atomic(userpage, KM_USER0);
2588 set_extent_uptodate(tree, cur, cur + iosize - 1,
Arne Jansen507903b2011-04-06 10:02:20 +00002589 &cached, GFP_NOFS);
2590 unlock_extent_cached(tree, cur, cur + iosize - 1,
2591 &cached, GFP_NOFS);
Chris Masond1310b22008-01-24 16:13:08 -05002592 break;
2593 }
David Sterba306e16c2011-04-19 14:29:38 +02002594 em = get_extent(inode, page, pg_offset, cur,
Chris Masond1310b22008-01-24 16:13:08 -05002595 end - cur + 1, 0);
David Sterbac7040052011-04-19 18:00:01 +02002596 if (IS_ERR_OR_NULL(em)) {
Chris Masond1310b22008-01-24 16:13:08 -05002597 SetPageError(page);
Jeff Mahoneyd0082372012-03-01 14:57:19 +01002598 unlock_extent(tree, cur, end);
Chris Masond1310b22008-01-24 16:13:08 -05002599 break;
2600 }
Chris Masond1310b22008-01-24 16:13:08 -05002601 extent_offset = cur - em->start;
2602 BUG_ON(extent_map_end(em) <= cur);
2603 BUG_ON(end < cur);
2604
Li Zefan261507a02010-12-17 14:21:50 +08002605 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
Chris Masonc8b97812008-10-29 14:49:59 -04002606 this_bio_flag = EXTENT_BIO_COMPRESSED;
Li Zefan261507a02010-12-17 14:21:50 +08002607 extent_set_compress_type(&this_bio_flag,
2608 em->compress_type);
2609 }
Chris Masonc8b97812008-10-29 14:49:59 -04002610
Chris Masond1310b22008-01-24 16:13:08 -05002611 iosize = min(extent_map_end(em) - cur, end - cur + 1);
2612 cur_end = min(extent_map_end(em) - 1, end);
2613 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
Chris Masonc8b97812008-10-29 14:49:59 -04002614 if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
2615 disk_io_size = em->block_len;
2616 sector = em->block_start >> 9;
2617 } else {
2618 sector = (em->block_start + extent_offset) >> 9;
2619 disk_io_size = iosize;
2620 }
Chris Masond1310b22008-01-24 16:13:08 -05002621 bdev = em->bdev;
2622 block_start = em->block_start;
Yan Zhengd899e052008-10-30 14:25:28 -04002623 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
2624 block_start = EXTENT_MAP_HOLE;
Chris Masond1310b22008-01-24 16:13:08 -05002625 free_extent_map(em);
2626 em = NULL;
2627
2628 /* we've found a hole, just zero and go on */
2629 if (block_start == EXTENT_MAP_HOLE) {
2630 char *userpage;
Arne Jansen507903b2011-04-06 10:02:20 +00002631 struct extent_state *cached = NULL;
2632
Chris Masond1310b22008-01-24 16:13:08 -05002633 userpage = kmap_atomic(page, KM_USER0);
David Sterba306e16c2011-04-19 14:29:38 +02002634 memset(userpage + pg_offset, 0, iosize);
Chris Masond1310b22008-01-24 16:13:08 -05002635 flush_dcache_page(page);
2636 kunmap_atomic(userpage, KM_USER0);
2637
2638 set_extent_uptodate(tree, cur, cur + iosize - 1,
Arne Jansen507903b2011-04-06 10:02:20 +00002639 &cached, GFP_NOFS);
2640 unlock_extent_cached(tree, cur, cur + iosize - 1,
2641 &cached, GFP_NOFS);
Chris Masond1310b22008-01-24 16:13:08 -05002642 cur = cur + iosize;
David Sterba306e16c2011-04-19 14:29:38 +02002643 pg_offset += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05002644 continue;
2645 }
2646 /* the get_extent function already copied into the page */
Chris Mason9655d292009-09-02 15:22:30 -04002647 if (test_range_bit(tree, cur, cur_end,
2648 EXTENT_UPTODATE, 1, NULL)) {
Chris Masona1b32a52008-09-05 16:09:51 -04002649 check_page_uptodate(tree, page);
Jeff Mahoneyd0082372012-03-01 14:57:19 +01002650 unlock_extent(tree, cur, cur + iosize - 1);
Chris Masond1310b22008-01-24 16:13:08 -05002651 cur = cur + iosize;
David Sterba306e16c2011-04-19 14:29:38 +02002652 pg_offset += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05002653 continue;
2654 }
Chris Mason70dec802008-01-29 09:59:12 -05002655 /* we have an inline extent but it didn't get marked up
2656 * to date. Error out
2657 */
2658 if (block_start == EXTENT_MAP_INLINE) {
2659 SetPageError(page);
Jeff Mahoneyd0082372012-03-01 14:57:19 +01002660 unlock_extent(tree, cur, cur + iosize - 1);
Chris Mason70dec802008-01-29 09:59:12 -05002661 cur = cur + iosize;
David Sterba306e16c2011-04-19 14:29:38 +02002662 pg_offset += iosize;
Chris Mason70dec802008-01-29 09:59:12 -05002663 continue;
2664 }
Chris Masond1310b22008-01-24 16:13:08 -05002665
2666 ret = 0;
2667 if (tree->ops && tree->ops->readpage_io_hook) {
2668 ret = tree->ops->readpage_io_hook(page, cur,
2669 cur + iosize - 1);
2670 }
2671 if (!ret) {
Chris Mason89642222008-07-24 09:41:53 -04002672 unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
2673 pnr -= page->index;
Chris Masond1310b22008-01-24 16:13:08 -05002674 ret = submit_extent_page(READ, tree, page,
David Sterba306e16c2011-04-19 14:29:38 +02002675 sector, disk_io_size, pg_offset,
Chris Mason89642222008-07-24 09:41:53 -04002676 bdev, bio, pnr,
Chris Masonc8b97812008-10-29 14:49:59 -04002677 end_bio_extent_readpage, mirror_num,
2678 *bio_flags,
2679 this_bio_flag);
Chris Mason89642222008-07-24 09:41:53 -04002680 nr++;
Chris Masonc8b97812008-10-29 14:49:59 -04002681 *bio_flags = this_bio_flag;
Chris Masond1310b22008-01-24 16:13:08 -05002682 }
2683 if (ret)
2684 SetPageError(page);
2685 cur = cur + iosize;
David Sterba306e16c2011-04-19 14:29:38 +02002686 pg_offset += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05002687 }
Dan Magenheimer90a887c2011-05-26 10:01:56 -06002688out:
Chris Masond1310b22008-01-24 16:13:08 -05002689 if (!nr) {
2690 if (!PageError(page))
2691 SetPageUptodate(page);
2692 unlock_page(page);
2693 }
2694 return 0;
2695}
2696
2697int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
Jan Schmidt8ddc7d92011-06-13 20:02:58 +02002698 get_extent_t *get_extent, int mirror_num)
Chris Masond1310b22008-01-24 16:13:08 -05002699{
2700 struct bio *bio = NULL;
Chris Masonc8b97812008-10-29 14:49:59 -04002701 unsigned long bio_flags = 0;
Chris Masond1310b22008-01-24 16:13:08 -05002702 int ret;
2703
Jan Schmidt8ddc7d92011-06-13 20:02:58 +02002704 ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
Chris Masonc8b97812008-10-29 14:49:59 -04002705 &bio_flags);
Jeff Mahoney355808c2011-10-03 23:23:14 -04002706 if (bio) {
Jan Schmidt8ddc7d92011-06-13 20:02:58 +02002707 ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
Jeff Mahoney355808c2011-10-03 23:23:14 -04002708 BUG_ON(ret < 0);
2709 }
Chris Masond1310b22008-01-24 16:13:08 -05002710 return ret;
2711}
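/*
 * Sketch of how an address_space ->readpage operation can plug into the
 * helper above (hypothetical example; the real callbacks live outside
 * this file, and 'get_extent' stands in for whatever get_extent_t the
 * caller provides).  A mirror_num of 0 lets the lower layers pick which
 * copy to read.
 */
static int example_readpage(struct page *page, get_extent_t *get_extent)
{
	struct extent_io_tree *tree;

	tree = &BTRFS_I(page->mapping->host)->io_tree;
	return extent_read_full_page(tree, page, get_extent, 0);
}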
Chris Masond1310b22008-01-24 16:13:08 -05002712
Chris Mason11c83492009-04-20 15:50:09 -04002713static noinline void update_nr_written(struct page *page,
2714 struct writeback_control *wbc,
2715 unsigned long nr_written)
2716{
2717 wbc->nr_to_write -= nr_written;
2718 if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
2719 wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
2720 page->mapping->writeback_index = page->index + nr_written;
2721}
2722
Chris Masond1310b22008-01-24 16:13:08 -05002723/*
2724 * the writepage semantics are similar to regular writepage. extent
2725 * records are inserted to lock ranges in the tree, and as dirty areas
2726 * are found, they are marked writeback. Then the lock bits are removed
2727 * and the end_io handler clears the writeback ranges
2728 */
2729static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2730 void *data)
2731{
2732 struct inode *inode = page->mapping->host;
2733 struct extent_page_data *epd = data;
2734 struct extent_io_tree *tree = epd->tree;
2735 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
2736 u64 delalloc_start;
2737 u64 page_end = start + PAGE_CACHE_SIZE - 1;
2738 u64 end;
2739 u64 cur = start;
2740 u64 extent_offset;
2741 u64 last_byte = i_size_read(inode);
2742 u64 block_start;
2743 u64 iosize;
2744 sector_t sector;
Chris Mason2c64c532009-09-02 15:04:12 -04002745 struct extent_state *cached_state = NULL;
Chris Masond1310b22008-01-24 16:13:08 -05002746 struct extent_map *em;
2747 struct block_device *bdev;
2748 int ret;
2749 int nr = 0;
Chris Mason7f3c74f2008-07-18 12:01:11 -04002750 size_t pg_offset = 0;
Chris Masond1310b22008-01-24 16:13:08 -05002751 size_t blocksize;
2752 loff_t i_size = i_size_read(inode);
2753 unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
2754 u64 nr_delalloc;
2755 u64 delalloc_end;
Chris Masonc8b97812008-10-29 14:49:59 -04002756 int page_started;
2757 int compressed;
Chris Masonffbd5172009-04-20 15:50:09 -04002758 int write_flags;
Chris Mason771ed682008-11-06 22:02:51 -05002759 unsigned long nr_written = 0;
Josef Bacik9e487102011-08-01 12:08:18 -04002760 bool fill_delalloc = true;
Chris Masond1310b22008-01-24 16:13:08 -05002761
Chris Masonffbd5172009-04-20 15:50:09 -04002762 if (wbc->sync_mode == WB_SYNC_ALL)
Jens Axboe721a9602011-03-09 11:56:30 +01002763 write_flags = WRITE_SYNC;
Chris Masonffbd5172009-04-20 15:50:09 -04002764 else
2765 write_flags = WRITE;
2766
liubo1abe9b82011-03-24 11:18:59 +00002767 trace___extent_writepage(page, inode, wbc);
2768
Chris Masond1310b22008-01-24 16:13:08 -05002769 WARN_ON(!PageLocked(page));
Chris Masonbf0da8c2011-11-04 12:29:37 -04002770
2771 ClearPageError(page);
2772
Chris Mason7f3c74f2008-07-18 12:01:11 -04002773 pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
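        /*
         * the page lies entirely beyond i_size: nothing to write, just
         * invalidate any per-page fs state and unlock
         */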
Chris Mason211c17f2008-05-15 09:13:45 -04002774 if (page->index > end_index ||
Chris Mason7f3c74f2008-07-18 12:01:11 -04002775 (page->index == end_index && !pg_offset)) {
Chris Mason39be25c2008-11-10 11:50:50 -05002776 page->mapping->a_ops->invalidatepage(page, 0);
Chris Masond1310b22008-01-24 16:13:08 -05002777 unlock_page(page);
2778 return 0;
2779 }
2780
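        /*
         * the last page may straddle i_size; zero the tail beyond i_size
         * so stale memory contents never reach disk
         */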
2781 if (page->index == end_index) {
2782 char *userpage;
2783
Chris Masond1310b22008-01-24 16:13:08 -05002784 userpage = kmap_atomic(page, KM_USER0);
Chris Mason7f3c74f2008-07-18 12:01:11 -04002785 memset(userpage + pg_offset, 0,
2786 PAGE_CACHE_SIZE - pg_offset);
Chris Masond1310b22008-01-24 16:13:08 -05002787 kunmap_atomic(userpage, KM_USER0);
Chris Mason211c17f2008-05-15 09:13:45 -04002788 flush_dcache_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05002789 }
Chris Mason7f3c74f2008-07-18 12:01:11 -04002790 pg_offset = 0;
Chris Masond1310b22008-01-24 16:13:08 -05002791
2792 set_page_extent_mapped(page);
2793
Josef Bacik9e487102011-08-01 12:08:18 -04002794 if (!tree->ops || !tree->ops->fill_delalloc)
2795 fill_delalloc = false;
2796
Chris Masond1310b22008-01-24 16:13:08 -05002797 delalloc_start = start;
2798 delalloc_end = 0;
Chris Masonc8b97812008-10-29 14:49:59 -04002799 page_started = 0;
Josef Bacik9e487102011-08-01 12:08:18 -04002800 if (!epd->extent_locked && fill_delalloc) {
Chris Masonf85d7d6c2009-09-18 16:03:16 -04002801 u64 delalloc_to_write = 0;
Chris Mason11c83492009-04-20 15:50:09 -04002802 /*
2803 * make sure the wbc mapping index is at least updated
2804 * to this page.
2805 */
2806 update_nr_written(page, wbc, 0);
2807
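                /*
                 * find each delalloc range touching this page, lock it and
                 * let fill_delalloc allocate extents and possibly start the
                 * IO (and unlock the page) on its own
                 */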
Chris Masond3977122009-01-05 21:25:51 -05002808 while (delalloc_end < page_end) {
Chris Mason771ed682008-11-06 22:02:51 -05002809 nr_delalloc = find_lock_delalloc_range(inode, tree,
Chris Masonc8b97812008-10-29 14:49:59 -04002810 page,
2811 &delalloc_start,
Chris Masond1310b22008-01-24 16:13:08 -05002812 &delalloc_end,
2813 128 * 1024 * 1024);
Chris Mason771ed682008-11-06 22:02:51 -05002814 if (nr_delalloc == 0) {
2815 delalloc_start = delalloc_end + 1;
2816 continue;
2817 }
Tsutomu Itoh013bd4c2012-02-16 10:11:40 +09002818 ret = tree->ops->fill_delalloc(inode, page,
2819 delalloc_start,
2820 delalloc_end,
2821 &page_started,
2822 &nr_written);
2823 BUG_ON(ret);
Chris Masonf85d7d6c2009-09-18 16:03:16 -04002824 /*
2825 * delalloc_end is already one less than the total
2826 * length, so we don't subtract one from
2827 * PAGE_CACHE_SIZE
2828 */
2829 delalloc_to_write += (delalloc_end - delalloc_start +
2830 PAGE_CACHE_SIZE) >>
2831 PAGE_CACHE_SHIFT;
Chris Masond1310b22008-01-24 16:13:08 -05002832 delalloc_start = delalloc_end + 1;
Chris Masond1310b22008-01-24 16:13:08 -05002833 }
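                /*
                 * delalloc can span more dirty pages than nr_to_write allows;
                 * raise nr_to_write (capped by thresh) so the range we just
                 * processed is not cut short
                 */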
Chris Masonf85d7d6c2009-09-18 16:03:16 -04002834 if (wbc->nr_to_write < delalloc_to_write) {
2835 int thresh = 8192;
2836
2837 if (delalloc_to_write < thresh * 2)
2838 thresh = delalloc_to_write;
2839 wbc->nr_to_write = min_t(u64, delalloc_to_write,
2840 thresh);
2841 }
Chris Masonc8b97812008-10-29 14:49:59 -04002842
Chris Mason771ed682008-11-06 22:02:51 -05002843 /* did the fill delalloc function already unlock and start
2844 * the IO?
2845 */
2846 if (page_started) {
2847 ret = 0;
Chris Mason11c83492009-04-20 15:50:09 -04002848 /*
2849 * we've unlocked the page, so we can't update
2850 * the mapping's writeback index, just update
2851 * nr_to_write.
2852 */
2853 wbc->nr_to_write -= nr_written;
2854 goto done_unlocked;
Chris Mason771ed682008-11-06 22:02:51 -05002855 }
Chris Masonc8b97812008-10-29 14:49:59 -04002856 }
Chris Mason247e7432008-07-17 12:53:51 -04002857 if (tree->ops && tree->ops->writepage_start_hook) {
Chris Masonc8b97812008-10-29 14:49:59 -04002858 ret = tree->ops->writepage_start_hook(page, start,
2859 page_end);
Jeff Mahoney87826df2012-02-15 16:23:57 +01002860 if (ret) {
2861 /* Fixup worker will requeue */
2862 if (ret == -EBUSY)
2863 wbc->pages_skipped++;
2864 else
2865 redirty_page_for_writepage(wbc, page);
Chris Mason11c83492009-04-20 15:50:09 -04002866 update_nr_written(page, wbc, nr_written);
Chris Mason247e7432008-07-17 12:53:51 -04002867 unlock_page(page);
Chris Mason771ed682008-11-06 22:02:51 -05002868 ret = 0;
Chris Mason11c83492009-04-20 15:50:09 -04002869 goto done_unlocked;
Chris Mason247e7432008-07-17 12:53:51 -04002870 }
2871 }
2872
Chris Mason11c83492009-04-20 15:50:09 -04002873 /*
2874 * we don't want to touch the inode after unlocking the page,
2875 * so we update the mapping writeback index now
2876 */
2877 update_nr_written(page, wbc, nr_written + 1);
Chris Mason771ed682008-11-06 22:02:51 -05002878
Chris Masond1310b22008-01-24 16:13:08 -05002879 end = page_end;
Chris Masond1310b22008-01-24 16:13:08 -05002880 if (last_byte <= start) {
Chris Masone6dcd2d2008-07-17 12:53:50 -04002881 if (tree->ops && tree->ops->writepage_end_io_hook)
2882 tree->ops->writepage_end_io_hook(page, start,
2883 page_end, NULL, 1);
Chris Masond1310b22008-01-24 16:13:08 -05002884 goto done;
2885 }
2886
Chris Masond1310b22008-01-24 16:13:08 -05002887 blocksize = inode->i_sb->s_blocksize;
2888
2889 while (cur <= end) {
2890 if (cur >= last_byte) {
Chris Masone6dcd2d2008-07-17 12:53:50 -04002891 if (tree->ops && tree->ops->writepage_end_io_hook)
2892 tree->ops->writepage_end_io_hook(page, cur,
2893 page_end, NULL, 1);
Chris Masond1310b22008-01-24 16:13:08 -05002894 break;
2895 }
Chris Mason7f3c74f2008-07-18 12:01:11 -04002896 em = epd->get_extent(inode, page, pg_offset, cur,
Chris Masond1310b22008-01-24 16:13:08 -05002897 end - cur + 1, 1);
David Sterbac7040052011-04-19 18:00:01 +02002898 if (IS_ERR_OR_NULL(em)) {
Chris Masond1310b22008-01-24 16:13:08 -05002899 SetPageError(page);
2900 break;
2901 }
2902
2903 extent_offset = cur - em->start;
2904 BUG_ON(extent_map_end(em) <= cur);
2905 BUG_ON(end < cur);
2906 iosize = min(extent_map_end(em) - cur, end - cur + 1);
2907 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
2908 sector = (em->block_start + extent_offset) >> 9;
2909 bdev = em->bdev;
2910 block_start = em->block_start;
Chris Masonc8b97812008-10-29 14:49:59 -04002911 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
Chris Masond1310b22008-01-24 16:13:08 -05002912 free_extent_map(em);
2913 em = NULL;
2914
Chris Masonc8b97812008-10-29 14:49:59 -04002915 /*
2916 * compressed and inline extents are written through other
2917 * paths in the FS
2918 */
2919 if (compressed || block_start == EXTENT_MAP_HOLE ||
Chris Masond1310b22008-01-24 16:13:08 -05002920 block_start == EXTENT_MAP_INLINE) {
Chris Masonc8b97812008-10-29 14:49:59 -04002921 /*
2922 * end_io notification does not happen here for
2923 * compressed extents
2924 */
2925 if (!compressed && tree->ops &&
2926 tree->ops->writepage_end_io_hook)
Chris Masone6dcd2d2008-07-17 12:53:50 -04002927 tree->ops->writepage_end_io_hook(page, cur,
2928 cur + iosize - 1,
2929 NULL, 1);
Chris Masonc8b97812008-10-29 14:49:59 -04002930 else if (compressed) {
2931 /* we don't want to end_page_writeback on
2932 * a compressed extent. this happens
2933 * elsewhere
2934 */
2935 nr++;
2936 }
2937
2938 cur += iosize;
Chris Mason7f3c74f2008-07-18 12:01:11 -04002939 pg_offset += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05002940 continue;
2941 }
Chris Masond1310b22008-01-24 16:13:08 -05002942 /* leave this out until we have a page_mkwrite call */
2943 if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
Chris Mason9655d292009-09-02 15:22:30 -04002944 EXTENT_DIRTY, 0, NULL)) {
Chris Masond1310b22008-01-24 16:13:08 -05002945 cur = cur + iosize;
Chris Mason7f3c74f2008-07-18 12:01:11 -04002946 pg_offset += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05002947 continue;
2948 }
Chris Masonc8b97812008-10-29 14:49:59 -04002949
Chris Masond1310b22008-01-24 16:13:08 -05002950 if (tree->ops && tree->ops->writepage_io_hook) {
2951 ret = tree->ops->writepage_io_hook(page, cur,
2952 cur + iosize - 1);
2953 } else {
2954 ret = 0;
2955 }
Chris Mason1259ab72008-05-12 13:39:03 -04002956 if (ret) {
Chris Masond1310b22008-01-24 16:13:08 -05002957 SetPageError(page);
Chris Mason1259ab72008-05-12 13:39:03 -04002958 } else {
Chris Masond1310b22008-01-24 16:13:08 -05002959 unsigned long max_nr = end_index + 1;
Chris Mason7f3c74f2008-07-18 12:01:11 -04002960
Chris Masond1310b22008-01-24 16:13:08 -05002961 set_range_writeback(tree, cur, cur + iosize - 1);
2962 if (!PageWriteback(page)) {
Chris Masond3977122009-01-05 21:25:51 -05002963 printk(KERN_ERR "btrfs warning page %lu not "
2964 "writeback, cur %llu end %llu\n",
2965 page->index, (unsigned long long)cur,
Chris Masond1310b22008-01-24 16:13:08 -05002966 (unsigned long long)end);
2967 }
2968
Chris Masonffbd5172009-04-20 15:50:09 -04002969 ret = submit_extent_page(write_flags, tree, page,
2970 sector, iosize, pg_offset,
2971 bdev, &epd->bio, max_nr,
Chris Masonc8b97812008-10-29 14:49:59 -04002972 end_bio_extent_writepage,
2973 0, 0, 0);
Chris Masond1310b22008-01-24 16:13:08 -05002974 if (ret)
2975 SetPageError(page);
2976 }
2977 cur = cur + iosize;
Chris Mason7f3c74f2008-07-18 12:01:11 -04002978 pg_offset += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05002979 nr++;
2980 }
2981done:
2982 if (nr == 0) {
2983 /* make sure the mapping tag for page dirty gets cleared */
2984 set_page_writeback(page);
2985 end_page_writeback(page);
2986 }
Chris Masond1310b22008-01-24 16:13:08 -05002987 unlock_page(page);
Chris Mason771ed682008-11-06 22:02:51 -05002988
Chris Mason11c83492009-04-20 15:50:09 -04002989done_unlocked:
2990
Chris Mason2c64c532009-09-02 15:04:12 -04002991 /* drop our reference on any cached states */
2992 free_extent_state(cached_state);
Chris Masond1310b22008-01-24 16:13:08 -05002993 return 0;
2994}
2995
Chris Masond1310b22008-01-24 16:13:08 -05002996/**
Chris Mason4bef0842008-09-08 11:18:08 -04002997 * extent_write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
Chris Masond1310b22008-01-24 16:13:08 -05002998 * @mapping: address space structure to write
2999 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
3000 * @writepage: function called for each page
3001 * @data: data passed to writepage function
3002 *
3003 * If a page is already under I/O, extent_write_cache_pages() skips it, even
3004 * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
3005 * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
3006 * and msync() need to guarantee that all the data which was dirty at the time
3007 * the call was made get new I/O started against them. If wbc->sync_mode is
3008 * WB_SYNC_ALL then we were called for data integrity and we must wait for
3009 * existing IO to complete.
3010 */
Christoph Hellwigb2950862008-12-02 09:54:17 -05003011static int extent_write_cache_pages(struct extent_io_tree *tree,
Chris Mason4bef0842008-09-08 11:18:08 -04003012 struct address_space *mapping,
3013 struct writeback_control *wbc,
Chris Masond2c3f4f2008-11-19 12:44:22 -05003014 writepage_t writepage, void *data,
3015 void (*flush_fn)(void *))
Chris Masond1310b22008-01-24 16:13:08 -05003016{
Chris Masond1310b22008-01-24 16:13:08 -05003017 int ret = 0;
3018 int done = 0;
Chris Masonf85d7d6c2009-09-18 16:03:16 -04003019 int nr_to_write_done = 0;
Chris Masond1310b22008-01-24 16:13:08 -05003020 struct pagevec pvec;
3021 int nr_pages;
3022 pgoff_t index;
3023 pgoff_t end; /* Inclusive */
3024 int scanned = 0;
Josef Bacikf7aaa062011-07-15 21:26:38 +00003025 int tag;
Chris Masond1310b22008-01-24 16:13:08 -05003026
Chris Masond1310b22008-01-24 16:13:08 -05003027 pagevec_init(&pvec, 0);
3028 if (wbc->range_cyclic) {
3029 index = mapping->writeback_index; /* Start from prev offset */
3030 end = -1;
3031 } else {
3032 index = wbc->range_start >> PAGE_CACHE_SHIFT;
3033 end = wbc->range_end >> PAGE_CACHE_SHIFT;
Chris Masond1310b22008-01-24 16:13:08 -05003034 scanned = 1;
3035 }
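        /*
         * for WB_SYNC_ALL, use the TOWRITE tag so pages dirtied after the
         * scan starts are not pulled in and integrity writeback cannot be
         * livelocked
         */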
Josef Bacikf7aaa062011-07-15 21:26:38 +00003036 if (wbc->sync_mode == WB_SYNC_ALL)
3037 tag = PAGECACHE_TAG_TOWRITE;
3038 else
3039 tag = PAGECACHE_TAG_DIRTY;
Chris Masond1310b22008-01-24 16:13:08 -05003040retry:
Josef Bacikf7aaa062011-07-15 21:26:38 +00003041 if (wbc->sync_mode == WB_SYNC_ALL)
3042 tag_pages_for_writeback(mapping, index, end);
Chris Masonf85d7d6c2009-09-18 16:03:16 -04003043 while (!done && !nr_to_write_done && (index <= end) &&
Josef Bacikf7aaa062011-07-15 21:26:38 +00003044 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
3045 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
Chris Masond1310b22008-01-24 16:13:08 -05003046 unsigned i;
3047
3048 scanned = 1;
3049 for (i = 0; i < nr_pages; i++) {
3050 struct page *page = pvec.pages[i];
3051
3052 /*
3053 * At this point we hold neither mapping->tree_lock nor
3054 * lock on the page itself: the page may be truncated or
3055 * invalidated (changing page->mapping to NULL), or even
3056 * swizzled back from swapper_space to tmpfs file
3057 * mapping
3058 */
Chris Mason01d658f2011-11-01 10:08:06 -04003059 if (tree->ops &&
3060 tree->ops->write_cache_pages_lock_hook) {
3061 tree->ops->write_cache_pages_lock_hook(page,
3062 data, flush_fn);
3063 } else {
3064 if (!trylock_page(page)) {
3065 flush_fn(data);
3066 lock_page(page);
3067 }
3068 }
Chris Masond1310b22008-01-24 16:13:08 -05003069
3070 if (unlikely(page->mapping != mapping)) {
3071 unlock_page(page);
3072 continue;
3073 }
3074
3075 if (!wbc->range_cyclic && page->index > end) {
3076 done = 1;
3077 unlock_page(page);
3078 continue;
3079 }
3080
Chris Masond2c3f4f2008-11-19 12:44:22 -05003081 if (wbc->sync_mode != WB_SYNC_NONE) {
Chris Mason0e6bd952008-11-20 10:46:35 -05003082 if (PageWriteback(page))
3083 flush_fn(data);
Chris Masond1310b22008-01-24 16:13:08 -05003084 wait_on_page_writeback(page);
Chris Masond2c3f4f2008-11-19 12:44:22 -05003085 }
Chris Masond1310b22008-01-24 16:13:08 -05003086
3087 if (PageWriteback(page) ||
3088 !clear_page_dirty_for_io(page)) {
3089 unlock_page(page);
3090 continue;
3091 }
3092
3093 ret = (*writepage)(page, wbc, data);
3094
3095 if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
3096 unlock_page(page);
3097 ret = 0;
3098 }
Chris Masonf85d7d6c2009-09-18 16:03:16 -04003099 if (ret)
Chris Masond1310b22008-01-24 16:13:08 -05003100 done = 1;
Chris Masonf85d7d6c2009-09-18 16:03:16 -04003101
3102 /*
3103 * the filesystem may choose to bump up nr_to_write.
3104 * We have to make sure to honor the new nr_to_write
3105 * at any time
3106 */
3107 nr_to_write_done = wbc->nr_to_write <= 0;
Chris Masond1310b22008-01-24 16:13:08 -05003108 }
3109 pagevec_release(&pvec);
3110 cond_resched();
3111 }
3112 if (!scanned && !done) {
3113 /*
3114 * We hit the last page and there is more work to be done: wrap
3115 * back to the start of the file
3116 */
3117 scanned = 1;
3118 index = 0;
3119 goto retry;
3120 }
Chris Masond1310b22008-01-24 16:13:08 -05003121 return ret;
3122}
Chris Masond1310b22008-01-24 16:13:08 -05003123
Chris Masonffbd5172009-04-20 15:50:09 -04003124static void flush_epd_write_bio(struct extent_page_data *epd)
3125{
3126 if (epd->bio) {
Jeff Mahoney355808c2011-10-03 23:23:14 -04003127 int rw = WRITE;
3128 int ret;
3129
Chris Masonffbd5172009-04-20 15:50:09 -04003130 if (epd->sync_io)
Jeff Mahoney355808c2011-10-03 23:23:14 -04003131 rw = WRITE_SYNC;
3132
3133 ret = submit_one_bio(rw, epd->bio, 0, 0);
3134 BUG_ON(ret < 0);
Chris Masonffbd5172009-04-20 15:50:09 -04003135 epd->bio = NULL;
3136 }
3137}
3138
Chris Masond2c3f4f2008-11-19 12:44:22 -05003139static noinline void flush_write_bio(void *data)
3140{
3141 struct extent_page_data *epd = data;
Chris Masonffbd5172009-04-20 15:50:09 -04003142 flush_epd_write_bio(epd);
Chris Masond2c3f4f2008-11-19 12:44:22 -05003143}
3144
Chris Masond1310b22008-01-24 16:13:08 -05003145int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
3146 get_extent_t *get_extent,
3147 struct writeback_control *wbc)
3148{
3149 int ret;
Chris Masond1310b22008-01-24 16:13:08 -05003150 struct extent_page_data epd = {
3151 .bio = NULL,
3152 .tree = tree,
3153 .get_extent = get_extent,
Chris Mason771ed682008-11-06 22:02:51 -05003154 .extent_locked = 0,
Chris Masonffbd5172009-04-20 15:50:09 -04003155 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
Chris Masond1310b22008-01-24 16:13:08 -05003156 };
Chris Masond1310b22008-01-24 16:13:08 -05003157
Chris Masond1310b22008-01-24 16:13:08 -05003158 ret = __extent_writepage(page, wbc, &epd);
3159
Chris Masonffbd5172009-04-20 15:50:09 -04003160 flush_epd_write_bio(&epd);
Chris Masond1310b22008-01-24 16:13:08 -05003161 return ret;
3162}
Chris Masond1310b22008-01-24 16:13:08 -05003163
Chris Mason771ed682008-11-06 22:02:51 -05003164int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
3165 u64 start, u64 end, get_extent_t *get_extent,
3166 int mode)
3167{
3168 int ret = 0;
3169 struct address_space *mapping = inode->i_mapping;
3170 struct page *page;
3171 unsigned long nr_pages = (end - start + PAGE_CACHE_SIZE) >>
3172 PAGE_CACHE_SHIFT;
3173
3174 struct extent_page_data epd = {
3175 .bio = NULL,
3176 .tree = tree,
3177 .get_extent = get_extent,
3178 .extent_locked = 1,
Chris Masonffbd5172009-04-20 15:50:09 -04003179 .sync_io = mode == WB_SYNC_ALL,
Chris Mason771ed682008-11-06 22:02:51 -05003180 };
3181 struct writeback_control wbc_writepages = {
Chris Mason771ed682008-11-06 22:02:51 -05003182 .sync_mode = mode,
Chris Mason771ed682008-11-06 22:02:51 -05003183 .nr_to_write = nr_pages * 2,
3184 .range_start = start,
3185 .range_end = end + 1,
3186 };
3187
Chris Masond3977122009-01-05 21:25:51 -05003188 while (start <= end) {
Chris Mason771ed682008-11-06 22:02:51 -05003189 page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
3190 if (clear_page_dirty_for_io(page))
3191 ret = __extent_writepage(page, &wbc_writepages, &epd);
3192 else {
3193 if (tree->ops && tree->ops->writepage_end_io_hook)
3194 tree->ops->writepage_end_io_hook(page, start,
3195 start + PAGE_CACHE_SIZE - 1,
3196 NULL, 1);
3197 unlock_page(page);
3198 }
3199 page_cache_release(page);
3200 start += PAGE_CACHE_SIZE;
3201 }
3202
Chris Masonffbd5172009-04-20 15:50:09 -04003203 flush_epd_write_bio(&epd);
Chris Mason771ed682008-11-06 22:02:51 -05003204 return ret;
3205}
Chris Masond1310b22008-01-24 16:13:08 -05003206
3207int extent_writepages(struct extent_io_tree *tree,
3208 struct address_space *mapping,
3209 get_extent_t *get_extent,
3210 struct writeback_control *wbc)
3211{
3212 int ret = 0;
3213 struct extent_page_data epd = {
3214 .bio = NULL,
3215 .tree = tree,
3216 .get_extent = get_extent,
Chris Mason771ed682008-11-06 22:02:51 -05003217 .extent_locked = 0,
Chris Masonffbd5172009-04-20 15:50:09 -04003218 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
Chris Masond1310b22008-01-24 16:13:08 -05003219 };
3220
Chris Mason4bef0842008-09-08 11:18:08 -04003221 ret = extent_write_cache_pages(tree, mapping, wbc,
Chris Masond2c3f4f2008-11-19 12:44:22 -05003222 __extent_writepage, &epd,
3223 flush_write_bio);
Chris Masonffbd5172009-04-20 15:50:09 -04003224 flush_epd_write_bio(&epd);
Chris Masond1310b22008-01-24 16:13:08 -05003225 return ret;
3226}
Chris Masond1310b22008-01-24 16:13:08 -05003227
3228int extent_readpages(struct extent_io_tree *tree,
3229 struct address_space *mapping,
3230 struct list_head *pages, unsigned nr_pages,
3231 get_extent_t get_extent)
3232{
3233 struct bio *bio = NULL;
3234 unsigned page_idx;
Chris Masonc8b97812008-10-29 14:49:59 -04003235 unsigned long bio_flags = 0;
Chris Masond1310b22008-01-24 16:13:08 -05003236
Chris Masond1310b22008-01-24 16:13:08 -05003237 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
3238 struct page *page = list_entry(pages->prev, struct page, lru);
3239
3240 prefetchw(&page->flags);
3241 list_del(&page->lru);
Nick Piggin28ecb6092010-03-17 13:31:04 +00003242 if (!add_to_page_cache_lru(page, mapping,
Itaru Kitayama43e817a2011-04-25 19:43:51 -04003243 page->index, GFP_NOFS)) {
Chris Masonf1885912008-04-09 16:28:12 -04003244 __extent_read_full_page(tree, page, get_extent,
Chris Masonc8b97812008-10-29 14:49:59 -04003245 &bio, 0, &bio_flags);
Chris Masond1310b22008-01-24 16:13:08 -05003246 }
3247 page_cache_release(page);
3248 }
Chris Masond1310b22008-01-24 16:13:08 -05003249 BUG_ON(!list_empty(pages));
Jeff Mahoney355808c2011-10-03 23:23:14 -04003250 if (bio) {
3251 int ret = submit_one_bio(READ, bio, 0, bio_flags);
3252 BUG_ON(ret < 0);
3253 }
Chris Masond1310b22008-01-24 16:13:08 -05003254 return 0;
3255}
Chris Masond1310b22008-01-24 16:13:08 -05003256
3257/*
3258 * basic invalidatepage code; this waits on any locked or writeback
3259 * ranges corresponding to the page, and then deletes any extent state
3260 * records from the tree
3261 */
3262int extent_invalidatepage(struct extent_io_tree *tree,
3263 struct page *page, unsigned long offset)
3264{
Josef Bacik2ac55d42010-02-03 19:33:23 +00003265 struct extent_state *cached_state = NULL;
Chris Masond1310b22008-01-24 16:13:08 -05003266 u64 start = ((u64)page->index << PAGE_CACHE_SHIFT);
3267 u64 end = start + PAGE_CACHE_SIZE - 1;
3268 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
3269
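        /* round up to the next block boundary; partial blocks keep their state */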
Chris Masond3977122009-01-05 21:25:51 -05003270 start += (offset + blocksize - 1) & ~(blocksize - 1);
Chris Masond1310b22008-01-24 16:13:08 -05003271 if (start > end)
3272 return 0;
3273
Jeff Mahoneyd0082372012-03-01 14:57:19 +01003274 lock_extent_bits(tree, start, end, 0, &cached_state);
Chris Mason1edbb732009-09-02 13:24:36 -04003275 wait_on_page_writeback(page);
Chris Masond1310b22008-01-24 16:13:08 -05003276 clear_extent_bit(tree, start, end,
Josef Bacik32c00af2009-10-08 13:34:05 -04003277 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
3278 EXTENT_DO_ACCOUNTING,
Josef Bacik2ac55d42010-02-03 19:33:23 +00003279 1, 1, &cached_state, GFP_NOFS);
Chris Masond1310b22008-01-24 16:13:08 -05003280 return 0;
3281}
Chris Masond1310b22008-01-24 16:13:08 -05003282
3283/*
Chris Mason7b13b7b2008-04-18 10:29:50 -04003284 * a helper for releasepage; this tests for areas of the page that
3285 * are locked or under IO and drops the related state bits if it is safe
3286 * to drop the page.
3287 */
3288int try_release_extent_state(struct extent_map_tree *map,
3289 struct extent_io_tree *tree, struct page *page,
3290 gfp_t mask)
3291{
3292 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
3293 u64 end = start + PAGE_CACHE_SIZE - 1;
3294 int ret = 1;
3295
Chris Mason211f90e2008-07-18 11:56:15 -04003296 if (test_range_bit(tree, start, end,
Chris Mason8b62b722009-09-02 16:53:46 -04003297 EXTENT_IOBITS, 0, NULL))
Chris Mason7b13b7b2008-04-18 10:29:50 -04003298 ret = 0;
3299 else {
3300 if ((mask & GFP_NOFS) == GFP_NOFS)
3301 mask = GFP_NOFS;
Chris Mason11ef1602009-09-23 20:28:46 -04003302 /*
3303 * at this point we can safely clear everything except the
3304 * locked bit and the nodatasum bit
3305 */
Chris Masone3f24cc2011-02-14 12:52:08 -05003306 ret = clear_extent_bit(tree, start, end,
Chris Mason11ef1602009-09-23 20:28:46 -04003307 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
3308 0, 0, NULL, mask);
Chris Masone3f24cc2011-02-14 12:52:08 -05003309
3310 /* if clear_extent_bit failed for enomem reasons,
3311 * we can't allow the release to continue.
3312 */
3313 if (ret < 0)
3314 ret = 0;
3315 else
3316 ret = 1;
Chris Mason7b13b7b2008-04-18 10:29:50 -04003317 }
3318 return ret;
3319}
Chris Mason7b13b7b2008-04-18 10:29:50 -04003320
3321/*
Chris Masond1310b22008-01-24 16:13:08 -05003322 * a helper for releasepage. As long as there are no locked extents
3323 * in the range corresponding to the page, both state records and extent
3324 * map records are removed
3325 */
3326int try_release_extent_mapping(struct extent_map_tree *map,
Chris Mason70dec802008-01-29 09:59:12 -05003327 struct extent_io_tree *tree, struct page *page,
3328 gfp_t mask)
Chris Masond1310b22008-01-24 16:13:08 -05003329{
3330 struct extent_map *em;
3331 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
3332 u64 end = start + PAGE_CACHE_SIZE - 1;
Chris Mason7b13b7b2008-04-18 10:29:50 -04003333
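        /*
         * if we are allowed to sleep and the file is big enough to matter,
         * drop any extent maps in this range that are not pinned and not
         * locked or under writeback
         */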
Chris Mason70dec802008-01-29 09:59:12 -05003334 if ((mask & __GFP_WAIT) &&
3335 page->mapping->host->i_size > 16 * 1024 * 1024) {
Yan39b56372008-02-15 10:40:50 -05003336 u64 len;
Chris Mason70dec802008-01-29 09:59:12 -05003337 while (start <= end) {
Yan39b56372008-02-15 10:40:50 -05003338 len = end - start + 1;
Chris Mason890871b2009-09-02 16:24:52 -04003339 write_lock(&map->lock);
Yan39b56372008-02-15 10:40:50 -05003340 em = lookup_extent_mapping(map, start, len);
Tsutomu Itoh285190d2012-02-16 16:23:58 +09003341 if (!em) {
Chris Mason890871b2009-09-02 16:24:52 -04003342 write_unlock(&map->lock);
Chris Mason70dec802008-01-29 09:59:12 -05003343 break;
3344 }
Chris Mason7f3c74f2008-07-18 12:01:11 -04003345 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
3346 em->start != start) {
Chris Mason890871b2009-09-02 16:24:52 -04003347 write_unlock(&map->lock);
Chris Mason70dec802008-01-29 09:59:12 -05003348 free_extent_map(em);
3349 break;
3350 }
3351 if (!test_range_bit(tree, em->start,
3352 extent_map_end(em) - 1,
Chris Mason8b62b722009-09-02 16:53:46 -04003353 EXTENT_LOCKED | EXTENT_WRITEBACK,
Chris Mason9655d292009-09-02 15:22:30 -04003354 0, NULL)) {
Chris Mason70dec802008-01-29 09:59:12 -05003355 remove_extent_mapping(map, em);
3356 /* once for the rb tree */
3357 free_extent_map(em);
3358 }
3359 start = extent_map_end(em);
Chris Mason890871b2009-09-02 16:24:52 -04003360 write_unlock(&map->lock);
Chris Mason70dec802008-01-29 09:59:12 -05003361
3362 /* once for us */
Chris Masond1310b22008-01-24 16:13:08 -05003363 free_extent_map(em);
3364 }
Chris Masond1310b22008-01-24 16:13:08 -05003365 }
Chris Mason7b13b7b2008-04-18 10:29:50 -04003366 return try_release_extent_state(map, tree, page, mask);
Chris Masond1310b22008-01-24 16:13:08 -05003367}
Chris Masond1310b22008-01-24 16:13:08 -05003368
Chris Masonec29ed52011-02-23 16:23:20 -05003369/*
3370 * helper function for fiemap, which doesn't want to see any holes.
3371 * This maps until we find something past 'last'
3372 */
3373static struct extent_map *get_extent_skip_holes(struct inode *inode,
3374 u64 offset,
3375 u64 last,
3376 get_extent_t *get_extent)
3377{
3378 u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
3379 struct extent_map *em;
3380 u64 len;
3381
3382 if (offset >= last)
3383 return NULL;
3384
3385 while (1) {
3386 len = last - offset;
3387 if (len == 0)
3388 break;
3389 len = (len + sectorsize - 1) & ~(sectorsize - 1);
3390 em = get_extent(inode, NULL, 0, offset, len, 0);
David Sterbac7040052011-04-19 18:00:01 +02003391 if (IS_ERR_OR_NULL(em))
Chris Masonec29ed52011-02-23 16:23:20 -05003392 return em;
3393
3394 /* if this isn't a hole return it */
3395 if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
3396 em->block_start != EXTENT_MAP_HOLE) {
3397 return em;
3398 }
3399
3400 /* this is a hole, advance to the next extent */
3401 offset = extent_map_end(em);
3402 free_extent_map(em);
3403 if (offset >= last)
3404 break;
3405 }
3406 return NULL;
3407}
3408
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003409int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3410 __u64 start, __u64 len, get_extent_t *get_extent)
3411{
Josef Bacik975f84f2010-11-23 19:36:57 +00003412 int ret = 0;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003413 u64 off = start;
3414 u64 max = start + len;
3415 u32 flags = 0;
Josef Bacik975f84f2010-11-23 19:36:57 +00003416 u32 found_type;
3417 u64 last;
Chris Masonec29ed52011-02-23 16:23:20 -05003418 u64 last_for_get_extent = 0;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003419 u64 disko = 0;
Chris Masonec29ed52011-02-23 16:23:20 -05003420 u64 isize = i_size_read(inode);
Josef Bacik975f84f2010-11-23 19:36:57 +00003421 struct btrfs_key found_key;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003422 struct extent_map *em = NULL;
Josef Bacik2ac55d42010-02-03 19:33:23 +00003423 struct extent_state *cached_state = NULL;
Josef Bacik975f84f2010-11-23 19:36:57 +00003424 struct btrfs_path *path;
3425 struct btrfs_file_extent_item *item;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003426 int end = 0;
Chris Masonec29ed52011-02-23 16:23:20 -05003427 u64 em_start = 0;
3428 u64 em_len = 0;
3429 u64 em_end = 0;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003430 unsigned long emflags;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003431
3432 if (len == 0)
3433 return -EINVAL;
3434
Josef Bacik975f84f2010-11-23 19:36:57 +00003435 path = btrfs_alloc_path();
3436 if (!path)
3437 return -ENOMEM;
3438 path->leave_spinning = 1;
3439
Josef Bacik4d479cf2011-11-17 11:34:31 -05003440 start = ALIGN(start, BTRFS_I(inode)->root->sectorsize);
3441 len = ALIGN(len, BTRFS_I(inode)->root->sectorsize);
3442
Chris Masonec29ed52011-02-23 16:23:20 -05003443 /*
3444 * lookup the last file extent. We're not using i_size here
3445 * because there might be preallocation past i_size
3446 */
Josef Bacik975f84f2010-11-23 19:36:57 +00003447 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
Li Zefan33345d012011-04-20 10:31:50 +08003448 path, btrfs_ino(inode), -1, 0);
Josef Bacik975f84f2010-11-23 19:36:57 +00003449 if (ret < 0) {
3450 btrfs_free_path(path);
3451 return ret;
3452 }
3453 WARN_ON(!ret);
3454 path->slots[0]--;
3455 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3456 struct btrfs_file_extent_item);
3457 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
3458 found_type = btrfs_key_type(&found_key);
3459
Chris Masonec29ed52011-02-23 16:23:20 -05003460 /* No extents, but there might be delalloc bits */
Li Zefan33345d012011-04-20 10:31:50 +08003461 if (found_key.objectid != btrfs_ino(inode) ||
Josef Bacik975f84f2010-11-23 19:36:57 +00003462 found_type != BTRFS_EXTENT_DATA_KEY) {
Chris Masonec29ed52011-02-23 16:23:20 -05003463 /* have to trust i_size as the end */
3464 last = (u64)-1;
3465 last_for_get_extent = isize;
3466 } else {
3467 /*
3468 * remember the start of the last extent. There are a
3469 * bunch of different factors that go into the length of the
3470 * extent, so it's much less complex to remember where it started
3471 */
3472 last = found_key.offset;
3473 last_for_get_extent = last + 1;
Josef Bacik975f84f2010-11-23 19:36:57 +00003474 }
Josef Bacik975f84f2010-11-23 19:36:57 +00003475 btrfs_free_path(path);
3476
Chris Masonec29ed52011-02-23 16:23:20 -05003477 /*
3478 * we might have some extents allocated but more delalloc past those
3479 * extents. so, we trust isize unless the start of the last extent is
3480 * beyond isize
3481 */
3482 if (last < isize) {
3483 last = (u64)-1;
3484 last_for_get_extent = isize;
3485 }
3486
Josef Bacik2ac55d42010-02-03 19:33:23 +00003487 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
Jeff Mahoneyd0082372012-03-01 14:57:19 +01003488 &cached_state);
Chris Masonec29ed52011-02-23 16:23:20 -05003489
Josef Bacik4d479cf2011-11-17 11:34:31 -05003490 em = get_extent_skip_holes(inode, start, last_for_get_extent,
Chris Masonec29ed52011-02-23 16:23:20 -05003491 get_extent);
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003492 if (!em)
3493 goto out;
3494 if (IS_ERR(em)) {
3495 ret = PTR_ERR(em);
3496 goto out;
3497 }
Josef Bacik975f84f2010-11-23 19:36:57 +00003498
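        /*
         * walk the extent maps from 'off' forward, emitting one fiemap
         * record per extent until we run past the requested range
         */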
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003499 while (!end) {
Chris Masonea8efc72011-03-08 11:54:40 -05003500 u64 offset_in_extent;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003501
Chris Masonea8efc72011-03-08 11:54:40 -05003502 /* break if the extent we found is outside the range */
3503 if (em->start >= max || extent_map_end(em) < off)
3504 break;
3505
3506 /*
3507 * get_extent may return an extent that starts before our
3508 * requested range. We have to make sure the ranges
3509 * we return to fiemap always move forward and don't
3510 * overlap, so adjust the offsets here
3511 */
3512 em_start = max(em->start, off);
3513
3514 /*
3515 * record the offset from the start of the extent
3516 * for adjusting the disk offset below
3517 */
3518 offset_in_extent = em_start - em->start;
Chris Masonec29ed52011-02-23 16:23:20 -05003519 em_end = extent_map_end(em);
Chris Masonea8efc72011-03-08 11:54:40 -05003520 em_len = em_end - em_start;
Chris Masonec29ed52011-02-23 16:23:20 -05003521 emflags = em->flags;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003522 disko = 0;
3523 flags = 0;
3524
Chris Masonea8efc72011-03-08 11:54:40 -05003525 /*
3526 * bump off for our next call to get_extent
3527 */
3528 off = extent_map_end(em);
3529 if (off >= max)
3530 end = 1;
3531
Heiko Carstens93dbfad2009-04-03 10:33:45 -04003532 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003533 end = 1;
3534 flags |= FIEMAP_EXTENT_LAST;
Heiko Carstens93dbfad2009-04-03 10:33:45 -04003535 } else if (em->block_start == EXTENT_MAP_INLINE) {
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003536 flags |= (FIEMAP_EXTENT_DATA_INLINE |
3537 FIEMAP_EXTENT_NOT_ALIGNED);
Heiko Carstens93dbfad2009-04-03 10:33:45 -04003538 } else if (em->block_start == EXTENT_MAP_DELALLOC) {
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003539 flags |= (FIEMAP_EXTENT_DELALLOC |
3540 FIEMAP_EXTENT_UNKNOWN);
Heiko Carstens93dbfad2009-04-03 10:33:45 -04003541 } else {
Chris Masonea8efc72011-03-08 11:54:40 -05003542 disko = em->block_start + offset_in_extent;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003543 }
3544 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
3545 flags |= FIEMAP_EXTENT_ENCODED;
3546
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003547 free_extent_map(em);
3548 em = NULL;
Chris Masonec29ed52011-02-23 16:23:20 -05003549 if ((em_start >= last) || em_len == (u64)-1 ||
3550 (last == (u64)-1 && isize <= em_end)) {
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003551 flags |= FIEMAP_EXTENT_LAST;
3552 end = 1;
3553 }
3554
Chris Masonec29ed52011-02-23 16:23:20 -05003555 /* now scan forward to see if this is really the last extent. */
3556 em = get_extent_skip_holes(inode, off, last_for_get_extent,
3557 get_extent);
3558 if (IS_ERR(em)) {
3559 ret = PTR_ERR(em);
3560 goto out;
3561 }
3562 if (!em) {
Josef Bacik975f84f2010-11-23 19:36:57 +00003563 flags |= FIEMAP_EXTENT_LAST;
3564 end = 1;
3565 }
Chris Masonec29ed52011-02-23 16:23:20 -05003566 ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
3567 em_len, flags);
3568 if (ret)
3569 goto out_free;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003570 }
3571out_free:
3572 free_extent_map(em);
3573out:
Josef Bacik2ac55d42010-02-03 19:33:23 +00003574 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len,
3575 &cached_state, GFP_NOFS);
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003576 return ret;
3577}
3578
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02003579inline struct page *extent_buffer_page(struct extent_buffer *eb,
Chris Masond1310b22008-01-24 16:13:08 -05003580 unsigned long i)
3581{
3582 struct page *p;
3583 struct address_space *mapping;
3584
3585 if (i == 0)
3586 return eb->first_page;
3587 i += eb->start >> PAGE_CACHE_SHIFT;
3588 mapping = eb->first_page->mapping;
Chris Mason33958dc2008-07-30 10:29:12 -04003589 if (!mapping)
3590 return NULL;
Sven Wegener0ee0fda2008-07-30 16:54:26 -04003591
3592 /*
3593 * extent_buffer_page is only called after pinning the page
3594 * by increasing the reference count. So we know the page must
3595 * be in the radix tree.
3596 */
Sven Wegener0ee0fda2008-07-30 16:54:26 -04003597 rcu_read_lock();
Chris Masond1310b22008-01-24 16:13:08 -05003598 p = radix_tree_lookup(&mapping->page_tree, i);
Sven Wegener0ee0fda2008-07-30 16:54:26 -04003599 rcu_read_unlock();
Chris Mason2b1f55b2008-09-24 11:48:04 -04003600
Chris Masond1310b22008-01-24 16:13:08 -05003601 return p;
3602}
3603
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02003604inline unsigned long num_extent_pages(u64 start, u64 len)
Chris Masonce9adaa2008-04-09 16:28:12 -04003605{
Chris Mason6af118c2008-07-22 11:18:07 -04003606 return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
3607 (start >> PAGE_CACHE_SHIFT);
Chris Mason728131d2008-04-09 16:28:12 -04003608}
3609
Chris Masond1310b22008-01-24 16:13:08 -05003610static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
3611 u64 start,
3612 unsigned long len,
3613 gfp_t mask)
3614{
3615 struct extent_buffer *eb = NULL;
Chris Mason39351272009-02-04 09:24:05 -05003616#if LEAK_DEBUG
Chris Mason2d2ae542008-03-26 16:24:23 -04003617 unsigned long flags;
Chris Mason4bef0842008-09-08 11:18:08 -04003618#endif
Chris Masond1310b22008-01-24 16:13:08 -05003619
Chris Masond1310b22008-01-24 16:13:08 -05003620 eb = kmem_cache_zalloc(extent_buffer_cache, mask);
Tsutomu Itoh91ca3382011-01-05 02:32:22 +00003621 if (eb == NULL)
3622 return NULL;
Chris Masond1310b22008-01-24 16:13:08 -05003623 eb->start = start;
3624 eb->len = len;
Chris Masonbd681512011-07-16 15:23:14 -04003625 rwlock_init(&eb->lock);
3626 atomic_set(&eb->write_locks, 0);
3627 atomic_set(&eb->read_locks, 0);
3628 atomic_set(&eb->blocking_readers, 0);
3629 atomic_set(&eb->blocking_writers, 0);
3630 atomic_set(&eb->spinning_readers, 0);
3631 atomic_set(&eb->spinning_writers, 0);
Arne Jansen5b25f702011-09-13 10:55:48 +02003632 eb->lock_nested = 0;
Chris Masonbd681512011-07-16 15:23:14 -04003633 init_waitqueue_head(&eb->write_lock_wq);
3634 init_waitqueue_head(&eb->read_lock_wq);
Chris Masonb4ce94d2009-02-04 09:25:08 -05003635
Chris Mason39351272009-02-04 09:24:05 -05003636#if LEAK_DEBUG
Chris Mason2d2ae542008-03-26 16:24:23 -04003637 spin_lock_irqsave(&leak_lock, flags);
3638 list_add(&eb->leak_list, &buffers);
3639 spin_unlock_irqrestore(&leak_lock, flags);
Chris Mason4bef0842008-09-08 11:18:08 -04003640#endif
Chris Masond1310b22008-01-24 16:13:08 -05003641 atomic_set(&eb->refs, 1);
3642
3643 return eb;
3644}
3645
3646static void __free_extent_buffer(struct extent_buffer *eb)
3647{
Chris Mason39351272009-02-04 09:24:05 -05003648#if LEAK_DEBUG
Chris Mason2d2ae542008-03-26 16:24:23 -04003649 unsigned long flags;
3650 spin_lock_irqsave(&leak_lock, flags);
3651 list_del(&eb->leak_list);
3652 spin_unlock_irqrestore(&leak_lock, flags);
Chris Mason4bef0842008-09-08 11:18:08 -04003653#endif
Chris Masond1310b22008-01-24 16:13:08 -05003654 kmem_cache_free(extent_buffer_cache, eb);
3655}
3656
Miao Xie897ca6e2010-10-26 20:57:29 -04003657/*
3658 * Helper for releasing extent buffer page.
3659 */
3660static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
3661 unsigned long start_idx)
3662{
3663 unsigned long index;
3664 struct page *page;
3665
3666 if (!eb->first_page)
3667 return;
3668
3669 index = num_extent_pages(eb->start, eb->len);
3670 if (start_idx >= index)
3671 return;
3672
3673 do {
3674 index--;
3675 page = extent_buffer_page(eb, index);
3676 if (page)
3677 page_cache_release(page);
3678 } while (index != start_idx);
3679}
3680
3681/*
3682 * Helper for releasing the extent buffer.
3683 */
3684static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
3685{
3686 btrfs_release_extent_buffer_page(eb, 0);
3687 __free_extent_buffer(eb);
3688}
3689
Chris Masond1310b22008-01-24 16:13:08 -05003690struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3691 u64 start, unsigned long len,
David Sterbaba144192011-04-21 01:12:06 +02003692 struct page *page0)
Chris Masond1310b22008-01-24 16:13:08 -05003693{
3694 unsigned long num_pages = num_extent_pages(start, len);
3695 unsigned long i;
3696 unsigned long index = start >> PAGE_CACHE_SHIFT;
3697 struct extent_buffer *eb;
Chris Mason6af118c2008-07-22 11:18:07 -04003698 struct extent_buffer *exists = NULL;
Chris Masond1310b22008-01-24 16:13:08 -05003699 struct page *p;
3700 struct address_space *mapping = tree->mapping;
3701 int uptodate = 1;
Miao Xie19fe0a82010-10-26 20:57:29 -04003702 int ret;
Chris Masond1310b22008-01-24 16:13:08 -05003703
Miao Xie19fe0a82010-10-26 20:57:29 -04003704 rcu_read_lock();
3705 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
3706 if (eb && atomic_inc_not_zero(&eb->refs)) {
3707 rcu_read_unlock();
Josef Bacik0f9dd462008-09-23 13:14:11 -04003708 mark_page_accessed(eb->first_page);
Chris Mason6af118c2008-07-22 11:18:07 -04003709 return eb;
3710 }
Miao Xie19fe0a82010-10-26 20:57:29 -04003711 rcu_read_unlock();
Chris Mason6af118c2008-07-22 11:18:07 -04003712
David Sterbaba144192011-04-21 01:12:06 +02003713 eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS);
Peter2b114d12008-04-01 11:21:40 -04003714 if (!eb)
Chris Masond1310b22008-01-24 16:13:08 -05003715 return NULL;
3716
Chris Masond1310b22008-01-24 16:13:08 -05003717 if (page0) {
3718 eb->first_page = page0;
3719 i = 1;
3720 index++;
3721 page_cache_get(page0);
3722 mark_page_accessed(page0);
3723 set_page_extent_mapped(page0);
Chris Masond1310b22008-01-24 16:13:08 -05003724 set_page_extent_head(page0, len);
Chris Masonf1885912008-04-09 16:28:12 -04003725 uptodate = PageUptodate(page0);
Chris Masond1310b22008-01-24 16:13:08 -05003726 } else {
3727 i = 0;
3728 }
3729 for (; i < num_pages; i++, index++) {
Chris Masona6591712011-07-19 12:04:14 -04003730 p = find_or_create_page(mapping, index, GFP_NOFS);
Chris Masond1310b22008-01-24 16:13:08 -05003731 if (!p) {
3732 WARN_ON(1);
Chris Mason6af118c2008-07-22 11:18:07 -04003733 goto free_eb;
Chris Masond1310b22008-01-24 16:13:08 -05003734 }
3735 set_page_extent_mapped(p);
3736 mark_page_accessed(p);
3737 if (i == 0) {
3738 eb->first_page = p;
3739 set_page_extent_head(p, len);
3740 } else {
3741 set_page_private(p, EXTENT_PAGE_PRIVATE);
3742 }
3743 if (!PageUptodate(p))
3744 uptodate = 0;
Chris Masoneb14ab82011-02-10 12:35:00 -05003745
3746 /*
3747 * see below about how we avoid a nasty race with release page
3748 * and why we unlock later
3749 */
3750 if (i != 0)
3751 unlock_page(p);
Chris Masond1310b22008-01-24 16:13:08 -05003752 }
3753 if (uptodate)
Chris Masonb4ce94d2009-02-04 09:25:08 -05003754 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
Chris Masond1310b22008-01-24 16:13:08 -05003755
Miao Xie19fe0a82010-10-26 20:57:29 -04003756 ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
3757 if (ret)
3758 goto free_eb;
3759
Chris Mason6af118c2008-07-22 11:18:07 -04003760 spin_lock(&tree->buffer_lock);
Miao Xie19fe0a82010-10-26 20:57:29 -04003761 ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
3762 if (ret == -EEXIST) {
3763 exists = radix_tree_lookup(&tree->buffer,
3764 start >> PAGE_CACHE_SHIFT);
Chris Mason6af118c2008-07-22 11:18:07 -04003765 /* add one reference for the caller */
3766 atomic_inc(&exists->refs);
3767 spin_unlock(&tree->buffer_lock);
Miao Xie19fe0a82010-10-26 20:57:29 -04003768 radix_tree_preload_end();
Chris Mason6af118c2008-07-22 11:18:07 -04003769 goto free_eb;
3770 }
Chris Mason6af118c2008-07-22 11:18:07 -04003771 /* add one reference for the tree */
3772 atomic_inc(&eb->refs);
Yan, Zhengf044ba72010-02-04 08:46:56 +00003773 spin_unlock(&tree->buffer_lock);
Miao Xie19fe0a82010-10-26 20:57:29 -04003774 radix_tree_preload_end();
Chris Masoneb14ab82011-02-10 12:35:00 -05003775
3776 /*
3777 * there is a race where release page may have
3778 * tried to find this extent buffer in the radix
3779 * but failed. It will tell the VM it is safe to
3780 * reclaim the, and it will clear the page private bit.
3781 * We must make sure to set the page private bit properly
3782 * after the extent buffer is in the radix tree so
3783 * it doesn't get lost
3784 */
3785 set_page_extent_mapped(eb->first_page);
3786 set_page_extent_head(eb->first_page, eb->len);
3787 if (!page0)
3788 unlock_page(eb->first_page);
Chris Masond1310b22008-01-24 16:13:08 -05003789 return eb;
3790
Chris Mason6af118c2008-07-22 11:18:07 -04003791free_eb:
Chris Masoneb14ab82011-02-10 12:35:00 -05003792 if (eb->first_page && !page0)
3793 unlock_page(eb->first_page);
3794
Chris Masond1310b22008-01-24 16:13:08 -05003795 if (!atomic_dec_and_test(&eb->refs))
Chris Mason6af118c2008-07-22 11:18:07 -04003796 return exists;
Miao Xie897ca6e2010-10-26 20:57:29 -04003797 btrfs_release_extent_buffer(eb);
Chris Mason6af118c2008-07-22 11:18:07 -04003798 return exists;
Chris Masond1310b22008-01-24 16:13:08 -05003799}
Chris Masond1310b22008-01-24 16:13:08 -05003800
3801struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
David Sterbaf09d1f62011-04-21 01:08:01 +02003802 u64 start, unsigned long len)
Chris Masond1310b22008-01-24 16:13:08 -05003803{
Chris Masond1310b22008-01-24 16:13:08 -05003804 struct extent_buffer *eb;
Chris Masond1310b22008-01-24 16:13:08 -05003805
Miao Xie19fe0a82010-10-26 20:57:29 -04003806 rcu_read_lock();
3807 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
3808 if (eb && atomic_inc_not_zero(&eb->refs)) {
3809 rcu_read_unlock();
Josef Bacik0f9dd462008-09-23 13:14:11 -04003810 mark_page_accessed(eb->first_page);
Miao Xie19fe0a82010-10-26 20:57:29 -04003811 return eb;
3812 }
3813 rcu_read_unlock();
Josef Bacik0f9dd462008-09-23 13:14:11 -04003814
Miao Xie19fe0a82010-10-26 20:57:29 -04003815 return NULL;
Chris Masond1310b22008-01-24 16:13:08 -05003816}
Chris Masond1310b22008-01-24 16:13:08 -05003817
3818void free_extent_buffer(struct extent_buffer *eb)
3819{
Chris Masond1310b22008-01-24 16:13:08 -05003820 if (!eb)
3821 return;
3822
3823 if (!atomic_dec_and_test(&eb->refs))
3824 return;
3825
Chris Mason6af118c2008-07-22 11:18:07 -04003826 WARN_ON(1);
Chris Masond1310b22008-01-24 16:13:08 -05003827}
Chris Masond1310b22008-01-24 16:13:08 -05003828
Jeff Mahoney143bede2012-03-01 14:56:26 +01003829void clear_extent_buffer_dirty(struct extent_io_tree *tree,
Chris Masond1310b22008-01-24 16:13:08 -05003830 struct extent_buffer *eb)
3831{
Chris Masond1310b22008-01-24 16:13:08 -05003832 unsigned long i;
3833 unsigned long num_pages;
3834 struct page *page;
3835
Chris Masond1310b22008-01-24 16:13:08 -05003836 num_pages = num_extent_pages(eb->start, eb->len);
3837
3838 for (i = 0; i < num_pages; i++) {
3839 page = extent_buffer_page(eb, i);
Chris Masonb9473432009-03-13 11:00:37 -04003840 if (!PageDirty(page))
Chris Masond2c3f4f2008-11-19 12:44:22 -05003841 continue;
3842
Chris Masona61e6f22008-07-22 11:18:08 -04003843 lock_page(page);
Chris Masoneb14ab82011-02-10 12:35:00 -05003844 WARN_ON(!PagePrivate(page));
3845
3846 set_page_extent_mapped(page);
Chris Masond1310b22008-01-24 16:13:08 -05003847 if (i == 0)
3848 set_page_extent_head(page, eb->len);
Chris Masond1310b22008-01-24 16:13:08 -05003849
Chris Masond1310b22008-01-24 16:13:08 -05003850 clear_page_dirty_for_io(page);
Sven Wegener0ee0fda2008-07-30 16:54:26 -04003851 spin_lock_irq(&page->mapping->tree_lock);
Chris Masond1310b22008-01-24 16:13:08 -05003852 if (!PageDirty(page)) {
3853 radix_tree_tag_clear(&page->mapping->page_tree,
3854 page_index(page),
3855 PAGECACHE_TAG_DIRTY);
3856 }
Sven Wegener0ee0fda2008-07-30 16:54:26 -04003857 spin_unlock_irq(&page->mapping->tree_lock);
Chris Masonbf0da8c2011-11-04 12:29:37 -04003858 ClearPageError(page);
Chris Masona61e6f22008-07-22 11:18:08 -04003859 unlock_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05003860 }
Chris Masond1310b22008-01-24 16:13:08 -05003861}
Chris Masond1310b22008-01-24 16:13:08 -05003862
Chris Masond1310b22008-01-24 16:13:08 -05003863int set_extent_buffer_dirty(struct extent_io_tree *tree,
3864 struct extent_buffer *eb)
3865{
3866 unsigned long i;
3867 unsigned long num_pages;
Chris Masonb9473432009-03-13 11:00:37 -04003868 int was_dirty = 0;
Chris Masond1310b22008-01-24 16:13:08 -05003869
Chris Masonb9473432009-03-13 11:00:37 -04003870 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
Chris Masond1310b22008-01-24 16:13:08 -05003871 num_pages = num_extent_pages(eb->start, eb->len);
Chris Masonb9473432009-03-13 11:00:37 -04003872 for (i = 0; i < num_pages; i++)
Chris Masond1310b22008-01-24 16:13:08 -05003873 __set_page_dirty_nobuffers(extent_buffer_page(eb, i));
Chris Masonb9473432009-03-13 11:00:37 -04003874 return was_dirty;
Chris Masond1310b22008-01-24 16:13:08 -05003875}
Chris Masond1310b22008-01-24 16:13:08 -05003876
Chris Mason19b6caf2011-07-25 06:50:50 -04003877static int __eb_straddles_pages(u64 start, u64 len)
3878{
3879 if (len < PAGE_CACHE_SIZE)
3880 return 1;
3881 if (start & (PAGE_CACHE_SIZE - 1))
3882 return 1;
3883 if ((start + len) & (PAGE_CACHE_SIZE - 1))
3884 return 1;
3885 return 0;
3886}
3887
3888static int eb_straddles_pages(struct extent_buffer *eb)
3889{
3890 return __eb_straddles_pages(eb->start, eb->len);
3891}
3892
Chris Mason1259ab72008-05-12 13:39:03 -04003893int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
Josef Bacik2ac55d42010-02-03 19:33:23 +00003894 struct extent_buffer *eb,
3895 struct extent_state **cached_state)
Chris Mason1259ab72008-05-12 13:39:03 -04003896{
3897 unsigned long i;
3898 struct page *page;
3899 unsigned long num_pages;
3900
3901 num_pages = num_extent_pages(eb->start, eb->len);
Chris Masonb4ce94d2009-02-04 09:25:08 -05003902 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
Chris Mason1259ab72008-05-12 13:39:03 -04003903
Chris Mason50653192012-02-22 12:36:24 -05003904 clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
3905 cached_state, GFP_NOFS);
3906
Chris Mason1259ab72008-05-12 13:39:03 -04003907 for (i = 0; i < num_pages; i++) {
3908 page = extent_buffer_page(eb, i);
Chris Mason33958dc2008-07-30 10:29:12 -04003909 if (page)
3910 ClearPageUptodate(page);
Chris Mason1259ab72008-05-12 13:39:03 -04003911 }
3912 return 0;
3913}
3914
Chris Masond1310b22008-01-24 16:13:08 -05003915int set_extent_buffer_uptodate(struct extent_io_tree *tree,
3916 struct extent_buffer *eb)
3917{
3918 unsigned long i;
3919 struct page *page;
3920 unsigned long num_pages;
3921
3922 num_pages = num_extent_pages(eb->start, eb->len);
3923
Chris Mason19b6caf2011-07-25 06:50:50 -04003924 if (eb_straddles_pages(eb)) {
3925 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
3926 NULL, GFP_NOFS);
3927 }
Chris Masond1310b22008-01-24 16:13:08 -05003928 for (i = 0; i < num_pages; i++) {
3929 page = extent_buffer_page(eb, i);
3930 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
3931 ((i == num_pages - 1) &&
3932 ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
3933 check_page_uptodate(tree, page);
3934 continue;
3935 }
3936 SetPageUptodate(page);
3937 }
3938 return 0;
3939}
Chris Masond1310b22008-01-24 16:13:08 -05003940
Chris Masonce9adaa2008-04-09 16:28:12 -04003941int extent_range_uptodate(struct extent_io_tree *tree,
3942 u64 start, u64 end)
3943{
3944 struct page *page;
3945 int ret;
3946 int pg_uptodate = 1;
3947 int uptodate;
3948 unsigned long index;
3949
Chris Mason19b6caf2011-07-25 06:50:50 -04003950 if (__eb_straddles_pages(start, end - start + 1)) {
3951 ret = test_range_bit(tree, start, end,
3952 EXTENT_UPTODATE, 1, NULL);
3953 if (ret)
3954 return 1;
3955 }
Chris Masond3977122009-01-05 21:25:51 -05003956 while (start <= end) {
Chris Masonce9adaa2008-04-09 16:28:12 -04003957 index = start >> PAGE_CACHE_SHIFT;
3958 page = find_get_page(tree->mapping, index);
Mitch Harder8bedd512012-01-26 15:01:11 -05003959 if (!page)
3960 return 1;
Chris Masonce9adaa2008-04-09 16:28:12 -04003961 uptodate = PageUptodate(page);
3962 page_cache_release(page);
3963 if (!uptodate) {
3964 pg_uptodate = 0;
3965 break;
3966 }
3967 start += PAGE_CACHE_SIZE;
3968 }
3969 return pg_uptodate;
3970}
3971
Chris Masond1310b22008-01-24 16:13:08 -05003972int extent_buffer_uptodate(struct extent_io_tree *tree,
Josef Bacik2ac55d42010-02-03 19:33:23 +00003973 struct extent_buffer *eb,
3974 struct extent_state *cached_state)
Chris Masond1310b22008-01-24 16:13:08 -05003975{
Chris Mason728131d2008-04-09 16:28:12 -04003976 int ret = 0;
Chris Masonce9adaa2008-04-09 16:28:12 -04003977 unsigned long num_pages;
3978 unsigned long i;
Chris Mason728131d2008-04-09 16:28:12 -04003979 struct page *page;
3980 int pg_uptodate = 1;
3981
Chris Masonb4ce94d2009-02-04 09:25:08 -05003982 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
Chris Mason42352982008-04-28 16:40:52 -04003983 return 1;
Chris Mason728131d2008-04-09 16:28:12 -04003984
Chris Mason19b6caf2011-07-25 06:50:50 -04003985 if (eb_straddles_pages(eb)) {
3986 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
3987 EXTENT_UPTODATE, 1, cached_state);
3988 if (ret)
3989 return ret;
3990 }
Chris Mason728131d2008-04-09 16:28:12 -04003991
3992 num_pages = num_extent_pages(eb->start, eb->len);
3993 for (i = 0; i < num_pages; i++) {
3994 page = extent_buffer_page(eb, i);
3995 if (!PageUptodate(page)) {
3996 pg_uptodate = 0;
3997 break;
3998 }
3999 }
Chris Mason42352982008-04-28 16:40:52 -04004000 return pg_uptodate;
Chris Masond1310b22008-01-24 16:13:08 -05004001}
Chris Masond1310b22008-01-24 16:13:08 -05004002
4003int read_extent_buffer_pages(struct extent_io_tree *tree,
Arne Jansenbb82ab82011-06-10 14:06:53 +02004004 struct extent_buffer *eb, u64 start, int wait,
Chris Masonf1885912008-04-09 16:28:12 -04004005 get_extent_t *get_extent, int mirror_num)
Chris Masond1310b22008-01-24 16:13:08 -05004006{
4007 unsigned long i;
4008 unsigned long start_i;
4009 struct page *page;
4010 int err;
4011 int ret = 0;
Chris Masonce9adaa2008-04-09 16:28:12 -04004012 int locked_pages = 0;
4013 int all_uptodate = 1;
4014 int inc_all_pages = 0;
Chris Masond1310b22008-01-24 16:13:08 -05004015 unsigned long num_pages;
Chris Masona86c12c2008-02-07 10:50:54 -05004016 struct bio *bio = NULL;
Chris Masonc8b97812008-10-29 14:49:59 -04004017 unsigned long bio_flags = 0;
Chris Masona86c12c2008-02-07 10:50:54 -05004018
Chris Masonb4ce94d2009-02-04 09:25:08 -05004019 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
Chris Masond1310b22008-01-24 16:13:08 -05004020 return 0;
4021
Chris Mason19b6caf2011-07-25 06:50:50 -04004022 if (eb_straddles_pages(eb)) {
4023 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
4024 EXTENT_UPTODATE, 1, NULL)) {
4025 return 0;
4026 }
Chris Masond1310b22008-01-24 16:13:08 -05004027 }
4028
4029 if (start) {
4030 WARN_ON(start < eb->start);
4031 start_i = (start >> PAGE_CACHE_SHIFT) -
4032 (eb->start >> PAGE_CACHE_SHIFT);
4033 } else {
4034 start_i = 0;
4035 }
4036
4037 num_pages = num_extent_pages(eb->start, eb->len);
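        /*
         * lock every page in the buffer; for WAIT_NONE a contended page
         * means we give up instead of blocking
         */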
4038 for (i = start_i; i < num_pages; i++) {
4039 page = extent_buffer_page(eb, i);
Arne Jansenbb82ab82011-06-10 14:06:53 +02004040 if (wait == WAIT_NONE) {
David Woodhouse2db04962008-08-07 11:19:43 -04004041 if (!trylock_page(page))
Chris Masonce9adaa2008-04-09 16:28:12 -04004042 goto unlock_exit;
Chris Masond1310b22008-01-24 16:13:08 -05004043 } else {
4044 lock_page(page);
4045 }
Chris Masonce9adaa2008-04-09 16:28:12 -04004046 locked_pages++;
Chris Masond3977122009-01-05 21:25:51 -05004047 if (!PageUptodate(page))
Chris Masonce9adaa2008-04-09 16:28:12 -04004048 all_uptodate = 0;
Chris Masonce9adaa2008-04-09 16:28:12 -04004049 }
4050 if (all_uptodate) {
4051 if (start_i == 0)
Chris Masonb4ce94d2009-02-04 09:25:08 -05004052 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
Chris Masonce9adaa2008-04-09 16:28:12 -04004053 goto unlock_exit;
4054 }
4055
4056 for (i = start_i; i < num_pages; i++) {
4057 page = extent_buffer_page(eb, i);
Chris Masoneb14ab82011-02-10 12:35:00 -05004058
4059 WARN_ON(!PagePrivate(page));
4060
4061 set_page_extent_mapped(page);
4062 if (i == 0)
4063 set_page_extent_head(page, eb->len);
4064
Chris Masonce9adaa2008-04-09 16:28:12 -04004065 if (inc_all_pages)
4066 page_cache_get(page);
4067 if (!PageUptodate(page)) {
4068 if (start_i == 0)
4069 inc_all_pages = 1;
Chris Masonf1885912008-04-09 16:28:12 -04004070 ClearPageError(page);
Chris Masona86c12c2008-02-07 10:50:54 -05004071 err = __extent_read_full_page(tree, page,
Chris Masonf1885912008-04-09 16:28:12 -04004072 get_extent, &bio,
Chris Masonc8b97812008-10-29 14:49:59 -04004073 mirror_num, &bio_flags);
Chris Masond3977122009-01-05 21:25:51 -05004074 if (err)
Chris Masond1310b22008-01-24 16:13:08 -05004075 ret = err;
Chris Masond1310b22008-01-24 16:13:08 -05004076 } else {
4077 unlock_page(page);
4078 }
4079 }
4080
Jeff Mahoney355808c2011-10-03 23:23:14 -04004081 if (bio) {
4082 err = submit_one_bio(READ, bio, mirror_num, bio_flags);
4083 BUG_ON(err < 0);
4084 }
Chris Masona86c12c2008-02-07 10:50:54 -05004085
Arne Jansenbb82ab82011-06-10 14:06:53 +02004086 if (ret || wait != WAIT_COMPLETE)
Chris Masond1310b22008-01-24 16:13:08 -05004087 return ret;
Chris Masond3977122009-01-05 21:25:51 -05004088
Chris Masond1310b22008-01-24 16:13:08 -05004089 for (i = start_i; i < num_pages; i++) {
4090 page = extent_buffer_page(eb, i);
4091 wait_on_page_locked(page);
Chris Masond3977122009-01-05 21:25:51 -05004092 if (!PageUptodate(page))
Chris Masond1310b22008-01-24 16:13:08 -05004093 ret = -EIO;
Chris Masond1310b22008-01-24 16:13:08 -05004094 }
Chris Masond3977122009-01-05 21:25:51 -05004095
Chris Masond1310b22008-01-24 16:13:08 -05004096 if (!ret)
Chris Masonb4ce94d2009-02-04 09:25:08 -05004097 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
Chris Masond1310b22008-01-24 16:13:08 -05004098 return ret;
Chris Masonce9adaa2008-04-09 16:28:12 -04004099
4100unlock_exit:
4101 i = start_i;
Chris Masond3977122009-01-05 21:25:51 -05004102 while (locked_pages > 0) {
Chris Masonce9adaa2008-04-09 16:28:12 -04004103 page = extent_buffer_page(eb, i);
4104 i++;
4105 unlock_page(page);
4106 locked_pages--;
4107 }
4108 return ret;
Chris Masond1310b22008-01-24 16:13:08 -05004109}
Chris Masond1310b22008-01-24 16:13:08 -05004110
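/*
 * read_extent_buffer - copy @len bytes starting at offset @start inside the
 * extent buffer into the memory @dstv points to, one page at a time.
 */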
4111void read_extent_buffer(struct extent_buffer *eb, void *dstv,
4112 unsigned long start,
4113 unsigned long len)
4114{
4115 size_t cur;
4116 size_t offset;
4117 struct page *page;
4118 char *kaddr;
4119 char *dst = (char *)dstv;
4120 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
4121 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
Chris Masond1310b22008-01-24 16:13:08 -05004122
4123 WARN_ON(start > eb->len);
4124 WARN_ON(start + len > eb->start + eb->len);
4125
4126 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
4127
Chris Masond3977122009-01-05 21:25:51 -05004128 while (len > 0) {
Chris Masond1310b22008-01-24 16:13:08 -05004129 page = extent_buffer_page(eb, i);
Chris Masond1310b22008-01-24 16:13:08 -05004130
4131 cur = min(len, (PAGE_CACHE_SIZE - offset));
Chris Masona6591712011-07-19 12:04:14 -04004132 kaddr = page_address(page);
Chris Masond1310b22008-01-24 16:13:08 -05004133 memcpy(dst, kaddr + offset, cur);
Chris Masond1310b22008-01-24 16:13:08 -05004134
4135 dst += cur;
4136 len -= cur;
4137 offset = 0;
4138 i++;
4139 }
4140}
Chris Masond1310b22008-01-24 16:13:08 -05004141
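/*
 * map_private_extent_buffer - hand the caller a direct pointer into the page
 * backing the range [@start, @start + @min_len) of the extent buffer.  The
 * range must fit inside a single page, otherwise -EINVAL is returned.  On
 * success *map points at the data for buffer offset *map_start and *map_len
 * bytes are addressable from there, so the byte at offset @start lives at
 * *map + (@start - *map_start).  A caller would roughly do:
 *
 *	if (!map_private_extent_buffer(eb, off, size, &kaddr,
 *				       &map_start, &map_len))
 *		memcpy(&val, kaddr + (off - map_start), size);
 */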
4142int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
Chris Masona6591712011-07-19 12:04:14 -04004143 unsigned long min_len, char **map,
Chris Masond1310b22008-01-24 16:13:08 -05004144 unsigned long *map_start,
Chris Masona6591712011-07-19 12:04:14 -04004145 unsigned long *map_len)
Chris Masond1310b22008-01-24 16:13:08 -05004146{
4147 size_t offset = start & (PAGE_CACHE_SIZE - 1);
4148 char *kaddr;
4149 struct page *p;
4150 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
4151 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
4152 unsigned long end_i = (start_offset + start + min_len - 1) >>
4153 PAGE_CACHE_SHIFT;
4154
4155 if (i != end_i)
4156 return -EINVAL;
4157
4158 if (i == 0) {
4159 offset = start_offset;
4160 *map_start = 0;
4161 } else {
4162 offset = 0;
4163 *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset;
4164 }
Chris Masond3977122009-01-05 21:25:51 -05004165
Chris Masond1310b22008-01-24 16:13:08 -05004166 if (start + min_len > eb->len) {
Chris Masond3977122009-01-05 21:25:51 -05004167 printk(KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
4168 "wanted %lu %lu\n", (unsigned long long)eb->start,
4169 eb->len, start, min_len);
Chris Masond1310b22008-01-24 16:13:08 -05004170 WARN_ON(1);
Josef Bacik850265332011-03-15 14:52:12 -04004171 return -EINVAL;
Chris Masond1310b22008-01-24 16:13:08 -05004172 }
4173
4174 p = extent_buffer_page(eb, i);
Chris Masona6591712011-07-19 12:04:14 -04004175 kaddr = page_address(p);
Chris Masond1310b22008-01-24 16:13:08 -05004176 *map = kaddr + offset;
4177 *map_len = PAGE_CACHE_SIZE - offset;
4178 return 0;
4179}
Chris Masond1310b22008-01-24 16:13:08 -05004180
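/*
 * memcmp_extent_buffer - compare @len bytes at offset @start in the extent
 * buffer with the memory at @ptrv.  Returns 0 if they are equal, otherwise
 * the memcmp() result for the first chunk that differs.
 */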
Chris Masond1310b22008-01-24 16:13:08 -05004181int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
4182 unsigned long start,
4183 unsigned long len)
4184{
4185 size_t cur;
4186 size_t offset;
4187 struct page *page;
4188 char *kaddr;
4189 char *ptr = (char *)ptrv;
4190 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
4191 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
4192 int ret = 0;
4193
4194 WARN_ON(start > eb->len);
4195 WARN_ON(start + len > eb->start + eb->len);
4196
4197 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
4198
Chris Masond3977122009-01-05 21:25:51 -05004199 while (len > 0) {
Chris Masond1310b22008-01-24 16:13:08 -05004200 page = extent_buffer_page(eb, i);
Chris Masond1310b22008-01-24 16:13:08 -05004201
4202 cur = min(len, (PAGE_CACHE_SIZE - offset));
4203
Chris Masona6591712011-07-19 12:04:14 -04004204 kaddr = page_address(page);
Chris Masond1310b22008-01-24 16:13:08 -05004205 ret = memcmp(ptr, kaddr + offset, cur);
Chris Masond1310b22008-01-24 16:13:08 -05004206 if (ret)
4207 break;
4208
4209 ptr += cur;
4210 len -= cur;
4211 offset = 0;
4212 i++;
4213 }
4214 return ret;
4215}
Chris Masond1310b22008-01-24 16:13:08 -05004216
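/*
 * write_extent_buffer - copy @len bytes from @srcv into the extent buffer at
 * offset @start.  The pages touched must already be uptodate; nothing is
 * marked dirty here, that is up to the caller.
 */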
4217void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
4218 unsigned long start, unsigned long len)
4219{
4220 size_t cur;
4221 size_t offset;
4222 struct page *page;
4223 char *kaddr;
4224 char *src = (char *)srcv;
4225 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
4226 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
4227
4228 WARN_ON(start > eb->len);
4229 WARN_ON(start + len > eb->start + eb->len);
4230
4231 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
4232
Chris Masond3977122009-01-05 21:25:51 -05004233 while (len > 0) {
Chris Masond1310b22008-01-24 16:13:08 -05004234 page = extent_buffer_page(eb, i);
4235 WARN_ON(!PageUptodate(page));
4236
4237 cur = min(len, PAGE_CACHE_SIZE - offset);
Chris Masona6591712011-07-19 12:04:14 -04004238 kaddr = page_address(page);
Chris Masond1310b22008-01-24 16:13:08 -05004239 memcpy(kaddr + offset, src, cur);
Chris Masond1310b22008-01-24 16:13:08 -05004240
4241 src += cur;
4242 len -= cur;
4243 offset = 0;
4244 i++;
4245 }
4246}
Chris Masond1310b22008-01-24 16:13:08 -05004247
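/*
 * memset_extent_buffer - fill @len bytes of the extent buffer, starting at
 * offset @start, with the byte @c.
 */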
4248void memset_extent_buffer(struct extent_buffer *eb, char c,
4249 unsigned long start, unsigned long len)
4250{
4251 size_t cur;
4252 size_t offset;
4253 struct page *page;
4254 char *kaddr;
4255 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
4256 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
4257
4258 WARN_ON(start > eb->len);
4259 WARN_ON(start + len > eb->start + eb->len);
4260
4261 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
4262
Chris Masond3977122009-01-05 21:25:51 -05004263 while (len > 0) {
Chris Masond1310b22008-01-24 16:13:08 -05004264 page = extent_buffer_page(eb, i);
4265 WARN_ON(!PageUptodate(page));
4266
4267 cur = min(len, PAGE_CACHE_SIZE - offset);
Chris Masona6591712011-07-19 12:04:14 -04004268 kaddr = page_address(page);
Chris Masond1310b22008-01-24 16:13:08 -05004269 memset(kaddr + offset, c, cur);
Chris Masond1310b22008-01-24 16:13:08 -05004270
4271 len -= cur;
4272 offset = 0;
4273 i++;
4274 }
4275}
Chris Masond1310b22008-01-24 16:13:08 -05004276
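/*
 * copy_extent_buffer - copy @len bytes from @src_offset in the @src extent
 * buffer to @dst_offset in @dst.  Both buffers are expected to have the same
 * length; the copy goes through read_extent_buffer(), so both ranges may
 * cross page boundaries.
 */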
4277void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
4278 unsigned long dst_offset, unsigned long src_offset,
4279 unsigned long len)
4280{
4281 u64 dst_len = dst->len;
4282 size_t cur;
4283 size_t offset;
4284 struct page *page;
4285 char *kaddr;
4286 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
4287 unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
4288
4289 WARN_ON(src->len != dst_len);
4290
4291 offset = (start_offset + dst_offset) &
4292 ((unsigned long)PAGE_CACHE_SIZE - 1);
4293
Chris Masond3977122009-01-05 21:25:51 -05004294 while (len > 0) {
Chris Masond1310b22008-01-24 16:13:08 -05004295 page = extent_buffer_page(dst, i);
4296 WARN_ON(!PageUptodate(page));
4297
4298 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
4299
Chris Masona6591712011-07-19 12:04:14 -04004300 kaddr = page_address(page);
Chris Masond1310b22008-01-24 16:13:08 -05004301 read_extent_buffer(src, kaddr + offset, src_offset, cur);
Chris Masond1310b22008-01-24 16:13:08 -05004302
4303 src_offset += cur;
4304 len -= cur;
4305 offset = 0;
4306 i++;
4307 }
4308}
Chris Masond1310b22008-01-24 16:13:08 -05004309
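/*
 * move_pages - memmove()-style copy of @len bytes from @src_page/@src_off to
 * @dst_page/@dst_off.  Within a single page a plain memmove() handles any
 * overlap; between two different pages the bytes are copied one at a time
 * starting from the end of the range.
 */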
4310static void move_pages(struct page *dst_page, struct page *src_page,
4311 unsigned long dst_off, unsigned long src_off,
4312 unsigned long len)
4313{
Chris Masona6591712011-07-19 12:04:14 -04004314 char *dst_kaddr = page_address(dst_page);
Chris Masond1310b22008-01-24 16:13:08 -05004315 if (dst_page == src_page) {
4316 memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
4317 } else {
Chris Masona6591712011-07-19 12:04:14 -04004318 char *src_kaddr = page_address(src_page);
Chris Masond1310b22008-01-24 16:13:08 -05004319 char *p = dst_kaddr + dst_off + len;
4320 char *s = src_kaddr + src_off + len;
4321
4322 while (len--)
4323 *--p = *--s;
Chris Masond1310b22008-01-24 16:13:08 -05004324 }
Chris Masond1310b22008-01-24 16:13:08 -05004325}
4326
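/* do the ranges [src, src + len) and [dst, dst + len) overlap? */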
Sergei Trofimovich33872062011-04-11 21:52:52 +00004327static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
4328{
4329 unsigned long distance = (src > dst) ? src - dst : dst - src;
4330 return distance < len;
4331}
4332
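/*
 * copy_pages - memcpy()-style copy of @len bytes between two extent buffer
 * pages.  Overlapping ranges inside a single page are a bug here; callers
 * that need overlap go through move_pages() instead.
 */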
Chris Masond1310b22008-01-24 16:13:08 -05004333static void copy_pages(struct page *dst_page, struct page *src_page,
4334 unsigned long dst_off, unsigned long src_off,
4335 unsigned long len)
4336{
Chris Masona6591712011-07-19 12:04:14 -04004337 char *dst_kaddr = page_address(dst_page);
Chris Masond1310b22008-01-24 16:13:08 -05004338 char *src_kaddr;
4339
Sergei Trofimovich33872062011-04-11 21:52:52 +00004340 if (dst_page != src_page) {
Chris Masona6591712011-07-19 12:04:14 -04004341 src_kaddr = page_address(src_page);
Sergei Trofimovich33872062011-04-11 21:52:52 +00004342 } else {
Chris Masond1310b22008-01-24 16:13:08 -05004343 src_kaddr = dst_kaddr;
Sergei Trofimovich33872062011-04-11 21:52:52 +00004344 BUG_ON(areas_overlap(src_off, dst_off, len));
4345 }
Chris Masond1310b22008-01-24 16:13:08 -05004346
4347 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
Chris Masond1310b22008-01-24 16:13:08 -05004348}
4349
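/*
 * memcpy_extent_buffer - copy @len bytes within the @dst extent buffer from
 * @src_offset to @dst_offset.  Both ranges must lie inside dst->len and must
 * not overlap; overlapping copies should use memmove_extent_buffer().
 */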
4350void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
4351 unsigned long src_offset, unsigned long len)
4352{
4353 size_t cur;
4354 size_t dst_off_in_page;
4355 size_t src_off_in_page;
4356 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
4357 unsigned long dst_i;
4358 unsigned long src_i;
4359
4360 if (src_offset + len > dst->len) {
Chris Masond3977122009-01-05 21:25:51 -05004361 printk(KERN_ERR "btrfs memmove bogus src_offset %lu move "
4362 "len %lu dst len %lu\n", src_offset, len, dst->len);
Chris Masond1310b22008-01-24 16:13:08 -05004363 BUG_ON(1);
4364 }
4365 if (dst_offset + len > dst->len) {
Chris Masond3977122009-01-05 21:25:51 -05004366 printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move "
4367 "len %lu dst len %lu\n", dst_offset, len, dst->len);
Chris Masond1310b22008-01-24 16:13:08 -05004368 BUG_ON(1);
4369 }
4370
Chris Masond3977122009-01-05 21:25:51 -05004371 while (len > 0) {
Chris Masond1310b22008-01-24 16:13:08 -05004372 dst_off_in_page = (start_offset + dst_offset) &
4373 ((unsigned long)PAGE_CACHE_SIZE - 1);
4374 src_off_in_page = (start_offset + src_offset) &
4375 ((unsigned long)PAGE_CACHE_SIZE - 1);
4376
4377 dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
4378 src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
4379
4380 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
4381 src_off_in_page));
4382 cur = min_t(unsigned long, cur,
4383 (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page));
4384
4385 copy_pages(extent_buffer_page(dst, dst_i),
4386 extent_buffer_page(dst, src_i),
4387 dst_off_in_page, src_off_in_page, cur);
4388
4389 src_offset += cur;
4390 dst_offset += cur;
4391 len -= cur;
4392 }
4393}
Chris Masond1310b22008-01-24 16:13:08 -05004394
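/*
 * memmove_extent_buffer - like memcpy_extent_buffer(), but safe for
 * overlapping ranges.  Non-overlapping moves are handed to
 * memcpy_extent_buffer(); overlapping ones are copied backwards, from the
 * end of the range, via move_pages().
 */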
4395void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
4396 unsigned long src_offset, unsigned long len)
4397{
4398 size_t cur;
4399 size_t dst_off_in_page;
4400 size_t src_off_in_page;
4401 unsigned long dst_end = dst_offset + len - 1;
4402 unsigned long src_end = src_offset + len - 1;
4403 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
4404 unsigned long dst_i;
4405 unsigned long src_i;
4406
4407 if (src_offset + len > dst->len) {
Chris Masond3977122009-01-05 21:25:51 -05004408		printk(KERN_ERR "btrfs memmove bogus src_offset %lu move "
4409		       "len %lu dst len %lu\n", src_offset, len, dst->len);
Chris Masond1310b22008-01-24 16:13:08 -05004410 BUG_ON(1);
4411 }
4412 if (dst_offset + len > dst->len) {
Chris Masond3977122009-01-05 21:25:51 -05004413		printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move "
4414		       "len %lu dst len %lu\n", dst_offset, len, dst->len);
Chris Masond1310b22008-01-24 16:13:08 -05004415 BUG_ON(1);
4416 }
Sergei Trofimovich33872062011-04-11 21:52:52 +00004417 if (!areas_overlap(src_offset, dst_offset, len)) {
Chris Masond1310b22008-01-24 16:13:08 -05004418 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
4419 return;
4420 }
Chris Masond3977122009-01-05 21:25:51 -05004421 while (len > 0) {
Chris Masond1310b22008-01-24 16:13:08 -05004422 dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
4423 src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
4424
4425 dst_off_in_page = (start_offset + dst_end) &
4426 ((unsigned long)PAGE_CACHE_SIZE - 1);
4427 src_off_in_page = (start_offset + src_end) &
4428 ((unsigned long)PAGE_CACHE_SIZE - 1);
4429
4430 cur = min_t(unsigned long, len, src_off_in_page + 1);
4431 cur = min(cur, dst_off_in_page + 1);
4432 move_pages(extent_buffer_page(dst, dst_i),
4433 extent_buffer_page(dst, src_i),
4434 dst_off_in_page - cur + 1,
4435 src_off_in_page - cur + 1, cur);
4436
4437 dst_end -= cur;
4438 src_end -= cur;
4439 len -= cur;
4440 }
4441}
Chris Mason6af118c2008-07-22 11:18:07 -04004442
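/*
 * RCU callback used by try_release_extent_buffer(): the buffer is only freed
 * after a grace period, once concurrent lockless lookups can no longer be
 * holding a pointer to it.
 */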
Miao Xie19fe0a82010-10-26 20:57:29 -04004443static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
4444{
4445 struct extent_buffer *eb =
4446 container_of(head, struct extent_buffer, rcu_head);
4447
4448 btrfs_release_extent_buffer(eb);
4449}
4450
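/*
 * try_release_extent_buffer - try to drop the extent buffer that starts at
 * @page so the page can be released.  Returns 1 if the buffer was freed (or
 * none was found), 0 if it is dirty or still has other references and must
 * stay around.
 */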
Chris Mason6af118c2008-07-22 11:18:07 -04004451int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
4452{
4453 u64 start = page_offset(page);
4454 struct extent_buffer *eb;
4455 int ret = 1;
Chris Mason6af118c2008-07-22 11:18:07 -04004456
4457 spin_lock(&tree->buffer_lock);
Miao Xie19fe0a82010-10-26 20:57:29 -04004458 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
Chris Mason45f49bc2010-11-21 22:27:44 -05004459 if (!eb) {
4460 spin_unlock(&tree->buffer_lock);
4461 return ret;
4462 }
Chris Mason6af118c2008-07-22 11:18:07 -04004463
Chris Masonb9473432009-03-13 11:00:37 -04004464 if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
4465 ret = 0;
4466 goto out;
4467 }
Miao Xie897ca6e2010-10-26 20:57:29 -04004468
Miao Xie19fe0a82010-10-26 20:57:29 -04004469	/*
4470	 * atomically drop the last reference (1 -> 0) and release the @eb;
4471	 * if someone else still holds a reference, back off and keep it.
4472	 */
4473 if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) {
4474 ret = 0;
4475 goto out;
4476 }
4477
4478 radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT);
Chris Mason6af118c2008-07-22 11:18:07 -04004479out:
4480 spin_unlock(&tree->buffer_lock);
Miao Xie19fe0a82010-10-26 20:57:29 -04004481
4482 /* at this point we can safely release the extent buffer */
4483 if (atomic_read(&eb->refs) == 0)
4484 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
Chris Mason6af118c2008-07-22 11:18:07 -04004485 return ret;
4486}