blob: 99fcad631a216307579a209398e24fed59a3300f [file] [log] [blame]
Chris Mason6cbd5572007-06-12 09:07:21 -04001/*
Chris Masond352ac62008-09-29 15:18:18 -04002 * Copyright (C) 2007,2008 Oracle. All rights reserved.
Chris Mason6cbd5572007-06-12 09:07:21 -04003 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
Chris Masona6b6e752007-10-15 16:22:39 -040019#include <linux/sched.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090020#include <linux/slab.h>
Chris Masoneb60cea2007-02-02 09:18:22 -050021#include "ctree.h"
22#include "disk-io.h"
Chris Mason7f5c1512007-03-23 15:56:19 -040023#include "transaction.h"
Chris Mason5f39d392007-10-15 16:14:19 -040024#include "print-tree.h"
Chris Mason925baed2008-06-25 16:01:30 -040025#include "locking.h"
Chris Mason9a8dd152007-02-23 08:38:36 -050026
Chris Masone089f052007-03-16 16:20:31 -040027static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
28 *root, struct btrfs_path *path, int level);
29static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
Chris Masond4dbff92007-04-04 14:08:15 -040030 *root, struct btrfs_key *ins_key,
Chris Masoncc0c5532007-10-25 15:42:57 -040031 struct btrfs_path *path, int data_size, int extend);
Chris Mason5f39d392007-10-15 16:14:19 -040032static int push_node_left(struct btrfs_trans_handle *trans,
33 struct btrfs_root *root, struct extent_buffer *dst,
Chris Mason971a1f62008-04-24 10:54:32 -040034 struct extent_buffer *src, int empty);
Chris Mason5f39d392007-10-15 16:14:19 -040035static int balance_node_right(struct btrfs_trans_handle *trans,
36 struct btrfs_root *root,
37 struct extent_buffer *dst_buf,
38 struct extent_buffer *src_buf);
Jeff Mahoney143bede2012-03-01 14:56:26 +010039static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
Chris Masone089f052007-03-16 16:20:31 -040040 struct btrfs_path *path, int level, int slot);
Chris Masond97e63b2007-02-20 16:40:44 -050041
Chris Mason2c90e5d2007-04-02 10:50:19 -040042struct btrfs_path *btrfs_alloc_path(void)
43{
Chris Masondf24a2b2007-04-04 09:36:31 -040044 struct btrfs_path *path;
Jeff Mahoneye00f7302009-02-12 14:11:25 -050045 path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
Chris Masondf24a2b2007-04-04 09:36:31 -040046 return path;
Chris Mason2c90e5d2007-04-02 10:50:19 -040047}
48
Chris Masonb4ce94d2009-02-04 09:25:08 -050049/*
50 * set all locked nodes in the path to blocking locks. This should
51 * be done before scheduling
52 */
53noinline void btrfs_set_path_blocking(struct btrfs_path *p)
54{
55 int i;
56 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
Chris Masonbd681512011-07-16 15:23:14 -040057 if (!p->nodes[i] || !p->locks[i])
58 continue;
59 btrfs_set_lock_blocking_rw(p->nodes[i], p->locks[i]);
60 if (p->locks[i] == BTRFS_READ_LOCK)
61 p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
62 else if (p->locks[i] == BTRFS_WRITE_LOCK)
63 p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
Chris Masonb4ce94d2009-02-04 09:25:08 -050064 }
65}
66
67/*
68 * reset all the locked nodes in the patch to spinning locks.
Chris Mason4008c042009-02-12 14:09:45 -050069 *
70 * held is used to keep lockdep happy, when lockdep is enabled
71 * we set held to a blocking lock before we go around and
72 * retake all the spinlocks in the path. You can safely use NULL
73 * for held
Chris Masonb4ce94d2009-02-04 09:25:08 -050074 */
Chris Mason4008c042009-02-12 14:09:45 -050075noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
Chris Masonbd681512011-07-16 15:23:14 -040076 struct extent_buffer *held, int held_rw)
Chris Masonb4ce94d2009-02-04 09:25:08 -050077{
78 int i;
Chris Mason4008c042009-02-12 14:09:45 -050079
80#ifdef CONFIG_DEBUG_LOCK_ALLOC
81 /* lockdep really cares that we take all of these spinlocks
82 * in the right order. If any of the locks in the path are not
83 * currently blocking, it is going to complain. So, make really
84 * really sure by forcing the path to blocking before we clear
85 * the path blocking.
86 */
Chris Masonbd681512011-07-16 15:23:14 -040087 if (held) {
88 btrfs_set_lock_blocking_rw(held, held_rw);
89 if (held_rw == BTRFS_WRITE_LOCK)
90 held_rw = BTRFS_WRITE_LOCK_BLOCKING;
91 else if (held_rw == BTRFS_READ_LOCK)
92 held_rw = BTRFS_READ_LOCK_BLOCKING;
93 }
Chris Mason4008c042009-02-12 14:09:45 -050094 btrfs_set_path_blocking(p);
95#endif
96
97 for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
Chris Masonbd681512011-07-16 15:23:14 -040098 if (p->nodes[i] && p->locks[i]) {
99 btrfs_clear_lock_blocking_rw(p->nodes[i], p->locks[i]);
100 if (p->locks[i] == BTRFS_WRITE_LOCK_BLOCKING)
101 p->locks[i] = BTRFS_WRITE_LOCK;
102 else if (p->locks[i] == BTRFS_READ_LOCK_BLOCKING)
103 p->locks[i] = BTRFS_READ_LOCK;
104 }
Chris Masonb4ce94d2009-02-04 09:25:08 -0500105 }
Chris Mason4008c042009-02-12 14:09:45 -0500106
107#ifdef CONFIG_DEBUG_LOCK_ALLOC
108 if (held)
Chris Masonbd681512011-07-16 15:23:14 -0400109 btrfs_clear_lock_blocking_rw(held, held_rw);
Chris Mason4008c042009-02-12 14:09:45 -0500110#endif
Chris Masonb4ce94d2009-02-04 09:25:08 -0500111}
112
Chris Masond352ac62008-09-29 15:18:18 -0400113/* this also releases the path */
Chris Mason2c90e5d2007-04-02 10:50:19 -0400114void btrfs_free_path(struct btrfs_path *p)
115{
Jesper Juhlff175d52010-12-25 21:22:30 +0000116 if (!p)
117 return;
David Sterbab3b4aa72011-04-21 01:20:15 +0200118 btrfs_release_path(p);
Chris Mason2c90e5d2007-04-02 10:50:19 -0400119 kmem_cache_free(btrfs_path_cachep, p);
120}
121
Chris Masond352ac62008-09-29 15:18:18 -0400122/*
123 * path release drops references on the extent buffers in the path
124 * and it drops any locks held by this path
125 *
126 * It is safe to call this on paths that no locks or extent buffers held.
127 */
David Sterbab3b4aa72011-04-21 01:20:15 +0200128noinline void btrfs_release_path(struct btrfs_path *p)
Chris Masoneb60cea2007-02-02 09:18:22 -0500129{
130 int i;
Chris Masona2135012008-06-25 16:01:30 -0400131
Chris Mason234b63a2007-03-13 10:46:10 -0400132 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
Chris Mason3f157a22008-06-25 16:01:31 -0400133 p->slots[i] = 0;
Chris Masoneb60cea2007-02-02 09:18:22 -0500134 if (!p->nodes[i])
Chris Mason925baed2008-06-25 16:01:30 -0400135 continue;
136 if (p->locks[i]) {
Chris Masonbd681512011-07-16 15:23:14 -0400137 btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
Chris Mason925baed2008-06-25 16:01:30 -0400138 p->locks[i] = 0;
139 }
Chris Mason5f39d392007-10-15 16:14:19 -0400140 free_extent_buffer(p->nodes[i]);
Chris Mason3f157a22008-06-25 16:01:31 -0400141 p->nodes[i] = NULL;
Chris Masoneb60cea2007-02-02 09:18:22 -0500142 }
143}
144
Chris Masond352ac62008-09-29 15:18:18 -0400145/*
146 * safely gets a reference on the root node of a tree. A lock
147 * is not taken, so a concurrent writer may put a different node
148 * at the root of the tree. See btrfs_lock_root_node for the
149 * looping required.
150 *
151 * The extent buffer returned by this has a reference taken, so
152 * it won't disappear. It may stop being the root of the tree
153 * at any time because there are no locks held.
154 */
Chris Mason925baed2008-06-25 16:01:30 -0400155struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
156{
157 struct extent_buffer *eb;
Chris Mason240f62c2011-03-23 14:54:42 -0400158
Josef Bacik3083ee22012-03-09 16:01:49 -0500159 while (1) {
160 rcu_read_lock();
161 eb = rcu_dereference(root->node);
162
163 /*
164 * RCU really hurts here, we could free up the root node because
165 * it was cow'ed but we may not get the new root node yet so do
166 * the inc_not_zero dance and if it doesn't work then
167 * synchronize_rcu and try again.
168 */
169 if (atomic_inc_not_zero(&eb->refs)) {
170 rcu_read_unlock();
171 break;
172 }
173 rcu_read_unlock();
174 synchronize_rcu();
175 }
Chris Mason925baed2008-06-25 16:01:30 -0400176 return eb;
177}
178
Chris Masond352ac62008-09-29 15:18:18 -0400179/* loop around taking references on and locking the root node of the
180 * tree until you end up with a lock on the root. A locked buffer
181 * is returned, with a reference held.
182 */
Chris Mason925baed2008-06-25 16:01:30 -0400183struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
184{
185 struct extent_buffer *eb;
186
Chris Masond3977122009-01-05 21:25:51 -0500187 while (1) {
Chris Mason925baed2008-06-25 16:01:30 -0400188 eb = btrfs_root_node(root);
189 btrfs_tree_lock(eb);
Chris Mason240f62c2011-03-23 14:54:42 -0400190 if (eb == root->node)
Chris Mason925baed2008-06-25 16:01:30 -0400191 break;
Chris Mason925baed2008-06-25 16:01:30 -0400192 btrfs_tree_unlock(eb);
193 free_extent_buffer(eb);
194 }
195 return eb;
196}
197
Chris Masonbd681512011-07-16 15:23:14 -0400198/* loop around taking references on and locking the root node of the
199 * tree until you end up with a lock on the root. A locked buffer
200 * is returned, with a reference held.
201 */
202struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
203{
204 struct extent_buffer *eb;
205
206 while (1) {
207 eb = btrfs_root_node(root);
208 btrfs_tree_read_lock(eb);
209 if (eb == root->node)
210 break;
211 btrfs_tree_read_unlock(eb);
212 free_extent_buffer(eb);
213 }
214 return eb;
215}
216
Chris Masond352ac62008-09-29 15:18:18 -0400217/* cowonly root (everything not a reference counted cow subvolume), just get
218 * put onto a simple dirty list. transaction.c walks this to make sure they
219 * get properly updated on disk.
220 */
Chris Mason0b86a832008-03-24 15:01:56 -0400221static void add_root_to_dirty_list(struct btrfs_root *root)
222{
Chris Masone5846fc2012-05-03 12:08:48 -0400223 spin_lock(&root->fs_info->trans_lock);
Chris Mason0b86a832008-03-24 15:01:56 -0400224 if (root->track_dirty && list_empty(&root->dirty_list)) {
225 list_add(&root->dirty_list,
226 &root->fs_info->dirty_cowonly_roots);
227 }
Chris Masone5846fc2012-05-03 12:08:48 -0400228 spin_unlock(&root->fs_info->trans_lock);
Chris Mason0b86a832008-03-24 15:01:56 -0400229}
230
Chris Masond352ac62008-09-29 15:18:18 -0400231/*
232 * used by snapshot creation to make a copy of a root for a tree with
233 * a given objectid. The buffer with the new root node is returned in
234 * cow_ret, and this func returns zero on success or a negative error code.
235 */
Chris Masonbe20aa92007-12-17 20:14:01 -0500236int btrfs_copy_root(struct btrfs_trans_handle *trans,
237 struct btrfs_root *root,
238 struct extent_buffer *buf,
239 struct extent_buffer **cow_ret, u64 new_root_objectid)
240{
241 struct extent_buffer *cow;
Chris Masonbe20aa92007-12-17 20:14:01 -0500242 int ret = 0;
243 int level;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400244 struct btrfs_disk_key disk_key;
Chris Masonbe20aa92007-12-17 20:14:01 -0500245
246 WARN_ON(root->ref_cows && trans->transid !=
247 root->fs_info->running_transaction->transid);
248 WARN_ON(root->ref_cows && trans->transid != root->last_trans);
249
250 level = btrfs_header_level(buf);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400251 if (level == 0)
252 btrfs_item_key(buf, &disk_key, 0);
253 else
254 btrfs_node_key(buf, &disk_key, 0);
Zheng Yan31840ae2008-09-23 13:14:14 -0400255
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400256 cow = btrfs_alloc_free_block(trans, root, buf->len, 0,
257 new_root_objectid, &disk_key, level,
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200258 buf->start, 0, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400259 if (IS_ERR(cow))
Chris Masonbe20aa92007-12-17 20:14:01 -0500260 return PTR_ERR(cow);
261
262 copy_extent_buffer(cow, buf, 0, 0, cow->len);
263 btrfs_set_header_bytenr(cow, cow->start);
264 btrfs_set_header_generation(cow, trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400265 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
266 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
267 BTRFS_HEADER_FLAG_RELOC);
268 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
269 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
270 else
271 btrfs_set_header_owner(cow, new_root_objectid);
Chris Masonbe20aa92007-12-17 20:14:01 -0500272
Yan Zheng2b820322008-11-17 21:11:30 -0500273 write_extent_buffer(cow, root->fs_info->fsid,
274 (unsigned long)btrfs_header_fsid(cow),
275 BTRFS_FSID_SIZE);
276
Chris Masonbe20aa92007-12-17 20:14:01 -0500277 WARN_ON(btrfs_header_generation(buf) > trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400278 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200279 ret = btrfs_inc_ref(trans, root, cow, 1, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400280 else
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200281 ret = btrfs_inc_ref(trans, root, cow, 0, 1);
Chris Mason4aec2b52007-12-18 16:25:45 -0500282
Chris Masonbe20aa92007-12-17 20:14:01 -0500283 if (ret)
284 return ret;
285
286 btrfs_mark_buffer_dirty(cow);
287 *cow_ret = cow;
288 return 0;
289}
290
Chris Masond352ac62008-09-29 15:18:18 -0400291/*
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400292 * check if the tree block can be shared by multiple trees
293 */
294int btrfs_block_can_be_shared(struct btrfs_root *root,
295 struct extent_buffer *buf)
296{
297 /*
298 * Tree blocks not in refernece counted trees and tree roots
299 * are never shared. If a block was allocated after the last
300 * snapshot and the block was not allocated by tree relocation,
301 * we know the block is not shared.
302 */
303 if (root->ref_cows &&
304 buf != root->node && buf != root->commit_root &&
305 (btrfs_header_generation(buf) <=
306 btrfs_root_last_snapshot(&root->root_item) ||
307 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
308 return 1;
309#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
310 if (root->ref_cows &&
311 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
312 return 1;
313#endif
314 return 0;
315}
316
317static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
318 struct btrfs_root *root,
319 struct extent_buffer *buf,
Yan, Zhengf0486c62010-05-16 10:46:25 -0400320 struct extent_buffer *cow,
321 int *last_ref)
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400322{
323 u64 refs;
324 u64 owner;
325 u64 flags;
326 u64 new_flags = 0;
327 int ret;
328
329 /*
330 * Backrefs update rules:
331 *
332 * Always use full backrefs for extent pointers in tree block
333 * allocated by tree relocation.
334 *
335 * If a shared tree block is no longer referenced by its owner
336 * tree (btrfs_header_owner(buf) == root->root_key.objectid),
337 * use full backrefs for extent pointers in tree block.
338 *
339 * If a tree block is been relocating
340 * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
341 * use full backrefs for extent pointers in tree block.
342 * The reason for this is some operations (such as drop tree)
343 * are only allowed for blocks use full backrefs.
344 */
345
346 if (btrfs_block_can_be_shared(root, buf)) {
347 ret = btrfs_lookup_extent_info(trans, root, buf->start,
348 buf->len, &refs, &flags);
Mark Fashehbe1a5562011-08-08 13:20:18 -0700349 if (ret)
350 return ret;
Mark Fashehe5df9572011-08-29 14:17:04 -0700351 if (refs == 0) {
352 ret = -EROFS;
353 btrfs_std_error(root->fs_info, ret);
354 return ret;
355 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400356 } else {
357 refs = 1;
358 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
359 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
360 flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
361 else
362 flags = 0;
363 }
364
365 owner = btrfs_header_owner(buf);
366 BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
367 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
368
369 if (refs > 1) {
370 if ((owner == root->root_key.objectid ||
371 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
372 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200373 ret = btrfs_inc_ref(trans, root, buf, 1, 1);
Jeff Mahoney79787ea2012-03-12 16:03:00 +0100374 BUG_ON(ret); /* -ENOMEM */
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400375
376 if (root->root_key.objectid ==
377 BTRFS_TREE_RELOC_OBJECTID) {
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200378 ret = btrfs_dec_ref(trans, root, buf, 0, 1);
Jeff Mahoney79787ea2012-03-12 16:03:00 +0100379 BUG_ON(ret); /* -ENOMEM */
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200380 ret = btrfs_inc_ref(trans, root, cow, 1, 1);
Jeff Mahoney79787ea2012-03-12 16:03:00 +0100381 BUG_ON(ret); /* -ENOMEM */
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400382 }
383 new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
384 } else {
385
386 if (root->root_key.objectid ==
387 BTRFS_TREE_RELOC_OBJECTID)
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200388 ret = btrfs_inc_ref(trans, root, cow, 1, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400389 else
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200390 ret = btrfs_inc_ref(trans, root, cow, 0, 1);
Jeff Mahoney79787ea2012-03-12 16:03:00 +0100391 BUG_ON(ret); /* -ENOMEM */
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400392 }
393 if (new_flags != 0) {
394 ret = btrfs_set_disk_extent_flags(trans, root,
395 buf->start,
396 buf->len,
397 new_flags, 0);
Mark Fashehbe1a5562011-08-08 13:20:18 -0700398 if (ret)
399 return ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400400 }
401 } else {
402 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
403 if (root->root_key.objectid ==
404 BTRFS_TREE_RELOC_OBJECTID)
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200405 ret = btrfs_inc_ref(trans, root, cow, 1, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400406 else
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200407 ret = btrfs_inc_ref(trans, root, cow, 0, 1);
Jeff Mahoney79787ea2012-03-12 16:03:00 +0100408 BUG_ON(ret); /* -ENOMEM */
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200409 ret = btrfs_dec_ref(trans, root, buf, 1, 1);
Jeff Mahoney79787ea2012-03-12 16:03:00 +0100410 BUG_ON(ret); /* -ENOMEM */
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400411 }
412 clean_tree_block(trans, root, buf);
Yan, Zhengf0486c62010-05-16 10:46:25 -0400413 *last_ref = 1;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400414 }
415 return 0;
416}
417
418/*
Chris Masond3977122009-01-05 21:25:51 -0500419 * does the dirty work in cow of a single block. The parent block (if
420 * supplied) is updated to point to the new cow copy. The new buffer is marked
421 * dirty and returned locked. If you modify the block it needs to be marked
422 * dirty again.
Chris Masond352ac62008-09-29 15:18:18 -0400423 *
424 * search_start -- an allocation hint for the new block
425 *
Chris Masond3977122009-01-05 21:25:51 -0500426 * empty_size -- a hint that you plan on doing more cow. This is the size in
427 * bytes the allocator should try to find free next to the block it returns.
428 * This is just a hint and may be ignored by the allocator.
Chris Masond352ac62008-09-29 15:18:18 -0400429 */
Chris Masond3977122009-01-05 21:25:51 -0500430static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -0400431 struct btrfs_root *root,
432 struct extent_buffer *buf,
433 struct extent_buffer *parent, int parent_slot,
434 struct extent_buffer **cow_ret,
Chris Mason9fa8cfe2009-03-13 10:24:59 -0400435 u64 search_start, u64 empty_size)
Chris Mason6702ed42007-08-07 16:15:09 -0400436{
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400437 struct btrfs_disk_key disk_key;
Chris Mason5f39d392007-10-15 16:14:19 -0400438 struct extent_buffer *cow;
Mark Fashehbe1a5562011-08-08 13:20:18 -0700439 int level, ret;
Yan, Zhengf0486c62010-05-16 10:46:25 -0400440 int last_ref = 0;
Chris Mason925baed2008-06-25 16:01:30 -0400441 int unlock_orig = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400442 u64 parent_start;
Chris Mason6702ed42007-08-07 16:15:09 -0400443
Chris Mason925baed2008-06-25 16:01:30 -0400444 if (*cow_ret == buf)
445 unlock_orig = 1;
446
Chris Masonb9447ef2009-03-09 11:45:38 -0400447 btrfs_assert_tree_locked(buf);
Chris Mason925baed2008-06-25 16:01:30 -0400448
Chris Mason7bb86312007-12-11 09:25:06 -0500449 WARN_ON(root->ref_cows && trans->transid !=
450 root->fs_info->running_transaction->transid);
Chris Mason6702ed42007-08-07 16:15:09 -0400451 WARN_ON(root->ref_cows && trans->transid != root->last_trans);
Chris Mason5f39d392007-10-15 16:14:19 -0400452
Chris Mason7bb86312007-12-11 09:25:06 -0500453 level = btrfs_header_level(buf);
Zheng Yan31840ae2008-09-23 13:14:14 -0400454
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400455 if (level == 0)
456 btrfs_item_key(buf, &disk_key, 0);
457 else
458 btrfs_node_key(buf, &disk_key, 0);
459
460 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
461 if (parent)
462 parent_start = parent->start;
463 else
464 parent_start = 0;
465 } else
466 parent_start = 0;
467
468 cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
469 root->root_key.objectid, &disk_key,
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200470 level, search_start, empty_size, 1);
Chris Mason6702ed42007-08-07 16:15:09 -0400471 if (IS_ERR(cow))
472 return PTR_ERR(cow);
473
Chris Masonb4ce94d2009-02-04 09:25:08 -0500474 /* cow is set to blocking by btrfs_init_new_buffer */
475
Chris Mason5f39d392007-10-15 16:14:19 -0400476 copy_extent_buffer(cow, buf, 0, 0, cow->len);
Chris Masondb945352007-10-15 16:15:53 -0400477 btrfs_set_header_bytenr(cow, cow->start);
Chris Mason5f39d392007-10-15 16:14:19 -0400478 btrfs_set_header_generation(cow, trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400479 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
480 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
481 BTRFS_HEADER_FLAG_RELOC);
482 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
483 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
484 else
485 btrfs_set_header_owner(cow, root->root_key.objectid);
Chris Mason6702ed42007-08-07 16:15:09 -0400486
Yan Zheng2b820322008-11-17 21:11:30 -0500487 write_extent_buffer(cow, root->fs_info->fsid,
488 (unsigned long)btrfs_header_fsid(cow),
489 BTRFS_FSID_SIZE);
490
Mark Fashehbe1a5562011-08-08 13:20:18 -0700491 ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
Mark Fashehb68dc2a2011-08-29 14:30:39 -0700492 if (ret) {
Jeff Mahoney79787ea2012-03-12 16:03:00 +0100493 btrfs_abort_transaction(trans, root, ret);
Mark Fashehb68dc2a2011-08-29 14:30:39 -0700494 return ret;
495 }
Zheng Yan1a40e232008-09-26 10:09:34 -0400496
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400497 if (root->ref_cows)
498 btrfs_reloc_cow_block(trans, root, buf, cow);
499
Chris Mason6702ed42007-08-07 16:15:09 -0400500 if (buf == root->node) {
Chris Mason925baed2008-06-25 16:01:30 -0400501 WARN_ON(parent && parent != buf);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400502 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
503 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
504 parent_start = buf->start;
505 else
506 parent_start = 0;
Chris Mason925baed2008-06-25 16:01:30 -0400507
Chris Mason5f39d392007-10-15 16:14:19 -0400508 extent_buffer_get(cow);
Chris Mason240f62c2011-03-23 14:54:42 -0400509 rcu_assign_pointer(root->node, cow);
Chris Mason925baed2008-06-25 16:01:30 -0400510
Yan, Zhengf0486c62010-05-16 10:46:25 -0400511 btrfs_free_tree_block(trans, root, buf, parent_start,
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200512 last_ref, 1);
Chris Mason5f39d392007-10-15 16:14:19 -0400513 free_extent_buffer(buf);
Chris Mason0b86a832008-03-24 15:01:56 -0400514 add_root_to_dirty_list(root);
Chris Mason6702ed42007-08-07 16:15:09 -0400515 } else {
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400516 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
517 parent_start = parent->start;
518 else
519 parent_start = 0;
520
521 WARN_ON(trans->transid != btrfs_header_generation(parent));
Chris Mason5f39d392007-10-15 16:14:19 -0400522 btrfs_set_node_blockptr(parent, parent_slot,
Chris Masondb945352007-10-15 16:15:53 -0400523 cow->start);
Chris Mason74493f72007-12-11 09:25:06 -0500524 btrfs_set_node_ptr_generation(parent, parent_slot,
525 trans->transid);
Chris Mason6702ed42007-08-07 16:15:09 -0400526 btrfs_mark_buffer_dirty(parent);
Yan, Zhengf0486c62010-05-16 10:46:25 -0400527 btrfs_free_tree_block(trans, root, buf, parent_start,
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200528 last_ref, 1);
Chris Mason6702ed42007-08-07 16:15:09 -0400529 }
Chris Mason925baed2008-06-25 16:01:30 -0400530 if (unlock_orig)
531 btrfs_tree_unlock(buf);
Josef Bacik3083ee22012-03-09 16:01:49 -0500532 free_extent_buffer_stale(buf);
Chris Mason6702ed42007-08-07 16:15:09 -0400533 btrfs_mark_buffer_dirty(cow);
534 *cow_ret = cow;
535 return 0;
536}
537
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400538static inline int should_cow_block(struct btrfs_trans_handle *trans,
539 struct btrfs_root *root,
540 struct extent_buffer *buf)
541{
Liu Bof1ebcc72011-11-14 20:48:06 -0500542 /* ensure we can see the force_cow */
543 smp_rmb();
544
545 /*
546 * We do not need to cow a block if
547 * 1) this block is not created or changed in this transaction;
548 * 2) this block does not belong to TREE_RELOC tree;
549 * 3) the root is not forced COW.
550 *
551 * What is forced COW:
552 * when we create snapshot during commiting the transaction,
553 * after we've finished coping src root, we must COW the shared
554 * block to ensure the metadata consistency.
555 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400556 if (btrfs_header_generation(buf) == trans->transid &&
557 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
558 !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
Liu Bof1ebcc72011-11-14 20:48:06 -0500559 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
560 !root->force_cow)
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400561 return 0;
562 return 1;
563}
564
Chris Masond352ac62008-09-29 15:18:18 -0400565/*
566 * cows a single block, see __btrfs_cow_block for the real work.
567 * This version of it has extra checks so that a block isn't cow'd more than
568 * once per transaction, as long as it hasn't been written yet
569 */
Chris Masond3977122009-01-05 21:25:51 -0500570noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -0400571 struct btrfs_root *root, struct extent_buffer *buf,
572 struct extent_buffer *parent, int parent_slot,
Chris Mason9fa8cfe2009-03-13 10:24:59 -0400573 struct extent_buffer **cow_ret)
Chris Mason02217ed2007-03-02 16:08:05 -0500574{
Chris Mason6702ed42007-08-07 16:15:09 -0400575 u64 search_start;
Chris Masonf510cfe2007-10-15 16:14:48 -0400576 int ret;
Chris Masondc17ff82008-01-08 15:46:30 -0500577
Chris Masonccd467d2007-06-28 15:57:36 -0400578 if (trans->transaction != root->fs_info->running_transaction) {
Chris Masond3977122009-01-05 21:25:51 -0500579 printk(KERN_CRIT "trans %llu running %llu\n",
580 (unsigned long long)trans->transid,
581 (unsigned long long)
Chris Masonccd467d2007-06-28 15:57:36 -0400582 root->fs_info->running_transaction->transid);
583 WARN_ON(1);
584 }
585 if (trans->transid != root->fs_info->generation) {
Chris Masond3977122009-01-05 21:25:51 -0500586 printk(KERN_CRIT "trans %llu running %llu\n",
587 (unsigned long long)trans->transid,
588 (unsigned long long)root->fs_info->generation);
Chris Masonccd467d2007-06-28 15:57:36 -0400589 WARN_ON(1);
590 }
Chris Masondc17ff82008-01-08 15:46:30 -0500591
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400592 if (!should_cow_block(trans, root, buf)) {
Chris Mason02217ed2007-03-02 16:08:05 -0500593 *cow_ret = buf;
594 return 0;
595 }
Chris Masonc4876852009-02-04 09:24:25 -0500596
Chris Mason0b86a832008-03-24 15:01:56 -0400597 search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
Chris Masonb4ce94d2009-02-04 09:25:08 -0500598
599 if (parent)
600 btrfs_set_lock_blocking(parent);
601 btrfs_set_lock_blocking(buf);
602
Chris Masonf510cfe2007-10-15 16:14:48 -0400603 ret = __btrfs_cow_block(trans, root, buf, parent,
Chris Mason9fa8cfe2009-03-13 10:24:59 -0400604 parent_slot, cow_ret, search_start, 0);
liubo1abe9b82011-03-24 11:18:59 +0000605
606 trace_btrfs_cow_block(root, buf, *cow_ret);
607
Chris Masonf510cfe2007-10-15 16:14:48 -0400608 return ret;
Chris Mason6702ed42007-08-07 16:15:09 -0400609}
610
Chris Masond352ac62008-09-29 15:18:18 -0400611/*
612 * helper function for defrag to decide if two blocks pointed to by a
613 * node are actually close by
614 */
Chris Mason6b800532007-10-15 16:17:34 -0400615static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
Chris Mason6702ed42007-08-07 16:15:09 -0400616{
Chris Mason6b800532007-10-15 16:17:34 -0400617 if (blocknr < other && other - (blocknr + blocksize) < 32768)
Chris Mason6702ed42007-08-07 16:15:09 -0400618 return 1;
Chris Mason6b800532007-10-15 16:17:34 -0400619 if (blocknr > other && blocknr - (other + blocksize) < 32768)
Chris Mason6702ed42007-08-07 16:15:09 -0400620 return 1;
Chris Mason02217ed2007-03-02 16:08:05 -0500621 return 0;
622}
623
Chris Mason081e9572007-11-06 10:26:24 -0500624/*
625 * compare two keys in a memcmp fashion
626 */
627static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
628{
629 struct btrfs_key k1;
630
631 btrfs_disk_key_to_cpu(&k1, disk);
632
Diego Calleja20736ab2009-07-24 11:06:52 -0400633 return btrfs_comp_cpu_keys(&k1, k2);
Chris Mason081e9572007-11-06 10:26:24 -0500634}
635
Josef Bacikf3465ca2008-11-12 14:19:50 -0500636/*
637 * same as comp_keys only with two btrfs_key's
638 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400639int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
Josef Bacikf3465ca2008-11-12 14:19:50 -0500640{
641 if (k1->objectid > k2->objectid)
642 return 1;
643 if (k1->objectid < k2->objectid)
644 return -1;
645 if (k1->type > k2->type)
646 return 1;
647 if (k1->type < k2->type)
648 return -1;
649 if (k1->offset > k2->offset)
650 return 1;
651 if (k1->offset < k2->offset)
652 return -1;
653 return 0;
654}
Chris Mason081e9572007-11-06 10:26:24 -0500655
Chris Masond352ac62008-09-29 15:18:18 -0400656/*
657 * this is used by the defrag code to go through all the
658 * leaves pointed to by a node and reallocate them so that
659 * disk order is close to key order
660 */
Chris Mason6702ed42007-08-07 16:15:09 -0400661int btrfs_realloc_node(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -0400662 struct btrfs_root *root, struct extent_buffer *parent,
Chris Masona6b6e752007-10-15 16:22:39 -0400663 int start_slot, int cache_only, u64 *last_ret,
664 struct btrfs_key *progress)
Chris Mason6702ed42007-08-07 16:15:09 -0400665{
Chris Mason6b800532007-10-15 16:17:34 -0400666 struct extent_buffer *cur;
Chris Mason6702ed42007-08-07 16:15:09 -0400667 u64 blocknr;
Chris Masonca7a79a2008-05-12 12:59:19 -0400668 u64 gen;
Chris Masone9d0b132007-08-10 14:06:19 -0400669 u64 search_start = *last_ret;
670 u64 last_block = 0;
Chris Mason6702ed42007-08-07 16:15:09 -0400671 u64 other;
672 u32 parent_nritems;
Chris Mason6702ed42007-08-07 16:15:09 -0400673 int end_slot;
674 int i;
675 int err = 0;
Chris Masonf2183bd2007-08-10 14:42:37 -0400676 int parent_level;
Chris Mason6b800532007-10-15 16:17:34 -0400677 int uptodate;
678 u32 blocksize;
Chris Mason081e9572007-11-06 10:26:24 -0500679 int progress_passed = 0;
680 struct btrfs_disk_key disk_key;
Chris Mason6702ed42007-08-07 16:15:09 -0400681
Chris Mason5708b952007-10-25 15:43:18 -0400682 parent_level = btrfs_header_level(parent);
683 if (cache_only && parent_level != 1)
684 return 0;
685
Chris Masond3977122009-01-05 21:25:51 -0500686 if (trans->transaction != root->fs_info->running_transaction)
Chris Mason6702ed42007-08-07 16:15:09 -0400687 WARN_ON(1);
Chris Masond3977122009-01-05 21:25:51 -0500688 if (trans->transid != root->fs_info->generation)
Chris Mason6702ed42007-08-07 16:15:09 -0400689 WARN_ON(1);
Chris Mason86479a02007-09-10 19:58:16 -0400690
Chris Mason6b800532007-10-15 16:17:34 -0400691 parent_nritems = btrfs_header_nritems(parent);
Chris Mason6b800532007-10-15 16:17:34 -0400692 blocksize = btrfs_level_size(root, parent_level - 1);
Chris Mason6702ed42007-08-07 16:15:09 -0400693 end_slot = parent_nritems;
694
695 if (parent_nritems == 1)
696 return 0;
697
Chris Masonb4ce94d2009-02-04 09:25:08 -0500698 btrfs_set_lock_blocking(parent);
699
Chris Mason6702ed42007-08-07 16:15:09 -0400700 for (i = start_slot; i < end_slot; i++) {
701 int close = 1;
Chris Masona6b6e752007-10-15 16:22:39 -0400702
Chris Mason081e9572007-11-06 10:26:24 -0500703 btrfs_node_key(parent, &disk_key, i);
704 if (!progress_passed && comp_keys(&disk_key, progress) < 0)
705 continue;
706
707 progress_passed = 1;
Chris Mason6b800532007-10-15 16:17:34 -0400708 blocknr = btrfs_node_blockptr(parent, i);
Chris Masonca7a79a2008-05-12 12:59:19 -0400709 gen = btrfs_node_ptr_generation(parent, i);
Chris Masone9d0b132007-08-10 14:06:19 -0400710 if (last_block == 0)
711 last_block = blocknr;
Chris Mason5708b952007-10-25 15:43:18 -0400712
Chris Mason6702ed42007-08-07 16:15:09 -0400713 if (i > 0) {
Chris Mason6b800532007-10-15 16:17:34 -0400714 other = btrfs_node_blockptr(parent, i - 1);
715 close = close_blocks(blocknr, other, blocksize);
Chris Mason6702ed42007-08-07 16:15:09 -0400716 }
Chris Mason0ef3e662008-05-24 14:04:53 -0400717 if (!close && i < end_slot - 2) {
Chris Mason6b800532007-10-15 16:17:34 -0400718 other = btrfs_node_blockptr(parent, i + 1);
719 close = close_blocks(blocknr, other, blocksize);
Chris Mason6702ed42007-08-07 16:15:09 -0400720 }
Chris Masone9d0b132007-08-10 14:06:19 -0400721 if (close) {
722 last_block = blocknr;
Chris Mason6702ed42007-08-07 16:15:09 -0400723 continue;
Chris Masone9d0b132007-08-10 14:06:19 -0400724 }
Chris Mason6702ed42007-08-07 16:15:09 -0400725
Chris Mason6b800532007-10-15 16:17:34 -0400726 cur = btrfs_find_tree_block(root, blocknr, blocksize);
727 if (cur)
Chris Masonb9fab912012-05-06 07:23:47 -0400728 uptodate = btrfs_buffer_uptodate(cur, gen, 0);
Chris Mason6b800532007-10-15 16:17:34 -0400729 else
730 uptodate = 0;
Chris Mason5708b952007-10-25 15:43:18 -0400731 if (!cur || !uptodate) {
Chris Mason6702ed42007-08-07 16:15:09 -0400732 if (cache_only) {
Chris Mason6b800532007-10-15 16:17:34 -0400733 free_extent_buffer(cur);
Chris Mason6702ed42007-08-07 16:15:09 -0400734 continue;
735 }
Chris Mason6b800532007-10-15 16:17:34 -0400736 if (!cur) {
737 cur = read_tree_block(root, blocknr,
Chris Masonca7a79a2008-05-12 12:59:19 -0400738 blocksize, gen);
Tsutomu Itoh97d9a8a2011-03-24 06:33:21 +0000739 if (!cur)
740 return -EIO;
Chris Mason6b800532007-10-15 16:17:34 -0400741 } else if (!uptodate) {
Tsutomu Itoh018642a2012-05-29 18:10:13 +0900742 err = btrfs_read_buffer(cur, gen);
743 if (err) {
744 free_extent_buffer(cur);
745 return err;
746 }
Chris Masonf2183bd2007-08-10 14:42:37 -0400747 }
Chris Mason6702ed42007-08-07 16:15:09 -0400748 }
Chris Masone9d0b132007-08-10 14:06:19 -0400749 if (search_start == 0)
Chris Mason6b800532007-10-15 16:17:34 -0400750 search_start = last_block;
Chris Masone9d0b132007-08-10 14:06:19 -0400751
Chris Masone7a84562008-06-25 16:01:31 -0400752 btrfs_tree_lock(cur);
Chris Masonb4ce94d2009-02-04 09:25:08 -0500753 btrfs_set_lock_blocking(cur);
Chris Mason6b800532007-10-15 16:17:34 -0400754 err = __btrfs_cow_block(trans, root, cur, parent, i,
Chris Masone7a84562008-06-25 16:01:31 -0400755 &cur, search_start,
Chris Mason6b800532007-10-15 16:17:34 -0400756 min(16 * blocksize,
Chris Mason9fa8cfe2009-03-13 10:24:59 -0400757 (end_slot - i) * blocksize));
Yan252c38f2007-08-29 09:11:44 -0400758 if (err) {
Chris Masone7a84562008-06-25 16:01:31 -0400759 btrfs_tree_unlock(cur);
Chris Mason6b800532007-10-15 16:17:34 -0400760 free_extent_buffer(cur);
Chris Mason6702ed42007-08-07 16:15:09 -0400761 break;
Yan252c38f2007-08-29 09:11:44 -0400762 }
Chris Masone7a84562008-06-25 16:01:31 -0400763 search_start = cur->start;
764 last_block = cur->start;
Chris Masonf2183bd2007-08-10 14:42:37 -0400765 *last_ret = search_start;
Chris Masone7a84562008-06-25 16:01:31 -0400766 btrfs_tree_unlock(cur);
767 free_extent_buffer(cur);
Chris Mason6702ed42007-08-07 16:15:09 -0400768 }
769 return err;
770}
771
Chris Mason74123bd2007-02-02 11:05:29 -0500772/*
773 * The leaf data grows from end-to-front in the node.
774 * this returns the address of the start of the last item,
775 * which is the stop of the leaf data stack
776 */
Chris Mason123abc82007-03-14 14:14:43 -0400777static inline unsigned int leaf_data_end(struct btrfs_root *root,
Chris Mason5f39d392007-10-15 16:14:19 -0400778 struct extent_buffer *leaf)
Chris Masonbe0e5c02007-01-26 15:51:26 -0500779{
Chris Mason5f39d392007-10-15 16:14:19 -0400780 u32 nr = btrfs_header_nritems(leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -0500781 if (nr == 0)
Chris Mason123abc82007-03-14 14:14:43 -0400782 return BTRFS_LEAF_DATA_SIZE(root);
Chris Mason5f39d392007-10-15 16:14:19 -0400783 return btrfs_item_offset_nr(leaf, nr - 1);
Chris Masonbe0e5c02007-01-26 15:51:26 -0500784}
785
Chris Masonaa5d6be2007-02-28 16:35:06 -0500786
Chris Mason74123bd2007-02-02 11:05:29 -0500787/*
Chris Mason5f39d392007-10-15 16:14:19 -0400788 * search for key in the extent_buffer. The items start at offset p,
789 * and they are item_size apart. There are 'max' items in p.
790 *
Chris Mason74123bd2007-02-02 11:05:29 -0500791 * the slot in the array is returned via slot, and it points to
792 * the place where you would insert key if it is not found in
793 * the array.
794 *
795 * slot may point to max if the key is bigger than all of the keys
796 */
Chris Masone02119d2008-09-05 16:13:11 -0400797static noinline int generic_bin_search(struct extent_buffer *eb,
798 unsigned long p,
799 int item_size, struct btrfs_key *key,
800 int max, int *slot)
Chris Masonbe0e5c02007-01-26 15:51:26 -0500801{
802 int low = 0;
803 int high = max;
804 int mid;
805 int ret;
Chris Mason479965d2007-10-15 16:14:27 -0400806 struct btrfs_disk_key *tmp = NULL;
Chris Mason5f39d392007-10-15 16:14:19 -0400807 struct btrfs_disk_key unaligned;
808 unsigned long offset;
Chris Mason5f39d392007-10-15 16:14:19 -0400809 char *kaddr = NULL;
810 unsigned long map_start = 0;
811 unsigned long map_len = 0;
Chris Mason479965d2007-10-15 16:14:27 -0400812 int err;
Chris Masonbe0e5c02007-01-26 15:51:26 -0500813
Chris Masond3977122009-01-05 21:25:51 -0500814 while (low < high) {
Chris Masonbe0e5c02007-01-26 15:51:26 -0500815 mid = (low + high) / 2;
Chris Mason5f39d392007-10-15 16:14:19 -0400816 offset = p + mid * item_size;
817
Chris Masona6591712011-07-19 12:04:14 -0400818 if (!kaddr || offset < map_start ||
Chris Mason5f39d392007-10-15 16:14:19 -0400819 (offset + sizeof(struct btrfs_disk_key)) >
820 map_start + map_len) {
Chris Mason934d3752008-12-08 16:43:10 -0500821
822 err = map_private_extent_buffer(eb, offset,
Chris Mason479965d2007-10-15 16:14:27 -0400823 sizeof(struct btrfs_disk_key),
Chris Masona6591712011-07-19 12:04:14 -0400824 &kaddr, &map_start, &map_len);
Chris Mason5f39d392007-10-15 16:14:19 -0400825
Chris Mason479965d2007-10-15 16:14:27 -0400826 if (!err) {
827 tmp = (struct btrfs_disk_key *)(kaddr + offset -
828 map_start);
829 } else {
830 read_extent_buffer(eb, &unaligned,
831 offset, sizeof(unaligned));
832 tmp = &unaligned;
833 }
834
Chris Mason5f39d392007-10-15 16:14:19 -0400835 } else {
836 tmp = (struct btrfs_disk_key *)(kaddr + offset -
837 map_start);
838 }
Chris Masonbe0e5c02007-01-26 15:51:26 -0500839 ret = comp_keys(tmp, key);
840
841 if (ret < 0)
842 low = mid + 1;
843 else if (ret > 0)
844 high = mid;
845 else {
846 *slot = mid;
847 return 0;
848 }
849 }
850 *slot = low;
851 return 1;
852}
853
Chris Mason97571fd2007-02-24 13:39:08 -0500854/*
855 * simple bin_search frontend that does the right thing for
856 * leaves vs nodes
857 */
Chris Mason5f39d392007-10-15 16:14:19 -0400858static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
859 int level, int *slot)
Chris Masonbe0e5c02007-01-26 15:51:26 -0500860{
Wang Sheng-Huif7757382012-03-30 15:14:27 +0800861 if (level == 0)
Chris Mason5f39d392007-10-15 16:14:19 -0400862 return generic_bin_search(eb,
863 offsetof(struct btrfs_leaf, items),
Chris Mason0783fcf2007-03-12 20:12:07 -0400864 sizeof(struct btrfs_item),
Chris Mason5f39d392007-10-15 16:14:19 -0400865 key, btrfs_header_nritems(eb),
Chris Mason7518a232007-03-12 12:01:18 -0400866 slot);
Wang Sheng-Huif7757382012-03-30 15:14:27 +0800867 else
Chris Mason5f39d392007-10-15 16:14:19 -0400868 return generic_bin_search(eb,
869 offsetof(struct btrfs_node, ptrs),
Chris Mason123abc82007-03-14 14:14:43 -0400870 sizeof(struct btrfs_key_ptr),
Chris Mason5f39d392007-10-15 16:14:19 -0400871 key, btrfs_header_nritems(eb),
Chris Mason7518a232007-03-12 12:01:18 -0400872 slot);
Chris Masonbe0e5c02007-01-26 15:51:26 -0500873}
874
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400875int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
876 int level, int *slot)
877{
878 return bin_search(eb, key, level, slot);
879}
880
Yan, Zhengf0486c62010-05-16 10:46:25 -0400881static void root_add_used(struct btrfs_root *root, u32 size)
882{
883 spin_lock(&root->accounting_lock);
884 btrfs_set_root_used(&root->root_item,
885 btrfs_root_used(&root->root_item) + size);
886 spin_unlock(&root->accounting_lock);
887}
888
889static void root_sub_used(struct btrfs_root *root, u32 size)
890{
891 spin_lock(&root->accounting_lock);
892 btrfs_set_root_used(&root->root_item,
893 btrfs_root_used(&root->root_item) - size);
894 spin_unlock(&root->accounting_lock);
895}
896
Chris Masond352ac62008-09-29 15:18:18 -0400897/* given a node and slot number, this reads the blocks it points to. The
898 * extent buffer is returned with a reference taken (but unlocked).
899 * NULL is returned on error.
900 */
Chris Masone02119d2008-09-05 16:13:11 -0400901static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
Chris Mason5f39d392007-10-15 16:14:19 -0400902 struct extent_buffer *parent, int slot)
Chris Masonbb803952007-03-01 12:04:21 -0500903{
Chris Masonca7a79a2008-05-12 12:59:19 -0400904 int level = btrfs_header_level(parent);
Chris Masonbb803952007-03-01 12:04:21 -0500905 if (slot < 0)
906 return NULL;
Chris Mason5f39d392007-10-15 16:14:19 -0400907 if (slot >= btrfs_header_nritems(parent))
Chris Masonbb803952007-03-01 12:04:21 -0500908 return NULL;
Chris Masonca7a79a2008-05-12 12:59:19 -0400909
910 BUG_ON(level == 0);
911
Chris Masondb945352007-10-15 16:15:53 -0400912 return read_tree_block(root, btrfs_node_blockptr(parent, slot),
Chris Masonca7a79a2008-05-12 12:59:19 -0400913 btrfs_level_size(root, level - 1),
914 btrfs_node_ptr_generation(parent, slot));
Chris Masonbb803952007-03-01 12:04:21 -0500915}
916
Chris Masond352ac62008-09-29 15:18:18 -0400917/*
918 * node level balancing, used to make sure nodes are in proper order for
919 * item deletion. We balance from the top down, so we have to make sure
920 * that a deletion won't leave an node completely empty later on.
921 */
Chris Masone02119d2008-09-05 16:13:11 -0400922static noinline int balance_level(struct btrfs_trans_handle *trans,
Chris Mason98ed5172008-01-03 10:01:48 -0500923 struct btrfs_root *root,
924 struct btrfs_path *path, int level)
Chris Masonbb803952007-03-01 12:04:21 -0500925{
Chris Mason5f39d392007-10-15 16:14:19 -0400926 struct extent_buffer *right = NULL;
927 struct extent_buffer *mid;
928 struct extent_buffer *left = NULL;
929 struct extent_buffer *parent = NULL;
Chris Masonbb803952007-03-01 12:04:21 -0500930 int ret = 0;
931 int wret;
932 int pslot;
Chris Masonbb803952007-03-01 12:04:21 -0500933 int orig_slot = path->slots[level];
Chris Mason79f95c82007-03-01 15:16:26 -0500934 u64 orig_ptr;
Chris Masonbb803952007-03-01 12:04:21 -0500935
936 if (level == 0)
937 return 0;
938
Chris Mason5f39d392007-10-15 16:14:19 -0400939 mid = path->nodes[level];
Chris Masonb4ce94d2009-02-04 09:25:08 -0500940
Chris Masonbd681512011-07-16 15:23:14 -0400941 WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK &&
942 path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING);
Chris Mason7bb86312007-12-11 09:25:06 -0500943 WARN_ON(btrfs_header_generation(mid) != trans->transid);
944
Chris Mason1d4f8a02007-03-13 09:28:32 -0400945 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
Chris Mason79f95c82007-03-01 15:16:26 -0500946
Li Zefana05a9bb2011-09-06 16:55:34 +0800947 if (level < BTRFS_MAX_LEVEL - 1) {
Chris Mason5f39d392007-10-15 16:14:19 -0400948 parent = path->nodes[level + 1];
Li Zefana05a9bb2011-09-06 16:55:34 +0800949 pslot = path->slots[level + 1];
950 }
Chris Masonbb803952007-03-01 12:04:21 -0500951
Chris Mason40689472007-03-17 14:29:23 -0400952 /*
953 * deal with the case where there is only one pointer in the root
954 * by promoting the node below to a root
955 */
Chris Mason5f39d392007-10-15 16:14:19 -0400956 if (!parent) {
957 struct extent_buffer *child;
Chris Masonbb803952007-03-01 12:04:21 -0500958
Chris Mason5f39d392007-10-15 16:14:19 -0400959 if (btrfs_header_nritems(mid) != 1)
Chris Masonbb803952007-03-01 12:04:21 -0500960 return 0;
961
962 /* promote the child to a root */
Chris Mason5f39d392007-10-15 16:14:19 -0400963 child = read_node_slot(root, mid, 0);
Mark Fasheh305a26a2011-09-01 11:27:57 -0700964 if (!child) {
965 ret = -EROFS;
966 btrfs_std_error(root->fs_info, ret);
967 goto enospc;
968 }
969
Chris Mason925baed2008-06-25 16:01:30 -0400970 btrfs_tree_lock(child);
Chris Masonb4ce94d2009-02-04 09:25:08 -0500971 btrfs_set_lock_blocking(child);
Chris Mason9fa8cfe2009-03-13 10:24:59 -0400972 ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
Yan, Zhengf0486c62010-05-16 10:46:25 -0400973 if (ret) {
974 btrfs_tree_unlock(child);
975 free_extent_buffer(child);
976 goto enospc;
977 }
Yan2f375ab2008-02-01 14:58:07 -0500978
Chris Mason240f62c2011-03-23 14:54:42 -0400979 rcu_assign_pointer(root->node, child);
Chris Mason925baed2008-06-25 16:01:30 -0400980
Chris Mason0b86a832008-03-24 15:01:56 -0400981 add_root_to_dirty_list(root);
Chris Mason925baed2008-06-25 16:01:30 -0400982 btrfs_tree_unlock(child);
Chris Masonb4ce94d2009-02-04 09:25:08 -0500983
Chris Mason925baed2008-06-25 16:01:30 -0400984 path->locks[level] = 0;
Chris Masonbb803952007-03-01 12:04:21 -0500985 path->nodes[level] = NULL;
Chris Mason5f39d392007-10-15 16:14:19 -0400986 clean_tree_block(trans, root, mid);
Chris Mason925baed2008-06-25 16:01:30 -0400987 btrfs_tree_unlock(mid);
Chris Masonbb803952007-03-01 12:04:21 -0500988 /* once for the path */
Chris Mason5f39d392007-10-15 16:14:19 -0400989 free_extent_buffer(mid);
Yan, Zhengf0486c62010-05-16 10:46:25 -0400990
991 root_sub_used(root, mid->len);
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200992 btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
Chris Masonbb803952007-03-01 12:04:21 -0500993 /* once for the root ptr */
Josef Bacik3083ee22012-03-09 16:01:49 -0500994 free_extent_buffer_stale(mid);
Yan, Zhengf0486c62010-05-16 10:46:25 -0400995 return 0;
Chris Masonbb803952007-03-01 12:04:21 -0500996 }
Chris Mason5f39d392007-10-15 16:14:19 -0400997 if (btrfs_header_nritems(mid) >
Chris Mason123abc82007-03-14 14:14:43 -0400998 BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
Chris Masonbb803952007-03-01 12:04:21 -0500999 return 0;
1000
Andi Kleen559af822010-10-29 15:14:37 -04001001 btrfs_header_nritems(mid);
Chris Mason54aa1f42007-06-22 14:16:25 -04001002
Chris Mason5f39d392007-10-15 16:14:19 -04001003 left = read_node_slot(root, parent, pslot - 1);
1004 if (left) {
Chris Mason925baed2008-06-25 16:01:30 -04001005 btrfs_tree_lock(left);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001006 btrfs_set_lock_blocking(left);
Chris Mason5f39d392007-10-15 16:14:19 -04001007 wret = btrfs_cow_block(trans, root, left,
Chris Mason9fa8cfe2009-03-13 10:24:59 -04001008 parent, pslot - 1, &left);
Chris Mason54aa1f42007-06-22 14:16:25 -04001009 if (wret) {
1010 ret = wret;
1011 goto enospc;
1012 }
Chris Mason2cc58cf2007-08-27 16:49:44 -04001013 }
Chris Mason5f39d392007-10-15 16:14:19 -04001014 right = read_node_slot(root, parent, pslot + 1);
1015 if (right) {
Chris Mason925baed2008-06-25 16:01:30 -04001016 btrfs_tree_lock(right);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001017 btrfs_set_lock_blocking(right);
Chris Mason5f39d392007-10-15 16:14:19 -04001018 wret = btrfs_cow_block(trans, root, right,
Chris Mason9fa8cfe2009-03-13 10:24:59 -04001019 parent, pslot + 1, &right);
Chris Mason2cc58cf2007-08-27 16:49:44 -04001020 if (wret) {
1021 ret = wret;
1022 goto enospc;
1023 }
1024 }
1025
1026 /* first, try to make some room in the middle buffer */
Chris Mason5f39d392007-10-15 16:14:19 -04001027 if (left) {
1028 orig_slot += btrfs_header_nritems(left);
Chris Masonbce4eae2008-04-24 14:42:46 -04001029 wret = push_node_left(trans, root, left, mid, 1);
Chris Mason79f95c82007-03-01 15:16:26 -05001030 if (wret < 0)
1031 ret = wret;
Andi Kleen559af822010-10-29 15:14:37 -04001032 btrfs_header_nritems(mid);
Chris Masonbb803952007-03-01 12:04:21 -05001033 }
Chris Mason79f95c82007-03-01 15:16:26 -05001034
1035 /*
1036 * then try to empty the right most buffer into the middle
1037 */
Chris Mason5f39d392007-10-15 16:14:19 -04001038 if (right) {
Chris Mason971a1f62008-04-24 10:54:32 -04001039 wret = push_node_left(trans, root, mid, right, 1);
Chris Mason54aa1f42007-06-22 14:16:25 -04001040 if (wret < 0 && wret != -ENOSPC)
Chris Mason79f95c82007-03-01 15:16:26 -05001041 ret = wret;
Chris Mason5f39d392007-10-15 16:14:19 -04001042 if (btrfs_header_nritems(right) == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04001043 clean_tree_block(trans, root, right);
Chris Mason925baed2008-06-25 16:01:30 -04001044 btrfs_tree_unlock(right);
Jeff Mahoney143bede2012-03-01 14:56:26 +01001045 del_ptr(trans, root, path, level + 1, pslot + 1);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001046 root_sub_used(root, right->len);
Arne Jansen66d7e7f2011-09-12 15:26:38 +02001047 btrfs_free_tree_block(trans, root, right, 0, 1, 0);
Josef Bacik3083ee22012-03-09 16:01:49 -05001048 free_extent_buffer_stale(right);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001049 right = NULL;
Chris Masonbb803952007-03-01 12:04:21 -05001050 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04001051 struct btrfs_disk_key right_key;
1052 btrfs_node_key(right, &right_key, 0);
1053 btrfs_set_node_key(parent, &right_key, pslot + 1);
1054 btrfs_mark_buffer_dirty(parent);
Chris Masonbb803952007-03-01 12:04:21 -05001055 }
1056 }
Chris Mason5f39d392007-10-15 16:14:19 -04001057 if (btrfs_header_nritems(mid) == 1) {
Chris Mason79f95c82007-03-01 15:16:26 -05001058 /*
1059 * we're not allowed to leave a node with one item in the
1060 * tree during a delete. A deletion from lower in the tree
1061 * could try to delete the only pointer in this node.
1062 * So, pull some keys from the left.
1063 * There has to be a left pointer at this point because
1064 * otherwise we would have pulled some pointers from the
1065 * right
1066 */
Mark Fasheh305a26a2011-09-01 11:27:57 -07001067 if (!left) {
1068 ret = -EROFS;
1069 btrfs_std_error(root->fs_info, ret);
1070 goto enospc;
1071 }
Chris Mason5f39d392007-10-15 16:14:19 -04001072 wret = balance_node_right(trans, root, mid, left);
Chris Mason54aa1f42007-06-22 14:16:25 -04001073 if (wret < 0) {
Chris Mason79f95c82007-03-01 15:16:26 -05001074 ret = wret;
Chris Mason54aa1f42007-06-22 14:16:25 -04001075 goto enospc;
1076 }
Chris Masonbce4eae2008-04-24 14:42:46 -04001077 if (wret == 1) {
1078 wret = push_node_left(trans, root, left, mid, 1);
1079 if (wret < 0)
1080 ret = wret;
1081 }
Chris Mason79f95c82007-03-01 15:16:26 -05001082 BUG_ON(wret == 1);
1083 }
Chris Mason5f39d392007-10-15 16:14:19 -04001084 if (btrfs_header_nritems(mid) == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04001085 clean_tree_block(trans, root, mid);
Chris Mason925baed2008-06-25 16:01:30 -04001086 btrfs_tree_unlock(mid);
Jeff Mahoney143bede2012-03-01 14:56:26 +01001087 del_ptr(trans, root, path, level + 1, pslot);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001088 root_sub_used(root, mid->len);
Arne Jansen66d7e7f2011-09-12 15:26:38 +02001089 btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
Josef Bacik3083ee22012-03-09 16:01:49 -05001090 free_extent_buffer_stale(mid);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001091 mid = NULL;
Chris Mason79f95c82007-03-01 15:16:26 -05001092 } else {
1093 /* update the parent key to reflect our changes */
Chris Mason5f39d392007-10-15 16:14:19 -04001094 struct btrfs_disk_key mid_key;
1095 btrfs_node_key(mid, &mid_key, 0);
1096 btrfs_set_node_key(parent, &mid_key, pslot);
1097 btrfs_mark_buffer_dirty(parent);
Chris Mason79f95c82007-03-01 15:16:26 -05001098 }
Chris Masonbb803952007-03-01 12:04:21 -05001099
Chris Mason79f95c82007-03-01 15:16:26 -05001100 /* update the path */
Chris Mason5f39d392007-10-15 16:14:19 -04001101 if (left) {
1102 if (btrfs_header_nritems(left) > orig_slot) {
1103 extent_buffer_get(left);
Chris Mason925baed2008-06-25 16:01:30 -04001104 /* left was locked after cow */
Chris Mason5f39d392007-10-15 16:14:19 -04001105 path->nodes[level] = left;
Chris Masonbb803952007-03-01 12:04:21 -05001106 path->slots[level + 1] -= 1;
1107 path->slots[level] = orig_slot;
Chris Mason925baed2008-06-25 16:01:30 -04001108 if (mid) {
1109 btrfs_tree_unlock(mid);
Chris Mason5f39d392007-10-15 16:14:19 -04001110 free_extent_buffer(mid);
Chris Mason925baed2008-06-25 16:01:30 -04001111 }
Chris Masonbb803952007-03-01 12:04:21 -05001112 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04001113 orig_slot -= btrfs_header_nritems(left);
Chris Masonbb803952007-03-01 12:04:21 -05001114 path->slots[level] = orig_slot;
1115 }
1116 }
Chris Mason79f95c82007-03-01 15:16:26 -05001117 /* double check we haven't messed things up */
Chris Masone20d96d2007-03-22 12:13:20 -04001118 if (orig_ptr !=
Chris Mason5f39d392007-10-15 16:14:19 -04001119 btrfs_node_blockptr(path->nodes[level], path->slots[level]))
Chris Mason79f95c82007-03-01 15:16:26 -05001120 BUG();
Chris Mason54aa1f42007-06-22 14:16:25 -04001121enospc:
Chris Mason925baed2008-06-25 16:01:30 -04001122 if (right) {
1123 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04001124 free_extent_buffer(right);
Chris Mason925baed2008-06-25 16:01:30 -04001125 }
1126 if (left) {
1127 if (path->nodes[level] != left)
1128 btrfs_tree_unlock(left);
Chris Mason5f39d392007-10-15 16:14:19 -04001129 free_extent_buffer(left);
Chris Mason925baed2008-06-25 16:01:30 -04001130 }
Chris Masonbb803952007-03-01 12:04:21 -05001131 return ret;
1132}
1133
Chris Masond352ac62008-09-29 15:18:18 -04001134/* Node balancing for insertion. Here we only split or push nodes around
1135 * when they are completely full. This is also done top down, so we
1136 * have to be pessimistic.
1137 */
Chris Masond3977122009-01-05 21:25:51 -05001138static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
Chris Mason98ed5172008-01-03 10:01:48 -05001139 struct btrfs_root *root,
1140 struct btrfs_path *path, int level)
Chris Masone66f7092007-04-20 13:16:02 -04001141{
Chris Mason5f39d392007-10-15 16:14:19 -04001142 struct extent_buffer *right = NULL;
1143 struct extent_buffer *mid;
1144 struct extent_buffer *left = NULL;
1145 struct extent_buffer *parent = NULL;
Chris Masone66f7092007-04-20 13:16:02 -04001146 int ret = 0;
1147 int wret;
1148 int pslot;
1149 int orig_slot = path->slots[level];
Chris Masone66f7092007-04-20 13:16:02 -04001150
1151 if (level == 0)
1152 return 1;
1153
Chris Mason5f39d392007-10-15 16:14:19 -04001154 mid = path->nodes[level];
Chris Mason7bb86312007-12-11 09:25:06 -05001155 WARN_ON(btrfs_header_generation(mid) != trans->transid);
Chris Masone66f7092007-04-20 13:16:02 -04001156
Li Zefana05a9bb2011-09-06 16:55:34 +08001157 if (level < BTRFS_MAX_LEVEL - 1) {
Chris Mason5f39d392007-10-15 16:14:19 -04001158 parent = path->nodes[level + 1];
Li Zefana05a9bb2011-09-06 16:55:34 +08001159 pslot = path->slots[level + 1];
1160 }
Chris Masone66f7092007-04-20 13:16:02 -04001161
Chris Mason5f39d392007-10-15 16:14:19 -04001162 if (!parent)
Chris Masone66f7092007-04-20 13:16:02 -04001163 return 1;
Chris Masone66f7092007-04-20 13:16:02 -04001164
Chris Mason5f39d392007-10-15 16:14:19 -04001165 left = read_node_slot(root, parent, pslot - 1);
Chris Masone66f7092007-04-20 13:16:02 -04001166
1167 /* first, try to make some room in the middle buffer */
Chris Mason5f39d392007-10-15 16:14:19 -04001168 if (left) {
Chris Masone66f7092007-04-20 13:16:02 -04001169 u32 left_nr;
Chris Mason925baed2008-06-25 16:01:30 -04001170
1171 btrfs_tree_lock(left);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001172 btrfs_set_lock_blocking(left);
1173
Chris Mason5f39d392007-10-15 16:14:19 -04001174 left_nr = btrfs_header_nritems(left);
Chris Mason33ade1f2007-04-20 13:48:57 -04001175 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1176 wret = 1;
1177 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04001178 ret = btrfs_cow_block(trans, root, left, parent,
Chris Mason9fa8cfe2009-03-13 10:24:59 -04001179 pslot - 1, &left);
Chris Mason54aa1f42007-06-22 14:16:25 -04001180 if (ret)
1181 wret = 1;
1182 else {
Chris Mason54aa1f42007-06-22 14:16:25 -04001183 wret = push_node_left(trans, root,
Chris Mason971a1f62008-04-24 10:54:32 -04001184 left, mid, 0);
Chris Mason54aa1f42007-06-22 14:16:25 -04001185 }
Chris Mason33ade1f2007-04-20 13:48:57 -04001186 }
Chris Masone66f7092007-04-20 13:16:02 -04001187 if (wret < 0)
1188 ret = wret;
1189 if (wret == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04001190 struct btrfs_disk_key disk_key;
Chris Masone66f7092007-04-20 13:16:02 -04001191 orig_slot += left_nr;
Chris Mason5f39d392007-10-15 16:14:19 -04001192 btrfs_node_key(mid, &disk_key, 0);
1193 btrfs_set_node_key(parent, &disk_key, pslot);
1194 btrfs_mark_buffer_dirty(parent);
1195 if (btrfs_header_nritems(left) > orig_slot) {
1196 path->nodes[level] = left;
Chris Masone66f7092007-04-20 13:16:02 -04001197 path->slots[level + 1] -= 1;
1198 path->slots[level] = orig_slot;
Chris Mason925baed2008-06-25 16:01:30 -04001199 btrfs_tree_unlock(mid);
Chris Mason5f39d392007-10-15 16:14:19 -04001200 free_extent_buffer(mid);
Chris Masone66f7092007-04-20 13:16:02 -04001201 } else {
1202 orig_slot -=
Chris Mason5f39d392007-10-15 16:14:19 -04001203 btrfs_header_nritems(left);
Chris Masone66f7092007-04-20 13:16:02 -04001204 path->slots[level] = orig_slot;
Chris Mason925baed2008-06-25 16:01:30 -04001205 btrfs_tree_unlock(left);
Chris Mason5f39d392007-10-15 16:14:19 -04001206 free_extent_buffer(left);
Chris Masone66f7092007-04-20 13:16:02 -04001207 }
Chris Masone66f7092007-04-20 13:16:02 -04001208 return 0;
1209 }
Chris Mason925baed2008-06-25 16:01:30 -04001210 btrfs_tree_unlock(left);
Chris Mason5f39d392007-10-15 16:14:19 -04001211 free_extent_buffer(left);
Chris Masone66f7092007-04-20 13:16:02 -04001212 }
Chris Mason925baed2008-06-25 16:01:30 -04001213 right = read_node_slot(root, parent, pslot + 1);
Chris Masone66f7092007-04-20 13:16:02 -04001214
1215 /*
1216 * then try to empty the right most buffer into the middle
1217 */
Chris Mason5f39d392007-10-15 16:14:19 -04001218 if (right) {
Chris Mason33ade1f2007-04-20 13:48:57 -04001219 u32 right_nr;
Chris Masonb4ce94d2009-02-04 09:25:08 -05001220
Chris Mason925baed2008-06-25 16:01:30 -04001221 btrfs_tree_lock(right);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001222 btrfs_set_lock_blocking(right);
1223
Chris Mason5f39d392007-10-15 16:14:19 -04001224 right_nr = btrfs_header_nritems(right);
Chris Mason33ade1f2007-04-20 13:48:57 -04001225 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1226 wret = 1;
1227 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04001228 ret = btrfs_cow_block(trans, root, right,
1229 parent, pslot + 1,
Chris Mason9fa8cfe2009-03-13 10:24:59 -04001230 &right);
Chris Mason54aa1f42007-06-22 14:16:25 -04001231 if (ret)
1232 wret = 1;
1233 else {
Chris Mason54aa1f42007-06-22 14:16:25 -04001234 wret = balance_node_right(trans, root,
Chris Mason5f39d392007-10-15 16:14:19 -04001235 right, mid);
Chris Mason54aa1f42007-06-22 14:16:25 -04001236 }
Chris Mason33ade1f2007-04-20 13:48:57 -04001237 }
Chris Masone66f7092007-04-20 13:16:02 -04001238 if (wret < 0)
1239 ret = wret;
1240 if (wret == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04001241 struct btrfs_disk_key disk_key;
1242
1243 btrfs_node_key(right, &disk_key, 0);
1244 btrfs_set_node_key(parent, &disk_key, pslot + 1);
1245 btrfs_mark_buffer_dirty(parent);
1246
1247 if (btrfs_header_nritems(mid) <= orig_slot) {
1248 path->nodes[level] = right;
Chris Masone66f7092007-04-20 13:16:02 -04001249 path->slots[level + 1] += 1;
1250 path->slots[level] = orig_slot -
Chris Mason5f39d392007-10-15 16:14:19 -04001251 btrfs_header_nritems(mid);
Chris Mason925baed2008-06-25 16:01:30 -04001252 btrfs_tree_unlock(mid);
Chris Mason5f39d392007-10-15 16:14:19 -04001253 free_extent_buffer(mid);
Chris Masone66f7092007-04-20 13:16:02 -04001254 } else {
Chris Mason925baed2008-06-25 16:01:30 -04001255 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04001256 free_extent_buffer(right);
Chris Masone66f7092007-04-20 13:16:02 -04001257 }
Chris Masone66f7092007-04-20 13:16:02 -04001258 return 0;
1259 }
Chris Mason925baed2008-06-25 16:01:30 -04001260 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04001261 free_extent_buffer(right);
Chris Masone66f7092007-04-20 13:16:02 -04001262 }
Chris Masone66f7092007-04-20 13:16:02 -04001263 return 1;
1264}
1265
Chris Mason74123bd2007-02-02 11:05:29 -05001266/*
Chris Masond352ac62008-09-29 15:18:18 -04001267 * readahead one full node of leaves, finding things that are close
1268 * to the block in 'slot', and triggering ra on them.
Chris Mason3c69fae2007-08-07 15:52:22 -04001269 */
Chris Masonc8c42862009-04-03 10:14:18 -04001270static void reada_for_search(struct btrfs_root *root,
1271 struct btrfs_path *path,
1272 int level, int slot, u64 objectid)
Chris Mason3c69fae2007-08-07 15:52:22 -04001273{
Chris Mason5f39d392007-10-15 16:14:19 -04001274 struct extent_buffer *node;
Chris Mason01f46652007-12-21 16:24:26 -05001275 struct btrfs_disk_key disk_key;
Chris Mason3c69fae2007-08-07 15:52:22 -04001276 u32 nritems;
Chris Mason3c69fae2007-08-07 15:52:22 -04001277 u64 search;
Chris Masona7175312009-01-22 09:23:10 -05001278 u64 target;
Chris Mason6b800532007-10-15 16:17:34 -04001279 u64 nread = 0;
Josef Bacikcb25c2e2011-05-11 12:17:34 -04001280 u64 gen;
Chris Mason3c69fae2007-08-07 15:52:22 -04001281 int direction = path->reada;
Chris Mason5f39d392007-10-15 16:14:19 -04001282 struct extent_buffer *eb;
Chris Mason6b800532007-10-15 16:17:34 -04001283 u32 nr;
1284 u32 blocksize;
1285 u32 nscan = 0;
Chris Masondb945352007-10-15 16:15:53 -04001286
Chris Masona6b6e752007-10-15 16:22:39 -04001287 if (level != 1)
Chris Mason3c69fae2007-08-07 15:52:22 -04001288 return;
1289
Chris Mason6702ed42007-08-07 16:15:09 -04001290 if (!path->nodes[level])
1291 return;
1292
Chris Mason5f39d392007-10-15 16:14:19 -04001293 node = path->nodes[level];
Chris Mason925baed2008-06-25 16:01:30 -04001294
Chris Mason3c69fae2007-08-07 15:52:22 -04001295 search = btrfs_node_blockptr(node, slot);
Chris Mason6b800532007-10-15 16:17:34 -04001296 blocksize = btrfs_level_size(root, level - 1);
1297 eb = btrfs_find_tree_block(root, search, blocksize);
Chris Mason5f39d392007-10-15 16:14:19 -04001298 if (eb) {
1299 free_extent_buffer(eb);
Chris Mason3c69fae2007-08-07 15:52:22 -04001300 return;
1301 }
1302
Chris Masona7175312009-01-22 09:23:10 -05001303 target = search;
Chris Mason6b800532007-10-15 16:17:34 -04001304
Chris Mason5f39d392007-10-15 16:14:19 -04001305 nritems = btrfs_header_nritems(node);
Chris Mason6b800532007-10-15 16:17:34 -04001306 nr = slot;
Josef Bacik25b8b932011-06-08 14:36:54 -04001307
Chris Masond3977122009-01-05 21:25:51 -05001308 while (1) {
Chris Mason6b800532007-10-15 16:17:34 -04001309 if (direction < 0) {
1310 if (nr == 0)
1311 break;
1312 nr--;
1313 } else if (direction > 0) {
1314 nr++;
1315 if (nr >= nritems)
1316 break;
Chris Mason3c69fae2007-08-07 15:52:22 -04001317 }
Chris Mason01f46652007-12-21 16:24:26 -05001318 if (path->reada < 0 && objectid) {
1319 btrfs_node_key(node, &disk_key, nr);
1320 if (btrfs_disk_key_objectid(&disk_key) != objectid)
1321 break;
1322 }
Chris Mason6b800532007-10-15 16:17:34 -04001323 search = btrfs_node_blockptr(node, nr);
Chris Masona7175312009-01-22 09:23:10 -05001324 if ((search <= target && target - search <= 65536) ||
1325 (search > target && search - target <= 65536)) {
Josef Bacikcb25c2e2011-05-11 12:17:34 -04001326 gen = btrfs_node_ptr_generation(node, nr);
Josef Bacikcb25c2e2011-05-11 12:17:34 -04001327 readahead_tree_block(root, search, blocksize, gen);
Chris Mason6b800532007-10-15 16:17:34 -04001328 nread += blocksize;
1329 }
1330 nscan++;
Chris Masona7175312009-01-22 09:23:10 -05001331 if ((nread > 65536 || nscan > 32))
Chris Mason6b800532007-10-15 16:17:34 -04001332 break;
Chris Mason3c69fae2007-08-07 15:52:22 -04001333 }
1334}
Chris Mason925baed2008-06-25 16:01:30 -04001335
Chris Masond352ac62008-09-29 15:18:18 -04001336/*
Chris Masonb4ce94d2009-02-04 09:25:08 -05001337 * returns -EAGAIN if it had to drop the path, or zero if everything was in
1338 * cache
1339 */
1340static noinline int reada_for_balance(struct btrfs_root *root,
1341 struct btrfs_path *path, int level)
1342{
1343 int slot;
1344 int nritems;
1345 struct extent_buffer *parent;
1346 struct extent_buffer *eb;
1347 u64 gen;
1348 u64 block1 = 0;
1349 u64 block2 = 0;
1350 int ret = 0;
1351 int blocksize;
1352
Chris Mason8c594ea2009-04-20 15:50:10 -04001353 parent = path->nodes[level + 1];
Chris Masonb4ce94d2009-02-04 09:25:08 -05001354 if (!parent)
1355 return 0;
1356
1357 nritems = btrfs_header_nritems(parent);
Chris Mason8c594ea2009-04-20 15:50:10 -04001358 slot = path->slots[level + 1];
Chris Masonb4ce94d2009-02-04 09:25:08 -05001359 blocksize = btrfs_level_size(root, level);
1360
1361 if (slot > 0) {
1362 block1 = btrfs_node_blockptr(parent, slot - 1);
1363 gen = btrfs_node_ptr_generation(parent, slot - 1);
1364 eb = btrfs_find_tree_block(root, block1, blocksize);
Chris Masonb9fab912012-05-06 07:23:47 -04001365 /*
1366 * if we get -eagain from btrfs_buffer_uptodate, we
1367 * don't want to return eagain here. That will loop
1368 * forever
1369 */
1370 if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
Chris Masonb4ce94d2009-02-04 09:25:08 -05001371 block1 = 0;
1372 free_extent_buffer(eb);
1373 }
Chris Mason8c594ea2009-04-20 15:50:10 -04001374 if (slot + 1 < nritems) {
Chris Masonb4ce94d2009-02-04 09:25:08 -05001375 block2 = btrfs_node_blockptr(parent, slot + 1);
1376 gen = btrfs_node_ptr_generation(parent, slot + 1);
1377 eb = btrfs_find_tree_block(root, block2, blocksize);
Chris Masonb9fab912012-05-06 07:23:47 -04001378 if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
Chris Masonb4ce94d2009-02-04 09:25:08 -05001379 block2 = 0;
1380 free_extent_buffer(eb);
1381 }
1382 if (block1 || block2) {
1383 ret = -EAGAIN;
Chris Mason8c594ea2009-04-20 15:50:10 -04001384
1385 /* release the whole path */
David Sterbab3b4aa72011-04-21 01:20:15 +02001386 btrfs_release_path(path);
Chris Mason8c594ea2009-04-20 15:50:10 -04001387
1388 /* read the blocks */
Chris Masonb4ce94d2009-02-04 09:25:08 -05001389 if (block1)
1390 readahead_tree_block(root, block1, blocksize, 0);
1391 if (block2)
1392 readahead_tree_block(root, block2, blocksize, 0);
1393
1394 if (block1) {
1395 eb = read_tree_block(root, block1, blocksize, 0);
1396 free_extent_buffer(eb);
1397 }
Chris Mason8c594ea2009-04-20 15:50:10 -04001398 if (block2) {
Chris Masonb4ce94d2009-02-04 09:25:08 -05001399 eb = read_tree_block(root, block2, blocksize, 0);
1400 free_extent_buffer(eb);
1401 }
1402 }
1403 return ret;
1404}
1405
1406
1407/*
Chris Masond3977122009-01-05 21:25:51 -05001408 * when we walk down the tree, it is usually safe to unlock the higher layers
1409 * in the tree. The exceptions are when our path goes through slot 0, because
1410 * operations on the tree might require changing key pointers higher up in the
1411 * tree.
Chris Masond352ac62008-09-29 15:18:18 -04001412 *
Chris Masond3977122009-01-05 21:25:51 -05001413 * callers might also have set path->keep_locks, which tells this code to keep
1414 * the lock if the path points to the last slot in the block. This is part of
1415 * walking through the tree, and selecting the next slot in the higher block.
Chris Masond352ac62008-09-29 15:18:18 -04001416 *
Chris Masond3977122009-01-05 21:25:51 -05001417 * lowest_unlock sets the lowest level in the tree we're allowed to unlock. so
1418 * if lowest_unlock is 1, level 0 won't be unlocked
Chris Masond352ac62008-09-29 15:18:18 -04001419 */
Chris Masone02119d2008-09-05 16:13:11 -04001420static noinline void unlock_up(struct btrfs_path *path, int level,
Chris Masonf7c79f32012-03-19 15:54:38 -04001421 int lowest_unlock, int min_write_lock_level,
1422 int *write_lock_level)
Chris Mason925baed2008-06-25 16:01:30 -04001423{
1424 int i;
1425 int skip_level = level;
Chris Mason051e1b92008-06-25 16:01:30 -04001426 int no_skips = 0;
Chris Mason925baed2008-06-25 16:01:30 -04001427 struct extent_buffer *t;
1428
1429 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1430 if (!path->nodes[i])
1431 break;
1432 if (!path->locks[i])
1433 break;
Chris Mason051e1b92008-06-25 16:01:30 -04001434 if (!no_skips && path->slots[i] == 0) {
Chris Mason925baed2008-06-25 16:01:30 -04001435 skip_level = i + 1;
1436 continue;
1437 }
Chris Mason051e1b92008-06-25 16:01:30 -04001438 if (!no_skips && path->keep_locks) {
Chris Mason925baed2008-06-25 16:01:30 -04001439 u32 nritems;
1440 t = path->nodes[i];
1441 nritems = btrfs_header_nritems(t);
Chris Mason051e1b92008-06-25 16:01:30 -04001442 if (nritems < 1 || path->slots[i] >= nritems - 1) {
Chris Mason925baed2008-06-25 16:01:30 -04001443 skip_level = i + 1;
1444 continue;
1445 }
1446 }
Chris Mason051e1b92008-06-25 16:01:30 -04001447 if (skip_level < i && i >= lowest_unlock)
1448 no_skips = 1;
1449
Chris Mason925baed2008-06-25 16:01:30 -04001450 t = path->nodes[i];
1451 if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
Chris Masonbd681512011-07-16 15:23:14 -04001452 btrfs_tree_unlock_rw(t, path->locks[i]);
Chris Mason925baed2008-06-25 16:01:30 -04001453 path->locks[i] = 0;
Chris Masonf7c79f32012-03-19 15:54:38 -04001454 if (write_lock_level &&
1455 i > min_write_lock_level &&
1456 i <= *write_lock_level) {
1457 *write_lock_level = i - 1;
1458 }
Chris Mason925baed2008-06-25 16:01:30 -04001459 }
1460 }
1461}
1462
Chris Mason3c69fae2007-08-07 15:52:22 -04001463/*
Chris Masonb4ce94d2009-02-04 09:25:08 -05001464 * This releases any locks held in the path starting at level and
1465 * going all the way up to the root.
1466 *
1467 * btrfs_search_slot will keep the lock held on higher nodes in a few
1468 * corner cases, such as COW of the block at slot zero in the node. This
1469 * ignores those rules, and it should only be called when there are no
1470 * more updates to be done higher up in the tree.
1471 */
1472noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
1473{
1474 int i;
1475
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001476 if (path->keep_locks)
Chris Masonb4ce94d2009-02-04 09:25:08 -05001477 return;
1478
1479 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1480 if (!path->nodes[i])
Chris Mason12f4dac2009-02-04 09:31:42 -05001481 continue;
Chris Masonb4ce94d2009-02-04 09:25:08 -05001482 if (!path->locks[i])
Chris Mason12f4dac2009-02-04 09:31:42 -05001483 continue;
Chris Masonbd681512011-07-16 15:23:14 -04001484 btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001485 path->locks[i] = 0;
1486 }
1487}
1488
1489/*
Chris Masonc8c42862009-04-03 10:14:18 -04001490 * helper function for btrfs_search_slot. The goal is to find a block
1491 * in cache without setting the path to blocking. If we find the block
1492 * we return zero and the path is unchanged.
1493 *
1494 * If we can't find the block, we set the path blocking and do some
1495 * reada. -EAGAIN is returned and the search must be repeated.
1496 */
1497static int
1498read_block_for_search(struct btrfs_trans_handle *trans,
1499 struct btrfs_root *root, struct btrfs_path *p,
1500 struct extent_buffer **eb_ret, int level, int slot,
1501 struct btrfs_key *key)
1502{
1503 u64 blocknr;
1504 u64 gen;
1505 u32 blocksize;
1506 struct extent_buffer *b = *eb_ret;
1507 struct extent_buffer *tmp;
Chris Mason76a05b32009-05-14 13:24:30 -04001508 int ret;
Chris Masonc8c42862009-04-03 10:14:18 -04001509
1510 blocknr = btrfs_node_blockptr(b, slot);
1511 gen = btrfs_node_ptr_generation(b, slot);
1512 blocksize = btrfs_level_size(root, level - 1);
1513
1514 tmp = btrfs_find_tree_block(root, blocknr, blocksize);
Chris Masoncb449212010-10-24 11:01:27 -04001515 if (tmp) {
Chris Masonb9fab912012-05-06 07:23:47 -04001516 /* first we do an atomic uptodate check */
1517 if (btrfs_buffer_uptodate(tmp, 0, 1) > 0) {
1518 if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
Chris Masoncb449212010-10-24 11:01:27 -04001519 /*
1520 * we found an up to date block without
1521 * sleeping, return
1522 * right away
1523 */
1524 *eb_ret = tmp;
1525 return 0;
1526 }
1527 /* the pages were up to date, but we failed
1528 * the generation number check. Do a full
1529 * read for the generation number that is correct.
1530 * We must do this without dropping locks so
1531 * we can trust our generation number
1532 */
1533 free_extent_buffer(tmp);
Chris Masonbd681512011-07-16 15:23:14 -04001534 btrfs_set_path_blocking(p);
1535
Chris Masonb9fab912012-05-06 07:23:47 -04001536 /* now we're allowed to do a blocking uptodate check */
Chris Masoncb449212010-10-24 11:01:27 -04001537 tmp = read_tree_block(root, blocknr, blocksize, gen);
Chris Masonb9fab912012-05-06 07:23:47 -04001538 if (tmp && btrfs_buffer_uptodate(tmp, gen, 0) > 0) {
Chris Masoncb449212010-10-24 11:01:27 -04001539 *eb_ret = tmp;
1540 return 0;
1541 }
1542 free_extent_buffer(tmp);
David Sterbab3b4aa72011-04-21 01:20:15 +02001543 btrfs_release_path(p);
Chris Masoncb449212010-10-24 11:01:27 -04001544 return -EIO;
1545 }
Chris Masonc8c42862009-04-03 10:14:18 -04001546 }
1547
1548 /*
1549 * reduce lock contention at high levels
1550 * of the btree by dropping locks before
Chris Mason76a05b32009-05-14 13:24:30 -04001551 * we read. Don't release the lock on the current
1552 * level because we need to walk this node to figure
1553 * out which blocks to read.
Chris Masonc8c42862009-04-03 10:14:18 -04001554 */
Chris Mason8c594ea2009-04-20 15:50:10 -04001555 btrfs_unlock_up_safe(p, level + 1);
1556 btrfs_set_path_blocking(p);
1557
Chris Masoncb449212010-10-24 11:01:27 -04001558 free_extent_buffer(tmp);
Chris Masonc8c42862009-04-03 10:14:18 -04001559 if (p->reada)
1560 reada_for_search(root, p, level, slot, key->objectid);
1561
David Sterbab3b4aa72011-04-21 01:20:15 +02001562 btrfs_release_path(p);
Chris Mason76a05b32009-05-14 13:24:30 -04001563
1564 ret = -EAGAIN;
Yan, Zheng5bdd3532010-05-26 11:20:30 -04001565 tmp = read_tree_block(root, blocknr, blocksize, 0);
Chris Mason76a05b32009-05-14 13:24:30 -04001566 if (tmp) {
1567 /*
1568 * If the read above didn't mark this buffer up to date,
1569 * it will never end up being up to date. Set ret to EIO now
1570 * and give up so that our caller doesn't loop forever
1571 * on our EAGAINs.
1572 */
Chris Masonb9fab912012-05-06 07:23:47 -04001573 if (!btrfs_buffer_uptodate(tmp, 0, 0))
Chris Mason76a05b32009-05-14 13:24:30 -04001574 ret = -EIO;
Chris Masonc8c42862009-04-03 10:14:18 -04001575 free_extent_buffer(tmp);
Chris Mason76a05b32009-05-14 13:24:30 -04001576 }
1577 return ret;
Chris Masonc8c42862009-04-03 10:14:18 -04001578}
1579
1580/*
1581 * helper function for btrfs_search_slot. This does all of the checks
1582 * for node-level blocks and does any balancing required based on
1583 * the ins_len.
1584 *
1585 * If no extra work was required, zero is returned. If we had to
1586 * drop the path, -EAGAIN is returned and btrfs_search_slot must
1587 * start over
1588 */
1589static int
1590setup_nodes_for_search(struct btrfs_trans_handle *trans,
1591 struct btrfs_root *root, struct btrfs_path *p,
Chris Masonbd681512011-07-16 15:23:14 -04001592 struct extent_buffer *b, int level, int ins_len,
1593 int *write_lock_level)
Chris Masonc8c42862009-04-03 10:14:18 -04001594{
1595 int ret;
1596 if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
1597 BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
1598 int sret;
1599
Chris Masonbd681512011-07-16 15:23:14 -04001600 if (*write_lock_level < level + 1) {
1601 *write_lock_level = level + 1;
1602 btrfs_release_path(p);
1603 goto again;
1604 }
1605
Chris Masonc8c42862009-04-03 10:14:18 -04001606 sret = reada_for_balance(root, p, level);
1607 if (sret)
1608 goto again;
1609
1610 btrfs_set_path_blocking(p);
1611 sret = split_node(trans, root, p, level);
Chris Masonbd681512011-07-16 15:23:14 -04001612 btrfs_clear_path_blocking(p, NULL, 0);
Chris Masonc8c42862009-04-03 10:14:18 -04001613
1614 BUG_ON(sret > 0);
1615 if (sret) {
1616 ret = sret;
1617 goto done;
1618 }
1619 b = p->nodes[level];
1620 } else if (ins_len < 0 && btrfs_header_nritems(b) <
Chris Masoncfbb9302009-05-18 10:41:58 -04001621 BTRFS_NODEPTRS_PER_BLOCK(root) / 2) {
Chris Masonc8c42862009-04-03 10:14:18 -04001622 int sret;
1623
Chris Masonbd681512011-07-16 15:23:14 -04001624 if (*write_lock_level < level + 1) {
1625 *write_lock_level = level + 1;
1626 btrfs_release_path(p);
1627 goto again;
1628 }
1629
Chris Masonc8c42862009-04-03 10:14:18 -04001630 sret = reada_for_balance(root, p, level);
1631 if (sret)
1632 goto again;
1633
1634 btrfs_set_path_blocking(p);
1635 sret = balance_level(trans, root, p, level);
Chris Masonbd681512011-07-16 15:23:14 -04001636 btrfs_clear_path_blocking(p, NULL, 0);
Chris Masonc8c42862009-04-03 10:14:18 -04001637
1638 if (sret) {
1639 ret = sret;
1640 goto done;
1641 }
1642 b = p->nodes[level];
1643 if (!b) {
David Sterbab3b4aa72011-04-21 01:20:15 +02001644 btrfs_release_path(p);
Chris Masonc8c42862009-04-03 10:14:18 -04001645 goto again;
1646 }
1647 BUG_ON(btrfs_header_nritems(b) == 1);
1648 }
1649 return 0;
1650
1651again:
1652 ret = -EAGAIN;
1653done:
1654 return ret;
1655}
1656
1657/*
Chris Mason74123bd2007-02-02 11:05:29 -05001658 * look for key in the tree. path is filled in with nodes along the way
1659 * if key is found, we return zero and you can find the item in the leaf
1660 * level of the path (level 0)
1661 *
1662 * If the key isn't found, the path points to the slot where it should
Chris Masonaa5d6be2007-02-28 16:35:06 -05001663 * be inserted, and 1 is returned. If there are other errors during the
1664 * search a negative error number is returned.
Chris Mason97571fd2007-02-24 13:39:08 -05001665 *
1666 * if ins_len > 0, nodes and leaves will be split as we walk down the
1667 * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
1668 * possible)
Chris Mason74123bd2007-02-02 11:05:29 -05001669 */
Chris Masone089f052007-03-16 16:20:31 -04001670int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1671 *root, struct btrfs_key *key, struct btrfs_path *p, int
1672 ins_len, int cow)
Chris Masonbe0e5c02007-01-26 15:51:26 -05001673{
Chris Mason5f39d392007-10-15 16:14:19 -04001674 struct extent_buffer *b;
Chris Masonbe0e5c02007-01-26 15:51:26 -05001675 int slot;
1676 int ret;
Yan Zheng33c66f42009-07-22 09:59:00 -04001677 int err;
Chris Masonbe0e5c02007-01-26 15:51:26 -05001678 int level;
Chris Mason925baed2008-06-25 16:01:30 -04001679 int lowest_unlock = 1;
Chris Masonbd681512011-07-16 15:23:14 -04001680 int root_lock;
1681 /* everything at write_lock_level or lower must be write locked */
1682 int write_lock_level = 0;
Chris Mason9f3a7422007-08-07 15:52:19 -04001683 u8 lowest_level = 0;
Chris Masonf7c79f32012-03-19 15:54:38 -04001684 int min_write_lock_level;
Chris Mason9f3a7422007-08-07 15:52:19 -04001685
Chris Mason6702ed42007-08-07 16:15:09 -04001686 lowest_level = p->lowest_level;
Chris Mason323ac952008-10-01 19:05:46 -04001687 WARN_ON(lowest_level && ins_len > 0);
Chris Mason22b0ebd2007-03-30 08:47:31 -04001688 WARN_ON(p->nodes[0] != NULL);
Josef Bacik25179202008-10-29 14:49:05 -04001689
Chris Masonbd681512011-07-16 15:23:14 -04001690 if (ins_len < 0) {
Chris Mason925baed2008-06-25 16:01:30 -04001691 lowest_unlock = 2;
Chris Mason65b51a02008-08-01 15:11:20 -04001692
Chris Masonbd681512011-07-16 15:23:14 -04001693 /* when we are removing items, we might have to go up to level
1694 * two as we update tree pointers Make sure we keep write
1695 * for those levels as well
1696 */
1697 write_lock_level = 2;
1698 } else if (ins_len > 0) {
1699 /*
1700 * for inserting items, make sure we have a write lock on
1701 * level 1 so we can update keys
1702 */
1703 write_lock_level = 1;
1704 }
1705
1706 if (!cow)
1707 write_lock_level = -1;
1708
1709 if (cow && (p->keep_locks || p->lowest_level))
1710 write_lock_level = BTRFS_MAX_LEVEL;
1711
Chris Masonf7c79f32012-03-19 15:54:38 -04001712 min_write_lock_level = write_lock_level;
1713
Chris Masonbb803952007-03-01 12:04:21 -05001714again:
Chris Masonbd681512011-07-16 15:23:14 -04001715 /*
1716 * we try very hard to do read locks on the root
1717 */
1718 root_lock = BTRFS_READ_LOCK;
1719 level = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001720 if (p->search_commit_root) {
Chris Masonbd681512011-07-16 15:23:14 -04001721 /*
1722 * the commit roots are read only
1723 * so we always do read locks
1724 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001725 b = root->commit_root;
1726 extent_buffer_get(b);
Chris Masonbd681512011-07-16 15:23:14 -04001727 level = btrfs_header_level(b);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001728 if (!p->skip_locking)
Chris Masonbd681512011-07-16 15:23:14 -04001729 btrfs_tree_read_lock(b);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001730 } else {
Chris Masonbd681512011-07-16 15:23:14 -04001731 if (p->skip_locking) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001732 b = btrfs_root_node(root);
Chris Masonbd681512011-07-16 15:23:14 -04001733 level = btrfs_header_level(b);
1734 } else {
1735 /* we don't know the level of the root node
1736 * until we actually have it read locked
1737 */
1738 b = btrfs_read_lock_root_node(root);
1739 level = btrfs_header_level(b);
1740 if (level <= write_lock_level) {
1741 /* whoops, must trade for write lock */
1742 btrfs_tree_read_unlock(b);
1743 free_extent_buffer(b);
1744 b = btrfs_lock_root_node(root);
1745 root_lock = BTRFS_WRITE_LOCK;
1746
1747 /* the level might have changed, check again */
1748 level = btrfs_header_level(b);
1749 }
1750 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001751 }
Chris Masonbd681512011-07-16 15:23:14 -04001752 p->nodes[level] = b;
1753 if (!p->skip_locking)
1754 p->locks[level] = root_lock;
Chris Mason925baed2008-06-25 16:01:30 -04001755
Chris Masoneb60cea2007-02-02 09:18:22 -05001756 while (b) {
Chris Mason5f39d392007-10-15 16:14:19 -04001757 level = btrfs_header_level(b);
Chris Mason65b51a02008-08-01 15:11:20 -04001758
1759 /*
1760 * setup the path here so we can release it under lock
1761 * contention with the cow code
1762 */
Chris Mason02217ed2007-03-02 16:08:05 -05001763 if (cow) {
Chris Masonc8c42862009-04-03 10:14:18 -04001764 /*
1765 * if we don't really need to cow this block
1766 * then we don't want to set the path blocking,
1767 * so we test it here
1768 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001769 if (!should_cow_block(trans, root, b))
Chris Mason65b51a02008-08-01 15:11:20 -04001770 goto cow_done;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001771
Chris Masonb4ce94d2009-02-04 09:25:08 -05001772 btrfs_set_path_blocking(p);
1773
Chris Masonbd681512011-07-16 15:23:14 -04001774 /*
1775 * must have write locks on this node and the
1776 * parent
1777 */
1778 if (level + 1 > write_lock_level) {
1779 write_lock_level = level + 1;
1780 btrfs_release_path(p);
1781 goto again;
1782 }
1783
Yan Zheng33c66f42009-07-22 09:59:00 -04001784 err = btrfs_cow_block(trans, root, b,
1785 p->nodes[level + 1],
1786 p->slots[level + 1], &b);
1787 if (err) {
Yan Zheng33c66f42009-07-22 09:59:00 -04001788 ret = err;
Chris Mason65b51a02008-08-01 15:11:20 -04001789 goto done;
Chris Mason54aa1f42007-06-22 14:16:25 -04001790 }
Chris Mason02217ed2007-03-02 16:08:05 -05001791 }
Chris Mason65b51a02008-08-01 15:11:20 -04001792cow_done:
Chris Mason02217ed2007-03-02 16:08:05 -05001793 BUG_ON(!cow && ins_len);
Chris Mason65b51a02008-08-01 15:11:20 -04001794
Chris Masoneb60cea2007-02-02 09:18:22 -05001795 p->nodes[level] = b;
Chris Masonbd681512011-07-16 15:23:14 -04001796 btrfs_clear_path_blocking(p, NULL, 0);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001797
1798 /*
1799 * we have a lock on b and as long as we aren't changing
1800 * the tree, there is no way to for the items in b to change.
1801 * It is safe to drop the lock on our parent before we
1802 * go through the expensive btree search on b.
1803 *
1804 * If cow is true, then we might be changing slot zero,
1805 * which may require changing the parent. So, we can't
1806 * drop the lock until after we know which slot we're
1807 * operating on.
1808 */
1809 if (!cow)
1810 btrfs_unlock_up_safe(p, level + 1);
1811
Chris Mason5f39d392007-10-15 16:14:19 -04001812 ret = bin_search(b, key, level, &slot);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001813
Chris Mason5f39d392007-10-15 16:14:19 -04001814 if (level != 0) {
Yan Zheng33c66f42009-07-22 09:59:00 -04001815 int dec = 0;
1816 if (ret && slot > 0) {
1817 dec = 1;
Chris Masonbe0e5c02007-01-26 15:51:26 -05001818 slot -= 1;
Yan Zheng33c66f42009-07-22 09:59:00 -04001819 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05001820 p->slots[level] = slot;
Yan Zheng33c66f42009-07-22 09:59:00 -04001821 err = setup_nodes_for_search(trans, root, p, b, level,
Chris Masonbd681512011-07-16 15:23:14 -04001822 ins_len, &write_lock_level);
Yan Zheng33c66f42009-07-22 09:59:00 -04001823 if (err == -EAGAIN)
Chris Masonc8c42862009-04-03 10:14:18 -04001824 goto again;
Yan Zheng33c66f42009-07-22 09:59:00 -04001825 if (err) {
1826 ret = err;
Chris Masonc8c42862009-04-03 10:14:18 -04001827 goto done;
Yan Zheng33c66f42009-07-22 09:59:00 -04001828 }
Chris Masonc8c42862009-04-03 10:14:18 -04001829 b = p->nodes[level];
1830 slot = p->slots[level];
Chris Masonb4ce94d2009-02-04 09:25:08 -05001831
Chris Masonbd681512011-07-16 15:23:14 -04001832 /*
1833 * slot 0 is special, if we change the key
1834 * we have to update the parent pointer
1835 * which means we must have a write lock
1836 * on the parent
1837 */
1838 if (slot == 0 && cow &&
1839 write_lock_level < level + 1) {
1840 write_lock_level = level + 1;
1841 btrfs_release_path(p);
1842 goto again;
1843 }
1844
Chris Masonf7c79f32012-03-19 15:54:38 -04001845 unlock_up(p, level, lowest_unlock,
1846 min_write_lock_level, &write_lock_level);
Chris Masonf9efa9c2008-06-25 16:14:04 -04001847
Chris Mason925baed2008-06-25 16:01:30 -04001848 if (level == lowest_level) {
Yan Zheng33c66f42009-07-22 09:59:00 -04001849 if (dec)
1850 p->slots[level]++;
Zheng Yan5b21f2e2008-09-26 10:05:38 -04001851 goto done;
Chris Mason925baed2008-06-25 16:01:30 -04001852 }
Chris Masonca7a79a2008-05-12 12:59:19 -04001853
Yan Zheng33c66f42009-07-22 09:59:00 -04001854 err = read_block_for_search(trans, root, p,
Chris Masonc8c42862009-04-03 10:14:18 -04001855 &b, level, slot, key);
Yan Zheng33c66f42009-07-22 09:59:00 -04001856 if (err == -EAGAIN)
Chris Masonc8c42862009-04-03 10:14:18 -04001857 goto again;
Yan Zheng33c66f42009-07-22 09:59:00 -04001858 if (err) {
1859 ret = err;
Chris Mason76a05b32009-05-14 13:24:30 -04001860 goto done;
Yan Zheng33c66f42009-07-22 09:59:00 -04001861 }
Chris Mason76a05b32009-05-14 13:24:30 -04001862
Chris Masonb4ce94d2009-02-04 09:25:08 -05001863 if (!p->skip_locking) {
Chris Masonbd681512011-07-16 15:23:14 -04001864 level = btrfs_header_level(b);
1865 if (level <= write_lock_level) {
1866 err = btrfs_try_tree_write_lock(b);
1867 if (!err) {
1868 btrfs_set_path_blocking(p);
1869 btrfs_tree_lock(b);
1870 btrfs_clear_path_blocking(p, b,
1871 BTRFS_WRITE_LOCK);
1872 }
1873 p->locks[level] = BTRFS_WRITE_LOCK;
1874 } else {
1875 err = btrfs_try_tree_read_lock(b);
1876 if (!err) {
1877 btrfs_set_path_blocking(p);
1878 btrfs_tree_read_lock(b);
1879 btrfs_clear_path_blocking(p, b,
1880 BTRFS_READ_LOCK);
1881 }
1882 p->locks[level] = BTRFS_READ_LOCK;
Chris Masonb4ce94d2009-02-04 09:25:08 -05001883 }
Chris Masonbd681512011-07-16 15:23:14 -04001884 p->nodes[level] = b;
Chris Masonb4ce94d2009-02-04 09:25:08 -05001885 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05001886 } else {
1887 p->slots[level] = slot;
Yan Zheng87b29b22008-12-17 10:21:48 -05001888 if (ins_len > 0 &&
1889 btrfs_leaf_free_space(root, b) < ins_len) {
Chris Masonbd681512011-07-16 15:23:14 -04001890 if (write_lock_level < 1) {
1891 write_lock_level = 1;
1892 btrfs_release_path(p);
1893 goto again;
1894 }
1895
Chris Masonb4ce94d2009-02-04 09:25:08 -05001896 btrfs_set_path_blocking(p);
Yan Zheng33c66f42009-07-22 09:59:00 -04001897 err = split_leaf(trans, root, key,
1898 p, ins_len, ret == 0);
Chris Masonbd681512011-07-16 15:23:14 -04001899 btrfs_clear_path_blocking(p, NULL, 0);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001900
Yan Zheng33c66f42009-07-22 09:59:00 -04001901 BUG_ON(err > 0);
1902 if (err) {
1903 ret = err;
Chris Mason65b51a02008-08-01 15:11:20 -04001904 goto done;
1905 }
Chris Mason5c680ed2007-02-22 11:39:13 -05001906 }
Chris Mason459931e2008-12-10 09:10:46 -05001907 if (!p->search_for_split)
Chris Masonf7c79f32012-03-19 15:54:38 -04001908 unlock_up(p, level, lowest_unlock,
1909 min_write_lock_level, &write_lock_level);
Chris Mason65b51a02008-08-01 15:11:20 -04001910 goto done;
Chris Masonbe0e5c02007-01-26 15:51:26 -05001911 }
1912 }
Chris Mason65b51a02008-08-01 15:11:20 -04001913 ret = 1;
1914done:
Chris Masonb4ce94d2009-02-04 09:25:08 -05001915 /*
1916 * we don't really know what they plan on doing with the path
1917 * from here on, so for now just mark it as blocking
1918 */
Chris Masonb9473432009-03-13 11:00:37 -04001919 if (!p->leave_spinning)
1920 btrfs_set_path_blocking(p);
Chris Mason76a05b32009-05-14 13:24:30 -04001921 if (ret < 0)
David Sterbab3b4aa72011-04-21 01:20:15 +02001922 btrfs_release_path(p);
Chris Mason65b51a02008-08-01 15:11:20 -04001923 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05001924}
1925
Chris Mason74123bd2007-02-02 11:05:29 -05001926/*
1927 * adjust the pointers going up the tree, starting at level
1928 * making sure the right key of each node is points to 'key'.
1929 * This is used after shifting pointers to the left, so it stops
1930 * fixing up pointers when a given leaf/node is not in slot 0 of the
1931 * higher levels
Chris Masonaa5d6be2007-02-28 16:35:06 -05001932 *
Chris Mason74123bd2007-02-02 11:05:29 -05001933 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01001934static void fixup_low_keys(struct btrfs_trans_handle *trans,
1935 struct btrfs_root *root, struct btrfs_path *path,
1936 struct btrfs_disk_key *key, int level)
Chris Masonbe0e5c02007-01-26 15:51:26 -05001937{
1938 int i;
Chris Mason5f39d392007-10-15 16:14:19 -04001939 struct extent_buffer *t;
1940
Chris Mason234b63a2007-03-13 10:46:10 -04001941 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
Chris Masonbe0e5c02007-01-26 15:51:26 -05001942 int tslot = path->slots[i];
Chris Masoneb60cea2007-02-02 09:18:22 -05001943 if (!path->nodes[i])
Chris Masonbe0e5c02007-01-26 15:51:26 -05001944 break;
Chris Mason5f39d392007-10-15 16:14:19 -04001945 t = path->nodes[i];
1946 btrfs_set_node_key(t, key, tslot);
Chris Masond6025572007-03-30 14:27:56 -04001947 btrfs_mark_buffer_dirty(path->nodes[i]);
Chris Masonbe0e5c02007-01-26 15:51:26 -05001948 if (tslot != 0)
1949 break;
1950 }
1951}
1952
Chris Mason74123bd2007-02-02 11:05:29 -05001953/*
Zheng Yan31840ae2008-09-23 13:14:14 -04001954 * update item key.
1955 *
1956 * This function isn't completely safe. It's the caller's responsibility
1957 * that the new key won't break the order
1958 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01001959void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
1960 struct btrfs_root *root, struct btrfs_path *path,
1961 struct btrfs_key *new_key)
Zheng Yan31840ae2008-09-23 13:14:14 -04001962{
1963 struct btrfs_disk_key disk_key;
1964 struct extent_buffer *eb;
1965 int slot;
1966
1967 eb = path->nodes[0];
1968 slot = path->slots[0];
1969 if (slot > 0) {
1970 btrfs_item_key(eb, &disk_key, slot - 1);
Jeff Mahoney143bede2012-03-01 14:56:26 +01001971 BUG_ON(comp_keys(&disk_key, new_key) >= 0);
Zheng Yan31840ae2008-09-23 13:14:14 -04001972 }
1973 if (slot < btrfs_header_nritems(eb) - 1) {
1974 btrfs_item_key(eb, &disk_key, slot + 1);
Jeff Mahoney143bede2012-03-01 14:56:26 +01001975 BUG_ON(comp_keys(&disk_key, new_key) <= 0);
Zheng Yan31840ae2008-09-23 13:14:14 -04001976 }
1977
1978 btrfs_cpu_key_to_disk(&disk_key, new_key);
1979 btrfs_set_item_key(eb, &disk_key, slot);
1980 btrfs_mark_buffer_dirty(eb);
1981 if (slot == 0)
1982 fixup_low_keys(trans, root, path, &disk_key, 1);
Zheng Yan31840ae2008-09-23 13:14:14 -04001983}
1984
1985/*
Chris Mason74123bd2007-02-02 11:05:29 -05001986 * try to push data from one node into the next node left in the
Chris Mason79f95c82007-03-01 15:16:26 -05001987 * tree.
Chris Masonaa5d6be2007-02-28 16:35:06 -05001988 *
1989 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1990 * error, and > 0 if there was no room in the left hand block.
Chris Mason74123bd2007-02-02 11:05:29 -05001991 */
Chris Mason98ed5172008-01-03 10:01:48 -05001992static int push_node_left(struct btrfs_trans_handle *trans,
1993 struct btrfs_root *root, struct extent_buffer *dst,
Chris Mason971a1f62008-04-24 10:54:32 -04001994 struct extent_buffer *src, int empty)
Chris Masonbe0e5c02007-01-26 15:51:26 -05001995{
Chris Masonbe0e5c02007-01-26 15:51:26 -05001996 int push_items = 0;
Chris Masonbb803952007-03-01 12:04:21 -05001997 int src_nritems;
1998 int dst_nritems;
Chris Masonaa5d6be2007-02-28 16:35:06 -05001999 int ret = 0;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002000
Chris Mason5f39d392007-10-15 16:14:19 -04002001 src_nritems = btrfs_header_nritems(src);
2002 dst_nritems = btrfs_header_nritems(dst);
Chris Mason123abc82007-03-14 14:14:43 -04002003 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
Chris Mason7bb86312007-12-11 09:25:06 -05002004 WARN_ON(btrfs_header_generation(src) != trans->transid);
2005 WARN_ON(btrfs_header_generation(dst) != trans->transid);
Chris Mason54aa1f42007-06-22 14:16:25 -04002006
Chris Masonbce4eae2008-04-24 14:42:46 -04002007 if (!empty && src_nritems <= 8)
Chris Mason971a1f62008-04-24 10:54:32 -04002008 return 1;
2009
Chris Masond3977122009-01-05 21:25:51 -05002010 if (push_items <= 0)
Chris Masonbe0e5c02007-01-26 15:51:26 -05002011 return 1;
2012
Chris Masonbce4eae2008-04-24 14:42:46 -04002013 if (empty) {
Chris Mason971a1f62008-04-24 10:54:32 -04002014 push_items = min(src_nritems, push_items);
Chris Masonbce4eae2008-04-24 14:42:46 -04002015 if (push_items < src_nritems) {
2016 /* leave at least 8 pointers in the node if
2017 * we aren't going to empty it
2018 */
2019 if (src_nritems - push_items < 8) {
2020 if (push_items <= 8)
2021 return 1;
2022 push_items -= 8;
2023 }
2024 }
2025 } else
2026 push_items = min(src_nritems - 8, push_items);
Chris Mason79f95c82007-03-01 15:16:26 -05002027
Chris Mason5f39d392007-10-15 16:14:19 -04002028 copy_extent_buffer(dst, src,
2029 btrfs_node_key_ptr_offset(dst_nritems),
2030 btrfs_node_key_ptr_offset(0),
Chris Masond3977122009-01-05 21:25:51 -05002031 push_items * sizeof(struct btrfs_key_ptr));
Chris Mason5f39d392007-10-15 16:14:19 -04002032
Chris Masonbb803952007-03-01 12:04:21 -05002033 if (push_items < src_nritems) {
Chris Mason5f39d392007-10-15 16:14:19 -04002034 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
2035 btrfs_node_key_ptr_offset(push_items),
2036 (src_nritems - push_items) *
2037 sizeof(struct btrfs_key_ptr));
Chris Masonbb803952007-03-01 12:04:21 -05002038 }
Chris Mason5f39d392007-10-15 16:14:19 -04002039 btrfs_set_header_nritems(src, src_nritems - push_items);
2040 btrfs_set_header_nritems(dst, dst_nritems + push_items);
2041 btrfs_mark_buffer_dirty(src);
2042 btrfs_mark_buffer_dirty(dst);
Zheng Yan31840ae2008-09-23 13:14:14 -04002043
Chris Masonbb803952007-03-01 12:04:21 -05002044 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002045}
2046
Chris Mason97571fd2007-02-24 13:39:08 -05002047/*
Chris Mason79f95c82007-03-01 15:16:26 -05002048 * try to push data from one node into the next node right in the
2049 * tree.
2050 *
2051 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
2052 * error, and > 0 if there was no room in the right hand block.
2053 *
2054 * this will only push up to 1/2 the contents of the left node over
2055 */
Chris Mason5f39d392007-10-15 16:14:19 -04002056static int balance_node_right(struct btrfs_trans_handle *trans,
2057 struct btrfs_root *root,
2058 struct extent_buffer *dst,
2059 struct extent_buffer *src)
Chris Mason79f95c82007-03-01 15:16:26 -05002060{
Chris Mason79f95c82007-03-01 15:16:26 -05002061 int push_items = 0;
2062 int max_push;
2063 int src_nritems;
2064 int dst_nritems;
2065 int ret = 0;
Chris Mason79f95c82007-03-01 15:16:26 -05002066
Chris Mason7bb86312007-12-11 09:25:06 -05002067 WARN_ON(btrfs_header_generation(src) != trans->transid);
2068 WARN_ON(btrfs_header_generation(dst) != trans->transid);
2069
Chris Mason5f39d392007-10-15 16:14:19 -04002070 src_nritems = btrfs_header_nritems(src);
2071 dst_nritems = btrfs_header_nritems(dst);
Chris Mason123abc82007-03-14 14:14:43 -04002072 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
Chris Masond3977122009-01-05 21:25:51 -05002073 if (push_items <= 0)
Chris Mason79f95c82007-03-01 15:16:26 -05002074 return 1;
Chris Masonbce4eae2008-04-24 14:42:46 -04002075
Chris Masond3977122009-01-05 21:25:51 -05002076 if (src_nritems < 4)
Chris Masonbce4eae2008-04-24 14:42:46 -04002077 return 1;
Chris Mason79f95c82007-03-01 15:16:26 -05002078
2079 max_push = src_nritems / 2 + 1;
2080 /* don't try to empty the node */
Chris Masond3977122009-01-05 21:25:51 -05002081 if (max_push >= src_nritems)
Chris Mason79f95c82007-03-01 15:16:26 -05002082 return 1;
Yan252c38f2007-08-29 09:11:44 -04002083
Chris Mason79f95c82007-03-01 15:16:26 -05002084 if (max_push < push_items)
2085 push_items = max_push;
2086
Chris Mason5f39d392007-10-15 16:14:19 -04002087 memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
2088 btrfs_node_key_ptr_offset(0),
2089 (dst_nritems) *
2090 sizeof(struct btrfs_key_ptr));
Chris Masond6025572007-03-30 14:27:56 -04002091
Chris Mason5f39d392007-10-15 16:14:19 -04002092 copy_extent_buffer(dst, src,
2093 btrfs_node_key_ptr_offset(0),
2094 btrfs_node_key_ptr_offset(src_nritems - push_items),
Chris Masond3977122009-01-05 21:25:51 -05002095 push_items * sizeof(struct btrfs_key_ptr));
Chris Mason79f95c82007-03-01 15:16:26 -05002096
Chris Mason5f39d392007-10-15 16:14:19 -04002097 btrfs_set_header_nritems(src, src_nritems - push_items);
2098 btrfs_set_header_nritems(dst, dst_nritems + push_items);
Chris Mason79f95c82007-03-01 15:16:26 -05002099
Chris Mason5f39d392007-10-15 16:14:19 -04002100 btrfs_mark_buffer_dirty(src);
2101 btrfs_mark_buffer_dirty(dst);
Zheng Yan31840ae2008-09-23 13:14:14 -04002102
Chris Mason79f95c82007-03-01 15:16:26 -05002103 return ret;
2104}
2105
2106/*
Chris Mason97571fd2007-02-24 13:39:08 -05002107 * helper function to insert a new root level in the tree.
2108 * A new node is allocated, and a single item is inserted to
2109 * point to the existing root
Chris Masonaa5d6be2007-02-28 16:35:06 -05002110 *
2111 * returns zero on success or < 0 on failure.
Chris Mason97571fd2007-02-24 13:39:08 -05002112 */
Chris Masond3977122009-01-05 21:25:51 -05002113static noinline int insert_new_root(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -04002114 struct btrfs_root *root,
2115 struct btrfs_path *path, int level)
Chris Mason5c680ed2007-02-22 11:39:13 -05002116{
Chris Mason7bb86312007-12-11 09:25:06 -05002117 u64 lower_gen;
Chris Mason5f39d392007-10-15 16:14:19 -04002118 struct extent_buffer *lower;
2119 struct extent_buffer *c;
Chris Mason925baed2008-06-25 16:01:30 -04002120 struct extent_buffer *old;
Chris Mason5f39d392007-10-15 16:14:19 -04002121 struct btrfs_disk_key lower_key;
Chris Mason5c680ed2007-02-22 11:39:13 -05002122
2123 BUG_ON(path->nodes[level]);
2124 BUG_ON(path->nodes[level-1] != root->node);
2125
Chris Mason7bb86312007-12-11 09:25:06 -05002126 lower = path->nodes[level-1];
2127 if (level == 1)
2128 btrfs_item_key(lower, &lower_key, 0);
2129 else
2130 btrfs_node_key(lower, &lower_key, 0);
2131
Zheng Yan31840ae2008-09-23 13:14:14 -04002132 c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002133 root->root_key.objectid, &lower_key,
Arne Jansen66d7e7f2011-09-12 15:26:38 +02002134 level, root->node->start, 0, 0);
Chris Mason5f39d392007-10-15 16:14:19 -04002135 if (IS_ERR(c))
2136 return PTR_ERR(c);
Chris Mason925baed2008-06-25 16:01:30 -04002137
Yan, Zhengf0486c62010-05-16 10:46:25 -04002138 root_add_used(root, root->nodesize);
2139
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002140 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
Chris Mason5f39d392007-10-15 16:14:19 -04002141 btrfs_set_header_nritems(c, 1);
2142 btrfs_set_header_level(c, level);
Chris Masondb945352007-10-15 16:15:53 -04002143 btrfs_set_header_bytenr(c, c->start);
Chris Mason5f39d392007-10-15 16:14:19 -04002144 btrfs_set_header_generation(c, trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002145 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
Chris Mason5f39d392007-10-15 16:14:19 -04002146 btrfs_set_header_owner(c, root->root_key.objectid);
Chris Masond5719762007-03-23 10:01:08 -04002147
Chris Mason5f39d392007-10-15 16:14:19 -04002148 write_extent_buffer(c, root->fs_info->fsid,
2149 (unsigned long)btrfs_header_fsid(c),
2150 BTRFS_FSID_SIZE);
Chris Masone17cade2008-04-15 15:41:47 -04002151
2152 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
2153 (unsigned long)btrfs_header_chunk_tree_uuid(c),
2154 BTRFS_UUID_SIZE);
2155
Chris Mason5f39d392007-10-15 16:14:19 -04002156 btrfs_set_node_key(c, &lower_key, 0);
Chris Masondb945352007-10-15 16:15:53 -04002157 btrfs_set_node_blockptr(c, 0, lower->start);
Chris Mason7bb86312007-12-11 09:25:06 -05002158 lower_gen = btrfs_header_generation(lower);
Zheng Yan31840ae2008-09-23 13:14:14 -04002159 WARN_ON(lower_gen != trans->transid);
Chris Mason7bb86312007-12-11 09:25:06 -05002160
2161 btrfs_set_node_ptr_generation(c, 0, lower_gen);
Chris Mason5f39d392007-10-15 16:14:19 -04002162
2163 btrfs_mark_buffer_dirty(c);
Chris Masond5719762007-03-23 10:01:08 -04002164
Chris Mason925baed2008-06-25 16:01:30 -04002165 old = root->node;
Chris Mason240f62c2011-03-23 14:54:42 -04002166 rcu_assign_pointer(root->node, c);
Chris Mason925baed2008-06-25 16:01:30 -04002167
2168 /* the super has an extra ref to root->node */
2169 free_extent_buffer(old);
2170
Chris Mason0b86a832008-03-24 15:01:56 -04002171 add_root_to_dirty_list(root);
Chris Mason5f39d392007-10-15 16:14:19 -04002172 extent_buffer_get(c);
2173 path->nodes[level] = c;
Chris Masonbd681512011-07-16 15:23:14 -04002174 path->locks[level] = BTRFS_WRITE_LOCK;
Chris Mason5c680ed2007-02-22 11:39:13 -05002175 path->slots[level] = 0;
2176 return 0;
2177}
2178
Chris Mason74123bd2007-02-02 11:05:29 -05002179/*
2180 * worker function to insert a single pointer in a node.
2181 * the node should have enough room for the pointer already
Chris Mason97571fd2007-02-24 13:39:08 -05002182 *
Chris Mason74123bd2007-02-02 11:05:29 -05002183 * slot and level indicate where you want the key to go, and
2184 * blocknr is the block the key points to.
2185 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01002186static void insert_ptr(struct btrfs_trans_handle *trans,
2187 struct btrfs_root *root, struct btrfs_path *path,
2188 struct btrfs_disk_key *key, u64 bytenr,
2189 int slot, int level)
Chris Mason74123bd2007-02-02 11:05:29 -05002190{
Chris Mason5f39d392007-10-15 16:14:19 -04002191 struct extent_buffer *lower;
Chris Mason74123bd2007-02-02 11:05:29 -05002192 int nritems;
Chris Mason5c680ed2007-02-22 11:39:13 -05002193
2194 BUG_ON(!path->nodes[level]);
Yan, Zhengf0486c62010-05-16 10:46:25 -04002195 btrfs_assert_tree_locked(path->nodes[level]);
Chris Mason5f39d392007-10-15 16:14:19 -04002196 lower = path->nodes[level];
2197 nritems = btrfs_header_nritems(lower);
Stoyan Gaydarovc2934982009-04-02 17:05:11 -04002198 BUG_ON(slot > nritems);
Jeff Mahoney143bede2012-03-01 14:56:26 +01002199 BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root));
Chris Mason74123bd2007-02-02 11:05:29 -05002200 if (slot != nritems) {
Chris Mason5f39d392007-10-15 16:14:19 -04002201 memmove_extent_buffer(lower,
2202 btrfs_node_key_ptr_offset(slot + 1),
2203 btrfs_node_key_ptr_offset(slot),
Chris Masond6025572007-03-30 14:27:56 -04002204 (nritems - slot) * sizeof(struct btrfs_key_ptr));
Chris Mason74123bd2007-02-02 11:05:29 -05002205 }
Chris Mason5f39d392007-10-15 16:14:19 -04002206 btrfs_set_node_key(lower, key, slot);
Chris Masondb945352007-10-15 16:15:53 -04002207 btrfs_set_node_blockptr(lower, slot, bytenr);
Chris Mason74493f72007-12-11 09:25:06 -05002208 WARN_ON(trans->transid == 0);
2209 btrfs_set_node_ptr_generation(lower, slot, trans->transid);
Chris Mason5f39d392007-10-15 16:14:19 -04002210 btrfs_set_header_nritems(lower, nritems + 1);
2211 btrfs_mark_buffer_dirty(lower);
Chris Mason74123bd2007-02-02 11:05:29 -05002212}
2213
Chris Mason97571fd2007-02-24 13:39:08 -05002214/*
2215 * split the node at the specified level in path in two.
2216 * The path is corrected to point to the appropriate node after the split
2217 *
2218 * Before splitting this tries to make some room in the node by pushing
2219 * left and right, if either one works, it returns right away.
Chris Masonaa5d6be2007-02-28 16:35:06 -05002220 *
2221 * returns 0 on success and < 0 on failure
Chris Mason97571fd2007-02-24 13:39:08 -05002222 */
Chris Masone02119d2008-09-05 16:13:11 -04002223static noinline int split_node(struct btrfs_trans_handle *trans,
2224 struct btrfs_root *root,
2225 struct btrfs_path *path, int level)
Chris Masonbe0e5c02007-01-26 15:51:26 -05002226{
Chris Mason5f39d392007-10-15 16:14:19 -04002227 struct extent_buffer *c;
2228 struct extent_buffer *split;
2229 struct btrfs_disk_key disk_key;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002230 int mid;
Chris Mason5c680ed2007-02-22 11:39:13 -05002231 int ret;
Chris Mason7518a232007-03-12 12:01:18 -04002232 u32 c_nritems;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002233
Chris Mason5f39d392007-10-15 16:14:19 -04002234 c = path->nodes[level];
Chris Mason7bb86312007-12-11 09:25:06 -05002235 WARN_ON(btrfs_header_generation(c) != trans->transid);
Chris Mason5f39d392007-10-15 16:14:19 -04002236 if (c == root->node) {
Chris Mason5c680ed2007-02-22 11:39:13 -05002237 /* trying to split the root, lets make a new one */
Chris Masone089f052007-03-16 16:20:31 -04002238 ret = insert_new_root(trans, root, path, level + 1);
Chris Mason5c680ed2007-02-22 11:39:13 -05002239 if (ret)
2240 return ret;
Chris Masonb3612422009-05-13 19:12:15 -04002241 } else {
Chris Masone66f7092007-04-20 13:16:02 -04002242 ret = push_nodes_for_insert(trans, root, path, level);
Chris Mason5f39d392007-10-15 16:14:19 -04002243 c = path->nodes[level];
2244 if (!ret && btrfs_header_nritems(c) <
Chris Masonc448acf2008-04-24 09:34:34 -04002245 BTRFS_NODEPTRS_PER_BLOCK(root) - 3)
Chris Masone66f7092007-04-20 13:16:02 -04002246 return 0;
Chris Mason54aa1f42007-06-22 14:16:25 -04002247 if (ret < 0)
2248 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002249 }
Chris Masone66f7092007-04-20 13:16:02 -04002250
Chris Mason5f39d392007-10-15 16:14:19 -04002251 c_nritems = btrfs_header_nritems(c);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002252 mid = (c_nritems + 1) / 2;
2253 btrfs_node_key(c, &disk_key, mid);
Chris Mason7bb86312007-12-11 09:25:06 -05002254
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002255 split = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
Zheng Yan31840ae2008-09-23 13:14:14 -04002256 root->root_key.objectid,
Arne Jansen66d7e7f2011-09-12 15:26:38 +02002257 &disk_key, level, c->start, 0, 0);
Chris Mason5f39d392007-10-15 16:14:19 -04002258 if (IS_ERR(split))
2259 return PTR_ERR(split);
Chris Mason54aa1f42007-06-22 14:16:25 -04002260
Yan, Zhengf0486c62010-05-16 10:46:25 -04002261 root_add_used(root, root->nodesize);
2262
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002263 memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
Chris Mason5f39d392007-10-15 16:14:19 -04002264 btrfs_set_header_level(split, btrfs_header_level(c));
Chris Masondb945352007-10-15 16:15:53 -04002265 btrfs_set_header_bytenr(split, split->start);
Chris Mason5f39d392007-10-15 16:14:19 -04002266 btrfs_set_header_generation(split, trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002267 btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
Chris Mason5f39d392007-10-15 16:14:19 -04002268 btrfs_set_header_owner(split, root->root_key.objectid);
2269 write_extent_buffer(split, root->fs_info->fsid,
2270 (unsigned long)btrfs_header_fsid(split),
2271 BTRFS_FSID_SIZE);
Chris Masone17cade2008-04-15 15:41:47 -04002272 write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
2273 (unsigned long)btrfs_header_chunk_tree_uuid(split),
2274 BTRFS_UUID_SIZE);
Chris Mason5f39d392007-10-15 16:14:19 -04002275
Chris Mason5f39d392007-10-15 16:14:19 -04002276
2277 copy_extent_buffer(split, c,
2278 btrfs_node_key_ptr_offset(0),
2279 btrfs_node_key_ptr_offset(mid),
2280 (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
2281 btrfs_set_header_nritems(split, c_nritems - mid);
2282 btrfs_set_header_nritems(c, mid);
Chris Masonaa5d6be2007-02-28 16:35:06 -05002283 ret = 0;
2284
Chris Mason5f39d392007-10-15 16:14:19 -04002285 btrfs_mark_buffer_dirty(c);
2286 btrfs_mark_buffer_dirty(split);
2287
Jeff Mahoney143bede2012-03-01 14:56:26 +01002288 insert_ptr(trans, root, path, &disk_key, split->start,
2289 path->slots[level + 1] + 1, level + 1);
Chris Masonaa5d6be2007-02-28 16:35:06 -05002290
Chris Mason5de08d72007-02-24 06:24:44 -05002291 if (path->slots[level] >= mid) {
Chris Mason5c680ed2007-02-22 11:39:13 -05002292 path->slots[level] -= mid;
Chris Mason925baed2008-06-25 16:01:30 -04002293 btrfs_tree_unlock(c);
Chris Mason5f39d392007-10-15 16:14:19 -04002294 free_extent_buffer(c);
2295 path->nodes[level] = split;
Chris Mason5c680ed2007-02-22 11:39:13 -05002296 path->slots[level + 1] += 1;
2297 } else {
Chris Mason925baed2008-06-25 16:01:30 -04002298 btrfs_tree_unlock(split);
Chris Mason5f39d392007-10-15 16:14:19 -04002299 free_extent_buffer(split);
Chris Masonbe0e5c02007-01-26 15:51:26 -05002300 }
Chris Masonaa5d6be2007-02-28 16:35:06 -05002301 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002302}
2303
Chris Mason74123bd2007-02-02 11:05:29 -05002304/*
2305 * how many bytes are required to store the items in a leaf. start
2306 * and nr indicate which items in the leaf to check. This totals up the
2307 * space used both by the item structs and the item data
2308 */
Chris Mason5f39d392007-10-15 16:14:19 -04002309static int leaf_space_used(struct extent_buffer *l, int start, int nr)
Chris Masonbe0e5c02007-01-26 15:51:26 -05002310{
2311 int data_len;
Chris Mason5f39d392007-10-15 16:14:19 -04002312 int nritems = btrfs_header_nritems(l);
Chris Masond4dbff92007-04-04 14:08:15 -04002313 int end = min(nritems, start + nr) - 1;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002314
2315 if (!nr)
2316 return 0;
Chris Mason5f39d392007-10-15 16:14:19 -04002317 data_len = btrfs_item_end_nr(l, start);
2318 data_len = data_len - btrfs_item_offset_nr(l, end);
Chris Mason0783fcf2007-03-12 20:12:07 -04002319 data_len += sizeof(struct btrfs_item) * nr;
Chris Masond4dbff92007-04-04 14:08:15 -04002320 WARN_ON(data_len < 0);
Chris Masonbe0e5c02007-01-26 15:51:26 -05002321 return data_len;
2322}
2323
Chris Mason74123bd2007-02-02 11:05:29 -05002324/*
Chris Masond4dbff92007-04-04 14:08:15 -04002325 * The space between the end of the leaf items and
2326 * the start of the leaf data. IOW, how much room
2327 * the leaf has left for both items and data
2328 */
Chris Masond3977122009-01-05 21:25:51 -05002329noinline int btrfs_leaf_free_space(struct btrfs_root *root,
Chris Masone02119d2008-09-05 16:13:11 -04002330 struct extent_buffer *leaf)
Chris Masond4dbff92007-04-04 14:08:15 -04002331{
Chris Mason5f39d392007-10-15 16:14:19 -04002332 int nritems = btrfs_header_nritems(leaf);
2333 int ret;
2334 ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
2335 if (ret < 0) {
Chris Masond3977122009-01-05 21:25:51 -05002336 printk(KERN_CRIT "leaf free space ret %d, leaf data size %lu, "
2337 "used %d nritems %d\n",
Jens Axboeae2f5412007-10-19 09:22:59 -04002338 ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
Chris Mason5f39d392007-10-15 16:14:19 -04002339 leaf_space_used(leaf, 0, nritems), nritems);
2340 }
2341 return ret;
Chris Masond4dbff92007-04-04 14:08:15 -04002342}
2343
Chris Mason99d8f832010-07-07 10:51:48 -04002344/*
2345 * min slot controls the lowest index we're willing to push to the
2346 * right. We'll push up to and including min_slot, but no lower
2347 */
Chris Mason44871b12009-03-13 10:04:31 -04002348static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
2349 struct btrfs_root *root,
2350 struct btrfs_path *path,
2351 int data_size, int empty,
2352 struct extent_buffer *right,
Chris Mason99d8f832010-07-07 10:51:48 -04002353 int free_space, u32 left_nritems,
2354 u32 min_slot)
Chris Mason00ec4c52007-02-24 12:47:20 -05002355{
Chris Mason5f39d392007-10-15 16:14:19 -04002356 struct extent_buffer *left = path->nodes[0];
Chris Mason44871b12009-03-13 10:04:31 -04002357 struct extent_buffer *upper = path->nodes[1];
Chris Masoncfed81a2012-03-03 07:40:03 -05002358 struct btrfs_map_token token;
Chris Mason5f39d392007-10-15 16:14:19 -04002359 struct btrfs_disk_key disk_key;
Chris Mason00ec4c52007-02-24 12:47:20 -05002360 int slot;
Chris Mason34a38212007-11-07 13:31:03 -05002361 u32 i;
Chris Mason00ec4c52007-02-24 12:47:20 -05002362 int push_space = 0;
2363 int push_items = 0;
Chris Mason0783fcf2007-03-12 20:12:07 -04002364 struct btrfs_item *item;
Chris Mason34a38212007-11-07 13:31:03 -05002365 u32 nr;
Chris Mason7518a232007-03-12 12:01:18 -04002366 u32 right_nritems;
Chris Mason5f39d392007-10-15 16:14:19 -04002367 u32 data_end;
Chris Masondb945352007-10-15 16:15:53 -04002368 u32 this_item_size;
Chris Mason00ec4c52007-02-24 12:47:20 -05002369
Chris Masoncfed81a2012-03-03 07:40:03 -05002370 btrfs_init_map_token(&token);
2371
Chris Mason34a38212007-11-07 13:31:03 -05002372 if (empty)
2373 nr = 0;
2374 else
Chris Mason99d8f832010-07-07 10:51:48 -04002375 nr = max_t(u32, 1, min_slot);
Chris Mason34a38212007-11-07 13:31:03 -05002376
Zheng Yan31840ae2008-09-23 13:14:14 -04002377 if (path->slots[0] >= left_nritems)
Yan Zheng87b29b22008-12-17 10:21:48 -05002378 push_space += data_size;
Zheng Yan31840ae2008-09-23 13:14:14 -04002379
Chris Mason44871b12009-03-13 10:04:31 -04002380 slot = path->slots[1];
Chris Mason34a38212007-11-07 13:31:03 -05002381 i = left_nritems - 1;
2382 while (i >= nr) {
Chris Mason5f39d392007-10-15 16:14:19 -04002383 item = btrfs_item_nr(left, i);
Chris Masondb945352007-10-15 16:15:53 -04002384
Zheng Yan31840ae2008-09-23 13:14:14 -04002385 if (!empty && push_items > 0) {
2386 if (path->slots[0] > i)
2387 break;
2388 if (path->slots[0] == i) {
2389 int space = btrfs_leaf_free_space(root, left);
2390 if (space + push_space * 2 > free_space)
2391 break;
2392 }
2393 }
2394
Chris Mason00ec4c52007-02-24 12:47:20 -05002395 if (path->slots[0] == i)
Yan Zheng87b29b22008-12-17 10:21:48 -05002396 push_space += data_size;
Chris Masondb945352007-10-15 16:15:53 -04002397
Chris Masondb945352007-10-15 16:15:53 -04002398 this_item_size = btrfs_item_size(left, item);
2399 if (this_item_size + sizeof(*item) + push_space > free_space)
Chris Mason00ec4c52007-02-24 12:47:20 -05002400 break;
Zheng Yan31840ae2008-09-23 13:14:14 -04002401
Chris Mason00ec4c52007-02-24 12:47:20 -05002402 push_items++;
Chris Masondb945352007-10-15 16:15:53 -04002403 push_space += this_item_size + sizeof(*item);
Chris Mason34a38212007-11-07 13:31:03 -05002404 if (i == 0)
2405 break;
2406 i--;
Chris Masondb945352007-10-15 16:15:53 -04002407 }
Chris Mason5f39d392007-10-15 16:14:19 -04002408
Chris Mason925baed2008-06-25 16:01:30 -04002409 if (push_items == 0)
2410 goto out_unlock;
Chris Mason5f39d392007-10-15 16:14:19 -04002411
Chris Mason34a38212007-11-07 13:31:03 -05002412 if (!empty && push_items == left_nritems)
Chris Masona429e512007-04-18 16:15:28 -04002413 WARN_ON(1);
Chris Mason5f39d392007-10-15 16:14:19 -04002414
Chris Mason00ec4c52007-02-24 12:47:20 -05002415 /* push left to right */
Chris Mason5f39d392007-10-15 16:14:19 -04002416 right_nritems = btrfs_header_nritems(right);
Chris Mason34a38212007-11-07 13:31:03 -05002417
Chris Mason5f39d392007-10-15 16:14:19 -04002418 push_space = btrfs_item_end_nr(left, left_nritems - push_items);
Chris Mason123abc82007-03-14 14:14:43 -04002419 push_space -= leaf_data_end(root, left);
Chris Mason5f39d392007-10-15 16:14:19 -04002420
Chris Mason00ec4c52007-02-24 12:47:20 -05002421 /* make room in the right data area */
Chris Mason5f39d392007-10-15 16:14:19 -04002422 data_end = leaf_data_end(root, right);
2423 memmove_extent_buffer(right,
2424 btrfs_leaf_data(right) + data_end - push_space,
2425 btrfs_leaf_data(right) + data_end,
2426 BTRFS_LEAF_DATA_SIZE(root) - data_end);
2427
Chris Mason00ec4c52007-02-24 12:47:20 -05002428 /* copy from the left data area */
Chris Mason5f39d392007-10-15 16:14:19 -04002429 copy_extent_buffer(right, left, btrfs_leaf_data(right) +
Chris Masond6025572007-03-30 14:27:56 -04002430 BTRFS_LEAF_DATA_SIZE(root) - push_space,
2431 btrfs_leaf_data(left) + leaf_data_end(root, left),
2432 push_space);
Chris Mason5f39d392007-10-15 16:14:19 -04002433
2434 memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
2435 btrfs_item_nr_offset(0),
2436 right_nritems * sizeof(struct btrfs_item));
2437
Chris Mason00ec4c52007-02-24 12:47:20 -05002438 /* copy the items from left to right */
Chris Mason5f39d392007-10-15 16:14:19 -04002439 copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
2440 btrfs_item_nr_offset(left_nritems - push_items),
2441 push_items * sizeof(struct btrfs_item));
Chris Mason00ec4c52007-02-24 12:47:20 -05002442
2443 /* update the item pointers */
Chris Mason7518a232007-03-12 12:01:18 -04002444 right_nritems += push_items;
Chris Mason5f39d392007-10-15 16:14:19 -04002445 btrfs_set_header_nritems(right, right_nritems);
Chris Mason123abc82007-03-14 14:14:43 -04002446 push_space = BTRFS_LEAF_DATA_SIZE(root);
Chris Mason7518a232007-03-12 12:01:18 -04002447 for (i = 0; i < right_nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04002448 item = btrfs_item_nr(right, i);
Chris Masoncfed81a2012-03-03 07:40:03 -05002449 push_space -= btrfs_token_item_size(right, item, &token);
2450 btrfs_set_token_item_offset(right, item, push_space, &token);
Chris Masondb945352007-10-15 16:15:53 -04002451 }
2452
Chris Mason7518a232007-03-12 12:01:18 -04002453 left_nritems -= push_items;
Chris Mason5f39d392007-10-15 16:14:19 -04002454 btrfs_set_header_nritems(left, left_nritems);
Chris Mason00ec4c52007-02-24 12:47:20 -05002455
Chris Mason34a38212007-11-07 13:31:03 -05002456 if (left_nritems)
2457 btrfs_mark_buffer_dirty(left);
Yan, Zhengf0486c62010-05-16 10:46:25 -04002458 else
2459 clean_tree_block(trans, root, left);
2460
Chris Mason5f39d392007-10-15 16:14:19 -04002461 btrfs_mark_buffer_dirty(right);
Chris Masona429e512007-04-18 16:15:28 -04002462
Chris Mason5f39d392007-10-15 16:14:19 -04002463 btrfs_item_key(right, &disk_key, 0);
2464 btrfs_set_node_key(upper, &disk_key, slot + 1);
Chris Masond6025572007-03-30 14:27:56 -04002465 btrfs_mark_buffer_dirty(upper);
Chris Mason02217ed2007-03-02 16:08:05 -05002466
Chris Mason00ec4c52007-02-24 12:47:20 -05002467 /* then fixup the leaf pointer in the path */
Chris Mason7518a232007-03-12 12:01:18 -04002468 if (path->slots[0] >= left_nritems) {
2469 path->slots[0] -= left_nritems;
Chris Mason925baed2008-06-25 16:01:30 -04002470 if (btrfs_header_nritems(path->nodes[0]) == 0)
2471 clean_tree_block(trans, root, path->nodes[0]);
2472 btrfs_tree_unlock(path->nodes[0]);
Chris Mason5f39d392007-10-15 16:14:19 -04002473 free_extent_buffer(path->nodes[0]);
2474 path->nodes[0] = right;
Chris Mason00ec4c52007-02-24 12:47:20 -05002475 path->slots[1] += 1;
2476 } else {
Chris Mason925baed2008-06-25 16:01:30 -04002477 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04002478 free_extent_buffer(right);
Chris Mason00ec4c52007-02-24 12:47:20 -05002479 }
2480 return 0;
Chris Mason925baed2008-06-25 16:01:30 -04002481
2482out_unlock:
2483 btrfs_tree_unlock(right);
2484 free_extent_buffer(right);
2485 return 1;
Chris Mason00ec4c52007-02-24 12:47:20 -05002486}
Chris Mason925baed2008-06-25 16:01:30 -04002487
Chris Mason00ec4c52007-02-24 12:47:20 -05002488/*
Chris Mason44871b12009-03-13 10:04:31 -04002489 * push some data in the path leaf to the right, trying to free up at
2490 * least data_size bytes. returns zero if the push worked, nonzero otherwise
2491 *
2492 * returns 1 if the push failed because the other node didn't have enough
2493 * room, 0 if everything worked out and < 0 if there were major errors.
Chris Mason99d8f832010-07-07 10:51:48 -04002494 *
2495 * this will push starting from min_slot to the end of the leaf. It won't
2496 * push any slot lower than min_slot
Chris Mason44871b12009-03-13 10:04:31 -04002497 */
2498static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
Chris Mason99d8f832010-07-07 10:51:48 -04002499 *root, struct btrfs_path *path,
2500 int min_data_size, int data_size,
2501 int empty, u32 min_slot)
Chris Mason44871b12009-03-13 10:04:31 -04002502{
2503 struct extent_buffer *left = path->nodes[0];
2504 struct extent_buffer *right;
2505 struct extent_buffer *upper;
2506 int slot;
2507 int free_space;
2508 u32 left_nritems;
2509 int ret;
2510
2511 if (!path->nodes[1])
2512 return 1;
2513
2514 slot = path->slots[1];
2515 upper = path->nodes[1];
2516 if (slot >= btrfs_header_nritems(upper) - 1)
2517 return 1;
2518
2519 btrfs_assert_tree_locked(path->nodes[1]);
2520
2521 right = read_node_slot(root, upper, slot + 1);
Tsutomu Itoh91ca3382011-01-05 02:32:22 +00002522 if (right == NULL)
2523 return 1;
2524
Chris Mason44871b12009-03-13 10:04:31 -04002525 btrfs_tree_lock(right);
2526 btrfs_set_lock_blocking(right);
2527
2528 free_space = btrfs_leaf_free_space(root, right);
2529 if (free_space < data_size)
2530 goto out_unlock;
2531
2532 /* cow and double check */
2533 ret = btrfs_cow_block(trans, root, right, upper,
2534 slot + 1, &right);
2535 if (ret)
2536 goto out_unlock;
2537
2538 free_space = btrfs_leaf_free_space(root, right);
2539 if (free_space < data_size)
2540 goto out_unlock;
2541
2542 left_nritems = btrfs_header_nritems(left);
2543 if (left_nritems == 0)
2544 goto out_unlock;
2545
Chris Mason99d8f832010-07-07 10:51:48 -04002546 return __push_leaf_right(trans, root, path, min_data_size, empty,
2547 right, free_space, left_nritems, min_slot);
Chris Mason44871b12009-03-13 10:04:31 -04002548out_unlock:
2549 btrfs_tree_unlock(right);
2550 free_extent_buffer(right);
2551 return 1;
2552}
2553
2554/*
Chris Mason74123bd2007-02-02 11:05:29 -05002555 * push some data in the path leaf to the left, trying to free up at
2556 * least data_size bytes. returns zero if the push worked, nonzero otherwise
Chris Mason99d8f832010-07-07 10:51:48 -04002557 *
2558 * max_slot can put a limit on how far into the leaf we'll push items. The
2559 * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the
2560 * items
Chris Mason74123bd2007-02-02 11:05:29 -05002561 */
Chris Mason44871b12009-03-13 10:04:31 -04002562static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
2563 struct btrfs_root *root,
2564 struct btrfs_path *path, int data_size,
2565 int empty, struct extent_buffer *left,
Chris Mason99d8f832010-07-07 10:51:48 -04002566 int free_space, u32 right_nritems,
2567 u32 max_slot)
Chris Masonbe0e5c02007-01-26 15:51:26 -05002568{
Chris Mason5f39d392007-10-15 16:14:19 -04002569 struct btrfs_disk_key disk_key;
2570 struct extent_buffer *right = path->nodes[0];
Chris Masonbe0e5c02007-01-26 15:51:26 -05002571 int i;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002572 int push_space = 0;
2573 int push_items = 0;
Chris Mason0783fcf2007-03-12 20:12:07 -04002574 struct btrfs_item *item;
Chris Mason7518a232007-03-12 12:01:18 -04002575 u32 old_left_nritems;
Chris Mason34a38212007-11-07 13:31:03 -05002576 u32 nr;
Chris Masonaa5d6be2007-02-28 16:35:06 -05002577 int ret = 0;
Chris Masondb945352007-10-15 16:15:53 -04002578 u32 this_item_size;
2579 u32 old_left_item_size;
Chris Masoncfed81a2012-03-03 07:40:03 -05002580 struct btrfs_map_token token;
2581
2582 btrfs_init_map_token(&token);
Chris Masonbe0e5c02007-01-26 15:51:26 -05002583
Chris Mason34a38212007-11-07 13:31:03 -05002584 if (empty)
Chris Mason99d8f832010-07-07 10:51:48 -04002585 nr = min(right_nritems, max_slot);
Chris Mason34a38212007-11-07 13:31:03 -05002586 else
Chris Mason99d8f832010-07-07 10:51:48 -04002587 nr = min(right_nritems - 1, max_slot);
Chris Mason34a38212007-11-07 13:31:03 -05002588
2589 for (i = 0; i < nr; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04002590 item = btrfs_item_nr(right, i);
Chris Masondb945352007-10-15 16:15:53 -04002591
Zheng Yan31840ae2008-09-23 13:14:14 -04002592 if (!empty && push_items > 0) {
2593 if (path->slots[0] < i)
2594 break;
2595 if (path->slots[0] == i) {
2596 int space = btrfs_leaf_free_space(root, right);
2597 if (space + push_space * 2 > free_space)
2598 break;
2599 }
2600 }
2601
Chris Masonbe0e5c02007-01-26 15:51:26 -05002602 if (path->slots[0] == i)
Yan Zheng87b29b22008-12-17 10:21:48 -05002603 push_space += data_size;
Chris Masondb945352007-10-15 16:15:53 -04002604
2605 this_item_size = btrfs_item_size(right, item);
2606 if (this_item_size + sizeof(*item) + push_space > free_space)
Chris Masonbe0e5c02007-01-26 15:51:26 -05002607 break;
Chris Masondb945352007-10-15 16:15:53 -04002608
Chris Masonbe0e5c02007-01-26 15:51:26 -05002609 push_items++;
Chris Masondb945352007-10-15 16:15:53 -04002610 push_space += this_item_size + sizeof(*item);
Chris Masonbe0e5c02007-01-26 15:51:26 -05002611 }
Chris Masondb945352007-10-15 16:15:53 -04002612
Chris Masonbe0e5c02007-01-26 15:51:26 -05002613 if (push_items == 0) {
Chris Mason925baed2008-06-25 16:01:30 -04002614 ret = 1;
2615 goto out;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002616 }
Chris Mason34a38212007-11-07 13:31:03 -05002617 if (!empty && push_items == btrfs_header_nritems(right))
Chris Masona429e512007-04-18 16:15:28 -04002618 WARN_ON(1);
Chris Mason5f39d392007-10-15 16:14:19 -04002619
Chris Masonbe0e5c02007-01-26 15:51:26 -05002620 /* push data from right to left */
Chris Mason5f39d392007-10-15 16:14:19 -04002621 copy_extent_buffer(left, right,
2622 btrfs_item_nr_offset(btrfs_header_nritems(left)),
2623 btrfs_item_nr_offset(0),
2624 push_items * sizeof(struct btrfs_item));
2625
Chris Mason123abc82007-03-14 14:14:43 -04002626 push_space = BTRFS_LEAF_DATA_SIZE(root) -
Chris Masond3977122009-01-05 21:25:51 -05002627 btrfs_item_offset_nr(right, push_items - 1);
Chris Mason5f39d392007-10-15 16:14:19 -04002628
2629 copy_extent_buffer(left, right, btrfs_leaf_data(left) +
Chris Masond6025572007-03-30 14:27:56 -04002630 leaf_data_end(root, left) - push_space,
2631 btrfs_leaf_data(right) +
Chris Mason5f39d392007-10-15 16:14:19 -04002632 btrfs_item_offset_nr(right, push_items - 1),
Chris Masond6025572007-03-30 14:27:56 -04002633 push_space);
Chris Mason5f39d392007-10-15 16:14:19 -04002634 old_left_nritems = btrfs_header_nritems(left);
Yan Zheng87b29b22008-12-17 10:21:48 -05002635 BUG_ON(old_left_nritems <= 0);
Chris Masoneb60cea2007-02-02 09:18:22 -05002636
Chris Masondb945352007-10-15 16:15:53 -04002637 old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
Chris Mason0783fcf2007-03-12 20:12:07 -04002638 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04002639 u32 ioff;
Chris Masondb945352007-10-15 16:15:53 -04002640
Chris Mason5f39d392007-10-15 16:14:19 -04002641 item = btrfs_item_nr(left, i);
Chris Masondb945352007-10-15 16:15:53 -04002642
Chris Masoncfed81a2012-03-03 07:40:03 -05002643 ioff = btrfs_token_item_offset(left, item, &token);
2644 btrfs_set_token_item_offset(left, item,
2645 ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size),
2646 &token);
Chris Masonbe0e5c02007-01-26 15:51:26 -05002647 }
Chris Mason5f39d392007-10-15 16:14:19 -04002648 btrfs_set_header_nritems(left, old_left_nritems + push_items);
Chris Masonbe0e5c02007-01-26 15:51:26 -05002649
2650 /* fixup right node */
Chris Mason34a38212007-11-07 13:31:03 -05002651 if (push_items > right_nritems) {
Chris Masond3977122009-01-05 21:25:51 -05002652 printk(KERN_CRIT "push items %d nr %u\n", push_items,
2653 right_nritems);
Chris Mason34a38212007-11-07 13:31:03 -05002654 WARN_ON(1);
2655 }
Chris Mason5f39d392007-10-15 16:14:19 -04002656
Chris Mason34a38212007-11-07 13:31:03 -05002657 if (push_items < right_nritems) {
2658 push_space = btrfs_item_offset_nr(right, push_items - 1) -
2659 leaf_data_end(root, right);
2660 memmove_extent_buffer(right, btrfs_leaf_data(right) +
2661 BTRFS_LEAF_DATA_SIZE(root) - push_space,
2662 btrfs_leaf_data(right) +
2663 leaf_data_end(root, right), push_space);
2664
2665 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
Chris Mason5f39d392007-10-15 16:14:19 -04002666 btrfs_item_nr_offset(push_items),
2667 (btrfs_header_nritems(right) - push_items) *
2668 sizeof(struct btrfs_item));
Chris Mason34a38212007-11-07 13:31:03 -05002669 }
Yaneef1c492007-11-26 10:58:13 -05002670 right_nritems -= push_items;
2671 btrfs_set_header_nritems(right, right_nritems);
Chris Mason123abc82007-03-14 14:14:43 -04002672 push_space = BTRFS_LEAF_DATA_SIZE(root);
Chris Mason5f39d392007-10-15 16:14:19 -04002673 for (i = 0; i < right_nritems; i++) {
2674 item = btrfs_item_nr(right, i);
Chris Masondb945352007-10-15 16:15:53 -04002675
Chris Masoncfed81a2012-03-03 07:40:03 -05002676 push_space = push_space - btrfs_token_item_size(right,
2677 item, &token);
2678 btrfs_set_token_item_offset(right, item, push_space, &token);
Chris Masondb945352007-10-15 16:15:53 -04002679 }
Chris Masoneb60cea2007-02-02 09:18:22 -05002680
Chris Mason5f39d392007-10-15 16:14:19 -04002681 btrfs_mark_buffer_dirty(left);
Chris Mason34a38212007-11-07 13:31:03 -05002682 if (right_nritems)
2683 btrfs_mark_buffer_dirty(right);
Yan, Zhengf0486c62010-05-16 10:46:25 -04002684 else
2685 clean_tree_block(trans, root, right);
Chris Mason098f59c2007-05-11 11:33:21 -04002686
Chris Mason5f39d392007-10-15 16:14:19 -04002687 btrfs_item_key(right, &disk_key, 0);
Jeff Mahoney143bede2012-03-01 14:56:26 +01002688 fixup_low_keys(trans, root, path, &disk_key, 1);
Chris Masonbe0e5c02007-01-26 15:51:26 -05002689
2690 /* then fixup the leaf pointer in the path */
2691 if (path->slots[0] < push_items) {
2692 path->slots[0] += old_left_nritems;
Chris Mason925baed2008-06-25 16:01:30 -04002693 btrfs_tree_unlock(path->nodes[0]);
Chris Mason5f39d392007-10-15 16:14:19 -04002694 free_extent_buffer(path->nodes[0]);
2695 path->nodes[0] = left;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002696 path->slots[1] -= 1;
2697 } else {
Chris Mason925baed2008-06-25 16:01:30 -04002698 btrfs_tree_unlock(left);
Chris Mason5f39d392007-10-15 16:14:19 -04002699 free_extent_buffer(left);
Chris Masonbe0e5c02007-01-26 15:51:26 -05002700 path->slots[0] -= push_items;
2701 }
Chris Masoneb60cea2007-02-02 09:18:22 -05002702 BUG_ON(path->slots[0] < 0);
Chris Masonaa5d6be2007-02-28 16:35:06 -05002703 return ret;
Chris Mason925baed2008-06-25 16:01:30 -04002704out:
2705 btrfs_tree_unlock(left);
2706 free_extent_buffer(left);
2707 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002708}
2709
Chris Mason74123bd2007-02-02 11:05:29 -05002710/*
Chris Mason44871b12009-03-13 10:04:31 -04002711 * push some data in the path leaf to the left, trying to free up at
2712 * least data_size bytes. returns zero if the push worked, nonzero otherwise
Chris Mason99d8f832010-07-07 10:51:48 -04002713 *
2714 * max_slot can put a limit on how far into the leaf we'll push items. The
2715 * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the
2716 * items
Chris Mason44871b12009-03-13 10:04:31 -04002717 */
2718static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
Chris Mason99d8f832010-07-07 10:51:48 -04002719 *root, struct btrfs_path *path, int min_data_size,
2720 int data_size, int empty, u32 max_slot)
Chris Mason44871b12009-03-13 10:04:31 -04002721{
2722 struct extent_buffer *right = path->nodes[0];
2723 struct extent_buffer *left;
2724 int slot;
2725 int free_space;
2726 u32 right_nritems;
2727 int ret = 0;
2728
2729 slot = path->slots[1];
2730 if (slot == 0)
2731 return 1;
2732 if (!path->nodes[1])
2733 return 1;
2734
2735 right_nritems = btrfs_header_nritems(right);
2736 if (right_nritems == 0)
2737 return 1;
2738
2739 btrfs_assert_tree_locked(path->nodes[1]);
2740
2741 left = read_node_slot(root, path->nodes[1], slot - 1);
Tsutomu Itoh91ca3382011-01-05 02:32:22 +00002742 if (left == NULL)
2743 return 1;
2744
Chris Mason44871b12009-03-13 10:04:31 -04002745 btrfs_tree_lock(left);
2746 btrfs_set_lock_blocking(left);
2747
2748 free_space = btrfs_leaf_free_space(root, left);
2749 if (free_space < data_size) {
2750 ret = 1;
2751 goto out;
2752 }
2753
2754 /* cow and double check */
2755 ret = btrfs_cow_block(trans, root, left,
2756 path->nodes[1], slot - 1, &left);
2757 if (ret) {
2758 /* we hit -ENOSPC, but it isn't fatal here */
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002759 if (ret == -ENOSPC)
2760 ret = 1;
Chris Mason44871b12009-03-13 10:04:31 -04002761 goto out;
2762 }
2763
2764 free_space = btrfs_leaf_free_space(root, left);
2765 if (free_space < data_size) {
2766 ret = 1;
2767 goto out;
2768 }
2769
Chris Mason99d8f832010-07-07 10:51:48 -04002770 return __push_leaf_left(trans, root, path, min_data_size,
2771 empty, left, free_space, right_nritems,
2772 max_slot);
Chris Mason44871b12009-03-13 10:04:31 -04002773out:
2774 btrfs_tree_unlock(left);
2775 free_extent_buffer(left);
2776 return ret;
2777}
2778
2779/*
Chris Mason74123bd2007-02-02 11:05:29 -05002780 * split the path's leaf in two, making sure there is at least data_size
2781 * available for the resulting leaf level of the path.
2782 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01002783static noinline void copy_for_split(struct btrfs_trans_handle *trans,
2784 struct btrfs_root *root,
2785 struct btrfs_path *path,
2786 struct extent_buffer *l,
2787 struct extent_buffer *right,
2788 int slot, int mid, int nritems)
Chris Masonbe0e5c02007-01-26 15:51:26 -05002789{
Chris Masonbe0e5c02007-01-26 15:51:26 -05002790 int data_copy_size;
2791 int rt_data_off;
2792 int i;
Chris Masond4dbff92007-04-04 14:08:15 -04002793 struct btrfs_disk_key disk_key;
Chris Masoncfed81a2012-03-03 07:40:03 -05002794 struct btrfs_map_token token;
2795
2796 btrfs_init_map_token(&token);
Chris Masonbe0e5c02007-01-26 15:51:26 -05002797
Chris Mason5f39d392007-10-15 16:14:19 -04002798 nritems = nritems - mid;
2799 btrfs_set_header_nritems(right, nritems);
2800 data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
2801
2802 copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
2803 btrfs_item_nr_offset(mid),
2804 nritems * sizeof(struct btrfs_item));
2805
2806 copy_extent_buffer(right, l,
Chris Masond6025572007-03-30 14:27:56 -04002807 btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
2808 data_copy_size, btrfs_leaf_data(l) +
2809 leaf_data_end(root, l), data_copy_size);
Chris Mason74123bd2007-02-02 11:05:29 -05002810
Chris Mason5f39d392007-10-15 16:14:19 -04002811 rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
2812 btrfs_item_end_nr(l, mid);
2813
2814 for (i = 0; i < nritems; i++) {
2815 struct btrfs_item *item = btrfs_item_nr(right, i);
Chris Masondb945352007-10-15 16:15:53 -04002816 u32 ioff;
2817
Chris Masoncfed81a2012-03-03 07:40:03 -05002818 ioff = btrfs_token_item_offset(right, item, &token);
2819 btrfs_set_token_item_offset(right, item,
2820 ioff + rt_data_off, &token);
Chris Mason0783fcf2007-03-12 20:12:07 -04002821 }
Chris Mason74123bd2007-02-02 11:05:29 -05002822
Chris Mason5f39d392007-10-15 16:14:19 -04002823 btrfs_set_header_nritems(l, mid);
Chris Mason5f39d392007-10-15 16:14:19 -04002824 btrfs_item_key(right, &disk_key, 0);
Jeff Mahoney143bede2012-03-01 14:56:26 +01002825 insert_ptr(trans, root, path, &disk_key, right->start,
2826 path->slots[1] + 1, 1);
Chris Mason5f39d392007-10-15 16:14:19 -04002827
2828 btrfs_mark_buffer_dirty(right);
2829 btrfs_mark_buffer_dirty(l);
Chris Masoneb60cea2007-02-02 09:18:22 -05002830 BUG_ON(path->slots[0] != slot);
Chris Mason5f39d392007-10-15 16:14:19 -04002831
Chris Masonbe0e5c02007-01-26 15:51:26 -05002832 if (mid <= slot) {
Chris Mason925baed2008-06-25 16:01:30 -04002833 btrfs_tree_unlock(path->nodes[0]);
Chris Mason5f39d392007-10-15 16:14:19 -04002834 free_extent_buffer(path->nodes[0]);
2835 path->nodes[0] = right;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002836 path->slots[0] -= mid;
2837 path->slots[1] += 1;
Chris Mason925baed2008-06-25 16:01:30 -04002838 } else {
2839 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04002840 free_extent_buffer(right);
Chris Mason925baed2008-06-25 16:01:30 -04002841 }
Chris Mason5f39d392007-10-15 16:14:19 -04002842
Chris Masoneb60cea2007-02-02 09:18:22 -05002843 BUG_ON(path->slots[0] < 0);
Chris Mason44871b12009-03-13 10:04:31 -04002844}
2845
2846/*
Chris Mason99d8f832010-07-07 10:51:48 -04002847 * double splits happen when we need to insert a big item in the middle
2848 * of a leaf. A double split can leave us with 3 mostly empty leaves:
2849 * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
2850 * A B C
2851 *
2852 * We avoid this by trying to push the items on either side of our target
2853 * into the adjacent leaves. If all goes well we can avoid the double split
2854 * completely.
2855 */
2856static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
2857 struct btrfs_root *root,
2858 struct btrfs_path *path,
2859 int data_size)
2860{
2861 int ret;
2862 int progress = 0;
2863 int slot;
2864 u32 nritems;
2865
2866 slot = path->slots[0];
2867
2868 /*
2869 * try to push all the items after our slot into the
2870 * right leaf
2871 */
2872 ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot);
2873 if (ret < 0)
2874 return ret;
2875
2876 if (ret == 0)
2877 progress++;
2878
2879 nritems = btrfs_header_nritems(path->nodes[0]);
2880 /*
2881 * our goal is to get our slot at the start or end of a leaf. If
2882 * we've done so we're done
2883 */
2884 if (path->slots[0] == 0 || path->slots[0] == nritems)
2885 return 0;
2886
2887 if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
2888 return 0;
2889
2890 /* try to push all the items before our slot into the next leaf */
2891 slot = path->slots[0];
2892 ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot);
2893 if (ret < 0)
2894 return ret;
2895
2896 if (ret == 0)
2897 progress++;
2898
2899 if (progress)
2900 return 0;
2901 return 1;
2902}
2903
2904/*
Chris Mason44871b12009-03-13 10:04:31 -04002905 * split the path's leaf in two, making sure there is at least data_size
2906 * available for the resulting leaf level of the path.
2907 *
2908 * returns 0 if all went well and < 0 on failure.
2909 */
2910static noinline int split_leaf(struct btrfs_trans_handle *trans,
2911 struct btrfs_root *root,
2912 struct btrfs_key *ins_key,
2913 struct btrfs_path *path, int data_size,
2914 int extend)
2915{
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002916 struct btrfs_disk_key disk_key;
Chris Mason44871b12009-03-13 10:04:31 -04002917 struct extent_buffer *l;
2918 u32 nritems;
2919 int mid;
2920 int slot;
2921 struct extent_buffer *right;
2922 int ret = 0;
2923 int wret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002924 int split;
Chris Mason44871b12009-03-13 10:04:31 -04002925 int num_doubles = 0;
Chris Mason99d8f832010-07-07 10:51:48 -04002926 int tried_avoid_double = 0;
Chris Mason44871b12009-03-13 10:04:31 -04002927
Yan, Zhenga5719522009-09-24 09:17:31 -04002928 l = path->nodes[0];
2929 slot = path->slots[0];
2930 if (extend && data_size + btrfs_item_size_nr(l, slot) +
2931 sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root))
2932 return -EOVERFLOW;
2933
Chris Mason44871b12009-03-13 10:04:31 -04002934 /* first try to make some room by pushing left and right */
Chris Mason99d8f832010-07-07 10:51:48 -04002935 if (data_size) {
2936 wret = push_leaf_right(trans, root, path, data_size,
2937 data_size, 0, 0);
Chris Mason44871b12009-03-13 10:04:31 -04002938 if (wret < 0)
2939 return wret;
2940 if (wret) {
Chris Mason99d8f832010-07-07 10:51:48 -04002941 wret = push_leaf_left(trans, root, path, data_size,
2942 data_size, 0, (u32)-1);
Chris Mason44871b12009-03-13 10:04:31 -04002943 if (wret < 0)
2944 return wret;
2945 }
2946 l = path->nodes[0];
2947
2948 /* did the pushes work? */
2949 if (btrfs_leaf_free_space(root, l) >= data_size)
2950 return 0;
2951 }
2952
2953 if (!path->nodes[1]) {
2954 ret = insert_new_root(trans, root, path, 1);
2955 if (ret)
2956 return ret;
2957 }
2958again:
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002959 split = 1;
Chris Mason44871b12009-03-13 10:04:31 -04002960 l = path->nodes[0];
2961 slot = path->slots[0];
2962 nritems = btrfs_header_nritems(l);
2963 mid = (nritems + 1) / 2;
2964
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002965 if (mid <= slot) {
2966 if (nritems == 1 ||
2967 leaf_space_used(l, mid, nritems - mid) + data_size >
2968 BTRFS_LEAF_DATA_SIZE(root)) {
2969 if (slot >= nritems) {
2970 split = 0;
2971 } else {
2972 mid = slot;
2973 if (mid != nritems &&
2974 leaf_space_used(l, mid, nritems - mid) +
2975 data_size > BTRFS_LEAF_DATA_SIZE(root)) {
Chris Mason99d8f832010-07-07 10:51:48 -04002976 if (data_size && !tried_avoid_double)
2977 goto push_for_double;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002978 split = 2;
2979 }
2980 }
2981 }
2982 } else {
2983 if (leaf_space_used(l, 0, mid) + data_size >
2984 BTRFS_LEAF_DATA_SIZE(root)) {
2985 if (!extend && data_size && slot == 0) {
2986 split = 0;
2987 } else if ((extend || !data_size) && slot == 0) {
2988 mid = 1;
2989 } else {
2990 mid = slot;
2991 if (mid != nritems &&
2992 leaf_space_used(l, mid, nritems - mid) +
2993 data_size > BTRFS_LEAF_DATA_SIZE(root)) {
Chris Mason99d8f832010-07-07 10:51:48 -04002994 if (data_size && !tried_avoid_double)
2995 goto push_for_double;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002996 split = 2 ;
2997 }
2998 }
2999 }
3000 }
3001
3002 if (split == 0)
3003 btrfs_cpu_key_to_disk(&disk_key, ins_key);
3004 else
3005 btrfs_item_key(l, &disk_key, mid);
3006
3007 right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
Chris Mason44871b12009-03-13 10:04:31 -04003008 root->root_key.objectid,
Arne Jansen66d7e7f2011-09-12 15:26:38 +02003009 &disk_key, 0, l->start, 0, 0);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003010 if (IS_ERR(right))
Chris Mason44871b12009-03-13 10:04:31 -04003011 return PTR_ERR(right);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003012
3013 root_add_used(root, root->leafsize);
Chris Mason44871b12009-03-13 10:04:31 -04003014
3015 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
3016 btrfs_set_header_bytenr(right, right->start);
3017 btrfs_set_header_generation(right, trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003018 btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
Chris Mason44871b12009-03-13 10:04:31 -04003019 btrfs_set_header_owner(right, root->root_key.objectid);
3020 btrfs_set_header_level(right, 0);
3021 write_extent_buffer(right, root->fs_info->fsid,
3022 (unsigned long)btrfs_header_fsid(right),
3023 BTRFS_FSID_SIZE);
3024
3025 write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
3026 (unsigned long)btrfs_header_chunk_tree_uuid(right),
3027 BTRFS_UUID_SIZE);
3028
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003029 if (split == 0) {
3030 if (mid <= slot) {
3031 btrfs_set_header_nritems(right, 0);
Jeff Mahoney143bede2012-03-01 14:56:26 +01003032 insert_ptr(trans, root, path, &disk_key, right->start,
3033 path->slots[1] + 1, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003034 btrfs_tree_unlock(path->nodes[0]);
3035 free_extent_buffer(path->nodes[0]);
3036 path->nodes[0] = right;
3037 path->slots[0] = 0;
3038 path->slots[1] += 1;
3039 } else {
3040 btrfs_set_header_nritems(right, 0);
Jeff Mahoney143bede2012-03-01 14:56:26 +01003041 insert_ptr(trans, root, path, &disk_key, right->start,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003042 path->slots[1], 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003043 btrfs_tree_unlock(path->nodes[0]);
3044 free_extent_buffer(path->nodes[0]);
3045 path->nodes[0] = right;
3046 path->slots[0] = 0;
Jeff Mahoney143bede2012-03-01 14:56:26 +01003047 if (path->slots[1] == 0)
3048 fixup_low_keys(trans, root, path,
3049 &disk_key, 1);
Chris Mason44871b12009-03-13 10:04:31 -04003050 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003051 btrfs_mark_buffer_dirty(right);
3052 return ret;
Chris Mason44871b12009-03-13 10:04:31 -04003053 }
3054
Jeff Mahoney143bede2012-03-01 14:56:26 +01003055 copy_for_split(trans, root, path, l, right, slot, mid, nritems);
Chris Mason44871b12009-03-13 10:04:31 -04003056
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003057 if (split == 2) {
Chris Masoncc0c5532007-10-25 15:42:57 -04003058 BUG_ON(num_doubles != 0);
3059 num_doubles++;
3060 goto again;
Chris Mason3326d1b2007-10-15 16:18:25 -04003061 }
Chris Mason44871b12009-03-13 10:04:31 -04003062
Jeff Mahoney143bede2012-03-01 14:56:26 +01003063 return 0;
Chris Mason99d8f832010-07-07 10:51:48 -04003064
3065push_for_double:
3066 push_for_double_split(trans, root, path, data_size);
3067 tried_avoid_double = 1;
3068 if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
3069 return 0;
3070 goto again;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003071}
3072
Yan, Zhengad48fd752009-11-12 09:33:58 +00003073static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
3074 struct btrfs_root *root,
3075 struct btrfs_path *path, int ins_len)
Chris Mason459931e2008-12-10 09:10:46 -05003076{
Yan, Zhengad48fd752009-11-12 09:33:58 +00003077 struct btrfs_key key;
Chris Mason459931e2008-12-10 09:10:46 -05003078 struct extent_buffer *leaf;
Yan, Zhengad48fd752009-11-12 09:33:58 +00003079 struct btrfs_file_extent_item *fi;
3080 u64 extent_len = 0;
3081 u32 item_size;
3082 int ret;
Chris Mason459931e2008-12-10 09:10:46 -05003083
3084 leaf = path->nodes[0];
Yan, Zhengad48fd752009-11-12 09:33:58 +00003085 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3086
3087 BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
3088 key.type != BTRFS_EXTENT_CSUM_KEY);
3089
3090 if (btrfs_leaf_free_space(root, leaf) >= ins_len)
3091 return 0;
Chris Mason459931e2008-12-10 09:10:46 -05003092
3093 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
Yan, Zhengad48fd752009-11-12 09:33:58 +00003094 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3095 fi = btrfs_item_ptr(leaf, path->slots[0],
3096 struct btrfs_file_extent_item);
3097 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
3098 }
David Sterbab3b4aa72011-04-21 01:20:15 +02003099 btrfs_release_path(path);
Chris Mason459931e2008-12-10 09:10:46 -05003100
Chris Mason459931e2008-12-10 09:10:46 -05003101 path->keep_locks = 1;
Yan, Zhengad48fd752009-11-12 09:33:58 +00003102 path->search_for_split = 1;
3103 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
Chris Mason459931e2008-12-10 09:10:46 -05003104 path->search_for_split = 0;
Yan, Zhengad48fd752009-11-12 09:33:58 +00003105 if (ret < 0)
3106 goto err;
Chris Mason459931e2008-12-10 09:10:46 -05003107
Yan, Zhengad48fd752009-11-12 09:33:58 +00003108 ret = -EAGAIN;
3109 leaf = path->nodes[0];
Chris Mason459931e2008-12-10 09:10:46 -05003110 /* if our item isn't there or got smaller, return now */
Yan, Zhengad48fd752009-11-12 09:33:58 +00003111 if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0]))
3112 goto err;
3113
Chris Mason109f6ae2010-04-02 09:20:18 -04003114 /* the leaf has changed, it now has room. return now */
3115 if (btrfs_leaf_free_space(root, path->nodes[0]) >= ins_len)
3116 goto err;
3117
Yan, Zhengad48fd752009-11-12 09:33:58 +00003118 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3119 fi = btrfs_item_ptr(leaf, path->slots[0],
3120 struct btrfs_file_extent_item);
3121 if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
3122 goto err;
Chris Mason459931e2008-12-10 09:10:46 -05003123 }
3124
Chris Masonb9473432009-03-13 11:00:37 -04003125 btrfs_set_path_blocking(path);
Yan, Zhengad48fd752009-11-12 09:33:58 +00003126 ret = split_leaf(trans, root, &key, path, ins_len, 1);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003127 if (ret)
3128 goto err;
Chris Mason459931e2008-12-10 09:10:46 -05003129
Yan, Zhengad48fd752009-11-12 09:33:58 +00003130 path->keep_locks = 0;
Chris Masonb9473432009-03-13 11:00:37 -04003131 btrfs_unlock_up_safe(path, 1);
Yan, Zhengad48fd752009-11-12 09:33:58 +00003132 return 0;
3133err:
3134 path->keep_locks = 0;
3135 return ret;
3136}
3137
3138static noinline int split_item(struct btrfs_trans_handle *trans,
3139 struct btrfs_root *root,
3140 struct btrfs_path *path,
3141 struct btrfs_key *new_key,
3142 unsigned long split_offset)
3143{
3144 struct extent_buffer *leaf;
3145 struct btrfs_item *item;
3146 struct btrfs_item *new_item;
3147 int slot;
3148 char *buf;
3149 u32 nritems;
3150 u32 item_size;
3151 u32 orig_offset;
3152 struct btrfs_disk_key disk_key;
3153
Chris Masonb9473432009-03-13 11:00:37 -04003154 leaf = path->nodes[0];
3155 BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
3156
Chris Masonb4ce94d2009-02-04 09:25:08 -05003157 btrfs_set_path_blocking(path);
3158
Chris Mason459931e2008-12-10 09:10:46 -05003159 item = btrfs_item_nr(leaf, path->slots[0]);
3160 orig_offset = btrfs_item_offset(leaf, item);
3161 item_size = btrfs_item_size(leaf, item);
3162
Chris Mason459931e2008-12-10 09:10:46 -05003163 buf = kmalloc(item_size, GFP_NOFS);
Yan, Zhengad48fd752009-11-12 09:33:58 +00003164 if (!buf)
3165 return -ENOMEM;
3166
Chris Mason459931e2008-12-10 09:10:46 -05003167 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
3168 path->slots[0]), item_size);
Yan, Zhengad48fd752009-11-12 09:33:58 +00003169
Chris Mason459931e2008-12-10 09:10:46 -05003170 slot = path->slots[0] + 1;
Chris Mason459931e2008-12-10 09:10:46 -05003171 nritems = btrfs_header_nritems(leaf);
Chris Mason459931e2008-12-10 09:10:46 -05003172 if (slot != nritems) {
3173 /* shift the items */
3174 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
Yan, Zhengad48fd752009-11-12 09:33:58 +00003175 btrfs_item_nr_offset(slot),
3176 (nritems - slot) * sizeof(struct btrfs_item));
Chris Mason459931e2008-12-10 09:10:46 -05003177 }
3178
3179 btrfs_cpu_key_to_disk(&disk_key, new_key);
3180 btrfs_set_item_key(leaf, &disk_key, slot);
3181
3182 new_item = btrfs_item_nr(leaf, slot);
3183
3184 btrfs_set_item_offset(leaf, new_item, orig_offset);
3185 btrfs_set_item_size(leaf, new_item, item_size - split_offset);
3186
3187 btrfs_set_item_offset(leaf, item,
3188 orig_offset + item_size - split_offset);
3189 btrfs_set_item_size(leaf, item, split_offset);
3190
3191 btrfs_set_header_nritems(leaf, nritems + 1);
3192
3193 /* write the data for the start of the original item */
3194 write_extent_buffer(leaf, buf,
3195 btrfs_item_ptr_offset(leaf, path->slots[0]),
3196 split_offset);
3197
3198 /* write the data for the new item */
3199 write_extent_buffer(leaf, buf + split_offset,
3200 btrfs_item_ptr_offset(leaf, slot),
3201 item_size - split_offset);
3202 btrfs_mark_buffer_dirty(leaf);
3203
Yan, Zhengad48fd752009-11-12 09:33:58 +00003204 BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
Chris Mason459931e2008-12-10 09:10:46 -05003205 kfree(buf);
Yan, Zhengad48fd752009-11-12 09:33:58 +00003206 return 0;
3207}
3208
3209/*
3210 * This function splits a single item into two items,
3211 * giving 'new_key' to the new item and splitting the
3212 * old one at split_offset (from the start of the item).
3213 *
3214 * The path may be released by this operation. After
3215 * the split, the path is pointing to the old item. The
3216 * new item is going to be in the same node as the old one.
3217 *
3218 * Note, the item being split must be smaller enough to live alone on
3219 * a tree block with room for one extra struct btrfs_item
3220 *
3221 * This allows us to split the item in place, keeping a lock on the
3222 * leaf the entire time.
3223 */
3224int btrfs_split_item(struct btrfs_trans_handle *trans,
3225 struct btrfs_root *root,
3226 struct btrfs_path *path,
3227 struct btrfs_key *new_key,
3228 unsigned long split_offset)
3229{
3230 int ret;
3231 ret = setup_leaf_for_split(trans, root, path,
3232 sizeof(struct btrfs_item));
3233 if (ret)
3234 return ret;
3235
3236 ret = split_item(trans, root, path, new_key, split_offset);
Chris Mason459931e2008-12-10 09:10:46 -05003237 return ret;
3238}
3239
3240/*
Yan, Zhengad48fd752009-11-12 09:33:58 +00003241 * This function duplicate a item, giving 'new_key' to the new item.
3242 * It guarantees both items live in the same tree leaf and the new item
3243 * is contiguous with the original item.
3244 *
3245 * This allows us to split file extent in place, keeping a lock on the
3246 * leaf the entire time.
3247 */
3248int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
3249 struct btrfs_root *root,
3250 struct btrfs_path *path,
3251 struct btrfs_key *new_key)
3252{
3253 struct extent_buffer *leaf;
3254 int ret;
3255 u32 item_size;
3256
3257 leaf = path->nodes[0];
3258 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3259 ret = setup_leaf_for_split(trans, root, path,
3260 item_size + sizeof(struct btrfs_item));
3261 if (ret)
3262 return ret;
3263
3264 path->slots[0]++;
Jeff Mahoney143bede2012-03-01 14:56:26 +01003265 setup_items_for_insert(trans, root, path, new_key, &item_size,
3266 item_size, item_size +
3267 sizeof(struct btrfs_item), 1);
Yan, Zhengad48fd752009-11-12 09:33:58 +00003268 leaf = path->nodes[0];
3269 memcpy_extent_buffer(leaf,
3270 btrfs_item_ptr_offset(leaf, path->slots[0]),
3271 btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
3272 item_size);
3273 return 0;
3274}
3275
3276/*
Chris Masond352ac62008-09-29 15:18:18 -04003277 * make the item pointed to by the path smaller. new_size indicates
3278 * how small to make it, and from_end tells us if we just chop bytes
3279 * off the end of the item or if we shift the item to chop bytes off
3280 * the front.
3281 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01003282void btrfs_truncate_item(struct btrfs_trans_handle *trans,
3283 struct btrfs_root *root,
3284 struct btrfs_path *path,
3285 u32 new_size, int from_end)
Chris Masonb18c6682007-04-17 13:26:50 -04003286{
Chris Masonb18c6682007-04-17 13:26:50 -04003287 int slot;
Chris Mason5f39d392007-10-15 16:14:19 -04003288 struct extent_buffer *leaf;
3289 struct btrfs_item *item;
Chris Masonb18c6682007-04-17 13:26:50 -04003290 u32 nritems;
3291 unsigned int data_end;
3292 unsigned int old_data_start;
3293 unsigned int old_size;
3294 unsigned int size_diff;
3295 int i;
Chris Masoncfed81a2012-03-03 07:40:03 -05003296 struct btrfs_map_token token;
3297
3298 btrfs_init_map_token(&token);
Chris Masonb18c6682007-04-17 13:26:50 -04003299
Chris Mason5f39d392007-10-15 16:14:19 -04003300 leaf = path->nodes[0];
Chris Mason179e29e2007-11-01 11:28:41 -04003301 slot = path->slots[0];
3302
3303 old_size = btrfs_item_size_nr(leaf, slot);
3304 if (old_size == new_size)
Jeff Mahoney143bede2012-03-01 14:56:26 +01003305 return;
Chris Masonb18c6682007-04-17 13:26:50 -04003306
Chris Mason5f39d392007-10-15 16:14:19 -04003307 nritems = btrfs_header_nritems(leaf);
Chris Masonb18c6682007-04-17 13:26:50 -04003308 data_end = leaf_data_end(root, leaf);
3309
Chris Mason5f39d392007-10-15 16:14:19 -04003310 old_data_start = btrfs_item_offset_nr(leaf, slot);
Chris Mason179e29e2007-11-01 11:28:41 -04003311
Chris Masonb18c6682007-04-17 13:26:50 -04003312 size_diff = old_size - new_size;
3313
3314 BUG_ON(slot < 0);
3315 BUG_ON(slot >= nritems);
3316
3317 /*
3318 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3319 */
3320 /* first correct the data pointers */
3321 for (i = slot; i < nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04003322 u32 ioff;
3323 item = btrfs_item_nr(leaf, i);
Chris Masondb945352007-10-15 16:15:53 -04003324
Chris Masoncfed81a2012-03-03 07:40:03 -05003325 ioff = btrfs_token_item_offset(leaf, item, &token);
3326 btrfs_set_token_item_offset(leaf, item,
3327 ioff + size_diff, &token);
Chris Masonb18c6682007-04-17 13:26:50 -04003328 }
Chris Masondb945352007-10-15 16:15:53 -04003329
Chris Masonb18c6682007-04-17 13:26:50 -04003330 /* shift the data */
Chris Mason179e29e2007-11-01 11:28:41 -04003331 if (from_end) {
3332 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3333 data_end + size_diff, btrfs_leaf_data(leaf) +
3334 data_end, old_data_start + new_size - data_end);
3335 } else {
3336 struct btrfs_disk_key disk_key;
3337 u64 offset;
3338
3339 btrfs_item_key(leaf, &disk_key, slot);
3340
3341 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
3342 unsigned long ptr;
3343 struct btrfs_file_extent_item *fi;
3344
3345 fi = btrfs_item_ptr(leaf, slot,
3346 struct btrfs_file_extent_item);
3347 fi = (struct btrfs_file_extent_item *)(
3348 (unsigned long)fi - size_diff);
3349
3350 if (btrfs_file_extent_type(leaf, fi) ==
3351 BTRFS_FILE_EXTENT_INLINE) {
3352 ptr = btrfs_item_ptr_offset(leaf, slot);
3353 memmove_extent_buffer(leaf, ptr,
Chris Masond3977122009-01-05 21:25:51 -05003354 (unsigned long)fi,
3355 offsetof(struct btrfs_file_extent_item,
Chris Mason179e29e2007-11-01 11:28:41 -04003356 disk_bytenr));
3357 }
3358 }
3359
3360 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3361 data_end + size_diff, btrfs_leaf_data(leaf) +
3362 data_end, old_data_start - data_end);
3363
3364 offset = btrfs_disk_key_offset(&disk_key);
3365 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
3366 btrfs_set_item_key(leaf, &disk_key, slot);
3367 if (slot == 0)
3368 fixup_low_keys(trans, root, path, &disk_key, 1);
3369 }
Chris Mason5f39d392007-10-15 16:14:19 -04003370
3371 item = btrfs_item_nr(leaf, slot);
3372 btrfs_set_item_size(leaf, item, new_size);
3373 btrfs_mark_buffer_dirty(leaf);
Chris Masonb18c6682007-04-17 13:26:50 -04003374
Chris Mason5f39d392007-10-15 16:14:19 -04003375 if (btrfs_leaf_free_space(root, leaf) < 0) {
3376 btrfs_print_leaf(root, leaf);
Chris Masonb18c6682007-04-17 13:26:50 -04003377 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04003378 }
Chris Masonb18c6682007-04-17 13:26:50 -04003379}
3380
Chris Masond352ac62008-09-29 15:18:18 -04003381/*
3382 * make the item pointed to by the path bigger, data_size is the new size.
3383 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01003384void btrfs_extend_item(struct btrfs_trans_handle *trans,
3385 struct btrfs_root *root, struct btrfs_path *path,
3386 u32 data_size)
Chris Mason6567e832007-04-16 09:22:45 -04003387{
Chris Mason6567e832007-04-16 09:22:45 -04003388 int slot;
Chris Mason5f39d392007-10-15 16:14:19 -04003389 struct extent_buffer *leaf;
3390 struct btrfs_item *item;
Chris Mason6567e832007-04-16 09:22:45 -04003391 u32 nritems;
3392 unsigned int data_end;
3393 unsigned int old_data;
3394 unsigned int old_size;
3395 int i;
Chris Masoncfed81a2012-03-03 07:40:03 -05003396 struct btrfs_map_token token;
3397
3398 btrfs_init_map_token(&token);
Chris Mason6567e832007-04-16 09:22:45 -04003399
Chris Mason5f39d392007-10-15 16:14:19 -04003400 leaf = path->nodes[0];
Chris Mason6567e832007-04-16 09:22:45 -04003401
Chris Mason5f39d392007-10-15 16:14:19 -04003402 nritems = btrfs_header_nritems(leaf);
Chris Mason6567e832007-04-16 09:22:45 -04003403 data_end = leaf_data_end(root, leaf);
3404
Chris Mason5f39d392007-10-15 16:14:19 -04003405 if (btrfs_leaf_free_space(root, leaf) < data_size) {
3406 btrfs_print_leaf(root, leaf);
Chris Mason6567e832007-04-16 09:22:45 -04003407 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04003408 }
Chris Mason6567e832007-04-16 09:22:45 -04003409 slot = path->slots[0];
Chris Mason5f39d392007-10-15 16:14:19 -04003410 old_data = btrfs_item_end_nr(leaf, slot);
Chris Mason6567e832007-04-16 09:22:45 -04003411
3412 BUG_ON(slot < 0);
Chris Mason3326d1b2007-10-15 16:18:25 -04003413 if (slot >= nritems) {
3414 btrfs_print_leaf(root, leaf);
Chris Masond3977122009-01-05 21:25:51 -05003415 printk(KERN_CRIT "slot %d too large, nritems %d\n",
3416 slot, nritems);
Chris Mason3326d1b2007-10-15 16:18:25 -04003417 BUG_ON(1);
3418 }
Chris Mason6567e832007-04-16 09:22:45 -04003419
3420 /*
3421 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3422 */
3423 /* first correct the data pointers */
3424 for (i = slot; i < nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04003425 u32 ioff;
3426 item = btrfs_item_nr(leaf, i);
Chris Masondb945352007-10-15 16:15:53 -04003427
Chris Masoncfed81a2012-03-03 07:40:03 -05003428 ioff = btrfs_token_item_offset(leaf, item, &token);
3429 btrfs_set_token_item_offset(leaf, item,
3430 ioff - data_size, &token);
Chris Mason6567e832007-04-16 09:22:45 -04003431 }
Chris Mason5f39d392007-10-15 16:14:19 -04003432
Chris Mason6567e832007-04-16 09:22:45 -04003433 /* shift the data */
Chris Mason5f39d392007-10-15 16:14:19 -04003434 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
Chris Mason6567e832007-04-16 09:22:45 -04003435 data_end - data_size, btrfs_leaf_data(leaf) +
3436 data_end, old_data - data_end);
Chris Mason5f39d392007-10-15 16:14:19 -04003437
Chris Mason6567e832007-04-16 09:22:45 -04003438 data_end = old_data;
Chris Mason5f39d392007-10-15 16:14:19 -04003439 old_size = btrfs_item_size_nr(leaf, slot);
3440 item = btrfs_item_nr(leaf, slot);
3441 btrfs_set_item_size(leaf, item, old_size + data_size);
3442 btrfs_mark_buffer_dirty(leaf);
Chris Mason6567e832007-04-16 09:22:45 -04003443
Chris Mason5f39d392007-10-15 16:14:19 -04003444 if (btrfs_leaf_free_space(root, leaf) < 0) {
3445 btrfs_print_leaf(root, leaf);
Chris Mason6567e832007-04-16 09:22:45 -04003446 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04003447 }
Chris Mason6567e832007-04-16 09:22:45 -04003448}
3449
Chris Mason74123bd2007-02-02 11:05:29 -05003450/*
Chris Masond352ac62008-09-29 15:18:18 -04003451 * Given a key and some data, insert items into the tree.
Chris Mason74123bd2007-02-02 11:05:29 -05003452 * This does all the path init required, making room in the tree if needed.
Josef Bacikf3465ca2008-11-12 14:19:50 -05003453 * Returns the number of keys that were inserted.
3454 */
3455int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
3456 struct btrfs_root *root,
3457 struct btrfs_path *path,
3458 struct btrfs_key *cpu_key, u32 *data_size,
3459 int nr)
3460{
3461 struct extent_buffer *leaf;
3462 struct btrfs_item *item;
3463 int ret = 0;
3464 int slot;
Josef Bacikf3465ca2008-11-12 14:19:50 -05003465 int i;
3466 u32 nritems;
3467 u32 total_data = 0;
3468 u32 total_size = 0;
3469 unsigned int data_end;
3470 struct btrfs_disk_key disk_key;
3471 struct btrfs_key found_key;
Chris Masoncfed81a2012-03-03 07:40:03 -05003472 struct btrfs_map_token token;
3473
3474 btrfs_init_map_token(&token);
Josef Bacikf3465ca2008-11-12 14:19:50 -05003475
Yan Zheng87b29b22008-12-17 10:21:48 -05003476 for (i = 0; i < nr; i++) {
3477 if (total_size + data_size[i] + sizeof(struct btrfs_item) >
3478 BTRFS_LEAF_DATA_SIZE(root)) {
3479 break;
3480 nr = i;
3481 }
Josef Bacikf3465ca2008-11-12 14:19:50 -05003482 total_data += data_size[i];
Yan Zheng87b29b22008-12-17 10:21:48 -05003483 total_size += data_size[i] + sizeof(struct btrfs_item);
3484 }
3485 BUG_ON(nr == 0);
Josef Bacikf3465ca2008-11-12 14:19:50 -05003486
Josef Bacikf3465ca2008-11-12 14:19:50 -05003487 ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
3488 if (ret == 0)
3489 return -EEXIST;
3490 if (ret < 0)
3491 goto out;
3492
Josef Bacikf3465ca2008-11-12 14:19:50 -05003493 leaf = path->nodes[0];
3494
3495 nritems = btrfs_header_nritems(leaf);
3496 data_end = leaf_data_end(root, leaf);
3497
3498 if (btrfs_leaf_free_space(root, leaf) < total_size) {
3499 for (i = nr; i >= 0; i--) {
3500 total_data -= data_size[i];
3501 total_size -= data_size[i] + sizeof(struct btrfs_item);
3502 if (total_size < btrfs_leaf_free_space(root, leaf))
3503 break;
3504 }
3505 nr = i;
3506 }
3507
3508 slot = path->slots[0];
3509 BUG_ON(slot < 0);
3510
3511 if (slot != nritems) {
3512 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
3513
3514 item = btrfs_item_nr(leaf, slot);
3515 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3516
3517 /* figure out how many keys we can insert in here */
3518 total_data = data_size[0];
3519 for (i = 1; i < nr; i++) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003520 if (btrfs_comp_cpu_keys(&found_key, cpu_key + i) <= 0)
Josef Bacikf3465ca2008-11-12 14:19:50 -05003521 break;
3522 total_data += data_size[i];
3523 }
3524 nr = i;
3525
3526 if (old_data < data_end) {
3527 btrfs_print_leaf(root, leaf);
Chris Masond3977122009-01-05 21:25:51 -05003528 printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
Josef Bacikf3465ca2008-11-12 14:19:50 -05003529 slot, old_data, data_end);
3530 BUG_ON(1);
3531 }
3532 /*
3533 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3534 */
3535 /* first correct the data pointers */
Josef Bacikf3465ca2008-11-12 14:19:50 -05003536 for (i = slot; i < nritems; i++) {
3537 u32 ioff;
3538
3539 item = btrfs_item_nr(leaf, i);
Chris Masoncfed81a2012-03-03 07:40:03 -05003540 ioff = btrfs_token_item_offset(leaf, item, &token);
3541 btrfs_set_token_item_offset(leaf, item,
3542 ioff - total_data, &token);
Josef Bacikf3465ca2008-11-12 14:19:50 -05003543 }
Josef Bacikf3465ca2008-11-12 14:19:50 -05003544 /* shift the items */
3545 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
3546 btrfs_item_nr_offset(slot),
3547 (nritems - slot) * sizeof(struct btrfs_item));
3548
3549 /* shift the data */
3550 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3551 data_end - total_data, btrfs_leaf_data(leaf) +
3552 data_end, old_data - data_end);
3553 data_end = old_data;
3554 } else {
3555 /*
3556 * this sucks but it has to be done, if we are inserting at
3557 * the end of the leaf only insert 1 of the items, since we
3558 * have no way of knowing whats on the next leaf and we'd have
3559 * to drop our current locks to figure it out
3560 */
3561 nr = 1;
3562 }
3563
3564 /* setup the item for the new data */
3565 for (i = 0; i < nr; i++) {
3566 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
3567 btrfs_set_item_key(leaf, &disk_key, slot + i);
3568 item = btrfs_item_nr(leaf, slot + i);
Chris Masoncfed81a2012-03-03 07:40:03 -05003569 btrfs_set_token_item_offset(leaf, item,
3570 data_end - data_size[i], &token);
Josef Bacikf3465ca2008-11-12 14:19:50 -05003571 data_end -= data_size[i];
Chris Masoncfed81a2012-03-03 07:40:03 -05003572 btrfs_set_token_item_size(leaf, item, data_size[i], &token);
Josef Bacikf3465ca2008-11-12 14:19:50 -05003573 }
3574 btrfs_set_header_nritems(leaf, nritems + nr);
3575 btrfs_mark_buffer_dirty(leaf);
3576
3577 ret = 0;
3578 if (slot == 0) {
3579 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
Jeff Mahoney143bede2012-03-01 14:56:26 +01003580 fixup_low_keys(trans, root, path, &disk_key, 1);
Josef Bacikf3465ca2008-11-12 14:19:50 -05003581 }
3582
3583 if (btrfs_leaf_free_space(root, leaf) < 0) {
3584 btrfs_print_leaf(root, leaf);
3585 BUG();
3586 }
3587out:
3588 if (!ret)
3589 ret = nr;
3590 return ret;
3591}
3592
3593/*
Chris Mason44871b12009-03-13 10:04:31 -04003594 * this is a helper for btrfs_insert_empty_items, the main goal here is
3595 * to save stack depth by doing the bulk of the work in a function
3596 * that doesn't call btrfs_search_slot
Chris Mason74123bd2007-02-02 11:05:29 -05003597 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01003598void setup_items_for_insert(struct btrfs_trans_handle *trans,
3599 struct btrfs_root *root, struct btrfs_path *path,
3600 struct btrfs_key *cpu_key, u32 *data_size,
3601 u32 total_data, u32 total_size, int nr)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003602{
Chris Mason5f39d392007-10-15 16:14:19 -04003603 struct btrfs_item *item;
Chris Mason9c583092008-01-29 15:15:18 -05003604 int i;
Chris Mason7518a232007-03-12 12:01:18 -04003605 u32 nritems;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003606 unsigned int data_end;
Chris Masone2fa7222007-03-12 16:22:34 -04003607 struct btrfs_disk_key disk_key;
Chris Mason44871b12009-03-13 10:04:31 -04003608 struct extent_buffer *leaf;
3609 int slot;
Chris Masoncfed81a2012-03-03 07:40:03 -05003610 struct btrfs_map_token token;
3611
3612 btrfs_init_map_token(&token);
Chris Masone2fa7222007-03-12 16:22:34 -04003613
Chris Mason5f39d392007-10-15 16:14:19 -04003614 leaf = path->nodes[0];
Chris Mason44871b12009-03-13 10:04:31 -04003615 slot = path->slots[0];
Chris Mason74123bd2007-02-02 11:05:29 -05003616
Chris Mason5f39d392007-10-15 16:14:19 -04003617 nritems = btrfs_header_nritems(leaf);
Chris Mason123abc82007-03-14 14:14:43 -04003618 data_end = leaf_data_end(root, leaf);
Chris Masoneb60cea2007-02-02 09:18:22 -05003619
Chris Masonf25956c2008-09-12 15:32:53 -04003620 if (btrfs_leaf_free_space(root, leaf) < total_size) {
Chris Mason3326d1b2007-10-15 16:18:25 -04003621 btrfs_print_leaf(root, leaf);
Chris Masond3977122009-01-05 21:25:51 -05003622 printk(KERN_CRIT "not enough freespace need %u have %d\n",
Chris Mason9c583092008-01-29 15:15:18 -05003623 total_size, btrfs_leaf_free_space(root, leaf));
Chris Masonbe0e5c02007-01-26 15:51:26 -05003624 BUG();
Chris Masond4dbff92007-04-04 14:08:15 -04003625 }
Chris Mason5f39d392007-10-15 16:14:19 -04003626
Chris Masonbe0e5c02007-01-26 15:51:26 -05003627 if (slot != nritems) {
Chris Mason5f39d392007-10-15 16:14:19 -04003628 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003629
Chris Mason5f39d392007-10-15 16:14:19 -04003630 if (old_data < data_end) {
3631 btrfs_print_leaf(root, leaf);
Chris Masond3977122009-01-05 21:25:51 -05003632 printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
Chris Mason5f39d392007-10-15 16:14:19 -04003633 slot, old_data, data_end);
3634 BUG_ON(1);
3635 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05003636 /*
3637 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3638 */
3639 /* first correct the data pointers */
Chris Mason0783fcf2007-03-12 20:12:07 -04003640 for (i = slot; i < nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04003641 u32 ioff;
Chris Masondb945352007-10-15 16:15:53 -04003642
Chris Mason5f39d392007-10-15 16:14:19 -04003643 item = btrfs_item_nr(leaf, i);
Chris Masoncfed81a2012-03-03 07:40:03 -05003644 ioff = btrfs_token_item_offset(leaf, item, &token);
3645 btrfs_set_token_item_offset(leaf, item,
3646 ioff - total_data, &token);
Chris Mason0783fcf2007-03-12 20:12:07 -04003647 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05003648 /* shift the items */
Chris Mason9c583092008-01-29 15:15:18 -05003649 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
Chris Mason5f39d392007-10-15 16:14:19 -04003650 btrfs_item_nr_offset(slot),
Chris Masond6025572007-03-30 14:27:56 -04003651 (nritems - slot) * sizeof(struct btrfs_item));
Chris Masonbe0e5c02007-01-26 15:51:26 -05003652
3653 /* shift the data */
Chris Mason5f39d392007-10-15 16:14:19 -04003654 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
Chris Mason9c583092008-01-29 15:15:18 -05003655 data_end - total_data, btrfs_leaf_data(leaf) +
Chris Masond6025572007-03-30 14:27:56 -04003656 data_end, old_data - data_end);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003657 data_end = old_data;
3658 }
Chris Mason5f39d392007-10-15 16:14:19 -04003659
Chris Mason62e27492007-03-15 12:56:47 -04003660 /* setup the item for the new data */
Chris Mason9c583092008-01-29 15:15:18 -05003661 for (i = 0; i < nr; i++) {
3662 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
3663 btrfs_set_item_key(leaf, &disk_key, slot + i);
3664 item = btrfs_item_nr(leaf, slot + i);
Chris Masoncfed81a2012-03-03 07:40:03 -05003665 btrfs_set_token_item_offset(leaf, item,
3666 data_end - data_size[i], &token);
Chris Mason9c583092008-01-29 15:15:18 -05003667 data_end -= data_size[i];
Chris Masoncfed81a2012-03-03 07:40:03 -05003668 btrfs_set_token_item_size(leaf, item, data_size[i], &token);
Chris Mason9c583092008-01-29 15:15:18 -05003669 }
Chris Mason44871b12009-03-13 10:04:31 -04003670
Chris Mason9c583092008-01-29 15:15:18 -05003671 btrfs_set_header_nritems(leaf, nritems + nr);
Chris Masonaa5d6be2007-02-28 16:35:06 -05003672
Chris Mason5a01a2e2008-01-30 11:43:54 -05003673 if (slot == 0) {
3674 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
Jeff Mahoney143bede2012-03-01 14:56:26 +01003675 fixup_low_keys(trans, root, path, &disk_key, 1);
Chris Mason5a01a2e2008-01-30 11:43:54 -05003676 }
Chris Masonb9473432009-03-13 11:00:37 -04003677 btrfs_unlock_up_safe(path, 1);
3678 btrfs_mark_buffer_dirty(leaf);
Chris Masonaa5d6be2007-02-28 16:35:06 -05003679
Chris Mason5f39d392007-10-15 16:14:19 -04003680 if (btrfs_leaf_free_space(root, leaf) < 0) {
3681 btrfs_print_leaf(root, leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003682 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04003683 }
Chris Mason44871b12009-03-13 10:04:31 -04003684}
3685
3686/*
3687 * Given a key and some data, insert items into the tree.
3688 * This does all the path init required, making room in the tree if needed.
3689 */
3690int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
3691 struct btrfs_root *root,
3692 struct btrfs_path *path,
3693 struct btrfs_key *cpu_key, u32 *data_size,
3694 int nr)
3695{
Chris Mason44871b12009-03-13 10:04:31 -04003696 int ret = 0;
3697 int slot;
3698 int i;
3699 u32 total_size = 0;
3700 u32 total_data = 0;
3701
3702 for (i = 0; i < nr; i++)
3703 total_data += data_size[i];
3704
3705 total_size = total_data + (nr * sizeof(struct btrfs_item));
3706 ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
3707 if (ret == 0)
3708 return -EEXIST;
3709 if (ret < 0)
Jeff Mahoney143bede2012-03-01 14:56:26 +01003710 return ret;
Chris Mason44871b12009-03-13 10:04:31 -04003711
Chris Mason44871b12009-03-13 10:04:31 -04003712 slot = path->slots[0];
3713 BUG_ON(slot < 0);
3714
Jeff Mahoney143bede2012-03-01 14:56:26 +01003715 setup_items_for_insert(trans, root, path, cpu_key, data_size,
Chris Mason44871b12009-03-13 10:04:31 -04003716 total_data, total_size, nr);
Jeff Mahoney143bede2012-03-01 14:56:26 +01003717 return 0;
Chris Mason62e27492007-03-15 12:56:47 -04003718}
3719
3720/*
3721 * Given a key and some data, insert an item into the tree.
3722 * This does all the path init required, making room in the tree if needed.
3723 */
Chris Masone089f052007-03-16 16:20:31 -04003724int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
3725 *root, struct btrfs_key *cpu_key, void *data, u32
3726 data_size)
Chris Mason62e27492007-03-15 12:56:47 -04003727{
3728 int ret = 0;
Chris Mason2c90e5d2007-04-02 10:50:19 -04003729 struct btrfs_path *path;
Chris Mason5f39d392007-10-15 16:14:19 -04003730 struct extent_buffer *leaf;
3731 unsigned long ptr;
Chris Mason62e27492007-03-15 12:56:47 -04003732
Chris Mason2c90e5d2007-04-02 10:50:19 -04003733 path = btrfs_alloc_path();
Tsutomu Itohdb5b4932011-03-23 08:14:16 +00003734 if (!path)
3735 return -ENOMEM;
Chris Mason2c90e5d2007-04-02 10:50:19 -04003736 ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
Chris Mason62e27492007-03-15 12:56:47 -04003737 if (!ret) {
Chris Mason5f39d392007-10-15 16:14:19 -04003738 leaf = path->nodes[0];
3739 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
3740 write_extent_buffer(leaf, data, ptr, data_size);
3741 btrfs_mark_buffer_dirty(leaf);
Chris Mason62e27492007-03-15 12:56:47 -04003742 }
Chris Mason2c90e5d2007-04-02 10:50:19 -04003743 btrfs_free_path(path);
Chris Masonaa5d6be2007-02-28 16:35:06 -05003744 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003745}
3746
Chris Mason74123bd2007-02-02 11:05:29 -05003747/*
Chris Mason5de08d72007-02-24 06:24:44 -05003748 * delete the pointer from a given node.
Chris Mason74123bd2007-02-02 11:05:29 -05003749 *
Chris Masond352ac62008-09-29 15:18:18 -04003750 * the tree should have been previously balanced so the deletion does not
3751 * empty a node.
Chris Mason74123bd2007-02-02 11:05:29 -05003752 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01003753static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3754 struct btrfs_path *path, int level, int slot)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003755{
Chris Mason5f39d392007-10-15 16:14:19 -04003756 struct extent_buffer *parent = path->nodes[level];
Chris Mason7518a232007-03-12 12:01:18 -04003757 u32 nritems;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003758
Chris Mason5f39d392007-10-15 16:14:19 -04003759 nritems = btrfs_header_nritems(parent);
Chris Masond3977122009-01-05 21:25:51 -05003760 if (slot != nritems - 1) {
Chris Mason5f39d392007-10-15 16:14:19 -04003761 memmove_extent_buffer(parent,
3762 btrfs_node_key_ptr_offset(slot),
3763 btrfs_node_key_ptr_offset(slot + 1),
Chris Masond6025572007-03-30 14:27:56 -04003764 sizeof(struct btrfs_key_ptr) *
3765 (nritems - slot - 1));
Chris Masonbb803952007-03-01 12:04:21 -05003766 }
Chris Mason7518a232007-03-12 12:01:18 -04003767 nritems--;
Chris Mason5f39d392007-10-15 16:14:19 -04003768 btrfs_set_header_nritems(parent, nritems);
Chris Mason7518a232007-03-12 12:01:18 -04003769 if (nritems == 0 && parent == root->node) {
Chris Mason5f39d392007-10-15 16:14:19 -04003770 BUG_ON(btrfs_header_level(root->node) != 1);
Chris Masonbb803952007-03-01 12:04:21 -05003771 /* just turn the root into a leaf and break */
Chris Mason5f39d392007-10-15 16:14:19 -04003772 btrfs_set_header_level(root->node, 0);
Chris Masonbb803952007-03-01 12:04:21 -05003773 } else if (slot == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04003774 struct btrfs_disk_key disk_key;
3775
3776 btrfs_node_key(parent, &disk_key, 0);
Jeff Mahoney143bede2012-03-01 14:56:26 +01003777 fixup_low_keys(trans, root, path, &disk_key, level + 1);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003778 }
Chris Masond6025572007-03-30 14:27:56 -04003779 btrfs_mark_buffer_dirty(parent);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003780}
3781
Chris Mason74123bd2007-02-02 11:05:29 -05003782/*
Chris Mason323ac952008-10-01 19:05:46 -04003783 * a helper function to delete the leaf pointed to by path->slots[1] and
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003784 * path->nodes[1].
Chris Mason323ac952008-10-01 19:05:46 -04003785 *
3786 * This deletes the pointer in path->nodes[1] and frees the leaf
3787 * block extent. zero is returned if it all worked out, < 0 otherwise.
3788 *
3789 * The path must have already been setup for deleting the leaf, including
3790 * all the proper balancing. path->nodes[1] must be locked.
3791 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01003792static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
3793 struct btrfs_root *root,
3794 struct btrfs_path *path,
3795 struct extent_buffer *leaf)
Chris Mason323ac952008-10-01 19:05:46 -04003796{
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003797 WARN_ON(btrfs_header_generation(leaf) != trans->transid);
Jeff Mahoney143bede2012-03-01 14:56:26 +01003798 del_ptr(trans, root, path, 1, path->slots[1]);
Chris Mason323ac952008-10-01 19:05:46 -04003799
Chris Mason4d081c42009-02-04 09:31:28 -05003800 /*
3801 * btrfs_free_extent is expensive, we want to make sure we
3802 * aren't holding any locks when we call it
3803 */
3804 btrfs_unlock_up_safe(path, 0);
3805
Yan, Zhengf0486c62010-05-16 10:46:25 -04003806 root_sub_used(root, leaf->len);
3807
Josef Bacik3083ee22012-03-09 16:01:49 -05003808 extent_buffer_get(leaf);
Arne Jansen66d7e7f2011-09-12 15:26:38 +02003809 btrfs_free_tree_block(trans, root, leaf, 0, 1, 0);
Josef Bacik3083ee22012-03-09 16:01:49 -05003810 free_extent_buffer_stale(leaf);
Chris Mason323ac952008-10-01 19:05:46 -04003811}
3812/*
Chris Mason74123bd2007-02-02 11:05:29 -05003813 * delete the item at the leaf level in path. If that empties
3814 * the leaf, remove it from the tree
3815 */
Chris Mason85e21ba2008-01-29 15:11:36 -05003816int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3817 struct btrfs_path *path, int slot, int nr)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003818{
Chris Mason5f39d392007-10-15 16:14:19 -04003819 struct extent_buffer *leaf;
3820 struct btrfs_item *item;
Chris Mason85e21ba2008-01-29 15:11:36 -05003821 int last_off;
3822 int dsize = 0;
Chris Masonaa5d6be2007-02-28 16:35:06 -05003823 int ret = 0;
3824 int wret;
Chris Mason85e21ba2008-01-29 15:11:36 -05003825 int i;
Chris Mason7518a232007-03-12 12:01:18 -04003826 u32 nritems;
Chris Masoncfed81a2012-03-03 07:40:03 -05003827 struct btrfs_map_token token;
3828
3829 btrfs_init_map_token(&token);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003830
Chris Mason5f39d392007-10-15 16:14:19 -04003831 leaf = path->nodes[0];
Chris Mason85e21ba2008-01-29 15:11:36 -05003832 last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
3833
3834 for (i = 0; i < nr; i++)
3835 dsize += btrfs_item_size_nr(leaf, slot + i);
3836
Chris Mason5f39d392007-10-15 16:14:19 -04003837 nritems = btrfs_header_nritems(leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003838
Chris Mason85e21ba2008-01-29 15:11:36 -05003839 if (slot + nr != nritems) {
Chris Mason123abc82007-03-14 14:14:43 -04003840 int data_end = leaf_data_end(root, leaf);
Chris Mason5f39d392007-10-15 16:14:19 -04003841
3842 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
Chris Masond6025572007-03-30 14:27:56 -04003843 data_end + dsize,
3844 btrfs_leaf_data(leaf) + data_end,
Chris Mason85e21ba2008-01-29 15:11:36 -05003845 last_off - data_end);
Chris Mason5f39d392007-10-15 16:14:19 -04003846
Chris Mason85e21ba2008-01-29 15:11:36 -05003847 for (i = slot + nr; i < nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04003848 u32 ioff;
Chris Masondb945352007-10-15 16:15:53 -04003849
Chris Mason5f39d392007-10-15 16:14:19 -04003850 item = btrfs_item_nr(leaf, i);
Chris Masoncfed81a2012-03-03 07:40:03 -05003851 ioff = btrfs_token_item_offset(leaf, item, &token);
3852 btrfs_set_token_item_offset(leaf, item,
3853 ioff + dsize, &token);
Chris Mason0783fcf2007-03-12 20:12:07 -04003854 }
Chris Masondb945352007-10-15 16:15:53 -04003855
Chris Mason5f39d392007-10-15 16:14:19 -04003856 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
Chris Mason85e21ba2008-01-29 15:11:36 -05003857 btrfs_item_nr_offset(slot + nr),
Chris Masond6025572007-03-30 14:27:56 -04003858 sizeof(struct btrfs_item) *
Chris Mason85e21ba2008-01-29 15:11:36 -05003859 (nritems - slot - nr));
Chris Masonbe0e5c02007-01-26 15:51:26 -05003860 }
Chris Mason85e21ba2008-01-29 15:11:36 -05003861 btrfs_set_header_nritems(leaf, nritems - nr);
3862 nritems -= nr;
Chris Mason5f39d392007-10-15 16:14:19 -04003863
Chris Mason74123bd2007-02-02 11:05:29 -05003864 /* delete the leaf if we've emptied it */
Chris Mason7518a232007-03-12 12:01:18 -04003865 if (nritems == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04003866 if (leaf == root->node) {
3867 btrfs_set_header_level(leaf, 0);
Chris Mason9a8dd152007-02-23 08:38:36 -05003868 } else {
Yan, Zhengf0486c62010-05-16 10:46:25 -04003869 btrfs_set_path_blocking(path);
3870 clean_tree_block(trans, root, leaf);
Jeff Mahoney143bede2012-03-01 14:56:26 +01003871 btrfs_del_leaf(trans, root, path, leaf);
Chris Mason9a8dd152007-02-23 08:38:36 -05003872 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05003873 } else {
Chris Mason7518a232007-03-12 12:01:18 -04003874 int used = leaf_space_used(leaf, 0, nritems);
Chris Masonaa5d6be2007-02-28 16:35:06 -05003875 if (slot == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04003876 struct btrfs_disk_key disk_key;
3877
3878 btrfs_item_key(leaf, &disk_key, 0);
Jeff Mahoney143bede2012-03-01 14:56:26 +01003879 fixup_low_keys(trans, root, path, &disk_key, 1);
Chris Masonaa5d6be2007-02-28 16:35:06 -05003880 }
Chris Masonaa5d6be2007-02-28 16:35:06 -05003881
Chris Mason74123bd2007-02-02 11:05:29 -05003882 /* delete the leaf if it is mostly empty */
Yan Zhengd717aa12009-07-24 12:42:46 -04003883 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
Chris Masonbe0e5c02007-01-26 15:51:26 -05003884 /* push_leaf_left fixes the path.
3885 * make sure the path still points to our leaf
3886 * for possible call to del_ptr below
3887 */
Chris Mason4920c9a2007-01-26 16:38:42 -05003888 slot = path->slots[1];
Chris Mason5f39d392007-10-15 16:14:19 -04003889 extent_buffer_get(leaf);
3890
Chris Masonb9473432009-03-13 11:00:37 -04003891 btrfs_set_path_blocking(path);
Chris Mason99d8f832010-07-07 10:51:48 -04003892 wret = push_leaf_left(trans, root, path, 1, 1,
3893 1, (u32)-1);
Chris Mason54aa1f42007-06-22 14:16:25 -04003894 if (wret < 0 && wret != -ENOSPC)
Chris Masonaa5d6be2007-02-28 16:35:06 -05003895 ret = wret;
Chris Mason5f39d392007-10-15 16:14:19 -04003896
3897 if (path->nodes[0] == leaf &&
3898 btrfs_header_nritems(leaf)) {
Chris Mason99d8f832010-07-07 10:51:48 -04003899 wret = push_leaf_right(trans, root, path, 1,
3900 1, 1, 0);
Chris Mason54aa1f42007-06-22 14:16:25 -04003901 if (wret < 0 && wret != -ENOSPC)
Chris Masonaa5d6be2007-02-28 16:35:06 -05003902 ret = wret;
3903 }
Chris Mason5f39d392007-10-15 16:14:19 -04003904
3905 if (btrfs_header_nritems(leaf) == 0) {
Chris Mason323ac952008-10-01 19:05:46 -04003906 path->slots[1] = slot;
Jeff Mahoney143bede2012-03-01 14:56:26 +01003907 btrfs_del_leaf(trans, root, path, leaf);
Chris Mason5f39d392007-10-15 16:14:19 -04003908 free_extent_buffer(leaf);
Jeff Mahoney143bede2012-03-01 14:56:26 +01003909 ret = 0;
Chris Mason5de08d72007-02-24 06:24:44 -05003910 } else {
Chris Mason925baed2008-06-25 16:01:30 -04003911 /* if we're still in the path, make sure
3912 * we're dirty. Otherwise, one of the
3913 * push_leaf functions must have already
3914 * dirtied this buffer
3915 */
3916 if (path->nodes[0] == leaf)
3917 btrfs_mark_buffer_dirty(leaf);
Chris Mason5f39d392007-10-15 16:14:19 -04003918 free_extent_buffer(leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003919 }
Chris Masond5719762007-03-23 10:01:08 -04003920 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04003921 btrfs_mark_buffer_dirty(leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003922 }
3923 }
Chris Masonaa5d6be2007-02-28 16:35:06 -05003924 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003925}
3926
Chris Mason97571fd2007-02-24 13:39:08 -05003927/*
Chris Mason925baed2008-06-25 16:01:30 -04003928 * search the tree again to find a leaf with lesser keys
Chris Mason7bb86312007-12-11 09:25:06 -05003929 * returns 0 if it found something or 1 if there are no lesser leaves.
3930 * returns < 0 on io errors.
Chris Masond352ac62008-09-29 15:18:18 -04003931 *
3932 * This may release the path, and so you may lose any locks held at the
3933 * time you call it.
Chris Mason7bb86312007-12-11 09:25:06 -05003934 */
3935int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
3936{
Chris Mason925baed2008-06-25 16:01:30 -04003937 struct btrfs_key key;
3938 struct btrfs_disk_key found_key;
3939 int ret;
Chris Mason7bb86312007-12-11 09:25:06 -05003940
Chris Mason925baed2008-06-25 16:01:30 -04003941 btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
Chris Mason7bb86312007-12-11 09:25:06 -05003942
Chris Mason925baed2008-06-25 16:01:30 -04003943 if (key.offset > 0)
3944 key.offset--;
3945 else if (key.type > 0)
3946 key.type--;
3947 else if (key.objectid > 0)
3948 key.objectid--;
3949 else
3950 return 1;
Chris Mason7bb86312007-12-11 09:25:06 -05003951
David Sterbab3b4aa72011-04-21 01:20:15 +02003952 btrfs_release_path(path);
Chris Mason925baed2008-06-25 16:01:30 -04003953 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3954 if (ret < 0)
3955 return ret;
3956 btrfs_item_key(path->nodes[0], &found_key, 0);
3957 ret = comp_keys(&found_key, &key);
3958 if (ret < 0)
3959 return 0;
3960 return 1;
Chris Mason7bb86312007-12-11 09:25:06 -05003961}
3962
Chris Mason3f157a22008-06-25 16:01:31 -04003963/*
3964 * A helper function to walk down the tree starting at min_key, and looking
3965 * for nodes or leaves that are either in cache or have a minimum
Chris Masond352ac62008-09-29 15:18:18 -04003966 * transaction id. This is used by the btree defrag code, and tree logging
Chris Mason3f157a22008-06-25 16:01:31 -04003967 *
3968 * This does not cow, but it does stuff the starting key it finds back
3969 * into min_key, so you can call btrfs_search_slot with cow=1 on the
3970 * key and get a writable path.
3971 *
3972 * This does lock as it descends, and path->keep_locks should be set
3973 * to 1 by the caller.
3974 *
3975 * This honors path->lowest_level to prevent descent past a given level
3976 * of the tree.
3977 *
Chris Masond352ac62008-09-29 15:18:18 -04003978 * min_trans indicates the oldest transaction that you are interested
3979 * in walking through. Any nodes or leaves older than min_trans are
3980 * skipped over (without reading them).
3981 *
Chris Mason3f157a22008-06-25 16:01:31 -04003982 * returns zero if something useful was found, < 0 on error and 1 if there
3983 * was nothing in the tree that matched the search criteria.
3984 */
3985int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
Chris Masone02119d2008-09-05 16:13:11 -04003986 struct btrfs_key *max_key,
Chris Mason3f157a22008-06-25 16:01:31 -04003987 struct btrfs_path *path, int cache_only,
3988 u64 min_trans)
3989{
3990 struct extent_buffer *cur;
3991 struct btrfs_key found_key;
3992 int slot;
Yan96524802008-07-24 12:19:49 -04003993 int sret;
Chris Mason3f157a22008-06-25 16:01:31 -04003994 u32 nritems;
3995 int level;
3996 int ret = 1;
3997
Chris Mason934d3752008-12-08 16:43:10 -05003998 WARN_ON(!path->keep_locks);
Chris Mason3f157a22008-06-25 16:01:31 -04003999again:
Chris Masonbd681512011-07-16 15:23:14 -04004000 cur = btrfs_read_lock_root_node(root);
Chris Mason3f157a22008-06-25 16:01:31 -04004001 level = btrfs_header_level(cur);
Chris Masone02119d2008-09-05 16:13:11 -04004002 WARN_ON(path->nodes[level]);
Chris Mason3f157a22008-06-25 16:01:31 -04004003 path->nodes[level] = cur;
Chris Masonbd681512011-07-16 15:23:14 -04004004 path->locks[level] = BTRFS_READ_LOCK;
Chris Mason3f157a22008-06-25 16:01:31 -04004005
4006 if (btrfs_header_generation(cur) < min_trans) {
4007 ret = 1;
4008 goto out;
4009 }
Chris Masond3977122009-01-05 21:25:51 -05004010 while (1) {
Chris Mason3f157a22008-06-25 16:01:31 -04004011 nritems = btrfs_header_nritems(cur);
4012 level = btrfs_header_level(cur);
Yan96524802008-07-24 12:19:49 -04004013 sret = bin_search(cur, min_key, level, &slot);
Chris Mason3f157a22008-06-25 16:01:31 -04004014
Chris Mason323ac952008-10-01 19:05:46 -04004015 /* at the lowest level, we're done, setup the path and exit */
4016 if (level == path->lowest_level) {
Chris Masone02119d2008-09-05 16:13:11 -04004017 if (slot >= nritems)
4018 goto find_next_key;
Chris Mason3f157a22008-06-25 16:01:31 -04004019 ret = 0;
4020 path->slots[level] = slot;
4021 btrfs_item_key_to_cpu(cur, &found_key, slot);
4022 goto out;
4023 }
Yan96524802008-07-24 12:19:49 -04004024 if (sret && slot > 0)
4025 slot--;
Chris Mason3f157a22008-06-25 16:01:31 -04004026 /*
4027 * check this node pointer against the cache_only and
4028 * min_trans parameters. If it isn't in cache or is too
4029 * old, skip to the next one.
4030 */
Chris Masond3977122009-01-05 21:25:51 -05004031 while (slot < nritems) {
Chris Mason3f157a22008-06-25 16:01:31 -04004032 u64 blockptr;
4033 u64 gen;
4034 struct extent_buffer *tmp;
Chris Masone02119d2008-09-05 16:13:11 -04004035 struct btrfs_disk_key disk_key;
4036
Chris Mason3f157a22008-06-25 16:01:31 -04004037 blockptr = btrfs_node_blockptr(cur, slot);
4038 gen = btrfs_node_ptr_generation(cur, slot);
4039 if (gen < min_trans) {
4040 slot++;
4041 continue;
4042 }
4043 if (!cache_only)
4044 break;
4045
Chris Masone02119d2008-09-05 16:13:11 -04004046 if (max_key) {
4047 btrfs_node_key(cur, &disk_key, slot);
4048 if (comp_keys(&disk_key, max_key) >= 0) {
4049 ret = 1;
4050 goto out;
4051 }
4052 }
4053
Chris Mason3f157a22008-06-25 16:01:31 -04004054 tmp = btrfs_find_tree_block(root, blockptr,
4055 btrfs_level_size(root, level - 1));
4056
Chris Masonb9fab912012-05-06 07:23:47 -04004057 if (tmp && btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
Chris Mason3f157a22008-06-25 16:01:31 -04004058 free_extent_buffer(tmp);
4059 break;
4060 }
4061 if (tmp)
4062 free_extent_buffer(tmp);
4063 slot++;
4064 }
Chris Masone02119d2008-09-05 16:13:11 -04004065find_next_key:
Chris Mason3f157a22008-06-25 16:01:31 -04004066 /*
4067 * we didn't find a candidate key in this node, walk forward
4068 * and find another one
4069 */
4070 if (slot >= nritems) {
Chris Masone02119d2008-09-05 16:13:11 -04004071 path->slots[level] = slot;
Chris Masonb4ce94d2009-02-04 09:25:08 -05004072 btrfs_set_path_blocking(path);
Chris Masone02119d2008-09-05 16:13:11 -04004073 sret = btrfs_find_next_key(root, path, min_key, level,
Chris Mason3f157a22008-06-25 16:01:31 -04004074 cache_only, min_trans);
Chris Masone02119d2008-09-05 16:13:11 -04004075 if (sret == 0) {
David Sterbab3b4aa72011-04-21 01:20:15 +02004076 btrfs_release_path(path);
Chris Mason3f157a22008-06-25 16:01:31 -04004077 goto again;
4078 } else {
4079 goto out;
4080 }
4081 }
4082 /* save our key for returning back */
4083 btrfs_node_key_to_cpu(cur, &found_key, slot);
4084 path->slots[level] = slot;
4085 if (level == path->lowest_level) {
4086 ret = 0;
Chris Masonf7c79f32012-03-19 15:54:38 -04004087 unlock_up(path, level, 1, 0, NULL);
Chris Mason3f157a22008-06-25 16:01:31 -04004088 goto out;
4089 }
Chris Masonb4ce94d2009-02-04 09:25:08 -05004090 btrfs_set_path_blocking(path);
Chris Mason3f157a22008-06-25 16:01:31 -04004091 cur = read_node_slot(root, cur, slot);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01004092 BUG_ON(!cur); /* -ENOMEM */
Chris Mason3f157a22008-06-25 16:01:31 -04004093
Chris Masonbd681512011-07-16 15:23:14 -04004094 btrfs_tree_read_lock(cur);
Chris Masonb4ce94d2009-02-04 09:25:08 -05004095
Chris Masonbd681512011-07-16 15:23:14 -04004096 path->locks[level - 1] = BTRFS_READ_LOCK;
Chris Mason3f157a22008-06-25 16:01:31 -04004097 path->nodes[level - 1] = cur;
Chris Masonf7c79f32012-03-19 15:54:38 -04004098 unlock_up(path, level, 1, 0, NULL);
Chris Masonbd681512011-07-16 15:23:14 -04004099 btrfs_clear_path_blocking(path, NULL, 0);
Chris Mason3f157a22008-06-25 16:01:31 -04004100 }
4101out:
4102 if (ret == 0)
4103 memcpy(min_key, &found_key, sizeof(found_key));
Chris Masonb4ce94d2009-02-04 09:25:08 -05004104 btrfs_set_path_blocking(path);
Chris Mason3f157a22008-06-25 16:01:31 -04004105 return ret;
4106}
4107
4108/*
4109 * this is similar to btrfs_next_leaf, but does not try to preserve
4110 * and fixup the path. It looks for and returns the next key in the
4111 * tree based on the current path and the cache_only and min_trans
4112 * parameters.
4113 *
4114 * 0 is returned if another key is found, < 0 if there are any errors
4115 * and 1 is returned if there are no higher keys in the tree
4116 *
4117 * path->keep_locks should be set to 1 on the search made before
4118 * calling this function.
4119 */
Chris Masone7a84562008-06-25 16:01:31 -04004120int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
Yan Zheng33c66f42009-07-22 09:59:00 -04004121 struct btrfs_key *key, int level,
Chris Mason3f157a22008-06-25 16:01:31 -04004122 int cache_only, u64 min_trans)
Chris Masone7a84562008-06-25 16:01:31 -04004123{
Chris Masone7a84562008-06-25 16:01:31 -04004124 int slot;
4125 struct extent_buffer *c;
4126
Chris Mason934d3752008-12-08 16:43:10 -05004127 WARN_ON(!path->keep_locks);
Chris Masond3977122009-01-05 21:25:51 -05004128 while (level < BTRFS_MAX_LEVEL) {
Chris Masone7a84562008-06-25 16:01:31 -04004129 if (!path->nodes[level])
4130 return 1;
4131
4132 slot = path->slots[level] + 1;
4133 c = path->nodes[level];
Chris Mason3f157a22008-06-25 16:01:31 -04004134next:
Chris Masone7a84562008-06-25 16:01:31 -04004135 if (slot >= btrfs_header_nritems(c)) {
Yan Zheng33c66f42009-07-22 09:59:00 -04004136 int ret;
4137 int orig_lowest;
4138 struct btrfs_key cur_key;
4139 if (level + 1 >= BTRFS_MAX_LEVEL ||
4140 !path->nodes[level + 1])
Chris Masone7a84562008-06-25 16:01:31 -04004141 return 1;
Yan Zheng33c66f42009-07-22 09:59:00 -04004142
4143 if (path->locks[level + 1]) {
4144 level++;
4145 continue;
4146 }
4147
4148 slot = btrfs_header_nritems(c) - 1;
4149 if (level == 0)
4150 btrfs_item_key_to_cpu(c, &cur_key, slot);
4151 else
4152 btrfs_node_key_to_cpu(c, &cur_key, slot);
4153
4154 orig_lowest = path->lowest_level;
David Sterbab3b4aa72011-04-21 01:20:15 +02004155 btrfs_release_path(path);
Yan Zheng33c66f42009-07-22 09:59:00 -04004156 path->lowest_level = level;
4157 ret = btrfs_search_slot(NULL, root, &cur_key, path,
4158 0, 0);
4159 path->lowest_level = orig_lowest;
4160 if (ret < 0)
4161 return ret;
4162
4163 c = path->nodes[level];
4164 slot = path->slots[level];
4165 if (ret == 0)
4166 slot++;
4167 goto next;
Chris Masone7a84562008-06-25 16:01:31 -04004168 }
Yan Zheng33c66f42009-07-22 09:59:00 -04004169
Chris Masone7a84562008-06-25 16:01:31 -04004170 if (level == 0)
4171 btrfs_item_key_to_cpu(c, key, slot);
Chris Mason3f157a22008-06-25 16:01:31 -04004172 else {
4173 u64 blockptr = btrfs_node_blockptr(c, slot);
4174 u64 gen = btrfs_node_ptr_generation(c, slot);
4175
4176 if (cache_only) {
4177 struct extent_buffer *cur;
4178 cur = btrfs_find_tree_block(root, blockptr,
4179 btrfs_level_size(root, level - 1));
Chris Masonb9fab912012-05-06 07:23:47 -04004180 if (!cur ||
4181 btrfs_buffer_uptodate(cur, gen, 1) <= 0) {
Chris Mason3f157a22008-06-25 16:01:31 -04004182 slot++;
4183 if (cur)
4184 free_extent_buffer(cur);
4185 goto next;
4186 }
4187 free_extent_buffer(cur);
4188 }
4189 if (gen < min_trans) {
4190 slot++;
4191 goto next;
4192 }
Chris Masone7a84562008-06-25 16:01:31 -04004193 btrfs_node_key_to_cpu(c, key, slot);
Chris Mason3f157a22008-06-25 16:01:31 -04004194 }
Chris Masone7a84562008-06-25 16:01:31 -04004195 return 0;
4196 }
4197 return 1;
4198}
4199
Chris Mason7bb86312007-12-11 09:25:06 -05004200/*
Chris Mason925baed2008-06-25 16:01:30 -04004201 * search the tree again to find a leaf with greater keys
Chris Mason0f70abe2007-02-28 16:46:22 -05004202 * returns 0 if it found something or 1 if there are no greater leaves.
4203 * returns < 0 on io errors.
Chris Mason97571fd2007-02-24 13:39:08 -05004204 */
Chris Mason234b63a2007-03-13 10:46:10 -04004205int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
Chris Masond97e63b2007-02-20 16:40:44 -05004206{
4207 int slot;
Chris Mason8e73f272009-04-03 10:14:18 -04004208 int level;
Chris Mason5f39d392007-10-15 16:14:19 -04004209 struct extent_buffer *c;
Chris Mason8e73f272009-04-03 10:14:18 -04004210 struct extent_buffer *next;
Chris Mason925baed2008-06-25 16:01:30 -04004211 struct btrfs_key key;
4212 u32 nritems;
4213 int ret;
Chris Mason8e73f272009-04-03 10:14:18 -04004214 int old_spinning = path->leave_spinning;
Chris Masonbd681512011-07-16 15:23:14 -04004215 int next_rw_lock = 0;
Chris Mason925baed2008-06-25 16:01:30 -04004216
4217 nritems = btrfs_header_nritems(path->nodes[0]);
Chris Masond3977122009-01-05 21:25:51 -05004218 if (nritems == 0)
Chris Mason925baed2008-06-25 16:01:30 -04004219 return 1;
Chris Mason925baed2008-06-25 16:01:30 -04004220
Chris Mason8e73f272009-04-03 10:14:18 -04004221 btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
4222again:
4223 level = 1;
4224 next = NULL;
Chris Masonbd681512011-07-16 15:23:14 -04004225 next_rw_lock = 0;
David Sterbab3b4aa72011-04-21 01:20:15 +02004226 btrfs_release_path(path);
Chris Mason8e73f272009-04-03 10:14:18 -04004227
Chris Masona2135012008-06-25 16:01:30 -04004228 path->keep_locks = 1;
Chris Mason31533fb2011-07-26 16:01:59 -04004229 path->leave_spinning = 1;
Chris Mason8e73f272009-04-03 10:14:18 -04004230
Chris Mason925baed2008-06-25 16:01:30 -04004231 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4232 path->keep_locks = 0;
4233
4234 if (ret < 0)
4235 return ret;
4236
Chris Masona2135012008-06-25 16:01:30 -04004237 nritems = btrfs_header_nritems(path->nodes[0]);
Chris Mason168fd7d2008-06-25 16:01:30 -04004238 /*
4239 * by releasing the path above we dropped all our locks. A balance
4240 * could have added more items next to the key that used to be
4241 * at the very end of the block. So, check again here and
4242 * advance the path if there are now more items available.
4243 */
Chris Masona2135012008-06-25 16:01:30 -04004244 if (nritems > 0 && path->slots[0] < nritems - 1) {
Yan Zhenge457afe2009-07-22 09:59:00 -04004245 if (ret == 0)
4246 path->slots[0]++;
Chris Mason8e73f272009-04-03 10:14:18 -04004247 ret = 0;
Chris Mason925baed2008-06-25 16:01:30 -04004248 goto done;
4249 }
Chris Masond97e63b2007-02-20 16:40:44 -05004250
Chris Masond3977122009-01-05 21:25:51 -05004251 while (level < BTRFS_MAX_LEVEL) {
Chris Mason8e73f272009-04-03 10:14:18 -04004252 if (!path->nodes[level]) {
4253 ret = 1;
4254 goto done;
4255 }
Chris Mason5f39d392007-10-15 16:14:19 -04004256
Chris Masond97e63b2007-02-20 16:40:44 -05004257 slot = path->slots[level] + 1;
4258 c = path->nodes[level];
Chris Mason5f39d392007-10-15 16:14:19 -04004259 if (slot >= btrfs_header_nritems(c)) {
Chris Masond97e63b2007-02-20 16:40:44 -05004260 level++;
Chris Mason8e73f272009-04-03 10:14:18 -04004261 if (level == BTRFS_MAX_LEVEL) {
4262 ret = 1;
4263 goto done;
4264 }
Chris Masond97e63b2007-02-20 16:40:44 -05004265 continue;
4266 }
Chris Mason5f39d392007-10-15 16:14:19 -04004267
Chris Mason925baed2008-06-25 16:01:30 -04004268 if (next) {
Chris Masonbd681512011-07-16 15:23:14 -04004269 btrfs_tree_unlock_rw(next, next_rw_lock);
Chris Mason5f39d392007-10-15 16:14:19 -04004270 free_extent_buffer(next);
Chris Mason925baed2008-06-25 16:01:30 -04004271 }
Chris Mason5f39d392007-10-15 16:14:19 -04004272
Chris Mason8e73f272009-04-03 10:14:18 -04004273 next = c;
Chris Masonbd681512011-07-16 15:23:14 -04004274 next_rw_lock = path->locks[level];
Chris Mason8e73f272009-04-03 10:14:18 -04004275 ret = read_block_for_search(NULL, root, path, &next, level,
4276 slot, &key);
4277 if (ret == -EAGAIN)
4278 goto again;
Chris Mason5f39d392007-10-15 16:14:19 -04004279
Chris Mason76a05b32009-05-14 13:24:30 -04004280 if (ret < 0) {
David Sterbab3b4aa72011-04-21 01:20:15 +02004281 btrfs_release_path(path);
Chris Mason76a05b32009-05-14 13:24:30 -04004282 goto done;
4283 }
4284
Chris Mason5cd57b22008-06-25 16:01:30 -04004285 if (!path->skip_locking) {
Chris Masonbd681512011-07-16 15:23:14 -04004286 ret = btrfs_try_tree_read_lock(next);
Chris Mason8e73f272009-04-03 10:14:18 -04004287 if (!ret) {
4288 btrfs_set_path_blocking(path);
Chris Masonbd681512011-07-16 15:23:14 -04004289 btrfs_tree_read_lock(next);
Chris Mason31533fb2011-07-26 16:01:59 -04004290 btrfs_clear_path_blocking(path, next,
Chris Masonbd681512011-07-16 15:23:14 -04004291 BTRFS_READ_LOCK);
Chris Mason8e73f272009-04-03 10:14:18 -04004292 }
Chris Mason31533fb2011-07-26 16:01:59 -04004293 next_rw_lock = BTRFS_READ_LOCK;
Chris Mason5cd57b22008-06-25 16:01:30 -04004294 }
Chris Masond97e63b2007-02-20 16:40:44 -05004295 break;
4296 }
4297 path->slots[level] = slot;
Chris Masond3977122009-01-05 21:25:51 -05004298 while (1) {
Chris Masond97e63b2007-02-20 16:40:44 -05004299 level--;
4300 c = path->nodes[level];
Chris Mason925baed2008-06-25 16:01:30 -04004301 if (path->locks[level])
Chris Masonbd681512011-07-16 15:23:14 -04004302 btrfs_tree_unlock_rw(c, path->locks[level]);
Chris Mason8e73f272009-04-03 10:14:18 -04004303
Chris Mason5f39d392007-10-15 16:14:19 -04004304 free_extent_buffer(c);
Chris Masond97e63b2007-02-20 16:40:44 -05004305 path->nodes[level] = next;
4306 path->slots[level] = 0;
Chris Masona74a4b92008-06-25 16:01:31 -04004307 if (!path->skip_locking)
Chris Masonbd681512011-07-16 15:23:14 -04004308 path->locks[level] = next_rw_lock;
Chris Masond97e63b2007-02-20 16:40:44 -05004309 if (!level)
4310 break;
Chris Masonb4ce94d2009-02-04 09:25:08 -05004311
Chris Mason8e73f272009-04-03 10:14:18 -04004312 ret = read_block_for_search(NULL, root, path, &next, level,
4313 0, &key);
4314 if (ret == -EAGAIN)
4315 goto again;
4316
Chris Mason76a05b32009-05-14 13:24:30 -04004317 if (ret < 0) {
David Sterbab3b4aa72011-04-21 01:20:15 +02004318 btrfs_release_path(path);
Chris Mason76a05b32009-05-14 13:24:30 -04004319 goto done;
4320 }
4321
Chris Mason5cd57b22008-06-25 16:01:30 -04004322 if (!path->skip_locking) {
Chris Masonbd681512011-07-16 15:23:14 -04004323 ret = btrfs_try_tree_read_lock(next);
Chris Mason8e73f272009-04-03 10:14:18 -04004324 if (!ret) {
4325 btrfs_set_path_blocking(path);
Chris Masonbd681512011-07-16 15:23:14 -04004326 btrfs_tree_read_lock(next);
Chris Mason31533fb2011-07-26 16:01:59 -04004327 btrfs_clear_path_blocking(path, next,
Chris Masonbd681512011-07-16 15:23:14 -04004328 BTRFS_READ_LOCK);
Chris Mason8e73f272009-04-03 10:14:18 -04004329 }
Chris Mason31533fb2011-07-26 16:01:59 -04004330 next_rw_lock = BTRFS_READ_LOCK;
Chris Mason5cd57b22008-06-25 16:01:30 -04004331 }
Chris Masond97e63b2007-02-20 16:40:44 -05004332 }
Chris Mason8e73f272009-04-03 10:14:18 -04004333 ret = 0;
Chris Mason925baed2008-06-25 16:01:30 -04004334done:
Chris Masonf7c79f32012-03-19 15:54:38 -04004335 unlock_up(path, 0, 1, 0, NULL);
Chris Mason8e73f272009-04-03 10:14:18 -04004336 path->leave_spinning = old_spinning;
4337 if (!old_spinning)
4338 btrfs_set_path_blocking(path);
4339
4340 return ret;
Chris Masond97e63b2007-02-20 16:40:44 -05004341}
Chris Mason0b86a832008-03-24 15:01:56 -04004342
Chris Mason3f157a22008-06-25 16:01:31 -04004343/*
4344 * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
4345 * searching until it gets past min_objectid or finds an item of 'type'
4346 *
4347 * returns 0 if something is found, 1 if nothing was found and < 0 on error
4348 */
Chris Mason0b86a832008-03-24 15:01:56 -04004349int btrfs_previous_item(struct btrfs_root *root,
4350 struct btrfs_path *path, u64 min_objectid,
4351 int type)
4352{
4353 struct btrfs_key found_key;
4354 struct extent_buffer *leaf;
Chris Masone02119d2008-09-05 16:13:11 -04004355 u32 nritems;
Chris Mason0b86a832008-03-24 15:01:56 -04004356 int ret;
4357
Chris Masond3977122009-01-05 21:25:51 -05004358 while (1) {
Chris Mason0b86a832008-03-24 15:01:56 -04004359 if (path->slots[0] == 0) {
Chris Masonb4ce94d2009-02-04 09:25:08 -05004360 btrfs_set_path_blocking(path);
Chris Mason0b86a832008-03-24 15:01:56 -04004361 ret = btrfs_prev_leaf(root, path);
4362 if (ret != 0)
4363 return ret;
4364 } else {
4365 path->slots[0]--;
4366 }
4367 leaf = path->nodes[0];
Chris Masone02119d2008-09-05 16:13:11 -04004368 nritems = btrfs_header_nritems(leaf);
4369 if (nritems == 0)
4370 return 1;
4371 if (path->slots[0] == nritems)
4372 path->slots[0]--;
4373
Chris Mason0b86a832008-03-24 15:01:56 -04004374 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
Chris Masone02119d2008-09-05 16:13:11 -04004375 if (found_key.objectid < min_objectid)
4376 break;
Yan Zheng0a4eefb2009-07-24 11:06:53 -04004377 if (found_key.type == type)
4378 return 0;
Chris Masone02119d2008-09-05 16:13:11 -04004379 if (found_key.objectid == min_objectid &&
4380 found_key.type < type)
4381 break;
Chris Mason0b86a832008-03-24 15:01:56 -04004382 }
4383 return 1;
4384}