blob: 96bb3c0a79b08da610f41ed7442cec92cd8973c2 [file] [log] [blame]
Chris Mason0b86a832008-03-24 15:01:56 -04001/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18#include <linux/sched.h>
19#include <linux/bio.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090020#include <linux/slab.h>
Chris Mason8a4b83c2008-03-24 15:02:07 -040021#include <linux/buffer_head.h>
Chris Masonf2d8d742008-04-21 10:03:05 -040022#include <linux/blkdev.h>
Chris Mason788f20e2008-04-28 15:29:42 -040023#include <linux/random.h>
Chris Masonb765ead2009-04-03 10:27:10 -040024#include <linux/iocontext.h>
Ben Hutchings6f88a442010-12-29 14:55:03 +000025#include <linux/capability.h>
Chris Mason593060d2008-03-25 16:50:33 -040026#include <asm/div64.h>
Chris Mason4b4e25f2008-11-20 10:22:27 -050027#include "compat.h"
Chris Mason0b86a832008-03-24 15:01:56 -040028#include "ctree.h"
29#include "extent_map.h"
30#include "disk-io.h"
31#include "transaction.h"
32#include "print-tree.h"
33#include "volumes.h"
Chris Mason8b712842008-06-11 16:50:36 -040034#include "async-thread.h"
Chris Mason0b86a832008-03-24 15:01:56 -040035
/* Forward declarations for helpers defined later in this file. */
static int init_first_rw_device(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_device *device);
static int btrfs_relocate_sys_chunks(struct btrfs_root *root);

/*
 * uuid_mutex protects fs_uuids, the global list of all btrfs_fs_devices
 * (one entry per filesystem UUID ever scanned on this system).
 */
static DEFINE_MUTEX(uuid_mutex);
static LIST_HEAD(fs_uuids);
43
/* Take the per-fs chunk mutex; serializes chunk allocation/removal. */
static void lock_chunks(struct btrfs_root *root)
{
	mutex_lock(&root->fs_info->chunk_mutex);
}
48
/* Release the per-fs chunk mutex taken by lock_chunks(). */
static void unlock_chunks(struct btrfs_root *root)
{
	mutex_unlock(&root->fs_info->chunk_mutex);
}
53
Yan Zhenge4404d62008-12-12 10:03:26 -050054static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
55{
56 struct btrfs_device *device;
57 WARN_ON(fs_devices->opened);
58 while (!list_empty(&fs_devices->devices)) {
59 device = list_entry(fs_devices->devices.next,
60 struct btrfs_device, dev_list);
61 list_del(&device->dev_list);
62 kfree(device->name);
63 kfree(device);
64 }
65 kfree(fs_devices);
66}
67
Chris Mason8a4b83c2008-03-24 15:02:07 -040068int btrfs_cleanup_fs_uuids(void)
69{
70 struct btrfs_fs_devices *fs_devices;
Chris Mason8a4b83c2008-03-24 15:02:07 -040071
Yan Zheng2b820322008-11-17 21:11:30 -050072 while (!list_empty(&fs_uuids)) {
73 fs_devices = list_entry(fs_uuids.next,
74 struct btrfs_fs_devices, list);
75 list_del(&fs_devices->list);
Yan Zhenge4404d62008-12-12 10:03:26 -050076 free_fs_devices(fs_devices);
Chris Mason8a4b83c2008-03-24 15:02:07 -040077 }
78 return 0;
79}
80
Chris Masona1b32a52008-09-05 16:09:51 -040081static noinline struct btrfs_device *__find_device(struct list_head *head,
82 u64 devid, u8 *uuid)
Chris Mason8a4b83c2008-03-24 15:02:07 -040083{
84 struct btrfs_device *dev;
Chris Mason8a4b83c2008-03-24 15:02:07 -040085
Qinghuang Fengc6e30872009-01-21 10:59:08 -050086 list_for_each_entry(dev, head, dev_list) {
Chris Masona4437552008-04-18 10:29:38 -040087 if (dev->devid == devid &&
Chris Mason8f18cf12008-04-25 16:53:30 -040088 (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) {
Chris Mason8a4b83c2008-03-24 15:02:07 -040089 return dev;
Chris Masona4437552008-04-18 10:29:38 -040090 }
Chris Mason8a4b83c2008-03-24 15:02:07 -040091 }
92 return NULL;
93}
94
Chris Masona1b32a52008-09-05 16:09:51 -040095static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
Chris Mason8a4b83c2008-03-24 15:02:07 -040096{
Chris Mason8a4b83c2008-03-24 15:02:07 -040097 struct btrfs_fs_devices *fs_devices;
98
Qinghuang Fengc6e30872009-01-21 10:59:08 -050099 list_for_each_entry(fs_devices, &fs_uuids, list) {
Chris Mason8a4b83c2008-03-24 15:02:07 -0400100 if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
101 return fs_devices;
102 }
103 return NULL;
104}
105
Chris Masonffbd5172009-04-20 15:50:09 -0400106static void requeue_list(struct btrfs_pending_bios *pending_bios,
107 struct bio *head, struct bio *tail)
108{
109
110 struct bio *old_head;
111
112 old_head = pending_bios->head;
113 pending_bios->head = head;
114 if (pending_bios->tail)
115 tail->bi_next = old_head;
116 else
117 pending_bios->tail = tail;
118}
119
/*
 * we try to collect pending bios for a device so we don't get a large
 * number of procs sending bios down to the same device. This greatly
 * improves the schedulers ability to collect and merge the bios.
 *
 * But, it also turns into a long list of bios to process and that is sure
 * to eventually make the worker thread block. The solution here is to
 * make some progress and then put this work struct back at the end of
 * the list if the block device is congested. This way, multiple devices
 * can make progress from a single worker thread.
 */
static noinline int run_scheduled_bios(struct btrfs_device *device)
{
	struct bio *pending;
	struct backing_dev_info *bdi;
	struct btrfs_fs_info *fs_info;
	struct btrfs_pending_bios *pending_bios;
	struct bio *tail;
	struct bio *cur;
	int again = 0;
	unsigned long num_run;		/* bios submitted from the current list grab */
	unsigned long batch_run = 0;	/* bios submitted since entry / last requeue */
	unsigned long limit;
	unsigned long last_waited = 0;
	int force_reg = 0;		/* alternate between sync and regular lists */
	int sync_pending = 0;		/* current plug holds sync requests */
	struct blk_plug plug;

	/*
	 * this function runs all the bios we've collected for
	 * a particular device. We don't want to wander off to
	 * another device without first sending all of these down.
	 * So, setup a plug here and finish it off before we return
	 */
	blk_start_plug(&plug);

	bdi = blk_get_backing_dev_info(device->bdev);
	fs_info = device->dev_root->fs_info;
	limit = btrfs_async_submit_limit(fs_info);
	/* wake async submitters a bit before the limit is reached */
	limit = limit * 2 / 3;

loop:
	spin_lock(&device->io_lock);

loop_lock:
	num_run = 0;

	/* take all the bios off the list at once and process them
	 * later on (without the lock held). But, remember the
	 * tail and other pointers so the bios can be properly reinserted
	 * into the list if we hit congestion
	 */
	if (!force_reg && device->pending_sync_bios.head) {
		pending_bios = &device->pending_sync_bios;
		force_reg = 1;
	} else {
		pending_bios = &device->pending_bios;
		force_reg = 0;
	}

	pending = pending_bios->head;
	tail = pending_bios->tail;
	WARN_ON(pending && !tail);

	/*
	 * if pending was null this time around, no bios need processing
	 * at all and we can stop. Otherwise it'll loop back up again
	 * and do an additional check so no bios are missed.
	 *
	 * device->running_pending is used to synchronize with the
	 * schedule_bio code.
	 */
	if (device->pending_sync_bios.head == NULL &&
	    device->pending_bios.head == NULL) {
		again = 0;
		device->running_pending = 0;
	} else {
		again = 1;
		device->running_pending = 1;
	}

	pending_bios->head = NULL;
	pending_bios->tail = NULL;

	spin_unlock(&device->io_lock);

	while (pending) {

		rmb();
		/* we want to work on both lists, but do more bios on the
		 * sync list than the regular list
		 */
		if ((num_run > 32 &&
		    pending_bios != &device->pending_sync_bios &&
		    device->pending_sync_bios.head) ||
		   (num_run > 64 && pending_bios == &device->pending_sync_bios &&
		    device->pending_bios.head)) {
			/* put the remainder back and switch lists */
			spin_lock(&device->io_lock);
			requeue_list(pending_bios, pending, tail);
			goto loop_lock;
		}

		cur = pending;
		pending = pending->bi_next;
		cur->bi_next = NULL;
		atomic_dec(&fs_info->nr_async_bios);

		if (atomic_read(&fs_info->nr_async_bios) < limit &&
		    waitqueue_active(&fs_info->async_submit_wait))
			wake_up(&fs_info->async_submit_wait);

		BUG_ON(atomic_read(&cur->bi_cnt) == 0);

		/*
		 * if we're doing the sync list, record that our
		 * plug has some sync requests on it
		 *
		 * If we're doing the regular list and there are
		 * sync requests sitting around, unplug before
		 * we add more
		 */
		if (pending_bios == &device->pending_sync_bios) {
			sync_pending = 1;
		} else if (sync_pending) {
			blk_finish_plug(&plug);
			blk_start_plug(&plug);
			sync_pending = 0;
		}

		submit_bio(cur->bi_rw, cur);
		num_run++;
		batch_run++;
		if (need_resched())
			cond_resched();

		/*
		 * we made progress, there is more work to do and the bdi
		 * is now congested. Back off and let other work structs
		 * run instead
		 */
		if (pending && bdi_write_congested(bdi) && batch_run > 8 &&
		    fs_info->fs_devices->open_devices > 1) {
			struct io_context *ioc;

			ioc = current->io_context;

			/*
			 * the main goal here is that we don't want to
			 * block if we're going to be able to submit
			 * more requests without blocking.
			 *
			 * This code does two great things, it pokes into
			 * the elevator code from a filesystem _and_
			 * it makes assumptions about how batching works.
			 */
			if (ioc && ioc->nr_batch_requests > 0 &&
			    time_before(jiffies, ioc->last_waited + HZ/50UL) &&
			    (last_waited == 0 ||
			     ioc->last_waited == last_waited)) {
				/*
				 * we want to go through our batch of
				 * requests and stop. So, we copy out
				 * the ioc->last_waited time and test
				 * against it before looping
				 */
				last_waited = ioc->last_waited;
				if (need_resched())
					cond_resched();
				continue;
			}
			/* requeue what is left and let another worker run */
			spin_lock(&device->io_lock);
			requeue_list(pending_bios, pending, tail);
			device->running_pending = 1;

			spin_unlock(&device->io_lock);
			btrfs_requeue_work(&device->work);
			goto done;
		}
		/* unplug every 64 requests just for good measure */
		if (batch_run % 64 == 0) {
			blk_finish_plug(&plug);
			blk_start_plug(&plug);
			sync_pending = 0;
		}
	}

	cond_resched();
	if (again)
		goto loop;

	/* one last racy check: anything queued while we were unlocked? */
	spin_lock(&device->io_lock);
	if (device->pending_bios.head || device->pending_sync_bios.head)
		goto loop_lock;
	spin_unlock(&device->io_lock);

done:
	blk_finish_plug(&plug);
	return 0;
}
319
Christoph Hellwigb2950862008-12-02 09:54:17 -0500320static void pending_bios_fn(struct btrfs_work *work)
Chris Mason8b712842008-06-11 16:50:36 -0400321{
322 struct btrfs_device *device;
323
324 device = container_of(work, struct btrfs_device, work);
325 run_scheduled_bios(device);
326}
327
/*
 * Record a scanned device in the global fs_uuids bookkeeping.
 *
 * Creates the btrfs_fs_devices for a new fsid, creates the
 * btrfs_device for a new devid, or - for an already-known device
 * whose path changed - swaps in the new path.  On success,
 * *fs_devices_ret points at the (possibly new) fs_devices.
 *
 * Returns 0, -ENOMEM on allocation failure, or -EBUSY when a new
 * device shows up for an fs_devices that is already opened.
 */
static noinline int device_list_add(const char *path,
			   struct btrfs_super_block *disk_super,
			   u64 devid, struct btrfs_fs_devices **fs_devices_ret)
{
	struct btrfs_device *device;
	struct btrfs_fs_devices *fs_devices;
	u64 found_transid = btrfs_super_generation(disk_super);
	char *name;

	fs_devices = find_fsid(disk_super->fsid);
	if (!fs_devices) {
		/* first device seen for this fsid: create the container */
		fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
		if (!fs_devices)
			return -ENOMEM;
		INIT_LIST_HEAD(&fs_devices->devices);
		INIT_LIST_HEAD(&fs_devices->alloc_list);
		list_add(&fs_devices->list, &fs_uuids);
		memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
		fs_devices->latest_devid = devid;
		fs_devices->latest_trans = found_transid;
		mutex_init(&fs_devices->device_list_mutex);
		device = NULL;
	} else {
		device = __find_device(&fs_devices->devices, devid,
				       disk_super->dev_item.uuid);
	}
	if (!device) {
		if (fs_devices->opened)
			return -EBUSY;

		device = kzalloc(sizeof(*device), GFP_NOFS);
		if (!device) {
			/* we can safely leave the fs_devices entry around */
			return -ENOMEM;
		}
		device->devid = devid;
		device->work.func = pending_bios_fn;
		memcpy(device->uuid, disk_super->dev_item.uuid,
		       BTRFS_UUID_SIZE);
		spin_lock_init(&device->io_lock);
		device->name = kstrdup(path, GFP_NOFS);
		if (!device->name) {
			kfree(device);
			return -ENOMEM;
		}
		INIT_LIST_HEAD(&device->dev_alloc_list);

		/* init readahead state */
		spin_lock_init(&device->reada_lock);
		device->reada_curr_zone = NULL;
		atomic_set(&device->reada_in_flight, 0);
		device->reada_next = 0;
		INIT_RADIX_TREE(&device->reada_zones, GFP_NOFS & ~__GFP_WAIT);
		INIT_RADIX_TREE(&device->reada_extents, GFP_NOFS & ~__GFP_WAIT);

		/* list is walked under RCU elsewhere, so add with _rcu */
		mutex_lock(&fs_devices->device_list_mutex);
		list_add_rcu(&device->dev_list, &fs_devices->devices);
		mutex_unlock(&fs_devices->device_list_mutex);

		device->fs_devices = fs_devices;
		fs_devices->num_devices++;
	} else if (!device->name || strcmp(device->name, path)) {
		/* known device rescanned under a different path */
		name = kstrdup(path, GFP_NOFS);
		if (!name)
			return -ENOMEM;
		kfree(device->name);
		device->name = name;
		if (device->missing) {
			/* it reappeared, clear the missing state */
			fs_devices->missing_devices--;
			device->missing = 0;
		}
	}

	/* track which device carries the newest superblock generation */
	if (found_transid > fs_devices->latest_trans) {
		fs_devices->latest_devid = devid;
		fs_devices->latest_trans = found_transid;
	}
	*fs_devices_ret = fs_devices;
	return 0;
}
408
/*
 * Deep-copy an fs_devices structure and its device list (devid, uuid,
 * name; not the open bdev state).  Returns the clone or
 * ERR_PTR(-ENOMEM); on failure everything allocated so far is freed.
 */
static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
{
	struct btrfs_fs_devices *fs_devices;
	struct btrfs_device *device;
	struct btrfs_device *orig_dev;

	fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
	if (!fs_devices)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&fs_devices->devices);
	INIT_LIST_HEAD(&fs_devices->alloc_list);
	INIT_LIST_HEAD(&fs_devices->list);
	mutex_init(&fs_devices->device_list_mutex);
	fs_devices->latest_devid = orig->latest_devid;
	fs_devices->latest_trans = orig->latest_trans;
	memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid));

	/* We have held the volume lock, it is safe to get the devices. */
	list_for_each_entry(orig_dev, &orig->devices, dev_list) {
		device = kzalloc(sizeof(*device), GFP_NOFS);
		if (!device)
			goto error;

		device->name = kstrdup(orig_dev->name, GFP_NOFS);
		if (!device->name) {
			kfree(device);
			goto error;
		}

		device->devid = orig_dev->devid;
		device->work.func = pending_bios_fn;
		memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid));
		spin_lock_init(&device->io_lock);
		INIT_LIST_HEAD(&device->dev_list);
		INIT_LIST_HEAD(&device->dev_alloc_list);

		list_add(&device->dev_list, &fs_devices->devices);
		device->fs_devices = fs_devices;
		fs_devices->num_devices++;
	}
	return fs_devices;
error:
	/* frees the partially built clone, including devices added so far */
	free_fs_devices(fs_devices);
	return ERR_PTR(-ENOMEM);
}
455
/*
 * Drop every device that is not referenced by the filesystem metadata
 * (!in_fs_metadata): close its bdev if open, unlink it from the
 * allocation and device lists, and free it.  Walks seed fs_devices
 * chains as well.  Always returns 0.
 */
int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_device *device, *next;

	mutex_lock(&uuid_mutex);
again:
	/* This is the initialized path, it is safe to release the devices. */
	list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
		if (device->in_fs_metadata)
			continue;

		if (device->bdev) {
			blkdev_put(device->bdev, device->mode);
			device->bdev = NULL;
			fs_devices->open_devices--;
		}
		if (device->writeable) {
			list_del_init(&device->dev_alloc_list);
			device->writeable = 0;
			fs_devices->rw_devices--;
		}
		list_del_init(&device->dev_list);
		fs_devices->num_devices--;
		kfree(device->name);
		kfree(device);
	}

	/* repeat for any chained seed filesystems */
	if (fs_devices->seed) {
		fs_devices = fs_devices->seed;
		goto again;
	}

	mutex_unlock(&uuid_mutex);
	return 0;
}
Chris Masona0af4692008-05-13 16:03:06 -0400491
Xiao Guangrong1f781602011-04-20 10:09:16 +0000492static void __free_device(struct work_struct *work)
493{
494 struct btrfs_device *device;
495
496 device = container_of(work, struct btrfs_device, rcu_work);
497
498 if (device->bdev)
499 blkdev_put(device->bdev, device->mode);
500
501 kfree(device->name);
502 kfree(device);
503}
504
505static void free_device(struct rcu_head *head)
506{
507 struct btrfs_device *device;
508
509 device = container_of(head, struct btrfs_device, rcu);
510
511 INIT_WORK(&device->rcu_work, __free_device);
512 schedule_work(&device->rcu_work);
513}
514
/*
 * Drop one open reference on fs_devices; on the last reference, close
 * every device.  Because the device list may be walked under RCU, each
 * open device is replaced in-place (list_replace_rcu) by a closed
 * shallow copy, and the old device is freed via call_rcu, which also
 * releases the bdev (see free_device/__free_device).
 */
static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_device *device;

	if (--fs_devices->opened > 0)
		return 0;

	mutex_lock(&fs_devices->device_list_mutex);
	list_for_each_entry(device, &fs_devices->devices, dev_list) {
		struct btrfs_device *new_device;

		if (device->bdev)
			fs_devices->open_devices--;

		if (device->writeable) {
			list_del_init(&device->dev_alloc_list);
			fs_devices->rw_devices--;
		}

		if (device->can_discard)
			fs_devices->num_can_discard--;

		/* build the closed stand-in that replaces this device */
		new_device = kmalloc(sizeof(*new_device), GFP_NOFS);
		BUG_ON(!new_device);	/* NOTE(review): no OOM recovery here */
		memcpy(new_device, device, sizeof(*new_device));
		new_device->name = kstrdup(device->name, GFP_NOFS);
		BUG_ON(device->name && !new_device->name);
		new_device->bdev = NULL;
		new_device->writeable = 0;
		new_device->in_fs_metadata = 0;
		new_device->can_discard = 0;
		list_replace_rcu(&device->dev_list, &new_device->dev_list);

		/* old device (and its bdev) freed after the grace period */
		call_rcu(&device->rcu, free_device);
	}
	mutex_unlock(&fs_devices->device_list_mutex);

	WARN_ON(fs_devices->open_devices);
	WARN_ON(fs_devices->rw_devices);
	fs_devices->opened = 0;
	fs_devices->seeding = 0;

	return 0;
}
559
/*
 * Public close entry point: close @fs_devices under uuid_mutex and,
 * if that was the last open reference, also close and free the whole
 * chain of seed fs_devices hanging off ->seed.
 */
int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_fs_devices *seed_devices = NULL;
	int ret;

	mutex_lock(&uuid_mutex);
	ret = __btrfs_close_devices(fs_devices);
	if (!fs_devices->opened) {
		/* detach the seed chain so it can be torn down unlocked */
		seed_devices = fs_devices->seed;
		fs_devices->seed = NULL;
	}
	mutex_unlock(&uuid_mutex);

	while (seed_devices) {
		fs_devices = seed_devices;
		seed_devices = fs_devices->seed;
		__btrfs_close_devices(fs_devices);
		free_fs_devices(fs_devices);
	}
	return ret;
}
581
/*
 * Open every device on @fs_devices exclusively, validate each one's
 * superblock (devid + uuid must match what was recorded at scan time),
 * and record per-device and aggregate state: writeable/seeding,
 * discard support, rotational hint, and which device holds the newest
 * superblock generation (latest_bdev/devid/trans).
 *
 * Devices that fail to open or validate are skipped, not fatal;
 * returns -EINVAL only when no device at all could be opened.
 */
static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
				fmode_t flags, void *holder)
{
	struct request_queue *q;
	struct block_device *bdev;
	struct list_head *head = &fs_devices->devices;
	struct btrfs_device *device;
	struct block_device *latest_bdev = NULL;
	struct buffer_head *bh;
	struct btrfs_super_block *disk_super;
	u64 latest_devid = 0;
	u64 latest_transid = 0;
	u64 devid;
	int seeding = 1;
	int ret = 0;

	/* exclusive open: keep others from claiming our devices */
	flags |= FMODE_EXCL;

	list_for_each_entry(device, head, dev_list) {
		if (device->bdev)
			continue;	/* already open */
		if (!device->name)
			continue;	/* missing device, no path to open */

		bdev = blkdev_get_by_path(device->name, flags, holder);
		if (IS_ERR(bdev)) {
			printk(KERN_INFO "open %s failed\n", device->name);
			goto error;
		}
		set_blocksize(bdev, 4096);

		bh = btrfs_read_dev_super(bdev);
		if (!bh)
			goto error_close;

		disk_super = (struct btrfs_super_block *)bh->b_data;
		devid = btrfs_stack_device_id(&disk_super->dev_item);
		if (devid != device->devid)
			goto error_brelse;

		if (memcmp(device->uuid, disk_super->dev_item.uuid,
			   BTRFS_UUID_SIZE))
			goto error_brelse;

		device->generation = btrfs_super_generation(disk_super);
		if (!latest_transid || device->generation > latest_transid) {
			latest_devid = devid;
			latest_transid = device->generation;
			latest_bdev = bdev;
		}

		if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) {
			device->writeable = 0;
		} else {
			device->writeable = !bdev_read_only(bdev);
			/* any non-seed device means the fs is not seeding */
			seeding = 0;
		}

		q = bdev_get_queue(bdev);
		if (blk_queue_discard(q)) {
			device->can_discard = 1;
			fs_devices->num_can_discard++;
		}

		device->bdev = bdev;
		device->in_fs_metadata = 0;
		device->mode = flags;

		if (!blk_queue_nonrot(bdev_get_queue(bdev)))
			fs_devices->rotating = 1;

		fs_devices->open_devices++;
		if (device->writeable) {
			fs_devices->rw_devices++;
			list_add(&device->dev_alloc_list,
				 &fs_devices->alloc_list);
		}
		brelse(bh);
		continue;

error_brelse:
		brelse(bh);
error_close:
		blkdev_put(bdev, flags);
error:
		continue;
	}
	if (fs_devices->open_devices == 0) {
		ret = -EINVAL;
		goto out;
	}
	fs_devices->seeding = seeding;
	fs_devices->opened = 1;
	fs_devices->latest_bdev = latest_bdev;
	fs_devices->latest_devid = latest_devid;
	fs_devices->latest_trans = latest_transid;
	fs_devices->total_rw_bytes = 0;
out:
	return ret;
}
682
683int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
Christoph Hellwig97288f22008-12-02 06:36:09 -0500684 fmode_t flags, void *holder)
Yan Zheng2b820322008-11-17 21:11:30 -0500685{
686 int ret;
687
688 mutex_lock(&uuid_mutex);
689 if (fs_devices->opened) {
Yan Zhenge4404d62008-12-12 10:03:26 -0500690 fs_devices->opened++;
691 ret = 0;
Yan Zheng2b820322008-11-17 21:11:30 -0500692 } else {
Chris Mason15916de2008-11-19 21:17:22 -0500693 ret = __btrfs_open_devices(fs_devices, flags, holder);
Yan Zheng2b820322008-11-17 21:11:30 -0500694 }
Chris Mason8a4b83c2008-03-24 15:02:07 -0400695 mutex_unlock(&uuid_mutex);
Chris Mason8a4b83c2008-03-24 15:02:07 -0400696 return ret;
697}
698
/*
 * Probe @path for a btrfs superblock.  On success the device is
 * registered via device_list_add() and *fs_devices_ret points at the
 * owning fs_devices.  The device is opened only transiently here;
 * it is always released again before returning.
 *
 * Returns 0 on success, the blkdev_get_by_path() error, the
 * set_blocksize() error, or -EINVAL when no btrfs super is found.
 */
int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
			  struct btrfs_fs_devices **fs_devices_ret)
{
	struct btrfs_super_block *disk_super;
	struct block_device *bdev;
	struct buffer_head *bh;
	int ret;
	u64 devid;
	u64 transid;

	flags |= FMODE_EXCL;
	bdev = blkdev_get_by_path(path, flags, holder);

	if (IS_ERR(bdev)) {
		ret = PTR_ERR(bdev);
		goto error;
	}

	/* uuid_mutex taken after the open; device_list_add needs it held */
	mutex_lock(&uuid_mutex);
	ret = set_blocksize(bdev, 4096);
	if (ret)
		goto error_close;
	bh = btrfs_read_dev_super(bdev);
	if (!bh) {
		ret = -EINVAL;
		goto error_close;
	}
	disk_super = (struct btrfs_super_block *)bh->b_data;
	devid = btrfs_stack_device_id(&disk_super->dev_item);
	transid = btrfs_super_generation(disk_super);
	if (disk_super->label[0])
		printk(KERN_INFO "device label %s ", disk_super->label);
	else
		printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
	printk(KERN_CONT "devid %llu transid %llu %s\n",
	       (unsigned long long)devid, (unsigned long long)transid, path);
	ret = device_list_add(path, disk_super, devid, fs_devices_ret);

	brelse(bh);
error_close:
	mutex_unlock(&uuid_mutex);
	blkdev_put(bdev, flags);
error:
	return ret;
}
Chris Mason0b86a832008-03-24 15:01:56 -0400744
/*
 * btrfs_account_dev_extents_size - account the used device space in a range
 * @device:	the device whose dev extent items are scanned
 * @start:	first byte of the range to account (inclusive)
 * @end:	last byte of the range to account (inclusive)
 * @length:	result: number of bytes in [@start, @end] covered by dev
 *		extents, i.e. already allocated to some chunk
 *
 * Walks the DEV_EXTENT items of @device in key order and sums the overlap
 * of each extent with the requested range.  Returns 0 on success (with
 * *@length filled in) or a negative errno from the tree search.
 */
int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
				   u64 end, u64 *length)
{
	struct btrfs_key key;
	struct btrfs_root *root = device->dev_root;
	struct btrfs_dev_extent *dev_extent;
	struct btrfs_path *path;
	u64 extent_end;
	int ret;
	int slot;
	struct extent_buffer *l;

	*length = 0;

	/* nothing can be allocated at or beyond the device size */
	if (start >= device->total_bytes)
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->reada = 2;	/* aggressive readahead for the leaf walk */

	key.objectid = device->devid;
	key.offset = start;
	key.type = BTRFS_DEV_EXTENT_KEY;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	if (ret > 0) {
		/*
		 * no exact match: the extent containing @start (if any)
		 * begins at an earlier offset, so step back one item
		 */
		ret = btrfs_previous_item(root, path, key.objectid, key.type);
		if (ret < 0)
			goto out;
	}

	while (1) {
		l = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(l)) {
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto out;

			break;
		}
		btrfs_item_key_to_cpu(l, &key, slot);

		if (key.objectid < device->devid)
			goto next;

		if (key.objectid > device->devid)
			break;

		if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
			goto next;

		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
		extent_end = key.offset + btrfs_dev_extent_length(l,
								  dev_extent);
		if (key.offset <= start && extent_end > end) {
			/* extent covers the whole range; done */
			*length = end - start + 1;
			break;
		} else if (key.offset <= start && extent_end > start)
			/* extent overlaps the head of the range */
			*length += extent_end - start;
		else if (key.offset > start && extent_end <= end)
			/* extent lies entirely inside the range */
			*length += extent_end - key.offset;
		else if (key.offset > start && key.offset <= end) {
			/* extent overlaps the tail of the range; done */
			*length += end - key.offset + 1;
			break;
		} else if (key.offset > end)
			/* extent starts past the range; done */
			break;

next:
		path->slots[0]++;
	}
	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}
828
/*
 * find_free_dev_extent - find free space in the specified device
 * @trans:	transaction handle
 * @device:	the device which we search the free space in
 * @num_bytes:	the size of the free space that we need
 * @start:	store the start of the free space.
 * @len:	the size of the free space that we find, or the size of the
 *		max free space if we don't find suitable free space
 *
 * this uses a pretty simple search, the expectation is that it is
 * called very infrequently and that a given device has a small number
 * of extents
 *
 * @start is used to store the start of the free space if we find one. But
 * if we don't find suitable free space, it will be used to store the start
 * position of the max free space.
 *
 * @len is used to store the size of the free space that we find.
 * But if we don't find suitable free space, it is used to store the size of
 * the max free space.
 *
 * Returns 0 when a hole of at least @num_bytes was found, -ENOSPC when
 * only smaller (possibly empty) holes exist, or a negative errno from the
 * tree search.
 */
int find_free_dev_extent(struct btrfs_trans_handle *trans,
			 struct btrfs_device *device, u64 num_bytes,
			 u64 *start, u64 *len)
{
	struct btrfs_key key;
	struct btrfs_root *root = device->dev_root;
	struct btrfs_dev_extent *dev_extent;
	struct btrfs_path *path;
	u64 hole_size;
	u64 max_hole_start;
	u64 max_hole_size;
	u64 extent_end;
	u64 search_start;
	u64 search_end = device->total_bytes;
	int ret;
	int slot;
	struct extent_buffer *l;

	/* FIXME use last free of some kind */

	/* we don't want to overwrite the superblock on the drive,
	 * so we make sure to start at an offset of at least 1MB
	 */
	search_start = max(root->fs_info->alloc_start, 1024ull * 1024);

	max_hole_start = search_start;
	max_hole_size = 0;
	hole_size = 0;

	if (search_start >= search_end) {
		ret = -ENOSPC;
		goto error;
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto error;
	}
	path->reada = 2;	/* aggressive readahead for the leaf walk */

	key.objectid = device->devid;
	key.offset = search_start;
	key.type = BTRFS_DEV_EXTENT_KEY;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	if (ret > 0) {
		/*
		 * no exact match: the extent containing search_start (if
		 * any) begins at an earlier offset, so step back one item
		 */
		ret = btrfs_previous_item(root, path, key.objectid, key.type);
		if (ret < 0)
			goto out;
	}

	/*
	 * walk this device's dev extents in offset order; the gap between
	 * search_start (end of the previous extent) and the next extent's
	 * key.offset is a candidate hole
	 */
	while (1) {
		l = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(l)) {
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto out;

			break;
		}
		btrfs_item_key_to_cpu(l, &key, slot);

		if (key.objectid < device->devid)
			goto next;

		if (key.objectid > device->devid)
			break;

		if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
			goto next;

		if (key.offset > search_start) {
			hole_size = key.offset - search_start;

			if (hole_size > max_hole_size) {
				max_hole_start = search_start;
				max_hole_size = hole_size;
			}

			/*
			 * If this free space is greater than which we need,
			 * it must be the max free space that we have found
			 * until now, so max_hole_start must point to the start
			 * of this free space and the length of this free space
			 * is stored in max_hole_size. Thus, we return
			 * max_hole_start and max_hole_size and go back to the
			 * caller.
			 */
			if (hole_size >= num_bytes) {
				ret = 0;
				goto out;
			}
		}

		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
		extent_end = key.offset + btrfs_dev_extent_length(l,
								  dev_extent);
		if (extent_end > search_start)
			search_start = extent_end;
next:
		path->slots[0]++;
		cond_resched();
	}

	/*
	 * At this point, search_start should be the end of
	 * allocated dev extents, and when shrinking the device,
	 * search_end may be smaller than search_start.
	 */
	if (search_end > search_start)
		hole_size = search_end - search_start;

	if (hole_size > max_hole_size) {
		max_hole_start = search_start;
		max_hole_size = hole_size;
	}

	/* See above. */
	if (hole_size < num_bytes)
		ret = -ENOSPC;
	else
		ret = 0;

out:
	btrfs_free_path(path);
error:
	/* always report the best hole we saw, even on failure */
	*start = max_hole_start;
	if (len)
		*len = max_hole_size;
	return ret;
}
987
/*
 * btrfs_free_dev_extent - delete the dev extent item of @device that
 * covers device offset @start
 *
 * The freed length is returned to @device->bytes_used and to the fs-wide
 * free_chunk_space counter.  Returns 0 on success or a negative errno;
 * BUG()s if no covering extent exists (era-typical error handling -- the
 * caller is expected to pass a valid chunk offset).
 */
static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
			  struct btrfs_device *device,
			  u64 start)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_root *root = device->dev_root;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *leaf = NULL;
	struct btrfs_dev_extent *extent = NULL;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = device->devid;
	key.offset = start;
	key.type = BTRFS_DEV_EXTENT_KEY;
again:
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0) {
		/*
		 * no item keyed exactly at @start: the extent covering
		 * @start must begin earlier, so step back to it ...
		 */
		ret = btrfs_previous_item(root, path, key.objectid,
					  BTRFS_DEV_EXTENT_KEY);
		if (ret)
			goto out;
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		extent = btrfs_item_ptr(leaf, path->slots[0],
					struct btrfs_dev_extent);
		BUG_ON(found_key.offset > start || found_key.offset +
		       btrfs_dev_extent_length(leaf, extent) < start);
		/*
		 * ... and redo the search keyed on the extent itself so
		 * the path is positioned (and the leaf COWed) for deletion
		 */
		key = found_key;
		btrfs_release_path(path);
		goto again;
	} else if (ret == 0) {
		leaf = path->nodes[0];
		extent = btrfs_item_ptr(leaf, path->slots[0],
					struct btrfs_dev_extent);
	}
	BUG_ON(ret);

	/* give the extent's length back to the device's space accounting */
	if (device->bytes_used > 0) {
		u64 len = btrfs_dev_extent_length(leaf, extent);
		device->bytes_used -= len;
		spin_lock(&root->fs_info->free_chunk_lock);
		root->fs_info->free_chunk_space += len;
		spin_unlock(&root->fs_info->free_chunk_lock);
	}
	ret = btrfs_del_item(trans, root, path);

out:
	btrfs_free_path(path);
	return ret;
}
1043
Yan Zheng2b820322008-11-17 21:11:30 -05001044int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
Chris Mason0b86a832008-03-24 15:01:56 -04001045 struct btrfs_device *device,
Chris Masone17cade2008-04-15 15:41:47 -04001046 u64 chunk_tree, u64 chunk_objectid,
Yan Zheng2b820322008-11-17 21:11:30 -05001047 u64 chunk_offset, u64 start, u64 num_bytes)
Chris Mason0b86a832008-03-24 15:01:56 -04001048{
1049 int ret;
1050 struct btrfs_path *path;
1051 struct btrfs_root *root = device->dev_root;
1052 struct btrfs_dev_extent *extent;
1053 struct extent_buffer *leaf;
1054 struct btrfs_key key;
1055
Chris Masondfe25022008-05-13 13:46:40 -04001056 WARN_ON(!device->in_fs_metadata);
Chris Mason0b86a832008-03-24 15:01:56 -04001057 path = btrfs_alloc_path();
1058 if (!path)
1059 return -ENOMEM;
1060
Chris Mason0b86a832008-03-24 15:01:56 -04001061 key.objectid = device->devid;
Yan Zheng2b820322008-11-17 21:11:30 -05001062 key.offset = start;
Chris Mason0b86a832008-03-24 15:01:56 -04001063 key.type = BTRFS_DEV_EXTENT_KEY;
1064 ret = btrfs_insert_empty_item(trans, root, path, &key,
1065 sizeof(*extent));
1066 BUG_ON(ret);
1067
1068 leaf = path->nodes[0];
1069 extent = btrfs_item_ptr(leaf, path->slots[0],
1070 struct btrfs_dev_extent);
Chris Masone17cade2008-04-15 15:41:47 -04001071 btrfs_set_dev_extent_chunk_tree(leaf, extent, chunk_tree);
1072 btrfs_set_dev_extent_chunk_objectid(leaf, extent, chunk_objectid);
1073 btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
1074
1075 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
1076 (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
1077 BTRFS_UUID_SIZE);
1078
Chris Mason0b86a832008-03-24 15:01:56 -04001079 btrfs_set_dev_extent_length(leaf, extent, num_bytes);
1080 btrfs_mark_buffer_dirty(leaf);
Chris Mason0b86a832008-03-24 15:01:56 -04001081 btrfs_free_path(path);
1082 return ret;
1083}
1084
Chris Masona1b32a52008-09-05 16:09:51 -04001085static noinline int find_next_chunk(struct btrfs_root *root,
1086 u64 objectid, u64 *offset)
Chris Mason0b86a832008-03-24 15:01:56 -04001087{
1088 struct btrfs_path *path;
1089 int ret;
1090 struct btrfs_key key;
Chris Masone17cade2008-04-15 15:41:47 -04001091 struct btrfs_chunk *chunk;
Chris Mason0b86a832008-03-24 15:01:56 -04001092 struct btrfs_key found_key;
1093
1094 path = btrfs_alloc_path();
Mark Fasheh92b8e892011-07-12 10:57:59 -07001095 if (!path)
1096 return -ENOMEM;
Chris Mason0b86a832008-03-24 15:01:56 -04001097
Chris Masone17cade2008-04-15 15:41:47 -04001098 key.objectid = objectid;
Chris Mason0b86a832008-03-24 15:01:56 -04001099 key.offset = (u64)-1;
1100 key.type = BTRFS_CHUNK_ITEM_KEY;
1101
1102 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1103 if (ret < 0)
1104 goto error;
1105
1106 BUG_ON(ret == 0);
1107
1108 ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
1109 if (ret) {
Chris Masone17cade2008-04-15 15:41:47 -04001110 *offset = 0;
Chris Mason0b86a832008-03-24 15:01:56 -04001111 } else {
1112 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1113 path->slots[0]);
Chris Masone17cade2008-04-15 15:41:47 -04001114 if (found_key.objectid != objectid)
1115 *offset = 0;
1116 else {
1117 chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
1118 struct btrfs_chunk);
1119 *offset = found_key.offset +
1120 btrfs_chunk_length(path->nodes[0], chunk);
1121 }
Chris Mason0b86a832008-03-24 15:01:56 -04001122 }
1123 ret = 0;
1124error:
1125 btrfs_free_path(path);
1126 return ret;
1127}
1128
Yan Zheng2b820322008-11-17 21:11:30 -05001129static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid)
Chris Mason0b86a832008-03-24 15:01:56 -04001130{
1131 int ret;
1132 struct btrfs_key key;
1133 struct btrfs_key found_key;
Yan Zheng2b820322008-11-17 21:11:30 -05001134 struct btrfs_path *path;
1135
1136 root = root->fs_info->chunk_root;
1137
1138 path = btrfs_alloc_path();
1139 if (!path)
1140 return -ENOMEM;
Chris Mason0b86a832008-03-24 15:01:56 -04001141
1142 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1143 key.type = BTRFS_DEV_ITEM_KEY;
1144 key.offset = (u64)-1;
1145
1146 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1147 if (ret < 0)
1148 goto error;
1149
1150 BUG_ON(ret == 0);
1151
1152 ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
1153 BTRFS_DEV_ITEM_KEY);
1154 if (ret) {
1155 *objectid = 1;
1156 } else {
1157 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1158 path->slots[0]);
1159 *objectid = found_key.offset + 1;
1160 }
1161 ret = 0;
1162error:
Yan Zheng2b820322008-11-17 21:11:30 -05001163 btrfs_free_path(path);
Chris Mason0b86a832008-03-24 15:01:56 -04001164 return ret;
1165}
1166
1167/*
1168 * the device information is stored in the chunk root
1169 * the btrfs_device struct should be fully filled in
1170 */
1171int btrfs_add_device(struct btrfs_trans_handle *trans,
1172 struct btrfs_root *root,
1173 struct btrfs_device *device)
1174{
1175 int ret;
1176 struct btrfs_path *path;
1177 struct btrfs_dev_item *dev_item;
1178 struct extent_buffer *leaf;
1179 struct btrfs_key key;
1180 unsigned long ptr;
Chris Mason0b86a832008-03-24 15:01:56 -04001181
1182 root = root->fs_info->chunk_root;
1183
1184 path = btrfs_alloc_path();
1185 if (!path)
1186 return -ENOMEM;
1187
Chris Mason0b86a832008-03-24 15:01:56 -04001188 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1189 key.type = BTRFS_DEV_ITEM_KEY;
Yan Zheng2b820322008-11-17 21:11:30 -05001190 key.offset = device->devid;
Chris Mason0b86a832008-03-24 15:01:56 -04001191
1192 ret = btrfs_insert_empty_item(trans, root, path, &key,
Chris Mason0d81ba52008-03-24 15:02:07 -04001193 sizeof(*dev_item));
Chris Mason0b86a832008-03-24 15:01:56 -04001194 if (ret)
1195 goto out;
1196
1197 leaf = path->nodes[0];
1198 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
1199
1200 btrfs_set_device_id(leaf, dev_item, device->devid);
Yan Zheng2b820322008-11-17 21:11:30 -05001201 btrfs_set_device_generation(leaf, dev_item, 0);
Chris Mason0b86a832008-03-24 15:01:56 -04001202 btrfs_set_device_type(leaf, dev_item, device->type);
1203 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
1204 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
1205 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
Chris Mason0b86a832008-03-24 15:01:56 -04001206 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
1207 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
Chris Masone17cade2008-04-15 15:41:47 -04001208 btrfs_set_device_group(leaf, dev_item, 0);
1209 btrfs_set_device_seek_speed(leaf, dev_item, 0);
1210 btrfs_set_device_bandwidth(leaf, dev_item, 0);
Chris Masonc3027eb2008-12-08 16:40:21 -05001211 btrfs_set_device_start_offset(leaf, dev_item, 0);
Chris Mason0b86a832008-03-24 15:01:56 -04001212
Chris Mason0b86a832008-03-24 15:01:56 -04001213 ptr = (unsigned long)btrfs_device_uuid(dev_item);
Chris Masone17cade2008-04-15 15:41:47 -04001214 write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
Yan Zheng2b820322008-11-17 21:11:30 -05001215 ptr = (unsigned long)btrfs_device_fsid(dev_item);
1216 write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE);
Chris Mason0b86a832008-03-24 15:01:56 -04001217 btrfs_mark_buffer_dirty(leaf);
Chris Mason0b86a832008-03-24 15:01:56 -04001218
Yan Zheng2b820322008-11-17 21:11:30 -05001219 ret = 0;
Chris Mason0b86a832008-03-24 15:01:56 -04001220out:
1221 btrfs_free_path(path);
1222 return ret;
1223}
Chris Mason8f18cf12008-04-25 16:53:30 -04001224
Chris Masona061fc82008-05-07 11:43:44 -04001225static int btrfs_rm_dev_item(struct btrfs_root *root,
1226 struct btrfs_device *device)
1227{
1228 int ret;
1229 struct btrfs_path *path;
Chris Masona061fc82008-05-07 11:43:44 -04001230 struct btrfs_key key;
Chris Masona061fc82008-05-07 11:43:44 -04001231 struct btrfs_trans_handle *trans;
1232
1233 root = root->fs_info->chunk_root;
1234
1235 path = btrfs_alloc_path();
1236 if (!path)
1237 return -ENOMEM;
1238
Yan, Zhenga22285a2010-05-16 10:48:46 -04001239 trans = btrfs_start_transaction(root, 0);
Tsutomu Itoh98d5dc12011-01-20 06:19:37 +00001240 if (IS_ERR(trans)) {
1241 btrfs_free_path(path);
1242 return PTR_ERR(trans);
1243 }
Chris Masona061fc82008-05-07 11:43:44 -04001244 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1245 key.type = BTRFS_DEV_ITEM_KEY;
1246 key.offset = device->devid;
Chris Mason7d9eb122008-07-08 14:19:17 -04001247 lock_chunks(root);
Chris Masona061fc82008-05-07 11:43:44 -04001248
1249 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1250 if (ret < 0)
1251 goto out;
1252
1253 if (ret > 0) {
1254 ret = -ENOENT;
1255 goto out;
1256 }
1257
1258 ret = btrfs_del_item(trans, root, path);
1259 if (ret)
1260 goto out;
Chris Masona061fc82008-05-07 11:43:44 -04001261out:
1262 btrfs_free_path(path);
Chris Mason7d9eb122008-07-08 14:19:17 -04001263 unlock_chunks(root);
Chris Masona061fc82008-05-07 11:43:44 -04001264 btrfs_commit_transaction(trans, root);
1265 return ret;
1266}
1267
1268int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1269{
1270 struct btrfs_device *device;
Yan Zheng2b820322008-11-17 21:11:30 -05001271 struct btrfs_device *next_device;
Chris Masona061fc82008-05-07 11:43:44 -04001272 struct block_device *bdev;
Chris Masondfe25022008-05-13 13:46:40 -04001273 struct buffer_head *bh = NULL;
Chris Masona061fc82008-05-07 11:43:44 -04001274 struct btrfs_super_block *disk_super;
Xiao Guangrong1f781602011-04-20 10:09:16 +00001275 struct btrfs_fs_devices *cur_devices;
Chris Masona061fc82008-05-07 11:43:44 -04001276 u64 all_avail;
1277 u64 devid;
Yan Zheng2b820322008-11-17 21:11:30 -05001278 u64 num_devices;
1279 u8 *dev_uuid;
Chris Masona061fc82008-05-07 11:43:44 -04001280 int ret = 0;
Xiao Guangrong1f781602011-04-20 10:09:16 +00001281 bool clear_super = false;
Chris Masona061fc82008-05-07 11:43:44 -04001282
Chris Masona061fc82008-05-07 11:43:44 -04001283 mutex_lock(&uuid_mutex);
Chris Mason7d9eb122008-07-08 14:19:17 -04001284 mutex_lock(&root->fs_info->volume_mutex);
Chris Masona061fc82008-05-07 11:43:44 -04001285
1286 all_avail = root->fs_info->avail_data_alloc_bits |
1287 root->fs_info->avail_system_alloc_bits |
1288 root->fs_info->avail_metadata_alloc_bits;
1289
1290 if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
Josef Bacik035fe032010-01-27 02:09:38 +00001291 root->fs_info->fs_devices->num_devices <= 4) {
Chris Masond3977122009-01-05 21:25:51 -05001292 printk(KERN_ERR "btrfs: unable to go below four devices "
1293 "on raid10\n");
Chris Masona061fc82008-05-07 11:43:44 -04001294 ret = -EINVAL;
1295 goto out;
1296 }
1297
1298 if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
Josef Bacik035fe032010-01-27 02:09:38 +00001299 root->fs_info->fs_devices->num_devices <= 2) {
Chris Masond3977122009-01-05 21:25:51 -05001300 printk(KERN_ERR "btrfs: unable to go below two "
1301 "devices on raid1\n");
Chris Masona061fc82008-05-07 11:43:44 -04001302 ret = -EINVAL;
1303 goto out;
1304 }
1305
Chris Masondfe25022008-05-13 13:46:40 -04001306 if (strcmp(device_path, "missing") == 0) {
Chris Masondfe25022008-05-13 13:46:40 -04001307 struct list_head *devices;
1308 struct btrfs_device *tmp;
Chris Masona061fc82008-05-07 11:43:44 -04001309
Chris Masondfe25022008-05-13 13:46:40 -04001310 device = NULL;
1311 devices = &root->fs_info->fs_devices->devices;
Xiao Guangrong46224702011-04-20 10:08:47 +00001312 /*
1313 * It is safe to read the devices since the volume_mutex
1314 * is held.
1315 */
Qinghuang Fengc6e30872009-01-21 10:59:08 -05001316 list_for_each_entry(tmp, devices, dev_list) {
Chris Masondfe25022008-05-13 13:46:40 -04001317 if (tmp->in_fs_metadata && !tmp->bdev) {
1318 device = tmp;
1319 break;
1320 }
1321 }
1322 bdev = NULL;
1323 bh = NULL;
1324 disk_super = NULL;
1325 if (!device) {
Chris Masond3977122009-01-05 21:25:51 -05001326 printk(KERN_ERR "btrfs: no missing devices found to "
1327 "remove\n");
Chris Masondfe25022008-05-13 13:46:40 -04001328 goto out;
1329 }
Chris Masondfe25022008-05-13 13:46:40 -04001330 } else {
Tejun Heod4d77622010-11-13 11:55:18 +01001331 bdev = blkdev_get_by_path(device_path, FMODE_READ | FMODE_EXCL,
1332 root->fs_info->bdev_holder);
Chris Masondfe25022008-05-13 13:46:40 -04001333 if (IS_ERR(bdev)) {
1334 ret = PTR_ERR(bdev);
1335 goto out;
1336 }
1337
Yan Zheng2b820322008-11-17 21:11:30 -05001338 set_blocksize(bdev, 4096);
Yan Zhenga512bbf2008-12-08 16:46:26 -05001339 bh = btrfs_read_dev_super(bdev);
Chris Masondfe25022008-05-13 13:46:40 -04001340 if (!bh) {
Dave Young20b45072011-01-08 10:09:13 +00001341 ret = -EINVAL;
Chris Masondfe25022008-05-13 13:46:40 -04001342 goto error_close;
1343 }
1344 disk_super = (struct btrfs_super_block *)bh->b_data;
Xiao Guangronga3438322010-01-06 11:48:18 +00001345 devid = btrfs_stack_device_id(&disk_super->dev_item);
Yan Zheng2b820322008-11-17 21:11:30 -05001346 dev_uuid = disk_super->dev_item.uuid;
1347 device = btrfs_find_device(root, devid, dev_uuid,
1348 disk_super->fsid);
Chris Masondfe25022008-05-13 13:46:40 -04001349 if (!device) {
1350 ret = -ENOENT;
1351 goto error_brelse;
1352 }
Chris Masondfe25022008-05-13 13:46:40 -04001353 }
Yan Zheng2b820322008-11-17 21:11:30 -05001354
1355 if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) {
Chris Masond3977122009-01-05 21:25:51 -05001356 printk(KERN_ERR "btrfs: unable to remove the only writeable "
1357 "device\n");
Yan Zheng2b820322008-11-17 21:11:30 -05001358 ret = -EINVAL;
1359 goto error_brelse;
1360 }
1361
1362 if (device->writeable) {
Xiao Guangrong0c1daee2011-04-20 10:08:16 +00001363 lock_chunks(root);
Yan Zheng2b820322008-11-17 21:11:30 -05001364 list_del_init(&device->dev_alloc_list);
Xiao Guangrong0c1daee2011-04-20 10:08:16 +00001365 unlock_chunks(root);
Yan Zheng2b820322008-11-17 21:11:30 -05001366 root->fs_info->fs_devices->rw_devices--;
Xiao Guangrong1f781602011-04-20 10:09:16 +00001367 clear_super = true;
Yan Zheng2b820322008-11-17 21:11:30 -05001368 }
Chris Masona061fc82008-05-07 11:43:44 -04001369
1370 ret = btrfs_shrink_device(device, 0);
1371 if (ret)
Ilya Dryomov9b3517e2011-02-15 18:14:25 +00001372 goto error_undo;
Chris Masona061fc82008-05-07 11:43:44 -04001373
Chris Masona061fc82008-05-07 11:43:44 -04001374 ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
1375 if (ret)
Ilya Dryomov9b3517e2011-02-15 18:14:25 +00001376 goto error_undo;
Chris Masona061fc82008-05-07 11:43:44 -04001377
Josef Bacik2bf64752011-09-26 17:12:22 -04001378 spin_lock(&root->fs_info->free_chunk_lock);
1379 root->fs_info->free_chunk_space = device->total_bytes -
1380 device->bytes_used;
1381 spin_unlock(&root->fs_info->free_chunk_lock);
1382
Yan Zheng2b820322008-11-17 21:11:30 -05001383 device->in_fs_metadata = 0;
Arne Jansena2de7332011-03-08 14:14:00 +01001384 btrfs_scrub_cancel_dev(root, device);
Chris Masone5e9a522009-06-10 15:17:02 -04001385
1386 /*
1387 * the device list mutex makes sure that we don't change
1388 * the device list while someone else is writing out all
1389 * the device supers.
1390 */
Xiao Guangrong1f781602011-04-20 10:09:16 +00001391
1392 cur_devices = device->fs_devices;
Chris Masone5e9a522009-06-10 15:17:02 -04001393 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
Xiao Guangrong1f781602011-04-20 10:09:16 +00001394 list_del_rcu(&device->dev_list);
Chris Masone5e9a522009-06-10 15:17:02 -04001395
Yan Zhenge4404d62008-12-12 10:03:26 -05001396 device->fs_devices->num_devices--;
Yan Zheng2b820322008-11-17 21:11:30 -05001397
Chris Masoncd02dca2010-12-13 14:56:23 -05001398 if (device->missing)
1399 root->fs_info->fs_devices->missing_devices--;
1400
Yan Zheng2b820322008-11-17 21:11:30 -05001401 next_device = list_entry(root->fs_info->fs_devices->devices.next,
1402 struct btrfs_device, dev_list);
1403 if (device->bdev == root->fs_info->sb->s_bdev)
1404 root->fs_info->sb->s_bdev = next_device->bdev;
1405 if (device->bdev == root->fs_info->fs_devices->latest_bdev)
1406 root->fs_info->fs_devices->latest_bdev = next_device->bdev;
1407
Xiao Guangrong1f781602011-04-20 10:09:16 +00001408 if (device->bdev)
Yan Zhenge4404d62008-12-12 10:03:26 -05001409 device->fs_devices->open_devices--;
Xiao Guangrong1f781602011-04-20 10:09:16 +00001410
1411 call_rcu(&device->rcu, free_device);
1412 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
Yan Zhenge4404d62008-12-12 10:03:26 -05001413
David Sterba6c417612011-04-13 15:41:04 +02001414 num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
1415 btrfs_set_super_num_devices(root->fs_info->super_copy, num_devices);
Yan Zheng2b820322008-11-17 21:11:30 -05001416
Xiao Guangrong1f781602011-04-20 10:09:16 +00001417 if (cur_devices->open_devices == 0) {
Yan Zhenge4404d62008-12-12 10:03:26 -05001418 struct btrfs_fs_devices *fs_devices;
1419 fs_devices = root->fs_info->fs_devices;
1420 while (fs_devices) {
Xiao Guangrong1f781602011-04-20 10:09:16 +00001421 if (fs_devices->seed == cur_devices)
Yan Zhenge4404d62008-12-12 10:03:26 -05001422 break;
1423 fs_devices = fs_devices->seed;
Yan Zheng2b820322008-11-17 21:11:30 -05001424 }
Xiao Guangrong1f781602011-04-20 10:09:16 +00001425 fs_devices->seed = cur_devices->seed;
1426 cur_devices->seed = NULL;
Xiao Guangrong0c1daee2011-04-20 10:08:16 +00001427 lock_chunks(root);
Xiao Guangrong1f781602011-04-20 10:09:16 +00001428 __btrfs_close_devices(cur_devices);
Xiao Guangrong0c1daee2011-04-20 10:08:16 +00001429 unlock_chunks(root);
Xiao Guangrong1f781602011-04-20 10:09:16 +00001430 free_fs_devices(cur_devices);
Yan Zheng2b820322008-11-17 21:11:30 -05001431 }
1432
1433 /*
1434 * at this point, the device is zero sized. We want to
1435 * remove it from the devices list and zero out the old super
1436 */
Xiao Guangrong1f781602011-04-20 10:09:16 +00001437 if (clear_super) {
Chris Masondfe25022008-05-13 13:46:40 -04001438 /* make sure this device isn't detected as part of
1439 * the FS anymore
1440 */
1441 memset(&disk_super->magic, 0, sizeof(disk_super->magic));
1442 set_buffer_dirty(bh);
1443 sync_dirty_buffer(bh);
Chris Masondfe25022008-05-13 13:46:40 -04001444 }
Chris Masona061fc82008-05-07 11:43:44 -04001445
Chris Masona061fc82008-05-07 11:43:44 -04001446 ret = 0;
Chris Masona061fc82008-05-07 11:43:44 -04001447
1448error_brelse:
1449 brelse(bh);
1450error_close:
Chris Masondfe25022008-05-13 13:46:40 -04001451 if (bdev)
Tejun Heoe525fd82010-11-13 11:55:17 +01001452 blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
Chris Masona061fc82008-05-07 11:43:44 -04001453out:
Chris Mason7d9eb122008-07-08 14:19:17 -04001454 mutex_unlock(&root->fs_info->volume_mutex);
Chris Masona061fc82008-05-07 11:43:44 -04001455 mutex_unlock(&uuid_mutex);
Chris Masona061fc82008-05-07 11:43:44 -04001456 return ret;
Ilya Dryomov9b3517e2011-02-15 18:14:25 +00001457error_undo:
1458 if (device->writeable) {
Xiao Guangrong0c1daee2011-04-20 10:08:16 +00001459 lock_chunks(root);
Ilya Dryomov9b3517e2011-02-15 18:14:25 +00001460 list_add(&device->dev_alloc_list,
1461 &root->fs_info->fs_devices->alloc_list);
Xiao Guangrong0c1daee2011-04-20 10:08:16 +00001462 unlock_chunks(root);
Ilya Dryomov9b3517e2011-02-15 18:14:25 +00001463 root->fs_info->fs_devices->rw_devices++;
1464 }
1465 goto error_brelse;
Chris Masona061fc82008-05-07 11:43:44 -04001466}
1467
/*
 * does all the dirty work required for changing file system's UUID.
 *
 * The current (seeding) fs_devices is cloned and the clone stays registered
 * in fs_uuids under the old fsid.  A second copy ("seed_devices") takes over
 * the in-memory device lists and is chained below the current fs_devices via
 * fs_devices->seed, while the current fs_devices gets a freshly generated
 * fsid that is also written into fs_info and the in-memory super copy.
 *
 * Returns 0 on success, -EINVAL if the filesystem is not a seeding one,
 * -ENOMEM / PTR_ERR on allocation or clone failure.
 */
static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{
	struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
	struct btrfs_fs_devices *old_devices;
	struct btrfs_fs_devices *seed_devices;
	struct btrfs_super_block *disk_super = root->fs_info->super_copy;
	struct btrfs_device *device;
	u64 super_flags;

	/* caller must hold uuid_mutex; we edit the global fs_uuids list */
	BUG_ON(!mutex_is_locked(&uuid_mutex));
	if (!fs_devices->seeding)
		return -EINVAL;

	seed_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
	if (!seed_devices)
		return -ENOMEM;

	/* keep a copy registered under the old fsid in fs_uuids */
	old_devices = clone_fs_devices(fs_devices);
	if (IS_ERR(old_devices)) {
		kfree(seed_devices);
		return PTR_ERR(old_devices);
	}

	list_add(&old_devices->list, &fs_uuids);

	/* seed_devices inherits the current state but gets its own lists */
	memcpy(seed_devices, fs_devices, sizeof(*seed_devices));
	seed_devices->opened = 1;
	INIT_LIST_HEAD(&seed_devices->devices);
	INIT_LIST_HEAD(&seed_devices->alloc_list);
	mutex_init(&seed_devices->device_list_mutex);

	/*
	 * the devices list is read under RCU elsewhere; wait for readers
	 * to finish before the spliced entries belong to seed_devices
	 */
	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
	list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
			      synchronize_rcu);
	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);

	list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
	list_for_each_entry(device, &seed_devices->devices, dev_list) {
		device->fs_devices = seed_devices;
	}

	/* the (now empty) current fs_devices stops being a seed fs */
	fs_devices->seeding = 0;
	fs_devices->num_devices = 0;
	fs_devices->open_devices = 0;
	fs_devices->seed = seed_devices;

	/* new fsid for the sprouted filesystem, clear the seeding flag */
	generate_random_uuid(fs_devices->fsid);
	memcpy(root->fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
	memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
	super_flags = btrfs_super_flags(disk_super) &
		      ~BTRFS_SUPER_FLAG_SEEDING;
	btrfs_set_super_flags(disk_super, super_flags);

	return 0;
}
1527
/*
 * store the expected generation for seed devices in device items.
 *
 * Walks every DEV_ITEM in the chunk tree; for each device that still
 * belongs to a seeding fs_devices, the on-disk generation field is set
 * to the device's in-memory generation and the leaf is marked dirty.
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, or a
 * negative errno from the tree search.
 */
static int btrfs_finish_sprout(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_dev_item *dev_item;
	struct btrfs_device *device;
	struct btrfs_key key;
	u8 fs_uuid[BTRFS_UUID_SIZE];
	u8 dev_uuid[BTRFS_UUID_SIZE];
	u64 devid;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* dev items live in the chunk tree; start at the lowest devid */
	root = root->fs_info->chunk_root;
	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.offset = 0;
	key.type = BTRFS_DEV_ITEM_KEY;

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
		if (ret < 0)
			goto error;

		leaf = path->nodes[0];
next_slot:
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			/* ran off the leaf; move to the next one */
			ret = btrfs_next_leaf(root, path);
			if (ret > 0)
				break;
			if (ret < 0)
				goto error;
			leaf = path->nodes[0];
			/* remember where we are, then re-search with locks */
			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
			btrfs_release_path(path);
			continue;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != BTRFS_DEV_ITEMS_OBJECTID ||
		    key.type != BTRFS_DEV_ITEM_KEY)
			break;

		dev_item = btrfs_item_ptr(leaf, path->slots[0],
					  struct btrfs_dev_item);
		devid = btrfs_device_id(leaf, dev_item);
		read_extent_buffer(leaf, dev_uuid,
				   (unsigned long)btrfs_device_uuid(dev_item),
				   BTRFS_UUID_SIZE);
		read_extent_buffer(leaf, fs_uuid,
				   (unsigned long)btrfs_device_fsid(dev_item),
				   BTRFS_UUID_SIZE);
		device = btrfs_find_device(root, devid, dev_uuid, fs_uuid);
		/* every on-disk dev item must have an in-memory device */
		BUG_ON(!device);

		if (device->fs_devices->seeding) {
			btrfs_set_device_generation(leaf, dev_item,
						    device->generation);
			btrfs_mark_buffer_dirty(leaf);
		}

		path->slots[0]++;
		goto next_slot;
	}
	ret = 0;
error:
	btrfs_free_path(path);
	return ret;
}
1603
/*
 * Add the block device at @device_path as a new writable device of the
 * mounted filesystem.
 *
 * Opens the device exclusively, allocates and initializes a btrfs_device,
 * links it into fs_devices, grows the superblock's total_bytes and
 * num_devices, and commits a transaction.  If the filesystem is a seed
 * filesystem this also performs the "sprout": the fs gets a new fsid
 * (btrfs_prepare_sprout), the first chunks are created on the new device
 * (init_first_rw_device) and seed generations are recorded
 * (btrfs_finish_sprout), followed by relocation of the system chunks.
 *
 * Lock ordering used here: s_umount / uuid_mutex (seeding only), then
 * volume_mutex, then the chunk mutex, then device_list_mutex.
 *
 * Returns 0 on success, -EINVAL for an RDONLY non-seed fs, -EEXIST if the
 * device is already part of the fs, -ENOMEM / PTR_ERR on allocation or
 * transaction failure.
 */
int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
{
	struct request_queue *q;
	struct btrfs_trans_handle *trans;
	struct btrfs_device *device;
	struct block_device *bdev;
	struct list_head *devices;
	struct super_block *sb = root->fs_info->sb;
	u64 total_bytes;
	int seeding_dev = 0;
	int ret = 0;

	/* adding to a read-only fs only makes sense when sprouting a seed */
	if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding)
		return -EINVAL;

	bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
				  root->fs_info->bdev_holder);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);

	if (root->fs_info->fs_devices->seeding) {
		seeding_dev = 1;
		/* sprouting rewrites the fsid; keep mounts and scans out */
		down_write(&sb->s_umount);
		mutex_lock(&uuid_mutex);
	}

	/* flush anything a previous user left dirty on this device */
	filemap_write_and_wait(bdev->bd_inode->i_mapping);
	mutex_lock(&root->fs_info->volume_mutex);

	devices = &root->fs_info->fs_devices->devices;
	/*
	 * we have the volume lock, so we don't need the extra
	 * device list mutex while reading the list here.
	 */
	list_for_each_entry(device, devices, dev_list) {
		if (device->bdev == bdev) {
			ret = -EEXIST;
			goto error;
		}
	}

	device = kzalloc(sizeof(*device), GFP_NOFS);
	if (!device) {
		/* we can safely leave the fs_devices entry around */
		ret = -ENOMEM;
		goto error;
	}

	device->name = kstrdup(device_path, GFP_NOFS);
	if (!device->name) {
		kfree(device);
		ret = -ENOMEM;
		goto error;
	}

	ret = find_next_devid(root, &device->devid);
	if (ret) {
		kfree(device->name);
		kfree(device);
		goto error;
	}

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		kfree(device->name);
		kfree(device);
		ret = PTR_ERR(trans);
		goto error;
	}

	lock_chunks(root);

	q = bdev_get_queue(bdev);
	if (blk_queue_discard(q))
		device->can_discard = 1;
	device->writeable = 1;
	device->work.func = pending_bios_fn;
	generate_random_uuid(device->uuid);
	spin_lock_init(&device->io_lock);
	device->generation = trans->transid;
	device->io_width = root->sectorsize;
	device->io_align = root->sectorsize;
	device->sector_size = root->sectorsize;
	device->total_bytes = i_size_read(bdev->bd_inode);
	device->disk_total_bytes = device->total_bytes;
	device->dev_root = root->fs_info->dev_root;
	device->bdev = bdev;
	device->in_fs_metadata = 1;
	device->mode = FMODE_EXCL;
	set_blocksize(device->bdev, 4096);

	if (seeding_dev) {
		/* fs becomes writable; give it a fresh fsid */
		sb->s_flags &= ~MS_RDONLY;
		ret = btrfs_prepare_sprout(trans, root);
		BUG_ON(ret);
	}

	device->fs_devices = root->fs_info->fs_devices;

	/*
	 * we don't want write_supers to jump in here with our device
	 * half setup
	 */
	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
	list_add_rcu(&device->dev_list, &root->fs_info->fs_devices->devices);
	list_add(&device->dev_alloc_list,
		 &root->fs_info->fs_devices->alloc_list);
	root->fs_info->fs_devices->num_devices++;
	root->fs_info->fs_devices->open_devices++;
	root->fs_info->fs_devices->rw_devices++;
	if (device->can_discard)
		root->fs_info->fs_devices->num_can_discard++;
	root->fs_info->fs_devices->total_rw_bytes += device->total_bytes;

	spin_lock(&root->fs_info->free_chunk_lock);
	root->fs_info->free_chunk_space += device->total_bytes;
	spin_unlock(&root->fs_info->free_chunk_lock);

	/* one rotational device makes the whole fs "rotating" */
	if (!blk_queue_nonrot(bdev_get_queue(bdev)))
		root->fs_info->fs_devices->rotating = 1;

	total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
	btrfs_set_super_total_bytes(root->fs_info->super_copy,
				    total_bytes + device->total_bytes);

	total_bytes = btrfs_super_num_devices(root->fs_info->super_copy);
	btrfs_set_super_num_devices(root->fs_info->super_copy,
				    total_bytes + 1);
	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);

	if (seeding_dev) {
		ret = init_first_rw_device(trans, root, device);
		BUG_ON(ret);
		ret = btrfs_finish_sprout(trans, root);
		BUG_ON(ret);
	} else {
		/*
		 * NOTE(review): the return value of btrfs_add_device() is
		 * stored but never checked before the commit below
		 */
		ret = btrfs_add_device(trans, root, device);
	}

	/*
	 * we've got more storage, clear any full flags on the space
	 * infos
	 */
	btrfs_clear_space_info_full(root->fs_info);

	unlock_chunks(root);
	btrfs_commit_transaction(trans, root);

	if (seeding_dev) {
		mutex_unlock(&uuid_mutex);
		up_write(&sb->s_umount);

		/* system chunks of a sprouted fs must move off the seed */
		ret = btrfs_relocate_sys_chunks(root);
		BUG_ON(ret);
	}
out:
	mutex_unlock(&root->fs_info->volume_mutex);
	return ret;
error:
	blkdev_put(bdev, FMODE_EXCL);
	if (seeding_dev) {
		mutex_unlock(&uuid_mutex);
		up_write(&sb->s_umount);
	}
	goto out;
}
1770
Chris Masond3977122009-01-05 21:25:51 -05001771static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
1772 struct btrfs_device *device)
Chris Mason0b86a832008-03-24 15:01:56 -04001773{
1774 int ret;
1775 struct btrfs_path *path;
1776 struct btrfs_root *root;
1777 struct btrfs_dev_item *dev_item;
1778 struct extent_buffer *leaf;
1779 struct btrfs_key key;
1780
1781 root = device->dev_root->fs_info->chunk_root;
1782
1783 path = btrfs_alloc_path();
1784 if (!path)
1785 return -ENOMEM;
1786
1787 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1788 key.type = BTRFS_DEV_ITEM_KEY;
1789 key.offset = device->devid;
1790
1791 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1792 if (ret < 0)
1793 goto out;
1794
1795 if (ret > 0) {
1796 ret = -ENOENT;
1797 goto out;
1798 }
1799
1800 leaf = path->nodes[0];
1801 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
1802
1803 btrfs_set_device_id(leaf, dev_item, device->devid);
1804 btrfs_set_device_type(leaf, dev_item, device->type);
1805 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
1806 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
1807 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
Chris Balld6397ba2009-04-27 07:29:03 -04001808 btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes);
Chris Mason0b86a832008-03-24 15:01:56 -04001809 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
1810 btrfs_mark_buffer_dirty(leaf);
1811
1812out:
1813 btrfs_free_path(path);
1814 return ret;
1815}
1816
Chris Mason7d9eb122008-07-08 14:19:17 -04001817static int __btrfs_grow_device(struct btrfs_trans_handle *trans,
Chris Mason8f18cf12008-04-25 16:53:30 -04001818 struct btrfs_device *device, u64 new_size)
1819{
1820 struct btrfs_super_block *super_copy =
David Sterba6c417612011-04-13 15:41:04 +02001821 device->dev_root->fs_info->super_copy;
Chris Mason8f18cf12008-04-25 16:53:30 -04001822 u64 old_total = btrfs_super_total_bytes(super_copy);
1823 u64 diff = new_size - device->total_bytes;
1824
Yan Zheng2b820322008-11-17 21:11:30 -05001825 if (!device->writeable)
1826 return -EACCES;
1827 if (new_size <= device->total_bytes)
1828 return -EINVAL;
1829
Chris Mason8f18cf12008-04-25 16:53:30 -04001830 btrfs_set_super_total_bytes(super_copy, old_total + diff);
Yan Zheng2b820322008-11-17 21:11:30 -05001831 device->fs_devices->total_rw_bytes += diff;
1832
1833 device->total_bytes = new_size;
Chris Mason9779b722009-07-24 16:41:41 -04001834 device->disk_total_bytes = new_size;
Chris Mason4184ea72009-03-10 12:39:20 -04001835 btrfs_clear_space_info_full(device->dev_root->fs_info);
1836
Chris Mason8f18cf12008-04-25 16:53:30 -04001837 return btrfs_update_device(trans, device);
1838}
1839
Chris Mason7d9eb122008-07-08 14:19:17 -04001840int btrfs_grow_device(struct btrfs_trans_handle *trans,
1841 struct btrfs_device *device, u64 new_size)
1842{
1843 int ret;
1844 lock_chunks(device->dev_root);
1845 ret = __btrfs_grow_device(trans, device, new_size);
1846 unlock_chunks(device->dev_root);
1847 return ret;
1848}
1849
Chris Mason8f18cf12008-04-25 16:53:30 -04001850static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
1851 struct btrfs_root *root,
1852 u64 chunk_tree, u64 chunk_objectid,
1853 u64 chunk_offset)
1854{
1855 int ret;
1856 struct btrfs_path *path;
1857 struct btrfs_key key;
1858
1859 root = root->fs_info->chunk_root;
1860 path = btrfs_alloc_path();
1861 if (!path)
1862 return -ENOMEM;
1863
1864 key.objectid = chunk_objectid;
1865 key.offset = chunk_offset;
1866 key.type = BTRFS_CHUNK_ITEM_KEY;
1867
1868 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1869 BUG_ON(ret);
1870
1871 ret = btrfs_del_item(trans, root, path);
Chris Mason8f18cf12008-04-25 16:53:30 -04001872
1873 btrfs_free_path(path);
Tsutomu Itoh65a246c2011-05-19 04:37:44 +00001874 return ret;
Chris Mason8f18cf12008-04-25 16:53:30 -04001875}
1876
Christoph Hellwigb2950862008-12-02 09:54:17 -05001877static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64
Chris Mason8f18cf12008-04-25 16:53:30 -04001878 chunk_offset)
1879{
David Sterba6c417612011-04-13 15:41:04 +02001880 struct btrfs_super_block *super_copy = root->fs_info->super_copy;
Chris Mason8f18cf12008-04-25 16:53:30 -04001881 struct btrfs_disk_key *disk_key;
1882 struct btrfs_chunk *chunk;
1883 u8 *ptr;
1884 int ret = 0;
1885 u32 num_stripes;
1886 u32 array_size;
1887 u32 len = 0;
1888 u32 cur;
1889 struct btrfs_key key;
1890
1891 array_size = btrfs_super_sys_array_size(super_copy);
1892
1893 ptr = super_copy->sys_chunk_array;
1894 cur = 0;
1895
1896 while (cur < array_size) {
1897 disk_key = (struct btrfs_disk_key *)ptr;
1898 btrfs_disk_key_to_cpu(&key, disk_key);
1899
1900 len = sizeof(*disk_key);
1901
1902 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
1903 chunk = (struct btrfs_chunk *)(ptr + len);
1904 num_stripes = btrfs_stack_chunk_num_stripes(chunk);
1905 len += btrfs_chunk_item_size(num_stripes);
1906 } else {
1907 ret = -EIO;
1908 break;
1909 }
1910 if (key.objectid == chunk_objectid &&
1911 key.offset == chunk_offset) {
1912 memmove(ptr, ptr + len, array_size - (cur + len));
1913 array_size -= len;
1914 btrfs_set_super_sys_array_size(super_copy, array_size);
1915 } else {
1916 ptr += len;
1917 cur += len;
1918 }
1919 }
1920 return ret;
1921}
1922
/*
 * Relocate and delete the chunk at @chunk_offset.
 *
 * First every extent inside the chunk is moved elsewhere
 * (btrfs_relocate_block_group), then under the chunk mutex the device
 * extents, the chunk item, the sys_chunk_array entry (for SYSTEM chunks),
 * the block group, and the extent mapping are all removed.
 *
 * Returns 0 on success, -ENOSPC when the chunk cannot be relocated, or a
 * negative errno from the block group relocation.
 */
static int btrfs_relocate_chunk(struct btrfs_root *root,
				u64 chunk_tree, u64 chunk_objectid,
				u64 chunk_offset)
{
	struct extent_map_tree *em_tree;
	struct btrfs_root *extent_root;
	struct btrfs_trans_handle *trans;
	struct extent_map *em;
	struct map_lookup *map;
	int ret;
	int i;

	root = root->fs_info->chunk_root;
	extent_root = root->fs_info->extent_root;
	em_tree = &root->fs_info->mapping_tree.map_tree;

	/* refuse early if there is no room to move the data to */
	ret = btrfs_can_relocate(extent_root, chunk_offset);
	if (ret)
		return -ENOSPC;

	/* step one, relocate all the extents inside this chunk */
	ret = btrfs_relocate_block_group(extent_root, chunk_offset);
	if (ret)
		return ret;

	trans = btrfs_start_transaction(root, 0);
	BUG_ON(IS_ERR(trans));

	lock_chunks(root);

	/*
	 * step two, delete the device extents and the
	 * chunk tree entries
	 */
	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, chunk_offset, 1);
	read_unlock(&em_tree->lock);

	/* NOTE(review): em is not NULL-checked before the dereference */
	BUG_ON(em->start > chunk_offset ||
	       em->start + em->len < chunk_offset);
	/* the stripe map is stashed in em->bdev for chunk mappings */
	map = (struct map_lookup *)em->bdev;

	for (i = 0; i < map->num_stripes; i++) {
		ret = btrfs_free_dev_extent(trans, map->stripes[i].dev,
					    map->stripes[i].physical);
		BUG_ON(ret);

		if (map->stripes[i].dev) {
			ret = btrfs_update_device(trans, map->stripes[i].dev);
			BUG_ON(ret);
		}
	}
	ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid,
			       chunk_offset);

	BUG_ON(ret);

	trace_btrfs_chunk_free(root, map, chunk_offset, em->len);

	/* SYSTEM chunks are additionally mirrored in the super block */
	if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
		ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset);
		BUG_ON(ret);
	}

	ret = btrfs_remove_block_group(trans, extent_root, chunk_offset);
	BUG_ON(ret);

	write_lock(&em_tree->lock);
	remove_extent_mapping(em_tree, em);
	write_unlock(&em_tree->lock);

	kfree(map);
	em->bdev = NULL;

	/* once for the tree */
	free_extent_map(em);
	/* once for us */
	free_extent_map(em);

	unlock_chunks(root);
	btrfs_end_transaction(trans, root);
	return 0;
}
2006
/*
 * Relocate every SYSTEM chunk off its current location.
 *
 * Walks the chunk tree backwards from the highest offset, relocating each
 * SYSTEM chunk it finds.  Chunks that fail with -ENOSPC are counted and
 * the whole scan is retried once (relocating other chunks may have freed
 * space); a second full failure returns -ENOSPC.
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, -ENOSPC
 * after a failed retry, or a negative errno from the tree search.
 */
static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
{
	struct btrfs_root *chunk_root = root->fs_info->chunk_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_chunk *chunk;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u64 chunk_tree = chunk_root->root_key.objectid;
	u64 chunk_type;
	bool retried = false;
	int failed = 0;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

again:
	/* start past the last possible chunk and walk backwards */
	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
	key.offset = (u64)-1;
	key.type = BTRFS_CHUNK_ITEM_KEY;

	while (1) {
		ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
		if (ret < 0)
			goto error;
		/* offset (u64)-1 can never match an existing item exactly */
		BUG_ON(ret == 0);

		ret = btrfs_previous_item(chunk_root, path, key.objectid,
					  key.type);
		if (ret < 0)
			goto error;
		if (ret > 0)
			break;

		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		chunk = btrfs_item_ptr(leaf, path->slots[0],
				       struct btrfs_chunk);
		chunk_type = btrfs_chunk_type(leaf, chunk);
		/* drop tree locks before the (slow) relocation */
		btrfs_release_path(path);

		if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
			ret = btrfs_relocate_chunk(chunk_root, chunk_tree,
						   found_key.objectid,
						   found_key.offset);
			/* -ENOSPC may resolve on retry; anything else is fatal */
			if (ret == -ENOSPC)
				failed++;
			else if (ret)
				BUG();
		}

		if (found_key.offset == 0)
			break;
		key.offset = found_key.offset - 1;
	}
	ret = 0;
	if (failed && !retried) {
		failed = 0;
		retried = true;
		goto again;
	} else if (failed && retried) {
		WARN_ON(1);
		ret = -ENOSPC;
	}
error:
	btrfs_free_path(path);
	return ret;
}
2078
Chris Masonec44a352008-04-28 15:29:52 -04002079static u64 div_factor(u64 num, int factor)
2080{
2081 if (factor == 10)
2082 return num;
2083 num *= factor;
2084 do_div(num, 10);
2085 return num;
2086}
2087
/*
 * Rebalance the filesystem by rewriting every chunk.
 *
 * Step one shrinks and immediately re-grows each writable device by a
 * small amount, which pushes allocations around and frees some room at
 * the front of each device.  Step two walks the chunk tree backwards and
 * relocates every chunk (except chunk zero), so each one is rewritten
 * with the current allocation policy.
 *
 * Returns 0 on success, -EROFS on a read-only fs, -EPERM without
 * CAP_SYS_ADMIN, -ENOMEM if no path could be allocated, or a negative
 * errno from search/relocation.
 */
int btrfs_balance(struct btrfs_root *dev_root)
{
	int ret;
	struct list_head *devices = &dev_root->fs_info->fs_devices->devices;
	struct btrfs_device *device;
	u64 old_size;
	u64 size_to_free;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root;
	struct btrfs_trans_handle *trans;
	struct btrfs_key found_key;

	if (dev_root->fs_info->sb->s_flags & MS_RDONLY)
		return -EROFS;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&dev_root->fs_info->volume_mutex);
	dev_root = dev_root->fs_info->dev_root;

	/* step one make some room on all the devices */
	list_for_each_entry(device, devices, dev_list) {
		old_size = device->total_bytes;
		/* free 10% of the device, capped at 1MB */
		size_to_free = div_factor(old_size, 1);
		size_to_free = min(size_to_free, (u64)1 * 1024 * 1024);
		if (!device->writeable ||
		    device->total_bytes - device->bytes_used > size_to_free)
			continue;

		ret = btrfs_shrink_device(device, old_size - size_to_free);
		if (ret == -ENOSPC)
			break;
		BUG_ON(ret);

		trans = btrfs_start_transaction(dev_root, 0);
		BUG_ON(IS_ERR(trans));

		/* grow back to the original size right away */
		ret = btrfs_grow_device(trans, device, old_size);
		BUG_ON(ret);

		btrfs_end_transaction(trans, dev_root);
	}

	/* step two, relocate all the chunks */
	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto error;
	}
	/* scan backwards from the highest possible chunk offset */
	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
	key.offset = (u64)-1;
	key.type = BTRFS_CHUNK_ITEM_KEY;

	while (1) {
		ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
		if (ret < 0)
			goto error;

		/*
		 * this shouldn't happen, it means the last relocate
		 * failed
		 */
		if (ret == 0)
			break;

		ret = btrfs_previous_item(chunk_root, path, 0,
					  BTRFS_CHUNK_ITEM_KEY);
		if (ret)
			break;

		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				      path->slots[0]);
		if (found_key.objectid != key.objectid)
			break;

		/* chunk zero is special */
		if (found_key.offset == 0)
			break;

		/* drop tree locks before the (slow) relocation */
		btrfs_release_path(path);
		ret = btrfs_relocate_chunk(chunk_root,
					   chunk_root->root_key.objectid,
					   found_key.objectid,
					   found_key.offset);
		/* -ENOSPC is tolerated; just move on to the next chunk */
		if (ret && ret != -ENOSPC)
			goto error;
		key.offset = found_key.offset - 1;
	}
	ret = 0;
error:
	/* btrfs_free_path(NULL) is a safe no-op */
	btrfs_free_path(path);
	mutex_unlock(&dev_root->fs_info->volume_mutex);
	return ret;
}
2184
Chris Mason8f18cf12008-04-25 16:53:30 -04002185/*
2186 * shrinking a device means finding all of the device extents past
2187 * the new size, and then following the back refs to the chunks.
2188 * The chunk relocation code actually frees the device extent
2189 */
2190int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
2191{
2192 struct btrfs_trans_handle *trans;
2193 struct btrfs_root *root = device->dev_root;
2194 struct btrfs_dev_extent *dev_extent = NULL;
2195 struct btrfs_path *path;
2196 u64 length;
2197 u64 chunk_tree;
2198 u64 chunk_objectid;
2199 u64 chunk_offset;
2200 int ret;
2201 int slot;
Josef Bacikba1bf482009-09-11 16:11:19 -04002202 int failed = 0;
2203 bool retried = false;
Chris Mason8f18cf12008-04-25 16:53:30 -04002204 struct extent_buffer *l;
2205 struct btrfs_key key;
David Sterba6c417612011-04-13 15:41:04 +02002206 struct btrfs_super_block *super_copy = root->fs_info->super_copy;
Chris Mason8f18cf12008-04-25 16:53:30 -04002207 u64 old_total = btrfs_super_total_bytes(super_copy);
Josef Bacikba1bf482009-09-11 16:11:19 -04002208 u64 old_size = device->total_bytes;
Chris Mason8f18cf12008-04-25 16:53:30 -04002209 u64 diff = device->total_bytes - new_size;
2210
Yan Zheng2b820322008-11-17 21:11:30 -05002211 if (new_size >= device->total_bytes)
2212 return -EINVAL;
Chris Mason8f18cf12008-04-25 16:53:30 -04002213
2214 path = btrfs_alloc_path();
2215 if (!path)
2216 return -ENOMEM;
2217
Chris Mason8f18cf12008-04-25 16:53:30 -04002218 path->reada = 2;
2219
Chris Mason7d9eb122008-07-08 14:19:17 -04002220 lock_chunks(root);
2221
Chris Mason8f18cf12008-04-25 16:53:30 -04002222 device->total_bytes = new_size;
Josef Bacik2bf64752011-09-26 17:12:22 -04002223 if (device->writeable) {
Yan Zheng2b820322008-11-17 21:11:30 -05002224 device->fs_devices->total_rw_bytes -= diff;
Josef Bacik2bf64752011-09-26 17:12:22 -04002225 spin_lock(&root->fs_info->free_chunk_lock);
2226 root->fs_info->free_chunk_space -= diff;
2227 spin_unlock(&root->fs_info->free_chunk_lock);
2228 }
Chris Mason7d9eb122008-07-08 14:19:17 -04002229 unlock_chunks(root);
Chris Mason8f18cf12008-04-25 16:53:30 -04002230
Josef Bacikba1bf482009-09-11 16:11:19 -04002231again:
Chris Mason8f18cf12008-04-25 16:53:30 -04002232 key.objectid = device->devid;
2233 key.offset = (u64)-1;
2234 key.type = BTRFS_DEV_EXTENT_KEY;
2235
2236 while (1) {
2237 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2238 if (ret < 0)
2239 goto done;
2240
2241 ret = btrfs_previous_item(root, path, 0, key.type);
2242 if (ret < 0)
2243 goto done;
2244 if (ret) {
2245 ret = 0;
David Sterbab3b4aa72011-04-21 01:20:15 +02002246 btrfs_release_path(path);
Yan Zhengbf1fb512009-07-22 09:59:00 -04002247 break;
Chris Mason8f18cf12008-04-25 16:53:30 -04002248 }
2249
2250 l = path->nodes[0];
2251 slot = path->slots[0];
2252 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
2253
Josef Bacikba1bf482009-09-11 16:11:19 -04002254 if (key.objectid != device->devid) {
David Sterbab3b4aa72011-04-21 01:20:15 +02002255 btrfs_release_path(path);
Yan Zhengbf1fb512009-07-22 09:59:00 -04002256 break;
Josef Bacikba1bf482009-09-11 16:11:19 -04002257 }
Chris Mason8f18cf12008-04-25 16:53:30 -04002258
2259 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
2260 length = btrfs_dev_extent_length(l, dev_extent);
2261
Josef Bacikba1bf482009-09-11 16:11:19 -04002262 if (key.offset + length <= new_size) {
David Sterbab3b4aa72011-04-21 01:20:15 +02002263 btrfs_release_path(path);
Chris Balld6397ba2009-04-27 07:29:03 -04002264 break;
Josef Bacikba1bf482009-09-11 16:11:19 -04002265 }
Chris Mason8f18cf12008-04-25 16:53:30 -04002266
2267 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
2268 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
2269 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
David Sterbab3b4aa72011-04-21 01:20:15 +02002270 btrfs_release_path(path);
Chris Mason8f18cf12008-04-25 16:53:30 -04002271
2272 ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid,
2273 chunk_offset);
Josef Bacikba1bf482009-09-11 16:11:19 -04002274 if (ret && ret != -ENOSPC)
Chris Mason8f18cf12008-04-25 16:53:30 -04002275 goto done;
Josef Bacikba1bf482009-09-11 16:11:19 -04002276 if (ret == -ENOSPC)
2277 failed++;
2278 key.offset -= 1;
2279 }
2280
2281 if (failed && !retried) {
2282 failed = 0;
2283 retried = true;
2284 goto again;
2285 } else if (failed && retried) {
2286 ret = -ENOSPC;
2287 lock_chunks(root);
2288
2289 device->total_bytes = old_size;
2290 if (device->writeable)
2291 device->fs_devices->total_rw_bytes += diff;
Josef Bacik2bf64752011-09-26 17:12:22 -04002292 spin_lock(&root->fs_info->free_chunk_lock);
2293 root->fs_info->free_chunk_space += diff;
2294 spin_unlock(&root->fs_info->free_chunk_lock);
Josef Bacikba1bf482009-09-11 16:11:19 -04002295 unlock_chunks(root);
2296 goto done;
Chris Mason8f18cf12008-04-25 16:53:30 -04002297 }
2298
Chris Balld6397ba2009-04-27 07:29:03 -04002299 /* Shrinking succeeded, else we would be at "done". */
Yan, Zhenga22285a2010-05-16 10:48:46 -04002300 trans = btrfs_start_transaction(root, 0);
Tsutomu Itoh98d5dc12011-01-20 06:19:37 +00002301 if (IS_ERR(trans)) {
2302 ret = PTR_ERR(trans);
2303 goto done;
2304 }
2305
Chris Balld6397ba2009-04-27 07:29:03 -04002306 lock_chunks(root);
2307
2308 device->disk_total_bytes = new_size;
2309 /* Now btrfs_update_device() will change the on-disk size. */
2310 ret = btrfs_update_device(trans, device);
2311 if (ret) {
2312 unlock_chunks(root);
2313 btrfs_end_transaction(trans, root);
2314 goto done;
2315 }
2316 WARN_ON(diff > old_total);
2317 btrfs_set_super_total_bytes(super_copy, old_total - diff);
2318 unlock_chunks(root);
2319 btrfs_end_transaction(trans, root);
Chris Mason8f18cf12008-04-25 16:53:30 -04002320done:
2321 btrfs_free_path(path);
2322 return ret;
2323}
2324
Christoph Hellwigb2950862008-12-02 09:54:17 -05002325static int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
Chris Mason0b86a832008-03-24 15:01:56 -04002326 struct btrfs_root *root,
2327 struct btrfs_key *key,
2328 struct btrfs_chunk *chunk, int item_size)
2329{
David Sterba6c417612011-04-13 15:41:04 +02002330 struct btrfs_super_block *super_copy = root->fs_info->super_copy;
Chris Mason0b86a832008-03-24 15:01:56 -04002331 struct btrfs_disk_key disk_key;
2332 u32 array_size;
2333 u8 *ptr;
2334
2335 array_size = btrfs_super_sys_array_size(super_copy);
2336 if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
2337 return -EFBIG;
2338
2339 ptr = super_copy->sys_chunk_array + array_size;
2340 btrfs_cpu_key_to_disk(&disk_key, key);
2341 memcpy(ptr, &disk_key, sizeof(disk_key));
2342 ptr += sizeof(disk_key);
2343 memcpy(ptr, chunk, item_size);
2344 item_size += sizeof(disk_key);
2345 btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
2346 return 0;
2347}
2348
Miao Xieb2117a32011-01-05 10:07:28 +00002349/*
Arne Jansen73c5de02011-04-12 12:07:57 +02002350 * sort the devices in descending order by max_avail, total_avail
Miao Xieb2117a32011-01-05 10:07:28 +00002351 */
Arne Jansen73c5de02011-04-12 12:07:57 +02002352static int btrfs_cmp_device_info(const void *a, const void *b)
Miao Xieb2117a32011-01-05 10:07:28 +00002353{
Arne Jansen73c5de02011-04-12 12:07:57 +02002354 const struct btrfs_device_info *di_a = a;
2355 const struct btrfs_device_info *di_b = b;
Miao Xieb2117a32011-01-05 10:07:28 +00002356
Arne Jansen73c5de02011-04-12 12:07:57 +02002357 if (di_a->max_avail > di_b->max_avail)
2358 return -1;
2359 if (di_a->max_avail < di_b->max_avail)
2360 return 1;
2361 if (di_a->total_avail > di_b->total_avail)
2362 return -1;
2363 if (di_a->total_avail < di_b->total_avail)
2364 return 1;
Miao Xieb2117a32011-01-05 10:07:28 +00002365 return 0;
2366}
2367
/*
 * First phase of chunk allocation: choose devices, carve per-device
 * stripes, and insert the logical->physical mapping for the range
 * [start, start + num_bytes) into the mapping tree.
 *
 * On success the new map_lookup is returned via @map_ret (it is also
 * stashed in the inserted extent_map's bdev field), the logical chunk
 * size via @num_bytes_out and the per-device stripe size via
 * @stripe_size_out.  The chunk tree itself is NOT modified here; that
 * is done later by __finish_chunk_alloc().
 *
 * Returns 0 on success, -ENOSPC if not enough devices/space are
 * available, -ENOMEM on allocation failure, or an error from
 * find_free_dev_extent().
 */
static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
			       struct btrfs_root *extent_root,
			       struct map_lookup **map_ret,
			       u64 *num_bytes_out, u64 *stripe_size_out,
			       u64 start, u64 type)
{
	struct btrfs_fs_info *info = extent_root->fs_info;
	struct btrfs_fs_devices *fs_devices = info->fs_devices;
	struct list_head *cur;
	struct map_lookup *map = NULL;
	struct extent_map_tree *em_tree;
	struct extent_map *em;
	struct btrfs_device_info *devices_info = NULL;
	u64 total_avail;
	int num_stripes;	/* total number of stripes to allocate */
	int sub_stripes;	/* sub_stripes info for map */
	int dev_stripes;	/* stripes per dev */
	int devs_max;		/* max devs to use */
	int devs_min;		/* min devs needed */
	int devs_increment;	/* ndevs has to be a multiple of this */
	int ncopies;		/* how many copies to data has */
	int ret;
	u64 max_stripe_size;
	u64 max_chunk_size;
	u64 stripe_size;
	u64 num_bytes;
	int ndevs;
	int i;
	int j;

	/* RAID1 and DUP cannot be combined; warn and drop DUP */
	if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
	    (type & BTRFS_BLOCK_GROUP_DUP)) {
		WARN_ON(1);
		type &= ~BTRFS_BLOCK_GROUP_DUP;
	}

	if (list_empty(&fs_devices->alloc_list))
		return -ENOSPC;

	/* defaults correspond to a plain SINGLE allocation */
	sub_stripes = 1;
	dev_stripes = 1;
	devs_increment = 1;
	ncopies = 1;
	devs_max = 0;	/* 0 == as many as possible */
	devs_min = 1;

	/*
	 * define the properties of each RAID type.
	 * FIXME: move this to a global table and use it in all RAID
	 * calculation code
	 */
	if (type & (BTRFS_BLOCK_GROUP_DUP)) {
		dev_stripes = 2;
		ncopies = 2;
		devs_max = 1;
	} else if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
		devs_min = 2;
	} else if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
		devs_increment = 2;
		ncopies = 2;
		devs_max = 2;
		devs_min = 2;
	} else if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
		sub_stripes = 2;
		devs_increment = 2;
		ncopies = 2;
		devs_min = 4;
	} else {
		devs_max = 1;
	}

	/* per-purpose size caps: data >= metadata >= system */
	if (type & BTRFS_BLOCK_GROUP_DATA) {
		max_stripe_size = 1024 * 1024 * 1024;
		max_chunk_size = 10 * max_stripe_size;
	} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
		max_stripe_size = 256 * 1024 * 1024;
		max_chunk_size = max_stripe_size;
	} else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
		max_stripe_size = 8 * 1024 * 1024;
		max_chunk_size = 2 * max_stripe_size;
	} else {
		printk(KERN_ERR "btrfs: invalid chunk type 0x%llx requested\n",
		       type);
		BUG_ON(1);
	}

	/* we don't want a chunk larger than 10% of writeable space */
	max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
			     max_chunk_size);

	devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices,
			       GFP_NOFS);
	if (!devices_info)
		return -ENOMEM;

	cur = fs_devices->alloc_list.next;

	/*
	 * in the first pass through the devices list, we gather information
	 * about the available holes on each device.
	 */
	ndevs = 0;
	while (cur != &fs_devices->alloc_list) {
		struct btrfs_device *device;
		u64 max_avail;
		u64 dev_offset;

		device = list_entry(cur, struct btrfs_device, dev_alloc_list);

		cur = cur->next;

		if (!device->writeable) {
			printk(KERN_ERR
			       "btrfs: read-only device in alloc_list\n");
			WARN_ON(1);
			continue;
		}

		if (!device->in_fs_metadata)
			continue;

		if (device->total_bytes > device->bytes_used)
			total_avail = device->total_bytes - device->bytes_used;
		else
			total_avail = 0;

		/* If there is no space on this device, skip it. */
		if (total_avail == 0)
			continue;

		ret = find_free_dev_extent(trans, device,
					   max_stripe_size * dev_stripes,
					   &dev_offset, &max_avail);
		if (ret && ret != -ENOSPC)
			goto error;

		/* ret == 0: hole at least as large as requested; use the
		 * requested amount */
		if (ret == 0)
			max_avail = max_stripe_size * dev_stripes;

		/* hole too small to hold even one minimal stripe set */
		if (max_avail < BTRFS_STRIPE_LEN * dev_stripes)
			continue;

		devices_info[ndevs].dev_offset = dev_offset;
		devices_info[ndevs].max_avail = max_avail;
		devices_info[ndevs].total_avail = total_avail;
		devices_info[ndevs].dev = device;
		++ndevs;
	}

	/*
	 * now sort the devices by hole size / available space
	 */
	sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
	     btrfs_cmp_device_info, NULL);

	/* round down to number of usable stripes */
	ndevs -= ndevs % devs_increment;

	if (ndevs < devs_increment * sub_stripes || ndevs < devs_min) {
		ret = -ENOSPC;
		goto error;
	}

	if (devs_max && ndevs > devs_max)
		ndevs = devs_max;
	/*
	 * the primary goal is to maximize the number of stripes, so use as many
	 * devices as possible, even if the stripes are not maximum sized.
	 */
	/* list is sorted descending, so the last entry is the smallest
	 * hole every chosen device can provide */
	stripe_size = devices_info[ndevs-1].max_avail;
	num_stripes = ndevs * dev_stripes;

	/* clamp total physical bytes to ncopies * max logical chunk size */
	if (stripe_size * num_stripes > max_chunk_size * ncopies) {
		stripe_size = max_chunk_size * ncopies;
		do_div(stripe_size, num_stripes);
	}

	do_div(stripe_size, dev_stripes);
	/* round the stripe size down to a BTRFS_STRIPE_LEN multiple */
	do_div(stripe_size, BTRFS_STRIPE_LEN);
	stripe_size *= BTRFS_STRIPE_LEN;

	map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
	if (!map) {
		ret = -ENOMEM;
		goto error;
	}
	map->num_stripes = num_stripes;

	/* lay out dev_stripes consecutive stripes on each chosen device */
	for (i = 0; i < ndevs; ++i) {
		for (j = 0; j < dev_stripes; ++j) {
			int s = i * dev_stripes + j;
			map->stripes[s].dev = devices_info[i].dev;
			map->stripes[s].physical = devices_info[i].dev_offset +
						   j * stripe_size;
		}
	}
	map->sector_size = extent_root->sectorsize;
	map->stripe_len = BTRFS_STRIPE_LEN;
	map->io_align = BTRFS_STRIPE_LEN;
	map->io_width = BTRFS_STRIPE_LEN;
	map->type = type;
	map->sub_stripes = sub_stripes;

	*map_ret = map;
	/* logical size: each of the ncopies replicas covers the chunk once */
	num_bytes = stripe_size * (num_stripes / ncopies);

	*stripe_size_out = stripe_size;
	*num_bytes_out = num_bytes;

	trace_btrfs_chunk_alloc(info->chunk_root, map, start, num_bytes);

	em = alloc_extent_map();
	if (!em) {
		ret = -ENOMEM;
		goto error;
	}
	/* the mapping tree stores the map_lookup in the (otherwise
	 * unused) bdev pointer */
	em->bdev = (struct block_device *)map;
	em->start = start;
	em->len = num_bytes;
	em->block_start = 0;
	em->block_len = em->len;

	em_tree = &extent_root->fs_info->mapping_tree.map_tree;
	write_lock(&em_tree->lock);
	ret = add_extent_mapping(em_tree, em);
	write_unlock(&em_tree->lock);
	BUG_ON(ret);
	/* drop our lookup ref; the tree keeps its own reference */
	free_extent_map(em);

	ret = btrfs_make_block_group(trans, extent_root, 0, type,
				     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
				     start, num_bytes);
	BUG_ON(ret);

	/* record a dev extent for every stripe of the new chunk */
	for (i = 0; i < map->num_stripes; ++i) {
		struct btrfs_device *device;
		u64 dev_offset;

		device = map->stripes[i].dev;
		dev_offset = map->stripes[i].physical;

		ret = btrfs_alloc_dev_extent(trans, device,
				info->chunk_root->root_key.objectid,
				BTRFS_FIRST_CHUNK_TREE_OBJECTID,
				start, dev_offset, stripe_size);
		BUG_ON(ret);
	}

	kfree(devices_info);
	return 0;

error:
	/* map is only non-NULL here if it was never handed to an em */
	kfree(map);
	kfree(devices_info);
	return ret;
}
2624
/*
 * Second phase of chunk allocation: record a chunk built by
 * __btrfs_alloc_chunk() in the chunk tree.
 *
 * Charges @stripe_size of usage to every stripe's device (updating the
 * device items and fs_info->free_chunk_space), builds the on-disk
 * btrfs_chunk item from @map and inserts it into the chunk root under
 * (BTRFS_FIRST_CHUNK_TREE_OBJECTID, CHUNK_ITEM, @chunk_offset).
 * SYSTEM chunks are additionally appended to the superblock's
 * sys_chunk_array via btrfs_add_system_chunk().
 *
 * Returns 0 or -ENOMEM; other failures are treated as fatal (BUG_ON).
 */
static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
				struct btrfs_root *extent_root,
				struct map_lookup *map, u64 chunk_offset,
				u64 chunk_size, u64 stripe_size)
{
	u64 dev_offset;
	struct btrfs_key key;
	struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
	struct btrfs_device *device;
	struct btrfs_chunk *chunk;
	struct btrfs_stripe *stripe;
	size_t item_size = btrfs_chunk_item_size(map->num_stripes);
	int index = 0;
	int ret;

	chunk = kzalloc(item_size, GFP_NOFS);
	if (!chunk)
		return -ENOMEM;

	/* charge the new stripes to each device and persist the devices */
	index = 0;
	while (index < map->num_stripes) {
		device = map->stripes[index].dev;
		device->bytes_used += stripe_size;
		ret = btrfs_update_device(trans, device);
		BUG_ON(ret);
		index++;
	}

	spin_lock(&extent_root->fs_info->free_chunk_lock);
	extent_root->fs_info->free_chunk_space -= (stripe_size *
						   map->num_stripes);
	spin_unlock(&extent_root->fs_info->free_chunk_lock);

	/* fill the per-stripe part of the chunk item */
	index = 0;
	stripe = &chunk->stripe;
	while (index < map->num_stripes) {
		device = map->stripes[index].dev;
		dev_offset = map->stripes[index].physical;

		btrfs_set_stack_stripe_devid(stripe, device->devid);
		btrfs_set_stack_stripe_offset(stripe, dev_offset);
		memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
		stripe++;
		index++;
	}

	btrfs_set_stack_chunk_length(chunk, chunk_size);
	btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
	btrfs_set_stack_chunk_stripe_len(chunk, map->stripe_len);
	btrfs_set_stack_chunk_type(chunk, map->type);
	btrfs_set_stack_chunk_num_stripes(chunk, map->num_stripes);
	btrfs_set_stack_chunk_io_align(chunk, map->stripe_len);
	btrfs_set_stack_chunk_io_width(chunk, map->stripe_len);
	btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);
	btrfs_set_stack_chunk_sub_stripes(chunk, map->sub_stripes);

	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
	key.type = BTRFS_CHUNK_ITEM_KEY;
	key.offset = chunk_offset;

	ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size);
	BUG_ON(ret);

	/* SYSTEM chunks are mirrored into the superblock's chunk array */
	if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
		ret = btrfs_add_system_chunk(trans, chunk_root, &key, chunk,
					     item_size);
		BUG_ON(ret);
	}

	kfree(chunk);
	return 0;
}
2697
2698/*
2699 * Chunk allocation falls into two parts. The first part does works
2700 * that make the new allocated chunk useable, but not do any operation
2701 * that modifies the chunk tree. The second part does the works that
2702 * require modifying the chunk tree. This division is important for the
2703 * bootstrap process of adding storage to a seed btrfs.
2704 */
2705int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2706 struct btrfs_root *extent_root, u64 type)
2707{
2708 u64 chunk_offset;
2709 u64 chunk_size;
2710 u64 stripe_size;
2711 struct map_lookup *map;
2712 struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
2713 int ret;
2714
2715 ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID,
2716 &chunk_offset);
2717 if (ret)
2718 return ret;
2719
2720 ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
2721 &stripe_size, chunk_offset, type);
2722 if (ret)
2723 return ret;
2724
2725 ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset,
2726 chunk_size, stripe_size);
2727 BUG_ON(ret);
2728 return 0;
2729}
2730
/*
 * Bootstrap the chunk layout on the first writeable device of a
 * filesystem (used when a seed fs gains writable storage): allocate one
 * metadata chunk and one SYSTEM chunk, register @device in the chunk
 * tree, and only afterwards run the chunk-tree-modifying second phase
 * for both chunks.
 *
 * Returns 0 on success or an error from find_next_chunk(); later
 * failures are treated as fatal (BUG_ON).
 */
static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
					 struct btrfs_root *root,
					 struct btrfs_device *device)
{
	u64 chunk_offset;
	u64 sys_chunk_offset;
	u64 chunk_size;
	u64 sys_chunk_size;
	u64 stripe_size;
	u64 sys_stripe_size;
	u64 alloc_profile;
	struct map_lookup *map;
	struct map_lookup *sys_map;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_root *extent_root = fs_info->extent_root;
	int ret;

	ret = find_next_chunk(fs_info->chunk_root,
			      BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset);
	if (ret)
		return ret;

	/* phase one for the metadata chunk */
	alloc_profile = BTRFS_BLOCK_GROUP_METADATA |
			(fs_info->metadata_alloc_profile &
			 fs_info->avail_metadata_alloc_bits);
	alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);

	ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
				  &stripe_size, chunk_offset, alloc_profile);
	BUG_ON(ret);

	/* place the system chunk logically right after the metadata chunk */
	sys_chunk_offset = chunk_offset + chunk_size;

	/* phase one for the system chunk */
	alloc_profile = BTRFS_BLOCK_GROUP_SYSTEM |
			(fs_info->system_alloc_profile &
			 fs_info->avail_system_alloc_bits);
	alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);

	ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map,
				  &sys_chunk_size, &sys_stripe_size,
				  sys_chunk_offset, alloc_profile);
	BUG_ON(ret);

	ret = btrfs_add_device(trans, fs_info->chunk_root, device);
	BUG_ON(ret);

	/*
	 * Modifying chunk tree needs allocating new blocks from both
	 * system block group and metadata block group. So we only can
	 * do operations require modifying the chunk tree after both
	 * block groups were created.
	 */
	ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset,
				   chunk_size, stripe_size);
	BUG_ON(ret);

	ret = __finish_chunk_alloc(trans, extent_root, sys_map,
				   sys_chunk_offset, sys_chunk_size,
				   sys_stripe_size);
	BUG_ON(ret);
	return 0;
}
2793
2794int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
2795{
2796 struct extent_map *em;
2797 struct map_lookup *map;
2798 struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
2799 int readonly = 0;
2800 int i;
2801
Chris Mason890871b2009-09-02 16:24:52 -04002802 read_lock(&map_tree->map_tree.lock);
Yan Zheng2b820322008-11-17 21:11:30 -05002803 em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
Chris Mason890871b2009-09-02 16:24:52 -04002804 read_unlock(&map_tree->map_tree.lock);
Yan Zheng2b820322008-11-17 21:11:30 -05002805 if (!em)
2806 return 1;
2807
Josef Bacikf48b9072010-01-27 02:07:59 +00002808 if (btrfs_test_opt(root, DEGRADED)) {
2809 free_extent_map(em);
2810 return 0;
2811 }
2812
Yan Zheng2b820322008-11-17 21:11:30 -05002813 map = (struct map_lookup *)em->bdev;
2814 for (i = 0; i < map->num_stripes; i++) {
2815 if (!map->stripes[i].dev->writeable) {
2816 readonly = 1;
2817 break;
2818 }
2819 }
2820 free_extent_map(em);
2821 return readonly;
Chris Mason0b86a832008-03-24 15:01:56 -04002822}
2823
/* Initialize the logical->physical chunk mapping tree: just sets up the
 * underlying extent_map_tree. */
void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
{
	extent_map_tree_init(&tree->map_tree);
}
2828
2829void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
2830{
2831 struct extent_map *em;
2832
Chris Masond3977122009-01-05 21:25:51 -05002833 while (1) {
Chris Mason890871b2009-09-02 16:24:52 -04002834 write_lock(&tree->map_tree.lock);
Chris Mason0b86a832008-03-24 15:01:56 -04002835 em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
2836 if (em)
2837 remove_extent_mapping(&tree->map_tree, em);
Chris Mason890871b2009-09-02 16:24:52 -04002838 write_unlock(&tree->map_tree.lock);
Chris Mason0b86a832008-03-24 15:01:56 -04002839 if (!em)
2840 break;
2841 kfree(em->bdev);
2842 /* once for us */
2843 free_extent_map(em);
2844 /* once for the tree */
2845 free_extent_map(em);
2846 }
2847}
2848
Chris Masonf1885912008-04-09 16:28:12 -04002849int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
2850{
2851 struct extent_map *em;
2852 struct map_lookup *map;
2853 struct extent_map_tree *em_tree = &map_tree->map_tree;
2854 int ret;
2855
Chris Mason890871b2009-09-02 16:24:52 -04002856 read_lock(&em_tree->lock);
Chris Masonf1885912008-04-09 16:28:12 -04002857 em = lookup_extent_mapping(em_tree, logical, len);
Chris Mason890871b2009-09-02 16:24:52 -04002858 read_unlock(&em_tree->lock);
Chris Masonf1885912008-04-09 16:28:12 -04002859 BUG_ON(!em);
2860
2861 BUG_ON(em->start > logical || em->start + em->len < logical);
2862 map = (struct map_lookup *)em->bdev;
2863 if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
2864 ret = map->num_stripes;
Chris Mason321aecc2008-04-16 10:49:51 -04002865 else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
2866 ret = map->sub_stripes;
Chris Masonf1885912008-04-09 16:28:12 -04002867 else
2868 ret = 1;
2869 free_extent_map(em);
Chris Masonf1885912008-04-09 16:28:12 -04002870 return ret;
2871}
2872
Chris Masondfe25022008-05-13 13:46:40 -04002873static int find_live_mirror(struct map_lookup *map, int first, int num,
2874 int optimal)
2875{
2876 int i;
2877 if (map->stripes[optimal].dev->bdev)
2878 return optimal;
2879 for (i = first; i < first + num; i++) {
2880 if (map->stripes[i].dev->bdev)
2881 return i;
2882 }
2883 /* we couldn't find one that doesn't fail. Just return something
2884 * and the io error handling code will clean up eventually
2885 */
2886 return optimal;
2887}
2888
Chris Masonf2d8d742008-04-21 10:03:05 -04002889static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
2890 u64 logical, u64 *length,
Jan Schmidta1d3c472011-08-04 17:15:33 +02002891 struct btrfs_bio **bbio_ret,
Jens Axboe7eaceac2011-03-10 08:52:07 +01002892 int mirror_num)
Chris Mason0b86a832008-03-24 15:01:56 -04002893{
2894 struct extent_map *em;
2895 struct map_lookup *map;
2896 struct extent_map_tree *em_tree = &map_tree->map_tree;
2897 u64 offset;
Chris Mason593060d2008-03-25 16:50:33 -04002898 u64 stripe_offset;
Li Dongyangfce3bb92011-03-24 10:24:26 +00002899 u64 stripe_end_offset;
Chris Mason593060d2008-03-25 16:50:33 -04002900 u64 stripe_nr;
Li Dongyangfce3bb92011-03-24 10:24:26 +00002901 u64 stripe_nr_orig;
2902 u64 stripe_nr_end;
Chris Masoncea9e442008-04-09 16:28:12 -04002903 int stripes_allocated = 8;
Chris Mason321aecc2008-04-16 10:49:51 -04002904 int stripes_required = 1;
Chris Mason593060d2008-03-25 16:50:33 -04002905 int stripe_index;
Chris Masoncea9e442008-04-09 16:28:12 -04002906 int i;
Chris Masonf2d8d742008-04-21 10:03:05 -04002907 int num_stripes;
Chris Masona236aed2008-04-29 09:38:00 -04002908 int max_errors = 0;
Jan Schmidta1d3c472011-08-04 17:15:33 +02002909 struct btrfs_bio *bbio = NULL;
Chris Mason0b86a832008-03-24 15:01:56 -04002910
Jan Schmidta1d3c472011-08-04 17:15:33 +02002911 if (bbio_ret && !(rw & (REQ_WRITE | REQ_DISCARD)))
Chris Masoncea9e442008-04-09 16:28:12 -04002912 stripes_allocated = 1;
Chris Masoncea9e442008-04-09 16:28:12 -04002913again:
Jan Schmidta1d3c472011-08-04 17:15:33 +02002914 if (bbio_ret) {
2915 bbio = kzalloc(btrfs_bio_size(stripes_allocated),
Chris Masoncea9e442008-04-09 16:28:12 -04002916 GFP_NOFS);
Jan Schmidta1d3c472011-08-04 17:15:33 +02002917 if (!bbio)
Chris Masoncea9e442008-04-09 16:28:12 -04002918 return -ENOMEM;
Chris Masona236aed2008-04-29 09:38:00 -04002919
Jan Schmidta1d3c472011-08-04 17:15:33 +02002920 atomic_set(&bbio->error, 0);
Chris Masoncea9e442008-04-09 16:28:12 -04002921 }
Chris Mason0b86a832008-03-24 15:01:56 -04002922
Chris Mason890871b2009-09-02 16:24:52 -04002923 read_lock(&em_tree->lock);
Chris Mason0b86a832008-03-24 15:01:56 -04002924 em = lookup_extent_mapping(em_tree, logical, *length);
Chris Mason890871b2009-09-02 16:24:52 -04002925 read_unlock(&em_tree->lock);
Chris Masonf2d8d742008-04-21 10:03:05 -04002926
Chris Mason3b951512008-04-17 11:29:12 -04002927 if (!em) {
Chris Masond3977122009-01-05 21:25:51 -05002928 printk(KERN_CRIT "unable to find logical %llu len %llu\n",
2929 (unsigned long long)logical,
2930 (unsigned long long)*length);
Chris Masonf2d8d742008-04-21 10:03:05 -04002931 BUG();
Chris Mason3b951512008-04-17 11:29:12 -04002932 }
Chris Mason0b86a832008-03-24 15:01:56 -04002933
2934 BUG_ON(em->start > logical || em->start + em->len < logical);
2935 map = (struct map_lookup *)em->bdev;
2936 offset = logical - em->start;
Chris Mason593060d2008-03-25 16:50:33 -04002937
Chris Masonf1885912008-04-09 16:28:12 -04002938 if (mirror_num > map->num_stripes)
2939 mirror_num = 0;
2940
Jan Schmidta1d3c472011-08-04 17:15:33 +02002941 /* if our btrfs_bio struct is too small, back off and try again */
Christoph Hellwig7b6d91d2010-08-07 18:20:39 +02002942 if (rw & REQ_WRITE) {
Chris Mason321aecc2008-04-16 10:49:51 -04002943 if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
2944 BTRFS_BLOCK_GROUP_DUP)) {
2945 stripes_required = map->num_stripes;
Chris Masona236aed2008-04-29 09:38:00 -04002946 max_errors = 1;
Chris Mason321aecc2008-04-16 10:49:51 -04002947 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
2948 stripes_required = map->sub_stripes;
Chris Masona236aed2008-04-29 09:38:00 -04002949 max_errors = 1;
Chris Mason321aecc2008-04-16 10:49:51 -04002950 }
2951 }
Li Dongyangfce3bb92011-03-24 10:24:26 +00002952 if (rw & REQ_DISCARD) {
2953 if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
2954 BTRFS_BLOCK_GROUP_RAID1 |
2955 BTRFS_BLOCK_GROUP_DUP |
2956 BTRFS_BLOCK_GROUP_RAID10)) {
2957 stripes_required = map->num_stripes;
2958 }
2959 }
Jan Schmidta1d3c472011-08-04 17:15:33 +02002960 if (bbio_ret && (rw & (REQ_WRITE | REQ_DISCARD)) &&
Chris Mason321aecc2008-04-16 10:49:51 -04002961 stripes_allocated < stripes_required) {
Chris Masoncea9e442008-04-09 16:28:12 -04002962 stripes_allocated = map->num_stripes;
Chris Masoncea9e442008-04-09 16:28:12 -04002963 free_extent_map(em);
Jan Schmidta1d3c472011-08-04 17:15:33 +02002964 kfree(bbio);
Chris Masoncea9e442008-04-09 16:28:12 -04002965 goto again;
2966 }
Chris Mason593060d2008-03-25 16:50:33 -04002967 stripe_nr = offset;
2968 /*
2969 * stripe_nr counts the total number of stripes we have to stride
2970 * to get to this block
2971 */
2972 do_div(stripe_nr, map->stripe_len);
2973
2974 stripe_offset = stripe_nr * map->stripe_len;
2975 BUG_ON(offset < stripe_offset);
2976
2977 /* stripe_offset is the offset of this block in its stripe*/
2978 stripe_offset = offset - stripe_offset;
2979
Li Dongyangfce3bb92011-03-24 10:24:26 +00002980 if (rw & REQ_DISCARD)
2981 *length = min_t(u64, em->len - offset, *length);
2982 else if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
2983 BTRFS_BLOCK_GROUP_RAID1 |
2984 BTRFS_BLOCK_GROUP_RAID10 |
2985 BTRFS_BLOCK_GROUP_DUP)) {
Chris Masoncea9e442008-04-09 16:28:12 -04002986 /* we limit the length of each bio to what fits in a stripe */
2987 *length = min_t(u64, em->len - offset,
Li Dongyangfce3bb92011-03-24 10:24:26 +00002988 map->stripe_len - stripe_offset);
Chris Masoncea9e442008-04-09 16:28:12 -04002989 } else {
2990 *length = em->len - offset;
2991 }
Chris Masonf2d8d742008-04-21 10:03:05 -04002992
Jan Schmidta1d3c472011-08-04 17:15:33 +02002993 if (!bbio_ret)
Chris Masoncea9e442008-04-09 16:28:12 -04002994 goto out;
2995
Chris Masonf2d8d742008-04-21 10:03:05 -04002996 num_stripes = 1;
Chris Masoncea9e442008-04-09 16:28:12 -04002997 stripe_index = 0;
Li Dongyangfce3bb92011-03-24 10:24:26 +00002998 stripe_nr_orig = stripe_nr;
2999 stripe_nr_end = (offset + *length + map->stripe_len - 1) &
3000 (~(map->stripe_len - 1));
3001 do_div(stripe_nr_end, map->stripe_len);
3002 stripe_end_offset = stripe_nr_end * map->stripe_len -
3003 (offset + *length);
3004 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
3005 if (rw & REQ_DISCARD)
3006 num_stripes = min_t(u64, map->num_stripes,
3007 stripe_nr_end - stripe_nr_orig);
3008 stripe_index = do_div(stripe_nr, map->num_stripes);
3009 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
Linus Torvalds212a17a2011-03-28 15:31:05 -07003010 if (rw & (REQ_WRITE | REQ_DISCARD))
Chris Masonf2d8d742008-04-21 10:03:05 -04003011 num_stripes = map->num_stripes;
Chris Mason2fff7342008-04-29 14:12:09 -04003012 else if (mirror_num)
Chris Masonf1885912008-04-09 16:28:12 -04003013 stripe_index = mirror_num - 1;
Chris Masondfe25022008-05-13 13:46:40 -04003014 else {
3015 stripe_index = find_live_mirror(map, 0,
3016 map->num_stripes,
3017 current->pid % map->num_stripes);
Jan Schmidta1d3c472011-08-04 17:15:33 +02003018 mirror_num = stripe_index + 1;
Chris Masondfe25022008-05-13 13:46:40 -04003019 }
Chris Mason2fff7342008-04-29 14:12:09 -04003020
Chris Mason611f0e02008-04-03 16:29:03 -04003021 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
Jan Schmidta1d3c472011-08-04 17:15:33 +02003022 if (rw & (REQ_WRITE | REQ_DISCARD)) {
Chris Masonf2d8d742008-04-21 10:03:05 -04003023 num_stripes = map->num_stripes;
Jan Schmidta1d3c472011-08-04 17:15:33 +02003024 } else if (mirror_num) {
Chris Masonf1885912008-04-09 16:28:12 -04003025 stripe_index = mirror_num - 1;
Jan Schmidta1d3c472011-08-04 17:15:33 +02003026 } else {
3027 mirror_num = 1;
3028 }
Chris Mason2fff7342008-04-29 14:12:09 -04003029
Chris Mason321aecc2008-04-16 10:49:51 -04003030 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
3031 int factor = map->num_stripes / map->sub_stripes;
Chris Mason321aecc2008-04-16 10:49:51 -04003032
3033 stripe_index = do_div(stripe_nr, factor);
3034 stripe_index *= map->sub_stripes;
3035
Jens Axboe7eaceac2011-03-10 08:52:07 +01003036 if (rw & REQ_WRITE)
Chris Masonf2d8d742008-04-21 10:03:05 -04003037 num_stripes = map->sub_stripes;
Li Dongyangfce3bb92011-03-24 10:24:26 +00003038 else if (rw & REQ_DISCARD)
3039 num_stripes = min_t(u64, map->sub_stripes *
3040 (stripe_nr_end - stripe_nr_orig),
3041 map->num_stripes);
Chris Mason321aecc2008-04-16 10:49:51 -04003042 else if (mirror_num)
3043 stripe_index += mirror_num - 1;
Chris Masondfe25022008-05-13 13:46:40 -04003044 else {
3045 stripe_index = find_live_mirror(map, stripe_index,
3046 map->sub_stripes, stripe_index +
3047 current->pid % map->sub_stripes);
Jan Schmidta1d3c472011-08-04 17:15:33 +02003048 mirror_num = stripe_index + 1;
Chris Masondfe25022008-05-13 13:46:40 -04003049 }
Chris Mason8790d502008-04-03 16:29:03 -04003050 } else {
3051 /*
3052 * after this do_div call, stripe_nr is the number of stripes
3053 * on this device we have to walk to find the data, and
3054 * stripe_index is the number of our device in the stripe array
3055 */
3056 stripe_index = do_div(stripe_nr, map->num_stripes);
Jan Schmidta1d3c472011-08-04 17:15:33 +02003057 mirror_num = stripe_index + 1;
Chris Mason8790d502008-04-03 16:29:03 -04003058 }
Chris Mason593060d2008-03-25 16:50:33 -04003059 BUG_ON(stripe_index >= map->num_stripes);
Chris Mason593060d2008-03-25 16:50:33 -04003060
Li Dongyangfce3bb92011-03-24 10:24:26 +00003061 if (rw & REQ_DISCARD) {
3062 for (i = 0; i < num_stripes; i++) {
Jan Schmidta1d3c472011-08-04 17:15:33 +02003063 bbio->stripes[i].physical =
Chris Masonf2d8d742008-04-21 10:03:05 -04003064 map->stripes[stripe_index].physical +
3065 stripe_offset + stripe_nr * map->stripe_len;
Jan Schmidta1d3c472011-08-04 17:15:33 +02003066 bbio->stripes[i].dev = map->stripes[stripe_index].dev;
Li Dongyangfce3bb92011-03-24 10:24:26 +00003067
3068 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
3069 u64 stripes;
Chris Masond9d04872011-03-27 21:23:21 -04003070 u32 last_stripe = 0;
Li Dongyangfce3bb92011-03-24 10:24:26 +00003071 int j;
3072
Chris Masond9d04872011-03-27 21:23:21 -04003073 div_u64_rem(stripe_nr_end - 1,
3074 map->num_stripes,
3075 &last_stripe);
3076
Li Dongyangfce3bb92011-03-24 10:24:26 +00003077 for (j = 0; j < map->num_stripes; j++) {
Chris Masond9d04872011-03-27 21:23:21 -04003078 u32 test;
3079
3080 div_u64_rem(stripe_nr_end - 1 - j,
3081 map->num_stripes, &test);
3082 if (test == stripe_index)
Li Dongyangfce3bb92011-03-24 10:24:26 +00003083 break;
3084 }
3085 stripes = stripe_nr_end - 1 - j;
3086 do_div(stripes, map->num_stripes);
Jan Schmidta1d3c472011-08-04 17:15:33 +02003087 bbio->stripes[i].length = map->stripe_len *
Li Dongyangfce3bb92011-03-24 10:24:26 +00003088 (stripes - stripe_nr + 1);
3089
3090 if (i == 0) {
Jan Schmidta1d3c472011-08-04 17:15:33 +02003091 bbio->stripes[i].length -=
Li Dongyangfce3bb92011-03-24 10:24:26 +00003092 stripe_offset;
3093 stripe_offset = 0;
3094 }
3095 if (stripe_index == last_stripe)
Jan Schmidta1d3c472011-08-04 17:15:33 +02003096 bbio->stripes[i].length -=
Li Dongyangfce3bb92011-03-24 10:24:26 +00003097 stripe_end_offset;
3098 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
3099 u64 stripes;
3100 int j;
3101 int factor = map->num_stripes /
3102 map->sub_stripes;
Chris Masond9d04872011-03-27 21:23:21 -04003103 u32 last_stripe = 0;
3104
3105 div_u64_rem(stripe_nr_end - 1,
3106 factor, &last_stripe);
Li Dongyangfce3bb92011-03-24 10:24:26 +00003107 last_stripe *= map->sub_stripes;
3108
3109 for (j = 0; j < factor; j++) {
Chris Masond9d04872011-03-27 21:23:21 -04003110 u32 test;
3111
3112 div_u64_rem(stripe_nr_end - 1 - j,
3113 factor, &test);
3114
3115 if (test ==
Li Dongyangfce3bb92011-03-24 10:24:26 +00003116 stripe_index / map->sub_stripes)
3117 break;
3118 }
3119 stripes = stripe_nr_end - 1 - j;
3120 do_div(stripes, factor);
Jan Schmidta1d3c472011-08-04 17:15:33 +02003121 bbio->stripes[i].length = map->stripe_len *
Li Dongyangfce3bb92011-03-24 10:24:26 +00003122 (stripes - stripe_nr + 1);
3123
3124 if (i < map->sub_stripes) {
Jan Schmidta1d3c472011-08-04 17:15:33 +02003125 bbio->stripes[i].length -=
Li Dongyangfce3bb92011-03-24 10:24:26 +00003126 stripe_offset;
3127 if (i == map->sub_stripes - 1)
3128 stripe_offset = 0;
3129 }
3130 if (stripe_index >= last_stripe &&
3131 stripe_index <= (last_stripe +
3132 map->sub_stripes - 1)) {
Jan Schmidta1d3c472011-08-04 17:15:33 +02003133 bbio->stripes[i].length -=
Li Dongyangfce3bb92011-03-24 10:24:26 +00003134 stripe_end_offset;
3135 }
3136 } else
Jan Schmidta1d3c472011-08-04 17:15:33 +02003137 bbio->stripes[i].length = *length;
Li Dongyangfce3bb92011-03-24 10:24:26 +00003138
3139 stripe_index++;
3140 if (stripe_index == map->num_stripes) {
3141 /* This could only happen for RAID0/10 */
3142 stripe_index = 0;
3143 stripe_nr++;
3144 }
Chris Masonf2d8d742008-04-21 10:03:05 -04003145 }
Li Dongyangfce3bb92011-03-24 10:24:26 +00003146 } else {
3147 for (i = 0; i < num_stripes; i++) {
Jan Schmidta1d3c472011-08-04 17:15:33 +02003148 bbio->stripes[i].physical =
Linus Torvalds212a17a2011-03-28 15:31:05 -07003149 map->stripes[stripe_index].physical +
3150 stripe_offset +
3151 stripe_nr * map->stripe_len;
Jan Schmidta1d3c472011-08-04 17:15:33 +02003152 bbio->stripes[i].dev =
Linus Torvalds212a17a2011-03-28 15:31:05 -07003153 map->stripes[stripe_index].dev;
Li Dongyangfce3bb92011-03-24 10:24:26 +00003154 stripe_index++;
3155 }
Chris Mason593060d2008-03-25 16:50:33 -04003156 }
Jan Schmidta1d3c472011-08-04 17:15:33 +02003157 if (bbio_ret) {
3158 *bbio_ret = bbio;
3159 bbio->num_stripes = num_stripes;
3160 bbio->max_errors = max_errors;
3161 bbio->mirror_num = mirror_num;
Chris Masonf2d8d742008-04-21 10:03:05 -04003162 }
Chris Masoncea9e442008-04-09 16:28:12 -04003163out:
Chris Mason0b86a832008-03-24 15:01:56 -04003164 free_extent_map(em);
Chris Mason0b86a832008-03-24 15:01:56 -04003165 return 0;
3166}
3167
Chris Masonf2d8d742008-04-21 10:03:05 -04003168int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
3169 u64 logical, u64 *length,
Jan Schmidta1d3c472011-08-04 17:15:33 +02003170 struct btrfs_bio **bbio_ret, int mirror_num)
Chris Masonf2d8d742008-04-21 10:03:05 -04003171{
Jan Schmidta1d3c472011-08-04 17:15:33 +02003172 return __btrfs_map_block(map_tree, rw, logical, length, bbio_ret,
Jens Axboe7eaceac2011-03-10 08:52:07 +01003173 mirror_num);
Chris Masonf2d8d742008-04-21 10:03:05 -04003174}
3175
/*
 * Reverse map: given a physical byte offset on a device, find every
 * logical address inside the chunk at @chunk_start that maps to it.
 *
 * On success *logical points at a kzalloc'd array of logical byte
 * numbers (ownership passes to the caller, who is expected to kfree
 * it), *naddrs is the entry count and *stripe_len the chunk's stripe
 * length.  @devid == 0 matches any device.  Always returns 0; a
 * missing/mismatched extent mapping or failed allocation BUG()s.
 */
int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
		     u64 chunk_start, u64 physical, u64 devid,
		     u64 **logical, int *naddrs, int *stripe_len)
{
	struct extent_map_tree *em_tree = &map_tree->map_tree;
	struct extent_map *em;
	struct map_lookup *map;
	u64 *buf;
	u64 bytenr;
	u64 length;
	u64 stripe_nr;
	int i, j, nr = 0;

	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, chunk_start, 1);
	read_unlock(&em_tree->lock);

	BUG_ON(!em || em->start != chunk_start);
	/* the chunk's stripe layout is stashed in em->bdev (not a bdev) */
	map = (struct map_lookup *)em->bdev;

	/*
	 * length of the chunk's footprint on a single device: striped
	 * profiles spread em->len over the stripes (RAID10 over the
	 * number of copies-groups, RAID0 over all stripes).
	 */
	length = em->len;
	if (map->type & BTRFS_BLOCK_GROUP_RAID10)
		do_div(length, map->num_stripes / map->sub_stripes);
	else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
		do_div(length, map->num_stripes);

	buf = kzalloc(sizeof(u64) * map->num_stripes, GFP_NOFS);
	BUG_ON(!buf);

	for (i = 0; i < map->num_stripes; i++) {
		/* skip stripes on other devices / outside the range */
		if (devid && map->stripes[i].dev->devid != devid)
			continue;
		if (map->stripes[i].physical > physical ||
		    map->stripes[i].physical + length <= physical)
			continue;

		stripe_nr = physical - map->stripes[i].physical;
		do_div(stripe_nr, map->stripe_len);

		if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
			stripe_nr = stripe_nr * map->num_stripes + i;
			do_div(stripe_nr, map->sub_stripes);
		} else if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
			stripe_nr = stripe_nr * map->num_stripes + i;
		}
		bytenr = chunk_start + stripe_nr * map->stripe_len;
		WARN_ON(nr >= map->num_stripes);
		/* mirrored profiles can yield the same logical address
		 * more than once -- deduplicate before recording it */
		for (j = 0; j < nr; j++) {
			if (buf[j] == bytenr)
				break;
		}
		if (j == nr) {
			WARN_ON(nr >= map->num_stripes);
			buf[nr++] = bytenr;
		}
	}

	*logical = buf;
	*naddrs = nr;
	*stripe_len = map->stripe_len;

	free_extent_map(em);
	return 0;
}
3240
/*
 * Completion callback for every per-stripe bio issued by btrfs_map_bio().
 *
 * Each failing stripe bumps bbio->error.  Only the final completion
 * (tracked by bbio->stripes_pending) finishes the original bio: the
 * overall I/O succeeds as long as no more than bbio->max_errors stripes
 * failed.  Cloned bios are released with bio_put(); the original bio is
 * restored to the caller's private/end_io and the mirror number is
 * passed back to the upper layer encoded in bi_bdev.
 */
static void btrfs_end_bio(struct bio *bio, int err)
{
	struct btrfs_bio *bbio = bio->bi_private;
	int is_orig_bio = 0;

	if (err)
		atomic_inc(&bbio->error);

	if (bio == bbio->orig_bio)
		is_orig_bio = 1;

	/* last stripe to complete ends the original bio */
	if (atomic_dec_and_test(&bbio->stripes_pending)) {
		if (!is_orig_bio) {
			/* drop our clone and switch to the original */
			bio_put(bio);
			bio = bbio->orig_bio;
		}
		bio->bi_private = bbio->private;
		bio->bi_end_io = bbio->end_io;
		/* hand the mirror number to the upper layer via bi_bdev */
		bio->bi_bdev = (struct block_device *)
					(unsigned long)bbio->mirror_num;
		/* only send an error to the higher layers if it is
		 * beyond the tolerance of the multi-bio
		 */
		if (atomic_read(&bbio->error) > bbio->max_errors) {
			err = -EIO;
		} else {
			/*
			 * this bio is actually up to date, we didn't
			 * go over the max number of errors
			 */
			set_bit(BIO_UPTODATE, &bio->bi_flags);
			err = 0;
		}
		kfree(bbio);

		bio_endio(bio, err);
	} else if (!is_orig_bio) {
		bio_put(bio);
	}
}
3281
/*
 * Bundle describing one bio to be submitted from a worker thread.
 * NOTE(review): no user of this struct is visible in this part of the
 * file -- schedule_bio() below queues bios directly on the device's
 * pending lists instead.  Verify whether this type is still referenced
 * elsewhere or is dead code.
 */
struct async_sched {
	struct bio *bio;		/* bio to submit */
	int rw;				/* rw flags for submit_bio() */
	struct btrfs_fs_info *info;
	struct btrfs_work work;		/* worker-queue item */
};
3288
/*
 * see run_scheduled_bios for a description of why bios are collected for
 * async submit.
 *
 * This will add one bio to the pending list for a device and make sure
 * the work struct is scheduled.
 *
 * Reads are submitted synchronously right here; only writes are
 * deferred.  REQ_SYNC writes go on a separate pending list so the
 * worker can prioritize them.  Returns 0 unconditionally.
 */
static noinline int schedule_bio(struct btrfs_root *root,
				 struct btrfs_device *device,
				 int rw, struct bio *bio)
{
	int should_queue = 1;
	struct btrfs_pending_bios *pending_bios;

	/* don't bother with additional async steps for reads, right now */
	if (!(rw & REQ_WRITE)) {
		bio_get(bio);
		submit_bio(rw, bio);
		bio_put(bio);
		return 0;
	}

	/*
	 * nr_async_bios allows us to reliably return congestion to the
	 * higher layers.  Otherwise, the async bio makes it appear we have
	 * made progress against dirty pages when we've really just put it
	 * on a queue for later
	 */
	atomic_inc(&root->fs_info->nr_async_bios);
	WARN_ON(bio->bi_next);
	bio->bi_next = NULL;
	bio->bi_rw |= rw;

	/* io_lock protects the per-device pending lists */
	spin_lock(&device->io_lock);
	if (bio->bi_rw & REQ_SYNC)
		pending_bios = &device->pending_sync_bios;
	else
		pending_bios = &device->pending_bios;

	/* append to the singly-linked pending list via bi_next */
	if (pending_bios->tail)
		pending_bios->tail->bi_next = bio;

	pending_bios->tail = bio;
	if (!pending_bios->head)
		pending_bios->head = bio;
	/* the worker is already draining this device; no need to requeue */
	if (device->running_pending)
		should_queue = 0;

	spin_unlock(&device->io_lock);

	if (should_queue)
		btrfs_queue_worker(&root->fs_info->submit_workers,
				   &device->work);
	return 0;
}
3344
Chris Masonf1885912008-04-09 16:28:12 -04003345int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
Chris Mason8b712842008-06-11 16:50:36 -04003346 int mirror_num, int async_submit)
Chris Mason0b86a832008-03-24 15:01:56 -04003347{
3348 struct btrfs_mapping_tree *map_tree;
3349 struct btrfs_device *dev;
Chris Mason8790d502008-04-03 16:29:03 -04003350 struct bio *first_bio = bio;
Chris Masona62b9402008-10-03 16:31:08 -04003351 u64 logical = (u64)bio->bi_sector << 9;
Chris Mason0b86a832008-03-24 15:01:56 -04003352 u64 length = 0;
3353 u64 map_length;
Chris Mason0b86a832008-03-24 15:01:56 -04003354 int ret;
Chris Mason8790d502008-04-03 16:29:03 -04003355 int dev_nr = 0;
3356 int total_devs = 1;
Jan Schmidta1d3c472011-08-04 17:15:33 +02003357 struct btrfs_bio *bbio = NULL;
Chris Mason0b86a832008-03-24 15:01:56 -04003358
Chris Masonf2d8d742008-04-21 10:03:05 -04003359 length = bio->bi_size;
Chris Mason0b86a832008-03-24 15:01:56 -04003360 map_tree = &root->fs_info->mapping_tree;
3361 map_length = length;
Chris Masoncea9e442008-04-09 16:28:12 -04003362
Jan Schmidta1d3c472011-08-04 17:15:33 +02003363 ret = btrfs_map_block(map_tree, rw, logical, &map_length, &bbio,
Chris Masonf1885912008-04-09 16:28:12 -04003364 mirror_num);
Chris Masoncea9e442008-04-09 16:28:12 -04003365 BUG_ON(ret);
3366
Jan Schmidta1d3c472011-08-04 17:15:33 +02003367 total_devs = bbio->num_stripes;
Chris Masoncea9e442008-04-09 16:28:12 -04003368 if (map_length < length) {
Chris Masond3977122009-01-05 21:25:51 -05003369 printk(KERN_CRIT "mapping failed logical %llu bio len %llu "
3370 "len %llu\n", (unsigned long long)logical,
3371 (unsigned long long)length,
3372 (unsigned long long)map_length);
Chris Masoncea9e442008-04-09 16:28:12 -04003373 BUG();
3374 }
Jan Schmidta1d3c472011-08-04 17:15:33 +02003375
3376 bbio->orig_bio = first_bio;
3377 bbio->private = first_bio->bi_private;
3378 bbio->end_io = first_bio->bi_end_io;
3379 atomic_set(&bbio->stripes_pending, bbio->num_stripes);
Chris Masoncea9e442008-04-09 16:28:12 -04003380
Chris Masond3977122009-01-05 21:25:51 -05003381 while (dev_nr < total_devs) {
Jan Schmidta1d3c472011-08-04 17:15:33 +02003382 if (dev_nr < total_devs - 1) {
3383 bio = bio_clone(first_bio, GFP_NOFS);
3384 BUG_ON(!bio);
3385 } else {
3386 bio = first_bio;
Chris Mason8790d502008-04-03 16:29:03 -04003387 }
Jan Schmidta1d3c472011-08-04 17:15:33 +02003388 bio->bi_private = bbio;
3389 bio->bi_end_io = btrfs_end_bio;
3390 bio->bi_sector = bbio->stripes[dev_nr].physical >> 9;
3391 dev = bbio->stripes[dev_nr].dev;
Chris Mason18e503d2010-10-28 15:30:42 -04003392 if (dev && dev->bdev && (rw != WRITE || dev->writeable)) {
Jan Schmidta1d3c472011-08-04 17:15:33 +02003393 pr_debug("btrfs_map_bio: rw %d, secor=%llu, dev=%lu "
3394 "(%s id %llu), size=%u\n", rw,
3395 (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev,
3396 dev->name, dev->devid, bio->bi_size);
Chris Masondfe25022008-05-13 13:46:40 -04003397 bio->bi_bdev = dev->bdev;
Chris Mason8b712842008-06-11 16:50:36 -04003398 if (async_submit)
3399 schedule_bio(root, dev, rw, bio);
3400 else
3401 submit_bio(rw, bio);
Chris Masondfe25022008-05-13 13:46:40 -04003402 } else {
3403 bio->bi_bdev = root->fs_info->fs_devices->latest_bdev;
3404 bio->bi_sector = logical >> 9;
Chris Masondfe25022008-05-13 13:46:40 -04003405 bio_endio(bio, -EIO);
Chris Masondfe25022008-05-13 13:46:40 -04003406 }
Chris Mason8790d502008-04-03 16:29:03 -04003407 dev_nr++;
Chris Mason239b14b2008-03-24 15:02:07 -04003408 }
Chris Mason0b86a832008-03-24 15:01:56 -04003409 return 0;
3410}
3411
Chris Masona4437552008-04-18 10:29:38 -04003412struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
Yan Zheng2b820322008-11-17 21:11:30 -05003413 u8 *uuid, u8 *fsid)
Chris Mason0b86a832008-03-24 15:01:56 -04003414{
Yan Zheng2b820322008-11-17 21:11:30 -05003415 struct btrfs_device *device;
3416 struct btrfs_fs_devices *cur_devices;
Chris Mason0b86a832008-03-24 15:01:56 -04003417
Yan Zheng2b820322008-11-17 21:11:30 -05003418 cur_devices = root->fs_info->fs_devices;
3419 while (cur_devices) {
3420 if (!fsid ||
3421 !memcmp(cur_devices->fsid, fsid, BTRFS_UUID_SIZE)) {
3422 device = __find_device(&cur_devices->devices,
3423 devid, uuid);
3424 if (device)
3425 return device;
3426 }
3427 cur_devices = cur_devices->seed;
3428 }
3429 return NULL;
Chris Mason0b86a832008-03-24 15:01:56 -04003430}
3431
/*
 * Create a stand-in btrfs_device for a devid referenced by metadata but
 * absent at mount time (degraded mount).  The device has no bdev and is
 * flagged ->missing so later code can tell it apart from a live device.
 *
 * Returns the new device, or NULL on allocation failure.
 *
 * NOTE(review): the device is linked onto fs_devices->devices before its
 * fields are initialized and without an explicit lock here -- presumably
 * callers serialize this (mount-time / uuid_mutex context); confirm
 * before reordering anything.
 */
static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
					    u64 devid, u8 *dev_uuid)
{
	struct btrfs_device *device;
	struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;

	device = kzalloc(sizeof(*device), GFP_NOFS);
	if (!device)
		return NULL;
	list_add(&device->dev_list,
		 &fs_devices->devices);
	device->dev_root = root->fs_info->dev_root;
	device->devid = devid;
	device->work.func = pending_bios_fn;
	device->fs_devices = fs_devices;
	device->missing = 1;
	fs_devices->num_devices++;
	fs_devices->missing_devices++;
	spin_lock_init(&device->io_lock);
	INIT_LIST_HEAD(&device->dev_alloc_list);
	memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
	return device;
}
3455
/*
 * Turn one on-disk chunk item into an in-memory mapping: build a
 * map_lookup describing the stripe layout, wrap it in an extent_map
 * and insert it into the fs-wide mapping tree.
 *
 * Each stripe's device is resolved by devid+uuid; with -o degraded a
 * missing device gets a placeholder via add_missing_dev(), otherwise a
 * missing device fails the mount with -EIO.  Returns 0 on success (or
 * if the range is already mapped), -ENOMEM/-EIO on failure.
 */
static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
			  struct extent_buffer *leaf,
			  struct btrfs_chunk *chunk)
{
	struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
	struct map_lookup *map;
	struct extent_map *em;
	u64 logical;
	u64 length;
	u64 devid;
	u8 uuid[BTRFS_UUID_SIZE];
	int num_stripes;
	int ret;
	int i;

	/* chunk items are keyed by their logical start offset */
	logical = key->offset;
	length = btrfs_chunk_length(leaf, chunk);

	read_lock(&map_tree->map_tree.lock);
	em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
	read_unlock(&map_tree->map_tree.lock);

	/* already mapped? */
	if (em && em->start <= logical && em->start + em->len > logical) {
		free_extent_map(em);
		return 0;
	} else if (em) {
		free_extent_map(em);
	}

	em = alloc_extent_map();
	if (!em)
		return -ENOMEM;
	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
	map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
	if (!map) {
		free_extent_map(em);
		return -ENOMEM;
	}

	/* em->bdev is repurposed to carry the stripe map, not a bdev */
	em->bdev = (struct block_device *)map;
	em->start = logical;
	em->len = length;
	em->block_start = 0;
	em->block_len = em->len;

	map->num_stripes = num_stripes;
	map->io_width = btrfs_chunk_io_width(leaf, chunk);
	map->io_align = btrfs_chunk_io_align(leaf, chunk);
	map->sector_size = btrfs_chunk_sector_size(leaf, chunk);
	map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
	map->type = btrfs_chunk_type(leaf, chunk);
	map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
	for (i = 0; i < num_stripes; i++) {
		map->stripes[i].physical =
			btrfs_stripe_offset_nr(leaf, chunk, i);
		devid = btrfs_stripe_devid_nr(leaf, chunk, i);
		read_extent_buffer(leaf, uuid, (unsigned long)
				   btrfs_stripe_dev_uuid_nr(chunk, i),
				   BTRFS_UUID_SIZE);
		map->stripes[i].dev = btrfs_find_device(root, devid, uuid,
							NULL);
		/* without -o degraded, a missing device is fatal */
		if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) {
			kfree(map);
			free_extent_map(em);
			return -EIO;
		}
		if (!map->stripes[i].dev) {
			map->stripes[i].dev =
				add_missing_dev(root, devid, uuid);
			if (!map->stripes[i].dev) {
				kfree(map);
				free_extent_map(em);
				return -EIO;
			}
		}
		map->stripes[i].dev->in_fs_metadata = 1;
	}

	write_lock(&map_tree->map_tree.lock);
	ret = add_extent_mapping(&map_tree->map_tree, em);
	write_unlock(&map_tree->map_tree.lock);
	BUG_ON(ret);
	/* drop our reference; the tree holds its own */
	free_extent_map(em);

	return 0;
}
3543
3544static int fill_device_from_item(struct extent_buffer *leaf,
3545 struct btrfs_dev_item *dev_item,
3546 struct btrfs_device *device)
3547{
3548 unsigned long ptr;
Chris Mason0b86a832008-03-24 15:01:56 -04003549
3550 device->devid = btrfs_device_id(leaf, dev_item);
Chris Balld6397ba2009-04-27 07:29:03 -04003551 device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item);
3552 device->total_bytes = device->disk_total_bytes;
Chris Mason0b86a832008-03-24 15:01:56 -04003553 device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
3554 device->type = btrfs_device_type(leaf, dev_item);
3555 device->io_align = btrfs_device_io_align(leaf, dev_item);
3556 device->io_width = btrfs_device_io_width(leaf, dev_item);
3557 device->sector_size = btrfs_device_sector_size(leaf, dev_item);
Chris Mason0b86a832008-03-24 15:01:56 -04003558
3559 ptr = (unsigned long)btrfs_device_uuid(dev_item);
Chris Masone17cade2008-04-15 15:41:47 -04003560 read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
Chris Mason0b86a832008-03-24 15:01:56 -04003561
Chris Mason0b86a832008-03-24 15:01:56 -04003562 return 0;
3563}
3564
/*
 * Make the seed filesystem identified by @fsid available on this mount.
 *
 * If the fsid is already on the seed chain we are done.  Otherwise the
 * registered fs_devices is cloned, its devices are opened read-only,
 * and -- provided it really is a seeding filesystem -- the clone is
 * pushed onto the front of root's seed chain.  All of this happens
 * under uuid_mutex.
 *
 * Returns 0 on success, -ENOENT if the fsid is unknown, -EINVAL if it
 * is not a seed filesystem, or the error from opening the devices.
 */
static int open_seed_devices(struct btrfs_root *root, u8 *fsid)
{
	struct btrfs_fs_devices *fs_devices;
	int ret;

	mutex_lock(&uuid_mutex);

	/* already opened as a seed of this fs? */
	fs_devices = root->fs_info->fs_devices->seed;
	while (fs_devices) {
		if (!memcmp(fs_devices->fsid, fsid, BTRFS_UUID_SIZE)) {
			ret = 0;
			goto out;
		}
		fs_devices = fs_devices->seed;
	}

	fs_devices = find_fsid(fsid);
	if (!fs_devices) {
		ret = -ENOENT;
		goto out;
	}

	/* work on a private copy so the registered list stays intact */
	fs_devices = clone_fs_devices(fs_devices);
	if (IS_ERR(fs_devices)) {
		ret = PTR_ERR(fs_devices);
		goto out;
	}

	ret = __btrfs_open_devices(fs_devices, FMODE_READ,
				   root->fs_info->bdev_holder);
	if (ret)
		goto out;

	/* only genuine seed filesystems may be chained in */
	if (!fs_devices->seeding) {
		__btrfs_close_devices(fs_devices);
		free_fs_devices(fs_devices);
		ret = -EINVAL;
		goto out;
	}

	/* push the clone onto the front of the seed chain */
	fs_devices->seed = root->fs_info->fs_devices->seed;
	root->fs_info->fs_devices->seed = fs_devices;
out:
	mutex_unlock(&uuid_mutex);
	return ret;
}
3611
/*
 * Process one on-disk dev item: locate (or, in degraded mode, fabricate)
 * the matching btrfs_device and populate it from the item.
 *
 * A dev item whose fsid differs from ours belongs to a seed filesystem,
 * which is opened first via open_seed_devices().  Without -o degraded,
 * a device that cannot be found or has no bdev fails with -EIO; with it,
 * a placeholder is created or an existing entry is marked ->missing.
 * Writable devices also have their bytes accounted into total_rw_bytes
 * and free_chunk_space.  Returns 0 on success or a negative errno.
 */
static int read_one_dev(struct btrfs_root *root,
			struct extent_buffer *leaf,
			struct btrfs_dev_item *dev_item)
{
	struct btrfs_device *device;
	u64 devid;
	int ret;
	u8 fs_uuid[BTRFS_UUID_SIZE];
	u8 dev_uuid[BTRFS_UUID_SIZE];

	devid = btrfs_device_id(leaf, dev_item);
	read_extent_buffer(leaf, dev_uuid,
			   (unsigned long)btrfs_device_uuid(dev_item),
			   BTRFS_UUID_SIZE);
	read_extent_buffer(leaf, fs_uuid,
			   (unsigned long)btrfs_device_fsid(dev_item),
			   BTRFS_UUID_SIZE);

	/* foreign fsid -> device lives in a seed filesystem */
	if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) {
		ret = open_seed_devices(root, fs_uuid);
		/* degraded mounts tolerate a failed seed open */
		if (ret && !btrfs_test_opt(root, DEGRADED))
			return ret;
	}

	device = btrfs_find_device(root, devid, dev_uuid, fs_uuid);
	if (!device || !device->bdev) {
		if (!btrfs_test_opt(root, DEGRADED))
			return -EIO;

		if (!device) {
			printk(KERN_WARNING "warning devid %llu missing\n",
			       (unsigned long long)devid);
			device = add_missing_dev(root, devid, dev_uuid);
			if (!device)
				return -ENOMEM;
		} else if (!device->missing) {
			/*
			 * this happens when a device that was properly setup
			 * in the device info lists suddenly goes bad.
			 * device->bdev is NULL, and so we have to set
			 * device->missing to one here
			 */
			root->fs_info->fs_devices->missing_devices++;
			device->missing = 1;
		}
	}

	/* a device from a seed fs must be read-only and match generation */
	if (device->fs_devices != root->fs_info->fs_devices) {
		BUG_ON(device->writeable);
		if (device->generation !=
		    btrfs_device_generation(leaf, dev_item))
			return -EINVAL;
	}

	fill_device_from_item(leaf, dev_item, device);
	device->dev_root = root->fs_info->dev_root;
	device->in_fs_metadata = 1;
	if (device->writeable) {
		device->fs_devices->total_rw_bytes += device->total_bytes;
		spin_lock(&root->fs_info->free_chunk_lock);
		root->fs_info->free_chunk_space += device->total_bytes -
			device->bytes_used;
		spin_unlock(&root->fs_info->free_chunk_lock);
	}
	ret = 0;
	return ret;
}
3679
/*
 * Parse the sys_chunk_array embedded in the super block: the system
 * chunks needed to bootstrap reading the chunk tree itself.
 *
 * The super copy is written into a temporary extent_buffer so the
 * regular btrfs_chunk accessors and read_one_chunk() can be reused.
 * The array is a packed sequence of (disk_key, chunk) pairs; anything
 * other than a CHUNK_ITEM key is treated as corruption (-EIO).
 * Returns 0 on success or a negative errno.
 */
int btrfs_read_sys_array(struct btrfs_root *root)
{
	struct btrfs_super_block *super_copy = root->fs_info->super_copy;
	struct extent_buffer *sb;
	struct btrfs_disk_key *disk_key;
	struct btrfs_chunk *chunk;
	u8 *ptr;
	unsigned long sb_ptr;
	int ret = 0;
	u32 num_stripes;
	u32 array_size;
	u32 len = 0;
	u32 cur;
	struct btrfs_key key;

	sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET,
					  BTRFS_SUPER_INFO_SIZE);
	if (!sb)
		return -ENOMEM;
	btrfs_set_buffer_uptodate(sb);
	btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);

	write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
	array_size = btrfs_super_sys_array_size(super_copy);

	/* ptr walks the raw array; sb_ptr mirrors it as an offset into sb */
	ptr = super_copy->sys_chunk_array;
	sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array);
	cur = 0;

	while (cur < array_size) {
		disk_key = (struct btrfs_disk_key *)ptr;
		btrfs_disk_key_to_cpu(&key, disk_key);

		/* step over the key that precedes each item */
		len = sizeof(*disk_key); ptr += len;
		sb_ptr += len;
		cur += len;

		if (key.type == BTRFS_CHUNK_ITEM_KEY) {
			chunk = (struct btrfs_chunk *)sb_ptr;
			ret = read_one_chunk(root, &key, sb, chunk);
			if (ret)
				break;
			/* chunk item size varies with its stripe count */
			num_stripes = btrfs_chunk_num_stripes(sb, chunk);
			len = btrfs_chunk_item_size(num_stripes);
		} else {
			ret = -EIO;
			break;
		}
		ptr += len;
		sb_ptr += len;
		cur += len;
	}
	free_extent_buffer(sb);
	return ret;
}
3735
/*
 * Read the entire chunk tree at mount time: all device items first,
 * then all chunk items, so every chunk mapping can resolve its devices.
 *
 * The first pass searches from BTRFS_DEV_ITEMS_OBJECTID and handles
 * dev items via read_one_dev(); it then restarts from objectid 0
 * ("goto again") so the second pass reaches the chunk items, handled
 * by read_one_chunk().  Returns 0 on success or a negative errno.
 */
int btrfs_read_chunk_tree(struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	int ret;
	int slot;

	root = root->fs_info->chunk_root;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* first we search for all of the device items, and then we
	 * read in all of the chunk items.  This way we can create chunk
	 * mappings that reference all of the devices that are found
	 */
	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.offset = 0;
	key.type = 0;
again:
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto error;
	while (1) {
		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto error;
			break;
		}
		btrfs_item_key_to_cpu(leaf, &found_key, slot);
		/* key.objectid tells us which pass we are in */
		if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
			if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID)
				break;
			if (found_key.type == BTRFS_DEV_ITEM_KEY) {
				struct btrfs_dev_item *dev_item;
				dev_item = btrfs_item_ptr(leaf, slot,
						  struct btrfs_dev_item);
				ret = read_one_dev(root, leaf, dev_item);
				if (ret)
					goto error;
			}
		} else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
			struct btrfs_chunk *chunk;
			chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
			ret = read_one_chunk(root, &found_key, leaf, chunk);
			if (ret)
				goto error;
		}
		path->slots[0]++;
	}
	/* finished the device pass: restart from the top for chunks */
	if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
		key.objectid = 0;
		btrfs_release_path(path);
		goto again;
	}
	ret = 0;
error:
	btrfs_free_path(path);
	return ret;
}