/*
 * Copyright (C) 2011 STRATO. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include "ctree.h"
#include "volumes.h"
#include "disk-io.h"
#include "ordered-data.h"

/*
 * This is only the first step towards a full-featured scrub. It reads all
 * extents and super blocks and verifies the checksums. In case a bad checksum
 * is found or the extent cannot be read, good data will be written back if
 * any can be found.
 *
 * Future enhancements:
 *  - To enhance performance, better read-ahead strategies for the
 *    extent-tree can be employed.
 *  - In case an unrepairable extent is encountered, track which files are
 *    affected and report them
 *  - In case of a read error on files with nodatasum, map the file and read
 *    the extent to trigger a writeback of the good copy
 *  - track and record media errors, throw out bad devices
 *  - add a readonly mode
 *  - add a mode to also read unallocated space
 *  - make the prefetch cancellable
 */

struct scrub_bio;
struct scrub_page;
struct scrub_dev;
struct scrub_fixup;
static void scrub_bio_end_io(struct bio *bio, int err);
static void scrub_checksum(struct btrfs_work *work);
static int scrub_checksum_data(struct scrub_dev *sdev,
                               struct scrub_page *spag, void *buffer);
static int scrub_checksum_tree_block(struct scrub_dev *sdev,
                                     struct scrub_page *spag, u64 logical,
                                     void *buffer);
static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer);
static void scrub_recheck_end_io(struct bio *bio, int err);
static void scrub_fixup_worker(struct btrfs_work *work);
static void scrub_fixup(struct scrub_fixup *fixup);

#define SCRUB_PAGES_PER_BIO     16      /* 64k per bio */
#define SCRUB_BIOS_PER_DEV      16      /* 1 MB per device in flight */
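
/*
 * with the 4 KiB pages this file assumes (btrfs_scrub_dev() below refuses
 * to run when sectorsize != PAGE_SIZE), this works out to 16 * 4 KiB =
 * 64 KiB per bio and 16 * 64 KiB = 1 MiB of read I/O in flight per device
 */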

struct scrub_page {
        u64 flags;  /* extent flags */
        u64 generation;
        u64 mirror_num;
        int have_csum;
        u8 csum[BTRFS_CSUM_SIZE];
};

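/*
 * one unit of read I/O: a bio plus per-page verification data in spag[].
 * idle scrub_bios sit on a per-device free list threaded through next_free
 */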
struct scrub_bio {
        int index;
        struct scrub_dev *sdev;
        struct bio *bio;
        int err;
        u64 logical;
        u64 physical;
        struct scrub_page spag[SCRUB_PAGES_PER_BIO];
        u64 count;
        int next_free;
        struct btrfs_work work;
};

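/*
 * per-device scrub state. first_free heads the free list of bios, curr
 * indexes the bio currently being filled (-1 if none) and in_flight
 * counts bios that have been submitted but not yet processed
 */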
struct scrub_dev {
        struct scrub_bio *bios[SCRUB_BIOS_PER_DEV];
        struct btrfs_device *dev;
        int first_free;
        int curr;
        atomic_t in_flight;
        spinlock_t list_lock;
        wait_queue_head_t list_wait;
        u16 csum_size;
        struct list_head csum_list;
        atomic_t cancel_req;
        /*
         * statistics
         */
        struct btrfs_scrub_progress stat;
        spinlock_t stat_lock;
};

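/*
 * state for re-reading and repairing a single page after a checksum or
 * read error. recheck is set when the page first has to be read again on
 * its own to isolate an I/O error that was reported for the whole bio
 */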
struct scrub_fixup {
        struct scrub_dev *sdev;
        struct bio *bio;
        u64 logical;
        u64 physical;
        struct scrub_page spag;
        struct btrfs_work work;
        int err;
        int recheck;
};

static void scrub_free_csums(struct scrub_dev *sdev)
{
        while (!list_empty(&sdev->csum_list)) {
                struct btrfs_ordered_sum *sum;
                sum = list_first_entry(&sdev->csum_list,
                                       struct btrfs_ordered_sum, list);
                list_del(&sum->list);
                kfree(sum);
        }
}

static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
{
        int i;
        int j;
        struct page *last_page;

        if (!sdev)
                return;

        for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
                struct scrub_bio *sbio = sdev->bios[i];
                struct bio *bio;

                if (!sbio)
                        break;

                bio = sbio->bio;
                if (bio) {
                        last_page = NULL;
                        for (j = 0; j < bio->bi_vcnt; ++j) {
                                if (bio->bi_io_vec[j].bv_page == last_page)
                                        continue;
                                last_page = bio->bi_io_vec[j].bv_page;
                                __free_page(last_page);
                        }
                        bio_put(bio);
                }
                kfree(sbio);
        }

        scrub_free_csums(sdev);
        kfree(sdev);
}

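/*
 * allocate the complete pool for a device up front: SCRUB_BIOS_PER_DEV
 * bios, each already populated with SCRUB_PAGES_PER_BIO pages and linked
 * into the free list, so the main scrub path does not need to allocate
 * memory
 */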
static noinline_for_stack
struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
{
        struct scrub_dev *sdev;
        int i;
        int j;
        int ret;
        struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;

        sdev = kzalloc(sizeof(*sdev), GFP_NOFS);
        if (!sdev)
                goto nomem;
        sdev->dev = dev;
        for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
                struct bio *bio;
                struct scrub_bio *sbio;

                sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
                if (!sbio)
                        goto nomem;
                sdev->bios[i] = sbio;

                bio = bio_kmalloc(GFP_NOFS, SCRUB_PAGES_PER_BIO);
                if (!bio)
                        goto nomem;

                sbio->index = i;
                sbio->sdev = sdev;
                sbio->bio = bio;
                sbio->count = 0;
                sbio->work.func = scrub_checksum;
                bio->bi_private = sdev->bios[i];
                bio->bi_end_io = scrub_bio_end_io;
                bio->bi_sector = 0;
                bio->bi_bdev = dev->bdev;
                bio->bi_size = 0;

                for (j = 0; j < SCRUB_PAGES_PER_BIO; ++j) {
                        struct page *page;
                        page = alloc_page(GFP_NOFS);
                        if (!page)
                                goto nomem;

                        ret = bio_add_page(bio, page, PAGE_SIZE, 0);
                        if (!ret)
                                goto nomem;
                }
                WARN_ON(bio->bi_vcnt != SCRUB_PAGES_PER_BIO);

                if (i != SCRUB_BIOS_PER_DEV-1)
                        sdev->bios[i]->next_free = i + 1;
                else
                        sdev->bios[i]->next_free = -1;
        }
        sdev->first_free = 0;
        sdev->curr = -1;
        atomic_set(&sdev->in_flight, 0);
        atomic_set(&sdev->cancel_req, 0);
        sdev->csum_size = btrfs_super_csum_size(&fs_info->super_copy);
        INIT_LIST_HEAD(&sdev->csum_list);

        spin_lock_init(&sdev->list_lock);
        spin_lock_init(&sdev->stat_lock);
        init_waitqueue_head(&sdev->list_wait);
        return sdev;

nomem:
        scrub_free_dev(sdev);
        return ERR_PTR(-ENOMEM);
}

/*
 * scrub_recheck_error gets called when either verification of the page
 * failed or the bio failed to read, e.g. with EIO. In the latter case,
 * recheck_error gets called for every page in the bio, even though only
 * one may be bad.
 */
static void scrub_recheck_error(struct scrub_bio *sbio, int ix)
{
        struct scrub_dev *sdev = sbio->sdev;
        struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
        struct bio *bio = NULL;
        struct page *page = NULL;
        struct scrub_fixup *fixup = NULL;
        int ret;

        /*
         * while we're in here we do not want the transaction to commit.
         * To prevent it, we increment scrubs_running. scrub_pause will
         * have to wait until we're finished.
         * We can safely increment scrubs_running here, because we're
         * in the context of the original bio, which is still marked
         * in_flight.
         */
        atomic_inc(&fs_info->scrubs_running);

        fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
        if (!fixup)
                goto malloc_error;

        fixup->logical = sbio->logical + ix * PAGE_SIZE;
        fixup->physical = sbio->physical + ix * PAGE_SIZE;
        fixup->spag = sbio->spag[ix];
        fixup->sdev = sdev;

        bio = bio_alloc(GFP_NOFS, 1);
        if (!bio)
                goto malloc_error;
        bio->bi_private = fixup;
        bio->bi_size = 0;
        bio->bi_bdev = sdev->dev->bdev;
        fixup->bio = bio;
        fixup->recheck = 0;

        page = alloc_page(GFP_NOFS);
        if (!page)
                goto malloc_error;

        ret = bio_add_page(bio, page, PAGE_SIZE, 0);
        if (!ret)
                goto malloc_error;

        if (!sbio->err) {
                /*
                 * shorter path: just a checksum error, go ahead and
                 * correct it
                 */
                scrub_fixup_worker(&fixup->work);
                return;
        }

        /*
         * an I/O error occurred for one of the blocks in the bio, not
         * necessarily for this one, so first try to read it separately
         */
        fixup->work.func = scrub_fixup_worker;
        fixup->recheck = 1;
        bio->bi_end_io = scrub_recheck_end_io;
        bio->bi_sector = fixup->physical >> 9;
        bio->bi_bdev = sdev->dev->bdev;
        submit_bio(0, bio);

        return;

malloc_error:
        if (bio)
                bio_put(bio);
        if (page)
                __free_page(page);
        kfree(fixup);
        spin_lock(&sdev->stat_lock);
        ++sdev->stat.malloc_errors;
        spin_unlock(&sdev->stat_lock);
        atomic_dec(&fs_info->scrubs_running);
        wake_up(&fs_info->scrub_pause_wait);
}

static void scrub_recheck_end_io(struct bio *bio, int err)
{
        struct scrub_fixup *fixup = bio->bi_private;
        struct btrfs_fs_info *fs_info = fixup->sdev->dev->dev_root->fs_info;

        fixup->err = err;
        btrfs_queue_worker(&fs_info->scrub_workers, &fixup->work);
}

static int scrub_fixup_check(struct scrub_fixup *fixup)
{
        int ret = 1;
        struct page *page;
        void *buffer;
        u64 flags = fixup->spag.flags;

        page = fixup->bio->bi_io_vec[0].bv_page;
        buffer = kmap_atomic(page, KM_USER0);
        if (flags & BTRFS_EXTENT_FLAG_DATA) {
                ret = scrub_checksum_data(fixup->sdev,
                                          &fixup->spag, buffer);
        } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
                ret = scrub_checksum_tree_block(fixup->sdev,
                                                &fixup->spag,
                                                fixup->logical,
                                                buffer);
        } else {
                WARN_ON(1);
        }
        kunmap_atomic(buffer, KM_USER0);

        return ret;
}

static void scrub_fixup_worker(struct btrfs_work *work)
{
        struct scrub_fixup *fixup;
        struct btrfs_fs_info *fs_info;
        u64 flags;
        int ret = 1;

        fixup = container_of(work, struct scrub_fixup, work);
        fs_info = fixup->sdev->dev->dev_root->fs_info;
        flags = fixup->spag.flags;

        if (fixup->recheck && fixup->err == 0)
                ret = scrub_fixup_check(fixup);

        if (ret || fixup->err)
                scrub_fixup(fixup);

        __free_page(fixup->bio->bi_io_vec[0].bv_page);
        bio_put(fixup->bio);

        atomic_dec(&fs_info->scrubs_running);
        wake_up(&fs_info->scrub_pause_wait);

        kfree(fixup);
}

static void scrub_fixup_end_io(struct bio *bio, int err)
{
        complete((struct completion *)bio->bi_private);
}

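/*
 * try to repair one page in place: map its logical address, read the page
 * from each other mirror until one passes verification and write that
 * good copy back over the broken location
 */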
static void scrub_fixup(struct scrub_fixup *fixup)
{
        struct scrub_dev *sdev = fixup->sdev;
        struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
        struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
        struct btrfs_multi_bio *multi = NULL;
        struct bio *bio = fixup->bio;
        u64 length;
        int i;
        int ret;
        DECLARE_COMPLETION_ONSTACK(complete);

        if ((fixup->spag.flags & BTRFS_EXTENT_FLAG_DATA) &&
            (fixup->spag.have_csum == 0)) {
                /*
                 * nodatasum, don't try to fix anything.
                 * FIXME: we can do better, open the inode and trigger a
                 * writeback
                 */
                goto uncorrectable;
        }

        length = PAGE_SIZE;
        ret = btrfs_map_block(map_tree, REQ_WRITE, fixup->logical, &length,
                              &multi, 0);
        if (ret || !multi || length < PAGE_SIZE) {
                printk(KERN_ERR
                       "scrub_fixup: btrfs_map_block failed us for %llu\n",
                       (unsigned long long)fixup->logical);
                WARN_ON(1);
                return;
        }

        if (multi->num_stripes == 1) {
                /* there aren't any replicas */
                goto uncorrectable;
        }

        /*
         * first find a good copy
         */
        for (i = 0; i < multi->num_stripes; ++i) {
                if (i == fixup->spag.mirror_num)
                        continue;

                bio->bi_sector = multi->stripes[i].physical >> 9;
                bio->bi_bdev = multi->stripes[i].dev->bdev;
                bio->bi_size = PAGE_SIZE;
                bio->bi_next = NULL;
                bio->bi_flags |= 1 << BIO_UPTODATE;
                bio->bi_comp_cpu = -1;
                bio->bi_end_io = scrub_fixup_end_io;
                bio->bi_private = &complete;

                submit_bio(0, bio);

                wait_for_completion(&complete);

                if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
                        /* I/O error, this is not a good copy */
                        continue;

                ret = scrub_fixup_check(fixup);
                if (ret == 0)
                        break;
        }
        if (i == multi->num_stripes)
                goto uncorrectable;

        /*
         * the bio now contains good data, write it back
         */
        bio->bi_sector = fixup->physical >> 9;
        bio->bi_bdev = sdev->dev->bdev;
        bio->bi_size = PAGE_SIZE;
        bio->bi_next = NULL;
        bio->bi_flags |= 1 << BIO_UPTODATE;
        bio->bi_comp_cpu = -1;
        bio->bi_end_io = scrub_fixup_end_io;
        bio->bi_private = &complete;

        submit_bio(REQ_WRITE, bio);

        wait_for_completion(&complete);

        if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
                /* I/O error, writeback failed, give up */
                goto uncorrectable;

        kfree(multi);
        spin_lock(&sdev->stat_lock);
        ++sdev->stat.corrected_errors;
        spin_unlock(&sdev->stat_lock);

        if (printk_ratelimit())
                printk(KERN_ERR "btrfs: fixed up at %llu\n",
                       (unsigned long long)fixup->logical);
        return;

uncorrectable:
        kfree(multi);
        spin_lock(&sdev->stat_lock);
        ++sdev->stat.uncorrectable_errors;
        spin_unlock(&sdev->stat_lock);

        if (printk_ratelimit())
                printk(KERN_ERR "btrfs: unable to fixup at %llu\n",
                       (unsigned long long)fixup->logical);
}

static void scrub_bio_end_io(struct bio *bio, int err)
{
        struct scrub_bio *sbio = bio->bi_private;
        struct scrub_dev *sdev = sbio->sdev;
        struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;

        sbio->err = err;

        btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
}

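/*
 * worker callback, run from the scrub_workers queue once a bio completes:
 * verify every page of the bio, hand failures to scrub_recheck_error()
 * and finally put the scrub_bio back on the free list
 */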
static void scrub_checksum(struct btrfs_work *work)
{
        struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
        struct scrub_dev *sdev = sbio->sdev;
        struct page *page;
        void *buffer;
        int i;
        u64 flags;
        u64 logical;
        int ret;

        if (sbio->err) {
                struct bio *bio;
                struct bio *old_bio;

                for (i = 0; i < sbio->count; ++i)
                        scrub_recheck_error(sbio, i);
                spin_lock(&sdev->stat_lock);
                ++sdev->stat.read_errors;
                spin_unlock(&sdev->stat_lock);

                /*
                 * FIXME: allocate a new bio after a media error. I haven't
                 * figured out how to reuse this one
                 */
                old_bio = sbio->bio;
                bio = bio_kmalloc(GFP_NOFS, SCRUB_PAGES_PER_BIO);
                if (!bio) {
                        /*
                         * alloc failed. cancel the scrub and don't requeue
                         * this sbio
                         */
                        printk(KERN_ERR "btrfs scrub: allocation failure, "
                               "cancelling scrub\n");
                        atomic_inc(&sdev->dev->dev_root->fs_info->
                                   scrub_cancel_req);
                        goto out_no_enqueue;
                }
                sbio->bio = bio;
                bio->bi_private = sbio;
                bio->bi_end_io = scrub_bio_end_io;
                bio->bi_sector = 0;
                bio->bi_bdev = sbio->sdev->dev->bdev;
                bio->bi_size = 0;
                for (i = 0; i < SCRUB_PAGES_PER_BIO; ++i) {
                        struct page *page;
                        page = old_bio->bi_io_vec[i].bv_page;
                        bio_add_page(bio, page, PAGE_SIZE, 0);
                }
                bio_put(old_bio);
                goto out;
        }
        for (i = 0; i < sbio->count; ++i) {
                page = sbio->bio->bi_io_vec[i].bv_page;
                buffer = kmap_atomic(page, KM_USER0);
                flags = sbio->spag[i].flags;
                logical = sbio->logical + i * PAGE_SIZE;
                ret = 0;
                if (flags & BTRFS_EXTENT_FLAG_DATA) {
                        ret = scrub_checksum_data(sdev, sbio->spag + i, buffer);
                } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
                        ret = scrub_checksum_tree_block(sdev, sbio->spag + i,
                                                        logical, buffer);
                } else if (flags & BTRFS_EXTENT_FLAG_SUPER) {
                        BUG_ON(i);
                        (void)scrub_checksum_super(sbio, buffer);
                } else {
                        WARN_ON(1);
                }
                kunmap_atomic(buffer, KM_USER0);
                if (ret)
                        scrub_recheck_error(sbio, i);
        }

out:
        spin_lock(&sdev->list_lock);
        sbio->next_free = sdev->first_free;
        sdev->first_free = sbio->index;
        spin_unlock(&sdev->list_lock);
out_no_enqueue:
        atomic_dec(&sdev->in_flight);
        wake_up(&sdev->list_wait);
}

static int scrub_checksum_data(struct scrub_dev *sdev,
                               struct scrub_page *spag, void *buffer)
{
        u8 csum[BTRFS_CSUM_SIZE];
        u32 crc = ~(u32)0;
        int fail = 0;
        struct btrfs_root *root = sdev->dev->dev_root;

        if (!spag->have_csum)
                return 0;

        crc = btrfs_csum_data(root, buffer, crc, PAGE_SIZE);
        btrfs_csum_final(crc, csum);
        if (memcmp(csum, spag->csum, sdev->csum_size))
                fail = 1;

        spin_lock(&sdev->stat_lock);
        ++sdev->stat.data_extents_scrubbed;
        sdev->stat.data_bytes_scrubbed += PAGE_SIZE;
        if (fail)
                ++sdev->stat.csum_errors;
        spin_unlock(&sdev->stat_lock);

        return fail;
}

static int scrub_checksum_tree_block(struct scrub_dev *sdev,
                                     struct scrub_page *spag, u64 logical,
                                     void *buffer)
{
        struct btrfs_header *h;
        struct btrfs_root *root = sdev->dev->dev_root;
        struct btrfs_fs_info *fs_info = root->fs_info;
        u8 csum[BTRFS_CSUM_SIZE];
        u32 crc = ~(u32)0;
        int fail = 0;
        int crc_fail = 0;

        /*
         * we don't use the getter functions here, as we
         * a) don't have an extent buffer and
         * b) the page is already kmapped
         */
        h = (struct btrfs_header *)buffer;

        if (logical != le64_to_cpu(h->bytenr))
                ++fail;

        if (spag->generation != le64_to_cpu(h->generation))
                ++fail;

        if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
                ++fail;

        if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
                   BTRFS_UUID_SIZE))
                ++fail;

        crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
                              PAGE_SIZE - BTRFS_CSUM_SIZE);
        btrfs_csum_final(crc, csum);
        if (memcmp(csum, h->csum, sdev->csum_size))
                ++crc_fail;

        spin_lock(&sdev->stat_lock);
        ++sdev->stat.tree_extents_scrubbed;
        sdev->stat.tree_bytes_scrubbed += PAGE_SIZE;
        if (crc_fail)
                ++sdev->stat.csum_errors;
        if (fail)
                ++sdev->stat.verify_errors;
        spin_unlock(&sdev->stat_lock);

        return fail || crc_fail;
}

static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer)
{
        struct btrfs_super_block *s;
        u64 logical;
        struct scrub_dev *sdev = sbio->sdev;
        struct btrfs_root *root = sdev->dev->dev_root;
        struct btrfs_fs_info *fs_info = root->fs_info;
        u8 csum[BTRFS_CSUM_SIZE];
        u32 crc = ~(u32)0;
        int fail = 0;

        s = (struct btrfs_super_block *)buffer;
        logical = sbio->logical;

        if (logical != le64_to_cpu(s->bytenr))
                ++fail;

        if (sbio->spag[0].generation != le64_to_cpu(s->generation))
                ++fail;

        if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
                ++fail;

        crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
                              PAGE_SIZE - BTRFS_CSUM_SIZE);
        btrfs_csum_final(crc, csum);
        if (memcmp(csum, s->csum, sbio->sdev->csum_size))
                ++fail;

        if (fail) {
                /*
                 * if we find an error in a super block, we just report it;
                 * it will get rewritten with the next transaction commit
                 * anyway
                 */
                spin_lock(&sdev->stat_lock);
                ++sdev->stat.super_errors;
                spin_unlock(&sdev->stat_lock);
        }

        return fail;
}

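/*
 * submit the bio that is currently being filled, if any, and mark the
 * device as having no current bio so that the next scrub_page() call
 * grabs a fresh one from the free list
 */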
static int scrub_submit(struct scrub_dev *sdev)
{
        struct scrub_bio *sbio;

        if (sdev->curr == -1)
                return 0;

        sbio = sdev->bios[sdev->curr];

        sbio->bio->bi_sector = sbio->physical >> 9;
        sbio->bio->bi_size = sbio->count * PAGE_SIZE;
        sbio->bio->bi_next = NULL;
        sbio->bio->bi_flags |= 1 << BIO_UPTODATE;
        sbio->bio->bi_comp_cpu = -1;
        sbio->bio->bi_bdev = sdev->dev->bdev;
        sbio->err = 0;
        sdev->curr = -1;
        atomic_inc(&sdev->in_flight);

        submit_bio(0, sbio->bio);

        return 0;
}

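/*
 * queue one page for scrubbing. Pages are packed into the current bio as
 * long as they are physically contiguous on disk; otherwise, or when the
 * bio is full or force is set, the bio is submitted. May block until a
 * free bio becomes available.
 */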
static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len,
                      u64 physical, u64 flags, u64 gen, u64 mirror_num,
                      u8 *csum, int force)
{
        struct scrub_bio *sbio;

again:
        /*
         * grab a fresh bio or wait for one to become available
         */
        while (sdev->curr == -1) {
                spin_lock(&sdev->list_lock);
                sdev->curr = sdev->first_free;
                if (sdev->curr != -1) {
                        sdev->first_free = sdev->bios[sdev->curr]->next_free;
                        sdev->bios[sdev->curr]->next_free = -1;
                        sdev->bios[sdev->curr]->count = 0;
                        spin_unlock(&sdev->list_lock);
                } else {
                        spin_unlock(&sdev->list_lock);
                        wait_event(sdev->list_wait, sdev->first_free != -1);
                }
        }
        sbio = sdev->bios[sdev->curr];
        if (sbio->count == 0) {
                sbio->physical = physical;
                sbio->logical = logical;
        } else if (sbio->physical + sbio->count * PAGE_SIZE != physical) {
                scrub_submit(sdev);
                goto again;
        }
        sbio->spag[sbio->count].flags = flags;
        sbio->spag[sbio->count].generation = gen;
        sbio->spag[sbio->count].have_csum = 0;
        sbio->spag[sbio->count].mirror_num = mirror_num;
        if (csum) {
                sbio->spag[sbio->count].have_csum = 1;
                memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size);
        }
        ++sbio->count;
        if (sbio->count == SCRUB_PAGES_PER_BIO || force)
                scrub_submit(sdev);

        return 0;
}

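/*
 * look up the data checksum for a logical address in the prefetched
 * csum_list, dropping entries that lie entirely before it. Returns 1 and
 * copies the checksum to *csum on success, 0 if no checksum was found.
 */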
static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
                           u8 *csum)
{
        struct btrfs_ordered_sum *sum = NULL;
        int ret = 0;
        unsigned long i;
        unsigned long num_sectors;
        u32 sectorsize = sdev->dev->dev_root->sectorsize;

        while (!list_empty(&sdev->csum_list)) {
                sum = list_first_entry(&sdev->csum_list,
                                       struct btrfs_ordered_sum, list);
                if (sum->bytenr > logical)
                        return 0;
                if (sum->bytenr + sum->len > logical)
                        break;

                ++sdev->stat.csum_discards;
                list_del(&sum->list);
                kfree(sum);
                sum = NULL;
        }
        if (!sum)
                return 0;

        num_sectors = sum->len / sectorsize;
        for (i = 0; i < num_sectors; ++i) {
                if (sum->sums[i].bytenr == logical) {
                        memcpy(csum, &sum->sums[i].sum, sdev->csum_size);
                        ret = 1;
                        break;
                }
        }
        if (ret && i == num_sectors - 1) {
                list_del(&sum->list);
                kfree(sum);
        }
        return ret;
}

/* scrub_extent tries to collect up to 64 kB for each bio */
static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len,
                        u64 physical, u64 flags, u64 gen, u64 mirror_num)
{
        int ret;
        u8 csum[BTRFS_CSUM_SIZE];

        while (len) {
                u64 l = min_t(u64, len, PAGE_SIZE);
                int have_csum = 0;

                if (flags & BTRFS_EXTENT_FLAG_DATA) {
                        /* push csums to sbio */
                        have_csum = scrub_find_csum(sdev, logical, l, csum);
                        if (have_csum == 0)
                                ++sdev->stat.no_csum;
                }
                ret = scrub_page(sdev, logical, l, physical, flags, gen,
                                 mirror_num, have_csum ? csum : NULL, 0);
                if (ret)
                        return ret;
                len -= l;
                logical += l;
                physical += l;
        }
        return 0;
}

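/*
 * scrub all stripes of a chunk that live on this device. offset is the
 * logical distance from the chunk start to this device's first stripe,
 * increment the logical distance between two consecutive stripes of the
 * same device. E.g. for the RAID0 case below with num_stripes == 2 and
 * num == 1, the device covers base + stripe_len, base + 3 * stripe_len,
 * base + 5 * stripe_len and so on.
 */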
static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
        struct map_lookup *map, int num, u64 base, u64 length)
{
        struct btrfs_path *path;
        struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
        struct btrfs_root *root = fs_info->extent_root;
        struct btrfs_root *csum_root = fs_info->csum_root;
        struct btrfs_extent_item *extent;
        u64 flags;
        int ret;
        int slot;
        int i;
        u64 nstripes;
        int start_stripe;
        struct extent_buffer *l;
        struct btrfs_key key;
        u64 physical;
        u64 logical;
        u64 generation;
        u64 mirror_num;

        u64 increment = map->stripe_len;
        u64 offset;

        nstripes = length;
        offset = 0;
        do_div(nstripes, map->stripe_len);
        if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
                offset = map->stripe_len * num;
                increment = map->stripe_len * map->num_stripes;
                mirror_num = 0;
        } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
                int factor = map->num_stripes / map->sub_stripes;
                offset = map->stripe_len * (num / map->sub_stripes);
                increment = map->stripe_len * factor;
                mirror_num = num % map->sub_stripes;
        } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
                increment = map->stripe_len;
                mirror_num = num % map->num_stripes;
        } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
                increment = map->stripe_len;
                mirror_num = num % map->num_stripes;
        } else {
                increment = map->stripe_len;
                mirror_num = 0;
        }

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        path->reada = 2;
        path->search_commit_root = 1;
        path->skip_locking = 1;

        /*
         * find all extents for each stripe and just read them to get
         * them into the page cache.
         * FIXME: we can do better, build more intelligent prefetching
         */
        logical = base + offset;
        physical = map->stripes[num].physical;
        ret = 0;
        for (i = 0; i < nstripes; ++i) {
                key.objectid = logical;
                key.type = BTRFS_EXTENT_ITEM_KEY;
                key.offset = (u64)0;

                ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
                if (ret < 0)
                        goto out;

                l = path->nodes[0];
                slot = path->slots[0];
                btrfs_item_key_to_cpu(l, &key, slot);
                if (key.objectid != logical) {
                        ret = btrfs_previous_item(root, path, 0,
                                                  BTRFS_EXTENT_ITEM_KEY);
                        if (ret < 0)
                                goto out;
                }

                while (1) {
                        l = path->nodes[0];
                        slot = path->slots[0];
                        if (slot >= btrfs_header_nritems(l)) {
                                ret = btrfs_next_leaf(root, path);
                                if (ret == 0)
                                        continue;
                                if (ret < 0)
                                        goto out;

                                break;
                        }
                        btrfs_item_key_to_cpu(l, &key, slot);

                        if (key.objectid >= logical + map->stripe_len)
                                break;

                        path->slots[0]++;
                }
                btrfs_release_path(root, path);
                logical += increment;
                physical += map->stripe_len;
                cond_resched();
        }

        /*
         * collect all data csums for the stripe to avoid seeking during
         * the scrub. With crc32 this might currently end up being about 1 MB
         */
        start_stripe = 0;
again:
        logical = base + offset + start_stripe * increment;
        for (i = start_stripe; i < nstripes; ++i) {
                ret = btrfs_lookup_csums_range(csum_root, logical,
                                               logical + map->stripe_len - 1,
                                               &sdev->csum_list, 1);
                if (ret)
                        goto out;

                logical += increment;
                cond_resched();
        }
        /*
         * now find all extents for each stripe and scrub them
         */
        logical = base + offset + start_stripe * increment;
        physical = map->stripes[num].physical + start_stripe * map->stripe_len;
        ret = 0;
        for (i = start_stripe; i < nstripes; ++i) {
                /*
                 * canceled?
                 */
                if (atomic_read(&fs_info->scrub_cancel_req) ||
                    atomic_read(&sdev->cancel_req)) {
                        ret = -ECANCELED;
                        goto out;
                }
                /*
                 * check to see if we have to pause
                 */
                if (atomic_read(&fs_info->scrub_pause_req)) {
                        /* push queued extents */
                        scrub_submit(sdev);
                        wait_event(sdev->list_wait,
                                   atomic_read(&sdev->in_flight) == 0);
                        atomic_inc(&fs_info->scrubs_paused);
                        wake_up(&fs_info->scrub_pause_wait);
                        mutex_lock(&fs_info->scrub_lock);
                        while (atomic_read(&fs_info->scrub_pause_req)) {
                                mutex_unlock(&fs_info->scrub_lock);
                                wait_event(fs_info->scrub_pause_wait,
                                   atomic_read(&fs_info->scrub_pause_req) == 0);
                                mutex_lock(&fs_info->scrub_lock);
                        }
                        atomic_dec(&fs_info->scrubs_paused);
                        mutex_unlock(&fs_info->scrub_lock);
                        wake_up(&fs_info->scrub_pause_wait);
                        scrub_free_csums(sdev);
                        start_stripe = i;
                        goto again;
                }

                key.objectid = logical;
                key.type = BTRFS_EXTENT_ITEM_KEY;
                key.offset = (u64)0;

                ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
                if (ret < 0)
                        goto out;

                l = path->nodes[0];
                slot = path->slots[0];
                btrfs_item_key_to_cpu(l, &key, slot);
                if (key.objectid != logical) {
                        ret = btrfs_previous_item(root, path, 0,
                                                  BTRFS_EXTENT_ITEM_KEY);
                        if (ret < 0)
                                goto out;
                }

                while (1) {
                        l = path->nodes[0];
                        slot = path->slots[0];
                        if (slot >= btrfs_header_nritems(l)) {
                                ret = btrfs_next_leaf(root, path);
                                if (ret == 0)
                                        continue;
                                if (ret < 0)
                                        goto out;

                                break;
                        }
                        btrfs_item_key_to_cpu(l, &key, slot);

                        if (key.objectid + key.offset <= logical)
                                goto next;

                        if (key.objectid >= logical + map->stripe_len)
                                break;

                        if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
                                goto next;

                        extent = btrfs_item_ptr(l, slot,
                                                struct btrfs_extent_item);
                        flags = btrfs_extent_flags(l, extent);
                        generation = btrfs_extent_generation(l, extent);

                        if (key.objectid < logical &&
                            (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
                                printk(KERN_ERR
                                       "btrfs scrub: tree block %llu spanning "
                                       "stripes, ignored. logical=%llu\n",
                                       (unsigned long long)key.objectid,
                                       (unsigned long long)logical);
                                goto next;
                        }

                        /*
                         * trim extent to this stripe
                         */
                        if (key.objectid < logical) {
                                key.offset -= logical - key.objectid;
                                key.objectid = logical;
                        }
                        if (key.objectid + key.offset >
                            logical + map->stripe_len) {
                                key.offset = logical + map->stripe_len -
                                             key.objectid;
                        }

                        ret = scrub_extent(sdev, key.objectid, key.offset,
                                           key.objectid - logical + physical,
                                           flags, generation, mirror_num);
                        if (ret)
                                goto out;

next:
                        path->slots[0]++;
                }
                btrfs_release_path(root, path);
                logical += increment;
                physical += map->stripe_len;
                spin_lock(&sdev->stat_lock);
                sdev->stat.last_physical = physical;
                spin_unlock(&sdev->stat_lock);
        }
        /* push queued extents */
        scrub_submit(sdev);

out:
        btrfs_free_path(path);
        return ret < 0 ? ret : 0;
}

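/*
 * look up the chunk mapping for one dev extent and scrub every stripe of
 * that chunk which is stored on sdev's device
 */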
static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev,
        u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length)
{
        struct btrfs_mapping_tree *map_tree =
                &sdev->dev->dev_root->fs_info->mapping_tree;
        struct map_lookup *map;
        struct extent_map *em;
        int i;
        int ret = -EINVAL;

        read_lock(&map_tree->map_tree.lock);
        em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
        read_unlock(&map_tree->map_tree.lock);

        if (!em)
                return -EINVAL;

        map = (struct map_lookup *)em->bdev;
        if (em->start != chunk_offset)
                goto out;

        if (em->len < length)
                goto out;

        for (i = 0; i < map->num_stripes; ++i) {
                if (map->stripes[i].dev == sdev->dev) {
                        ret = scrub_stripe(sdev, map, i, chunk_offset, length);
                        if (ret)
                                goto out;
                }
        }
out:
        free_extent_map(em);

        return ret;
}

static noinline_for_stack
int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
{
        struct btrfs_dev_extent *dev_extent = NULL;
        struct btrfs_path *path;
        struct btrfs_root *root = sdev->dev->dev_root;
        struct btrfs_fs_info *fs_info = root->fs_info;
        u64 length;
        u64 chunk_tree;
        u64 chunk_objectid;
        u64 chunk_offset;
        int ret;
        int slot;
        struct extent_buffer *l;
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct btrfs_block_group_cache *cache;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        path->reada = 2;
        path->search_commit_root = 1;
        path->skip_locking = 1;

        key.objectid = sdev->dev->devid;
        key.offset = 0ull;
        key.type = BTRFS_DEV_EXTENT_KEY;

        while (1) {
                ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
                if (ret < 0)
                        goto out;
                ret = 0;

                l = path->nodes[0];
                slot = path->slots[0];

                btrfs_item_key_to_cpu(l, &found_key, slot);

                if (found_key.objectid != sdev->dev->devid)
                        break;

                if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY)
                        break;

                if (found_key.offset >= end)
                        break;

                if (found_key.offset < key.offset)
                        break;

                dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
                length = btrfs_dev_extent_length(l, dev_extent);

                if (found_key.offset + length <= start) {
                        key.offset = found_key.offset + length;
                        btrfs_release_path(root, path);
                        continue;
                }

                chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
                chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
                chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);

                /*
                 * get a reference on the corresponding block group to prevent
                 * the chunk from going away while we scrub it
                 */
                cache = btrfs_lookup_block_group(fs_info, chunk_offset);
                if (!cache) {
                        ret = -ENOENT;
                        goto out;
                }
                ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
                                  chunk_offset, length);
                btrfs_put_block_group(cache);
                if (ret)
                        break;

                key.offset = found_key.offset + length;
                btrfs_release_path(root, path);
        }

out:
        btrfs_free_path(path);
        return ret;
}

static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
{
        int i;
        u64 bytenr;
        u64 gen;
        int ret;
        struct btrfs_device *device = sdev->dev;
        struct btrfs_root *root = device->dev_root;

        gen = root->fs_info->last_trans_committed;

        for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
                bytenr = btrfs_sb_offset(i);
                if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
                        break;

                ret = scrub_page(sdev, bytenr, PAGE_SIZE, bytenr,
                                 BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1);
                if (ret)
                        return ret;
        }
        wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);

        return 0;
}

/*
 * get a reference count on fs_info->scrub_workers. start worker if necessary
 */
static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
{
        struct btrfs_fs_info *fs_info = root->fs_info;

        mutex_lock(&fs_info->scrub_lock);
        if (fs_info->scrub_workers_refcnt == 0)
                btrfs_start_workers(&fs_info->scrub_workers, 1);
        ++fs_info->scrub_workers_refcnt;
        mutex_unlock(&fs_info->scrub_lock);

        return 0;
}

static noinline_for_stack void scrub_workers_put(struct btrfs_root *root)
{
        struct btrfs_fs_info *fs_info = root->fs_info;

        mutex_lock(&fs_info->scrub_lock);
        if (--fs_info->scrub_workers_refcnt == 0)
                btrfs_stop_workers(&fs_info->scrub_workers);
        WARN_ON(fs_info->scrub_workers_refcnt < 0);
        mutex_unlock(&fs_info->scrub_lock);
}

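/*
 * entry point for scrubbing a single device over the device byte range
 * [start, end): checks the size assumptions, registers the scrub_dev on
 * the device so that at most one scrub runs per device, then scrubs the
 * super blocks followed by all chunks and waits for outstanding I/O
 * before returning
 */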
int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
                    struct btrfs_scrub_progress *progress)
{
        struct scrub_dev *sdev;
        struct btrfs_fs_info *fs_info = root->fs_info;
        int ret;
        struct btrfs_device *dev;

        if (root->fs_info->closing)
                return -EINVAL;

        /*
         * check some assumptions
         */
        if (root->sectorsize != PAGE_SIZE ||
            root->sectorsize != root->leafsize ||
            root->sectorsize != root->nodesize) {
                printk(KERN_ERR "btrfs_scrub: size assumptions fail\n");
                return -EINVAL;
        }

        ret = scrub_workers_get(root);
        if (ret)
                return ret;

        mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
        dev = btrfs_find_device(root, devid, NULL, NULL);
        if (!dev || dev->missing) {
                mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
                scrub_workers_put(root);
                return -ENODEV;
        }
        mutex_lock(&fs_info->scrub_lock);

        if (!dev->in_fs_metadata) {
                mutex_unlock(&fs_info->scrub_lock);
                mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
                scrub_workers_put(root);
                return -ENODEV;
        }

        if (dev->scrub_device) {
                mutex_unlock(&fs_info->scrub_lock);
                mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
                scrub_workers_put(root);
                return -EINPROGRESS;
        }
        sdev = scrub_setup_dev(dev);
        if (IS_ERR(sdev)) {
                mutex_unlock(&fs_info->scrub_lock);
                mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
                scrub_workers_put(root);
                return PTR_ERR(sdev);
        }
        dev->scrub_device = sdev;

        atomic_inc(&fs_info->scrubs_running);
        mutex_unlock(&fs_info->scrub_lock);
        mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);

        down_read(&fs_info->scrub_super_lock);
        ret = scrub_supers(sdev);
        up_read(&fs_info->scrub_super_lock);

        if (!ret)
                ret = scrub_enumerate_chunks(sdev, start, end);

        wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);

        atomic_dec(&fs_info->scrubs_running);
        wake_up(&fs_info->scrub_pause_wait);

        if (progress)
                memcpy(progress, &sdev->stat, sizeof(*progress));

        mutex_lock(&fs_info->scrub_lock);
        dev->scrub_device = NULL;
        mutex_unlock(&fs_info->scrub_lock);

        scrub_free_dev(sdev);
        scrub_workers_put(root);

        return ret;
}

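/*
 * btrfs_scrub_pause() and btrfs_scrub_continue() bracket sections such as
 * the transaction commit during which no scrub I/O may be in flight:
 * pause raises scrub_pause_req and waits until every running scrub has
 * parked itself in the pause loop of scrub_stripe()
 */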
int btrfs_scrub_pause(struct btrfs_root *root)
{
        struct btrfs_fs_info *fs_info = root->fs_info;

        mutex_lock(&fs_info->scrub_lock);
        atomic_inc(&fs_info->scrub_pause_req);
        while (atomic_read(&fs_info->scrubs_paused) !=
               atomic_read(&fs_info->scrubs_running)) {
                mutex_unlock(&fs_info->scrub_lock);
                wait_event(fs_info->scrub_pause_wait,
                           atomic_read(&fs_info->scrubs_paused) ==
                           atomic_read(&fs_info->scrubs_running));
                mutex_lock(&fs_info->scrub_lock);
        }
        mutex_unlock(&fs_info->scrub_lock);

        return 0;
}

int btrfs_scrub_continue(struct btrfs_root *root)
{
        struct btrfs_fs_info *fs_info = root->fs_info;

        atomic_dec(&fs_info->scrub_pause_req);
        wake_up(&fs_info->scrub_pause_wait);
        return 0;
}

int btrfs_scrub_pause_super(struct btrfs_root *root)
{
        down_write(&root->fs_info->scrub_super_lock);
        return 0;
}

int btrfs_scrub_continue_super(struct btrfs_root *root)
{
        up_write(&root->fs_info->scrub_super_lock);
        return 0;
}

int btrfs_scrub_cancel(struct btrfs_root *root)
{
        struct btrfs_fs_info *fs_info = root->fs_info;

        mutex_lock(&fs_info->scrub_lock);
        if (!atomic_read(&fs_info->scrubs_running)) {
                mutex_unlock(&fs_info->scrub_lock);
                return -ENOTCONN;
        }

        atomic_inc(&fs_info->scrub_cancel_req);
        while (atomic_read(&fs_info->scrubs_running)) {
                mutex_unlock(&fs_info->scrub_lock);
                wait_event(fs_info->scrub_pause_wait,
                           atomic_read(&fs_info->scrubs_running) == 0);
                mutex_lock(&fs_info->scrub_lock);
        }
        atomic_dec(&fs_info->scrub_cancel_req);
        mutex_unlock(&fs_info->scrub_lock);

        return 0;
}

int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct scrub_dev *sdev;

        mutex_lock(&fs_info->scrub_lock);
        sdev = dev->scrub_device;
        if (!sdev) {
                mutex_unlock(&fs_info->scrub_lock);
                return -ENOTCONN;
        }
        atomic_inc(&sdev->cancel_req);
        while (dev->scrub_device) {
                mutex_unlock(&fs_info->scrub_lock);
                wait_event(fs_info->scrub_pause_wait,
                           dev->scrub_device == NULL);
                mutex_lock(&fs_info->scrub_lock);
        }
        mutex_unlock(&fs_info->scrub_lock);

        return 0;
}

int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_device *dev;
        int ret;

        /*
         * we have to hold the device_list_mutex here so the device
         * does not go away in cancel_dev. FIXME: find a better solution
         */
        mutex_lock(&fs_info->fs_devices->device_list_mutex);
        dev = btrfs_find_device(root, devid, NULL, NULL);
        if (!dev) {
                mutex_unlock(&fs_info->fs_devices->device_list_mutex);
                return -ENODEV;
        }
        ret = btrfs_scrub_cancel_dev(root, dev);
        mutex_unlock(&fs_info->fs_devices->device_list_mutex);

        return ret;
}

int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
                         struct btrfs_scrub_progress *progress)
{
        struct btrfs_device *dev;
        struct scrub_dev *sdev = NULL;

        mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
        dev = btrfs_find_device(root, devid, NULL, NULL);
        if (dev)
                sdev = dev->scrub_device;
        if (sdev)
                memcpy(progress, &sdev->stat, sizeof(*progress));
        mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);

        return dev ? (sdev ? 0 : -ENOTCONN) : -ENODEV;
}