Joe Thornber991d9fa2011-10-31 20:21:18 +00001/*
2 * Copyright (C) 2011 Red Hat UK.
3 *
4 * This file is released under the GPL.
5 */
6
7#include "dm-thin-metadata.h"
8
9#include <linux/device-mapper.h>
10#include <linux/dm-io.h>
11#include <linux/dm-kcopyd.h>
12#include <linux/list.h>
13#include <linux/init.h>
14#include <linux/module.h>
15#include <linux/slab.h>
16
17#define DM_MSG_PREFIX "thin"
18
19/*
20 * Tunable constants
21 */
Alasdair G Kergonb4ce1632012-07-27 15:07:57 +010022#define ENDIO_HOOK_POOL_SIZE 1024
Joe Thornber991d9fa2011-10-31 20:21:18 +000023#define DEFERRED_SET_SIZE 64
24#define MAPPING_POOL_SIZE 1024
25#define PRISON_CELLS 1024
Joe Thornber905e51b2012-03-28 18:41:27 +010026#define COMMIT_PERIOD HZ
Joe Thornber991d9fa2011-10-31 20:21:18 +000027
28/*
29 * The block size of the device holding pool data must be
30 * between 64KB and 1GB.
31 */
32#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (64 * 1024 >> SECTOR_SHIFT)
33#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)
34
35/*
Joe Thornber991d9fa2011-10-31 20:21:18 +000036 * Device id is restricted to 24 bits.
37 */
38#define MAX_DEV_ID ((1 << 24) - 1)
39
40/*
41 * How do we handle breaking sharing of data blocks?
42 * =================================================
43 *
44 * We use a standard copy-on-write btree to store the mappings for the
45 * devices (note I'm talking about copy-on-write of the metadata here, not
46 * the data). When you take an internal snapshot you clone the root node
47 * of the origin btree. After this there is no concept of an origin or a
48 * snapshot. They are just two device trees that happen to point to the
49 * same data blocks.
50 *
51 * When we get a write in we decide if it's to a shared data block using
52 * some timestamp magic. If it is, we have to break sharing.
53 *
54 * Let's say we write to a shared block in what was the origin. The
55 * steps are:
56 *
 57 * i) plug further io to this physical block. (see bio_prison code).
58 *
59 * ii) quiesce any read io to that shared data block. Obviously
60 * including all devices that share this block. (see deferred_set code)
61 *
 62 * iii) copy the data block to a newly allocated block. This step can be
 63 * skipped if the io covers the block. (schedule_copy).
64 *
65 * iv) insert the new mapping into the origin's btree
Joe Thornberfe878f32012-03-28 18:41:24 +010066 * (process_prepared_mapping). This act of inserting breaks some
Joe Thornber991d9fa2011-10-31 20:21:18 +000067 * sharing of btree nodes between the two devices. Breaking sharing only
 68 * affects the btree of that specific device. Btrees for the other
69 * devices that share the block never change. The btree for the origin
70 * device as it was after the last commit is untouched, ie. we're using
71 * persistent data structures in the functional programming sense.
72 *
73 * v) unplug io to this physical block, including the io that triggered
74 * the breaking of sharing.
75 *
76 * Steps (ii) and (iii) occur in parallel.
77 *
78 * The metadata _doesn't_ need to be committed before the io continues. We
79 * get away with this because the io is always written to a _new_ block.
80 * If there's a crash, then:
81 *
82 * - The origin mapping will point to the old origin block (the shared
83 * one). This will contain the data as it was before the io that triggered
84 * the breaking of sharing came in.
85 *
86 * - The snap mapping still points to the old block. As it would after
87 * the commit.
88 *
 89 * The downside of this scheme is that the timestamp magic isn't perfect, and
 90 * will continue to think that the data block in the snapshot device is shared
91 * even after the write to the origin has broken sharing. I suspect data
92 * blocks will typically be shared by many different devices, so we're
93 * breaking sharing n + 1 times, rather than n, where n is the number of
94 * devices that reference this data block. At the moment I think the
95 * benefits far, far outweigh the disadvantages.
96 */
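/*
 * A rough sketch of how the steps above map onto the code below (an
 * illustrative summary rather than an authoritative call graph):
 *
 *   i)   bio_detain()                        - plug io to the physical block
 *   ii)  ds_add_work(&pool->shared_read_ds)  - wait for in-flight readers
 *   iii) schedule_internal_copy()            - copy the block via kcopyd
 *   iv)  process_prepared_mapping()          - dm_thin_insert_block()
 *   v)   cell_defer() / cell_defer_except()  - release the detained bios
 */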
97
98/*----------------------------------------------------------------*/
99
100/*
101 * Sometimes we can't deal with a bio straight away. We put them in prison
102 * where they can't cause any mischief. Bios are put in a cell identified
 103 * by a key; multiple bios can be in the same cell. When the cell is
104 * subsequently unlocked the bios become available.
105 */
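/*
 * A minimal usage sketch (illustrative only; the real callers such as
 * process_bio() below do more between detain and release):
 *
 *	struct cell *cell;
 *	struct cell_key key;
 *
 *	build_virtual_key(tc->td, block, &key);
 *	if (bio_detain(pool->prison, &key, bio, &cell))
 *		return;		(someone already holds this cell and the bio
 *				 has been queued inside it)
 *
 *	... do the work that must not race on this block ...
 *
 *	cell_release(cell, &pool->deferred_bios);
 */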
106struct bio_prison;
107
108struct cell_key {
109 int virtual;
110 dm_thin_id dev;
111 dm_block_t block;
112};
113
114struct cell {
115 struct hlist_node list;
116 struct bio_prison *prison;
117 struct cell_key key;
Joe Thornber6f94a4c2012-03-28 18:41:23 +0100118 struct bio *holder;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000119 struct bio_list bios;
120};
121
122struct bio_prison {
123 spinlock_t lock;
124 mempool_t *cell_pool;
125
126 unsigned nr_buckets;
127 unsigned hash_mask;
128 struct hlist_head *cells;
129};
130
131static uint32_t calc_nr_buckets(unsigned nr_cells)
132{
133 uint32_t n = 128;
134
135 nr_cells /= 4;
136 nr_cells = min(nr_cells, 8192u);
137
138 while (n < nr_cells)
139 n <<= 1;
140
141 return n;
142}
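/*
 * For example, with the default PRISON_CELLS of 1024 this gives
 * 1024 / 4 = 256 wanted buckets; n doubles from 128 to 256, so the prison
 * hash table ends up with 256 buckets and a hash_mask of 0xff.
 */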
143
144/*
145 * @nr_cells should be the number of cells you want in use _concurrently_.
146 * Don't confuse it with the number of distinct keys.
147 */
148static struct bio_prison *prison_create(unsigned nr_cells)
149{
150 unsigned i;
151 uint32_t nr_buckets = calc_nr_buckets(nr_cells);
152 size_t len = sizeof(struct bio_prison) +
153 (sizeof(struct hlist_head) * nr_buckets);
154 struct bio_prison *prison = kmalloc(len, GFP_KERNEL);
155
156 if (!prison)
157 return NULL;
158
159 spin_lock_init(&prison->lock);
160 prison->cell_pool = mempool_create_kmalloc_pool(nr_cells,
161 sizeof(struct cell));
162 if (!prison->cell_pool) {
163 kfree(prison);
164 return NULL;
165 }
166
167 prison->nr_buckets = nr_buckets;
168 prison->hash_mask = nr_buckets - 1;
169 prison->cells = (struct hlist_head *) (prison + 1);
170 for (i = 0; i < nr_buckets; i++)
171 INIT_HLIST_HEAD(prison->cells + i);
172
173 return prison;
174}
175
176static void prison_destroy(struct bio_prison *prison)
177{
178 mempool_destroy(prison->cell_pool);
179 kfree(prison);
180}
181
182static uint32_t hash_key(struct bio_prison *prison, struct cell_key *key)
183{
184 const unsigned long BIG_PRIME = 4294967291UL;
185 uint64_t hash = key->block * BIG_PRIME;
186
187 return (uint32_t) (hash & prison->hash_mask);
188}
189
190static int keys_equal(struct cell_key *lhs, struct cell_key *rhs)
191{
192 return (lhs->virtual == rhs->virtual) &&
193 (lhs->dev == rhs->dev) &&
194 (lhs->block == rhs->block);
195}
196
197static struct cell *__search_bucket(struct hlist_head *bucket,
198 struct cell_key *key)
199{
200 struct cell *cell;
201 struct hlist_node *tmp;
202
203 hlist_for_each_entry(cell, tmp, bucket, list)
204 if (keys_equal(&cell->key, key))
205 return cell;
206
207 return NULL;
208}
209
210/*
211 * This may block if a new cell needs allocating. You must ensure that
212 * cells will be unlocked even if the calling thread is blocked.
213 *
Joe Thornber6f94a4c2012-03-28 18:41:23 +0100214 * Returns 1 if the cell was already held, 0 if @inmate is the new holder.
Joe Thornber991d9fa2011-10-31 20:21:18 +0000215 */
216static int bio_detain(struct bio_prison *prison, struct cell_key *key,
217 struct bio *inmate, struct cell **ref)
218{
Joe Thornber6f94a4c2012-03-28 18:41:23 +0100219 int r = 1;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000220 unsigned long flags;
221 uint32_t hash = hash_key(prison, key);
Joe Thornber6f94a4c2012-03-28 18:41:23 +0100222 struct cell *cell, *cell2;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000223
224 BUG_ON(hash > prison->nr_buckets);
225
226 spin_lock_irqsave(&prison->lock, flags);
Joe Thornber6f94a4c2012-03-28 18:41:23 +0100227
Joe Thornber991d9fa2011-10-31 20:21:18 +0000228 cell = __search_bucket(prison->cells + hash, key);
Joe Thornber6f94a4c2012-03-28 18:41:23 +0100229 if (cell) {
230 bio_list_add(&cell->bios, inmate);
231 goto out;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000232 }
233
Joe Thornber6f94a4c2012-03-28 18:41:23 +0100234 /*
235 * Allocate a new cell
236 */
Joe Thornber991d9fa2011-10-31 20:21:18 +0000237 spin_unlock_irqrestore(&prison->lock, flags);
Joe Thornber6f94a4c2012-03-28 18:41:23 +0100238 cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO);
239 spin_lock_irqsave(&prison->lock, flags);
Joe Thornber991d9fa2011-10-31 20:21:18 +0000240
Joe Thornber6f94a4c2012-03-28 18:41:23 +0100241 /*
242 * We've been unlocked, so we have to double check that
243 * nobody else has inserted this cell in the meantime.
244 */
245 cell = __search_bucket(prison->cells + hash, key);
246 if (cell) {
Joe Thornber991d9fa2011-10-31 20:21:18 +0000247 mempool_free(cell2, prison->cell_pool);
Joe Thornber6f94a4c2012-03-28 18:41:23 +0100248 bio_list_add(&cell->bios, inmate);
249 goto out;
250 }
251
252 /*
253 * Use new cell.
254 */
255 cell = cell2;
256
257 cell->prison = prison;
258 memcpy(&cell->key, key, sizeof(cell->key));
259 cell->holder = inmate;
260 bio_list_init(&cell->bios);
261 hlist_add_head(&cell->list, prison->cells + hash);
262
263 r = 0;
264
265out:
266 spin_unlock_irqrestore(&prison->lock, flags);
Joe Thornber991d9fa2011-10-31 20:21:18 +0000267
268 *ref = cell;
269
270 return r;
271}
272
273/*
274 * @inmates must have been initialised prior to this call
275 */
276static void __cell_release(struct cell *cell, struct bio_list *inmates)
277{
278 struct bio_prison *prison = cell->prison;
279
280 hlist_del(&cell->list);
281
Mike Snitzer03aaae72012-05-12 01:43:12 +0100282 if (inmates) {
283 bio_list_add(inmates, cell->holder);
284 bio_list_merge(inmates, &cell->bios);
285 }
Joe Thornber991d9fa2011-10-31 20:21:18 +0000286
287 mempool_free(cell, prison->cell_pool);
288}
289
290static void cell_release(struct cell *cell, struct bio_list *bios)
291{
292 unsigned long flags;
293 struct bio_prison *prison = cell->prison;
294
295 spin_lock_irqsave(&prison->lock, flags);
296 __cell_release(cell, bios);
297 spin_unlock_irqrestore(&prison->lock, flags);
298}
299
300/*
301 * There are a couple of places where we put a bio into a cell briefly
302 * before taking it out again. In these situations we know that no other
303 * bio may be in the cell. This function releases the cell, and also does
304 * a sanity check.
305 */
Joe Thornber6f94a4c2012-03-28 18:41:23 +0100306static void __cell_release_singleton(struct cell *cell, struct bio *bio)
307{
Joe Thornber6f94a4c2012-03-28 18:41:23 +0100308 BUG_ON(cell->holder != bio);
309 BUG_ON(!bio_list_empty(&cell->bios));
Mike Snitzer03aaae72012-05-12 01:43:12 +0100310
311 __cell_release(cell, NULL);
Joe Thornber6f94a4c2012-03-28 18:41:23 +0100312}
313
Joe Thornber991d9fa2011-10-31 20:21:18 +0000314static void cell_release_singleton(struct cell *cell, struct bio *bio)
315{
Joe Thornber991d9fa2011-10-31 20:21:18 +0000316 unsigned long flags;
Joe Thornber6f94a4c2012-03-28 18:41:23 +0100317 struct bio_prison *prison = cell->prison;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000318
319 spin_lock_irqsave(&prison->lock, flags);
Joe Thornber6f94a4c2012-03-28 18:41:23 +0100320 __cell_release_singleton(cell, bio);
Joe Thornber991d9fa2011-10-31 20:21:18 +0000321 spin_unlock_irqrestore(&prison->lock, flags);
Joe Thornber6f94a4c2012-03-28 18:41:23 +0100322}
Joe Thornber991d9fa2011-10-31 20:21:18 +0000323
Joe Thornber6f94a4c2012-03-28 18:41:23 +0100324/*
325 * Sometimes we don't want the holder, just the additional bios.
326 */
327static void __cell_release_no_holder(struct cell *cell, struct bio_list *inmates)
328{
329 struct bio_prison *prison = cell->prison;
330
331 hlist_del(&cell->list);
332 bio_list_merge(inmates, &cell->bios);
333
334 mempool_free(cell, prison->cell_pool);
335}
336
337static void cell_release_no_holder(struct cell *cell, struct bio_list *inmates)
338{
339 unsigned long flags;
340 struct bio_prison *prison = cell->prison;
341
342 spin_lock_irqsave(&prison->lock, flags);
343 __cell_release_no_holder(cell, inmates);
344 spin_unlock_irqrestore(&prison->lock, flags);
Joe Thornber991d9fa2011-10-31 20:21:18 +0000345}
346
347static void cell_error(struct cell *cell)
348{
349 struct bio_prison *prison = cell->prison;
350 struct bio_list bios;
351 struct bio *bio;
352 unsigned long flags;
353
354 bio_list_init(&bios);
355
356 spin_lock_irqsave(&prison->lock, flags);
357 __cell_release(cell, &bios);
358 spin_unlock_irqrestore(&prison->lock, flags);
359
360 while ((bio = bio_list_pop(&bios)))
361 bio_io_error(bio);
362}
363
364/*----------------------------------------------------------------*/
365
366/*
367 * We use the deferred set to keep track of pending reads to shared blocks.
368 * We do this to ensure the new mapping caused by a write isn't performed
369 * until these prior reads have completed. Otherwise the insertion of the
370 * new mapping could free the old block that the read bios are mapped to.
371 */
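/*
 * Roughly how this is used below (an illustrative sketch, not exhaustive):
 *
 *  - a read to a shared block grabs an entry with ds_inc() and the bio's
 *    endio path later drops it with ds_dec();
 *  - work that must wait for those earlier reads (e.g. a new_mapping) is
 *    queued with ds_add_work(), and __sweep() hands it back for processing
 *    once every older entry has drained.
 */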
372
373struct deferred_set;
374struct deferred_entry {
375 struct deferred_set *ds;
376 unsigned count;
377 struct list_head work_items;
378};
379
380struct deferred_set {
381 spinlock_t lock;
382 unsigned current_entry;
383 unsigned sweeper;
384 struct deferred_entry entries[DEFERRED_SET_SIZE];
385};
386
387static void ds_init(struct deferred_set *ds)
388{
389 int i;
390
391 spin_lock_init(&ds->lock);
392 ds->current_entry = 0;
393 ds->sweeper = 0;
394 for (i = 0; i < DEFERRED_SET_SIZE; i++) {
395 ds->entries[i].ds = ds;
396 ds->entries[i].count = 0;
397 INIT_LIST_HEAD(&ds->entries[i].work_items);
398 }
399}
400
401static struct deferred_entry *ds_inc(struct deferred_set *ds)
402{
403 unsigned long flags;
404 struct deferred_entry *entry;
405
406 spin_lock_irqsave(&ds->lock, flags);
407 entry = ds->entries + ds->current_entry;
408 entry->count++;
409 spin_unlock_irqrestore(&ds->lock, flags);
410
411 return entry;
412}
413
414static unsigned ds_next(unsigned index)
415{
416 return (index + 1) % DEFERRED_SET_SIZE;
417}
418
419static void __sweep(struct deferred_set *ds, struct list_head *head)
420{
421 while ((ds->sweeper != ds->current_entry) &&
422 !ds->entries[ds->sweeper].count) {
423 list_splice_init(&ds->entries[ds->sweeper].work_items, head);
424 ds->sweeper = ds_next(ds->sweeper);
425 }
426
427 if ((ds->sweeper == ds->current_entry) && !ds->entries[ds->sweeper].count)
428 list_splice_init(&ds->entries[ds->sweeper].work_items, head);
429}
430
431static void ds_dec(struct deferred_entry *entry, struct list_head *head)
432{
433 unsigned long flags;
434
435 spin_lock_irqsave(&entry->ds->lock, flags);
436 BUG_ON(!entry->count);
437 --entry->count;
438 __sweep(entry->ds, head);
439 spin_unlock_irqrestore(&entry->ds->lock, flags);
440}
441
442/*
 443 * Returns 1 if the work was deferred, 0 if there were no pending items to delay it.
444 */
445static int ds_add_work(struct deferred_set *ds, struct list_head *work)
446{
447 int r = 1;
448 unsigned long flags;
449 unsigned next_entry;
450
451 spin_lock_irqsave(&ds->lock, flags);
452 if ((ds->sweeper == ds->current_entry) &&
453 !ds->entries[ds->current_entry].count)
454 r = 0;
455 else {
456 list_add(work, &ds->entries[ds->current_entry].work_items);
457 next_entry = ds_next(ds->current_entry);
458 if (!ds->entries[next_entry].count)
459 ds->current_entry = next_entry;
460 }
461 spin_unlock_irqrestore(&ds->lock, flags);
462
463 return r;
464}
465
466/*----------------------------------------------------------------*/
467
468/*
469 * Key building.
470 */
471static void build_data_key(struct dm_thin_device *td,
472 dm_block_t b, struct cell_key *key)
473{
474 key->virtual = 0;
475 key->dev = dm_thin_dev_id(td);
476 key->block = b;
477}
478
479static void build_virtual_key(struct dm_thin_device *td, dm_block_t b,
480 struct cell_key *key)
481{
482 key->virtual = 1;
483 key->dev = dm_thin_dev_id(td);
484 key->block = b;
485}
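/*
 * Note: a data key (virtual = 0) describes a block on the pool's data
 * device, while a virtual key (virtual = 1) describes a block in a thin
 * device's logical address space; both also record the thin device id.
 */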
486
487/*----------------------------------------------------------------*/
488
489/*
490 * A pool device ties together a metadata device and a data device. It
491 * also provides the interface for creating and destroying internal
492 * devices.
493 */
494struct new_mapping;
Joe Thornber67e2e2b2012-03-28 18:41:29 +0100495
496struct pool_features {
497 unsigned zero_new_blocks:1;
498 unsigned discard_enabled:1;
499 unsigned discard_passdown:1;
500};
501
Joe Thornber991d9fa2011-10-31 20:21:18 +0000502struct pool {
503 struct list_head list;
504 struct dm_target *ti; /* Only set if a pool target is bound */
505
506 struct mapped_device *pool_md;
507 struct block_device *md_dev;
508 struct dm_pool_metadata *pmd;
509
510 uint32_t sectors_per_block;
511 unsigned block_shift;
512 dm_block_t offset_mask;
513 dm_block_t low_water_blocks;
514
Joe Thornber67e2e2b2012-03-28 18:41:29 +0100515 struct pool_features pf;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000516 unsigned low_water_triggered:1; /* A dm event has been sent */
517 unsigned no_free_space:1; /* A -ENOSPC warning has been issued */
518
519 struct bio_prison *prison;
520 struct dm_kcopyd_client *copier;
521
522 struct workqueue_struct *wq;
523 struct work_struct worker;
Joe Thornber905e51b2012-03-28 18:41:27 +0100524 struct delayed_work waker;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000525
526 unsigned ref_count;
Joe Thornber905e51b2012-03-28 18:41:27 +0100527 unsigned long last_commit_jiffies;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000528
529 spinlock_t lock;
530 struct bio_list deferred_bios;
531 struct bio_list deferred_flush_bios;
532 struct list_head prepared_mappings;
Joe Thornber104655f2012-03-28 18:41:28 +0100533 struct list_head prepared_discards;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000534
535 struct bio_list retry_on_resume_list;
536
Joe Thornbereb2aa482012-03-28 18:41:28 +0100537 struct deferred_set shared_read_ds;
Joe Thornber104655f2012-03-28 18:41:28 +0100538 struct deferred_set all_io_ds;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000539
540 struct new_mapping *next_mapping;
541 mempool_t *mapping_pool;
542 mempool_t *endio_hook_pool;
543};
544
545/*
546 * Target context for a pool.
547 */
548struct pool_c {
549 struct dm_target *ti;
550 struct pool *pool;
551 struct dm_dev *data_dev;
552 struct dm_dev *metadata_dev;
553 struct dm_target_callbacks callbacks;
554
555 dm_block_t low_water_blocks;
Joe Thornber67e2e2b2012-03-28 18:41:29 +0100556 struct pool_features pf;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000557};
558
559/*
560 * Target context for a thin.
561 */
562struct thin_c {
563 struct dm_dev *pool_dev;
Joe Thornber2dd9c252012-03-28 18:41:28 +0100564 struct dm_dev *origin_dev;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000565 dm_thin_id dev_id;
566
567 struct pool *pool;
568 struct dm_thin_device *td;
569};
570
571/*----------------------------------------------------------------*/
572
573/*
574 * A global list of pools that uses a struct mapped_device as a key.
575 */
576static struct dm_thin_pool_table {
577 struct mutex mutex;
578 struct list_head pools;
579} dm_thin_pool_table;
580
581static void pool_table_init(void)
582{
583 mutex_init(&dm_thin_pool_table.mutex);
584 INIT_LIST_HEAD(&dm_thin_pool_table.pools);
585}
586
587static void __pool_table_insert(struct pool *pool)
588{
589 BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
590 list_add(&pool->list, &dm_thin_pool_table.pools);
591}
592
593static void __pool_table_remove(struct pool *pool)
594{
595 BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
596 list_del(&pool->list);
597}
598
599static struct pool *__pool_table_lookup(struct mapped_device *md)
600{
601 struct pool *pool = NULL, *tmp;
602
603 BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
604
605 list_for_each_entry(tmp, &dm_thin_pool_table.pools, list) {
606 if (tmp->pool_md == md) {
607 pool = tmp;
608 break;
609 }
610 }
611
612 return pool;
613}
614
615static struct pool *__pool_table_lookup_metadata_dev(struct block_device *md_dev)
616{
617 struct pool *pool = NULL, *tmp;
618
619 BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
620
621 list_for_each_entry(tmp, &dm_thin_pool_table.pools, list) {
622 if (tmp->md_dev == md_dev) {
623 pool = tmp;
624 break;
625 }
626 }
627
628 return pool;
629}
630
631/*----------------------------------------------------------------*/
632
Joe Thornbereb2aa482012-03-28 18:41:28 +0100633struct endio_hook {
634 struct thin_c *tc;
635 struct deferred_entry *shared_read_entry;
Joe Thornber104655f2012-03-28 18:41:28 +0100636 struct deferred_entry *all_io_entry;
Joe Thornbereb2aa482012-03-28 18:41:28 +0100637 struct new_mapping *overwrite_mapping;
638};
639
Joe Thornber991d9fa2011-10-31 20:21:18 +0000640static void __requeue_bio_list(struct thin_c *tc, struct bio_list *master)
641{
642 struct bio *bio;
643 struct bio_list bios;
644
645 bio_list_init(&bios);
646 bio_list_merge(&bios, master);
647 bio_list_init(master);
648
649 while ((bio = bio_list_pop(&bios))) {
Joe Thornbereb2aa482012-03-28 18:41:28 +0100650 struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
651 if (h->tc == tc)
Joe Thornber991d9fa2011-10-31 20:21:18 +0000652 bio_endio(bio, DM_ENDIO_REQUEUE);
653 else
654 bio_list_add(master, bio);
655 }
656}
657
658static void requeue_io(struct thin_c *tc)
659{
660 struct pool *pool = tc->pool;
661 unsigned long flags;
662
663 spin_lock_irqsave(&pool->lock, flags);
664 __requeue_bio_list(tc, &pool->deferred_bios);
665 __requeue_bio_list(tc, &pool->retry_on_resume_list);
666 spin_unlock_irqrestore(&pool->lock, flags);
667}
668
669/*
670 * This section of code contains the logic for processing a thin device's IO.
671 * Much of the code depends on pool object resources (lists, workqueues, etc)
672 * but most is exclusively called from the thin target rather than the thin-pool
673 * target.
674 */
675
676static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio)
677{
678 return bio->bi_sector >> tc->pool->block_shift;
679}
680
681static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
682{
683 struct pool *pool = tc->pool;
684
685 bio->bi_bdev = tc->pool_dev->bdev;
686 bio->bi_sector = (block << pool->block_shift) +
687 (bio->bi_sector & pool->offset_mask);
688}
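/*
 * Worked example (illustrative numbers only): with the minimum data block
 * size of 64KiB (128 sectors), block_shift is 7 and offset_mask is 127, so
 * a bio for sector 1000 of a thin device is in virtual block 7 and remap()
 * sends it to (data_block << 7) + 104 on the pool's data device.
 */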
689
Joe Thornber2dd9c252012-03-28 18:41:28 +0100690static void remap_to_origin(struct thin_c *tc, struct bio *bio)
691{
692 bio->bi_bdev = tc->origin_dev->bdev;
693}
694
695static void issue(struct thin_c *tc, struct bio *bio)
Joe Thornber991d9fa2011-10-31 20:21:18 +0000696{
697 struct pool *pool = tc->pool;
698 unsigned long flags;
699
Joe Thornber991d9fa2011-10-31 20:21:18 +0000700 /*
701 * Batch together any FUA/FLUSH bios we find and then issue
702 * a single commit for them in process_deferred_bios().
703 */
704 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
705 spin_lock_irqsave(&pool->lock, flags);
706 bio_list_add(&pool->deferred_flush_bios, bio);
707 spin_unlock_irqrestore(&pool->lock, flags);
708 } else
709 generic_make_request(bio);
710}
711
Joe Thornber2dd9c252012-03-28 18:41:28 +0100712static void remap_to_origin_and_issue(struct thin_c *tc, struct bio *bio)
713{
714 remap_to_origin(tc, bio);
715 issue(tc, bio);
716}
717
718static void remap_and_issue(struct thin_c *tc, struct bio *bio,
719 dm_block_t block)
720{
721 remap(tc, bio, block);
722 issue(tc, bio);
723}
724
Joe Thornber991d9fa2011-10-31 20:21:18 +0000725/*
726 * wake_worker() is used when new work is queued and when pool_resume is
727 * ready to continue deferred IO processing.
728 */
729static void wake_worker(struct pool *pool)
730{
731 queue_work(pool->wq, &pool->worker);
732}
733
734/*----------------------------------------------------------------*/
735
736/*
737 * Bio endio functions.
738 */
Joe Thornber991d9fa2011-10-31 20:21:18 +0000739struct new_mapping {
740 struct list_head list;
741
Joe Thornbereb2aa482012-03-28 18:41:28 +0100742 unsigned quiesced:1;
743 unsigned prepared:1;
Joe Thornber104655f2012-03-28 18:41:28 +0100744 unsigned pass_discard:1;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000745
746 struct thin_c *tc;
747 dm_block_t virt_block;
748 dm_block_t data_block;
Joe Thornber104655f2012-03-28 18:41:28 +0100749 struct cell *cell, *cell2;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000750 int err;
751
752 /*
753 * If the bio covers the whole area of a block then we can avoid
754 * zeroing or copying. Instead this bio is hooked. The bio will
755 * still be in the cell, so care has to be taken to avoid issuing
756 * the bio twice.
757 */
758 struct bio *bio;
759 bio_end_io_t *saved_bi_end_io;
760};
761
762static void __maybe_add_mapping(struct new_mapping *m)
763{
764 struct pool *pool = m->tc->pool;
765
Joe Thornbereb2aa482012-03-28 18:41:28 +0100766 if (m->quiesced && m->prepared) {
Joe Thornber991d9fa2011-10-31 20:21:18 +0000767 list_add(&m->list, &pool->prepared_mappings);
768 wake_worker(pool);
769 }
770}
771
772static void copy_complete(int read_err, unsigned long write_err, void *context)
773{
774 unsigned long flags;
775 struct new_mapping *m = context;
776 struct pool *pool = m->tc->pool;
777
778 m->err = read_err || write_err ? -EIO : 0;
779
780 spin_lock_irqsave(&pool->lock, flags);
781 m->prepared = 1;
782 __maybe_add_mapping(m);
783 spin_unlock_irqrestore(&pool->lock, flags);
784}
785
786static void overwrite_endio(struct bio *bio, int err)
787{
788 unsigned long flags;
Joe Thornbereb2aa482012-03-28 18:41:28 +0100789 struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
790 struct new_mapping *m = h->overwrite_mapping;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000791 struct pool *pool = m->tc->pool;
792
793 m->err = err;
794
795 spin_lock_irqsave(&pool->lock, flags);
796 m->prepared = 1;
797 __maybe_add_mapping(m);
798 spin_unlock_irqrestore(&pool->lock, flags);
799}
800
Joe Thornber991d9fa2011-10-31 20:21:18 +0000801/*----------------------------------------------------------------*/
802
803/*
804 * Workqueue.
805 */
806
807/*
808 * Prepared mapping jobs.
809 */
810
811/*
812 * This sends the bios in the cell back to the deferred_bios list.
813 */
814static void cell_defer(struct thin_c *tc, struct cell *cell,
815 dm_block_t data_block)
816{
817 struct pool *pool = tc->pool;
818 unsigned long flags;
819
820 spin_lock_irqsave(&pool->lock, flags);
821 cell_release(cell, &pool->deferred_bios);
822 spin_unlock_irqrestore(&tc->pool->lock, flags);
823
824 wake_worker(pool);
825}
826
827/*
828 * Same as cell_defer above, except it omits one particular detainee,
829 * a write bio that covers the block and has already been processed.
830 */
Joe Thornber6f94a4c2012-03-28 18:41:23 +0100831static void cell_defer_except(struct thin_c *tc, struct cell *cell)
Joe Thornber991d9fa2011-10-31 20:21:18 +0000832{
833 struct bio_list bios;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000834 struct pool *pool = tc->pool;
835 unsigned long flags;
836
837 bio_list_init(&bios);
Joe Thornber991d9fa2011-10-31 20:21:18 +0000838
839 spin_lock_irqsave(&pool->lock, flags);
Joe Thornber6f94a4c2012-03-28 18:41:23 +0100840 cell_release_no_holder(cell, &pool->deferred_bios);
Joe Thornber991d9fa2011-10-31 20:21:18 +0000841 spin_unlock_irqrestore(&pool->lock, flags);
842
843 wake_worker(pool);
844}
845
846static void process_prepared_mapping(struct new_mapping *m)
847{
848 struct thin_c *tc = m->tc;
849 struct bio *bio;
850 int r;
851
852 bio = m->bio;
853 if (bio)
854 bio->bi_end_io = m->saved_bi_end_io;
855
856 if (m->err) {
857 cell_error(m->cell);
858 return;
859 }
860
861 /*
862 * Commit the prepared block into the mapping btree.
863 * Any I/O for this block arriving after this point will get
864 * remapped to it directly.
865 */
866 r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block);
867 if (r) {
868 DMERR("dm_thin_insert_block() failed");
869 cell_error(m->cell);
870 return;
871 }
872
873 /*
874 * Release any bios held while the block was being provisioned.
875 * If we are processing a write bio that completely covers the block,
876 * we already processed it so can ignore it now when processing
877 * the bios in the cell.
878 */
879 if (bio) {
Joe Thornber6f94a4c2012-03-28 18:41:23 +0100880 cell_defer_except(tc, m->cell);
Joe Thornber991d9fa2011-10-31 20:21:18 +0000881 bio_endio(bio, 0);
882 } else
883 cell_defer(tc, m->cell, m->data_block);
884
885 list_del(&m->list);
886 mempool_free(m, tc->pool->mapping_pool);
887}
888
Joe Thornber104655f2012-03-28 18:41:28 +0100889static void process_prepared_discard(struct new_mapping *m)
890{
891 int r;
892 struct thin_c *tc = m->tc;
893
894 r = dm_thin_remove_block(tc->td, m->virt_block);
895 if (r)
896 DMERR("dm_thin_remove_block() failed");
897
898 /*
899 * Pass the discard down to the underlying device?
900 */
901 if (m->pass_discard)
902 remap_and_issue(tc, m->bio, m->data_block);
903 else
904 bio_endio(m->bio, 0);
905
906 cell_defer_except(tc, m->cell);
907 cell_defer_except(tc, m->cell2);
908 mempool_free(m, tc->pool->mapping_pool);
909}
910
911static void process_prepared(struct pool *pool, struct list_head *head,
912 void (*fn)(struct new_mapping *))
Joe Thornber991d9fa2011-10-31 20:21:18 +0000913{
914 unsigned long flags;
915 struct list_head maps;
916 struct new_mapping *m, *tmp;
917
918 INIT_LIST_HEAD(&maps);
919 spin_lock_irqsave(&pool->lock, flags);
Joe Thornber104655f2012-03-28 18:41:28 +0100920 list_splice_init(head, &maps);
Joe Thornber991d9fa2011-10-31 20:21:18 +0000921 spin_unlock_irqrestore(&pool->lock, flags);
922
923 list_for_each_entry_safe(m, tmp, &maps, list)
Joe Thornber104655f2012-03-28 18:41:28 +0100924 fn(m);
Joe Thornber991d9fa2011-10-31 20:21:18 +0000925}
926
927/*
928 * Deferred bio jobs.
929 */
Joe Thornber104655f2012-03-28 18:41:28 +0100930static int io_overlaps_block(struct pool *pool, struct bio *bio)
931{
932 return !(bio->bi_sector & pool->offset_mask) &&
933 (bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT));
934
935}
936
Joe Thornber991d9fa2011-10-31 20:21:18 +0000937static int io_overwrites_block(struct pool *pool, struct bio *bio)
938{
Joe Thornber104655f2012-03-28 18:41:28 +0100939 return (bio_data_dir(bio) == WRITE) &&
940 io_overlaps_block(pool, bio);
Joe Thornber991d9fa2011-10-31 20:21:18 +0000941}
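/*
 * Continuing the 64KiB block example: a WRITE bio whose bi_sector is a
 * multiple of 128 and whose bi_size is exactly 65536 bytes overwrites the
 * whole block, so schedule_copy()/schedule_zero() can skip the kcopyd step
 * and use the bio itself as the new block contents.
 */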
942
943static void save_and_set_endio(struct bio *bio, bio_end_io_t **save,
944 bio_end_io_t *fn)
945{
946 *save = bio->bi_end_io;
947 bio->bi_end_io = fn;
948}
949
950static int ensure_next_mapping(struct pool *pool)
951{
952 if (pool->next_mapping)
953 return 0;
954
955 pool->next_mapping = mempool_alloc(pool->mapping_pool, GFP_ATOMIC);
956
957 return pool->next_mapping ? 0 : -ENOMEM;
958}
959
960static struct new_mapping *get_next_mapping(struct pool *pool)
961{
962 struct new_mapping *r = pool->next_mapping;
963
964 BUG_ON(!pool->next_mapping);
965
966 pool->next_mapping = NULL;
967
968 return r;
969}
970
971static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
Joe Thornber2dd9c252012-03-28 18:41:28 +0100972 struct dm_dev *origin, dm_block_t data_origin,
973 dm_block_t data_dest,
Joe Thornber991d9fa2011-10-31 20:21:18 +0000974 struct cell *cell, struct bio *bio)
975{
976 int r;
977 struct pool *pool = tc->pool;
978 struct new_mapping *m = get_next_mapping(pool);
979
980 INIT_LIST_HEAD(&m->list);
Joe Thornbereb2aa482012-03-28 18:41:28 +0100981 m->quiesced = 0;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000982 m->prepared = 0;
983 m->tc = tc;
984 m->virt_block = virt_block;
985 m->data_block = data_dest;
986 m->cell = cell;
987 m->err = 0;
988 m->bio = NULL;
989
Joe Thornbereb2aa482012-03-28 18:41:28 +0100990 if (!ds_add_work(&pool->shared_read_ds, &m->list))
991 m->quiesced = 1;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000992
993 /*
994 * IO to pool_dev remaps to the pool target's data_dev.
995 *
996 * If the whole block of data is being overwritten, we can issue the
997 * bio immediately. Otherwise we use kcopyd to clone the data first.
998 */
999 if (io_overwrites_block(pool, bio)) {
Joe Thornbereb2aa482012-03-28 18:41:28 +01001000 struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
1001 h->overwrite_mapping = m;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001002 m->bio = bio;
1003 save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001004 remap_and_issue(tc, bio, data_dest);
1005 } else {
1006 struct dm_io_region from, to;
1007
Joe Thornber2dd9c252012-03-28 18:41:28 +01001008 from.bdev = origin->bdev;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001009 from.sector = data_origin * pool->sectors_per_block;
1010 from.count = pool->sectors_per_block;
1011
1012 to.bdev = tc->pool_dev->bdev;
1013 to.sector = data_dest * pool->sectors_per_block;
1014 to.count = pool->sectors_per_block;
1015
1016 r = dm_kcopyd_copy(pool->copier, &from, 1, &to,
1017 0, copy_complete, m);
1018 if (r < 0) {
1019 mempool_free(m, pool->mapping_pool);
1020 DMERR("dm_kcopyd_copy() failed");
1021 cell_error(cell);
1022 }
1023 }
1024}
1025
Joe Thornber2dd9c252012-03-28 18:41:28 +01001026static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block,
1027 dm_block_t data_origin, dm_block_t data_dest,
1028 struct cell *cell, struct bio *bio)
1029{
1030 schedule_copy(tc, virt_block, tc->pool_dev,
1031 data_origin, data_dest, cell, bio);
1032}
1033
1034static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
1035 dm_block_t data_dest,
1036 struct cell *cell, struct bio *bio)
1037{
1038 schedule_copy(tc, virt_block, tc->origin_dev,
1039 virt_block, data_dest, cell, bio);
1040}
1041
Joe Thornber991d9fa2011-10-31 20:21:18 +00001042static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
1043 dm_block_t data_block, struct cell *cell,
1044 struct bio *bio)
1045{
1046 struct pool *pool = tc->pool;
1047 struct new_mapping *m = get_next_mapping(pool);
1048
1049 INIT_LIST_HEAD(&m->list);
Joe Thornbereb2aa482012-03-28 18:41:28 +01001050 m->quiesced = 1;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001051 m->prepared = 0;
1052 m->tc = tc;
1053 m->virt_block = virt_block;
1054 m->data_block = data_block;
1055 m->cell = cell;
1056 m->err = 0;
1057 m->bio = NULL;
1058
1059 /*
1060 * If the whole block of data is being overwritten or we are not
1061 * zeroing pre-existing data, we can issue the bio immediately.
1062 * Otherwise we use kcopyd to zero the data first.
1063 */
Joe Thornber67e2e2b2012-03-28 18:41:29 +01001064 if (!pool->pf.zero_new_blocks)
Joe Thornber991d9fa2011-10-31 20:21:18 +00001065 process_prepared_mapping(m);
1066
1067 else if (io_overwrites_block(pool, bio)) {
Joe Thornbereb2aa482012-03-28 18:41:28 +01001068 struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
1069 h->overwrite_mapping = m;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001070 m->bio = bio;
1071 save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001072 remap_and_issue(tc, bio, data_block);
1073
1074 } else {
1075 int r;
1076 struct dm_io_region to;
1077
1078 to.bdev = tc->pool_dev->bdev;
1079 to.sector = data_block * pool->sectors_per_block;
1080 to.count = pool->sectors_per_block;
1081
1082 r = dm_kcopyd_zero(pool->copier, 1, &to, 0, copy_complete, m);
1083 if (r < 0) {
1084 mempool_free(m, pool->mapping_pool);
1085 DMERR("dm_kcopyd_zero() failed");
1086 cell_error(cell);
1087 }
1088 }
1089}
1090
1091static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
1092{
1093 int r;
1094 dm_block_t free_blocks;
1095 unsigned long flags;
1096 struct pool *pool = tc->pool;
1097
1098 r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
1099 if (r)
1100 return r;
1101
1102 if (free_blocks <= pool->low_water_blocks && !pool->low_water_triggered) {
1103 DMWARN("%s: reached low water mark, sending event.",
1104 dm_device_name(pool->pool_md));
1105 spin_lock_irqsave(&pool->lock, flags);
1106 pool->low_water_triggered = 1;
1107 spin_unlock_irqrestore(&pool->lock, flags);
1108 dm_table_event(pool->ti->table);
1109 }
1110
1111 if (!free_blocks) {
1112 if (pool->no_free_space)
1113 return -ENOSPC;
1114 else {
1115 /*
1116 * Try to commit to see if that will free up some
1117 * more space.
1118 */
1119 r = dm_pool_commit_metadata(pool->pmd);
1120 if (r) {
1121 DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
1122 __func__, r);
1123 return r;
1124 }
1125
1126 r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
1127 if (r)
1128 return r;
1129
1130 /*
1131 * If we still have no space we set a flag to avoid
1132 * doing all this checking and return -ENOSPC.
1133 */
1134 if (!free_blocks) {
1135 DMWARN("%s: no free space available.",
1136 dm_device_name(pool->pool_md));
1137 spin_lock_irqsave(&pool->lock, flags);
1138 pool->no_free_space = 1;
1139 spin_unlock_irqrestore(&pool->lock, flags);
1140 return -ENOSPC;
1141 }
1142 }
1143 }
1144
1145 r = dm_pool_alloc_data_block(pool->pmd, result);
1146 if (r)
1147 return r;
1148
1149 return 0;
1150}
1151
1152/*
1153 * If we have run out of space, queue bios until the device is
1154 * resumed, presumably after having been reloaded with more space.
1155 */
1156static void retry_on_resume(struct bio *bio)
1157{
Joe Thornbereb2aa482012-03-28 18:41:28 +01001158 struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
1159 struct thin_c *tc = h->tc;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001160 struct pool *pool = tc->pool;
1161 unsigned long flags;
1162
1163 spin_lock_irqsave(&pool->lock, flags);
1164 bio_list_add(&pool->retry_on_resume_list, bio);
1165 spin_unlock_irqrestore(&pool->lock, flags);
1166}
1167
1168static void no_space(struct cell *cell)
1169{
1170 struct bio *bio;
1171 struct bio_list bios;
1172
1173 bio_list_init(&bios);
1174 cell_release(cell, &bios);
1175
1176 while ((bio = bio_list_pop(&bios)))
1177 retry_on_resume(bio);
1178}
1179
Joe Thornber104655f2012-03-28 18:41:28 +01001180static void process_discard(struct thin_c *tc, struct bio *bio)
1181{
1182 int r;
Mike Snitzerc3a0ce22012-05-12 01:43:16 +01001183 unsigned long flags;
Joe Thornber104655f2012-03-28 18:41:28 +01001184 struct pool *pool = tc->pool;
1185 struct cell *cell, *cell2;
1186 struct cell_key key, key2;
1187 dm_block_t block = get_bio_block(tc, bio);
1188 struct dm_thin_lookup_result lookup_result;
1189 struct new_mapping *m;
1190
1191 build_virtual_key(tc->td, block, &key);
1192 if (bio_detain(tc->pool->prison, &key, bio, &cell))
1193 return;
1194
1195 r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
1196 switch (r) {
1197 case 0:
1198 /*
1199 * Check nobody is fiddling with this pool block. This can
1200 * happen if someone's in the process of breaking sharing
1201 * on this block.
1202 */
1203 build_data_key(tc->td, lookup_result.block, &key2);
1204 if (bio_detain(tc->pool->prison, &key2, bio, &cell2)) {
1205 cell_release_singleton(cell, bio);
1206 break;
1207 }
1208
1209 if (io_overlaps_block(pool, bio)) {
1210 /*
1211 * IO may still be going to the destination block. We must
1212 * quiesce before we can do the removal.
1213 */
1214 m = get_next_mapping(pool);
1215 m->tc = tc;
Joe Thornber67e2e2b2012-03-28 18:41:29 +01001216 m->pass_discard = (!lookup_result.shared) & pool->pf.discard_passdown;
Joe Thornber104655f2012-03-28 18:41:28 +01001217 m->virt_block = block;
1218 m->data_block = lookup_result.block;
1219 m->cell = cell;
1220 m->cell2 = cell2;
1221 m->err = 0;
1222 m->bio = bio;
1223
1224 if (!ds_add_work(&pool->all_io_ds, &m->list)) {
Mike Snitzerc3a0ce22012-05-12 01:43:16 +01001225 spin_lock_irqsave(&pool->lock, flags);
Joe Thornber104655f2012-03-28 18:41:28 +01001226 list_add(&m->list, &pool->prepared_discards);
Mike Snitzerc3a0ce22012-05-12 01:43:16 +01001227 spin_unlock_irqrestore(&pool->lock, flags);
Joe Thornber104655f2012-03-28 18:41:28 +01001228 wake_worker(pool);
1229 }
1230 } else {
1231 /*
1232 * This path is hit if people are ignoring
1233 * limits->discard_granularity. It ignores any
1234 * part of the discard that is in a subsequent
1235 * block.
1236 */
1237 sector_t offset = bio->bi_sector - (block << pool->block_shift);
1238 unsigned remaining = (pool->sectors_per_block - offset) << 9;
1239 bio->bi_size = min(bio->bi_size, remaining);
1240
1241 cell_release_singleton(cell, bio);
1242 cell_release_singleton(cell2, bio);
Mikulas Patocka5b8bbc32012-07-20 14:25:05 +01001243 if ((!lookup_result.shared) && pool->pf.discard_passdown)
1244 remap_and_issue(tc, bio, lookup_result.block);
1245 else
1246 bio_endio(bio, 0);
Joe Thornber104655f2012-03-28 18:41:28 +01001247 }
1248 break;
1249
1250 case -ENODATA:
1251 /*
1252 * It isn't provisioned, just forget it.
1253 */
1254 cell_release_singleton(cell, bio);
1255 bio_endio(bio, 0);
1256 break;
1257
1258 default:
1259 DMERR("discard: find block unexpectedly returned %d", r);
1260 cell_release_singleton(cell, bio);
1261 bio_io_error(bio);
1262 break;
1263 }
1264}
1265
Joe Thornber991d9fa2011-10-31 20:21:18 +00001266static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block,
1267 struct cell_key *key,
1268 struct dm_thin_lookup_result *lookup_result,
1269 struct cell *cell)
1270{
1271 int r;
1272 dm_block_t data_block;
1273
1274 r = alloc_data_block(tc, &data_block);
1275 switch (r) {
1276 case 0:
Joe Thornber2dd9c252012-03-28 18:41:28 +01001277 schedule_internal_copy(tc, block, lookup_result->block,
1278 data_block, cell, bio);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001279 break;
1280
1281 case -ENOSPC:
1282 no_space(cell);
1283 break;
1284
1285 default:
1286 DMERR("%s: alloc_data_block() failed, error = %d", __func__, r);
1287 cell_error(cell);
1288 break;
1289 }
1290}
1291
1292static void process_shared_bio(struct thin_c *tc, struct bio *bio,
1293 dm_block_t block,
1294 struct dm_thin_lookup_result *lookup_result)
1295{
1296 struct cell *cell;
1297 struct pool *pool = tc->pool;
1298 struct cell_key key;
1299
1300 /*
1301 * If cell is already occupied, then sharing is already in the process
1302 * of being broken so we have nothing further to do here.
1303 */
1304 build_data_key(tc->td, lookup_result->block, &key);
1305 if (bio_detain(pool->prison, &key, bio, &cell))
1306 return;
1307
1308 if (bio_data_dir(bio) == WRITE)
1309 break_sharing(tc, bio, block, &key, lookup_result, cell);
1310 else {
Joe Thornbereb2aa482012-03-28 18:41:28 +01001311 struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001312
Joe Thornbereb2aa482012-03-28 18:41:28 +01001313 h->shared_read_entry = ds_inc(&pool->shared_read_ds);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001314
1315 cell_release_singleton(cell, bio);
1316 remap_and_issue(tc, bio, lookup_result->block);
1317 }
1318}
1319
1320static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block,
1321 struct cell *cell)
1322{
1323 int r;
1324 dm_block_t data_block;
1325
1326 /*
1327 * Remap empty bios (flushes) immediately, without provisioning.
1328 */
1329 if (!bio->bi_size) {
1330 cell_release_singleton(cell, bio);
1331 remap_and_issue(tc, bio, 0);
1332 return;
1333 }
1334
1335 /*
1336 * Fill read bios with zeroes and complete them immediately.
1337 */
1338 if (bio_data_dir(bio) == READ) {
1339 zero_fill_bio(bio);
1340 cell_release_singleton(cell, bio);
1341 bio_endio(bio, 0);
1342 return;
1343 }
1344
1345 r = alloc_data_block(tc, &data_block);
1346 switch (r) {
1347 case 0:
Joe Thornber2dd9c252012-03-28 18:41:28 +01001348 if (tc->origin_dev)
1349 schedule_external_copy(tc, block, data_block, cell, bio);
1350 else
1351 schedule_zero(tc, block, data_block, cell, bio);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001352 break;
1353
1354 case -ENOSPC:
1355 no_space(cell);
1356 break;
1357
1358 default:
1359 DMERR("%s: alloc_data_block() failed, error = %d", __func__, r);
1360 cell_error(cell);
1361 break;
1362 }
1363}
1364
1365static void process_bio(struct thin_c *tc, struct bio *bio)
1366{
1367 int r;
1368 dm_block_t block = get_bio_block(tc, bio);
1369 struct cell *cell;
1370 struct cell_key key;
1371 struct dm_thin_lookup_result lookup_result;
1372
1373 /*
1374 * If cell is already occupied, then the block is already
1375 * being provisioned so we have nothing further to do here.
1376 */
1377 build_virtual_key(tc->td, block, &key);
1378 if (bio_detain(tc->pool->prison, &key, bio, &cell))
1379 return;
1380
1381 r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
1382 switch (r) {
1383 case 0:
1384 /*
1385 * We can release this cell now. This thread is the only
1386 * one that puts bios into a cell, and we know there were
1387 * no preceding bios.
1388 */
1389 /*
1390 * TODO: this will probably have to change when discard goes
1391 * back in.
1392 */
1393 cell_release_singleton(cell, bio);
1394
1395 if (lookup_result.shared)
1396 process_shared_bio(tc, bio, block, &lookup_result);
1397 else
1398 remap_and_issue(tc, bio, lookup_result.block);
1399 break;
1400
1401 case -ENODATA:
Joe Thornber2dd9c252012-03-28 18:41:28 +01001402 if (bio_data_dir(bio) == READ && tc->origin_dev) {
1403 cell_release_singleton(cell, bio);
1404 remap_to_origin_and_issue(tc, bio);
1405 } else
1406 provision_block(tc, bio, block, cell);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001407 break;
1408
1409 default:
1410 DMERR("dm_thin_find_block() failed, error = %d", r);
Joe Thornber104655f2012-03-28 18:41:28 +01001411 cell_release_singleton(cell, bio);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001412 bio_io_error(bio);
1413 break;
1414 }
1415}
1416
Joe Thornber905e51b2012-03-28 18:41:27 +01001417static int need_commit_due_to_time(struct pool *pool)
1418{
1419 return jiffies < pool->last_commit_jiffies ||
1420 jiffies > pool->last_commit_jiffies + COMMIT_PERIOD;
1421}
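/*
 * The first comparison above catches jiffies wrap-around; the second fires
 * once COMMIT_PERIOD (one second's worth of jiffies) has passed since the
 * last commit.
 */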
1422
Joe Thornber991d9fa2011-10-31 20:21:18 +00001423static void process_deferred_bios(struct pool *pool)
1424{
1425 unsigned long flags;
1426 struct bio *bio;
1427 struct bio_list bios;
1428 int r;
1429
1430 bio_list_init(&bios);
1431
1432 spin_lock_irqsave(&pool->lock, flags);
1433 bio_list_merge(&bios, &pool->deferred_bios);
1434 bio_list_init(&pool->deferred_bios);
1435 spin_unlock_irqrestore(&pool->lock, flags);
1436
1437 while ((bio = bio_list_pop(&bios))) {
Joe Thornbereb2aa482012-03-28 18:41:28 +01001438 struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
1439 struct thin_c *tc = h->tc;
1440
Joe Thornber991d9fa2011-10-31 20:21:18 +00001441 /*
1442 * If we've got no free new_mapping structs, and processing
1443 * this bio might require one, we pause until there are some
1444 * prepared mappings to process.
1445 */
1446 if (ensure_next_mapping(pool)) {
1447 spin_lock_irqsave(&pool->lock, flags);
1448 bio_list_merge(&pool->deferred_bios, &bios);
1449 spin_unlock_irqrestore(&pool->lock, flags);
1450
1451 break;
1452 }
Joe Thornber104655f2012-03-28 18:41:28 +01001453
1454 if (bio->bi_rw & REQ_DISCARD)
1455 process_discard(tc, bio);
1456 else
1457 process_bio(tc, bio);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001458 }
1459
1460 /*
1461 * If there are any deferred flush bios, we must commit
1462 * the metadata before issuing them.
1463 */
1464 bio_list_init(&bios);
1465 spin_lock_irqsave(&pool->lock, flags);
1466 bio_list_merge(&bios, &pool->deferred_flush_bios);
1467 bio_list_init(&pool->deferred_flush_bios);
1468 spin_unlock_irqrestore(&pool->lock, flags);
1469
Joe Thornber905e51b2012-03-28 18:41:27 +01001470 if (bio_list_empty(&bios) && !need_commit_due_to_time(pool))
Joe Thornber991d9fa2011-10-31 20:21:18 +00001471 return;
1472
1473 r = dm_pool_commit_metadata(pool->pmd);
1474 if (r) {
1475 DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
1476 __func__, r);
1477 while ((bio = bio_list_pop(&bios)))
1478 bio_io_error(bio);
1479 return;
1480 }
Joe Thornber905e51b2012-03-28 18:41:27 +01001481 pool->last_commit_jiffies = jiffies;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001482
1483 while ((bio = bio_list_pop(&bios)))
1484 generic_make_request(bio);
1485}
1486
1487static void do_worker(struct work_struct *ws)
1488{
1489 struct pool *pool = container_of(ws, struct pool, worker);
1490
Joe Thornber104655f2012-03-28 18:41:28 +01001491 process_prepared(pool, &pool->prepared_mappings, process_prepared_mapping);
1492 process_prepared(pool, &pool->prepared_discards, process_prepared_discard);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001493 process_deferred_bios(pool);
1494}
1495
Joe Thornber905e51b2012-03-28 18:41:27 +01001496/*
1497 * We want to commit periodically so that not too much
1498 * unwritten data builds up.
1499 */
1500static void do_waker(struct work_struct *ws)
1501{
1502 struct pool *pool = container_of(to_delayed_work(ws), struct pool, waker);
1503 wake_worker(pool);
1504 queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
1505}
1506
Joe Thornber991d9fa2011-10-31 20:21:18 +00001507/*----------------------------------------------------------------*/
1508
1509/*
1510 * Mapping functions.
1511 */
1512
1513/*
1514 * Called only while mapping a thin bio to hand it over to the workqueue.
1515 */
1516static void thin_defer_bio(struct thin_c *tc, struct bio *bio)
1517{
1518 unsigned long flags;
1519 struct pool *pool = tc->pool;
1520
1521 spin_lock_irqsave(&pool->lock, flags);
1522 bio_list_add(&pool->deferred_bios, bio);
1523 spin_unlock_irqrestore(&pool->lock, flags);
1524
1525 wake_worker(pool);
1526}
1527
Joe Thornbereb2aa482012-03-28 18:41:28 +01001528static struct endio_hook *thin_hook_bio(struct thin_c *tc, struct bio *bio)
1529{
1530 struct pool *pool = tc->pool;
1531 struct endio_hook *h = mempool_alloc(pool->endio_hook_pool, GFP_NOIO);
1532
1533 h->tc = tc;
1534 h->shared_read_entry = NULL;
Joe Thornber104655f2012-03-28 18:41:28 +01001535 h->all_io_entry = bio->bi_rw & REQ_DISCARD ? NULL : ds_inc(&pool->all_io_ds);
Joe Thornbereb2aa482012-03-28 18:41:28 +01001536 h->overwrite_mapping = NULL;
1537
1538 return h;
1539}
1540
Joe Thornber991d9fa2011-10-31 20:21:18 +00001541/*
1542 * Non-blocking function called from the thin target's map function.
1543 */
1544static int thin_bio_map(struct dm_target *ti, struct bio *bio,
1545 union map_info *map_context)
1546{
1547 int r;
1548 struct thin_c *tc = ti->private;
1549 dm_block_t block = get_bio_block(tc, bio);
1550 struct dm_thin_device *td = tc->td;
1551 struct dm_thin_lookup_result result;
1552
Joe Thornbereb2aa482012-03-28 18:41:28 +01001553 map_context->ptr = thin_hook_bio(tc, bio);
Joe Thornber104655f2012-03-28 18:41:28 +01001554 if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)) {
Joe Thornber991d9fa2011-10-31 20:21:18 +00001555 thin_defer_bio(tc, bio);
1556 return DM_MAPIO_SUBMITTED;
1557 }
1558
1559 r = dm_thin_find_block(td, block, 0, &result);
1560
1561 /*
1562 * Note that we defer readahead too.
1563 */
1564 switch (r) {
1565 case 0:
1566 if (unlikely(result.shared)) {
1567 /*
1568 * We have a race condition here between the
1569 * result.shared value returned by the lookup and
1570 * snapshot creation, which may cause new
1571 * sharing.
1572 *
1573 * To avoid this always quiesce the origin before
1574 * taking the snap. You want to do this anyway to
1575 * ensure a consistent application view
1576 * (i.e. lockfs).
1577 *
1578 * More distant ancestors are irrelevant. The
1579 * shared flag will be set in their case.
1580 */
1581 thin_defer_bio(tc, bio);
1582 r = DM_MAPIO_SUBMITTED;
1583 } else {
1584 remap(tc, bio, result.block);
1585 r = DM_MAPIO_REMAPPED;
1586 }
1587 break;
1588
1589 case -ENODATA:
1590 /*
1591 * In future, the failed dm_thin_find_block above could
1592 * provide the hint to load the metadata into cache.
1593 */
1594 case -EWOULDBLOCK:
1595 thin_defer_bio(tc, bio);
1596 r = DM_MAPIO_SUBMITTED;
1597 break;
1598 }
1599
1600 return r;
1601}
1602
1603static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
1604{
1605 int r;
1606 unsigned long flags;
1607 struct pool_c *pt = container_of(cb, struct pool_c, callbacks);
1608
1609 spin_lock_irqsave(&pt->pool->lock, flags);
1610 r = !bio_list_empty(&pt->pool->retry_on_resume_list);
1611 spin_unlock_irqrestore(&pt->pool->lock, flags);
1612
1613 if (!r) {
1614 struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
1615 r = bdi_congested(&q->backing_dev_info, bdi_bits);
1616 }
1617
1618 return r;
1619}
1620
1621static void __requeue_bios(struct pool *pool)
1622{
1623 bio_list_merge(&pool->deferred_bios, &pool->retry_on_resume_list);
1624 bio_list_init(&pool->retry_on_resume_list);
1625}
1626
1627/*----------------------------------------------------------------
1628 * Binding of control targets to a pool object
1629 *--------------------------------------------------------------*/
1630static int bind_control_target(struct pool *pool, struct dm_target *ti)
1631{
1632 struct pool_c *pt = ti->private;
1633
1634 pool->ti = ti;
1635 pool->low_water_blocks = pt->low_water_blocks;
Joe Thornber67e2e2b2012-03-28 18:41:29 +01001636 pool->pf = pt->pf;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001637
Mike Snitzerf4026932012-05-19 01:01:01 +01001638 /*
 1639 * If discard_passdown was enabled, verify that the data device
1640 * supports discards. Disable discard_passdown if not; otherwise
1641 * -EOPNOTSUPP will be returned.
1642 */
1643 if (pt->pf.discard_passdown) {
1644 struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
1645 if (!q || !blk_queue_discard(q)) {
1646 char buf[BDEVNAME_SIZE];
1647 DMWARN("Discard unsupported by data device (%s): Disabling discard passdown.",
1648 bdevname(pt->data_dev->bdev, buf));
1649 pool->pf.discard_passdown = 0;
1650 }
1651 }
1652
Joe Thornber991d9fa2011-10-31 20:21:18 +00001653 return 0;
1654}
1655
1656static void unbind_control_target(struct pool *pool, struct dm_target *ti)
1657{
1658 if (pool->ti == ti)
1659 pool->ti = NULL;
1660}
1661
1662/*----------------------------------------------------------------
1663 * Pool creation
1664 *--------------------------------------------------------------*/
Joe Thornber67e2e2b2012-03-28 18:41:29 +01001665/* Initialize pool features. */
1666static void pool_features_init(struct pool_features *pf)
1667{
1668 pf->zero_new_blocks = 1;
1669 pf->discard_enabled = 1;
1670 pf->discard_passdown = 1;
1671}
1672
Joe Thornber991d9fa2011-10-31 20:21:18 +00001673static void __pool_destroy(struct pool *pool)
1674{
1675 __pool_table_remove(pool);
1676
1677 if (dm_pool_metadata_close(pool->pmd) < 0)
1678 DMWARN("%s: dm_pool_metadata_close() failed.", __func__);
1679
1680 prison_destroy(pool->prison);
1681 dm_kcopyd_client_destroy(pool->copier);
1682
1683 if (pool->wq)
1684 destroy_workqueue(pool->wq);
1685
1686 if (pool->next_mapping)
1687 mempool_free(pool->next_mapping, pool->mapping_pool);
1688 mempool_destroy(pool->mapping_pool);
1689 mempool_destroy(pool->endio_hook_pool);
1690 kfree(pool);
1691}
1692
1693static struct pool *pool_create(struct mapped_device *pool_md,
1694 struct block_device *metadata_dev,
1695 unsigned long block_size, char **error)
1696{
1697 int r;
1698 void *err_p;
1699 struct pool *pool;
1700 struct dm_pool_metadata *pmd;
1701
1702 pmd = dm_pool_metadata_open(metadata_dev, block_size);
1703 if (IS_ERR(pmd)) {
1704 *error = "Error creating metadata object";
1705 return (struct pool *)pmd;
1706 }
1707
1708 pool = kmalloc(sizeof(*pool), GFP_KERNEL);
1709 if (!pool) {
1710 *error = "Error allocating memory for pool";
1711 err_p = ERR_PTR(-ENOMEM);
1712 goto bad_pool;
1713 }
1714
1715 pool->pmd = pmd;
1716 pool->sectors_per_block = block_size;
1717 pool->block_shift = ffs(block_size) - 1;
1718 pool->offset_mask = block_size - 1;
1719 pool->low_water_blocks = 0;
Joe Thornber67e2e2b2012-03-28 18:41:29 +01001720 pool_features_init(&pool->pf);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001721 pool->prison = prison_create(PRISON_CELLS);
1722 if (!pool->prison) {
1723 *error = "Error creating pool's bio prison";
1724 err_p = ERR_PTR(-ENOMEM);
1725 goto bad_prison;
1726 }
1727
1728 pool->copier = dm_kcopyd_client_create();
1729 if (IS_ERR(pool->copier)) {
1730 r = PTR_ERR(pool->copier);
1731 *error = "Error creating pool's kcopyd client";
1732 err_p = ERR_PTR(r);
1733 goto bad_kcopyd_client;
1734 }
1735
1736 /*
1737 * Create singlethreaded workqueue that will service all devices
1738 * that use this metadata.
1739 */
1740 pool->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
1741 if (!pool->wq) {
1742 *error = "Error creating pool's workqueue";
1743 err_p = ERR_PTR(-ENOMEM);
1744 goto bad_wq;
1745 }
1746
1747 INIT_WORK(&pool->worker, do_worker);
Joe Thornber905e51b2012-03-28 18:41:27 +01001748 INIT_DELAYED_WORK(&pool->waker, do_waker);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001749 spin_lock_init(&pool->lock);
1750 bio_list_init(&pool->deferred_bios);
1751 bio_list_init(&pool->deferred_flush_bios);
1752 INIT_LIST_HEAD(&pool->prepared_mappings);
Joe Thornber104655f2012-03-28 18:41:28 +01001753 INIT_LIST_HEAD(&pool->prepared_discards);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001754 pool->low_water_triggered = 0;
1755 pool->no_free_space = 0;
1756 bio_list_init(&pool->retry_on_resume_list);
Joe Thornbereb2aa482012-03-28 18:41:28 +01001757 ds_init(&pool->shared_read_ds);
Joe Thornber104655f2012-03-28 18:41:28 +01001758 ds_init(&pool->all_io_ds);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001759
1760 pool->next_mapping = NULL;
1761 pool->mapping_pool =
1762 mempool_create_kmalloc_pool(MAPPING_POOL_SIZE, sizeof(struct new_mapping));
1763 if (!pool->mapping_pool) {
1764 *error = "Error creating pool's mapping mempool";
1765 err_p = ERR_PTR(-ENOMEM);
1766 goto bad_mapping_pool;
1767 }
1768
1769 pool->endio_hook_pool =
1770 mempool_create_kmalloc_pool(ENDIO_HOOK_POOL_SIZE, sizeof(struct endio_hook));
1771 if (!pool->endio_hook_pool) {
1772 *error = "Error creating pool's endio_hook mempool";
1773 err_p = ERR_PTR(-ENOMEM);
1774 goto bad_endio_hook_pool;
1775 }
1776 pool->ref_count = 1;
Joe Thornber905e51b2012-03-28 18:41:27 +01001777 pool->last_commit_jiffies = jiffies;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001778 pool->pool_md = pool_md;
1779 pool->md_dev = metadata_dev;
1780 __pool_table_insert(pool);
1781
1782 return pool;
1783
1784bad_endio_hook_pool:
1785 mempool_destroy(pool->mapping_pool);
1786bad_mapping_pool:
1787 destroy_workqueue(pool->wq);
1788bad_wq:
1789 dm_kcopyd_client_destroy(pool->copier);
1790bad_kcopyd_client:
1791 prison_destroy(pool->prison);
1792bad_prison:
1793 kfree(pool);
1794bad_pool:
1795 if (dm_pool_metadata_close(pmd))
1796 DMWARN("%s: dm_pool_metadata_close() failed.", __func__);
1797
1798 return err_p;
1799}
1800
1801static void __pool_inc(struct pool *pool)
1802{
1803 BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
1804 pool->ref_count++;
1805}
1806
1807static void __pool_dec(struct pool *pool)
1808{
1809 BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
1810 BUG_ON(!pool->ref_count);
1811 if (!--pool->ref_count)
1812 __pool_destroy(pool);
1813}
1814
1815static struct pool *__pool_find(struct mapped_device *pool_md,
1816 struct block_device *metadata_dev,
Joe Thornber67e2e2b2012-03-28 18:41:29 +01001817 unsigned long block_size, char **error,
1818 int *created)
Joe Thornber991d9fa2011-10-31 20:21:18 +00001819{
1820 struct pool *pool = __pool_table_lookup_metadata_dev(metadata_dev);
1821
1822 if (pool) {
1823 if (pool->pool_md != pool_md)
1824 return ERR_PTR(-EBUSY);
1825 __pool_inc(pool);
1826
1827 } else {
1828 pool = __pool_table_lookup(pool_md);
1829 if (pool) {
1830 if (pool->md_dev != metadata_dev)
1831 return ERR_PTR(-EINVAL);
1832 __pool_inc(pool);
1833
Joe Thornber67e2e2b2012-03-28 18:41:29 +01001834 } else {
Joe Thornber991d9fa2011-10-31 20:21:18 +00001835 pool = pool_create(pool_md, metadata_dev, block_size, error);
Joe Thornber67e2e2b2012-03-28 18:41:29 +01001836 *created = 1;
1837 }
Joe Thornber991d9fa2011-10-31 20:21:18 +00001838 }
1839
1840 return pool;
1841}
1842
1843/*----------------------------------------------------------------
1844 * Pool target methods
1845 *--------------------------------------------------------------*/
1846static void pool_dtr(struct dm_target *ti)
1847{
1848 struct pool_c *pt = ti->private;
1849
1850 mutex_lock(&dm_thin_pool_table.mutex);
1851
1852 unbind_control_target(pt->pool, ti);
1853 __pool_dec(pt->pool);
1854 dm_put_device(ti, pt->metadata_dev);
1855 dm_put_device(ti, pt->data_dev);
1856 kfree(pt);
1857
1858 mutex_unlock(&dm_thin_pool_table.mutex);
1859}
1860
Joe Thornber991d9fa2011-10-31 20:21:18 +00001861static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf,
1862 struct dm_target *ti)
1863{
1864 int r;
1865 unsigned argc;
1866 const char *arg_name;
1867
1868 static struct dm_arg _args[] = {
Joe Thornber67e2e2b2012-03-28 18:41:29 +01001869 {0, 3, "Invalid number of pool feature arguments"},
Joe Thornber991d9fa2011-10-31 20:21:18 +00001870 };
1871
1872 /*
1873 * No feature arguments supplied.
1874 */
1875 if (!as->argc)
1876 return 0;
1877
1878 r = dm_read_arg_group(_args, as, &argc, &ti->error);
1879 if (r)
1880 return -EINVAL;
1881
1882 while (argc && !r) {
1883 arg_name = dm_shift_arg(as);
1884 argc--;
1885
1886 if (!strcasecmp(arg_name, "skip_block_zeroing")) {
1887 pf->zero_new_blocks = 0;
1888 continue;
Joe Thornber67e2e2b2012-03-28 18:41:29 +01001889 } else if (!strcasecmp(arg_name, "ignore_discard")) {
1890 pf->discard_enabled = 0;
1891 continue;
1892 } else if (!strcasecmp(arg_name, "no_discard_passdown")) {
1893 pf->discard_passdown = 0;
1894 continue;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001895 }
1896
1897 ti->error = "Unrecognised pool feature requested";
1898 r = -EINVAL;
1899 }
1900
1901 return r;
1902}
1903
1904/*
1905 * thin-pool <metadata dev> <data dev>
1906 * <data block size (sectors)>
1907 * <low water mark (blocks)>
1908 * [<#feature args> [<arg>]*]
1909 *
1910 * Optional feature arguments are:
1911 * skip_block_zeroing: skips the zeroing of newly-provisioned blocks.
Joe Thornber67e2e2b2012-03-28 18:41:29 +01001912 * ignore_discard: disable discard
1913 * no_discard_passdown: don't pass discards down to the data device
Joe Thornber991d9fa2011-10-31 20:21:18 +00001914 */
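/*
 * Purely illustrative sketch (not taken from this driver; device names
 * and figures are hypothetical): a 10GiB data device with 64KiB
 * (128-sector) blocks, a low water mark of 16384 blocks and block
 * zeroing disabled could be loaded from userspace with something like:
 *
 *   dmsetup create pool --table \
 *     "0 20971520 thin-pool /dev/sdb /dev/sdc 128 16384 1 skip_block_zeroing"
 */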
1915static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
1916{
Joe Thornber67e2e2b2012-03-28 18:41:29 +01001917 int r, pool_created = 0;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001918 struct pool_c *pt;
1919 struct pool *pool;
1920 struct pool_features pf;
1921 struct dm_arg_set as;
1922 struct dm_dev *data_dev;
1923 unsigned long block_size;
1924 dm_block_t low_water_blocks;
1925 struct dm_dev *metadata_dev;
1926 sector_t metadata_dev_size;
Mike Snitzerc4a69ec2012-03-28 18:41:28 +01001927 char b[BDEVNAME_SIZE];
Joe Thornber991d9fa2011-10-31 20:21:18 +00001928
1929 /*
1930 * FIXME Remove validation from scope of lock.
1931 */
1932 mutex_lock(&dm_thin_pool_table.mutex);
1933
1934 if (argc < 4) {
1935 ti->error = "Invalid argument count";
1936 r = -EINVAL;
1937 goto out_unlock;
1938 }
1939 as.argc = argc;
1940 as.argv = argv;
1941
1942 r = dm_get_device(ti, argv[0], FMODE_READ | FMODE_WRITE, &metadata_dev);
1943 if (r) {
1944 ti->error = "Error opening metadata block device";
1945 goto out_unlock;
1946 }
1947
1948 metadata_dev_size = i_size_read(metadata_dev->bdev->bd_inode) >> SECTOR_SHIFT;
Mike Snitzerc4a69ec2012-03-28 18:41:28 +01001949 if (metadata_dev_size > THIN_METADATA_MAX_SECTORS_WARNING)
1950 DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
1951 bdevname(metadata_dev->bdev, b), THIN_METADATA_MAX_SECTORS);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001952
1953 r = dm_get_device(ti, argv[1], FMODE_READ | FMODE_WRITE, &data_dev);
1954 if (r) {
1955 ti->error = "Error getting data device";
1956 goto out_metadata;
1957 }
1958
1959 if (kstrtoul(argv[2], 10, &block_size) || !block_size ||
1960 block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
1961 block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
1962 !is_power_of_2(block_size)) {
1963 ti->error = "Invalid block size";
1964 r = -EINVAL;
1965 goto out;
1966 }
1967
1968 if (kstrtoull(argv[3], 10, (unsigned long long *)&low_water_blocks)) {
1969 ti->error = "Invalid low water mark";
1970 r = -EINVAL;
1971 goto out;
1972 }
1973
1974 /*
1975 * Set default pool features.
1976 */
Joe Thornber67e2e2b2012-03-28 18:41:29 +01001977 pool_features_init(&pf);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001978
1979 dm_consume_args(&as, 4);
1980 r = parse_pool_features(&as, &pf, ti);
1981 if (r)
1982 goto out;
1983
1984 pt = kzalloc(sizeof(*pt), GFP_KERNEL);
1985 if (!pt) {
1986 r = -ENOMEM;
1987 goto out;
1988 }
1989
1990 pool = __pool_find(dm_table_get_md(ti->table), metadata_dev->bdev,
Joe Thornber67e2e2b2012-03-28 18:41:29 +01001991 block_size, &ti->error, &pool_created);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001992 if (IS_ERR(pool)) {
1993 r = PTR_ERR(pool);
1994 goto out_free_pt;
1995 }
1996
Joe Thornber67e2e2b2012-03-28 18:41:29 +01001997 /*
1998 * 'pool_created' reflects whether this is the first table load.
1999 * Top level discard support is not allowed to be changed after
2000 * initial load. This would require a pool reload to trigger thin
2001 * device changes.
2002 */
2003 if (!pool_created && pf.discard_enabled != pool->pf.discard_enabled) {
2004 ti->error = "Discard support cannot be disabled once enabled";
2005 r = -EINVAL;
2006 goto out_flags_changed;
2007 }
2008
Joe Thornber991d9fa2011-10-31 20:21:18 +00002009 pt->pool = pool;
2010 pt->ti = ti;
2011 pt->metadata_dev = metadata_dev;
2012 pt->data_dev = data_dev;
2013 pt->low_water_blocks = low_water_blocks;
Joe Thornber67e2e2b2012-03-28 18:41:29 +01002014 pt->pf = pf;
Joe Thornber991d9fa2011-10-31 20:21:18 +00002015 ti->num_flush_requests = 1;
Joe Thornber67e2e2b2012-03-28 18:41:29 +01002016 /*
2017 * Only need to enable discards if the pool should pass
2018 * them down to the data device. The thin device's discard
2019 * processing will cause mappings to be removed from the btree.
2020 */
2021 if (pf.discard_enabled && pf.discard_passdown) {
2022 ti->num_discard_requests = 1;
2023 /*
2024 * Setting 'discards_supported' circumvents the normal
2025 * stacking of discard limits (this keeps the pool and
2026 * thin devices' discard limits consistent).
2027 */
2028 ti->discards_supported = 1;
2029 }
Joe Thornber991d9fa2011-10-31 20:21:18 +00002030 ti->private = pt;
2031
2032 pt->callbacks.congested_fn = pool_is_congested;
2033 dm_table_add_target_callbacks(ti->table, &pt->callbacks);
2034
2035 mutex_unlock(&dm_thin_pool_table.mutex);
2036
2037 return 0;
2038
Joe Thornber67e2e2b2012-03-28 18:41:29 +01002039out_flags_changed:
2040 __pool_dec(pool);
Joe Thornber991d9fa2011-10-31 20:21:18 +00002041out_free_pt:
2042 kfree(pt);
2043out:
2044 dm_put_device(ti, data_dev);
2045out_metadata:
2046 dm_put_device(ti, metadata_dev);
2047out_unlock:
2048 mutex_unlock(&dm_thin_pool_table.mutex);
2049
2050 return r;
2051}
2052
2053static int pool_map(struct dm_target *ti, struct bio *bio,
2054 union map_info *map_context)
2055{
2056 int r;
2057 struct pool_c *pt = ti->private;
2058 struct pool *pool = pt->pool;
2059 unsigned long flags;
2060
2061 /*
2062 * As this is a singleton target, ti->begin is always zero.
2063 */
2064 spin_lock_irqsave(&pool->lock, flags);
2065 bio->bi_bdev = pt->data_dev->bdev;
2066 r = DM_MAPIO_REMAPPED;
2067 spin_unlock_irqrestore(&pool->lock, flags);
2068
2069 return r;
2070}
2071
2072/*
2073 * Retrieves the number of blocks of the data device from
2074 * the superblock and compares it to the actual device size,
2075 * thus resizing the data device in case it has grown.
2076 *
2077 * This copes both with opening a preallocated data device in the ctr
2078 * followed by a resume,
2079 * -and-
2080 * with calling the resume method on its own after userspace has
2081 * grown the data device in reaction to a table event.
2082 */
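/*
 * Worked example (figures are illustrative only): with 128-sector data
 * blocks, block_shift is ffs(128) - 1 = 7, so a 20971520-sector pool
 * target covers 20971520 >> 7 = 163840 data blocks.
 */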
2083static int pool_preresume(struct dm_target *ti)
2084{
2085 int r;
2086 struct pool_c *pt = ti->private;
2087 struct pool *pool = pt->pool;
2088 dm_block_t data_size, sb_data_size;
2089
2090 /*
2091 * Take control of the pool object.
2092 */
2093 r = bind_control_target(pool, ti);
2094 if (r)
2095 return r;
2096
2097 data_size = ti->len >> pool->block_shift;
2098 r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size);
2099 if (r) {
2100 DMERR("failed to retrieve data device size");
2101 return r;
2102 }
2103
2104 if (data_size < sb_data_size) {
2105		DMERR("pool target too small, is %llu blocks (expected %llu)",
2106		      (unsigned long long)data_size, (unsigned long long)sb_data_size);
2107 return -EINVAL;
2108
2109 } else if (data_size > sb_data_size) {
2110 r = dm_pool_resize_data_dev(pool->pmd, data_size);
2111 if (r) {
2112 DMERR("failed to resize data device");
2113 return r;
2114 }
2115
2116 r = dm_pool_commit_metadata(pool->pmd);
2117 if (r) {
2118 DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
2119 __func__, r);
2120 return r;
2121 }
2122 }
2123
2124 return 0;
2125}
2126
2127static void pool_resume(struct dm_target *ti)
2128{
2129 struct pool_c *pt = ti->private;
2130 struct pool *pool = pt->pool;
2131 unsigned long flags;
2132
2133 spin_lock_irqsave(&pool->lock, flags);
2134 pool->low_water_triggered = 0;
2135 pool->no_free_space = 0;
2136 __requeue_bios(pool);
2137 spin_unlock_irqrestore(&pool->lock, flags);
2138
Joe Thornber905e51b2012-03-28 18:41:27 +01002139 do_waker(&pool->waker.work);
Joe Thornber991d9fa2011-10-31 20:21:18 +00002140}
2141
2142static void pool_postsuspend(struct dm_target *ti)
2143{
2144 int r;
2145 struct pool_c *pt = ti->private;
2146 struct pool *pool = pt->pool;
2147
Joe Thornber905e51b2012-03-28 18:41:27 +01002148 cancel_delayed_work(&pool->waker);
Joe Thornber991d9fa2011-10-31 20:21:18 +00002149 flush_workqueue(pool->wq);
2150
2151 r = dm_pool_commit_metadata(pool->pmd);
2152 if (r < 0) {
2153 DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
2154 __func__, r);
2155		/* FIXME: invalidate device? error the next FUA or FLUSH bio? */
2156 }
2157}
2158
2159static int check_arg_count(unsigned argc, unsigned args_required)
2160{
2161 if (argc != args_required) {
2162 DMWARN("Message received with %u arguments instead of %u.",
2163 argc, args_required);
2164 return -EINVAL;
2165 }
2166
2167 return 0;
2168}
2169
2170static int read_dev_id(char *arg, dm_thin_id *dev_id, int warning)
2171{
2172 if (!kstrtoull(arg, 10, (unsigned long long *)dev_id) &&
2173 *dev_id <= MAX_DEV_ID)
2174 return 0;
2175
2176 if (warning)
2177 DMWARN("Message received with invalid device id: %s", arg);
2178
2179 return -EINVAL;
2180}
2181
2182static int process_create_thin_mesg(unsigned argc, char **argv, struct pool *pool)
2183{
2184 dm_thin_id dev_id;
2185 int r;
2186
2187 r = check_arg_count(argc, 2);
2188 if (r)
2189 return r;
2190
2191 r = read_dev_id(argv[1], &dev_id, 1);
2192 if (r)
2193 return r;
2194
2195 r = dm_pool_create_thin(pool->pmd, dev_id);
2196 if (r) {
2197 DMWARN("Creation of new thinly-provisioned device with id %s failed.",
2198 argv[1]);
2199 return r;
2200 }
2201
2202 return 0;
2203}
2204
2205static int process_create_snap_mesg(unsigned argc, char **argv, struct pool *pool)
2206{
2207 dm_thin_id dev_id;
2208 dm_thin_id origin_dev_id;
2209 int r;
2210
2211 r = check_arg_count(argc, 3);
2212 if (r)
2213 return r;
2214
2215 r = read_dev_id(argv[1], &dev_id, 1);
2216 if (r)
2217 return r;
2218
2219 r = read_dev_id(argv[2], &origin_dev_id, 1);
2220 if (r)
2221 return r;
2222
2223 r = dm_pool_create_snap(pool->pmd, dev_id, origin_dev_id);
2224 if (r) {
2225 DMWARN("Creation of new snapshot %s of device %s failed.",
2226 argv[1], argv[2]);
2227 return r;
2228 }
2229
2230 return 0;
2231}
2232
2233static int process_delete_mesg(unsigned argc, char **argv, struct pool *pool)
2234{
2235 dm_thin_id dev_id;
2236 int r;
2237
2238 r = check_arg_count(argc, 2);
2239 if (r)
2240 return r;
2241
2242 r = read_dev_id(argv[1], &dev_id, 1);
2243 if (r)
2244 return r;
2245
2246 r = dm_pool_delete_thin_device(pool->pmd, dev_id);
2247 if (r)
2248 DMWARN("Deletion of thin device %s failed.", argv[1]);
2249
2250 return r;
2251}
2252
2253static int process_set_transaction_id_mesg(unsigned argc, char **argv, struct pool *pool)
2254{
2255 dm_thin_id old_id, new_id;
2256 int r;
2257
2258 r = check_arg_count(argc, 3);
2259 if (r)
2260 return r;
2261
2262 if (kstrtoull(argv[1], 10, (unsigned long long *)&old_id)) {
2263 DMWARN("set_transaction_id message: Unrecognised id %s.", argv[1]);
2264 return -EINVAL;
2265 }
2266
2267 if (kstrtoull(argv[2], 10, (unsigned long long *)&new_id)) {
2268 DMWARN("set_transaction_id message: Unrecognised new id %s.", argv[2]);
2269 return -EINVAL;
2270 }
2271
2272 r = dm_pool_set_metadata_transaction_id(pool->pmd, old_id, new_id);
2273 if (r) {
2274 DMWARN("Failed to change transaction id from %s to %s.",
2275 argv[1], argv[2]);
2276 return r;
2277 }
2278
2279 return 0;
2280}
2281
2282/*
2283 * Messages supported:
2284 * create_thin <dev_id>
2285 * create_snap <dev_id> <origin_id>
2286 * delete <dev_id>
2288 * set_transaction_id <current_trans_id> <new_trans_id>
2289 */
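/*
 * Illustrative usage from userspace (device name is hypothetical):
 *
 *   dmsetup message /dev/mapper/pool 0 "create_thin 0"
 *   dmsetup message /dev/mapper/pool 0 "create_snap 1 0"
 *   dmsetup message /dev/mapper/pool 0 "delete 1"
 */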
2290static int pool_message(struct dm_target *ti, unsigned argc, char **argv)
2291{
2292 int r = -EINVAL;
2293 struct pool_c *pt = ti->private;
2294 struct pool *pool = pt->pool;
2295
2296 if (!strcasecmp(argv[0], "create_thin"))
2297 r = process_create_thin_mesg(argc, argv, pool);
2298
2299 else if (!strcasecmp(argv[0], "create_snap"))
2300 r = process_create_snap_mesg(argc, argv, pool);
2301
2302 else if (!strcasecmp(argv[0], "delete"))
2303 r = process_delete_mesg(argc, argv, pool);
2304
2305 else if (!strcasecmp(argv[0], "set_transaction_id"))
2306 r = process_set_transaction_id_mesg(argc, argv, pool);
2307
2308 else
2309 DMWARN("Unrecognised thin pool target message received: %s", argv[0]);
2310
2311 if (!r) {
2312 r = dm_pool_commit_metadata(pool->pmd);
2313 if (r)
2314 DMERR("%s message: dm_pool_commit_metadata() failed, error = %d",
2315 argv[0], r);
2316 }
2317
2318 return r;
2319}
2320
2321/*
2322 * Status line is:
2323 * <transaction id> <used metadata blocks>/<total metadata blocks>
2324 * <used data blocks>/<total data blocks> <held metadata root>
2325 */
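/*
 * An INFO status line might look like this (values are illustrative):
 *
 *   0 141/32768 90/163840 -
 *
 * i.e. transaction id 0, 141 of 32768 metadata blocks used, 90 of
 * 163840 data blocks used, and no held metadata root.
 */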
2326static int pool_status(struct dm_target *ti, status_type_t type,
2327 char *result, unsigned maxlen)
2328{
Joe Thornber67e2e2b2012-03-28 18:41:29 +01002329 int r, count;
Joe Thornber991d9fa2011-10-31 20:21:18 +00002330 unsigned sz = 0;
2331 uint64_t transaction_id;
2332 dm_block_t nr_free_blocks_data;
2333 dm_block_t nr_free_blocks_metadata;
2334 dm_block_t nr_blocks_data;
2335 dm_block_t nr_blocks_metadata;
2336 dm_block_t held_root;
2337 char buf[BDEVNAME_SIZE];
2338 char buf2[BDEVNAME_SIZE];
2339 struct pool_c *pt = ti->private;
2340 struct pool *pool = pt->pool;
2341
2342 switch (type) {
2343 case STATUSTYPE_INFO:
2344 r = dm_pool_get_metadata_transaction_id(pool->pmd,
2345 &transaction_id);
2346 if (r)
2347 return r;
2348
2349 r = dm_pool_get_free_metadata_block_count(pool->pmd,
2350 &nr_free_blocks_metadata);
2351 if (r)
2352 return r;
2353
2354 r = dm_pool_get_metadata_dev_size(pool->pmd, &nr_blocks_metadata);
2355 if (r)
2356 return r;
2357
2358 r = dm_pool_get_free_block_count(pool->pmd,
2359 &nr_free_blocks_data);
2360 if (r)
2361 return r;
2362
2363 r = dm_pool_get_data_dev_size(pool->pmd, &nr_blocks_data);
2364 if (r)
2365 return r;
2366
2367 r = dm_pool_get_held_metadata_root(pool->pmd, &held_root);
2368 if (r)
2369 return r;
2370
2371 DMEMIT("%llu %llu/%llu %llu/%llu ",
2372 (unsigned long long)transaction_id,
2373 (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
2374 (unsigned long long)nr_blocks_metadata,
2375 (unsigned long long)(nr_blocks_data - nr_free_blocks_data),
2376 (unsigned long long)nr_blocks_data);
2377
2378 if (held_root)
2379 DMEMIT("%llu", held_root);
2380 else
2381 DMEMIT("-");
2382
2383 break;
2384
2385 case STATUSTYPE_TABLE:
2386 DMEMIT("%s %s %lu %llu ",
2387 format_dev_t(buf, pt->metadata_dev->bdev->bd_dev),
2388 format_dev_t(buf2, pt->data_dev->bdev->bd_dev),
2389 (unsigned long)pool->sectors_per_block,
2390 (unsigned long long)pt->low_water_blocks);
2391
Joe Thornber67e2e2b2012-03-28 18:41:29 +01002392 count = !pool->pf.zero_new_blocks + !pool->pf.discard_enabled +
Mike Snitzerf4026932012-05-19 01:01:01 +01002393 !pt->pf.discard_passdown;
Joe Thornber67e2e2b2012-03-28 18:41:29 +01002394 DMEMIT("%u ", count);
Joe Thornber991d9fa2011-10-31 20:21:18 +00002395
Joe Thornber67e2e2b2012-03-28 18:41:29 +01002396 if (!pool->pf.zero_new_blocks)
Joe Thornber991d9fa2011-10-31 20:21:18 +00002397 DMEMIT("skip_block_zeroing ");
Joe Thornber67e2e2b2012-03-28 18:41:29 +01002398
2399 if (!pool->pf.discard_enabled)
2400 DMEMIT("ignore_discard ");
2401
Mike Snitzerf4026932012-05-19 01:01:01 +01002402 if (!pt->pf.discard_passdown)
Joe Thornber67e2e2b2012-03-28 18:41:29 +01002403 DMEMIT("no_discard_passdown ");
2404
Joe Thornber991d9fa2011-10-31 20:21:18 +00002405 break;
2406 }
2407
2408 return 0;
2409}
2410
2411static int pool_iterate_devices(struct dm_target *ti,
2412 iterate_devices_callout_fn fn, void *data)
2413{
2414 struct pool_c *pt = ti->private;
2415
2416 return fn(ti, pt->data_dev, 0, ti->len, data);
2417}
2418
2419static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
2420 struct bio_vec *biovec, int max_size)
2421{
2422 struct pool_c *pt = ti->private;
2423 struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
2424
2425 if (!q->merge_bvec_fn)
2426 return max_size;
2427
2428 bvm->bi_bdev = pt->data_dev->bdev;
2429
2430 return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
2431}
2432
Joe Thornber104655f2012-03-28 18:41:28 +01002433static void set_discard_limits(struct pool *pool, struct queue_limits *limits)
2434{
Joe Thornber67e2e2b2012-03-28 18:41:29 +01002435 /*
2436 * FIXME: these limits may be incompatible with the pool's data device
2437 */
Joe Thornber104655f2012-03-28 18:41:28 +01002438 limits->max_discard_sectors = pool->sectors_per_block;
2439
2440 /*
2441 * This is just a hint, and not enforced. We have to cope with
2442 * bios that overlap 2 blocks.
2443 */
2444 limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
Joe Thornber67e2e2b2012-03-28 18:41:29 +01002445 limits->discard_zeroes_data = pool->pf.zero_new_blocks;
Joe Thornber104655f2012-03-28 18:41:28 +01002446}
2447
Joe Thornber991d9fa2011-10-31 20:21:18 +00002448static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
2449{
2450 struct pool_c *pt = ti->private;
2451 struct pool *pool = pt->pool;
2452
2453 blk_limits_io_min(limits, 0);
2454 blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
Joe Thornber67e2e2b2012-03-28 18:41:29 +01002455 if (pool->pf.discard_enabled)
2456 set_discard_limits(pool, limits);
Joe Thornber991d9fa2011-10-31 20:21:18 +00002457}
2458
2459static struct target_type pool_target = {
2460 .name = "thin-pool",
2461 .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
2462 DM_TARGET_IMMUTABLE,
Joe Thornber67e2e2b2012-03-28 18:41:29 +01002463 .version = {1, 1, 0},
Joe Thornber991d9fa2011-10-31 20:21:18 +00002464 .module = THIS_MODULE,
2465 .ctr = pool_ctr,
2466 .dtr = pool_dtr,
2467 .map = pool_map,
2468 .postsuspend = pool_postsuspend,
2469 .preresume = pool_preresume,
2470 .resume = pool_resume,
2471 .message = pool_message,
2472 .status = pool_status,
2473 .merge = pool_merge,
2474 .iterate_devices = pool_iterate_devices,
2475 .io_hints = pool_io_hints,
2476};
2477
2478/*----------------------------------------------------------------
2479 * Thin target methods
2480 *--------------------------------------------------------------*/
2481static void thin_dtr(struct dm_target *ti)
2482{
2483 struct thin_c *tc = ti->private;
2484
2485 mutex_lock(&dm_thin_pool_table.mutex);
2486
2487 __pool_dec(tc->pool);
2488 dm_pool_close_thin_device(tc->td);
2489 dm_put_device(ti, tc->pool_dev);
Joe Thornber2dd9c252012-03-28 18:41:28 +01002490 if (tc->origin_dev)
2491 dm_put_device(ti, tc->origin_dev);
Joe Thornber991d9fa2011-10-31 20:21:18 +00002492 kfree(tc);
2493
2494 mutex_unlock(&dm_thin_pool_table.mutex);
2495}
2496
2497/*
2498 * Thin target parameters:
2499 *
Joe Thornber2dd9c252012-03-28 18:41:28 +01002500 * <pool_dev> <dev_id> [origin_dev]
Joe Thornber991d9fa2011-10-31 20:21:18 +00002501 *
2502 * pool_dev: the path to the pool (eg, /dev/mapper/my_pool)
2503 * dev_id: the internal device identifier
Joe Thornber2dd9c252012-03-28 18:41:28 +01002504 * origin_dev: a device external to the pool that should act as the origin
Joe Thornber67e2e2b2012-03-28 18:41:29 +01002505 *
2506 * If the pool device has discards disabled, they get disabled for the thin
2507 * device as well.
Joe Thornber991d9fa2011-10-31 20:21:18 +00002508 */
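/*
 * Illustrative sketch (device names are hypothetical): activating thin
 * device 0 of a pool as a 1GiB (2097152-sector) volume:
 *
 *   dmsetup create thin0 --table "0 2097152 thin /dev/mapper/pool 0"
 */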
2509static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
2510{
2511 int r;
2512 struct thin_c *tc;
Joe Thornber2dd9c252012-03-28 18:41:28 +01002513 struct dm_dev *pool_dev, *origin_dev;
Joe Thornber991d9fa2011-10-31 20:21:18 +00002514 struct mapped_device *pool_md;
2515
2516 mutex_lock(&dm_thin_pool_table.mutex);
2517
Joe Thornber2dd9c252012-03-28 18:41:28 +01002518 if (argc != 2 && argc != 3) {
Joe Thornber991d9fa2011-10-31 20:21:18 +00002519 ti->error = "Invalid argument count";
2520 r = -EINVAL;
2521 goto out_unlock;
2522 }
2523
2524 tc = ti->private = kzalloc(sizeof(*tc), GFP_KERNEL);
2525 if (!tc) {
2526 ti->error = "Out of memory";
2527 r = -ENOMEM;
2528 goto out_unlock;
2529 }
2530
Joe Thornber2dd9c252012-03-28 18:41:28 +01002531 if (argc == 3) {
2532 r = dm_get_device(ti, argv[2], FMODE_READ, &origin_dev);
2533 if (r) {
2534 ti->error = "Error opening origin device";
2535 goto bad_origin_dev;
2536 }
2537 tc->origin_dev = origin_dev;
2538 }
2539
Joe Thornber991d9fa2011-10-31 20:21:18 +00002540 r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &pool_dev);
2541 if (r) {
2542 ti->error = "Error opening pool device";
2543 goto bad_pool_dev;
2544 }
2545 tc->pool_dev = pool_dev;
2546
2547 if (read_dev_id(argv[1], (unsigned long long *)&tc->dev_id, 0)) {
2548 ti->error = "Invalid device id";
2549 r = -EINVAL;
2550 goto bad_common;
2551 }
2552
2553 pool_md = dm_get_md(tc->pool_dev->bdev->bd_dev);
2554 if (!pool_md) {
2555 ti->error = "Couldn't get pool mapped device";
2556 r = -EINVAL;
2557 goto bad_common;
2558 }
2559
2560 tc->pool = __pool_table_lookup(pool_md);
2561 if (!tc->pool) {
2562 ti->error = "Couldn't find pool object";
2563 r = -EINVAL;
2564 goto bad_pool_lookup;
2565 }
2566 __pool_inc(tc->pool);
2567
2568 r = dm_pool_open_thin_device(tc->pool->pmd, tc->dev_id, &tc->td);
2569 if (r) {
2570 ti->error = "Couldn't open thin internal device";
2571 goto bad_thin_open;
2572 }
2573
2574 ti->split_io = tc->pool->sectors_per_block;
2575 ti->num_flush_requests = 1;
Joe Thornber67e2e2b2012-03-28 18:41:29 +01002576
2577	/* If the pool supports discards, pass them on. */
2578 if (tc->pool->pf.discard_enabled) {
2579 ti->discards_supported = 1;
2580 ti->num_discard_requests = 1;
Mikulas Patocka5b8bbc32012-07-20 14:25:05 +01002581 ti->discard_zeroes_data_unsupported = 1;
Joe Thornber67e2e2b2012-03-28 18:41:29 +01002582 }
Joe Thornber991d9fa2011-10-31 20:21:18 +00002583
2584 dm_put(pool_md);
2585
2586 mutex_unlock(&dm_thin_pool_table.mutex);
2587
2588 return 0;
2589
2590bad_thin_open:
2591 __pool_dec(tc->pool);
2592bad_pool_lookup:
2593 dm_put(pool_md);
2594bad_common:
2595 dm_put_device(ti, tc->pool_dev);
2596bad_pool_dev:
Joe Thornber2dd9c252012-03-28 18:41:28 +01002597 if (tc->origin_dev)
2598 dm_put_device(ti, tc->origin_dev);
2599bad_origin_dev:
Joe Thornber991d9fa2011-10-31 20:21:18 +00002600 kfree(tc);
2601out_unlock:
2602 mutex_unlock(&dm_thin_pool_table.mutex);
2603
2604 return r;
2605}
2606
2607static int thin_map(struct dm_target *ti, struct bio *bio,
2608 union map_info *map_context)
2609{
Alasdair G Kergon6efd6e82012-03-28 18:41:28 +01002610 bio->bi_sector = dm_target_offset(ti, bio->bi_sector);
Joe Thornber991d9fa2011-10-31 20:21:18 +00002611
2612 return thin_bio_map(ti, bio, map_context);
2613}
2614
Joe Thornbereb2aa482012-03-28 18:41:28 +01002615static int thin_endio(struct dm_target *ti,
2616 struct bio *bio, int err,
2617 union map_info *map_context)
2618{
2619 unsigned long flags;
2620 struct endio_hook *h = map_context->ptr;
2621 struct list_head work;
2622 struct new_mapping *m, *tmp;
2623 struct pool *pool = h->tc->pool;
2624
2625 if (h->shared_read_entry) {
2626 INIT_LIST_HEAD(&work);
2627 ds_dec(h->shared_read_entry, &work);
2628
2629 spin_lock_irqsave(&pool->lock, flags);
2630 list_for_each_entry_safe(m, tmp, &work, list) {
2631 list_del(&m->list);
2632 m->quiesced = 1;
2633 __maybe_add_mapping(m);
2634 }
2635 spin_unlock_irqrestore(&pool->lock, flags);
2636 }
2637
Joe Thornber104655f2012-03-28 18:41:28 +01002638 if (h->all_io_entry) {
2639 INIT_LIST_HEAD(&work);
2640 ds_dec(h->all_io_entry, &work);
Mike Snitzerc3a0ce22012-05-12 01:43:16 +01002641 spin_lock_irqsave(&pool->lock, flags);
Joe Thornber104655f2012-03-28 18:41:28 +01002642 list_for_each_entry_safe(m, tmp, &work, list)
2643 list_add(&m->list, &pool->prepared_discards);
Mike Snitzerc3a0ce22012-05-12 01:43:16 +01002644 spin_unlock_irqrestore(&pool->lock, flags);
Joe Thornber104655f2012-03-28 18:41:28 +01002645 }
2646
Joe Thornbereb2aa482012-03-28 18:41:28 +01002647 mempool_free(h, pool->endio_hook_pool);
2648
2649 return 0;
2650}
2651
Joe Thornber991d9fa2011-10-31 20:21:18 +00002652static void thin_postsuspend(struct dm_target *ti)
2653{
2654 if (dm_noflush_suspending(ti))
2655 requeue_io((struct thin_c *)ti->private);
2656}
2657
2658/*
2659 * <nr mapped sectors> <highest mapped sector>
2660 */
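/*
 * e.g. (illustrative) a fully-mapped 1GiB thin device with 128-sector
 * blocks would report "2097152 2097151".
 */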
2661static int thin_status(struct dm_target *ti, status_type_t type,
2662 char *result, unsigned maxlen)
2663{
2664 int r;
2665 ssize_t sz = 0;
2666 dm_block_t mapped, highest;
2667 char buf[BDEVNAME_SIZE];
2668 struct thin_c *tc = ti->private;
2669
2670 if (!tc->td)
2671 DMEMIT("-");
2672 else {
2673 switch (type) {
2674 case STATUSTYPE_INFO:
2675 r = dm_thin_get_mapped_count(tc->td, &mapped);
2676 if (r)
2677 return r;
2678
2679 r = dm_thin_get_highest_mapped_block(tc->td, &highest);
2680 if (r < 0)
2681 return r;
2682
2683 DMEMIT("%llu ", mapped * tc->pool->sectors_per_block);
2684 if (r)
2685 DMEMIT("%llu", ((highest + 1) *
2686 tc->pool->sectors_per_block) - 1);
2687 else
2688 DMEMIT("-");
2689 break;
2690
2691 case STATUSTYPE_TABLE:
2692 DMEMIT("%s %lu",
2693 format_dev_t(buf, tc->pool_dev->bdev->bd_dev),
2694 (unsigned long) tc->dev_id);
Joe Thornber2dd9c252012-03-28 18:41:28 +01002695 if (tc->origin_dev)
2696 DMEMIT(" %s", format_dev_t(buf, tc->origin_dev->bdev->bd_dev));
Joe Thornber991d9fa2011-10-31 20:21:18 +00002697 break;
2698 }
2699 }
2700
2701 return 0;
2702}
2703
2704static int thin_iterate_devices(struct dm_target *ti,
2705 iterate_devices_callout_fn fn, void *data)
2706{
2707 dm_block_t blocks;
2708 struct thin_c *tc = ti->private;
2709
2710 /*
2711 * We can't call dm_pool_get_data_dev_size() since that blocks. So
2712 * we follow a more convoluted path through to the pool's target.
2713 */
2714 if (!tc->pool->ti)
2715 return 0; /* nothing is bound */
2716
2717 blocks = tc->pool->ti->len >> tc->pool->block_shift;
2718 if (blocks)
2719 return fn(ti, tc->pool_dev, 0, tc->pool->sectors_per_block * blocks, data);
2720
2721 return 0;
2722}
2723
2724static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
2725{
2726 struct thin_c *tc = ti->private;
Joe Thornber104655f2012-03-28 18:41:28 +01002727 struct pool *pool = tc->pool;
Joe Thornber991d9fa2011-10-31 20:21:18 +00002728
2729 blk_limits_io_min(limits, 0);
Joe Thornber104655f2012-03-28 18:41:28 +01002730 blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
2731 set_discard_limits(pool, limits);
Joe Thornber991d9fa2011-10-31 20:21:18 +00002732}
2733
2734static struct target_type thin_target = {
2735 .name = "thin",
Joe Thornber2dd9c252012-03-28 18:41:28 +01002736 .version = {1, 1, 0},
Joe Thornber991d9fa2011-10-31 20:21:18 +00002737 .module = THIS_MODULE,
2738 .ctr = thin_ctr,
2739 .dtr = thin_dtr,
2740 .map = thin_map,
Joe Thornbereb2aa482012-03-28 18:41:28 +01002741 .end_io = thin_endio,
Joe Thornber991d9fa2011-10-31 20:21:18 +00002742 .postsuspend = thin_postsuspend,
2743 .status = thin_status,
2744 .iterate_devices = thin_iterate_devices,
2745 .io_hints = thin_io_hints,
2746};
2747
2748/*----------------------------------------------------------------*/
2749
2750static int __init dm_thin_init(void)
2751{
2752 int r;
2753
2754 pool_table_init();
2755
2756 r = dm_register_target(&thin_target);
2757 if (r)
2758 return r;
2759
2760 r = dm_register_target(&pool_target);
2761 if (r)
2762 dm_unregister_target(&thin_target);
2763
2764 return r;
2765}
2766
2767static void dm_thin_exit(void)
2768{
2769 dm_unregister_target(&thin_target);
2770 dm_unregister_target(&pool_target);
2771}
2772
2773module_init(dm_thin_init);
2774module_exit(dm_thin_exit);
2775
Alasdair G Kergon7cab8bf2012-05-12 01:43:19 +01002776MODULE_DESCRIPTION(DM_NAME " thin provisioning target");
Joe Thornber991d9fa2011-10-31 20:21:18 +00002777MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
2778MODULE_LICENSE("GPL");