| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* | 
 | 2 |  * Copyright (C) 2003 Sistina Software | 
| Heinz Mauelshagen | 891ce20 | 2007-05-09 02:33:00 -0700 | [diff] [blame] | 3 |  * Copyright (C) 2006 Red Hat GmbH | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 4 |  * | 
 | 5 |  * This file is released under the GPL. | 
 | 6 |  */ | 
 | 7 |  | 
| Mikulas Patocka | 586e80e | 2008-10-21 17:44:59 +0100 | [diff] [blame] | 8 | #include <linux/device-mapper.h> | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 9 |  | 
 | 10 | #include <linux/bio.h> | 
 | 11 | #include <linux/mempool.h> | 
 | 12 | #include <linux/module.h> | 
 | 13 | #include <linux/sched.h> | 
 | 14 | #include <linux/slab.h> | 
| Alasdair G Kergon | a765e20 | 2008-04-24 22:02:01 +0100 | [diff] [blame] | 15 | #include <linux/dm-io.h> | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 16 |  | 
/*
 * Per-client resources for dm io: every dm_io() caller supplies one of
 * these so allocations are made from dedicated pools rather than
 * global ones.
 */
struct dm_io_client {
	mempool_t *pool;	/* pool of struct io, used for async requests */
	struct bio_set *bios;	/* bioset all bios for this client come from */
};
 | 21 |  | 
/* FIXME: can we shrink this ? */
struct io {
	unsigned long error_bits;	/* one bit set per failed region */
	unsigned long eopnotsupp_bits;	/* regions that failed with -EOPNOTSUPP */
	atomic_t count;			/* outstanding bios + 1 dispatch ref */
	struct task_struct *sleeper;	/* non-NULL for sync io: task to wake */
	struct dm_io_client *client;	/* owning client; io freed to client->pool */
	io_notify_fn callback;		/* async completion callback (NULL if sync) */
	void *context;			/* opaque argument passed to callback */
};
 | 32 |  | 
 | 33 | /* | 
 | 34 |  * io contexts are only dynamically allocated for asynchronous | 
 | 35 |  * io.  Since async io is likely to be the majority of io we'll | 
| Heinz Mauelshagen | 891ce20 | 2007-05-09 02:33:00 -0700 | [diff] [blame] | 36 |  * have the same number of io contexts as bios! (FIXME: must reduce this). | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 37 |  */ | 
| Heinz Mauelshagen | 891ce20 | 2007-05-09 02:33:00 -0700 | [diff] [blame] | 38 |  | 
/*
 * Heuristic scaling of a page count into the number of struct io
 * entries to reserve in the client's mempool.
 */
static unsigned int pages_to_ios(unsigned int pages)
{
	return pages * 4;	/* too many ? */
}
 | 43 |  | 
| Heinz Mauelshagen | c8b03af | 2007-05-09 02:33:01 -0700 | [diff] [blame] | 44 | /* | 
 | 45 |  * Create a client with mempool and bioset. | 
 | 46 |  */ | 
 | 47 | struct dm_io_client *dm_io_client_create(unsigned num_pages) | 
 | 48 | { | 
 | 49 | 	unsigned ios = pages_to_ios(num_pages); | 
 | 50 | 	struct dm_io_client *client; | 
 | 51 |  | 
 | 52 | 	client = kmalloc(sizeof(*client), GFP_KERNEL); | 
 | 53 | 	if (!client) | 
 | 54 | 		return ERR_PTR(-ENOMEM); | 
 | 55 |  | 
 | 56 | 	client->pool = mempool_create_kmalloc_pool(ios, sizeof(struct io)); | 
 | 57 | 	if (!client->pool) | 
 | 58 | 		goto bad; | 
 | 59 |  | 
| Jens Axboe | bb799ca | 2008-12-10 15:35:05 +0100 | [diff] [blame] | 60 | 	client->bios = bioset_create(16, 0); | 
| Heinz Mauelshagen | c8b03af | 2007-05-09 02:33:01 -0700 | [diff] [blame] | 61 | 	if (!client->bios) | 
 | 62 | 		goto bad; | 
 | 63 |  | 
 | 64 | 	return client; | 
 | 65 |  | 
 | 66 |    bad: | 
 | 67 | 	if (client->pool) | 
 | 68 | 		mempool_destroy(client->pool); | 
 | 69 | 	kfree(client); | 
 | 70 | 	return ERR_PTR(-ENOMEM); | 
 | 71 | } | 
 | 72 | EXPORT_SYMBOL(dm_io_client_create); | 
 | 73 |  | 
 | 74 | int dm_io_client_resize(unsigned num_pages, struct dm_io_client *client) | 
 | 75 | { | 
 | 76 | 	return mempool_resize(client->pool, pages_to_ios(num_pages), | 
 | 77 | 			      GFP_KERNEL); | 
 | 78 | } | 
 | 79 | EXPORT_SYMBOL(dm_io_client_resize); | 
 | 80 |  | 
 | 81 | void dm_io_client_destroy(struct dm_io_client *client) | 
 | 82 | { | 
 | 83 | 	mempool_destroy(client->pool); | 
 | 84 | 	bioset_free(client->bios); | 
 | 85 | 	kfree(client); | 
 | 86 | } | 
 | 87 | EXPORT_SYMBOL(dm_io_client_destroy); | 
 | 88 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 89 | /*----------------------------------------------------------------- | 
 | 90 |  * We need to keep track of which region a bio is doing io for. | 
 | 91 |  * In order to save a memory allocation we store this the last | 
 | 92 |  * bvec which we know is unused (blech). | 
 | 93 |  * XXX This is ugly and can OOPS with some configs... find another way. | 
 | 94 |  *---------------------------------------------------------------*/ | 
 | 95 | static inline void bio_set_region(struct bio *bio, unsigned region) | 
 | 96 | { | 
| Heinz Mauelshagen | f00b16a | 2006-12-08 02:41:01 -0800 | [diff] [blame] | 97 | 	bio->bi_io_vec[bio->bi_max_vecs].bv_len = region; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 98 | } | 
 | 99 |  | 
 | 100 | static inline unsigned bio_get_region(struct bio *bio) | 
 | 101 | { | 
| Heinz Mauelshagen | f00b16a | 2006-12-08 02:41:01 -0800 | [diff] [blame] | 102 | 	return bio->bi_io_vec[bio->bi_max_vecs].bv_len; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 103 | } | 
 | 104 |  | 
 | 105 | /*----------------------------------------------------------------- | 
 | 106 |  * We need an io object to keep track of the number of bios that | 
 | 107 |  * have been dispatched for a particular io. | 
 | 108 |  *---------------------------------------------------------------*/ | 
/*
 * Record any error for @region and drop one reference on @io.  When the
 * last reference is dropped, either wake the synchronous waiter or run
 * the asynchronous completion callback.
 */
static void dec_count(struct io *io, unsigned int region, int error)
{
	if (error) {
		set_bit(region, &io->error_bits);
		if (error == -EOPNOTSUPP)
			set_bit(region, &io->eopnotsupp_bits);
	}

	if (atomic_dec_and_test(&io->count)) {
		if (io->sleeper)
			wake_up_process(io->sleeper);

		else {
			/*
			 * Copy everything we need out of *io before
			 * returning it to the mempool; *io must not be
			 * touched once freed, and the callback runs
			 * only after the free.
			 */
			unsigned long r = io->error_bits;
			io_notify_fn fn = io->callback;
			void *context = io->context;

			mempool_free(io, io->client->pool);
			fn(r, context);
		}
	}
}
 | 131 |  | 
/*
 * bi_end_io for every bio issued by do_region().
 */
static void endio(struct bio *bio, int error)
{
	struct io *io;
	unsigned region;

	/* Failed reads must not expose stale buffer contents. */
	if (error && bio_data_dir(bio) == READ)
		zero_fill_bio(bio);

	/*
	 * The bio destructor in bio_put() may use the io object.
	 */
	io = bio->bi_private;
	region = bio_get_region(bio);

	/* Restore the bvec count decremented in do_region() before freeing. */
	bio->bi_max_vecs++;
	bio_put(bio);

	dec_count(io, region, error);
}
 | 151 |  | 
 | 152 | /*----------------------------------------------------------------- | 
 | 153 |  * These little objects provide an abstraction for getting a new | 
 | 154 |  * destination page for io. | 
 | 155 |  *---------------------------------------------------------------*/ | 
struct dpages {
	/* Return the current page plus the usable length and offset in it. */
	void (*get_page)(struct dpages *dp,
			 struct page **p, unsigned long *len, unsigned *offset);
	/* Advance the cursor to the next page. */
	void (*next_page)(struct dpages *dp);

	unsigned context_u;	/* flavour-specific scalar state (e.g. offset) */
	void *context_ptr;	/* flavour-specific cursor (list/bvec/address) */
};
 | 164 |  | 
 | 165 | /* | 
 | 166 |  * Functions for getting the pages from a list. | 
 | 167 |  */ | 
 | 168 | static void list_get_page(struct dpages *dp, | 
 | 169 | 		  struct page **p, unsigned long *len, unsigned *offset) | 
 | 170 | { | 
 | 171 | 	unsigned o = dp->context_u; | 
 | 172 | 	struct page_list *pl = (struct page_list *) dp->context_ptr; | 
 | 173 |  | 
 | 174 | 	*p = pl->page; | 
 | 175 | 	*len = PAGE_SIZE - o; | 
 | 176 | 	*offset = o; | 
 | 177 | } | 
 | 178 |  | 
 | 179 | static void list_next_page(struct dpages *dp) | 
 | 180 | { | 
 | 181 | 	struct page_list *pl = (struct page_list *) dp->context_ptr; | 
 | 182 | 	dp->context_ptr = pl->next; | 
 | 183 | 	dp->context_u = 0; | 
 | 184 | } | 
 | 185 |  | 
 | 186 | static void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned offset) | 
 | 187 | { | 
 | 188 | 	dp->get_page = list_get_page; | 
 | 189 | 	dp->next_page = list_next_page; | 
 | 190 | 	dp->context_u = offset; | 
 | 191 | 	dp->context_ptr = pl; | 
 | 192 | } | 
 | 193 |  | 
 | 194 | /* | 
 | 195 |  * Functions for getting the pages from a bvec. | 
 | 196 |  */ | 
 | 197 | static void bvec_get_page(struct dpages *dp, | 
 | 198 | 		  struct page **p, unsigned long *len, unsigned *offset) | 
 | 199 | { | 
 | 200 | 	struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr; | 
 | 201 | 	*p = bvec->bv_page; | 
 | 202 | 	*len = bvec->bv_len; | 
 | 203 | 	*offset = bvec->bv_offset; | 
 | 204 | } | 
 | 205 |  | 
 | 206 | static void bvec_next_page(struct dpages *dp) | 
 | 207 | { | 
 | 208 | 	struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr; | 
 | 209 | 	dp->context_ptr = bvec + 1; | 
 | 210 | } | 
 | 211 |  | 
 | 212 | static void bvec_dp_init(struct dpages *dp, struct bio_vec *bvec) | 
 | 213 | { | 
 | 214 | 	dp->get_page = bvec_get_page; | 
 | 215 | 	dp->next_page = bvec_next_page; | 
 | 216 | 	dp->context_ptr = bvec; | 
 | 217 | } | 
 | 218 |  | 
| Heinz Mauelshagen | c8b03af | 2007-05-09 02:33:01 -0700 | [diff] [blame] | 219 | /* | 
 | 220 |  * Functions for getting the pages from a VMA. | 
 | 221 |  */ | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 222 | static void vm_get_page(struct dpages *dp, | 
 | 223 | 		 struct page **p, unsigned long *len, unsigned *offset) | 
 | 224 | { | 
 | 225 | 	*p = vmalloc_to_page(dp->context_ptr); | 
 | 226 | 	*offset = dp->context_u; | 
 | 227 | 	*len = PAGE_SIZE - dp->context_u; | 
 | 228 | } | 
 | 229 |  | 
 | 230 | static void vm_next_page(struct dpages *dp) | 
 | 231 | { | 
 | 232 | 	dp->context_ptr += PAGE_SIZE - dp->context_u; | 
 | 233 | 	dp->context_u = 0; | 
 | 234 | } | 
 | 235 |  | 
 | 236 | static void vm_dp_init(struct dpages *dp, void *data) | 
 | 237 | { | 
 | 238 | 	dp->get_page = vm_get_page; | 
 | 239 | 	dp->next_page = vm_next_page; | 
 | 240 | 	dp->context_u = ((unsigned long) data) & (PAGE_SIZE - 1); | 
 | 241 | 	dp->context_ptr = data; | 
 | 242 | } | 
 | 243 |  | 
| Peter Osterlund | 3676347 | 2005-09-06 15:16:42 -0700 | [diff] [blame] | 244 | static void dm_bio_destructor(struct bio *bio) | 
 | 245 | { | 
| Heinz Mauelshagen | 891ce20 | 2007-05-09 02:33:00 -0700 | [diff] [blame] | 246 | 	struct io *io = bio->bi_private; | 
 | 247 |  | 
| Milan Broz | bf17ce3 | 2007-05-09 02:33:05 -0700 | [diff] [blame] | 248 | 	bio_free(bio, io->client->bios); | 
| Peter Osterlund | 3676347 | 2005-09-06 15:16:42 -0700 | [diff] [blame] | 249 | } | 
 | 250 |  | 
| Heinz Mauelshagen | c8b03af | 2007-05-09 02:33:01 -0700 | [diff] [blame] | 251 | /* | 
 | 252 |  * Functions for getting the pages from kernel memory. | 
 | 253 |  */ | 
 | 254 | static void km_get_page(struct dpages *dp, struct page **p, unsigned long *len, | 
 | 255 | 			unsigned *offset) | 
 | 256 | { | 
 | 257 | 	*p = virt_to_page(dp->context_ptr); | 
 | 258 | 	*offset = dp->context_u; | 
 | 259 | 	*len = PAGE_SIZE - dp->context_u; | 
 | 260 | } | 
 | 261 |  | 
 | 262 | static void km_next_page(struct dpages *dp) | 
 | 263 | { | 
 | 264 | 	dp->context_ptr += PAGE_SIZE - dp->context_u; | 
 | 265 | 	dp->context_u = 0; | 
 | 266 | } | 
 | 267 |  | 
 | 268 | static void km_dp_init(struct dpages *dp, void *data) | 
 | 269 | { | 
 | 270 | 	dp->get_page = km_get_page; | 
 | 271 | 	dp->next_page = km_next_page; | 
 | 272 | 	dp->context_u = ((unsigned long) data) & (PAGE_SIZE - 1); | 
 | 273 | 	dp->context_ptr = data; | 
 | 274 | } | 
 | 275 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 276 | /*----------------------------------------------------------------- | 
 | 277 |  * IO routines that accept a list of pages. | 
 | 278 |  *---------------------------------------------------------------*/ | 
/*
 * Issue io to a single region: split @where->count sectors into as many
 * bios as needed, pulling destination pages from @dp.  Each submitted
 * bio takes one reference on @io (dropped in endio()).
 */
static void do_region(int rw, unsigned region, struct dm_io_region *where,
		      struct dpages *dp, struct io *io)
{
	struct bio *bio;
	struct page *page;
	unsigned long len;
	unsigned offset;
	unsigned num_bvecs;
	sector_t remaining = where->count;

	while (remaining) {
		/*
		 * Allocate a suitably sized-bio: we add an extra
		 * bvec for bio_get/set_region() and decrement bi_max_vecs
		 * to hide it from bio_add_page().
		 */
		num_bvecs = dm_sector_div_up(remaining,
					     (PAGE_SIZE >> SECTOR_SHIFT));
		num_bvecs = 1 + min_t(int, bio_get_nr_vecs(where->bdev),
				      num_bvecs);
		/* Clamp to what bio_alloc_bioset() can actually provide. */
		if (unlikely(num_bvecs > BIO_MAX_PAGES))
			num_bvecs = BIO_MAX_PAGES;
		bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
		bio->bi_sector = where->sector + (where->count - remaining);
		bio->bi_bdev = where->bdev;
		bio->bi_end_io = endio;
		bio->bi_private = io;
		bio->bi_destructor = dm_bio_destructor;
		bio->bi_max_vecs--;	/* hide the spare bvec; undone in endio() */
		bio_set_region(bio, region);

		/*
		 * Try and add as many pages as possible.
		 */
		while (remaining) {
			dp->get_page(dp, &page, &len, &offset);
			len = min(len, to_bytes(remaining));
			if (!bio_add_page(bio, page, len, offset))
				break;

			offset = 0;
			remaining -= to_sector(len);
			dp->next_page(dp);
		}

		/* One reference per in-flight bio; see dec_count(). */
		atomic_inc(&io->count);
		submit_bio(rw, bio);
	}
}
 | 328 |  | 
/*
 * Issue io to every non-empty region.  The caller must have initialised
 * io->count to 1; that extra reference stops the io completing while
 * regions are still being dispatched, and is dropped at the end.
 */
static void dispatch_io(int rw, unsigned int num_regions,
			struct dm_io_region *where, struct dpages *dp,
			struct io *io, int sync)
{
	int i;
	struct dpages old_pages = *dp;

	if (sync)
		rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);

	/*
	 * For multiple regions we need to be careful to rewind
	 * the dp object for each call to do_region.
	 */
	for (i = 0; i < num_regions; i++) {
		*dp = old_pages;
		if (where[i].count)
			do_region(rw, i, where + i, dp, io);
	}

	/*
	 * Drop the extra reference that we were holding to avoid
	 * the io being completed too early.
	 */
	dec_count(io, 0, 0);
}
 | 355 |  | 
/*
 * Synchronous io: dispatch, then sleep until every bio has completed.
 * The io struct lives on this stack frame, so no mempool allocation is
 * needed.  Returns 0 on success or -EIO, with the per-region error
 * bitmap optionally copied to *error_bits.
 */
static int sync_io(struct dm_io_client *client, unsigned int num_regions,
		   struct dm_io_region *where, int rw, struct dpages *dp,
		   unsigned long *error_bits)
{
	struct io io;

	/* Reads to multiple regions would all consume the same dp pages. */
	if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
		WARN_ON(1);
		return -EIO;
	}

retry:
	io.error_bits = 0;
	io.eopnotsupp_bits = 0;
	atomic_set(&io.count, 1); /* see dispatch_io() */
	io.sleeper = current;
	io.client = client;

	dispatch_io(rw, num_regions, where, dp, &io, 1);

	/* Open-coded wait: endio() -> dec_count() wakes us via io.sleeper. */
	while (1) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		if (!atomic_read(&io.count))
			break;

		io_schedule();
	}
	set_current_state(TASK_RUNNING);

	/* Barriers the device doesn't support are retried as plain io. */
	if (io.eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) {
		rw &= ~(1 << BIO_RW_BARRIER);
		goto retry;
	}

	if (error_bits)
		*error_bits = io.error_bits;

	return io.error_bits ? -EIO : 0;
}
 | 396 |  | 
/*
 * Asynchronous io: allocate an io from the client's mempool, dispatch,
 * and return immediately.  @fn is called with the error bitmap once all
 * bios complete; the io struct is freed (in dec_count()) before that
 * call.  Returns 0, or -EIO for an invalid multi-region read.
 */
static int async_io(struct dm_io_client *client, unsigned int num_regions,
		    struct dm_io_region *where, int rw, struct dpages *dp,
		    io_notify_fn fn, void *context)
{
	struct io *io;

	/* Reads to multiple regions would all consume the same dp pages. */
	if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
		WARN_ON(1);
		fn(1, context);
		return -EIO;
	}

	io = mempool_alloc(client->pool, GFP_NOIO);
	io->error_bits = 0;
	io->eopnotsupp_bits = 0;
	atomic_set(&io->count, 1); /* see dispatch_io() */
	io->sleeper = NULL;	/* NULL sleeper selects the callback path */
	io->client = client;
	io->callback = fn;
	io->context = context;

	dispatch_io(rw, num_regions, where, dp, io, 0);
	return 0;
}
 | 421 |  | 
| Heinz Mauelshagen | c8b03af | 2007-05-09 02:33:01 -0700 | [diff] [blame] | 422 | static int dp_init(struct dm_io_request *io_req, struct dpages *dp) | 
 | 423 | { | 
 | 424 | 	/* Set up dpages based on memory type */ | 
 | 425 | 	switch (io_req->mem.type) { | 
 | 426 | 	case DM_IO_PAGE_LIST: | 
 | 427 | 		list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset); | 
 | 428 | 		break; | 
 | 429 |  | 
 | 430 | 	case DM_IO_BVEC: | 
 | 431 | 		bvec_dp_init(dp, io_req->mem.ptr.bvec); | 
 | 432 | 		break; | 
 | 433 |  | 
 | 434 | 	case DM_IO_VMA: | 
 | 435 | 		vm_dp_init(dp, io_req->mem.ptr.vma); | 
 | 436 | 		break; | 
 | 437 |  | 
 | 438 | 	case DM_IO_KMEM: | 
 | 439 | 		km_dp_init(dp, io_req->mem.ptr.addr); | 
 | 440 | 		break; | 
 | 441 |  | 
 | 442 | 	default: | 
 | 443 | 		return -EINVAL; | 
 | 444 | 	} | 
 | 445 |  | 
 | 446 | 	return 0; | 
 | 447 | } | 
 | 448 |  | 
 | 449 | /* | 
| Mikulas Patocka | 7ff14a3 | 2008-04-24 22:10:47 +0100 | [diff] [blame] | 450 |  * New collapsed (a)synchronous interface. | 
 | 451 |  * | 
 | 452 |  * If the IO is asynchronous (i.e. it has notify.fn), you must either unplug | 
 | 453 |  * the queue with blk_unplug() some time later or set the BIO_RW_SYNC bit in | 
 | 454 |  * io_req->bi_rw. If you fail to do one of these, the IO will be submitted to | 
 | 455 |  * the disk after q->unplug_delay, which defaults to 3ms in blk-settings.c. | 
| Heinz Mauelshagen | c8b03af | 2007-05-09 02:33:01 -0700 | [diff] [blame] | 456 |  */ | 
 | 457 | int dm_io(struct dm_io_request *io_req, unsigned num_regions, | 
| Heinz Mauelshagen | 22a1ceb | 2008-04-24 21:43:17 +0100 | [diff] [blame] | 458 | 	  struct dm_io_region *where, unsigned long *sync_error_bits) | 
| Heinz Mauelshagen | c8b03af | 2007-05-09 02:33:01 -0700 | [diff] [blame] | 459 | { | 
 | 460 | 	int r; | 
 | 461 | 	struct dpages dp; | 
 | 462 |  | 
 | 463 | 	r = dp_init(io_req, &dp); | 
 | 464 | 	if (r) | 
 | 465 | 		return r; | 
 | 466 |  | 
 | 467 | 	if (!io_req->notify.fn) | 
 | 468 | 		return sync_io(io_req->client, num_regions, where, | 
 | 469 | 			       io_req->bi_rw, &dp, sync_error_bits); | 
 | 470 |  | 
 | 471 | 	return async_io(io_req->client, num_regions, where, io_req->bi_rw, | 
 | 472 | 			&dp, io_req->notify.fn, io_req->notify.context); | 
 | 473 | } | 
 | 474 | EXPORT_SYMBOL(dm_io); |