/*
 * Some low level IO code, and hacks for various block layer limitations
 *
 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright 2012 Google, Inc.
 */

#include "bcache.h"
#include "bset.h"
#include "debug.h"

#include <linux/blkdev.h>

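/*
 * Completion for the bounce bio allocated in
 * bch_generic_make_request_hack(): pass the result through to the
 * original bio and free the clone.
 */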
static void bch_bi_idx_hack_endio(struct bio *bio, int error)
{
	struct bio *p = bio->bi_private;

	bio_endio(p, error);
	bio_put(bio);
}

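/*
 * Submit a bio whose bi_idx may have been advanced by a previous split:
 * if bi_idx is nonzero, repackage the remaining segments into a fresh
 * bio starting at index 0 before handing it to generic_make_request().
 */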
static void bch_generic_make_request_hack(struct bio *bio)
{
	if (bio->bi_idx) {
		struct bio *clone = bio_alloc(GFP_NOIO, bio_segments(bio));

		memcpy(clone->bi_io_vec,
		       bio_iovec(bio),
		       bio_segments(bio) * sizeof(struct bio_vec));

		clone->bi_sector	= bio->bi_sector;
		clone->bi_bdev		= bio->bi_bdev;
		clone->bi_rw		= bio->bi_rw;
		clone->bi_vcnt		= bio_segments(bio);
		clone->bi_size		= bio->bi_size;

		clone->bi_private	= bio;
		clone->bi_end_io	= bch_bi_idx_hack_endio;

		bio = clone;
	}

	/*
	 * Hack, since drivers that clone bios clone up to bi_max_vecs, but our
	 * bios might have had more than that (before we split them per device
	 * limitations).
	 *
	 * To be taken out once immutable bvec stuff is in.
	 */
	bio->bi_max_vecs = bio->bi_vcnt;

	generic_make_request(bio);
}

/**
 * bch_bio_split - split a bio
 * @bio:	bio to split
 * @sectors:	number of sectors to split from the front of @bio
 * @gfp:	gfp mask
 * @bs:		bio set to allocate from
 *
 * Allocates and returns a new bio which represents @sectors from the start of
 * @bio, and updates @bio to represent the remaining sectors.
 *
 * If bio_sectors(@bio) was less than or equal to @sectors, returns @bio
 * unchanged.
 *
 * The newly allocated bio will point to @bio's bi_io_vec if the split was on a
 * bvec boundary; it is the caller's responsibility to ensure that @bio is not
 * freed before the split.
 */
struct bio *bch_bio_split(struct bio *bio, int sectors,
			  gfp_t gfp, struct bio_set *bs)
{
	unsigned idx = bio->bi_idx, vcnt = 0, nbytes = sectors << 9;
	struct bio_vec *bv;
	struct bio *ret = NULL;

	BUG_ON(sectors <= 0);

	if (sectors >= bio_sectors(bio))
		return bio;

	if (bio->bi_rw & REQ_DISCARD) {
		ret = bio_alloc_bioset(gfp, 1, bs);
		if (!ret)
			return NULL;
		idx = 0;
		goto out;
	}

	bio_for_each_segment(bv, bio, idx) {
		vcnt = idx - bio->bi_idx;

		if (!nbytes) {
			ret = bio_alloc_bioset(gfp, vcnt, bs);
			if (!ret)
				return NULL;

			memcpy(ret->bi_io_vec, bio_iovec(bio),
			       sizeof(struct bio_vec) * vcnt);

			break;
		} else if (nbytes < bv->bv_len) {
			ret = bio_alloc_bioset(gfp, ++vcnt, bs);
			if (!ret)
				return NULL;

			memcpy(ret->bi_io_vec, bio_iovec(bio),
			       sizeof(struct bio_vec) * vcnt);

			ret->bi_io_vec[vcnt - 1].bv_len = nbytes;
			bv->bv_offset	+= nbytes;
			bv->bv_len	-= nbytes;
			break;
		}

		nbytes -= bv->bv_len;
	}
out:
	ret->bi_bdev	= bio->bi_bdev;
	ret->bi_sector	= bio->bi_sector;
	ret->bi_size	= sectors << 9;
	ret->bi_rw	= bio->bi_rw;
	ret->bi_vcnt	= vcnt;
	ret->bi_max_vecs = vcnt;

	bio->bi_sector	+= sectors;
	bio->bi_size	-= sectors << 9;
	bio->bi_idx	 = idx;

	if (bio_integrity(bio)) {
		if (bio_integrity_clone(ret, bio, gfp)) {
			bio_put(ret);
			return NULL;
		}

		bio_integrity_trim(ret, 0, bio_sectors(ret));
		bio_integrity_trim(bio, bio_sectors(ret), bio_sectors(bio));
	}

	return ret;
}

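/*
 * Return how many sectors from the front of @bio the underlying device
 * will take in a single bio: the discard limit for discards, otherwise
 * bounded by the device's segment limit, merge_bvec_fn and max_sectors.
 * At least the first bvec is always allowed, so the caller can make
 * forward progress.
 */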
static unsigned bch_bio_max_sectors(struct bio *bio)
{
	unsigned ret = bio_sectors(bio);
	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
	unsigned max_segments = min_t(unsigned, BIO_MAX_PAGES,
				      queue_max_segments(q));

	if (bio->bi_rw & REQ_DISCARD)
		return min(ret, q->limits.max_discard_sectors);

	if (bio_segments(bio) > max_segments ||
	    q->merge_bvec_fn) {
		struct bio_vec *bv;
		int i, seg = 0;

		ret = 0;

		bio_for_each_segment(bv, bio, i) {
			struct bvec_merge_data bvm = {
				.bi_bdev	= bio->bi_bdev,
				.bi_sector	= bio->bi_sector,
				.bi_size	= ret << 9,
				.bi_rw		= bio->bi_rw,
			};

			if (seg == max_segments)
				break;

			if (q->merge_bvec_fn &&
			    q->merge_bvec_fn(q, &bvm, bv) < (int) bv->bv_len)
				break;

			seg++;
			ret += bv->bv_len >> 9;
		}
	}

	ret = min(ret, queue_max_sectors(q));

	WARN_ON(!ret);
	ret = max_t(int, ret, bio_iovec(bio)->bv_len >> 9);

	return ret;
}

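/*
 * Closure callback run once every fragment of a split bio has completed:
 * restore the original endio/private and complete the original bio, then
 * free the split hook.
 */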
static void bch_bio_submit_split_done(struct closure *cl)
{
	struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl);

	s->bio->bi_end_io = s->bi_end_io;
	s->bio->bi_private = s->bi_private;
	bio_endio(s->bio, 0);

	closure_debug_destroy(&s->cl);
	mempool_free(s, s->p->bio_split_hook);
}

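/*
 * Completion for a single fragment: on error, mark the original bio as
 * failed by clearing BIO_UPTODATE, then drop the fragment and the
 * closure reference it held.
 */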
static void bch_bio_submit_split_endio(struct bio *bio, int error)
{
	struct closure *cl = bio->bi_private;
	struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl);

	if (error)
		clear_bit(BIO_UPTODATE, &s->bio->bi_flags);

	bio_put(bio);
	closure_put(cl);
}

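/*
 * Submit @bio, splitting it as needed so that no fragment exceeds what
 * bch_bio_max_sectors() says the underlying device will accept. The
 * fragments are tracked with a closure allocated from the per-device
 * split pool @p; the original bio is completed once they all finish.
 * Bios that already fit (or carry no data and aren't discards) are
 * submitted directly.
 */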
void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p)
{
	struct bio_split_hook *s;
	struct bio *n;

	if (!bio_has_data(bio) && !(bio->bi_rw & REQ_DISCARD))
		goto submit;

	if (bio_sectors(bio) <= bch_bio_max_sectors(bio))
		goto submit;

	s = mempool_alloc(p->bio_split_hook, GFP_NOIO);
	closure_init(&s->cl, NULL);

	s->bio		= bio;
	s->p		= p;
	s->bi_end_io	= bio->bi_end_io;
	s->bi_private	= bio->bi_private;
	bio_get(bio);

	do {
		n = bch_bio_split(bio, bch_bio_max_sectors(bio),
				  GFP_NOIO, s->p->bio_split);

		n->bi_end_io	= bch_bio_submit_split_endio;
		n->bi_private	= &s->cl;

		closure_get(&s->cl);
		bch_generic_make_request_hack(n);
	} while (n != bio);

	continue_at(&s->cl, bch_bio_submit_split_done, NULL);
submit:
	bch_generic_make_request_hack(bio);
}

/* Bios with headers */

void bch_bbio_free(struct bio *bio, struct cache_set *c)
{
	struct bbio *b = container_of(bio, struct bbio, bio);
	mempool_free(b, c->bio_meta);
}

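/*
 * Allocate a metadata bio (struct bbio) from the cache set's mempool;
 * the embedded bio uses its inline bvecs and is sized for a full bucket.
 */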
struct bio *bch_bbio_alloc(struct cache_set *c)
{
	struct bbio *b = mempool_alloc(c->bio_meta, GFP_NOIO);
	struct bio *bio = &b->bio;

	bio_init(bio);
	bio->bi_flags		|= BIO_POOL_NONE << BIO_POOL_OFFSET;
	bio->bi_max_vecs	 = bucket_pages(c);
	bio->bi_io_vec		 = bio->bi_inline_vecs;

	return bio;
}

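/*
 * Submit a bbio whose embedded key already identifies the target cache
 * device and offset; record the submit time for latency accounting.
 */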
void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
{
	struct bbio *b = container_of(bio, struct bbio, bio);

	bio->bi_sector	= PTR_OFFSET(&b->key, 0);
	bio->bi_bdev	= PTR_CACHE(c, &b->key, 0)->bdev;

	b->submit_time_us = local_clock_us();
	closure_bio_submit(bio, bio->bi_private, PTR_CACHE(c, &b->key, 0));
}

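/* Copy pointer @ptr of key @k into the bbio's key, then submit it. */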
void bch_submit_bbio(struct bio *bio, struct cache_set *c,
		     struct bkey *k, unsigned ptr)
{
	struct bbio *b = container_of(bio, struct bbio, bio);
	bch_bkey_copy_single_ptr(&b->key, k, ptr);
	__bch_submit_bbio(bio, c);
}

/* IO errors */

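/*
 * Account the result of one IO against @ca: ca->io_count counts IOs,
 * and each time it exceeds error_decay the error count is rescaled by
 * 127/128. On an error, if the decayed error count passes error_limit,
 * the failure is escalated to bch_cache_set_error().
 */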
void bch_count_io_errors(struct cache *ca, int error, const char *m)
{
	/*
	 * The halflife of an error is:
	 * log2(1/2)/log2(127/128) * error_decay ~= 88 * error_decay
	 */

	if (ca->set->error_decay) {
		unsigned count = atomic_inc_return(&ca->io_count);

		while (count > ca->set->error_decay) {
			unsigned errors;
			unsigned old = count;
			unsigned new = count - ca->set->error_decay;

			/*
			 * First we subtract error_decay from count; each time
			 * we successfully do so, we rescale the errors once:
			 */

			count = atomic_cmpxchg(&ca->io_count, old, new);

			if (count == old) {
				count = new;

				errors = atomic_read(&ca->io_errors);
				do {
					old = errors;
					new = ((uint64_t) errors * 127) / 128;
					errors = atomic_cmpxchg(&ca->io_errors,
								old, new);
				} while (old != errors);
			}
		}
	}

	if (error) {
		char buf[BDEVNAME_SIZE];
		unsigned errors = atomic_add_return(1 << IO_ERROR_SHIFT,
						    &ca->io_errors);
		errors >>= IO_ERROR_SHIFT;

		if (errors < ca->set->error_limit)
			pr_err("%s: IO error on %s, recovering",
			       bdevname(ca->bdev, buf), m);
		else
			bch_cache_set_error(ca->set,
					    "%s: too many IO errors %s",
					    bdevname(ca->bdev, buf), m);
	}
}

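/*
 * In addition to counting the error, track congestion: if this IO took
 * longer than the read/write congestion threshold, drive c->congested
 * further negative (bounded at -CONGESTED_MAX); IOs under the threshold
 * let it recover back toward zero.
 */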
void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
			      int error, const char *m)
{
	struct bbio *b = container_of(bio, struct bbio, bio);
	struct cache *ca = PTR_CACHE(c, &b->key, 0);

	unsigned threshold = bio->bi_rw & REQ_WRITE
		? c->congested_write_threshold_us
		: c->congested_read_threshold_us;

	if (threshold) {
		unsigned t = local_clock_us();

		int us = t - b->submit_time_us;
		int congested = atomic_read(&c->congested);

		if (us > (int) threshold) {
			int ms = us / 1024;
			c->congested_last_us = t;

			ms = min(ms, CONGESTED_MAX + congested);
			atomic_sub(ms, &c->congested);
		} else if (congested < 0)
			atomic_inc(&c->congested);
	}

	bch_count_io_errors(ca, error, m);
}

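/*
 * Generic completion for metadata bbios: account latency and errors,
 * then drop the bio and put the parent closure stored in bi_private.
 */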
void bch_bbio_endio(struct cache_set *c, struct bio *bio,
		    int error, const char *m)
{
	struct closure *cl = bio->bi_private;

	bch_bbio_count_io_errors(c, bio, error, m);
	bio_put(bio);
	closure_put(cl);
}