/*
 * Copyright (C) 2012 Red Hat. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm-cache-policy.h"
#include "dm.h"

#include <linux/hash.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#define DM_MSG_PREFIX "cache-policy-mq"

static struct kmem_cache *mq_entry_cache;

/*----------------------------------------------------------------*/

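/*
 * Returns the smallest power of two that is >= max(n, min).
 */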
static unsigned next_power(unsigned n, unsigned min)
{
	return roundup_pow_of_two(max(n, min));
}

/*----------------------------------------------------------------*/

/*
 * Large, sequential ios are probably better left on the origin device since
 * spindles tend to have good bandwidth.
 *
 * The io_tracker tries to spot when the io is in one of these sequential
 * modes.
 *
 * The two thresholds for switching between random and sequential io mode
 * default as follows and can be adjusted via the constructor and message
 * interfaces.
 */
#define RANDOM_THRESHOLD_DEFAULT 4
#define SEQUENTIAL_THRESHOLD_DEFAULT 512

enum io_pattern {
	PATTERN_SEQUENTIAL,
	PATTERN_RANDOM
};

struct io_tracker {
	enum io_pattern pattern;

	unsigned nr_seq_samples;
	unsigned nr_rand_samples;
	unsigned thresholds[2];

	dm_oblock_t last_end_oblock;
};

static void iot_init(struct io_tracker *t,
		     int sequential_threshold, int random_threshold)
{
	t->pattern = PATTERN_RANDOM;
	t->nr_seq_samples = 0;
	t->nr_rand_samples = 0;
	t->last_end_oblock = 0;
	t->thresholds[PATTERN_RANDOM] = random_threshold;
	t->thresholds[PATTERN_SEQUENTIAL] = sequential_threshold;
}

static enum io_pattern iot_pattern(struct io_tracker *t)
{
	return t->pattern;
}

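/*
 * Classify a bio as sequential (it begins in the sector immediately after
 * the previous io finished) or random, and remember where it ended.
 */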
static void iot_update_stats(struct io_tracker *t, struct bio *bio)
{
	if (bio->bi_sector == from_oblock(t->last_end_oblock) + 1)
		t->nr_seq_samples++;
	else {
		/*
		 * Just one non-sequential IO is enough to reset the
		 * counters.
		 */
		if (t->nr_seq_samples) {
			t->nr_seq_samples = 0;
			t->nr_rand_samples = 0;
		}

		t->nr_rand_samples++;
	}

	t->last_end_oblock = to_oblock(bio->bi_sector + bio_sectors(bio) - 1);
}

static void iot_check_for_pattern_switch(struct io_tracker *t)
{
	switch (t->pattern) {
	case PATTERN_SEQUENTIAL:
		if (t->nr_rand_samples >= t->thresholds[PATTERN_RANDOM]) {
			t->pattern = PATTERN_RANDOM;
			t->nr_seq_samples = t->nr_rand_samples = 0;
		}
		break;

	case PATTERN_RANDOM:
		if (t->nr_seq_samples >= t->thresholds[PATTERN_SEQUENTIAL]) {
			t->pattern = PATTERN_SEQUENTIAL;
			t->nr_seq_samples = t->nr_rand_samples = 0;
		}
		break;
	}
}

static void iot_examine_bio(struct io_tracker *t, struct bio *bio)
{
	iot_update_stats(t, bio);
	iot_check_for_pattern_switch(t);
}

/*----------------------------------------------------------------*/

/*
 * This queue is divided up into different levels, allowing us to push
 * entries to the back of any of the levels.  Think of it as a partially
 * sorted queue.
 */
#define NR_QUEUE_LEVELS 16u

struct queue {
	struct list_head qs[NR_QUEUE_LEVELS];
};

static void queue_init(struct queue *q)
{
	unsigned i;

	for (i = 0; i < NR_QUEUE_LEVELS; i++)
		INIT_LIST_HEAD(q->qs + i);
}

/*
 * Checks to see if the queue is empty.
 * FIXME: reduce cpu usage.
 */
static bool queue_empty(struct queue *q)
{
	unsigned i;

	for (i = 0; i < NR_QUEUE_LEVELS; i++)
		if (!list_empty(q->qs + i))
			return false;

	return true;
}

/*
 * Insert an entry to the back of the given level.
 */
static void queue_push(struct queue *q, unsigned level, struct list_head *elt)
{
	list_add_tail(elt, q->qs + level);
}

static void queue_remove(struct list_head *elt)
{
	list_del(elt);
}

/*
 * Shifts all entries down one level.  This has no effect on the order of
 * the queue.
 */
static void queue_shift_down(struct queue *q)
{
	unsigned level;

	for (level = 1; level < NR_QUEUE_LEVELS; level++)
		list_splice_init(q->qs + level, q->qs + level - 1);
}

/*
 * Gives us the oldest entry of the lowest populated level.  If the first
 * level is emptied then we shift down one level.
 */
static struct list_head *queue_pop(struct queue *q)
{
	unsigned level;
	struct list_head *r;

	for (level = 0; level < NR_QUEUE_LEVELS; level++)
		if (!list_empty(q->qs + level)) {
			r = q->qs[level].next;
			list_del(r);

			/* have we just emptied the bottom level? */
			if (level == 0 && list_empty(q->qs))
				queue_shift_down(q);

			return r;
		}

	return NULL;
}

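/*
 * Pops the first entry from a plain (single level) list.  Callers must
 * ensure the list is not empty.
 */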
static struct list_head *list_pop(struct list_head *lh)
{
	struct list_head *r = lh->next;

	BUG_ON(!r);
	list_del_init(r);

	return r;
}

/*----------------------------------------------------------------*/

/*
 * Describes a cache entry.  Used in both the cache and the pre_cache.
 */
struct entry {
	struct hlist_node hlist;
	struct list_head list;
	dm_oblock_t oblock;

	/*
	 * FIXME: pack these better
	 */
	bool dirty:1;
	unsigned hit_count;
	unsigned generation;
	unsigned tick;
};

/*
 * Rather than storing the cblock in an entry, we allocate all entries in
 * an array, and infer the cblock from the entry position.
 *
 * Free entries are linked together into a list.
 */
struct entry_pool {
	struct entry *entries, *entries_end;
	struct list_head free;
	unsigned nr_allocated;
};

static int epool_init(struct entry_pool *ep, unsigned nr_entries)
{
	unsigned i;

	ep->entries = vzalloc(sizeof(struct entry) * nr_entries);
	if (!ep->entries)
		return -ENOMEM;

	ep->entries_end = ep->entries + nr_entries;

	INIT_LIST_HEAD(&ep->free);
	for (i = 0; i < nr_entries; i++)
		list_add(&ep->entries[i].list, &ep->free);

	ep->nr_allocated = 0;

	return 0;
}

static void epool_exit(struct entry_pool *ep)
{
	vfree(ep->entries);
}

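/*
 * Takes the next entry off the free list, returning NULL if the pool is
 * exhausted.
 */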
static struct entry *alloc_entry(struct entry_pool *ep)
{
	struct entry *e;

	if (list_empty(&ep->free))
		return NULL;

	e = list_entry(list_pop(&ep->free), struct entry, list);
	INIT_LIST_HEAD(&e->list);
	INIT_HLIST_NODE(&e->hlist);
	ep->nr_allocated++;

	return e;
}

/*
 * This assumes the cblock hasn't already been allocated.
 */
static struct entry *alloc_particular_entry(struct entry_pool *ep, dm_cblock_t cblock)
{
	struct entry *e = ep->entries + from_cblock(cblock);

	list_del(&e->list);
	INIT_LIST_HEAD(&e->list);
	INIT_HLIST_NODE(&e->hlist);
	ep->nr_allocated++;

	return e;
}

static void free_entry(struct entry_pool *ep, struct entry *e)
{
	BUG_ON(!ep->nr_allocated);
	ep->nr_allocated--;
	INIT_HLIST_NODE(&e->hlist);
	list_add(&e->list, &ep->free);
}

/*
 * Returns NULL if the entry is free.
 */
static struct entry *epool_find(struct entry_pool *ep, dm_cblock_t cblock)
{
	struct entry *e = ep->entries + from_cblock(cblock);

	return !hlist_unhashed(&e->hlist) ? e : NULL;
}

static bool epool_empty(struct entry_pool *ep)
{
	return list_empty(&ep->free);
}

static bool in_pool(struct entry_pool *ep, struct entry *e)
{
	return e >= ep->entries && e < ep->entries_end;
}

static dm_cblock_t infer_cblock(struct entry_pool *ep, struct entry *e)
{
	return to_cblock(e - ep->entries);
}

/*----------------------------------------------------------------*/

struct mq_policy {
	struct dm_cache_policy policy;

	/* protects everything */
	struct mutex lock;
	dm_cblock_t cache_size;
	struct io_tracker tracker;

	/*
	 * Entries come from two pools, one of pre-cache entries, and one
	 * for the cache proper.
	 */
	struct entry_pool pre_cache_pool;
	struct entry_pool cache_pool;

	/*
	 * We maintain three queues of entries.  The cache proper,
	 * consisting of a clean and a dirty queue, contains the currently
	 * active mappings, whereas the pre_cache tracks blocks that are
	 * being hit frequently and are potential candidates for promotion
	 * to the cache.
	 */
	struct queue pre_cache;
	struct queue cache_clean;
	struct queue cache_dirty;

	/*
	 * Keeps track of time, incremented by the core.  We use this to
	 * avoid attributing multiple hits within the same tick.
	 *
	 * Access to tick_protected should be done with the spin lock held.
	 * It's copied to tick at the start of the map function (within the
	 * mutex).
	 */
	spinlock_t tick_lock;
	unsigned tick_protected;
	unsigned tick;

	/*
	 * A count of the number of times the map function has been called
	 * and found an entry in the pre_cache or cache.  Currently used to
	 * calculate the generation.
	 */
	unsigned hit_count;

	/*
	 * A generation is a longish period that is used to trigger some
	 * bookkeeping effects, e.g. decrementing hit counts on entries.
	 * This is needed to allow the cache to evolve as io patterns
	 * change.
	 */
	unsigned generation;
	unsigned generation_period; /* in lookups (will probably change) */

	/*
	 * Entries in the pre_cache whose hit count passes the promotion
	 * threshold move to the cache proper.  Working out the correct
	 * value for promote_threshold is crucial to this policy.
	 */
	unsigned promote_threshold;

	/*
	 * The hash table allows us to quickly find an entry by origin
	 * block.  Both pre_cache and cache entries are in here.
	 */
	unsigned nr_buckets;
	dm_block_t hash_bits;
	struct hlist_head *table;
};

/*----------------------------------------------------------------*/

/*
 * Simple hash table implementation.  Should replace with the standard hash
 * table that's making its way upstream.
 */
static void hash_insert(struct mq_policy *mq, struct entry *e)
{
	unsigned h = hash_64(from_oblock(e->oblock), mq->hash_bits);

	hlist_add_head(&e->hlist, mq->table + h);
}

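/*
 * Finds the entry for a given origin block, if any.  A hit also moves the
 * entry to the front of its hash bucket to speed up repeated lookups.
 */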
static struct entry *hash_lookup(struct mq_policy *mq, dm_oblock_t oblock)
{
	unsigned h = hash_64(from_oblock(oblock), mq->hash_bits);
	struct hlist_head *bucket = mq->table + h;
	struct entry *e;

	hlist_for_each_entry(e, bucket, hlist)
		if (e->oblock == oblock) {
			hlist_del(&e->hlist);
			hlist_add_head(&e->hlist, bucket);
			return e;
		}

	return NULL;
}

static void hash_remove(struct entry *e)
{
	hlist_del(&e->hlist);
}

/*----------------------------------------------------------------*/

static bool any_free_cblocks(struct mq_policy *mq)
{
	return !epool_empty(&mq->cache_pool);
}

static bool any_clean_cblocks(struct mq_policy *mq)
{
	return !queue_empty(&mq->cache_clean);
}

/*----------------------------------------------------------------*/

/*
 * Now we get to the meat of the policy.  This section deals with deciding
 * when to add entries to the pre_cache and cache, and when to move between
 * them.
 */

/*
 * The queue level is based on the log2 of the hit count.
 */
static unsigned queue_level(struct entry *e)
{
	return min((unsigned) ilog2(e->hit_count), NR_QUEUE_LEVELS - 1u);
}

static bool in_cache(struct mq_policy *mq, struct entry *e)
{
	return in_pool(&mq->cache_pool, e);
}

/*
 * Inserts the entry into the pre_cache or the cache.  Ensures the cache
 * block is marked as allocated if necessary.  Inserts into the hash table.
 * Sets the tick which records when the entry was last moved about.
 */
static void push(struct mq_policy *mq, struct entry *e)
{
	e->tick = mq->tick;
	hash_insert(mq, e);

	if (in_cache(mq, e))
		queue_push(e->dirty ? &mq->cache_dirty : &mq->cache_clean,
			   queue_level(e), &e->list);
	else
		queue_push(&mq->pre_cache, queue_level(e), &e->list);
}

/*
 * Removes an entry from pre_cache or cache.  Removes from the hash table.
 */
static void del(struct mq_policy *mq, struct entry *e)
{
	queue_remove(&e->list);
	hash_remove(e);
}

/*
 * Like del, except it removes the first entry in the queue (ie. the least
 * recently used).
 */
static struct entry *pop(struct mq_policy *mq, struct queue *q)
{
	struct entry *e;
	struct list_head *h = queue_pop(q);

	if (!h)
		return NULL;

	e = container_of(h, struct entry, list);
	hash_remove(e);

	return e;
}

/*
 * Has this entry already been updated?
 */
static bool updated_this_tick(struct mq_policy *mq, struct entry *e)
{
	return mq->tick == e->tick;
}

/*
 * The promotion threshold is adjusted every generation, as are the hit
 * counts of the entries.
 *
 * At the moment the threshold is taken by averaging the hit counts of some
 * of the entries in the cache (the first 20 entries across all levels in
 * ascending order, giving preference to the clean entries at each level).
 *
 * We can be much cleverer than this though.  For example, each promotion
 * could bump up the threshold helping to prevent churn.  Much more to do
 * here.
 */

#define MAX_TO_AVERAGE 20

static void check_generation(struct mq_policy *mq)
{
	unsigned total = 0, nr = 0, count = 0, level;
	struct list_head *head;
	struct entry *e;

	if ((mq->hit_count >= mq->generation_period) && (epool_empty(&mq->cache_pool))) {
		mq->hit_count = 0;
		mq->generation++;

		for (level = 0; level < NR_QUEUE_LEVELS && count < MAX_TO_AVERAGE; level++) {
			head = mq->cache_clean.qs + level;
			list_for_each_entry(e, head, list) {
				nr++;
				total += e->hit_count;

				if (++count >= MAX_TO_AVERAGE)
					break;
			}

			head = mq->cache_dirty.qs + level;
			list_for_each_entry(e, head, list) {
				nr++;
				total += e->hit_count;

				if (++count >= MAX_TO_AVERAGE)
					break;
			}
		}

		mq->promote_threshold = nr ? total / nr : 1;
		if (mq->promote_threshold * nr < total)
			mq->promote_threshold++;
	}
}

/*
 * Whenever we use an entry we bump up its hit counter, and push it to the
 * back of its current level.
 */
static void requeue_and_update_tick(struct mq_policy *mq, struct entry *e)
{
	if (updated_this_tick(mq, e))
		return;

	e->hit_count++;
	mq->hit_count++;
	check_generation(mq);

	/* generation adjustment, to stop the counts increasing forever. */
	/* FIXME: divide? */
	/* e->hit_count -= min(e->hit_count - 1, mq->generation - e->generation); */
	e->generation = mq->generation;

	del(mq, e);
	push(mq, e);
}

/*
 * Demote the least recently used clean entry from the cache, freeing up
 * its cache block.  Returns the origin block it was mapped to via *oblock.
 *
 * The demoted entry is not put back into the pre_cache (see the comment at
 * the end of this function), so it has to work its way up from a hit count
 * of zero if it is subsequently hit.  There are various options here, and
 * more experimentation would be good, e.g. re-inserting it into the
 * pre_cache with a reduced hit count.
 */
static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock)
{
	struct entry *demoted = pop(mq, &mq->cache_clean);

	if (!demoted)
		/*
		 * We could get a block from mq->cache_dirty, but that
		 * would add extra latency to the triggering bio as it
		 * waits for the writeback.  Better to not promote this
		 * time and hope there's a clean block next time this block
		 * is hit.
		 */
		return -ENOSPC;

	*oblock = demoted->oblock;
	free_entry(&mq->cache_pool, demoted);

	/*
	 * We used to put the demoted block into the pre-cache, but I think
	 * it's simpler to just let it work its way up from zero again.
	 * Stops blocks flickering in and out of the cache.
	 */

	return 0;
}

/*
 * We modify the basic promote_threshold depending on the specific io.
 *
 * If the origin block has been discarded then there's no cost to copy it
 * to the cache.
 *
 * We bias towards reads, since they can be demoted at no cost if they
 * haven't been dirtied.
 */
#define DISCARDED_PROMOTE_THRESHOLD 1
#define READ_PROMOTE_THRESHOLD 4
#define WRITE_PROMOTE_THRESHOLD 8

static unsigned adjusted_promote_threshold(struct mq_policy *mq,
					   bool discarded_oblock, int data_dir)
{
	if (data_dir == READ)
		return mq->promote_threshold + READ_PROMOTE_THRESHOLD;

	if (discarded_oblock && (any_free_cblocks(mq) || any_clean_cblocks(mq))) {
		/*
		 * We don't need to do any copying at all, so give this a
		 * very low threshold.
		 */
		return DISCARDED_PROMOTE_THRESHOLD;
	}

	return mq->promote_threshold + WRITE_PROMOTE_THRESHOLD;
}

static bool should_promote(struct mq_policy *mq, struct entry *e,
			   bool discarded_oblock, int data_dir)
{
	return e->hit_count >=
		adjusted_promote_threshold(mq, discarded_oblock, data_dir);
}

static int cache_entry_found(struct mq_policy *mq,
			     struct entry *e,
			     struct policy_result *result)
{
	requeue_and_update_tick(mq, e);

	if (in_cache(mq, e)) {
		result->op = POLICY_HIT;
		result->cblock = infer_cblock(&mq->cache_pool, e);
	}

	return 0;
}

/*
 * Moves an entry from the pre_cache to the cache.  The main work is
 * finding which cache block to use.
 */
static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e,
			      struct policy_result *result)
{
	int r;
	struct entry *new_e;

	/* Ensure there's a free cblock in the cache */
	if (epool_empty(&mq->cache_pool)) {
		result->op = POLICY_REPLACE;
		r = demote_cblock(mq, &result->old_oblock);
		if (r) {
			result->op = POLICY_MISS;
			return 0;
		}
	} else
		result->op = POLICY_NEW;

	new_e = alloc_entry(&mq->cache_pool);
	BUG_ON(!new_e);

	new_e->oblock = e->oblock;
	new_e->dirty = false;
	new_e->hit_count = e->hit_count;
	new_e->generation = e->generation;
	new_e->tick = e->tick;

	del(mq, e);
	free_entry(&mq->pre_cache_pool, e);
	push(mq, new_e);

	result->cblock = infer_cblock(&mq->cache_pool, new_e);

	return 0;
}

static int pre_cache_entry_found(struct mq_policy *mq, struct entry *e,
				 bool can_migrate, bool discarded_oblock,
				 int data_dir, struct policy_result *result)
{
	int r = 0;
	bool updated = updated_this_tick(mq, e);

	requeue_and_update_tick(mq, e);

	if ((!discarded_oblock && updated) ||
	    !should_promote(mq, e, discarded_oblock, data_dir))
		result->op = POLICY_MISS;
	else if (!can_migrate)
		r = -EWOULDBLOCK;
	else
		r = pre_cache_to_cache(mq, e, result);

	return r;
}

static void insert_in_pre_cache(struct mq_policy *mq,
				dm_oblock_t oblock)
{
	struct entry *e = alloc_entry(&mq->pre_cache_pool);

	if (!e)
		/*
		 * There's no spare entry structure, so we grab the least
		 * used one from the pre_cache.
		 */
		e = pop(mq, &mq->pre_cache);

	if (unlikely(!e)) {
		DMWARN("couldn't pop from pre cache");
		return;
	}

	e->dirty = false;
	e->oblock = oblock;
	e->hit_count = 1;
	e->generation = mq->generation;
	push(mq, e);
}

static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock,
			    struct policy_result *result)
{
	int r;
	struct entry *e;

	if (epool_empty(&mq->cache_pool)) {
		result->op = POLICY_REPLACE;
		r = demote_cblock(mq, &result->old_oblock);
		if (unlikely(r)) {
			result->op = POLICY_MISS;
			insert_in_pre_cache(mq, oblock);
			return;
		}

		/*
		 * This will always succeed, since we've just demoted.
		 */
		e = alloc_entry(&mq->cache_pool);
		BUG_ON(!e);

	} else {
		e = alloc_entry(&mq->cache_pool);
		result->op = POLICY_NEW;
	}

	e->oblock = oblock;
	e->dirty = false;
	e->hit_count = 1;
	e->generation = mq->generation;
	push(mq, e);

	result->cblock = infer_cblock(&mq->cache_pool, e);
}

static int no_entry_found(struct mq_policy *mq, dm_oblock_t oblock,
			  bool can_migrate, bool discarded_oblock,
			  int data_dir, struct policy_result *result)
{
	if (adjusted_promote_threshold(mq, discarded_oblock, data_dir) == 1) {
		if (can_migrate)
			insert_in_cache(mq, oblock, result);
		else
			return -EWOULDBLOCK;
	} else {
		insert_in_pre_cache(mq, oblock);
		result->op = POLICY_MISS;
	}

	return 0;
}

/*
 * Looks the oblock up in the hash table, then decides whether to put it in
 * the pre_cache, the cache etc.
 */
static int map(struct mq_policy *mq, dm_oblock_t oblock,
	       bool can_migrate, bool discarded_oblock,
	       int data_dir, struct policy_result *result)
{
	int r = 0;
	struct entry *e = hash_lookup(mq, oblock);

	if (e && in_cache(mq, e))
		r = cache_entry_found(mq, e, result);

	else if (iot_pattern(&mq->tracker) == PATTERN_SEQUENTIAL)
		result->op = POLICY_MISS;

	else if (e)
		r = pre_cache_entry_found(mq, e, can_migrate, discarded_oblock,
					  data_dir, result);

	else
		r = no_entry_found(mq, oblock, can_migrate, discarded_oblock,
				   data_dir, result);

	if (r == -EWOULDBLOCK)
		result->op = POLICY_MISS;

	return r;
}

/*----------------------------------------------------------------*/

/*
 * Public interface, via the policy struct.  See dm-cache-policy.h for a
 * description of these.
 */

static struct mq_policy *to_mq_policy(struct dm_cache_policy *p)
{
	return container_of(p, struct mq_policy, policy);
}

static void mq_destroy(struct dm_cache_policy *p)
{
	struct mq_policy *mq = to_mq_policy(p);

	kfree(mq->table);
	epool_exit(&mq->cache_pool);
	epool_exit(&mq->pre_cache_pool);
	kfree(mq);
}

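/*
 * Take a snapshot of the externally incremented tick, under the spin lock,
 * so the rest of the policy sees a stable value while the mutex is held.
 */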
static void copy_tick(struct mq_policy *mq)
{
	unsigned long flags;

	spin_lock_irqsave(&mq->tick_lock, flags);
	mq->tick = mq->tick_protected;
	spin_unlock_irqrestore(&mq->tick_lock, flags);
}

static int mq_map(struct dm_cache_policy *p, dm_oblock_t oblock,
		  bool can_block, bool can_migrate, bool discarded_oblock,
		  struct bio *bio, struct policy_result *result)
{
	int r;
	struct mq_policy *mq = to_mq_policy(p);

	result->op = POLICY_MISS;

	if (can_block)
		mutex_lock(&mq->lock);
	else if (!mutex_trylock(&mq->lock))
		return -EWOULDBLOCK;

	copy_tick(mq);

	iot_examine_bio(&mq->tracker, bio);
	r = map(mq, oblock, can_migrate, discarded_oblock,
		bio_data_dir(bio), result);

	mutex_unlock(&mq->lock);

	return r;
}

static int mq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock)
{
	int r;
	struct mq_policy *mq = to_mq_policy(p);
	struct entry *e;

	if (!mutex_trylock(&mq->lock))
		return -EWOULDBLOCK;

	e = hash_lookup(mq, oblock);
	if (e && in_cache(mq, e)) {
		*cblock = infer_cblock(&mq->cache_pool, e);
		r = 0;
	} else
		r = -ENOENT;

	mutex_unlock(&mq->lock);

	return r;
}

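/*
 * Requeue the entry for the given origin block with its dirty flag
 * updated, moving it between the clean and dirty queues.  The block must
 * currently be mapped in the cache.
 */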
static void __mq_set_clear_dirty(struct mq_policy *mq, dm_oblock_t oblock, bool set)
{
	struct entry *e;

	e = hash_lookup(mq, oblock);
	BUG_ON(!e || !in_cache(mq, e));

	del(mq, e);
	e->dirty = set;
	push(mq, e);
}

static void mq_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
{
	struct mq_policy *mq = to_mq_policy(p);

	mutex_lock(&mq->lock);
	__mq_set_clear_dirty(mq, oblock, true);
	mutex_unlock(&mq->lock);
}

static void mq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
{
	struct mq_policy *mq = to_mq_policy(p);

	mutex_lock(&mq->lock);
	__mq_set_clear_dirty(mq, oblock, false);
	mutex_unlock(&mq->lock);
}

static int mq_load_mapping(struct dm_cache_policy *p,
			   dm_oblock_t oblock, dm_cblock_t cblock,
			   uint32_t hint, bool hint_valid)
{
	struct mq_policy *mq = to_mq_policy(p);
	struct entry *e;

	e = alloc_particular_entry(&mq->cache_pool, cblock);
	e->oblock = oblock;
	e->dirty = false;	/* this gets corrected in a minute */
	e->hit_count = hint_valid ? hint : 1;
	e->generation = mq->generation;
	push(mq, e);

	return 0;
}

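/*
 * Walk one of the cache queues, handing each entry's cblock, oblock and
 * hit count to the callback so the core can persist the hit count as a
 * hint.
 */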
static int mq_save_hints(struct mq_policy *mq, struct queue *q,
			 policy_walk_fn fn, void *context)
{
	int r;
	unsigned level;
	struct entry *e;

	for (level = 0; level < NR_QUEUE_LEVELS; level++)
		list_for_each_entry(e, q->qs + level, list) {
			r = fn(context, infer_cblock(&mq->cache_pool, e),
			       e->oblock, e->hit_count);
			if (r)
				return r;
		}

	return 0;
}

static int mq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn,
			    void *context)
{
	struct mq_policy *mq = to_mq_policy(p);
	int r = 0;

	mutex_lock(&mq->lock);

	r = mq_save_hints(mq, &mq->cache_clean, fn, context);
	if (!r)
		r = mq_save_hints(mq, &mq->cache_dirty, fn, context);

	mutex_unlock(&mq->lock);

	return r;
}

static void __remove_mapping(struct mq_policy *mq, dm_oblock_t oblock)
{
	struct entry *e;

	e = hash_lookup(mq, oblock);
	BUG_ON(!e || !in_cache(mq, e));

	del(mq, e);
	free_entry(&mq->cache_pool, e);
}

static void mq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
{
	struct mq_policy *mq = to_mq_policy(p);

	mutex_lock(&mq->lock);
	__remove_mapping(mq, oblock);
	mutex_unlock(&mq->lock);
}

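/*
 * Drop the mapping for a specific cache block if it is currently
 * allocated.  Returns -ENODATA if the block is already free.
 */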
static int __remove_cblock(struct mq_policy *mq, dm_cblock_t cblock)
{
	struct entry *e = epool_find(&mq->cache_pool, cblock);

	if (!e)
		return -ENODATA;

	del(mq, e);
	free_entry(&mq->cache_pool, e);

	return 0;
}

static int mq_remove_cblock(struct dm_cache_policy *p, dm_cblock_t cblock)
{
	int r;
	struct mq_policy *mq = to_mq_policy(p);

	mutex_lock(&mq->lock);
	r = __remove_cblock(mq, cblock);
	mutex_unlock(&mq->lock);

	return r;
}

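/*
 * Pop the oldest entry from the lowest level of the dirty queue, hand its
 * blocks back to the caller for writeback, and requeue it as clean.
 */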
static int __mq_writeback_work(struct mq_policy *mq, dm_oblock_t *oblock,
			       dm_cblock_t *cblock)
{
	struct entry *e = pop(mq, &mq->cache_dirty);

	if (!e)
		return -ENODATA;

	*oblock = e->oblock;
	*cblock = infer_cblock(&mq->cache_pool, e);
	e->dirty = false;
	push(mq, e);

	return 0;
}

static int mq_writeback_work(struct dm_cache_policy *p, dm_oblock_t *oblock,
			     dm_cblock_t *cblock)
{
	int r;
	struct mq_policy *mq = to_mq_policy(p);

	mutex_lock(&mq->lock);
	r = __mq_writeback_work(mq, oblock, cblock);
	mutex_unlock(&mq->lock);

	return r;
}

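/*
 * If current_oblock is in the cache, remap its entry to new_oblock and
 * mark it dirty.
 */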
static void __force_mapping(struct mq_policy *mq,
			    dm_oblock_t current_oblock, dm_oblock_t new_oblock)
{
	struct entry *e = hash_lookup(mq, current_oblock);

	if (e && in_cache(mq, e)) {
		del(mq, e);
		e->oblock = new_oblock;
		e->dirty = true;
		push(mq, e);
	}
}

static void mq_force_mapping(struct dm_cache_policy *p,
			     dm_oblock_t current_oblock, dm_oblock_t new_oblock)
{
	struct mq_policy *mq = to_mq_policy(p);

	mutex_lock(&mq->lock);
	__force_mapping(mq, current_oblock, new_oblock);
	mutex_unlock(&mq->lock);
}

static dm_cblock_t mq_residency(struct dm_cache_policy *p)
{
	dm_cblock_t r;
	struct mq_policy *mq = to_mq_policy(p);

	mutex_lock(&mq->lock);
	r = to_cblock(mq->cache_pool.nr_allocated);
	mutex_unlock(&mq->lock);

	return r;
}

static void mq_tick(struct dm_cache_policy *p)
{
	struct mq_policy *mq = to_mq_policy(p);
	unsigned long flags;

	spin_lock_irqsave(&mq->tick_lock, flags);
	mq->tick_protected++;
	spin_unlock_irqrestore(&mq->tick_lock, flags);
}

static int mq_set_config_value(struct dm_cache_policy *p,
			       const char *key, const char *value)
{
	struct mq_policy *mq = to_mq_policy(p);
	enum io_pattern pattern;
	unsigned long tmp;

	if (!strcasecmp(key, "random_threshold"))
		pattern = PATTERN_RANDOM;
	else if (!strcasecmp(key, "sequential_threshold"))
		pattern = PATTERN_SEQUENTIAL;
	else
		return -EINVAL;

	if (kstrtoul(value, 10, &tmp))
		return -EINVAL;

	mq->tracker.thresholds[pattern] = tmp;

	return 0;
}

static int mq_emit_config_values(struct dm_cache_policy *p, char *result, unsigned maxlen)
{
	ssize_t sz = 0;
	struct mq_policy *mq = to_mq_policy(p);

	DMEMIT("4 random_threshold %u sequential_threshold %u",
	       mq->tracker.thresholds[PATTERN_RANDOM],
	       mq->tracker.thresholds[PATTERN_SEQUENTIAL]);

	return 0;
}

/* Init the policy plugin interface function pointers. */
static void init_policy_functions(struct mq_policy *mq)
{
	mq->policy.destroy = mq_destroy;
	mq->policy.map = mq_map;
	mq->policy.lookup = mq_lookup;
	mq->policy.set_dirty = mq_set_dirty;
	mq->policy.clear_dirty = mq_clear_dirty;
	mq->policy.load_mapping = mq_load_mapping;
	mq->policy.walk_mappings = mq_walk_mappings;
	mq->policy.remove_mapping = mq_remove_mapping;
	mq->policy.remove_cblock = mq_remove_cblock;
	mq->policy.writeback_work = mq_writeback_work;
	mq->policy.force_mapping = mq_force_mapping;
	mq->policy.residency = mq_residency;
	mq->policy.tick = mq_tick;
	mq->policy.emit_config_values = mq_emit_config_values;
	mq->policy.set_config_value = mq_set_config_value;
}

static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
					 sector_t origin_size,
					 sector_t cache_block_size)
{
	struct mq_policy *mq = kzalloc(sizeof(*mq), GFP_KERNEL);

	if (!mq)
		return NULL;

	init_policy_functions(mq);
	iot_init(&mq->tracker, SEQUENTIAL_THRESHOLD_DEFAULT, RANDOM_THRESHOLD_DEFAULT);
	mq->cache_size = cache_size;

	if (epool_init(&mq->pre_cache_pool, from_cblock(cache_size))) {
		DMERR("couldn't initialize pool of pre-cache entries");
		goto bad_pre_cache_init;
	}

	if (epool_init(&mq->cache_pool, from_cblock(cache_size))) {
		DMERR("couldn't initialize pool of cache entries");
		goto bad_cache_init;
	}

	mq->tick_protected = 0;
	mq->tick = 0;
	mq->hit_count = 0;
	mq->generation = 0;
	mq->promote_threshold = 0;
	mutex_init(&mq->lock);
	spin_lock_init(&mq->tick_lock);

	queue_init(&mq->pre_cache);
	queue_init(&mq->cache_clean);
	queue_init(&mq->cache_dirty);

	mq->generation_period = max((unsigned) from_cblock(cache_size), 1024U);

	mq->nr_buckets = next_power(from_cblock(cache_size) / 2, 16);
	mq->hash_bits = ffs(mq->nr_buckets) - 1;
	mq->table = kzalloc(sizeof(*mq->table) * mq->nr_buckets, GFP_KERNEL);
	if (!mq->table)
		goto bad_alloc_table;

	return &mq->policy;

bad_alloc_table:
	epool_exit(&mq->cache_pool);
bad_cache_init:
	epool_exit(&mq->pre_cache_pool);
bad_pre_cache_init:
	kfree(mq);

	return NULL;
}

/*----------------------------------------------------------------*/

static struct dm_cache_policy_type mq_policy_type = {
	.name = "mq",
	.version = {1, 1, 0},
	.hint_size = 4,
	.owner = THIS_MODULE,
	.create = mq_create
};

static struct dm_cache_policy_type default_policy_type = {
	.name = "default",
	.version = {1, 1, 0},
	.hint_size = 4,
	.owner = THIS_MODULE,
	.create = mq_create
};

static int __init mq_init(void)
{
	int r;

	mq_entry_cache = kmem_cache_create("dm_mq_policy_cache_entry",
					   sizeof(struct entry),
					   __alignof__(struct entry),
					   0, NULL);
	if (!mq_entry_cache)
		goto bad;

	r = dm_cache_policy_register(&mq_policy_type);
	if (r) {
		DMERR("register failed %d", r);
		goto bad_register_mq;
	}

	r = dm_cache_policy_register(&default_policy_type);
	if (!r) {
		DMINFO("version %u.%u.%u loaded",
		       mq_policy_type.version[0],
		       mq_policy_type.version[1],
		       mq_policy_type.version[2]);
		return 0;
	}

	DMERR("register failed (as default) %d", r);

	dm_cache_policy_unregister(&mq_policy_type);
bad_register_mq:
	kmem_cache_destroy(mq_entry_cache);
bad:
	return -ENOMEM;
}

static void __exit mq_exit(void)
{
	dm_cache_policy_unregister(&mq_policy_type);
	dm_cache_policy_unregister(&default_policy_type);

	kmem_cache_destroy(mq_entry_cache);
}

module_init(mq_init);
module_exit(mq_exit);

MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("mq cache policy");

MODULE_ALIAS("dm-cache-default");