| Peter Zijlstra | 145ca25 | 2007-10-16 23:25:49 -0700 | [diff] [blame] | 1 | /* | 
 | 2 |  * Floating proportions | 
 | 3 |  * | 
 | 4 |  *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | 
 | 5 |  * | 
 | 6 |  * Description: | 
 | 7 |  * | 
 | 8 |  * The floating proportion is a time derivative with an exponentially decaying | 
 | 9 |  * history: | 
 | 10 |  * | 
 | 11 |  *   p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i) | 
 | 12 |  * | 
 | 13 |  * Where j is an element from {prop_local}, x_{j} is j's number of events, | 
 | 14 |  * and i the time period over which the differential is taken. So d/dt_{-i} is | 
 | 15 |  * the differential over the i-th last period. | 
 | 16 |  * | 
 | 17 |  * The decaying history gives smooth transitions. The time differential carries | 
 | 18 |  * the notion of speed. | 
 | 19 |  * | 
 | 20 |  * The denominator is 2^(1+i) because we want the series to be normalised, ie. | 
 | 21 |  * | 
 | 22 |  *   \Sum_{i=0} 1/2^(1+i) = 1 | 
 | 23 |  * | 
 | 24 |  * Further more, if we measure time (t) in the same events as x; so that: | 
 | 25 |  * | 
 | 26 |  *   t = \Sum_{j} x_{j} | 
 | 27 |  * | 
 | 28 |  * we get that: | 
 | 29 |  * | 
 | 30 |  *   \Sum_{j} p_{j} = 1 | 
 | 31 |  * | 
 | 32 |  * Writing this in an iterative fashion we get (dropping the 'd's): | 
 | 33 |  * | 
 | 34 |  *   if (++x_{j}, ++t > period) | 
 | 35 |  *     t /= 2; | 
 | 36 |  *     for_each (j) | 
 | 37 |  *       x_{j} /= 2; | 
 | 38 |  * | 
 | 39 |  * so that: | 
 | 40 |  * | 
 | 41 |  *   p_{j} = x_{j} / t; | 
 | 42 |  * | 
 | 43 |  * We optimize away the '/= 2' for the global time delta by noting that: | 
 | 44 |  * | 
 | 45 |  *   if (++t > period) t /= 2: | 
 | 46 |  * | 
 | 47 |  * Can be approximated by: | 
 | 48 |  * | 
 | 49 |  *   period/2 + (++t % period/2) | 
 | 50 |  * | 
 | 51 |  * [ Furthermore, when we choose period to be 2^n it can be written in terms of | 
 | 52 |  *   binary operations and wraparound artefacts disappear. ] | 
 | 53 |  * | 
 | 54 |  * Also note that this yields a natural counter of the elapsed periods: | 
 | 55 |  * | 
 | 56 |  *   c = t / (period/2) | 
 | 57 |  * | 
 | 58 |  * [ Its monotonic increasing property can be applied to mitigate the wrap- | 
 | 59 |  *   around issue. ] | 
 | 60 |  * | 
 | 61 |  * This allows us to do away with the loop over all prop_locals on each period | 
 | 62 |  * expiration. By remembering the period count under which it was last accessed | 
 | 63 |  * as c_{j}, we can obtain the number of 'missed' cycles from: | 
 | 64 |  * | 
 | 65 |  *   c - c_{j} | 
 | 66 |  * | 
 | 67 |  * We can then lazily catch up to the global period count every time we are | 
 | 68 |  * going to use x_{j}, by doing: | 
 | 69 |  * | 
 | 70 |  *   x_{j} /= 2^(c - c_{j}), c_{j} = c | 
 | 71 |  */ | 
 | 72 |  | 
 | 73 | #include <linux/proportions.h> | 
 | 74 | #include <linux/rcupdate.h> | 
 | 75 |  | 
| Peter Zijlstra | 145ca25 | 2007-10-16 23:25:49 -0700 | [diff] [blame] | 76 | int prop_descriptor_init(struct prop_descriptor *pd, int shift) | 
 | 77 | { | 
 | 78 | 	int err; | 
 | 79 |  | 
 | 80 | 	if (shift > PROP_MAX_SHIFT) | 
 | 81 | 		shift = PROP_MAX_SHIFT; | 
 | 82 |  | 
 | 83 | 	pd->index = 0; | 
 | 84 | 	pd->pg[0].shift = shift; | 
 | 85 | 	mutex_init(&pd->mutex); | 
| Peter Zijlstra | ea31951 | 2008-12-26 15:08:55 +0100 | [diff] [blame] | 86 | 	err = percpu_counter_init(&pd->pg[0].events, 0); | 
| Peter Zijlstra | 145ca25 | 2007-10-16 23:25:49 -0700 | [diff] [blame] | 87 | 	if (err) | 
 | 88 | 		goto out; | 
 | 89 |  | 
| Peter Zijlstra | ea31951 | 2008-12-26 15:08:55 +0100 | [diff] [blame] | 90 | 	err = percpu_counter_init(&pd->pg[1].events, 0); | 
| Peter Zijlstra | 145ca25 | 2007-10-16 23:25:49 -0700 | [diff] [blame] | 91 | 	if (err) | 
 | 92 | 		percpu_counter_destroy(&pd->pg[0].events); | 
 | 93 |  | 
 | 94 | out: | 
 | 95 | 	return err; | 
 | 96 | } | 
 | 97 |  | 
 | 98 | /* | 
 | 99 |  * We have two copies, and flip between them to make it seem like an atomic | 
 | 100 |  * update. The update is not really atomic wrt the events counter, but | 
 | 101 |  * it is internally consistent with the bit layout depending on shift. | 
 | 102 |  * | 
 | 103 |  * We copy the events count, move the bits around and flip the index. | 
 | 104 |  */ | 
 | 105 | void prop_change_shift(struct prop_descriptor *pd, int shift) | 
 | 106 | { | 
 | 107 | 	int index; | 
 | 108 | 	int offset; | 
 | 109 | 	u64 events; | 
 | 110 | 	unsigned long flags; | 
 | 111 |  | 
 | 112 | 	if (shift > PROP_MAX_SHIFT) | 
 | 113 | 		shift = PROP_MAX_SHIFT; | 
 | 114 |  | 
 | 115 | 	mutex_lock(&pd->mutex); | 
 | 116 |  | 
 | 117 | 	index = pd->index ^ 1; | 
 | 118 | 	offset = pd->pg[pd->index].shift - shift; | 
 | 119 | 	if (!offset) | 
 | 120 | 		goto out; | 
 | 121 |  | 
 | 122 | 	pd->pg[index].shift = shift; | 
 | 123 |  | 
 | 124 | 	local_irq_save(flags); | 
 | 125 | 	events = percpu_counter_sum(&pd->pg[pd->index].events); | 
 | 126 | 	if (offset < 0) | 
 | 127 | 		events <<= -offset; | 
 | 128 | 	else | 
 | 129 | 		events >>= offset; | 
 | 130 | 	percpu_counter_set(&pd->pg[index].events, events); | 
 | 131 |  | 
 | 132 | 	/* | 
 | 133 | 	 * ensure the new pg is fully written before the switch | 
 | 134 | 	 */ | 
 | 135 | 	smp_wmb(); | 
 | 136 | 	pd->index = index; | 
 | 137 | 	local_irq_restore(flags); | 
 | 138 |  | 
 | 139 | 	synchronize_rcu(); | 
 | 140 |  | 
 | 141 | out: | 
 | 142 | 	mutex_unlock(&pd->mutex); | 
 | 143 | } | 
 | 144 |  | 
 | 145 | /* | 
 | 146 |  * wrap the access to the data in an rcu_read_lock() section; | 
 | 147 |  * this is used to track the active references. | 
 | 148 |  */ | 
 | 149 | static struct prop_global *prop_get_global(struct prop_descriptor *pd) | 
| Harvey Harrison | 3007967 | 2009-01-06 14:40:50 -0800 | [diff] [blame] | 150 | __acquires(RCU) | 
| Peter Zijlstra | 145ca25 | 2007-10-16 23:25:49 -0700 | [diff] [blame] | 151 | { | 
 | 152 | 	int index; | 
 | 153 |  | 
 | 154 | 	rcu_read_lock(); | 
 | 155 | 	index = pd->index; | 
 | 156 | 	/* | 
 | 157 | 	 * match the wmb from vcd_flip() | 
 | 158 | 	 */ | 
 | 159 | 	smp_rmb(); | 
 | 160 | 	return &pd->pg[index]; | 
 | 161 | } | 
 | 162 |  | 
 | 163 | static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg) | 
| Harvey Harrison | 3007967 | 2009-01-06 14:40:50 -0800 | [diff] [blame] | 164 | __releases(RCU) | 
| Peter Zijlstra | 145ca25 | 2007-10-16 23:25:49 -0700 | [diff] [blame] | 165 | { | 
 | 166 | 	rcu_read_unlock(); | 
 | 167 | } | 
 | 168 |  | 
 | 169 | static void | 
 | 170 | prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift) | 
 | 171 | { | 
 | 172 | 	int offset = *pl_shift - new_shift; | 
 | 173 |  | 
 | 174 | 	if (!offset) | 
 | 175 | 		return; | 
 | 176 |  | 
 | 177 | 	if (offset < 0) | 
 | 178 | 		*pl_period <<= -offset; | 
 | 179 | 	else | 
 | 180 | 		*pl_period >>= offset; | 
 | 181 |  | 
 | 182 | 	*pl_shift = new_shift; | 
 | 183 | } | 
 | 184 |  | 
 | 185 | /* | 
 | 186 |  * PERCPU | 
 | 187 |  */ | 
 | 188 |  | 
| Peter Zijlstra | f16b34a | 2007-12-22 14:03:29 -0800 | [diff] [blame] | 189 | #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids))) | 
 | 190 |  | 
| Peter Zijlstra | 145ca25 | 2007-10-16 23:25:49 -0700 | [diff] [blame] | 191 | int prop_local_init_percpu(struct prop_local_percpu *pl) | 
 | 192 | { | 
 | 193 | 	spin_lock_init(&pl->lock); | 
 | 194 | 	pl->shift = 0; | 
 | 195 | 	pl->period = 0; | 
| Peter Zijlstra | ea31951 | 2008-12-26 15:08:55 +0100 | [diff] [blame] | 196 | 	return percpu_counter_init(&pl->events, 0); | 
| Peter Zijlstra | 145ca25 | 2007-10-16 23:25:49 -0700 | [diff] [blame] | 197 | } | 
 | 198 |  | 
 | 199 | void prop_local_destroy_percpu(struct prop_local_percpu *pl) | 
 | 200 | { | 
 | 201 | 	percpu_counter_destroy(&pl->events); | 
 | 202 | } | 
 | 203 |  | 
 | 204 | /* | 
 | 205 |  * Catch up with missed period expirations. | 
 | 206 |  * | 
 | 207 |  *   until (c_{j} == c) | 
 | 208 |  *     x_{j} -= x_{j}/2; | 
 | 209 |  *     c_{j}++; | 
 | 210 |  */ | 
 | 211 | static | 
 | 212 | void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl) | 
 | 213 | { | 
 | 214 | 	unsigned long period = 1UL << (pg->shift - 1); | 
 | 215 | 	unsigned long period_mask = ~(period - 1); | 
 | 216 | 	unsigned long global_period; | 
 | 217 | 	unsigned long flags; | 
 | 218 |  | 
 | 219 | 	global_period = percpu_counter_read(&pg->events); | 
 | 220 | 	global_period &= period_mask; | 
 | 221 |  | 
 | 222 | 	/* | 
 | 223 | 	 * Fast path - check if the local and global period count still match | 
 | 224 | 	 * outside of the lock. | 
 | 225 | 	 */ | 
 | 226 | 	if (pl->period == global_period) | 
 | 227 | 		return; | 
 | 228 |  | 
 | 229 | 	spin_lock_irqsave(&pl->lock, flags); | 
 | 230 | 	prop_adjust_shift(&pl->shift, &pl->period, pg->shift); | 
| Peter Zijlstra | f16b34a | 2007-12-22 14:03:29 -0800 | [diff] [blame] | 231 |  | 
| Peter Zijlstra | 145ca25 | 2007-10-16 23:25:49 -0700 | [diff] [blame] | 232 | 	/* | 
 | 233 | 	 * For each missed period, we half the local counter. | 
 | 234 | 	 * basically: | 
 | 235 | 	 *   pl->events >> (global_period - pl->period); | 
| Peter Zijlstra | 145ca25 | 2007-10-16 23:25:49 -0700 | [diff] [blame] | 236 | 	 */ | 
| Peter Zijlstra | f16b34a | 2007-12-22 14:03:29 -0800 | [diff] [blame] | 237 | 	period = (global_period - pl->period) >> (pg->shift - 1); | 
 | 238 | 	if (period < BITS_PER_LONG) { | 
 | 239 | 		s64 val = percpu_counter_read(&pl->events); | 
| Peter Zijlstra | 145ca25 | 2007-10-16 23:25:49 -0700 | [diff] [blame] | 240 |  | 
| Peter Zijlstra | f16b34a | 2007-12-22 14:03:29 -0800 | [diff] [blame] | 241 | 		if (val < (nr_cpu_ids * PROP_BATCH)) | 
 | 242 | 			val = percpu_counter_sum(&pl->events); | 
| Peter Zijlstra | 145ca25 | 2007-10-16 23:25:49 -0700 | [diff] [blame] | 243 |  | 
| Peter Zijlstra | f16b34a | 2007-12-22 14:03:29 -0800 | [diff] [blame] | 244 | 		__percpu_counter_add(&pl->events, -val + (val >> period), | 
 | 245 | 					PROP_BATCH); | 
 | 246 | 	} else | 
 | 247 | 		percpu_counter_set(&pl->events, 0); | 
 | 248 |  | 
| Peter Zijlstra | 145ca25 | 2007-10-16 23:25:49 -0700 | [diff] [blame] | 249 | 	pl->period = global_period; | 
 | 250 | 	spin_unlock_irqrestore(&pl->lock, flags); | 
 | 251 | } | 
 | 252 |  | 
 | 253 | /* | 
 | 254 |  *   ++x_{j}, ++t | 
 | 255 |  */ | 
 | 256 | void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl) | 
 | 257 | { | 
 | 258 | 	struct prop_global *pg = prop_get_global(pd); | 
 | 259 |  | 
 | 260 | 	prop_norm_percpu(pg, pl); | 
| Peter Zijlstra | f16b34a | 2007-12-22 14:03:29 -0800 | [diff] [blame] | 261 | 	__percpu_counter_add(&pl->events, 1, PROP_BATCH); | 
| Peter Zijlstra | 145ca25 | 2007-10-16 23:25:49 -0700 | [diff] [blame] | 262 | 	percpu_counter_add(&pg->events, 1); | 
 | 263 | 	prop_put_global(pd, pg); | 
 | 264 | } | 
 | 265 |  | 
 | 266 | /* | 
| Peter Zijlstra | a42dde0 | 2008-04-30 00:54:36 -0700 | [diff] [blame] | 267 |  * identical to __prop_inc_percpu, except that it limits this pl's fraction to | 
 | 268 |  * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded. | 
 | 269 |  */ | 
 | 270 | void __prop_inc_percpu_max(struct prop_descriptor *pd, | 
 | 271 | 			   struct prop_local_percpu *pl, long frac) | 
 | 272 | { | 
 | 273 | 	struct prop_global *pg = prop_get_global(pd); | 
 | 274 |  | 
 | 275 | 	prop_norm_percpu(pg, pl); | 
 | 276 |  | 
 | 277 | 	if (unlikely(frac != PROP_FRAC_BASE)) { | 
 | 278 | 		unsigned long period_2 = 1UL << (pg->shift - 1); | 
 | 279 | 		unsigned long counter_mask = period_2 - 1; | 
 | 280 | 		unsigned long global_count; | 
 | 281 | 		long numerator, denominator; | 
 | 282 |  | 
 | 283 | 		numerator = percpu_counter_read_positive(&pl->events); | 
 | 284 | 		global_count = percpu_counter_read(&pg->events); | 
 | 285 | 		denominator = period_2 + (global_count & counter_mask); | 
 | 286 |  | 
 | 287 | 		if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT)) | 
 | 288 | 			goto out_put; | 
 | 289 | 	} | 
 | 290 |  | 
 | 291 | 	percpu_counter_add(&pl->events, 1); | 
 | 292 | 	percpu_counter_add(&pg->events, 1); | 
 | 293 |  | 
 | 294 | out_put: | 
 | 295 | 	prop_put_global(pd, pg); | 
 | 296 | } | 
 | 297 |  | 
 | 298 | /* | 
| Peter Zijlstra | 145ca25 | 2007-10-16 23:25:49 -0700 | [diff] [blame] | 299 |  * Obtain a fraction of this proportion | 
 | 300 |  * | 
 | 301 |  *   p_{j} = x_{j} / (period/2 + t % period/2) | 
 | 302 |  */ | 
 | 303 | void prop_fraction_percpu(struct prop_descriptor *pd, | 
 | 304 | 		struct prop_local_percpu *pl, | 
 | 305 | 		long *numerator, long *denominator) | 
 | 306 | { | 
 | 307 | 	struct prop_global *pg = prop_get_global(pd); | 
 | 308 | 	unsigned long period_2 = 1UL << (pg->shift - 1); | 
 | 309 | 	unsigned long counter_mask = period_2 - 1; | 
 | 310 | 	unsigned long global_count; | 
 | 311 |  | 
 | 312 | 	prop_norm_percpu(pg, pl); | 
 | 313 | 	*numerator = percpu_counter_read_positive(&pl->events); | 
 | 314 |  | 
 | 315 | 	global_count = percpu_counter_read(&pg->events); | 
 | 316 | 	*denominator = period_2 + (global_count & counter_mask); | 
 | 317 |  | 
 | 318 | 	prop_put_global(pd, pg); | 
 | 319 | } | 
 | 320 |  | 
 | 321 | /* | 
 | 322 |  * SINGLE | 
 | 323 |  */ | 
 | 324 |  | 
 | 325 | int prop_local_init_single(struct prop_local_single *pl) | 
 | 326 | { | 
 | 327 | 	spin_lock_init(&pl->lock); | 
 | 328 | 	pl->shift = 0; | 
 | 329 | 	pl->period = 0; | 
 | 330 | 	pl->events = 0; | 
 | 331 | 	return 0; | 
 | 332 | } | 
 | 333 |  | 
 | 334 | void prop_local_destroy_single(struct prop_local_single *pl) | 
 | 335 | { | 
 | 336 | } | 
 | 337 |  | 
 | 338 | /* | 
 | 339 |  * Catch up with missed period expirations. | 
 | 340 |  */ | 
 | 341 | static | 
 | 342 | void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl) | 
 | 343 | { | 
 | 344 | 	unsigned long period = 1UL << (pg->shift - 1); | 
 | 345 | 	unsigned long period_mask = ~(period - 1); | 
 | 346 | 	unsigned long global_period; | 
 | 347 | 	unsigned long flags; | 
 | 348 |  | 
 | 349 | 	global_period = percpu_counter_read(&pg->events); | 
 | 350 | 	global_period &= period_mask; | 
 | 351 |  | 
 | 352 | 	/* | 
 | 353 | 	 * Fast path - check if the local and global period count still match | 
 | 354 | 	 * outside of the lock. | 
 | 355 | 	 */ | 
 | 356 | 	if (pl->period == global_period) | 
 | 357 | 		return; | 
 | 358 |  | 
 | 359 | 	spin_lock_irqsave(&pl->lock, flags); | 
 | 360 | 	prop_adjust_shift(&pl->shift, &pl->period, pg->shift); | 
 | 361 | 	/* | 
 | 362 | 	 * For each missed period, we half the local counter. | 
 | 363 | 	 */ | 
 | 364 | 	period = (global_period - pl->period) >> (pg->shift - 1); | 
 | 365 | 	if (likely(period < BITS_PER_LONG)) | 
 | 366 | 		pl->events >>= period; | 
 | 367 | 	else | 
 | 368 | 		pl->events = 0; | 
 | 369 | 	pl->period = global_period; | 
 | 370 | 	spin_unlock_irqrestore(&pl->lock, flags); | 
 | 371 | } | 
 | 372 |  | 
 | 373 | /* | 
 | 374 |  *   ++x_{j}, ++t | 
 | 375 |  */ | 
 | 376 | void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl) | 
 | 377 | { | 
 | 378 | 	struct prop_global *pg = prop_get_global(pd); | 
 | 379 |  | 
 | 380 | 	prop_norm_single(pg, pl); | 
 | 381 | 	pl->events++; | 
 | 382 | 	percpu_counter_add(&pg->events, 1); | 
 | 383 | 	prop_put_global(pd, pg); | 
 | 384 | } | 
 | 385 |  | 
 | 386 | /* | 
 | 387 |  * Obtain a fraction of this proportion | 
 | 388 |  * | 
 | 389 |  *   p_{j} = x_{j} / (period/2 + t % period/2) | 
 | 390 |  */ | 
 | 391 | void prop_fraction_single(struct prop_descriptor *pd, | 
 | 392 | 	       	struct prop_local_single *pl, | 
 | 393 | 		long *numerator, long *denominator) | 
 | 394 | { | 
 | 395 | 	struct prop_global *pg = prop_get_global(pd); | 
 | 396 | 	unsigned long period_2 = 1UL << (pg->shift - 1); | 
 | 397 | 	unsigned long counter_mask = period_2 - 1; | 
 | 398 | 	unsigned long global_count; | 
 | 399 |  | 
 | 400 | 	prop_norm_single(pg, pl); | 
 | 401 | 	*numerator = pl->events; | 
 | 402 |  | 
 | 403 | 	global_count = percpu_counter_read(&pg->events); | 
 | 404 | 	*denominator = period_2 + (global_count & counter_mask); | 
 | 405 |  | 
 | 406 | 	prop_put_global(pd, pg); | 
 | 407 | } |