/*
 * Floating proportions
 *
 *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *
 * Description:
 *
 * The floating proportion is a time derivative with an exponentially decaying
 * history:
 *
 *   p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
 *
 * Where j is an element from {prop_local}, x_{j} is j's number of events,
 * and i the time period over which the differential is taken. So d/dt_{-i} is
 * the differential over the i-th last period.
 *
 * The decaying history gives smooth transitions. The time differential carries
 * the notion of speed.
 *
 * The denominator is 2^(1+i) because we want the series to be normalised, i.e.
 *
 *   \Sum_{i=0} 1/2^(1+i) = 1
 *
 * Furthermore, if we measure time (t) in the same events as x; so that:
 *
 *   t = \Sum_{j} x_{j}
 *
 * we get that:
 *
 *   \Sum_{j} p_{j} = 1
 *
 * Writing this in an iterative fashion we get (dropping the 'd's):
 *
 *   if (++x_{j}, ++t > period)
 *     t /= 2;
 *     for_each (j)
 *       x_{j} /= 2;
 *
 * so that:
 *
 *   p_{j} = x_{j} / t;
 *
 * We optimize away the '/= 2' for the global time delta by noting that:
 *
 *   if (++t > period) t /= 2;
 *
 * can be approximated by:
 *
 *   period/2 + (++t % period/2)
 *
 * [ Furthermore, when we choose period to be 2^n it can be written in terms of
 *   binary operations and wraparound artefacts disappear. ]
 *
 * Also note that this yields a natural counter of the elapsed periods:
 *
 *   c = t / (period/2)
 *
 * [ Its monotonically increasing property can be applied to mitigate the wrap-
 *   around issue. ]
 *
 * This allows us to do away with the loop over all prop_locals on each period
 * expiration. By remembering the period count under which it was last accessed
 * as c_{j}, we can obtain the number of 'missed' cycles from:
 *
 *   c - c_{j}
 *
 * We can then lazily catch up to the global period count every time we are
 * going to use x_{j}, by doing:
 *
 *   x_{j} /= 2^(c - c_{j}), c_{j} = c
 */
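
/*
 * Worked example of the iterative form above (the numbers are invented
 * purely for the walk-through; halving is integer division). Take
 * period = 8 and two elements a and b, with x_a = x_b = 4 and t = 8, so
 * p_a = p_b = 1/2. Now feed 8 more events to a and none to b:
 *
 *   1st event:  x_a = 5, t = 9 > 8  ->  t = 4, x_a = 2, x_b = 2
 *   4 more:     x_a = 6, t = 8
 *   6th event:  x_a = 7, t = 9 > 8  ->  t = 4, x_a = 3, x_b = 1
 *   2 more:     x_a = 5, t = 6
 *
 * leaving p_a = 5/6 and p_b = 1/6: a's proportion floats towards 1 while
 * b's decays towards 0, older history being weighted down by each halving
 * exactly as the 2^(1+i) series prescribes.
 */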

#include <linux/proportions.h>
#include <linux/rcupdate.h>

/*
 * Limit the time part in order to ensure there are some bits left for the
 * cycle counter.
 */
#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)
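
/*
 * Concretely (assuming the usual BITS_PER_LONG values): on a 64-bit machine
 * the shift is capped at 48, on a 32-bit machine at 24, leaving the top
 * quarter of the word to count elapsed periods before the cycle counter
 * wraps around.
 */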

int prop_descriptor_init(struct prop_descriptor *pd, int shift)
{
	int err;

	if (shift > PROP_MAX_SHIFT)
		shift = PROP_MAX_SHIFT;

	pd->index = 0;
	pd->pg[0].shift = shift;
	mutex_init(&pd->mutex);
	err = percpu_counter_init_irq(&pd->pg[0].events, 0);
	if (err)
		goto out;

	err = percpu_counter_init_irq(&pd->pg[1].events, 0);
	if (err)
		percpu_counter_destroy(&pd->pg[0].events);

out:
	return err;
}
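
/*
 * A minimal usage sketch (hypothetical caller; my_pd and my_shift are
 * made-up names):
 *
 *	static struct prop_descriptor my_pd;
 *
 *	err = prop_descriptor_init(&my_pd, my_shift);
 *
 * where shift determines the length of the decay period, one full period
 * being 2^shift events.
 */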

/*
 * We have two copies, and flip between them to make it seem like an atomic
 * update. The update is not really atomic with respect to the events counter,
 * but it is internally consistent with the bit layout depending on shift.
 *
 * We copy the events count, move the bits around and flip the index.
 */
void prop_change_shift(struct prop_descriptor *pd, int shift)
{
	int index;
	int offset;
	u64 events;
	unsigned long flags;

	if (shift > PROP_MAX_SHIFT)
		shift = PROP_MAX_SHIFT;

	mutex_lock(&pd->mutex);

	index = pd->index ^ 1;
	offset = pd->pg[pd->index].shift - shift;
	if (!offset)
		goto out;

	pd->pg[index].shift = shift;

	local_irq_save(flags);
	events = percpu_counter_sum(&pd->pg[pd->index].events);
	if (offset < 0)
		events <<= -offset;
	else
		events >>= offset;
	percpu_counter_set(&pd->pg[index].events, events);

	/*
	 * ensure the new pg is fully written before the switch
	 */
	smp_wmb();
	pd->index = index;
	local_irq_restore(flags);

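	/*
	 * Wait for all active readers to drop their reference to the old
	 * pg before returning, so that it can safely be overwritten on the
	 * next flip.
	 */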
	synchronize_rcu();

out:
	mutex_unlock(&pd->mutex);
}

/*
 * Wrap the access to the data in an rcu_read_lock() section;
 * this is used to track the active references.
 */
static struct prop_global *prop_get_global(struct prop_descriptor *pd)
{
	int index;

	rcu_read_lock();
	index = pd->index;
	/*
	 * match the wmb from prop_change_shift()
	 */
	smp_rmb();
	return &pd->pg[index];
}

static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
{
	rcu_read_unlock();
}

static void
prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
{
	int offset = *pl_shift - new_shift;

	if (!offset)
		return;

	if (offset < 0)
		*pl_period <<= -offset;
	else
		*pl_period >>= offset;

	*pl_shift = new_shift;
}

/*
 * PERCPU
 */

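/*
 * Batch size for the local percpu counter; it grows logarithmically with
 * the number of possible CPUs so that updates to the shared count stay
 * cheap on large machines, at the cost of a correspondingly larger
 * per-read deviation.
 */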
#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))

int prop_local_init_percpu(struct prop_local_percpu *pl)
{
	spin_lock_init(&pl->lock);
	pl->shift = 0;
	pl->period = 0;
	return percpu_counter_init_irq(&pl->events, 0);
}

void prop_local_destroy_percpu(struct prop_local_percpu *pl)
{
	percpu_counter_destroy(&pl->events);
}

/*
 * Catch up with missed period expirations.
 *
 *   until (c_{j} == c)
 *     x_{j} -= x_{j}/2;
 *     c_{j}++;
 */
static
void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
{
	unsigned long period = 1UL << (pg->shift - 1);
	unsigned long period_mask = ~(period - 1);
	unsigned long global_period;
	unsigned long flags;

	global_period = percpu_counter_read(&pg->events);
	global_period &= period_mask;

	/*
	 * Fast path - check if the local and global period count still match
	 * outside of the lock.
	 */
	if (pl->period == global_period)
		return;

	spin_lock_irqsave(&pl->lock, flags);
	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);

	/*
	 * For each missed period, we halve the local counter.
	 * Basically:
	 *   pl->events >>= (global_period - pl->period) >> (pg->shift - 1);
	 */
	period = (global_period - pl->period) >> (pg->shift - 1);
	if (period < BITS_PER_LONG) {
		s64 val = percpu_counter_read(&pl->events);

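		/*
		 * When the counter is small, the per-cpu deviations (up to
		 * nr_cpu_ids * PROP_BATCH) dominate the approximate read,
		 * so fall back to the exact but more expensive sum.
		 */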
		if (val < (nr_cpu_ids * PROP_BATCH))
			val = percpu_counter_sum(&pl->events);

		__percpu_counter_add(&pl->events, -val + (val >> period),
					PROP_BATCH);
	} else
		percpu_counter_set(&pl->events, 0);

	pl->period = global_period;
	spin_unlock_irqrestore(&pl->lock, flags);
}
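
/*
 * Worked example of the catch-up arithmetic above (invented numbers):
 * with pg->shift = 4 the period unit is 2^(4-1) = 8 events, so
 * period_mask = ~7. If the global event count reads 37, then
 * global_period = 37 & ~7 = 32. A local that last synced at
 * pl->period = 8 has missed (32 - 8) >> 3 = 3 periods, and its event
 * count is therefore shifted down by 3, i.e. divided by 2^3.
 */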

/*
 *   ++x_{j}, ++t
 */
void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
{
	struct prop_global *pg = prop_get_global(pd);

	prop_norm_percpu(pg, pl);
	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
	percpu_counter_add(&pg->events, 1);
	prop_put_global(pd, pg);
}

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_percpu(struct prop_descriptor *pd,
		struct prop_local_percpu *pl,
		long *numerator, long *denominator)
{
	struct prop_global *pg = prop_get_global(pd);
	unsigned long period_2 = 1UL << (pg->shift - 1);
	unsigned long counter_mask = period_2 - 1;
	unsigned long global_count;

	prop_norm_percpu(pg, pl);
	*numerator = percpu_counter_read_positive(&pl->events);

	global_count = percpu_counter_read(&pg->events);
	*denominator = period_2 + (global_count & counter_mask);

	prop_put_global(pd, pg);
}
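
/*
 * A minimal usage sketch of the percpu flavour (hypothetical caller;
 * my_pd and my_local are made-up names). On each event:
 *
 *	prop_inc_percpu(&my_pd, &my_local);
 *
 * and to read this element's current share of the whole:
 *
 *	long num, den;
 *	prop_fraction_percpu(&my_pd, &my_local, &num, &den);
 *
 * after which num/den approximates this element's proportion of all
 * recent events; prop_inc_percpu() being the irq-saving wrapper around
 * __prop_inc_percpu() from linux/proportions.h.
 */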

/*
 * SINGLE
 */

int prop_local_init_single(struct prop_local_single *pl)
{
	spin_lock_init(&pl->lock);
	pl->shift = 0;
	pl->period = 0;
	pl->events = 0;
	return 0;
}

void prop_local_destroy_single(struct prop_local_single *pl)
{
}

/*
 * Catch up with missed period expirations.
 */
static
void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
{
	unsigned long period = 1UL << (pg->shift - 1);
	unsigned long period_mask = ~(period - 1);
	unsigned long global_period;
	unsigned long flags;

	global_period = percpu_counter_read(&pg->events);
	global_period &= period_mask;

	/*
	 * Fast path - check if the local and global period count still match
	 * outside of the lock.
	 */
	if (pl->period == global_period)
		return;

	spin_lock_irqsave(&pl->lock, flags);
	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
	/*
	 * For each missed period, we halve the local counter.
	 */
	period = (global_period - pl->period) >> (pg->shift - 1);
	if (likely(period < BITS_PER_LONG))
		pl->events >>= period;
	else
		pl->events = 0;
	pl->period = global_period;
	spin_unlock_irqrestore(&pl->lock, flags);
}

/*
 *   ++x_{j}, ++t
 */
void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
{
	struct prop_global *pg = prop_get_global(pd);

	prop_norm_single(pg, pl);
	pl->events++;
	percpu_counter_add(&pg->events, 1);
	prop_put_global(pd, pg);
}

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_single(struct prop_descriptor *pd,
		struct prop_local_single *pl,
		long *numerator, long *denominator)
{
	struct prop_global *pg = prop_get_global(pd);
	unsigned long period_2 = 1UL << (pg->shift - 1);
	unsigned long counter_mask = period_2 - 1;
	unsigned long global_count;

	prop_norm_single(pg, pl);
	*numerator = pl->events;

	global_count = percpu_counter_read(&pg->events);
	*denominator = period_2 + (global_count & counter_mask);

	prop_put_global(pd, pg);
}