| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | #ifndef __LINUX_PERCPU_H | 
 | 2 | #define __LINUX_PERCPU_H | 
| Martin Peschke | 7ff6f08 | 2006-09-25 23:31:21 -0700 | [diff] [blame] | 3 |  | 
| Robert P. J. Day | 0a3021f | 2007-07-15 23:39:57 -0700 | [diff] [blame] | 4 | #include <linux/preempt.h> | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 5 | #include <linux/slab.h> /* For kmalloc() */ | 
 | 6 | #include <linux/smp.h> | 
| Martin Peschke | 7ff6f08 | 2006-09-25 23:31:21 -0700 | [diff] [blame] | 7 | #include <linux/cpumask.h> | 
| Tejun Heo | 6a24290 | 2009-03-06 14:33:58 +0900 | [diff] [blame] | 8 | #include <linux/pfn.h> | 
| Martin Peschke | 7ff6f08 | 2006-09-25 23:31:21 -0700 | [diff] [blame] | 9 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 10 | #include <asm/percpu.h> | 
 | 11 |  | 
| Tejun Heo | 6a24290 | 2009-03-06 14:33:58 +0900 | [diff] [blame] | 12 | /* percpu space reserved for modules: enough to cover all DEFINE_PER_CPUs in modules */ | 
| Jeremy Fitzhardinge | b00742d | 2007-05-02 19:27:11 +0200 | [diff] [blame] | 13 | #ifdef CONFIG_MODULES | 
| Tejun Heo | 6a24290 | 2009-03-06 14:33:58 +0900 | [diff] [blame] | 14 | #define PERCPU_MODULE_RESERVE		(8 << 10)	/* 8 KiB */ | 
| Jeremy Fitzhardinge | b00742d | 2007-05-02 19:27:11 +0200 | [diff] [blame] | 15 | #else | 
| Tejun Heo | 6a24290 | 2009-03-06 14:33:58 +0900 | [diff] [blame] | 16 | #define PERCPU_MODULE_RESERVE		0	/* no modules, nothing to reserve */ | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 17 | #endif /* CONFIG_MODULES */ | 
 | 18 |  | 
| Tejun Heo | 6a24290 | 2009-03-06 14:33:58 +0900 | [diff] [blame] | 19 | #ifndef PERCPU_ENOUGH_ROOM	/* default, used only when not already defined */ | 
| Jeremy Fitzhardinge | b00742d | 2007-05-02 19:27:11 +0200 | [diff] [blame] | 20 | #define PERCPU_ENOUGH_ROOM						\ | 
| Tejun Heo | 6a24290 | 2009-03-06 14:33:58 +0900 | [diff] [blame] | 21 | 	(ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) +	\ | 
 | 22 | 	 PERCPU_MODULE_RESERVE)	/* __per_cpu_start..__per_cpu_end span, ALIGNed to SMP_CACHE_BYTES, plus the module reserve */ | 
 | 23 | #endif /* PERCPU_ENOUGH_ROOM */ | 
| Jeremy Fitzhardinge | b00742d | 2007-05-02 19:27:11 +0200 | [diff] [blame] | 24 |  | 
| Jan Blunck | 632bbfe | 2006-09-25 23:30:53 -0700 | [diff] [blame] | 25 | /* | 
 | 26 |  * get_cpu_var() disables preemption and must evaluate to an lvalue; pair it with put_cpu_var(). | 
 | 27 |  * Since @var must be a simple identifier, pasting it into the extern name forces a syntax error if it isn't. | 
 | 28 |  */ | 
 | 29 | #define get_cpu_var(var) (*({				\ | 
| Jan Blunck | a666ecf | 2006-10-06 00:43:58 -0700 | [diff] [blame] | 30 | 	extern int simple_identifier_##var(void);	\ | 
| Jan Blunck | 632bbfe | 2006-09-25 23:30:53 -0700 | [diff] [blame] | 31 | 	preempt_disable();				\ | 
 | 32 | 	&__get_cpu_var(var); })) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 33 | #define put_cpu_var(var) preempt_enable()	/* @var is unused; only re-enables preemption */ | 
 | 34 |  | 
 | 35 | #ifdef CONFIG_SMP | 
 | 36 |  | 
| Tejun Heo | e74e396 | 2009-03-30 19:07:44 +0900 | [diff] [blame] | 37 | #ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA | 
| Tejun Heo | fbf59bc | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 38 |  | 
| Tejun Heo | 8d408b4 | 2009-02-24 11:57:21 +0900 | [diff] [blame] | 39 | /* minimum unit size, which is also the maximum supported allocation size */ | 
| Tejun Heo | 6a24290 | 2009-03-06 14:33:58 +0900 | [diff] [blame] | 40 | #define PCPU_MIN_UNIT_SIZE		PFN_ALIGN(64 << 10) | 
| Tejun Heo | 8d408b4 | 2009-02-24 11:57:21 +0900 | [diff] [blame] | 41 |  | 
 | 42 | /* | 
 | 43 |  * PERCPU_DYNAMIC_RESERVE is the amount of free area to piggyback | 
| Tejun Heo | 6b19b0c | 2009-03-06 14:33:59 +0900 | [diff] [blame] | 44 |  * on the first chunk for dynamic percpu allocation when the arch is | 
 | 45 |  * manually allocating and mapping it for faster access (as part of | 
 | 46 |  * a large page mapping, for example). | 
| Tejun Heo | 8d408b4 | 2009-02-24 11:57:21 +0900 | [diff] [blame] | 47 |  * | 
| Tejun Heo | 6b19b0c | 2009-03-06 14:33:59 +0900 | [diff] [blame] | 48 |  * The following values give between one and two pages of free space | 
 | 49 |  * after a typical minimal boot (2-way SMP, single disk and NIC) with | 
 | 50 |  * both defconfig and a distro config on x86_64 and x86_32.  A more | 
 | 51 |  * intelligent way to determine this would be nice. | 
| Tejun Heo | 8d408b4 | 2009-02-24 11:57:21 +0900 | [diff] [blame] | 52 |  */ | 
| Tejun Heo | 6b19b0c | 2009-03-06 14:33:59 +0900 | [diff] [blame] | 53 | #if BITS_PER_LONG > 32 | 
 | 54 | #define PERCPU_DYNAMIC_RESERVE		(20 << 10)	/* 20 KiB */ | 
 | 55 | #else | 
 | 56 | #define PERCPU_DYNAMIC_RESERVE		(12 << 10)	/* 12 KiB */ | 
 | 57 | #endif /* BITS_PER_LONG > 32 */ | 
| Tejun Heo | 8d408b4 | 2009-02-24 11:57:21 +0900 | [diff] [blame] | 58 |  | 
| Tejun Heo | fbf59bc | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 59 | extern void *pcpu_base_addr;	/* NOTE(review): presumably the base_addr handed to pcpu_setup_first_chunk() - confirm */ | 
| Tejun Heo | fb435d5 | 2009-08-14 15:00:51 +0900 | [diff] [blame] | 60 | extern const unsigned long *pcpu_unit_offsets;	/* read-only through this pointer; NOTE(review): presumably one offset per unit - confirm */ | 
| Tejun Heo | fbf59bc | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 61 |  | 
| Tejun Heo | fd1e8a1 | 2009-08-14 15:00:51 +0900 | [diff] [blame] | 62 | struct pcpu_group_info {	/* element type of pcpu_alloc_info.groups[] */ | 
 | 63 | 	int			nr_units;	/* aligned number of units */ | 
 | 64 | 	unsigned long		base_offset;	/* base address offset */ | 
 | 65 | 	unsigned int		*cpu_map;	/* unit->cpu map; empty | 
 | 66 | 						 * entries contain NR_CPUS */ | 
 | 67 | }; | 
 | 68 |  | 
 | 69 | struct pcpu_alloc_info {	/* variable-sized: ends in the flexible array groups[] */ | 
 | 70 | 	size_t			static_size; | 
 | 71 | 	size_t			reserved_size; | 
 | 72 | 	size_t			dyn_size; | 
 | 73 | 	size_t			unit_size; | 
 | 74 | 	size_t			atom_size; | 
 | 75 | 	size_t			alloc_size; | 
 | 76 | 	size_t			__ai_size;	/* internal, don't use */ | 
 | 77 | 	int			nr_groups;	/* 0 if grouping unnecessary */ | 
 | 78 | 	struct pcpu_group_info	groups[];	/* flexible array member, must stay last */ | 
 | 79 | }; | 
 | 80 |  | 
| Tejun Heo | f58dc01 | 2009-08-14 15:00:50 +0900 | [diff] [blame] | 81 | enum pcpu_fc {	/* NOTE(review): "fc" presumably stands for first chunk (cf. pcpu_embed_first_chunk/pcpu_page_first_chunk) - confirm */ | 
 | 82 | 	PCPU_FC_AUTO, | 
 | 83 | 	PCPU_FC_EMBED, | 
 | 84 | 	PCPU_FC_PAGE, | 
| Tejun Heo | f58dc01 | 2009-08-14 15:00:50 +0900 | [diff] [blame] | 85 |  | 
 | 86 | 	PCPU_FC_NR,	/* count of the values above, not a choice itself; sizes pcpu_fc_names[] */ | 
 | 87 | }; | 
 | 88 | extern const char *pcpu_fc_names[PCPU_FC_NR];	/* one string slot per enum value above */ | 
 | 89 |  | 
 | 90 | extern enum pcpu_fc pcpu_chosen_fc; | 
 | 91 |  | 
| Tejun Heo | 3cbc856 | 2009-08-14 15:00:50 +0900 | [diff] [blame] | 92 | typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size, | 
 | 93 | 				     size_t align);	/* callback types taken by the first chunk helpers declared below */ | 
| Tejun Heo | d4b95f8 | 2009-07-04 08:10:59 +0900 | [diff] [blame] | 94 | typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); | 
 | 95 | typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); | 
| Tejun Heo | a530b79 | 2009-07-04 08:11:00 +0900 | [diff] [blame] | 96 | typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);	/* a function type, not a pointer-to-function type like the three above; as a parameter type it adjusts to a pointer */ | 
| Tejun Heo | fbf59bc | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 97 |  | 
| Tejun Heo | fd1e8a1 | 2009-08-14 15:00:51 +0900 | [diff] [blame] | 98 | extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, | 
 | 99 | 							     int nr_units);	/* marked __init, like every first chunk helper declared here */ | 
 | 100 | extern void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai);	/* NOTE(review): presumably releases what pcpu_alloc_alloc_info() returned - confirm */ | 
| Tejun Heo | 033e48f | 2009-08-14 15:00:51 +0900 | [diff] [blame] | 101 |  | 
| Tejun Heo | fd1e8a1 | 2009-08-14 15:00:51 +0900 | [diff] [blame] | 102 | extern struct pcpu_alloc_info * __init pcpu_build_alloc_info( | 
 | 103 | 				size_t reserved_size, ssize_t dyn_size,	/* dyn_size is signed (ssize_t), unlike the other sizes */ | 
 | 104 | 				size_t atom_size, | 
 | 105 | 				pcpu_fc_cpu_distance_fn_t cpu_distance_fn);	/* function-type parameter, adjusted to a pointer */ | 
 | 106 |  | 
| Tejun Heo | fb435d5 | 2009-08-14 15:00:51 +0900 | [diff] [blame] | 107 | extern int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,	/* @ai is const: not modified through this pointer */ | 
 | 108 | 					 void *base_addr); | 
| Tejun Heo | 8d408b4 | 2009-02-24 11:57:21 +0900 | [diff] [blame] | 109 |  | 
| Tejun Heo | 08fc458 | 2009-08-14 15:00:49 +0900 | [diff] [blame] | 110 | #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK	/* declared only when this option is set */ | 
| Tejun Heo | c8826dd | 2009-08-14 15:00:52 +0900 | [diff] [blame] | 111 | extern int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size, | 
 | 112 | 				size_t atom_size, | 
 | 113 | 				pcpu_fc_cpu_distance_fn_t cpu_distance_fn, | 
 | 114 | 				pcpu_fc_alloc_fn_t alloc_fn, | 
 | 115 | 				pcpu_fc_free_fn_t free_fn); | 
| Tejun Heo | 08fc458 | 2009-08-14 15:00:49 +0900 | [diff] [blame] | 116 | #endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK */ | 
| Tejun Heo | 66c3a75 | 2009-03-10 16:27:48 +0900 | [diff] [blame] | 117 |  | 
| Tejun Heo | 08fc458 | 2009-08-14 15:00:49 +0900 | [diff] [blame] | 118 | #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK	/* declared only when this option is set */ | 
| Tejun Heo | fb435d5 | 2009-08-14 15:00:51 +0900 | [diff] [blame] | 119 | extern int __init pcpu_page_first_chunk(size_t reserved_size, | 
| Tejun Heo | d4b95f8 | 2009-07-04 08:10:59 +0900 | [diff] [blame] | 120 | 				pcpu_fc_alloc_fn_t alloc_fn, | 
 | 121 | 				pcpu_fc_free_fn_t free_fn, | 
 | 122 | 				pcpu_fc_populate_pte_fn_t populate_pte_fn); | 
| Tejun Heo | 08fc458 | 2009-08-14 15:00:49 +0900 | [diff] [blame] | 123 | #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ | 
| Tejun Heo | d4b95f8 | 2009-07-04 08:10:59 +0900 | [diff] [blame] | 124 |  | 
| Tejun Heo | fbf59bc | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 125 | /* | 
 | 126 |  * Use this to get to @cpu's version of a dynamically allocated | 
 | 127 |  * per-cpu object.  Non-atomic access to the current CPU's | 
 | 128 |  * version should probably be combined with get_cpu()/put_cpu(). | 
 | 129 |  */ | 
 | 130 | #define per_cpu_ptr(ptr, cpu)	SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) | 
 | 131 |  | 
| Tejun Heo | edcb463 | 2009-03-06 14:33:59 +0900 | [diff] [blame] | 132 | extern void *__alloc_reserved_percpu(size_t size, size_t align);	/* only declared for CONFIG_SMP without CONFIG_HAVE_LEGACY_PER_CPU_AREA */ | 
 | 133 |  | 
| Tejun Heo | e74e396 | 2009-03-30 19:07:44 +0900 | [diff] [blame] | 134 | #else /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */ | 
| Tejun Heo | fbf59bc | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 135 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 136 | struct percpu_data { | 
| Eric Dumazet | b324215 | 2008-02-06 01:37:01 -0800 | [diff] [blame] | 137 | 	void *ptrs[1];	/* indexed by cpu in per_cpu_ptr(); NOTE(review): declared [1], presumably over-allocated to one slot per cpu - confirm */ | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 138 | }; | 
 | 139 |  | 
| Catalin Marinas | 2e1483c | 2009-06-11 13:24:13 +0100 | [diff] [blame] | 140 | /* Bit-inverts the pointer (applying it twice restores it); pointer disguising messes up the kmemleak objects tracking, so it is a plain cast there.  Fully parenthesized so the result can be used directly in postfix expressions. */ | 
 | 141 | #ifndef CONFIG_DEBUG_KMEMLEAK | 
| Martin Peschke | 7ff6f08 | 2006-09-25 23:31:21 -0700 | [diff] [blame] | 142 | #define __percpu_disguise(pdata) ((struct percpu_data *)~(unsigned long)(pdata)) | 
| Catalin Marinas | 2e1483c | 2009-06-11 13:24:13 +0100 | [diff] [blame] | 143 | #else | 
 | 144 | #define __percpu_disguise(pdata) ((struct percpu_data *)(pdata)) | 
 | 145 | #endif /* CONFIG_DEBUG_KMEMLEAK */ | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 146 |  | 
| Rusty Russell | b36128c | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 147 | #define per_cpu_ptr(ptr, cpu)						\ | 
 | 148 | ({									\ | 
 | 149 |         struct percpu_data *__p = __percpu_disguise(ptr); /* convert the handle to the percpu_data pointer */	\ | 
 | 150 |         (__typeof__(ptr))__p->ptrs[(cpu)]; /* @cpu's slot, cast to the type of @ptr */	\ | 
 | 151 | }) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 152 |  | 
| Tejun Heo | e74e396 | 2009-03-30 19:07:44 +0900 | [diff] [blame] | 153 | #endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */ | 
| Tejun Heo | fbf59bc | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 154 |  | 
| Tejun Heo | f2a8205 | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 155 | extern void *__alloc_percpu(size_t size, size_t align);	/* out-of-line on CONFIG_SMP; the !CONFIG_SMP case uses the static inlines below */ | 
 | 156 | extern void free_percpu(void *__pdata); | 
 | 157 |  | 
| Tejun Heo | e74e396 | 2009-03-30 19:07:44 +0900 | [diff] [blame] | 158 | #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA | 
 | 159 | extern void __init setup_per_cpu_areas(void);	/* declared here only without CONFIG_HAVE_SETUP_PER_CPU_AREA */ | 
 | 160 | #endif /* !CONFIG_HAVE_SETUP_PER_CPU_AREA */ | 
 | 161 |  | 
| Tejun Heo | f2a8205 | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 162 | #else /* CONFIG_SMP */ | 
 | 163 |  | 
 | 164 | #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })	/* !CONFIG_SMP: yields @ptr itself; @cpu is evaluated and discarded */ | 
 | 165 |  | 
 | 166 | static inline void *__alloc_percpu(size_t size, size_t align) | 
 | 167 | { | 
 | 168 | 	/* | 
 | 169 | 	 * !CONFIG_SMP version: a plain kzalloc() of @size.  @align is only checked, not applied. | 
 | 170 | 	 * Can't easily make larger alignment work with kmalloc, so WARN on it. | 
 | 171 | 	 * Larger alignment should only be used for module percpu sections on SMP, for which this path isn't used. | 
 | 172 | 	 */ | 
| Tejun Heo | e317603 | 2009-02-26 10:54:17 +0900 | [diff] [blame] | 173 | 	WARN_ON_ONCE(align > SMP_CACHE_BYTES); | 
| Ingo Molnar | d2b0261 | 2009-02-25 14:36:45 +0100 | [diff] [blame] | 174 | 	return kzalloc(size, GFP_KERNEL); | 
| Tejun Heo | f2a8205 | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 175 | } | 
 | 176 |  | 
 | 177 | static inline void free_percpu(void *p) | 
 | 178 | { | 
 | 179 | 	kfree(p);	/* !CONFIG_SMP: pairs with the kzalloc() in __alloc_percpu() above */ | 
 | 180 | } | 
 | 181 |  | 
| Tejun Heo | e74e396 | 2009-03-30 19:07:44 +0900 | [diff] [blame] | 182 | static inline void __init setup_per_cpu_areas(void) { }	/* !CONFIG_SMP: empty stub */ | 
 | 183 |  | 
| Tejun Heo | a76761b | 2009-07-15 23:35:14 +0900 | [diff] [blame] | 184 | static inline void *pcpu_lpage_remapped(void *kaddr) | 
 | 185 | { | 
 | 186 | 	return NULL;	/* !CONFIG_SMP stub: always NULL, @kaddr is unused */ | 
 | 187 | } | 
 | 188 |  | 
| Tejun Heo | f2a8205 | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 189 | #endif /* CONFIG_SMP */ | 
 | 190 |  | 
 | 191 | #define alloc_percpu(type)	((type *)__alloc_percpu(sizeof(type), \ | 
 | 192 | 							__alignof__(type)))	/* typed wrapper: size and alignment come from @type; parenthesized so the result can be used in any expression */ | 
 | 193 |  | 
| Tejun Heo | 066123a | 2009-04-10 12:02:40 -0700 | [diff] [blame] | 194 | /* | 
 | 195 |  * Optional methods for optimized non-lvalue per-cpu variable access. | 
 | 196 |  * | 
 | 197 |  * @var can be a percpu variable or a field of it and its size should | 
 | 198 |  * equal that of char, int or long.  percpu_read() evaluates to a copy | 
 | 199 |  * of @var's value (not an lvalue) and all others to void. | 
 | 200 |  * | 
 | 201 |  * These operations are guaranteed to be atomic w.r.t. preemption. | 
 | 202 |  * The generic versions use plain get/put_cpu_var().  Archs are | 
 | 203 |  * encouraged to implement single-instruction alternatives which don't | 
 | 204 |  * require preemption protection. | 
 | 205 |  */ | 
 | 206 | #ifndef percpu_read | 
 | 207 | # define percpu_read(var)						\ | 
 | 208 |   ({									\ | 
 | 209 | 	typeof(per_cpu_var(var)) __tmp_var__;				\ | 
 | 210 | 	__tmp_var__ = get_cpu_var(var);					\ | 
 | 211 | 	put_cpu_var(var);						\ | 
 | 212 | 	__tmp_var__;							\ | 
 | 213 |   }) | 
 | 214 | #endif /* percpu_read */ | 
 | 215 |  | 
 | 216 | #define __percpu_generic_to_op(var, val, op)				\ | 
 | 217 | do {									\ | 
 | 218 | 	get_cpu_var(var) op val; /* preemption stays disabled across the update */	\ | 
 | 219 | 	put_cpu_var(var);						\ | 
 | 220 | } while (0)	/* generic fallback: applies "op val" to @var; @val is not parenthesized here, the users below pass (val) */ | 
 | 221 |  | 
 | 222 | #ifndef percpu_write	/* each op below is a generic fallback, defined only if not already defined */ | 
 | 223 | # define percpu_write(var, val)		__percpu_generic_to_op(var, (val), =) | 
 | 224 | #endif /* percpu_write */ | 
 | 225 |  | 
 | 226 | #ifndef percpu_add | 
 | 227 | # define percpu_add(var, val)		__percpu_generic_to_op(var, (val), +=) | 
 | 228 | #endif /* percpu_add */ | 
 | 229 |  | 
 | 230 | #ifndef percpu_sub | 
 | 231 | # define percpu_sub(var, val)		__percpu_generic_to_op(var, (val), -=) | 
 | 232 | #endif /* percpu_sub */ | 
 | 233 |  | 
 | 234 | #ifndef percpu_and | 
 | 235 | # define percpu_and(var, val)		__percpu_generic_to_op(var, (val), &=) | 
 | 236 | #endif /* percpu_and */ | 
 | 237 |  | 
 | 238 | #ifndef percpu_or | 
 | 239 | # define percpu_or(var, val)		__percpu_generic_to_op(var, (val), |=) | 
 | 240 | #endif /* percpu_or */ | 
 | 241 |  | 
 | 242 | #ifndef percpu_xor | 
 | 243 | # define percpu_xor(var, val)		__percpu_generic_to_op(var, (val), ^=) | 
 | 244 | #endif /* percpu_xor */ | 
 | 245 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 246 | #endif /* __LINUX_PERCPU_H */ |