Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | #ifndef __LINUX_PERCPU_H |
| 2 | #define __LINUX_PERCPU_H |
Martin Peschke | 7ff6f08 | 2006-09-25 23:31:21 -0700 | [diff] [blame] | 3 | |
Robert P. J. Day | 0a3021f | 2007-07-15 23:39:57 -0700 | [diff] [blame] | 4 | #include <linux/preempt.h> |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 5 | #include <linux/slab.h> /* For kmalloc() */ |
| 6 | #include <linux/smp.h> |
Martin Peschke | 7ff6f08 | 2006-09-25 23:31:21 -0700 | [diff] [blame] | 7 | #include <linux/cpumask.h> |
Tejun Heo | 6a24290 | 2009-03-06 14:33:58 +0900 | [diff] [blame] | 8 | #include <linux/pfn.h> |
Martin Peschke | 7ff6f08 | 2006-09-25 23:31:21 -0700 | [diff] [blame] | 9 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 10 | #include <asm/percpu.h> |
| 11 | |
/*
 * Per-cpu space set aside for DEFINE_PER_CPU variables that live in
 * modules.  Nothing is reserved when CONFIG_MODULES is not set.
 */
#ifndef CONFIG_MODULES
#define PERCPU_MODULE_RESERVE	0
#else
#define PERCPU_MODULE_RESERVE	(8 << 10)
#endif
| 18 | |
/*
 * Default size of the per-cpu area: the distance between __per_cpu_start
 * and __per_cpu_end, passed through ALIGN() with SMP_CACHE_BYTES, plus
 * PERCPU_MODULE_RESERVE.  Only provided when nothing included earlier
 * has already defined PERCPU_ENOUGH_ROOM.
 */
#if !defined(PERCPU_ENOUGH_ROOM)
#define PERCPU_ENOUGH_ROOM						\
	(PERCPU_MODULE_RESERVE +					\
	 ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES))
#endif
Jeremy Fitzhardinge | b00742d | 2007-05-02 19:27:11 +0200 | [diff] [blame] | 24 | |
/*
 * get_cpu_var(var) calls preempt_disable() and then evaluates to the
 * lvalue __get_cpu_var(var); put_cpu_var(var) calls preempt_enable()
 * and does not otherwise use its argument.
 *
 * The result has to be an lvalue, and @var has to be a simple
 * identifier: pasting it into the dummy declaration below turns
 * anything else into a syntax error.
 */
#define get_cpu_var(var)						\
(*({									\
	extern int simple_identifier_##var(void);			\
	preempt_disable();						\
	&__get_cpu_var(var);						\
}))

#define put_cpu_var(var)	preempt_enable()
| 34 | |
| 35 | #ifdef CONFIG_SMP |
| 36 | |
Tejun Heo | e74e396 | 2009-03-30 19:07:44 +0900 | [diff] [blame] | 37 | #ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA |
Tejun Heo | fbf59bc | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 38 | |
/*
 * Smallest unit size.  The same value is also the largest allocation
 * size that is supported.
 */
#define PCPU_MIN_UNIT_SIZE	PFN_ALIGN(64 << 10)
Tejun Heo | 8d408b4 | 2009-02-24 11:57:21 +0900 | [diff] [blame] | 41 | |
/*
 * PERCPU_DYNAMIC_RESERVE is the amount of free area to piggy back on
 * the first chunk for dynamic percpu allocation when the arch allocates
 * and maps that chunk by hand for faster access (for example as part
 * of a large page mapping).
 *
 * The values below leave between one and two pages free after a
 * typical minimal boot (2-way SMP, single disk and NIC) with both
 * defconfig and a distro config on x86_64 and x86_32.  A smarter way
 * of determining this would be welcome.
 */
#if BITS_PER_LONG <= 32
#define PERCPU_DYNAMIC_RESERVE	(12 << 10)
#else
#define PERCPU_DYNAMIC_RESERVE	(20 << 10)
#endif
Tejun Heo | 8d408b4 | 2009-02-24 11:57:21 +0900 | [diff] [blame] | 58 | |
/*
 * Per-cpu allocator globals; only declared here.
 * NOTE(review): presumably the base address of the first chunk and the
 * table of per-unit offsets from it - confirm against the definitions.
 */
extern const unsigned long	*pcpu_unit_offsets;
extern void			*pcpu_base_addr;
Tejun Heo | fbf59bc | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 61 | |
/* Description of one group of units. */
struct pcpu_group_info {
	/* aligned # of units */
	int		nr_units;
	/* base address offset */
	unsigned long	base_offset;
	/* unit->cpu map; empty entries contain NR_CPUS */
	unsigned int	*cpu_map;
};
| 68 | |
| 69 | struct pcpu_alloc_info { |
| 70 | size_t static_size; |
| 71 | size_t reserved_size; |
| 72 | size_t dyn_size; |
| 73 | size_t unit_size; |
| 74 | size_t atom_size; |
| 75 | size_t alloc_size; |
| 76 | size_t __ai_size; /* internal, don't use */ |
| 77 | int nr_groups; /* 0 if grouping unnecessary */ |
| 78 | struct pcpu_group_info groups[]; |
| 79 | }; |
| 80 | |
/*
 * Selector values of type enum pcpu_fc.  PCPU_FC_NR is not a selector
 * itself: it counts the entries before it and sizes pcpu_fc_names[].
 * NOTE(review): EMBED/PAGE/LPAGE presumably match the
 * pcpu_{embed,page,lpage}_first_chunk() helpers - confirm.
 */
enum pcpu_fc {
	PCPU_FC_AUTO,
	PCPU_FC_EMBED,
	PCPU_FC_PAGE,
	PCPU_FC_LPAGE,

	PCPU_FC_NR,	/* number of entries above */
};

/* one name string per selector value */
extern const char *pcpu_fc_names[PCPU_FC_NR];

/* the selector currently chosen */
extern enum pcpu_fc pcpu_chosen_fc;
| 92 | |
/*
 * Callback types taken by the first-chunk setup helpers.
 *
 * Note that pcpu_fc_cpu_distance_fn_t names a function type rather than
 * a pointer to function like its siblings; when used as a parameter
 * type it is adjusted to the corresponding pointer type.
 */
typedef void *(*pcpu_fc_alloc_fn_t)(unsigned int cpu,
				    size_t size, size_t align);
typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size);
typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from,
					unsigned int to);
typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr);
Tejun Heo | fbf59bc | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 99 | |
Tejun Heo | fd1e8a1 | 2009-08-14 15:00:51 +0900 | [diff] [blame] | 100 | extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, |
| 101 | int nr_units); |
| 102 | extern void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai); |
Tejun Heo | 033e48f | 2009-08-14 15:00:51 +0900 | [diff] [blame] | 103 | |
Tejun Heo | fd1e8a1 | 2009-08-14 15:00:51 +0900 | [diff] [blame] | 104 | extern struct pcpu_alloc_info * __init pcpu_build_alloc_info( |
| 105 | size_t reserved_size, ssize_t dyn_size, |
| 106 | size_t atom_size, |
| 107 | pcpu_fc_cpu_distance_fn_t cpu_distance_fn); |
| 108 | |
Tejun Heo | fb435d5 | 2009-08-14 15:00:51 +0900 | [diff] [blame] | 109 | extern int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, |
| 110 | void *base_addr); |
Tejun Heo | 8d408b4 | 2009-02-24 11:57:21 +0900 | [diff] [blame] | 111 | |
/* Only declared when CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK is set. */
#if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK)
extern int __init
pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size,
		       size_t atom_size,
		       pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
		       pcpu_fc_alloc_fn_t alloc_fn,
		       pcpu_fc_free_fn_t free_fn);
#endif
Tejun Heo | 66c3a75 | 2009-03-10 16:27:48 +0900 | [diff] [blame] | 119 | |
/* Only declared when CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK is set. */
#if defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK)
extern int __init
pcpu_page_first_chunk(size_t reserved_size,
		      pcpu_fc_alloc_fn_t alloc_fn,
		      pcpu_fc_free_fn_t free_fn,
		      pcpu_fc_populate_pte_fn_t populate_pte_fn);
#endif
Tejun Heo | d4b95f8 | 2009-07-04 08:10:59 +0900 | [diff] [blame] | 126 | |
/*
 * Without CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK only a stub
 * pcpu_lpage_remapped() exists, and it always returns NULL.  With it,
 * both functions below are provided elsewhere.
 */
#ifndef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK

static inline void *pcpu_lpage_remapped(void *kaddr)
{
	/* @kaddr is deliberately not looked at */
	return NULL;
}

#else

extern int __init
pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai,
		       pcpu_fc_alloc_fn_t alloc_fn,
		       pcpu_fc_free_fn_t free_fn,
		       pcpu_fc_map_fn_t map_fn);

extern void *pcpu_lpage_remapped(void *kaddr);

#endif
| 140 | |
/*
 * per_cpu_ptr(ptr, cpu) yields @cpu's instance of a dynamically
 * allocated per-cpu object by handing @ptr and per_cpu_offset(@cpu) to
 * SHIFT_PERCPU_PTR().  Non-atomic access to the current CPU's instance
 * should probably be bracketed by get_cpu()/put_cpu().
 */
#define per_cpu_ptr(ptr, cpu)						\
	SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu)))
| 147 | |
/*
 * Declared here, defined elsewhere.
 * NOTE(review): presumably allocates @size bytes with alignment @align
 * from the reserved part of the per-cpu area - confirm at the definition.
 */
extern void *
__alloc_reserved_percpu(size_t size, size_t align);
| 149 | |
Tejun Heo | e74e396 | 2009-03-30 19:07:44 +0900 | [diff] [blame] | 150 | #else /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */ |
Tejun Heo | fbf59bc | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 151 | |
/*
 * Legacy per-cpu bookkeeping: a table of pointers that per_cpu_ptr()
 * indexes by cpu number.  Declared with a single slot.
 */
struct percpu_data {
	void	*ptrs[1];
};
| 155 | |
/*
 * __percpu_disguise - convert @pdata to a struct percpu_data pointer
 *
 * Without CONFIG_DEBUG_KMEMLEAK the value is bitwise complemented on the
 * way, so applying the macro twice gives back the original pointer.
 * With CONFIG_DEBUG_KMEMLEAK it is a plain cast, because pointer
 * disguising messes up the kmemleak objects tracking.
 *
 * The expansion is fully parenthesized so that it behaves as a single
 * expression: __percpu_disguise(p)->ptrs[i] applies "->" to the result
 * of the cast rather than to @pdata.
 */
#ifdef CONFIG_DEBUG_KMEMLEAK
#define __percpu_disguise(pdata)					\
	((struct percpu_data *)(pdata))
#else
#define __percpu_disguise(pdata)					\
	((struct percpu_data *)~(unsigned long)(pdata))
#endif
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 162 | |
/*
 * Legacy per_cpu_ptr(): run @ptr through __percpu_disguise() to reach
 * the struct percpu_data, then return slot @cpu of its pointer table
 * cast to the type of @ptr.
 */
#define per_cpu_ptr(ptr, cpu)						\
({									\
	struct percpu_data *__pcpu_data = __percpu_disguise(ptr);	\
									\
	(__typeof__(ptr))__pcpu_data->ptrs[(cpu)];			\
})
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 168 | |
Tejun Heo | e74e396 | 2009-03-30 19:07:44 +0900 | [diff] [blame] | 169 | #endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */ |
Tejun Heo | fbf59bc | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 170 | |
Tejun Heo | f2a8205 | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 171 | extern void *__alloc_percpu(size_t size, size_t align); |
| 172 | extern void free_percpu(void *__pdata); |
| 173 | |
Tejun Heo | e74e396 | 2009-03-30 19:07:44 +0900 | [diff] [blame] | 174 | #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA |
| 175 | extern void __init setup_per_cpu_areas(void); |
| 176 | #endif |
| 177 | |
Tejun Heo | f2a8205 | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 178 | #else /* CONFIG_SMP */ |
| 179 | |
/*
 * Uniprocessor per_cpu_ptr(): @cpu is evaluated once and discarded, and
 * the result is @ptr itself.
 */
#define per_cpu_ptr(ptr, cpu)						\
({									\
	(void)(cpu);							\
	(ptr);								\
})
| 181 | |
| 182 | static inline void *__alloc_percpu(size_t size, size_t align) |
| 183 | { |
| 184 | /* |
| 185 | * Can't easily make larger alignment work with kmalloc. WARN |
| 186 | * on it. Larger alignment should only be used for module |
| 187 | * percpu sections on SMP for which this path isn't used. |
| 188 | */ |
Tejun Heo | e317603 | 2009-02-26 10:54:17 +0900 | [diff] [blame] | 189 | WARN_ON_ONCE(align > SMP_CACHE_BYTES); |
Ingo Molnar | d2b0261 | 2009-02-25 14:36:45 +0100 | [diff] [blame] | 190 | return kzalloc(size, GFP_KERNEL); |
Tejun Heo | f2a8205 | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 191 | } |
| 192 | |
/* Uniprocessor free_percpu(): pass @p straight to kfree(). */
static inline void free_percpu(void *p)
{
	kfree(p);
}
| 197 | |
Tejun Heo | e74e396 | 2009-03-30 19:07:44 +0900 | [diff] [blame] | 198 | static inline void __init setup_per_cpu_areas(void) { } |
| 199 | |
/*
 * Uniprocessor pcpu_lpage_remapped(): always returns NULL, whatever
 * @kaddr is.
 */
static inline void *pcpu_lpage_remapped(void *kaddr)
{
	/* @kaddr is deliberately not looked at */
	return NULL;
}
| 204 | |
Tejun Heo | f2a8205 | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 205 | #endif /* CONFIG_SMP */ |
| 206 | |
/*
 * alloc_percpu - allocate a per-cpu object of type @type
 *
 * Calls __alloc_percpu() with sizeof(type) and __alignof__(type) and
 * casts the result to "type *".
 *
 * The expansion is wrapped in parentheses so that it acts as a single
 * expression; without them a postfix operator following the macro
 * (e.g. "->member") would bind to the __alloc_percpu() call instead of
 * to the cast result.
 */
#define alloc_percpu(type)						\
	((type *)__alloc_percpu(sizeof(type), __alignof__(type)))
| 209 | |
Tejun Heo | 066123a | 2009-04-10 12:02:40 -0700 | [diff] [blame] | 210 | /* |
| 211 | * Optional methods for optimized non-lvalue per-cpu variable access. |
| 212 | * |
| 213 | * @var can be a percpu variable or a field of it and its size should |
| 214 | * equal that of char, int or long. percpu_read() evaluates to an |
| 215 | * rvalue and all others to void. |
| 216 | * |
| 217 | * These operations are guaranteed to be atomic w.r.t. preemption. |
| 218 | * The generic versions use plain get/put_cpu_var(). Archs are |
| 219 | * encouraged to implement single-instruction alternatives which don't |
| 220 | * require preemption protection. |
| 221 | */ |
/*
 * Generic percpu_read(), used only when nothing has defined
 * percpu_read already.  Copies the variable into a temporary between
 * get_cpu_var() and put_cpu_var() and evaluates to that copy, i.e. to
 * a value, not to an lvalue.
 *
 * __typeof__ is used instead of typeof so the macro also compiles in
 * strict ISO modes, where the plain keyword is not available.
 */
#ifndef percpu_read
# define percpu_read(var)						\
  ({									\
	__typeof__(per_cpu_var(var)) __tmp_var__;			\
	__tmp_var__ = get_cpu_var(var);					\
	put_cpu_var(var);						\
	__tmp_var__;							\
  })
#endif
| 231 | |
/*
 * Generic helper behind the percpu_*() modifiers: applies "@op @val" to
 * the lvalue obtained from get_cpu_var(@var), then calls
 * put_cpu_var(@var).  Expands to a single statement.
 */
#define __percpu_generic_to_op(var, val, op)				\
	do {								\
		get_cpu_var(var) op val;				\
		put_cpu_var(var);					\
	} while (0)
| 237 | |
/*
 * Generic fallbacks for the percpu modifiers.  Each one is defined only
 * if nothing has defined it already, and forwards to
 * __percpu_generic_to_op() with the matching assignment operator.
 */
#if !defined(percpu_write)
#define percpu_write(var, val)	__percpu_generic_to_op(var, (val), =)
#endif

#if !defined(percpu_add)
#define percpu_add(var, val)	__percpu_generic_to_op(var, (val), +=)
#endif

#if !defined(percpu_sub)
#define percpu_sub(var, val)	__percpu_generic_to_op(var, (val), -=)
#endif

#if !defined(percpu_and)
#define percpu_and(var, val)	__percpu_generic_to_op(var, (val), &=)
#endif

#if !defined(percpu_or)
#define percpu_or(var, val)	__percpu_generic_to_op(var, (val), |=)
#endif

#if !defined(percpu_xor)
#define percpu_xor(var, val)	__percpu_generic_to_op(var, (val), ^=)
#endif
| 261 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 262 | #endif /* __LINUX_PERCPU_H */ |