| Nicolas Pitre | 37b8304 | 2011-06-19 23:36:03 -0400 | [diff] [blame] | 1 | Kernel-provided User Helpers | 
|  | 2 | ============================ | 
|  | 3 |  | 
|  | 4 | These are segments of kernel-provided user code reachable from user space | 
|  | 5 | at a fixed address in kernel memory.  This is used to provide user space | 
|  | 6 | with some operations which require kernel help because of unimplemented | 
|  | 7 | native features and/or instructions in many ARM CPUs. The idea is for this | 
|  | 8 | code to be executed directly in user mode for best efficiency, but it is | 
|  | 9 | too intimate with its kernel counterpart to be left to user libraries. | 
|  | 10 | In fact this code might even differ from one CPU to another depending on | 
|  | 11 | the available instruction set, or whether it is an SMP system. In other | 
|  | 12 | words, the kernel reserves the right to change this code as needed without | 
|  | 13 | warning. Only the entry points and their results as documented here are | 
|  | 14 | guaranteed to be stable. | 
|  | 15 |  | 
|  | 16 | This is different from (but doesn't preclude) a full-blown VDSO | 
|  | 17 | implementation; however, a VDSO would prevent some assembly tricks with | 
|  | 18 | constants that allow for efficient branching to those code segments. And | 
|  | 19 | since those code segments only use a few cycles before returning to user | 
|  | 20 | code, a VDSO indirect far call would add a measurable overhead to such | 
|  | 21 | minimalistic operations. | 
|  | 22 |  | 
|  | 23 | User space is expected to bypass those helpers and implement those things | 
|  | 24 | inline (either in the code emitted directly by the compiler, or part of | 
|  | 25 | the implementation of a library call) when optimizing for a recent enough | 
|  | 26 | processor that has the necessary native support, but only if resulting | 
|  | 27 | binaries will already be incompatible with earlier ARM processors due to | 
|  | 28 | usage of similar native instructions for other things.  In other words | 
|  | 29 | don't make binaries unable to run on earlier processors just for the sake | 
|  | 30 | of not using these kernel helpers if your compiled code is not going to | 
|  | 31 | use new instructions for other purposes. | 
|  | 32 |  | 
|  | 33 | New helpers may be added over time, so an older kernel may be missing some | 
|  | 34 | helpers present in a newer kernel.  For this reason, programs must check | 
|  | 35 | the value of __kuser_helper_version (see below) before assuming that it is | 
|  | 36 | safe to call any particular helper.  This check should ideally be | 
|  | 37 | performed only once at process startup time, and execution aborted early | 
|  | 38 | if the required helpers are not provided by the kernel version that | 
|  | 39 | process is running on. | 
|  | 40 |  | 
|  | 41 | kuser_helper_version | 
|  | 42 | -------------------- | 
|  | 43 |  | 
|  | 44 | Location:	0xffff0ffc | 
|  | 45 |  | 
|  | 46 | Reference declaration: | 
|  | 47 |  | 
|  | 48 | extern int32_t __kuser_helper_version; | 
|  | 49 |  | 
|  | 50 | Definition: | 
|  | 51 |  | 
|  | 52 | This field contains the number of helpers being implemented by the | 
|  | 53 | running kernel.  User space may read this to determine the availability | 
|  | 54 | of a particular helper. | 
|  | 55 |  | 
|  | 56 | Usage example: | 
|  | 57 |  | 
|  | 58 | #define __kuser_helper_version (*(int32_t *)0xffff0ffc) | 
|  | 59 |  | 
|  | 60 | void check_kuser_version(void) | 
|  | 61 | { | 
|  | 62 | if (__kuser_helper_version < 2) { | 
|  | 63 | fprintf(stderr, "can't do atomic operations, kernel too old\n"); | 
|  | 64 | abort(); | 
|  | 65 | } | 
|  | 66 | } | 
|  | 67 |  | 
|  | 68 | Notes: | 
|  | 69 |  | 
|  | 70 | User space may assume that the value of this field never changes | 
|  | 71 | during the lifetime of any single process.  This means that this | 
|  | 72 | field can be read once during the initialisation of a library or | 
|  | 73 | startup phase of a program. | 
|  | 74 |  | 
|  | 75 | kuser_get_tls | 
|  | 76 | ------------- | 
|  | 77 |  | 
|  | 78 | Location:	0xffff0fe0 | 
|  | 79 |  | 
|  | 80 | Reference prototype: | 
|  | 81 |  | 
|  | 82 | void * __kuser_get_tls(void); | 
|  | 83 |  | 
|  | 84 | Input: | 
|  | 85 |  | 
|  | 86 | lr = return address | 
|  | 87 |  | 
|  | 88 | Output: | 
|  | 89 |  | 
|  | 90 | r0 = TLS value | 
|  | 91 |  | 
|  | 92 | Clobbered registers: | 
|  | 93 |  | 
|  | 94 | none | 
|  | 95 |  | 
|  | 96 | Definition: | 
|  | 97 |  | 
|  | 98 | Get the TLS value as previously set via the __ARM_NR_set_tls syscall. | 
|  | 99 |  | 
|  | 100 | Usage example: | 
|  | 101 |  | 
|  | 102 | typedef void * (__kuser_get_tls_t)(void); | 
|  | 103 | #define __kuser_get_tls (*(__kuser_get_tls_t *)0xffff0fe0) | 
|  | 104 |  | 
|  | 105 | void foo() | 
|  | 106 | { | 
|  | 107 | void *tls = __kuser_get_tls(); | 
|  | 108 | printf("TLS = %p\n", tls); | 
|  | 109 | } | 
|  | 110 |  | 
|  | 111 | Notes: | 
|  | 112 |  | 
|  | 113 | - Valid only if __kuser_helper_version >= 1 (from kernel version 2.6.12). | 
|  | 114 |  | 
|  | 115 | kuser_cmpxchg | 
|  | 116 | ------------- | 
|  | 117 |  | 
|  | 118 | Location:	0xffff0fc0 | 
|  | 119 |  | 
|  | 120 | Reference prototype: | 
|  | 121 |  | 
|  | 122 | int __kuser_cmpxchg(int32_t oldval, int32_t newval, volatile int32_t *ptr); | 
|  | 123 |  | 
|  | 124 | Input: | 
|  | 125 |  | 
|  | 126 | r0 = oldval | 
|  | 127 | r1 = newval | 
|  | 128 | r2 = ptr | 
|  | 129 | lr = return address | 
|  | 130 |  | 
|  | 131 | Output: | 
|  | 132 |  | 
|  | 133 | r0 = success code (zero or non-zero) | 
|  | 134 | C flag = set if r0 == 0, clear if r0 != 0 | 
|  | 135 |  | 
|  | 136 | Clobbered registers: | 
|  | 137 |  | 
|  | 138 | r3, ip, flags | 
|  | 139 |  | 
|  | 140 | Definition: | 
|  | 141 |  | 
|  | 142 | Atomically store newval in *ptr only if *ptr is equal to oldval. | 
|  | 143 | Return zero if *ptr was changed or non-zero if no exchange happened. | 
|  | 144 | The C flag is also set if *ptr was changed to allow for assembly | 
|  | 145 | optimization in the calling code. | 
|  | 146 |  | 
|  | 147 | Usage example: | 
|  | 148 |  | 
|  | 149 | typedef int (__kuser_cmpxchg_t)(int oldval, int newval, volatile int *ptr); | 
|  | 150 | #define __kuser_cmpxchg (*(__kuser_cmpxchg_t *)0xffff0fc0) | 
|  | 151 |  | 
|  | 152 | int atomic_add(volatile int *ptr, int val) | 
|  | 153 | { | 
|  | 154 | int old, new; | 
|  | 155 |  | 
|  | 156 | do { | 
|  | 157 | old = *ptr; | 
|  | 158 | new = old + val; | 
|  | 159 | } while(__kuser_cmpxchg(old, new, ptr)); | 
|  | 160 |  | 
|  | 161 | return new; | 
|  | 162 | } | 
|  | 163 |  | 
|  | 164 | Notes: | 
|  | 165 |  | 
|  | 166 | - This routine already includes memory barriers as needed. | 
|  | 167 |  | 
|  | 168 | - Valid only if __kuser_helper_version >= 2 (from kernel version 2.6.12). | 
|  | 169 |  | 
|  | 170 | kuser_memory_barrier | 
|  | 171 | -------------------- | 
|  | 172 |  | 
|  | 173 | Location:	0xffff0fa0 | 
|  | 174 |  | 
|  | 175 | Reference prototype: | 
|  | 176 |  | 
|  | 177 | void __kuser_memory_barrier(void); | 
|  | 178 |  | 
|  | 179 | Input: | 
|  | 180 |  | 
|  | 181 | lr = return address | 
|  | 182 |  | 
|  | 183 | Output: | 
|  | 184 |  | 
|  | 185 | none | 
|  | 186 |  | 
|  | 187 | Clobbered registers: | 
|  | 188 |  | 
|  | 189 | none | 
|  | 190 |  | 
|  | 191 | Definition: | 
|  | 192 |  | 
|  | 193 | Apply any needed memory barrier to preserve consistency with data modified | 
|  | 194 | manually and __kuser_cmpxchg usage. | 
|  | 195 |  | 
|  | 196 | Usage example: | 
|  | 197 |  | 
|  | 198 | typedef void (__kuser_dmb_t)(void); | 
|  | 199 | #define __kuser_dmb (*(__kuser_dmb_t *)0xffff0fa0) | 
|  | 200 |  | 
|  | 201 | Notes: | 
|  | 202 |  | 
|  | 203 | - Valid only if __kuser_helper_version >= 3 (from kernel version 2.6.15). | 
| Nicolas Pitre | 40fb79c | 2011-06-19 23:36:03 -0400 | [diff] [blame] | 204 |  | 
|  | 205 | kuser_cmpxchg64 | 
|  | 206 | --------------- | 
|  | 207 |  | 
|  | 208 | Location:	0xffff0f60 | 
|  | 209 |  | 
|  | 210 | Reference prototype: | 
|  | 211 |  | 
|  | 212 | int __kuser_cmpxchg64(const int64_t *oldval, | 
|  | 213 | const int64_t *newval, | 
|  | 214 | volatile int64_t *ptr); | 
|  | 215 |  | 
|  | 216 | Input: | 
|  | 217 |  | 
|  | 218 | r0 = pointer to oldval | 
|  | 219 | r1 = pointer to newval | 
|  | 220 | r2 = pointer to target value | 
|  | 221 | lr = return address | 
|  | 222 |  | 
|  | 223 | Output: | 
|  | 224 |  | 
|  | 225 | r0 = success code (zero or non-zero) | 
|  | 226 | C flag = set if r0 == 0, clear if r0 != 0 | 
|  | 227 |  | 
|  | 228 | Clobbered registers: | 
|  | 229 |  | 
|  | 230 | r3, lr, flags | 
|  | 231 |  | 
|  | 232 | Definition: | 
|  | 233 |  | 
|  | 234 | Atomically store the 64-bit value pointed to by newval in *ptr only if *ptr | 
|  | 235 | is equal to the 64-bit value pointed to by oldval.  Return zero if *ptr was | 
|  | 236 | changed or non-zero if no exchange happened. | 
|  | 237 |  | 
|  | 238 | The C flag is also set if *ptr was changed to allow for assembly | 
|  | 239 | optimization in the calling code. | 
|  | 240 |  | 
|  | 241 | Usage example: | 
|  | 242 |  | 
|  | 243 | typedef int (__kuser_cmpxchg64_t)(const int64_t *oldval, | 
|  | 244 | const int64_t *newval, | 
|  | 245 | volatile int64_t *ptr); | 
|  | 246 | #define __kuser_cmpxchg64 (*(__kuser_cmpxchg64_t *)0xffff0f60) | 
|  | 247 |  | 
|  | 248 | int64_t atomic_add64(volatile int64_t *ptr, int64_t val) | 
|  | 249 | { | 
|  | 250 | int64_t old, new; | 
|  | 251 |  | 
|  | 252 | do { | 
|  | 253 | old = *ptr; | 
|  | 254 | new = old + val; | 
|  | 255 | } while(__kuser_cmpxchg64(&old, &new, ptr)); | 
|  | 256 |  | 
|  | 257 | return new; | 
|  | 258 | } | 
|  | 259 |  | 
|  | 260 | Notes: | 
|  | 261 |  | 
|  | 262 | - This routine already includes memory barriers as needed. | 
|  | 263 |  | 
|  | 264 | - Due to the length of this sequence, this spans 2 conventional kuser | 
|  | 265 | "slots"; therefore, 0xffff0f80 is not used as a valid entry point. | 
|  | 266 |  | 
|  | 267 | - Valid only if __kuser_helper_version >= 5 (from kernel version 3.1). |