/*
 * SLOB Allocator: Simple List Of Blocks
 *
 * Matt Mackall <mpm@selenic.com> 12/30/03
 *
 * NUMA support by Paul Mundt, 2007.
 *
 * How SLOB works:
 *
 * The core of SLOB is a traditional K&R style heap allocator, with
 * support for returning aligned objects. The granularity of this
 * allocator is as little as 2 bytes; however, most architectures will
 * typically require 4 bytes on 32-bit and 8 bytes on 64-bit.
 *
 * The slob heap is a linked list of pages from alloc_pages(), and
 * within each page, there is a singly-linked list of free blocks (slob_t).
 * The heap is grown on demand and allocation from the heap is currently
 * first-fit.
 *
 * Above this is an implementation of kmalloc/kfree. Blocks returned
 * from kmalloc are prepended with a 4-byte header recording the kmalloc
 * size. If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
 * alloc_pages() directly, allocating compound pages so the page order
 * does not have to be separately tracked. It also stores the exact
 * allocation size in page->private so that it can be used to accurately
 * provide ksize(). These objects are detected in kfree() because slob_page()
 * is false for them.
 *
 * SLAB is emulated on top of SLOB by simply calling constructors for
 * every SLAB allocation. Objects are returned with 4-byte alignment
 * unless the SLAB_HWCACHE_ALIGN flag is set, in which case the
 * low-level allocator will fragment blocks to create the proper
 * alignment. Again, objects of page-size or greater are allocated by
 * calling alloc_pages(). As SLAB objects know their size, no separate
 * size bookkeeping is necessary and there is essentially no allocation
 * space overhead, and compound pages aren't needed for multi-page
 * allocations.
 *
 * NUMA support in SLOB is fairly simplistic, pushing most of the real
 * logic down to the page allocator, and simply doing the node accounting
 * on the upper levels. In the event that a node id is explicitly
 * provided, alloc_pages_node() with the specified node id is used
 * instead. The common case (or when the node id isn't explicitly provided)
 * will default to the current node, as per numa_node_id().
 *
 * Node-aware pages are still inserted into the global freelist, and
 * these are scanned for by matching against the node id encoded in the
 * page flags. As a result, block allocations that can be satisfied from
 * the freelist are only taken from pages residing on the same node, in
 * order to prevent random node placement.
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <asm/atomic.h>

/*
 * slob_block has a single field 'units': if positive, it is the size of
 * the block; if negative, it is the negated offset of the next free
 * block (both measured in SLOB_UNITs).
 *
 * Free blocks of size 1 unit simply contain the offset of the next block.
 * Those with larger size contain their size in the first SLOB_UNIT of
 * memory, and the offset of the next free block in the second SLOB_UNIT.
 */
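/*
 * Illustrative example, not part of the allocator (and assuming a
 * 4-byte slob_t, as on a typical 32-bit build): a free block of 5 units
 * whose next free block lives 20 units from the page base is encoded as
 * s[0].units = 5, s[1].units = 20. A 1-unit free block with the same
 * successor fits the encoding into a single unit as s[0].units = -20.
 */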
#if PAGE_SIZE <= (32767 * 2)
typedef s16 slobidx_t;
#else
typedef s32 slobidx_t;
#endif

struct slob_block {
	slobidx_t units;
};
typedef struct slob_block slob_t;

/*
 * We use struct page fields to manage some slob allocation aspects,
 * however to avoid the horrible mess in include/linux/mm_types.h, we'll
 * just define our own struct page type variant here.
 */
struct slob_page {
	union {
		struct {
			unsigned long flags;	/* mandatory */
			atomic_t _count;	/* mandatory */
			slobidx_t units;	/* free units left in page */
			unsigned long pad[2];
			slob_t *free;		/* first free slob_t in page */
			struct list_head list;	/* linked list of free pages */
		};
		struct page page;
	};
};
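
/*
 * Compile-time check that the anonymous struct above still mirrors
 * struct page: BUILD_BUG_ON() must appear inside a function, so it is
 * wrapped in a dummy one that is never called.
 */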
static inline void struct_slob_page_wrong_size(void)
{ BUILD_BUG_ON(sizeof(struct slob_page) != sizeof(struct page)); }

/*
 * free_slob_page: call before a slob_page is returned to the page allocator.
 */
static inline void free_slob_page(struct slob_page *sp)
{
	reset_page_mapcount(&sp->page);
	sp->page.mapping = NULL;
}

/*
 * All (partially) free slob pages go on this list.
 */
static LIST_HEAD(free_slob_pages);

/*
 * slob_page: true for all slob pages (false for bigblock pages)
 */
static inline int slob_page(struct slob_page *sp)
{
	return test_bit(PG_active, &sp->flags);
}

static inline void set_slob_page(struct slob_page *sp)
{
	__set_bit(PG_active, &sp->flags);
}

static inline void clear_slob_page(struct slob_page *sp)
{
	__clear_bit(PG_active, &sp->flags);
}

/*
 * slob_page_free: true for pages on free_slob_pages list.
 */
static inline int slob_page_free(struct slob_page *sp)
{
	return test_bit(PG_private, &sp->flags);
}

static inline void set_slob_page_free(struct slob_page *sp)
{
	list_add(&sp->list, &free_slob_pages);
	__set_bit(PG_private, &sp->flags);
}

static inline void clear_slob_page_free(struct slob_page *sp)
{
	list_del(&sp->list);
	__clear_bit(PG_private, &sp->flags);
}

#define SLOB_UNIT sizeof(slob_t)
#define SLOB_UNITS(size) (((size) + SLOB_UNIT - 1)/SLOB_UNIT)
#define SLOB_ALIGN L1_CACHE_BYTES
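
/*
 * Worked example (illustrative only, assuming sizeof(slob_t) == 4): a
 * 10-byte request rounds up to SLOB_UNITS(10) == 3 units, i.e. 12 bytes,
 * since every allocation occupies a whole number of units.
 */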

/*
 * struct slob_rcu is inserted at the tail of allocated slob blocks, which
 * were created with a SLAB_DESTROY_BY_RCU slab. slob_rcu is used to free
 * the block using call_rcu.
 */
struct slob_rcu {
	struct rcu_head head;
	int size;
};
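
/*
 * Resulting block layout for a SLAB_DESTROY_BY_RCU cache (sketch, sizes
 * not to scale; kmem_cache_create() grows c->size to make room):
 *
 *	|<--------------- c->size --------------->|
 *	| object ...            | struct slob_rcu |
 *
 * kmem_rcu_free() steps back from the footer to recover the block base.
 */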

/*
 * slob_lock protects all slob allocator structures.
 */
static DEFINE_SPINLOCK(slob_lock);

/*
 * Encode the given size and next info into a free slob block s.
 */
static void set_slob(slob_t *s, slobidx_t size, slob_t *next)
{
	slob_t *base = (slob_t *)((unsigned long)s & PAGE_MASK);
	slobidx_t offset = next - base;

	if (size > 1) {
		s[0].units = size;
		s[1].units = offset;
	} else
		s[0].units = -offset;
}

/*
 * Return the size of a slob block.
 */
static slobidx_t slob_units(slob_t *s)
{
	if (s->units > 0)
		return s->units;
	return 1;
}

/*
 * Return the next free slob block pointer after this one.
 */
static slob_t *slob_next(slob_t *s)
{
	slob_t *base = (slob_t *)((unsigned long)s & PAGE_MASK);
	slobidx_t next;

	if (s[0].units < 0)
		next = -s[0].units;
	else
		next = s[1].units;
	return base+next;
}

/*
 * Returns true if s is the last free block in its page.
 */
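 * (An encoded 'next' pointer that lands exactly on a page boundary can
 * never be a real in-page successor, so it acts as the list terminator.)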
 */
static int slob_last(slob_t *s)
{
	return !((unsigned long)slob_next(s) & ~PAGE_MASK);
}

static void *slob_new_page(gfp_t gfp, int order, int node)
{
	void *page;

#ifdef CONFIG_NUMA
	if (node != -1)
		page = alloc_pages_node(node, gfp, order);
	else
#endif
		page = alloc_pages(gfp, order);

	if (!page)
		return NULL;

	return page_address(page);
}

/*
 * Allocate a slob block within a given slob_page sp.
 */
static void *slob_page_alloc(struct slob_page *sp, size_t size, int align)
{
	slob_t *prev, *cur, *aligned = NULL;
	int delta = 0, units = SLOB_UNITS(size);

	for (prev = NULL, cur = sp->free; ; prev = cur, cur = slob_next(cur)) {
		slobidx_t avail = slob_units(cur);

		if (align) {
			aligned = (slob_t *)ALIGN((unsigned long)cur, align);
			delta = aligned - cur;
		}
		if (avail >= units + delta) { /* room enough? */
			slob_t *next;

			if (delta) { /* need to fragment head to align? */
				next = slob_next(cur);
				set_slob(aligned, avail - delta, next);
				set_slob(cur, delta, aligned);
				prev = cur;
				cur = aligned;
				avail = slob_units(cur);
			}

			next = slob_next(cur);
			if (avail == units) { /* exact fit? unlink. */
				if (prev)
					set_slob(prev, slob_units(prev), next);
				else
					sp->free = next;
			} else { /* fragment */
				if (prev)
					set_slob(prev, slob_units(prev), cur + units);
				else
					sp->free = cur + units;
				set_slob(cur + units, avail - units, next);
			}

			sp->units -= units;
			if (!sp->units)
				clear_slob_page_free(sp);
			return cur;
		}
		if (slob_last(cur))
			return NULL;
	}
}
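
/*
 * Worked example for the alignment path above (illustrative, assuming
 * 4-byte units): a request for 2 units at 8-byte alignment that finds a
 * free block starting at byte offset 4 computes delta = 1 unit. Provided
 * the block holds at least 3 free units, that head unit is split off as
 * a new 1-unit free block and the allocation is carved from the
 * now-aligned remainder at offset 8.
 */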

/*
 * slob_alloc: entry point into the slob allocator.
 */
static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
{
	struct slob_page *sp;
	struct list_head *prev;
	slob_t *b = NULL;
	unsigned long flags;

	spin_lock_irqsave(&slob_lock, flags);
	/* Iterate through each partially free page, try to find room */
	list_for_each_entry(sp, &free_slob_pages, list) {
#ifdef CONFIG_NUMA
		/*
		 * If there's a node specification, search for a partial
		 * page with a matching node id in the freelist.
		 */
		if (node != -1 && page_to_nid(&sp->page) != node)
			continue;
#endif
		/* Enough room on this page? */
		if (sp->units < SLOB_UNITS(size))
			continue;

		/* Attempt to alloc */
		prev = sp->list.prev;
		b = slob_page_alloc(sp, size, align);
		if (!b)
			continue;

		/* Improve fragment distribution and reduce our average
		 * search time by starting our next search here. (see
		 * Knuth vol 1, sec 2.5, pg 449) */
		if (free_slob_pages.next != prev->next)
			list_move_tail(&free_slob_pages, prev->next);
		break;
	}
	spin_unlock_irqrestore(&slob_lock, flags);

	/* Not enough space: must allocate a new page */
	if (!b) {
		b = slob_new_page(gfp, 0, node);
		if (!b)
			return NULL;
		sp = (struct slob_page *)virt_to_page(b);
		set_slob_page(sp);

		spin_lock_irqsave(&slob_lock, flags);
		sp->units = SLOB_UNITS(PAGE_SIZE);
		sp->free = b;
		INIT_LIST_HEAD(&sp->list);
		set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE));
		set_slob_page_free(sp);
		b = slob_page_alloc(sp, size, align);
		BUG_ON(!b);
		spin_unlock_irqrestore(&slob_lock, flags);
	}
	if (unlikely((gfp & __GFP_ZERO) && b))
		memset(b, 0, size);
	return b;
}

/*
 * slob_free: entry point into the slob deallocator.
 */
static void slob_free(void *block, int size)
{
	struct slob_page *sp;
	slob_t *prev, *next, *b = (slob_t *)block;
	slobidx_t units;
	unsigned long flags;

	if (unlikely(ZERO_OR_NULL_PTR(block)))
		return;
	BUG_ON(!size);

	sp = (struct slob_page *)virt_to_page(block);
	units = SLOB_UNITS(size);

	spin_lock_irqsave(&slob_lock, flags);

	if (sp->units + units == SLOB_UNITS(PAGE_SIZE)) {
		/* Go directly to page allocator. Do not pass slob allocator */
		if (slob_page_free(sp))
			clear_slob_page_free(sp);
		clear_slob_page(sp);
		free_slob_page(sp);
		free_page((unsigned long)b);
		goto out;
	}

	if (!slob_page_free(sp)) {
		/* This slob page is about to become partially free. Easy! */
		sp->units = units;
		sp->free = b;
		set_slob(b, units,
			(void *)((unsigned long)(b +
					SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK));
		set_slob_page_free(sp);
		goto out;
	}

	/*
	 * Otherwise the page is already partially free, so find reinsertion
	 * point.
	 */
	sp->units += units;

	if (b < sp->free) {
		set_slob(b, units, sp->free);
		sp->free = b;
	} else {
		prev = sp->free;
		next = slob_next(prev);
		while (b > next) {
			prev = next;
			next = slob_next(prev);
		}

		if (!slob_last(prev) && b + units == next) {
			units += slob_units(next);
			set_slob(b, units, slob_next(next));
		} else
			set_slob(b, units, next);

		if (prev + slob_units(prev) == b) {
			units = slob_units(b) + slob_units(prev);
			set_slob(prev, units, slob_next(b));
		} else
			set_slob(prev, slob_units(prev), b);
	}
out:
	spin_unlock_irqrestore(&slob_lock, flags);
}
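
/*
 * Coalescing sketch for the reinsertion path above (illustrative): the
 * freelist is kept in address order, so when the freed block b abuts
 * the following free block 'next' the two merge into one larger block,
 * and when it also abuts the preceding free block 'prev' that merge
 * happens too; a free between two free neighbours collapses all three
 * into a single block.
 */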

/*
 * End of slob allocator proper. Begin kmem_cache_alloc and kmalloc frontend.
 */

#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long)
#endif

#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long)
#endif

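/*
 * Resulting kmalloc block layout for the small-object path below
 * (sketch; 'align' is max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN)):
 *
 *	|<-- align -->|<-------- size -------->|
 *	| size | pad  | object ...             |
 *	^ m           ^ returned pointer
 *
 * kfree() steps back 'align' bytes to recover the stored size.
 */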
void *__kmalloc_node(size_t size, gfp_t gfp, int node)
{
	unsigned int *m;
	int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);

	if (size < PAGE_SIZE - align) {
		if (!size)
			return ZERO_SIZE_PTR;

		m = slob_alloc(size + align, gfp, align, node);
		if (!m)
			return NULL;
		*m = size;
		return (void *)m + align;
	} else {
		void *ret;

		ret = slob_new_page(gfp | __GFP_COMP, get_order(size), node);
		if (ret) {
			struct page *page;
			page = virt_to_page(ret);
			page->private = size;
		}
		return ret;
	}
}
EXPORT_SYMBOL(__kmalloc_node);

void kfree(const void *block)
{
	struct slob_page *sp;

	if (unlikely(ZERO_OR_NULL_PTR(block)))
		return;

	sp = (struct slob_page *)virt_to_page(block);
	if (slob_page(sp)) {
		int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
		unsigned int *m = (unsigned int *)(block - align);
		slob_free(m, *m + align);
	} else
		put_page(&sp->page);
}
EXPORT_SYMBOL(kfree);

/* can't use ksize for kmem_cache_alloc memory, only kmalloc */
size_t ksize(const void *block)
{
	struct slob_page *sp;

	BUG_ON(!block);
	if (unlikely(block == ZERO_SIZE_PTR))
		return 0;

	sp = (struct slob_page *)virt_to_page(block);
	if (slob_page(sp))
		return ((slob_t *)block - 1)->units + SLOB_UNIT;
	else
		return sp->page.private;
}

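/*
 * SLOB keeps no per-cache slab lists: a kmem_cache is just a small
 * descriptor (size, alignment, flags, name, constructor) consulted on
 * every allocation from the cache.
 */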
struct kmem_cache {
	unsigned int size, align;
	unsigned long flags;
	const char *name;
	void (*ctor)(struct kmem_cache *, void *);
};

struct kmem_cache *kmem_cache_create(const char *name, size_t size,
	size_t align, unsigned long flags,
	void (*ctor)(struct kmem_cache *, void *))
{
	struct kmem_cache *c;

	c = slob_alloc(sizeof(struct kmem_cache), flags, 0, -1);

	if (c) {
		c->name = name;
		c->size = size;
		if (flags & SLAB_DESTROY_BY_RCU) {
			/* leave room for rcu footer at the end of object */
			c->size += sizeof(struct slob_rcu);
		}
		c->flags = flags;
		c->ctor = ctor;
		/* ignore alignment unless it's forced */
		c->align = (flags & SLAB_HWCACHE_ALIGN) ? SLOB_ALIGN : 0;
		if (c->align < ARCH_SLAB_MINALIGN)
			c->align = ARCH_SLAB_MINALIGN;
		if (c->align < align)
			c->align = align;
	} else if (flags & SLAB_PANIC)
		panic("Cannot create slab cache %s\n", name);

	return c;
}
EXPORT_SYMBOL(kmem_cache_create);

void kmem_cache_destroy(struct kmem_cache *c)
{
	slob_free(c, sizeof(struct kmem_cache));
}
EXPORT_SYMBOL(kmem_cache_destroy);

void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
{
	void *b;

	if (c->size < PAGE_SIZE)
		b = slob_alloc(c->size, flags, c->align, node);
	else
		b = slob_new_page(flags, get_order(c->size), node);

	if (c->ctor)
		c->ctor(c, b);

	return b;
}
EXPORT_SYMBOL(kmem_cache_alloc_node);

static void __kmem_cache_free(void *b, int size)
{
	if (size < PAGE_SIZE)
		slob_free(b, size);
	else
		free_pages((unsigned long)b, get_order(size));
}

static void kmem_rcu_free(struct rcu_head *head)
{
	struct slob_rcu *slob_rcu = (struct slob_rcu *)head;
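	/* The rcu footer sits at the tail of the block; step back
	 * (size - sizeof(struct slob_rcu)) bytes to recover its base. */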
	void *b = (void *)slob_rcu - (slob_rcu->size - sizeof(struct slob_rcu));

	__kmem_cache_free(b, slob_rcu->size);
}

void kmem_cache_free(struct kmem_cache *c, void *b)
{
	if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) {
		struct slob_rcu *slob_rcu;
		slob_rcu = b + (c->size - sizeof(struct slob_rcu));
		INIT_RCU_HEAD(&slob_rcu->head);
		slob_rcu->size = c->size;
		call_rcu(&slob_rcu->head, kmem_rcu_free);
	} else {
		__kmem_cache_free(b, c->size);
	}
}
EXPORT_SYMBOL(kmem_cache_free);

unsigned int kmem_cache_size(struct kmem_cache *c)
{
	return c->size;
}
EXPORT_SYMBOL(kmem_cache_size);

const char *kmem_cache_name(struct kmem_cache *c)
{
	return c->name;
}
EXPORT_SYMBOL(kmem_cache_name);

int kmem_cache_shrink(struct kmem_cache *d)
{
	return 0;
}
EXPORT_SYMBOL(kmem_cache_shrink);

int kmem_ptr_validate(struct kmem_cache *a, const void *b)
{
	return 0;
}

static unsigned int slob_ready __read_mostly;

int slab_is_available(void)
{
	return slob_ready;
}

void __init kmem_cache_init(void)
{
	slob_ready = 1;
}