From 1537066c69bee9ebc4b998626c532ce1724d4e67 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 20 Aug 2010 12:37:12 -0500 Subject: [PATCH 01/27] slub: Force no inlining of debug functions Compiler folds the debgging functions into the critical paths. Avoid that by adding noinline to the functions that check for problems. Acked-by: David Rientjes Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slub.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 13fffe1f0f3..d28ee64bcdb 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -862,7 +862,7 @@ static void setup_object_debug(struct kmem_cache *s, struct page *page, init_tracking(s, object); } -static int alloc_debug_processing(struct kmem_cache *s, struct page *page, +static noinline int alloc_debug_processing(struct kmem_cache *s, struct page *page, void *object, unsigned long addr) { if (!check_slab(s, page)) @@ -902,8 +902,8 @@ bad: return 0; } -static int free_debug_processing(struct kmem_cache *s, struct page *page, - void *object, unsigned long addr) +static noinline int free_debug_processing(struct kmem_cache *s, + struct page *page, void *object, unsigned long addr) { if (!check_slab(s, page)) goto fail; From 55136592fedf5cc58c03d9b5eb66103a9013d4e5 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 20 Aug 2010 12:37:13 -0500 Subject: [PATCH 02/27] slub: Remove dynamic dma slab allocation Remove the dynamic dma slab allocation since this causes too many issues with nested locks etc etc. The change avoids passing gfpflags into many functions. V3->V4: - Create dma caches in kmem_cache_init() instead of kmem_cache_init_late(). Acked-by: David Rientjes Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slub.c | 150 ++++++++++++++---------------------------------------- 1 file changed, 39 insertions(+), 111 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index d28ee64bcdb..38c73a3364c 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2064,7 +2064,7 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) static DEFINE_PER_CPU(struct kmem_cache_cpu, kmalloc_percpu[KMALLOC_CACHES]); -static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) +static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) { if (s < kmalloc_caches + KMALLOC_CACHES && s >= kmalloc_caches) /* @@ -2091,7 +2091,7 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) * when allocating for the kmalloc_node_cache. This is used for bootstrapping * memory on a fresh node that has no slab structures yet. 
*/ -static void early_kmem_cache_node_alloc(gfp_t gfpflags, int node) +static void early_kmem_cache_node_alloc(int node) { struct page *page; struct kmem_cache_node *n; @@ -2099,7 +2099,7 @@ static void early_kmem_cache_node_alloc(gfp_t gfpflags, int node) BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node)); - page = new_slab(kmalloc_caches, gfpflags, node); + page = new_slab(kmalloc_caches, GFP_NOWAIT, node); BUG_ON(!page); if (page_to_nid(page) != node) { @@ -2143,7 +2143,7 @@ static void free_kmem_cache_nodes(struct kmem_cache *s) } } -static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags) +static int init_kmem_cache_nodes(struct kmem_cache *s) { int node; @@ -2151,11 +2151,11 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags) struct kmem_cache_node *n; if (slab_state == DOWN) { - early_kmem_cache_node_alloc(gfpflags, node); + early_kmem_cache_node_alloc(node); continue; } n = kmem_cache_alloc_node(kmalloc_caches, - gfpflags, node); + GFP_KERNEL, node); if (!n) { free_kmem_cache_nodes(s); @@ -2172,7 +2172,7 @@ static void free_kmem_cache_nodes(struct kmem_cache *s) { } -static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags) +static int init_kmem_cache_nodes(struct kmem_cache *s) { init_kmem_cache_node(&s->local_node, s); return 1; @@ -2312,7 +2312,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) } -static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, +static int kmem_cache_open(struct kmem_cache *s, const char *name, size_t size, size_t align, unsigned long flags, void (*ctor)(void *)) @@ -2348,10 +2348,10 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, #ifdef CONFIG_NUMA s->remote_node_defrag_ratio = 1000; #endif - if (!init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA)) + if (!init_kmem_cache_nodes(s)) goto error; - if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA)) + if (alloc_kmem_cache_cpus(s)) return 1; free_kmem_cache_nodes(s); @@ -2510,6 +2510,10 @@ EXPORT_SYMBOL(kmem_cache_destroy); struct kmem_cache kmalloc_caches[KMALLOC_CACHES] __cacheline_aligned; EXPORT_SYMBOL(kmalloc_caches); +#ifdef CONFIG_ZONE_DMA +static struct kmem_cache kmalloc_dma_caches[SLUB_PAGE_SHIFT]; +#endif + static int __init setup_slub_min_order(char *str) { get_option(&str, &slub_min_order); @@ -2546,116 +2550,26 @@ static int __init setup_slub_nomerge(char *str) __setup("slub_nomerge", setup_slub_nomerge); -static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s, - const char *name, int size, gfp_t gfp_flags) +static void create_kmalloc_cache(struct kmem_cache *s, + const char *name, int size, unsigned int flags) { - unsigned int flags = 0; - - if (gfp_flags & SLUB_DMA) - flags = SLAB_CACHE_DMA; - /* * This function is called with IRQs disabled during early-boot on * single CPU so there's no need to take slub_lock here. 
*/ - if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN, + if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN, flags, NULL)) goto panic; list_add(&s->list, &slab_caches); - if (sysfs_slab_add(s)) - goto panic; - return s; + if (!sysfs_slab_add(s)) + return; panic: panic("Creation of kmalloc slab %s size=%d failed.\n", name, size); } -#ifdef CONFIG_ZONE_DMA -static struct kmem_cache *kmalloc_caches_dma[SLUB_PAGE_SHIFT]; - -static void sysfs_add_func(struct work_struct *w) -{ - struct kmem_cache *s; - - down_write(&slub_lock); - list_for_each_entry(s, &slab_caches, list) { - if (s->flags & __SYSFS_ADD_DEFERRED) { - s->flags &= ~__SYSFS_ADD_DEFERRED; - sysfs_slab_add(s); - } - } - up_write(&slub_lock); -} - -static DECLARE_WORK(sysfs_add_work, sysfs_add_func); - -static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) -{ - struct kmem_cache *s; - char *text; - size_t realsize; - unsigned long slabflags; - int i; - - s = kmalloc_caches_dma[index]; - if (s) - return s; - - /* Dynamically create dma cache */ - if (flags & __GFP_WAIT) - down_write(&slub_lock); - else { - if (!down_write_trylock(&slub_lock)) - goto out; - } - - if (kmalloc_caches_dma[index]) - goto unlock_out; - - realsize = kmalloc_caches[index].objsize; - text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", - (unsigned int)realsize); - - s = NULL; - for (i = 0; i < KMALLOC_CACHES; i++) - if (!kmalloc_caches[i].size) - break; - - BUG_ON(i >= KMALLOC_CACHES); - s = kmalloc_caches + i; - - /* - * Must defer sysfs creation to a workqueue because we don't know - * what context we are called from. Before sysfs comes up, we don't - * need to do anything because our sysfs initcall will start by - * adding all existing slabs to sysfs. - */ - slabflags = SLAB_CACHE_DMA|SLAB_NOTRACK; - if (slab_state >= SYSFS) - slabflags |= __SYSFS_ADD_DEFERRED; - - if (!text || !kmem_cache_open(s, flags, text, - realsize, ARCH_KMALLOC_MINALIGN, slabflags, NULL)) { - s->size = 0; - kfree(text); - goto unlock_out; - } - - list_add(&s->list, &slab_caches); - kmalloc_caches_dma[index] = s; - - if (slab_state >= SYSFS) - schedule_work(&sysfs_add_work); - -unlock_out: - up_write(&slub_lock); -out: - return kmalloc_caches_dma[index]; -} -#endif - /* * Conversion table for small slabs sizes / 8 to the index in the * kmalloc array. This is necessary for slabs < 192 since we have non power @@ -2708,7 +2622,7 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags) #ifdef CONFIG_ZONE_DMA if (unlikely((flags & SLUB_DMA))) - return dma_kmalloc_cache(index, flags); + return &kmalloc_dma_caches[index]; #endif return &kmalloc_caches[index]; @@ -3047,7 +2961,7 @@ void __init kmem_cache_init(void) * kmem_cache_open for slab_state == DOWN. 
*/ create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node", - sizeof(struct kmem_cache_node), GFP_NOWAIT); + sizeof(struct kmem_cache_node), 0); kmalloc_caches[0].refcount = -1; caches++; @@ -3060,18 +2974,18 @@ void __init kmem_cache_init(void) /* Caches that are not of the two-to-the-power-of size */ if (KMALLOC_MIN_SIZE <= 32) { create_kmalloc_cache(&kmalloc_caches[1], - "kmalloc-96", 96, GFP_NOWAIT); + "kmalloc-96", 96, 0); caches++; } if (KMALLOC_MIN_SIZE <= 64) { create_kmalloc_cache(&kmalloc_caches[2], - "kmalloc-192", 192, GFP_NOWAIT); + "kmalloc-192", 192, 0); caches++; } for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { create_kmalloc_cache(&kmalloc_caches[i], - "kmalloc", 1 << i, GFP_NOWAIT); + "kmalloc", 1 << i, 0); caches++; } @@ -3134,6 +3048,20 @@ void __init kmem_cache_init(void) kmem_size = sizeof(struct kmem_cache); #endif +#ifdef CONFIG_ZONE_DMA + for (i = 1; i < SLUB_PAGE_SHIFT; i++) { + struct kmem_cache *s = &kmalloc_caches[i]; + + if (s->size) { + char *name = kasprintf(GFP_NOWAIT, + "dma-kmalloc-%d", s->objsize); + + BUG_ON(!name); + create_kmalloc_cache(&kmalloc_dma_caches[i], + name, s->objsize, SLAB_CACHE_DMA); + } + } +#endif printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," " CPUs=%d, Nodes=%d\n", @@ -3236,7 +3164,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, s = kmalloc(kmem_size, GFP_KERNEL); if (s) { - if (kmem_cache_open(s, GFP_KERNEL, name, + if (kmem_cache_open(s, name, size, align, flags, ctor)) { list_add(&s->list, &slab_caches); if (sysfs_slab_add(s)) { From 6c182dc0de26ef97efb6a97a8deab074833764e7 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 20 Aug 2010 12:37:14 -0500 Subject: [PATCH 03/27] slub: Remove static kmem_cache_cpu array for boot The percpu allocator can now handle allocations during early boot. So drop the static kmem_cache_cpu array. Cc: Tejun Heo Acked-by: David Rientjes Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slub.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 38c73a3364c..e8c11759536 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2062,23 +2062,14 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) #endif } -static DEFINE_PER_CPU(struct kmem_cache_cpu, kmalloc_percpu[KMALLOC_CACHES]); - static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) { - if (s < kmalloc_caches + KMALLOC_CACHES && s >= kmalloc_caches) - /* - * Boot time creation of the kmalloc array. Use static per cpu data - * since the per cpu allocator is not available yet. - */ - s->cpu_slab = kmalloc_percpu + (s - kmalloc_caches); - else - s->cpu_slab = alloc_percpu(struct kmem_cache_cpu); + BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE < + SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu)); - if (!s->cpu_slab) - return 0; + s->cpu_slab = alloc_percpu(struct kmem_cache_cpu); - return 1; + return s->cpu_slab != NULL; } #ifdef CONFIG_NUMA From 51df1142816e469173889fb6d6dc810be9b9e022 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 20 Aug 2010 12:37:15 -0500 Subject: [PATCH 04/27] slub: Dynamically size kmalloc cache allocations kmalloc caches are statically defined and may take up a lot of space just because the sizes of the node array has to be dimensioned for the largest node count supported. This patch makes the size of the kmem_cache structure dynamic throughout by creating a kmem_cache slab cache for the kmem_cache objects. 
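[Editorial sketch, not part of the patch; the structure and helper below are simplified stand-ins for the kernel definitions. They only illustrate why a dynamically sized kmem_cache helps: with the per-node pointer array as the last member, each cache can be sized for the nodes actually present instead of MAX_NUMNODES.]

	#include <stddef.h>

	struct kmem_cache_node;

	/* simplified stand-in for struct kmem_cache with a trailing node array */
	struct example_cache {
		unsigned long flags;
		int size;
		struct kmem_cache_node *node[];		/* length decided at boot */
	};

	/* bytes needed for one cache on a system with nr_node_ids nodes */
	static inline size_t example_cache_size(int nr_node_ids)
	{
		return offsetof(struct example_cache, node) +
		       nr_node_ids * sizeof(struct kmem_cache_node *);
	}

This mirrors the kmem_size = offsetof(struct kmem_cache, node) + nr_node_ids * sizeof(struct kmem_cache_node *) computation added in the diff below.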
The bootstrap occurs by allocating the initial one or two kmem_cache objects from the page allocator. C2->C3 - Fix various issues indicated by David - Make create kmalloc_cache return a kmem_cache * pointer. Acked-by: David Rientjes Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 7 +- mm/slub.c | 191 ++++++++++++++++++++++++++++----------- 2 files changed, 140 insertions(+), 58 deletions(-) diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 9f63538928c..a6c43ec6a4a 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -139,19 +139,16 @@ struct kmem_cache { #ifdef CONFIG_ZONE_DMA #define SLUB_DMA __GFP_DMA -/* Reserve extra caches for potential DMA use */ -#define KMALLOC_CACHES (2 * SLUB_PAGE_SHIFT) #else /* Disable DMA functionality */ #define SLUB_DMA (__force gfp_t)0 -#define KMALLOC_CACHES SLUB_PAGE_SHIFT #endif /* * We keep the general caches in an array of slab caches that are used for * 2^x bytes of allocations. */ -extern struct kmem_cache kmalloc_caches[KMALLOC_CACHES]; +extern struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT]; /* * Sorry that the following has to be that ugly but some versions of GCC @@ -216,7 +213,7 @@ static __always_inline struct kmem_cache *kmalloc_slab(size_t size) if (index == 0) return NULL; - return &kmalloc_caches[index]; + return kmalloc_caches[index]; } void *kmem_cache_alloc(struct kmem_cache *, gfp_t); diff --git a/mm/slub.c b/mm/slub.c index e8c11759536..94fee96da0d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -168,7 +168,6 @@ static inline int kmem_cache_debug(struct kmem_cache *s) /* Internal SLUB flags */ #define __OBJECT_POISON 0x80000000UL /* Poison object */ -#define __SYSFS_ADD_DEFERRED 0x40000000UL /* Not yet visible via sysfs */ static int kmem_size = sizeof(struct kmem_cache); @@ -178,7 +177,7 @@ static struct notifier_block slab_notifier; static enum { DOWN, /* No slab functionality available */ - PARTIAL, /* kmem_cache_open() works but kmalloc does not */ + PARTIAL, /* Kmem_cache_node works */ UP, /* Everything works but does not show up in sysfs */ SYSFS /* Sysfs up */ } slab_state = DOWN; @@ -2073,6 +2072,8 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) } #ifdef CONFIG_NUMA +static struct kmem_cache *kmem_cache_node; + /* * No kmalloc_node yet so do it by hand. We know that this is the first * slab on the node for this slabcache. 
There are no concurrent accesses @@ -2088,9 +2089,9 @@ static void early_kmem_cache_node_alloc(int node) struct kmem_cache_node *n; unsigned long flags; - BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node)); + BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node)); - page = new_slab(kmalloc_caches, GFP_NOWAIT, node); + page = new_slab(kmem_cache_node, GFP_NOWAIT, node); BUG_ON(!page); if (page_to_nid(page) != node) { @@ -2102,15 +2103,15 @@ static void early_kmem_cache_node_alloc(int node) n = page->freelist; BUG_ON(!n); - page->freelist = get_freepointer(kmalloc_caches, n); + page->freelist = get_freepointer(kmem_cache_node, n); page->inuse++; - kmalloc_caches->node[node] = n; + kmem_cache_node->node[node] = n; #ifdef CONFIG_SLUB_DEBUG - init_object(kmalloc_caches, n, 1); - init_tracking(kmalloc_caches, n); + init_object(kmem_cache_node, n, 1); + init_tracking(kmem_cache_node, n); #endif - init_kmem_cache_node(n, kmalloc_caches); - inc_slabs_node(kmalloc_caches, node, page->objects); + init_kmem_cache_node(n, kmem_cache_node); + inc_slabs_node(kmem_cache_node, node, page->objects); /* * lockdep requires consistent irq usage for each lock @@ -2128,8 +2129,10 @@ static void free_kmem_cache_nodes(struct kmem_cache *s) for_each_node_state(node, N_NORMAL_MEMORY) { struct kmem_cache_node *n = s->node[node]; + if (n) - kmem_cache_free(kmalloc_caches, n); + kmem_cache_free(kmem_cache_node, n); + s->node[node] = NULL; } } @@ -2145,7 +2148,7 @@ static int init_kmem_cache_nodes(struct kmem_cache *s) early_kmem_cache_node_alloc(node); continue; } - n = kmem_cache_alloc_node(kmalloc_caches, + n = kmem_cache_alloc_node(kmem_cache_node, GFP_KERNEL, node); if (!n) { @@ -2498,11 +2501,13 @@ EXPORT_SYMBOL(kmem_cache_destroy); * Kmalloc subsystem *******************************************************************/ -struct kmem_cache kmalloc_caches[KMALLOC_CACHES] __cacheline_aligned; +struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT]; EXPORT_SYMBOL(kmalloc_caches); +static struct kmem_cache *kmem_cache; + #ifdef CONFIG_ZONE_DMA -static struct kmem_cache kmalloc_dma_caches[SLUB_PAGE_SHIFT]; +static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT]; #endif static int __init setup_slub_min_order(char *str) @@ -2541,9 +2546,13 @@ static int __init setup_slub_nomerge(char *str) __setup("slub_nomerge", setup_slub_nomerge); -static void create_kmalloc_cache(struct kmem_cache *s, - const char *name, int size, unsigned int flags) +static struct kmem_cache *__init create_kmalloc_cache(const char *name, + int size, unsigned int flags) { + struct kmem_cache *s; + + s = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); + /* * This function is called with IRQs disabled during early-boot on * single CPU so there's no need to take slub_lock here. 
@@ -2553,12 +2562,11 @@ static void create_kmalloc_cache(struct kmem_cache *s, goto panic; list_add(&s->list, &slab_caches); - - if (!sysfs_slab_add(s)) - return; + return s; panic: panic("Creation of kmalloc slab %s size=%d failed.\n", name, size); + return NULL; } /* @@ -2613,10 +2621,10 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags) #ifdef CONFIG_ZONE_DMA if (unlikely((flags & SLUB_DMA))) - return &kmalloc_dma_caches[index]; + return kmalloc_dma_caches[index]; #endif - return &kmalloc_caches[index]; + return kmalloc_caches[index]; } void *__kmalloc(size_t size, gfp_t flags) @@ -2940,46 +2948,113 @@ static int slab_memory_callback(struct notifier_block *self, * Basic setup of slabs *******************************************************************/ +/* + * Used for early kmem_cache structures that were allocated using + * the page allocator + */ + +static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s) +{ + int node; + + list_add(&s->list, &slab_caches); + s->refcount = -1; + + for_each_node_state(node, N_NORMAL_MEMORY) { + struct kmem_cache_node *n = get_node(s, node); + struct page *p; + + if (n) { + list_for_each_entry(p, &n->partial, lru) + p->slab = s; + +#ifdef CONFIG_SLAB_DEBUG + list_for_each_entry(p, &n->full, lru) + p->slab = s; +#endif + } + } +} + void __init kmem_cache_init(void) { int i; int caches = 0; + struct kmem_cache *temp_kmem_cache; + int order; #ifdef CONFIG_NUMA + struct kmem_cache *temp_kmem_cache_node; + unsigned long kmalloc_size; + + kmem_size = offsetof(struct kmem_cache, node) + + nr_node_ids * sizeof(struct kmem_cache_node *); + + /* Allocate two kmem_caches from the page allocator */ + kmalloc_size = ALIGN(kmem_size, cache_line_size()); + order = get_order(2 * kmalloc_size); + kmem_cache = (void *)__get_free_pages(GFP_NOWAIT, order); + /* * Must first have the slab cache available for the allocations of the * struct kmem_cache_node's. There is special bootstrap code in * kmem_cache_open for slab_state == DOWN. */ - create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node", - sizeof(struct kmem_cache_node), 0); - kmalloc_caches[0].refcount = -1; - caches++; + kmem_cache_node = (void *)kmem_cache + kmalloc_size; + + kmem_cache_open(kmem_cache_node, "kmem_cache_node", + sizeof(struct kmem_cache_node), + 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); +#else + /* Allocate a single kmem_cache from the page allocator */ + kmem_size = sizeof(struct kmem_cache); + order = get_order(kmem_size); + kmem_cache = (void *)__get_free_pages(GFP_NOWAIT, order); #endif /* Able to allocate the per node structures */ slab_state = PARTIAL; - /* Caches that are not of the two-to-the-power-of size */ - if (KMALLOC_MIN_SIZE <= 32) { - create_kmalloc_cache(&kmalloc_caches[1], - "kmalloc-96", 96, 0); - caches++; - } - if (KMALLOC_MIN_SIZE <= 64) { - create_kmalloc_cache(&kmalloc_caches[2], - "kmalloc-192", 192, 0); - caches++; - } + temp_kmem_cache = kmem_cache; + kmem_cache_open(kmem_cache, "kmem_cache", kmem_size, + 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); + kmem_cache = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); + memcpy(kmem_cache, temp_kmem_cache, kmem_size); - for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { - create_kmalloc_cache(&kmalloc_caches[i], - "kmalloc", 1 << i, 0); - caches++; - } +#ifdef CONFIG_NUMA + /* + * Allocate kmem_cache_node properly from the kmem_cache slab. + * kmem_cache_node is separately allocated so no need to + * update any list pointers. 
+ */ + temp_kmem_cache_node = kmem_cache_node; + kmem_cache_node = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); + memcpy(kmem_cache_node, temp_kmem_cache_node, kmem_size); + + kmem_cache_bootstrap_fixup(kmem_cache_node); + + caches++; +#else + /* + * kmem_cache has kmem_cache_node embedded and we moved it! + * Update the list heads + */ + INIT_LIST_HEAD(&kmem_cache->local_node.partial); + list_splice(&temp_kmem_cache->local_node.partial, &kmem_cache->local_node.partial); +#ifdef CONFIG_SLUB_DEBUG + INIT_LIST_HEAD(&kmem_cache->local_node.full); + list_splice(&temp_kmem_cache->local_node.full, &kmem_cache->local_node.full); +#endif +#endif + kmem_cache_bootstrap_fixup(kmem_cache); + caches++; + /* Free temporary boot structure */ + free_pages((unsigned long)temp_kmem_cache, order); + + /* Now we can use the kmem_cache to allocate kmalloc slabs */ /* * Patch up the size_index table if we have strange large alignment @@ -3019,6 +3094,22 @@ void __init kmem_cache_init(void) size_index[size_index_elem(i)] = 8; } + /* Caches that are not of the two-to-the-power-of size */ + if (KMALLOC_MIN_SIZE <= 32) { + kmalloc_caches[1] = create_kmalloc_cache("kmalloc-96", 96, 0); + caches++; + } + + if (KMALLOC_MIN_SIZE <= 64) { + kmalloc_caches[2] = create_kmalloc_cache("kmalloc-192", 192, 0); + caches++; + } + + for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { + kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0); + caches++; + } + slab_state = UP; /* Provide the correct kmalloc names now that the caches are up */ @@ -3026,30 +3117,24 @@ void __init kmem_cache_init(void) char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i); BUG_ON(!s); - kmalloc_caches[i].name = s; + kmalloc_caches[i]->name = s; } #ifdef CONFIG_SMP register_cpu_notifier(&slab_notifier); #endif -#ifdef CONFIG_NUMA - kmem_size = offsetof(struct kmem_cache, node) + - nr_node_ids * sizeof(struct kmem_cache_node *); -#else - kmem_size = sizeof(struct kmem_cache); -#endif #ifdef CONFIG_ZONE_DMA - for (i = 1; i < SLUB_PAGE_SHIFT; i++) { - struct kmem_cache *s = &kmalloc_caches[i]; + for (i = 0; i < SLUB_PAGE_SHIFT; i++) { + struct kmem_cache *s = kmalloc_caches[i]; - if (s->size) { + if (s && s->size) { char *name = kasprintf(GFP_NOWAIT, "dma-kmalloc-%d", s->objsize); BUG_ON(!name); - create_kmalloc_cache(&kmalloc_dma_caches[i], - name, s->objsize, SLAB_CACHE_DMA); + kmalloc_dma_caches[i] = create_kmalloc_cache(name, + s->objsize, SLAB_CACHE_DMA); } } #endif From c016b0bdeee74a7fbe5179937c0d667eabcf379e Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 20 Aug 2010 12:37:16 -0500 Subject: [PATCH 05/27] slub: Extract hooks for memory checkers from hotpaths Extract the code that memory checkers and other verification tools use from the hotpaths. Makes it easier to add new ones and reduces the disturbances of the hotpaths. Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slub.c | 49 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 94fee96da0d..ca49d02b5ff 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -790,6 +790,37 @@ static void trace(struct kmem_cache *s, struct page *page, void *object, } } +/* + * Hooks for other subsystems that check memory allocations. In a typical + * production configuration these hooks all should produce no code at all. 
+ */ +static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) +{ + lockdep_trace_alloc(flags); + might_sleep_if(flags & __GFP_WAIT); + + return should_failslab(s->objsize, flags, s->flags); +} + +static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object) +{ + kmemcheck_slab_alloc(s, flags, object, s->objsize); + kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, flags); +} + +static inline void slab_free_hook(struct kmem_cache *s, void *x) +{ + kmemleak_free_recursive(x, s->flags); +} + +static inline void slab_free_hook_irq(struct kmem_cache *s, void *object) +{ + kmemcheck_slab_free(s, object, s->objsize); + debug_check_no_locks_freed(object, s->objsize); + if (!(s->flags & SLAB_DEBUG_OBJECTS)) + debug_check_no_obj_freed(object, s->objsize); +} + /* * Tracking of fully allocated slabs for debugging purposes. */ @@ -1696,10 +1727,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, gfpflags &= gfp_allowed_mask; - lockdep_trace_alloc(gfpflags); - might_sleep_if(gfpflags & __GFP_WAIT); - - if (should_failslab(s->objsize, gfpflags, s->flags)) + if (slab_pre_alloc_hook(s, gfpflags)) return NULL; local_irq_save(flags); @@ -1718,8 +1746,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, if (unlikely(gfpflags & __GFP_ZERO) && object) memset(object, 0, s->objsize); - kmemcheck_slab_alloc(s, gfpflags, object, s->objsize); - kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, gfpflags); + slab_post_alloc_hook(s, gfpflags, object); return object; } @@ -1849,13 +1876,13 @@ static __always_inline void slab_free(struct kmem_cache *s, struct kmem_cache_cpu *c; unsigned long flags; - kmemleak_free_recursive(x, s->flags); + slab_free_hook(s, x); + local_irq_save(flags); c = __this_cpu_ptr(s->cpu_slab); - kmemcheck_slab_free(s, object, s->objsize); - debug_check_no_locks_freed(object, s->objsize); - if (!(s->flags & SLAB_DEBUG_OBJECTS)) - debug_check_no_obj_freed(object, s->objsize); + + slab_free_hook_irq(s, x); + if (likely(page == c->page && c->node >= 0)) { set_freepointer(s, object, c->freelist); c->freelist = object; From c1d508365ea07f9f8849c7da5b02186601570f8b Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 20 Aug 2010 12:37:17 -0500 Subject: [PATCH 06/27] slub: Move gfpflag masking out of the hotpath Move the gfpflags masking into the hooks for checkers and into the slowpaths. gfpflag masking requires access to a global variable and thus adds an additional cacheline reference to the hotpaths. If no hooks are active then the gfpflag masking will result in code that the compiler can toss out. 
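[Editorial sketch, not part of the patch; identifiers are generic stand-ins. It shows why doing the masking inside an inline hook lets the compiler drop it: when the checker calls compile to nothing, the masked value is dead and the load of the global mask disappears from the hot path.]

	extern unsigned int gfp_allowed_mask;	/* global: an extra cacheline reference when read */

	static inline int pre_alloc_hook(unsigned int flags)
	{
		flags &= gfp_allowed_mask;
		/*
		 * In a production configuration the checks that would consume the
		 * masked flags compile to no code, so the masking above is dead
		 * and is eliminated together with the read of gfp_allowed_mask.
		 */
		return 0;			/* 0 == proceed with the allocation */
	}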
Acked-by: David Rientjes Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slub.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index ca49d02b5ff..6608f2bc310 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -796,6 +796,7 @@ static void trace(struct kmem_cache *s, struct page *page, void *object, */ static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) { + flags &= gfp_allowed_mask; lockdep_trace_alloc(flags); might_sleep_if(flags & __GFP_WAIT); @@ -804,6 +805,7 @@ static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object) { + flags &= gfp_allowed_mask; kmemcheck_slab_alloc(s, flags, object, s->objsize); kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, flags); } @@ -1677,6 +1679,7 @@ new_slab: goto load_freelist; } + gfpflags &= gfp_allowed_mask; if (gfpflags & __GFP_WAIT) local_irq_enable(); @@ -1725,8 +1728,6 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, struct kmem_cache_cpu *c; unsigned long flags; - gfpflags &= gfp_allowed_mask; - if (slab_pre_alloc_hook(s, gfpflags)) return NULL; From 8df275af8db8220d7e3f1bf97b6ac7aad05f96f0 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Sun, 22 Aug 2010 16:16:06 -0700 Subject: [PATCH 07/27] slob: fix gfp flags for order-0 page allocations kmalloc_node() may allocate higher order slob pages, but the __GFP_COMP bit is only passed to the page allocator and not represented in the tracepoint event. The bit should be passed to trace_kmalloc_node() as well. Acked-by: Matt Mackall Reviewed-by: Christoph Lameter Signed-off-by: David Rientjes Signed-off-by: Pekka Enberg --- mm/slob.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/slob.c b/mm/slob.c index d582171c810..617b6d6c42c 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -500,7 +500,9 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node) } else { unsigned int order = get_order(size); - ret = slob_new_pages(gfp | __GFP_COMP, get_order(size), node); + if (likely(order)) + gfp |= __GFP_COMP; + ret = slob_new_pages(gfp, order, node); if (ret) { struct page *page; page = virt_to_page(ret); From 7d550c56a24b20e91ec469054230bd2e2485996a Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 25 Aug 2010 14:07:16 -0500 Subject: [PATCH 08/27] slub: Add dummy functions for the !SLUB_DEBUG case On Wed, 25 Aug 2010, Randy Dunlap wrote: > mm/slub.c:1732: error: implicit declaration of function 'slab_pre_alloc_hook' > mm/slub.c:1751: error: implicit declaration of function 'slab_post_alloc_hook' > mm/slub.c:1881: error: implicit declaration of function 'slab_free_hook' > mm/slub.c:1886: error: implicit declaration of function 'slab_free_hook_irq' Empty functions are missing if the runtime debuggability option is compiled out. Provide the fall back functions to empty hooks if SLUB_DEBUG is not set. 
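[Editorial illustration of the pattern being applied, using generic identifiers rather than the actual hook names: when a debug feature is configured out, the header supplies empty static inline stubs so every caller still compiles and the calls generate no code.]

	#ifdef CONFIG_MY_DEBUG_HOOKS		/* hypothetical option */
	int my_pre_alloc_hook(void *object, unsigned int flags);
	void my_free_hook(void *object);
	#else
	static inline int my_pre_alloc_hook(void *object, unsigned int flags)
							{ return 0; }
	static inline void my_free_hook(void *object) { }
	#endif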
Acked-by: Randy Dunlap Acked-by: David Rientjes Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slub.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/mm/slub.c b/mm/slub.c index 6608f2bc310..c1add106c43 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1098,6 +1098,18 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects) {} static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects) {} + +static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) + { return 0; } + +static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, + void *object) {} + +static inline void slab_free_hook(struct kmem_cache *s, void *x) {} + +static inline void slab_free_hook_irq(struct kmem_cache *s, + void *object) {} + #endif /* From 8de66a0c022c7c575c7481224803292cdabed4c4 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 25 Aug 2010 14:51:14 -0500 Subject: [PATCH 09/27] slub: Fix up missing kmalloc_cache -> kmem_cache_node case for memoryhotplug Memory hotplug allocates and frees per node structures. Use the correct name. Acked-by: David Rientjes Acked-by: Randy Dunlap Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slub.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index c1add106c43..b244a5a11a9 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2909,7 +2909,7 @@ static void slab_mem_offline_callback(void *arg) BUG_ON(slabs_node(s, offline_node)); s->node[offline_node] = NULL; - kmem_cache_free(kmalloc_caches, n); + kmem_cache_free(kmem_cache_node, n); } } up_read(&slub_lock); @@ -2942,7 +2942,7 @@ static int slab_mem_going_online_callback(void *arg) * since memory is not yet available from the node that * is brought up. */ - n = kmem_cache_alloc(kmalloc_caches, GFP_KERNEL); + n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL); if (!n) { ret = -ENOMEM; goto out; From a016471a16b5c4d4ec8f5221575e603a3d11e5e9 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 25 Aug 2010 16:32:27 -0700 Subject: [PATCH 10/27] slub: fix SLUB_RESILIENCY_TEST for dynamic kmalloc caches Now that the kmalloc_caches array is dynamically allocated at boot, SLUB_RESILIENCY_TEST needs to be fixed to pass the correct type. Acked-by: Christoph Lameter Signed-off-by: David Rientjes Signed-off-by: Pekka Enberg --- mm/slub.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index b244a5a11a9..4c5a76f505e 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3498,6 +3498,8 @@ static void resiliency_test(void) { u8 *p; + BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || SLUB_PAGE_SHIFT < 10); + printk(KERN_ERR "SLUB resiliency testing\n"); printk(KERN_ERR "-----------------------\n"); printk(KERN_ERR "A. Corruption after allocation\n"); @@ -3507,7 +3509,7 @@ static void resiliency_test(void) printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer" " 0x12->0x%p\n\n", p + 16); - validate_slab_cache(kmalloc_caches + 4); + validate_slab_cache(kmalloc_caches[4]); /* Hmmm... 
The next two are dangerous */ p = kzalloc(32, GFP_KERNEL); @@ -3517,7 +3519,7 @@ static void resiliency_test(void) printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n"); - validate_slab_cache(kmalloc_caches + 5); + validate_slab_cache(kmalloc_caches[5]); p = kzalloc(64, GFP_KERNEL); p += 64 + (get_cycles() & 0xff) * sizeof(void *); *p = 0x56; @@ -3525,27 +3527,27 @@ static void resiliency_test(void) p); printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n"); - validate_slab_cache(kmalloc_caches + 6); + validate_slab_cache(kmalloc_caches[6]); printk(KERN_ERR "\nB. Corruption after free\n"); p = kzalloc(128, GFP_KERNEL); kfree(p); *p = 0x78; printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p); - validate_slab_cache(kmalloc_caches + 7); + validate_slab_cache(kmalloc_caches[7]); p = kzalloc(256, GFP_KERNEL); kfree(p); p[50] = 0x9a; printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p); - validate_slab_cache(kmalloc_caches + 8); + validate_slab_cache(kmalloc_caches[8]); p = kzalloc(512, GFP_KERNEL); kfree(p); p[512] = 0xab; printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p); - validate_slab_cache(kmalloc_caches + 9); + validate_slab_cache(kmalloc_caches[9]); } #else static void resiliency_test(void) {}; From db210e70e5f191710a3b1d09f653b44885d397ea Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 26 Aug 2010 09:41:19 -0500 Subject: [PATCH 11/27] Slub: UP bandaid Since the percpu allocator does not provide early allocation in UP mode (only in SMP configurations) use __get_free_page() to improvise a compound page allocation that can be later freed via kfree(). Compound pages will be released when the cpu caches are resized. Acked-by: David Rientjes Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slub.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/mm/slub.c b/mm/slub.c index 4c5a76f505e..05674aac929 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2103,8 +2103,24 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) { +#ifdef CONFIG_SMP + /* + * Will use reserve that does not require slab operation during + * early boot. + */ BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE < SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu)); +#else + /* + * Special hack for UP mode. allocpercpu() falls back to kmalloc + * operations. So we cannot use that before the slab allocator is up + * Simply get the smallest possible compound page. The page will be + * released via kfree() when the cpu caches are resized later. + */ + if (slab_state < UP) + s->cpu_slab = (__percpu void *)kmalloc_large(PAGE_SIZE << 1, GFP_NOWAIT); + else +#endif s->cpu_slab = alloc_percpu(struct kmem_cache_cpu); From 84c1cf62465e2fb0a692620dcfeb52323ab03d48 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 14 Sep 2010 23:21:12 +0300 Subject: [PATCH 12/27] SLUB: Fix merged slab cache names As explained by Linus "I'm Proud to be an American" Torvalds: Looking at the merging code, I actually think it's totally buggy. If you have something like this: - load module A: create slab cache A - load module B: create slab cache B that can merge with A - unload module A - "cat /proc/slabinfo": BOOM. Oops. exactly because the name is not handled correctly, and you'll have module B holding open a slab cache that has a name pointer that points to module A that no longer exists. 
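[Editorial sketch of the lifetime hazard described above, simplified and with made-up names: the merged cache keeps a pointer to a string owned by module A, so once module A is unloaded any later read of the name, e.g. for /proc/slabinfo, dereferences freed memory.]

	struct cache { const char *name; /* ... */ };

	static struct cache merged_cache;

	/* runs in module A; module_a_name lives in module A's memory */
	void module_a_register(void)
	{
		static const char module_a_name[] = "cache_A";

		merged_cache.name = module_a_name;	/* dangles after module A is unloaded */
	}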
This patch fixes the problem by using kstrdup() to allocate dynamic memory for ->name of "struct kmem_cache" as suggested by Christoph Lameter. Acked-by: Christoph Lameter Cc: David Rientjes Reported-by: Linus Torvalds Signed-off-by: Pekka Enberg Conflicts: mm/slub.c --- mm/slub.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index 05674aac929..42ce1730427 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -209,6 +209,7 @@ static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p) { return 0; } static inline void sysfs_slab_remove(struct kmem_cache *s) { + kfree(s->name); kfree(s); } @@ -3169,6 +3170,16 @@ void __init kmem_cache_init(void) slab_state = UP; /* Provide the correct kmalloc names now that the caches are up */ + if (KMALLOC_MIN_SIZE <= 32) { + kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT); + BUG_ON(!kmalloc_caches[1]->name); + } + + if (KMALLOC_MIN_SIZE <= 64) { + kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT); + BUG_ON(!kmalloc_caches[2]->name); + } + for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i); @@ -3271,6 +3282,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align, unsigned long flags, void (*ctor)(void *)) { struct kmem_cache *s; + char *n; if (WARN_ON(!name)) return NULL; @@ -3294,19 +3306,25 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, return s; } + n = kstrdup(name, GFP_KERNEL); + if (!n) + goto err; + s = kmalloc(kmem_size, GFP_KERNEL); if (s) { - if (kmem_cache_open(s, name, + if (kmem_cache_open(s, n, size, align, flags, ctor)) { list_add(&s->list, &slab_caches); if (sysfs_slab_add(s)) { list_del(&s->list); + kfree(n); kfree(s); goto err; } up_write(&slub_lock); return s; } + kfree(n); kfree(s); } up_write(&slub_lock); @@ -4439,6 +4457,7 @@ static void kmem_cache_release(struct kobject *kobj) { struct kmem_cache *s = to_slab(kobj); + kfree(s->name); kfree(s); } From 0bc14062414d35c269b7c7dc3243a890886e7b38 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 18:22:47 +0200 Subject: [PATCH 13/27] vmalloc: pcpu_get/free_vm_areas() aren't needed on UP These functions are used only by percpu memory allocator on SMP. Don't build them on UP. Signed-off-by: Tejun Heo Cc: Nick Piggin --- include/linux/vmalloc.h | 2 ++ mm/vmalloc.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 01c2145118d..63a4fe6d51b 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -117,10 +117,12 @@ extern rwlock_t vmlist_lock; extern struct vm_struct *vmlist; extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); +#ifdef CONFIG_SMP struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, const size_t *sizes, int nr_vms, size_t align, gfp_t gfp_mask); void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms); +#endif #endif /* _LINUX_VMALLOC_H */ diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 6b8889da69a..c623e0ce3f0 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2056,6 +2056,7 @@ void free_vm_area(struct vm_struct *area) } EXPORT_SYMBOL_GPL(free_vm_area); +#ifdef CONFIG_SMP static struct vmap_area *node_to_va(struct rb_node *n) { return n ? 
rb_entry(n, struct vmap_area, rb_node) : NULL; @@ -2336,6 +2337,7 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) free_vm_area(vms[i]); kfree(vms); } +#endif /* CONFIG_SMP */ #ifdef CONFIG_PROC_FS static void *s_start(struct seq_file *m, loff_t *pos) From a7b6b77b8917488d2d6b99d82673845e508144a3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 18:22:47 +0200 Subject: [PATCH 14/27] percpu: reduce PCPU_MIN_UNIT_SIZE to 32k In preparation of enabling percpu allocator for UP, reduce PCPU_MIN_UNIT_SIZE to 32k. On UP, the first chunk doesn't have to include static percpu variables and chunk size can be smaller which is important as UP percpu allocator will use contiguous kernel memory to populate chunks. PCPU_MIN_UNIT_SIZE also determines the maximum supported allocation size but 32k should still be enough. Signed-off-by: Tejun Heo --- include/linux/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 49466b13c5c..fc8130a7cac 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -42,7 +42,7 @@ #ifdef CONFIG_SMP /* minimum unit size, also is the maximum supported allocation size */ -#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(64 << 10) +#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) /* * Percpu allocator can serve percpu allocations before slab is From 9b8327bb2483ded5e04df6c33cf339ce7c02f6e9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 18:22:48 +0200 Subject: [PATCH 15/27] percpu: use percpu allocator on UP too On UP, percpu allocations were redirected to kmalloc. This has the following problems. * For certain amount of allocations (determined by PERCPU_DYNAMIC_EARLY_SLOTS and PERCPU_DYNAMIC_EARLY_SIZE), percpu allocator can be used before the usual kernel memory allocator is brought online. On SMP, this is used to initialize the kernel memory allocator. * percpu allocator honors alignment upto PAGE_SIZE but kmalloc() doesn't. For example, workqueue makes use of larger alignments for cpu_workqueues. Currently, users of percpu allocators need to handle UP differently, which is somewhat fragile and ugly. Other than small amount of memory, there isn't much to lose by enabling percpu allocator on UP. It can simply use kernel memory based chunk allocation which was added for SMP archs w/o MMUs. This patch removes mm/percpu_up.c, builds mm/percpu.c on UP too and makes UP build use percpu-km. As percpu addresses and kernel addresses are always identity mapped and static percpu variables don't need any special treatment, nothing is arch dependent and mm/percpu.c implements generic setup_per_cpu_areas() for UP. Signed-off-by: Tejun Heo Cc: Christoph Lameter Cc: Pekka Enberg --- include/linux/percpu.h | 29 ++++---------------- mm/Kconfig | 8 ++++++ mm/Makefile | 7 +---- mm/percpu-km.c | 2 +- mm/percpu.c | 60 +++++++++++++++++++++++++++++++++++++++--- mm/percpu_up.c | 30 --------------------- 6 files changed, 71 insertions(+), 65 deletions(-) delete mode 100644 mm/percpu_up.c diff --git a/include/linux/percpu.h b/include/linux/percpu.h index fc8130a7cac..aeeeef1093c 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -39,8 +39,6 @@ preempt_enable(); \ } while (0) -#ifdef CONFIG_SMP - /* minimum unit size, also is the maximum supported allocation size */ #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) @@ -137,37 +135,20 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, * dynamically allocated. 
Non-atomic access to the current CPU's * version should probably be combined with get_cpu()/put_cpu(). */ +#ifdef CONFIG_SMP #define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) +#else +#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) +#endif extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); extern bool is_kernel_percpu_address(unsigned long addr); -#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA +#if !defined(CONFIG_SMP) || !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) extern void __init setup_per_cpu_areas(void); #endif extern void __init percpu_init_late(void); -#else /* CONFIG_SMP */ - -#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) - -/* can't distinguish from other static vars, always false */ -static inline bool is_kernel_percpu_address(unsigned long addr) -{ - return false; -} - -static inline void __init setup_per_cpu_areas(void) { } - -static inline void __init percpu_init_late(void) { } - -static inline void *pcpu_lpage_remapped(void *kaddr) -{ - return NULL; -} - -#endif /* CONFIG_SMP */ - extern void __percpu *__alloc_percpu(size_t size, size_t align); extern void free_percpu(void __percpu *__pdata); extern phys_addr_t per_cpu_ptr_to_phys(void *addr); diff --git a/mm/Kconfig b/mm/Kconfig index f0fb9124e41..c2c8a4a1189 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -301,3 +301,11 @@ config NOMMU_INITIAL_TRIM_EXCESS of 1 says that all excess pages should be trimmed. See Documentation/nommu-mmap.txt for more information. + +# +# UP and nommu archs use km based percpu allocator +# +config NEED_PER_CPU_KM + depends on !SMP + bool + default y diff --git a/mm/Makefile b/mm/Makefile index 34b2546a9e3..f73f75a29f8 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -11,7 +11,7 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ maccess.o page_alloc.o page-writeback.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ - page_isolation.o mm_init.o mmu_context.o \ + page_isolation.o mm_init.o mmu_context.o percpu.o \ $(mmu-y) obj-y += init-mm.o @@ -36,11 +36,6 @@ obj-$(CONFIG_FAILSLAB) += failslab.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o -ifdef CONFIG_SMP -obj-y += percpu.o -else -obj-y += percpu_up.o -endif obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o diff --git a/mm/percpu-km.c b/mm/percpu-km.c index df680855540..7037bc73bfa 100644 --- a/mm/percpu-km.c +++ b/mm/percpu-km.c @@ -27,7 +27,7 @@ * chunk size is not aligned. percpu-km code will whine about it. 
*/ -#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK +#if defined(CONFIG_SMP) && defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK) #error "contiguous percpu allocation is incompatible with paged first chunk" #endif diff --git a/mm/percpu.c b/mm/percpu.c index c76ef3891e0..9734b184aaa 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -76,6 +76,7 @@ #define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ +#ifdef CONFIG_SMP /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */ #ifndef __addr_to_pcpu_ptr #define __addr_to_pcpu_ptr(addr) \ @@ -89,6 +90,11 @@ (unsigned long)pcpu_base_addr - \ (unsigned long)__per_cpu_start) #endif +#else /* CONFIG_SMP */ +/* on UP, it's always identity mapped */ +#define __addr_to_pcpu_ptr(addr) (void __percpu *)(addr) +#define __pcpu_ptr_to_addr(ptr) (void __force *)(ptr) +#endif /* CONFIG_SMP */ struct pcpu_chunk { struct list_head list; /* linked to pcpu_slot lists */ @@ -949,6 +955,7 @@ EXPORT_SYMBOL_GPL(free_percpu); */ bool is_kernel_percpu_address(unsigned long addr) { +#ifdef CONFIG_SMP const size_t static_size = __per_cpu_end - __per_cpu_start; void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr); unsigned int cpu; @@ -959,6 +966,8 @@ bool is_kernel_percpu_address(unsigned long addr) if ((void *)addr >= start && (void *)addr < start + static_size) return true; } +#endif + /* on UP, can't distinguish from other static vars, always false */ return false; } @@ -1066,6 +1075,8 @@ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai) free_bootmem(__pa(ai), ai->__ai_size); } +#if defined(CONFIG_SMP) && (defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \ + defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK)) /** * pcpu_build_alloc_info - build alloc_info considering distances between CPUs * @reserved_size: the size of reserved percpu area in bytes @@ -1220,6 +1231,8 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info( return ai; } +#endif /* CONFIG_SMP && (CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK || + CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK) */ /** * pcpu_dump_alloc_info - print out information about pcpu_alloc_info @@ -1363,7 +1376,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, /* sanity checks */ PCPU_SETUP_BUG_ON(ai->nr_groups <= 0); +#ifdef CONFIG_SMP PCPU_SETUP_BUG_ON(!ai->static_size); +#endif PCPU_SETUP_BUG_ON(!base_addr); PCPU_SETUP_BUG_ON(ai->unit_size < size_sum); PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK); @@ -1488,6 +1503,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, return 0; } +#ifdef CONFIG_SMP + const char *pcpu_fc_names[PCPU_FC_NR] __initdata = { [PCPU_FC_AUTO] = "auto", [PCPU_FC_EMBED] = "embed", @@ -1758,8 +1775,9 @@ out_free_ar: } #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ +#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA /* - * Generic percpu area setup. + * Generic SMP percpu area setup. * * The embedding helper is used because its behavior closely resembles * the original non-dynamic generic percpu area setup. This is @@ -1770,7 +1788,6 @@ out_free_ar: * on the physical linear memory mapping which uses large page * mappings on applicable archs. 
*/ -#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); @@ -1799,13 +1816,48 @@ void __init setup_per_cpu_areas(void) PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL, pcpu_dfl_fc_alloc, pcpu_dfl_fc_free); if (rc < 0) - panic("Failed to initialized percpu areas."); + panic("Failed to initialize percpu areas."); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; } -#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ +#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ + +#else /* CONFIG_SMP */ + +/* + * UP percpu area setup. + * + * UP always uses km-based percpu allocator with identity mapping. + * Static percpu variables are indistinguishable from the usual static + * variables and don't require any special preparation. + */ +void __init setup_per_cpu_areas(void) +{ + const size_t unit_size = + roundup_pow_of_two(max_t(size_t, PCPU_MIN_UNIT_SIZE, + PERCPU_DYNAMIC_RESERVE)); + struct pcpu_alloc_info *ai; + void *fc; + + ai = pcpu_alloc_alloc_info(1, 1); + fc = __alloc_bootmem(unit_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); + if (!ai || !fc) + panic("Failed to allocate memory for percpu areas."); + + ai->dyn_size = unit_size; + ai->unit_size = unit_size; + ai->atom_size = unit_size; + ai->alloc_size = unit_size; + ai->groups[0].nr_units = 1; + ai->groups[0].cpu_map[0] = 0; + + if (pcpu_setup_first_chunk(ai, fc) < 0) + panic("Failed to initialize percpu areas."); +} + +#endif /* CONFIG_SMP */ /* * First and reserved chunks are initialized with temporary allocation diff --git a/mm/percpu_up.c b/mm/percpu_up.c deleted file mode 100644 index db884fae572..00000000000 --- a/mm/percpu_up.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * mm/percpu_up.c - dummy percpu memory allocator implementation for UP - */ - -#include -#include -#include - -void __percpu *__alloc_percpu(size_t size, size_t align) -{ - /* - * Can't easily make larger alignment work with kmalloc. WARN - * on it. Larger alignment should only be used for module - * percpu sections on SMP for which this path isn't used. - */ - WARN_ON_ONCE(align > SMP_CACHE_BYTES); - return (void __percpu __force *)kzalloc(size, GFP_KERNEL); -} -EXPORT_SYMBOL_GPL(__alloc_percpu); - -void free_percpu(void __percpu *p) -{ - kfree(this_cpu_ptr(p)); -} -EXPORT_SYMBOL_GPL(free_percpu); - -phys_addr_t per_cpu_ptr_to_phys(void *addr) -{ - return __pa(addr); -} From ed6c1115c835d822ec5d6356ae3043de54088f43 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 10 Sep 2010 10:49:37 +0200 Subject: [PATCH 16/27] percpu: clear memory allocated with the km allocator Percpu allocator should clear memory before returning it but the km allocator forgot to do it. Fix it. Signed-off-by: Tejun Heo Reported-by: Peter Zijlstra Acked-by: Peter Zijlstra --- mm/percpu-km.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mm/percpu-km.c b/mm/percpu-km.c index 7037bc73bfa..89633fefc6a 100644 --- a/mm/percpu-km.c +++ b/mm/percpu-km.c @@ -35,7 +35,11 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) { - /* noop */ + unsigned int cpu; + + for_each_possible_cpu(cpu) + memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size); + return 0; } From ed59ecbf8904a40cf0a1ee5d6f100d76d2f44e5f Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sat, 18 Sep 2010 20:45:06 +0300 Subject: [PATCH 17/27] Revert "Slub: UP bandaid" This reverts commit 5249d039500f05a5ab379286b1d23ab9b04d3f2c. 
It's not needed after commit bbddff0545878a8649c091a9dd7c43ce91516734 ("percpu: use percpu allocator on UP too"). --- mm/slub.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 42ce1730427..7e1fe663795 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2104,24 +2104,8 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) { -#ifdef CONFIG_SMP - /* - * Will use reserve that does not require slab operation during - * early boot. - */ BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE < SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu)); -#else - /* - * Special hack for UP mode. allocpercpu() falls back to kmalloc - * operations. So we cannot use that before the slab allocator is up - * Simply get the smallest possible compound page. The page will be - * released via kfree() when the cpu caches are resized later. - */ - if (slab_state < UP) - s->cpu_slab = (__percpu void *)kmalloc_large(PAGE_SIZE << 1, GFP_NOWAIT); - else -#endif s->cpu_slab = alloc_percpu(struct kmem_cache_cpu); From 7340cc84141d5236c5dd003359ee921513cd9b84 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 28 Sep 2010 08:10:26 -0500 Subject: [PATCH 18/27] slub: reduce differences between SMP and NUMA Reduce the #ifdefs and simplify bootstrap by making SMP and NUMA as much alike as possible. This means that there will be an additional indirection to get to the kmem_cache_node field under SMP. Acked-by: David Rientjes Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 5 +---- mm/slub.c | 39 +-------------------------------------- 2 files changed, 2 insertions(+), 42 deletions(-) diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index a6c43ec6a4a..b33c0f2e61d 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -96,11 +96,8 @@ struct kmem_cache { * Defragmentation by allocating from a remote node. */ int remote_node_defrag_ratio; - struct kmem_cache_node *node[MAX_NUMNODES]; -#else - /* Avoid an extra cache line for UP */ - struct kmem_cache_node local_node; #endif + struct kmem_cache_node *node[MAX_NUMNODES]; }; /* diff --git a/mm/slub.c b/mm/slub.c index 7e1fe663795..064bda294af 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -233,11 +233,7 @@ int slab_is_available(void) static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) { -#ifdef CONFIG_NUMA return s->node[node]; -#else - return &s->local_node; -#endif } /* Verify that a pointer has an address that is valid within a slab page */ @@ -871,7 +867,7 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects) * dilemma by deferring the increment of the count during * bootstrap (see early_kmem_cache_node_alloc). 
*/ - if (!NUMA_BUILD || n) { + if (n) { atomic_long_inc(&n->nr_slabs); atomic_long_add(objects, &n->total_objects); } @@ -2112,7 +2108,6 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) return s->cpu_slab != NULL; } -#ifdef CONFIG_NUMA static struct kmem_cache *kmem_cache_node; /* @@ -2202,17 +2197,6 @@ static int init_kmem_cache_nodes(struct kmem_cache *s) } return 1; } -#else -static void free_kmem_cache_nodes(struct kmem_cache *s) -{ -} - -static int init_kmem_cache_nodes(struct kmem_cache *s) -{ - init_kmem_cache_node(&s->local_node, s); - return 1; -} -#endif static void set_min_partial(struct kmem_cache *s, unsigned long min) { @@ -3023,8 +3007,6 @@ void __init kmem_cache_init(void) int caches = 0; struct kmem_cache *temp_kmem_cache; int order; - -#ifdef CONFIG_NUMA struct kmem_cache *temp_kmem_cache_node; unsigned long kmalloc_size; @@ -3048,12 +3030,6 @@ void __init kmem_cache_init(void) 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); -#else - /* Allocate a single kmem_cache from the page allocator */ - kmem_size = sizeof(struct kmem_cache); - order = get_order(kmem_size); - kmem_cache = (void *)__get_free_pages(GFP_NOWAIT, order); -#endif /* Able to allocate the per node structures */ slab_state = PARTIAL; @@ -3064,7 +3040,6 @@ void __init kmem_cache_init(void) kmem_cache = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); memcpy(kmem_cache, temp_kmem_cache, kmem_size); -#ifdef CONFIG_NUMA /* * Allocate kmem_cache_node properly from the kmem_cache slab. * kmem_cache_node is separately allocated so no need to @@ -3078,18 +3053,6 @@ void __init kmem_cache_init(void) kmem_cache_bootstrap_fixup(kmem_cache_node); caches++; -#else - /* - * kmem_cache has kmem_cache_node embedded and we moved it! - * Update the list heads - */ - INIT_LIST_HEAD(&kmem_cache->local_node.partial); - list_splice(&temp_kmem_cache->local_node.partial, &kmem_cache->local_node.partial); -#ifdef CONFIG_SLUB_DEBUG - INIT_LIST_HEAD(&kmem_cache->local_node.full); - list_splice(&temp_kmem_cache->local_node.full, &kmem_cache->local_node.full); -#endif -#endif kmem_cache_bootstrap_fixup(kmem_cache); caches++; /* Free temporary boot structure */ From f7cb1933621bce66a77f690776a16fe3ebbc4d58 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 29 Sep 2010 07:15:01 -0500 Subject: [PATCH 19/27] SLUB: Pass active and inactive redzone flags instead of boolean to debug functions Pass the actual values used for inactive and active redzoning to the functions that check the objects. Avoids a lot of the ? : things to lookup the values in the functions. Acked-by: David Rientjes Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slub.c | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 064bda294af..b5df67b0397 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -490,7 +490,7 @@ static void slab_err(struct kmem_cache *s, struct page *page, char *fmt, ...) dump_stack(); } -static void init_object(struct kmem_cache *s, void *object, int active) +static void init_object(struct kmem_cache *s, void *object, u8 val) { u8 *p = object; @@ -500,9 +500,7 @@ static void init_object(struct kmem_cache *s, void *object, int active) } if (s->flags & SLAB_RED_ZONE) - memset(p + s->objsize, - active ? 
SLUB_RED_ACTIVE : SLUB_RED_INACTIVE, - s->inuse - s->objsize); + memset(p + s->objsize, val, s->inuse - s->objsize); } static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes) @@ -637,17 +635,14 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) } static int check_object(struct kmem_cache *s, struct page *page, - void *object, int active) + void *object, u8 val) { u8 *p = object; u8 *endobject = object + s->objsize; if (s->flags & SLAB_RED_ZONE) { - unsigned int red = - active ? SLUB_RED_ACTIVE : SLUB_RED_INACTIVE; - if (!check_bytes_and_report(s, page, object, "Redzone", - endobject, red, s->inuse - s->objsize)) + endobject, val, s->inuse - s->objsize)) return 0; } else { if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) { @@ -657,7 +652,7 @@ static int check_object(struct kmem_cache *s, struct page *page, } if (s->flags & SLAB_POISON) { - if (!active && (s->flags & __OBJECT_POISON) && + if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) && (!check_bytes_and_report(s, page, p, "Poison", p, POISON_FREE, s->objsize - 1) || !check_bytes_and_report(s, page, p, "Poison", @@ -669,7 +664,7 @@ static int check_object(struct kmem_cache *s, struct page *page, check_pad_bytes(s, page, p); } - if (!s->offset && active) + if (!s->offset && val == SLUB_RED_ACTIVE) /* * Object and freepointer overlap. Cannot check * freepointer while object is allocated. @@ -887,7 +882,7 @@ static void setup_object_debug(struct kmem_cache *s, struct page *page, if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))) return; - init_object(s, object, 0); + init_object(s, object, SLUB_RED_INACTIVE); init_tracking(s, object); } @@ -907,14 +902,14 @@ static noinline int alloc_debug_processing(struct kmem_cache *s, struct page *pa goto bad; } - if (!check_object(s, page, object, 0)) + if (!check_object(s, page, object, SLUB_RED_INACTIVE)) goto bad; /* Success perform special debug activities for allocs */ if (s->flags & SLAB_STORE_USER) set_track(s, object, TRACK_ALLOC, addr); trace(s, page, object, 1); - init_object(s, object, 1); + init_object(s, object, SLUB_RED_ACTIVE); return 1; bad: @@ -947,7 +942,7 @@ static noinline int free_debug_processing(struct kmem_cache *s, goto fail; } - if (!check_object(s, page, object, 1)) + if (!check_object(s, page, object, SLUB_RED_ACTIVE)) return 0; if (unlikely(s != page->slab)) { @@ -971,7 +966,7 @@ static noinline int free_debug_processing(struct kmem_cache *s, if (s->flags & SLAB_STORE_USER) set_track(s, object, TRACK_FREE, addr); trace(s, page, object, 0); - init_object(s, object, 0); + init_object(s, object, SLUB_RED_INACTIVE); return 1; fail: @@ -1075,7 +1070,7 @@ static inline int free_debug_processing(struct kmem_cache *s, static inline int slab_pad_check(struct kmem_cache *s, struct page *page) { return 1; } static inline int check_object(struct kmem_cache *s, struct page *page, - void *object, int active) { return 1; } + void *object, u8 val) { return 1; } static inline void add_full(struct kmem_cache_node *n, struct page *page) {} static inline unsigned long kmem_cache_flags(unsigned long objsize, unsigned long flags, const char *name, @@ -1235,7 +1230,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page) slab_pad_check(s, page); for_each_object(p, s, page_address(page), page->objects) - check_object(s, page, p, 0); + check_object(s, page, p, SLUB_RED_INACTIVE); } kmemcheck_free_shadow(page, compound_order(page)); @@ -2143,7 +2138,7 @@ static void early_kmem_cache_node_alloc(int node) page->inuse++; 
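/*
 * Note, not part of the patch itself: the kmem_cache_node structure carved
 * out of this bootstrap slab is an object of the kmem_cache_node cache, so
 * the lines below install it for the node and, when debugging is enabled,
 * redzone it as an active object and record tracking information, just as
 * alloc_debug_processing() does for a regular allocation.
 */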
kmem_cache_node->node[node] = n; #ifdef CONFIG_SLUB_DEBUG - init_object(kmem_cache_node, n, 1); + init_object(kmem_cache_node, n, SLUB_RED_ACTIVE); init_tracking(kmem_cache_node, n); #endif init_kmem_cache_node(n, kmem_cache_node); From 62e346a83026a28526fc9799337bcc6154819f25 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 28 Sep 2010 08:10:28 -0500 Subject: [PATCH 20/27] slub: extract common code to remove objects from partial list without locking There are a couple of places that repeat the same statements when removing a page from the partial list. Consolidate them into __remove_partial(). Acked-by: David Rientjes Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slub.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index b5df67b0397..aad00ba486f 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1310,13 +1310,19 @@ static void add_partial(struct kmem_cache_node *n, spin_unlock(&n->list_lock); } +static inline void __remove_partial(struct kmem_cache_node *n, + struct page *page) +{ + list_del(&page->lru); + n->nr_partial--; +} + static void remove_partial(struct kmem_cache *s, struct page *page) { struct kmem_cache_node *n = get_node(s, page_to_nid(page)); spin_lock(&n->list_lock); - list_del(&page->lru); - n->nr_partial--; + __remove_partial(n, page); spin_unlock(&n->list_lock); } @@ -1329,8 +1335,7 @@ static inline int lock_and_freeze_slab(struct kmem_cache_node *n, struct page *page) { if (slab_trylock(page)) { - list_del(&page->lru); - n->nr_partial--; + __remove_partial(n, page); __SetPageSlubFrozen(page); return 1; } @@ -2462,9 +2467,8 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) spin_lock_irqsave(&n->list_lock, flags); list_for_each_entry_safe(page, h, &n->partial, lru) { if (!page->inuse) { - list_del(&page->lru); + __remove_partial(n, page); discard_slab(s, page); - n->nr_partial--; } else { list_slab_objects(s, page, "Objects remaining on kmem_cache_close()"); @@ -2822,8 +2826,7 @@ int kmem_cache_shrink(struct kmem_cache *s) * may have freed the last object and be * waiting to release the slab. */ - list_del(&page->lru); - n->nr_partial--; + __remove_partial(n, page); slab_unlock(page); discard_slab(s, page); } else { From a5dd5c117cbf620378d693963ffc42239297fac4 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 29 Sep 2010 21:02:13 +0900 Subject: [PATCH 21/27] slub: Fix signedness warnings The bit-ops routines require their address argument to be a pointer to unsigned long.
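For reference, the bit operations are declared roughly as follows (a sketch of the generic prototypes, not copied from the tree):

	void set_bit(int nr, volatile unsigned long *addr);
	int test_bit(int nr, const volatile unsigned long *addr);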
This leads sparse to complain about different signedness as follows: mm/slub.c:2425:49: warning: incorrect type in argument 2 (different signedness) mm/slub.c:2425:49: expected unsigned long volatile *addr mm/slub.c:2425:49: got long *map Acked-by: Christoph Lameter Acked-by: David Rientjes Signed-off-by: Namhyung Kim Signed-off-by: Pekka Enberg --- mm/slub.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index aad00ba486f..ac236b1ced1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2433,9 +2433,8 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page, #ifdef CONFIG_SLUB_DEBUG void *addr = page_address(page); void *p; - long *map = kzalloc(BITS_TO_LONGS(page->objects) * sizeof(long), - GFP_ATOMIC); - + unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) * + sizeof(long), GFP_ATOMIC); if (!map) return; slab_err(s, page, "%s", text); @@ -3660,7 +3659,7 @@ static int add_location(struct loc_track *t, struct kmem_cache *s, static void process_slab(struct loc_track *t, struct kmem_cache *s, struct page *page, enum track_item alloc, - long *map) + unsigned long *map) { void *addr = page_address(page); void *p; From 3478973dedee5e957c45dc93c11d12dc3f733ee0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 29 Sep 2010 21:02:14 +0900 Subject: [PATCH 22/27] slub: Add lock release annotation The unfreeze_slab() function releases the page's PG_locked bit but was missing the proper annotation. deactivate_slab() needs to be marked as well, since it calls unfreeze_slab() without grabbing the lock. Acked-by: David Rientjes Signed-off-by: Namhyung Kim Signed-off-by: Pekka Enberg --- mm/slub.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/slub.c b/mm/slub.c index ac236b1ced1..118422e220f 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1446,6 +1446,7 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node) * On exit the slab lock will have been dropped. */ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) + __releases(bitlock) { struct kmem_cache_node *n = get_node(s, page_to_nid(page)); @@ -1488,6 +1489,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) * Remove the cpu slab */ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) + __releases(bitlock) { struct page *page = c->page; int tail = 1; From 5d1f57e4d3d547b113ebd62f569be13bf485e53b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 29 Sep 2010 21:02:15 +0900 Subject: [PATCH 23/27] slub: Move NUMA-related functions under CONFIG_NUMA Compile kmem_cache_alloc_node_notrace(), kmalloc_large_node() and __kmalloc_node_track_caller() only when CONFIG_NUMA is selected. Acked-by: David Rientjes Signed-off-by: Namhyung Kim Signed-off-by: Pekka Enberg --- mm/slub.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 118422e220f..9f121c10184 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1792,7 +1792,6 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) return ret; } EXPORT_SYMBOL(kmem_cache_alloc_node); -#endif #ifdef CONFIG_TRACING void *kmem_cache_alloc_node_notrace(struct kmem_cache *s, @@ -1803,6 +1802,7 @@ void *kmem_cache_alloc_node_notrace(struct kmem_cache *s, } EXPORT_SYMBOL(kmem_cache_alloc_node_notrace); #endif +#endif /* * Slow patch handling.
This may still be called frequently since objects @@ -2673,6 +2673,7 @@ void *__kmalloc(size_t size, gfp_t flags) } EXPORT_SYMBOL(__kmalloc); +#ifdef CONFIG_NUMA static void *kmalloc_large_node(size_t size, gfp_t flags, int node) { struct page *page; @@ -2687,7 +2688,6 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node) return ptr; } -#ifdef CONFIG_NUMA void *__kmalloc_node(size_t size, gfp_t flags, int node) { struct kmem_cache *s; @@ -3342,6 +3342,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) return ret; } +#ifdef CONFIG_NUMA void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, int node, unsigned long caller) { @@ -3370,6 +3371,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, return ret; } +#endif #ifdef CONFIG_SLUB_DEBUG static int count_inuse(struct page *page) From 15b7c5142049e7efc3071280e1370dc3b8add6f5 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sat, 2 Oct 2010 11:32:32 +0300 Subject: [PATCH 24/27] SLUB: Optimize slab_free() debug check This patch optimizes slab_free() debug check to use "c->node != NUMA_NO_NODE" instead of "c->node >= 0" because the former generates smaller code on x86-64: Before: 4736: 48 39 70 08 cmp %rsi,0x8(%rax) 473a: 75 26 jne 4762 473c: 44 8b 48 10 mov 0x10(%rax),%r9d 4740: 45 85 c9 test %r9d,%r9d 4743: 78 1d js 4762 After: 4736: 48 39 70 08 cmp %rsi,0x8(%rax) 473a: 75 23 jne 475f 473c: 83 78 10 ff cmpl $0xffffffffffffffff,0x10(%rax) 4740: 74 1d je 475f This patch also cleans up __slab_alloc() to use NUMA_NO_NODE instead of "-1" for enabling debugging for a per-CPU cache. Acked-by: Christoph Lameter Acked-by: David Rientjes Signed-off-by: Pekka Enberg --- mm/slub.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 9f121c10184..a018019aa91 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1718,7 +1718,7 @@ debug: c->page->inuse++; c->page->freelist = get_freepointer(s, object); - c->node = -1; + c->node = NUMA_NO_NODE; goto unlock_out; } @@ -1895,7 +1895,7 @@ static __always_inline void slab_free(struct kmem_cache *s, slab_free_hook_irq(s, x); - if (likely(page == c->page && c->node >= 0)) { + if (likely(page == c->page && c->node != NUMA_NO_NODE)) { set_freepointer(s, object, c->freelist); c->freelist = object; stat(s, FREE_FASTPATH); From ab4d5ed5eeda4f57c50d14131ce1b1da75d0c938 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Oct 2010 13:57:26 -0500 Subject: [PATCH 25/27] slub: Enable sysfs support for !CONFIG_SLUB_DEBUG Currently disabling CONFIG_SLUB_DEBUG also disabled SYSFS support meaning that the slabs cannot be tuned without DEBUG. Make SYSFS support independent of CONFIG_SLUB_DEBUG Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 2 +- lib/Kconfig.debug | 2 +- mm/slub.c | 40 +++++++++++++++++++++++++++++++++++----- 3 files changed, 37 insertions(+), 7 deletions(-) diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index b33c0f2e61d..e4f5ed180b9 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -87,7 +87,7 @@ struct kmem_cache { unsigned long min_partial; const char *name; /* Name (only for display!) 
*/ struct list_head list; /* List of slab caches */ -#ifdef CONFIG_SLUB_DEBUG +#ifdef CONFIG_SYSFS struct kobject kobj; /* For sysfs */ #endif diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1b4afd2e6ca..b6263651a95 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -353,7 +353,7 @@ config SLUB_DEBUG_ON config SLUB_STATS default n bool "Enable SLUB performance statistics" - depends on SLUB && SLUB_DEBUG && SYSFS + depends on SLUB && SYSFS help SLUB statistics are useful to debug SLUBs allocation behavior in order find ways to optimize the allocator. This should never be diff --git a/mm/slub.c b/mm/slub.c index a018019aa91..be4d66231c6 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -198,7 +198,7 @@ struct track { enum track_item { TRACK_ALLOC, TRACK_FREE }; -#ifdef CONFIG_SLUB_DEBUG +#ifdef CONFIG_SYSFS static int sysfs_slab_add(struct kmem_cache *); static int sysfs_slab_alias(struct kmem_cache *, const char *); static void sysfs_slab_remove(struct kmem_cache *); @@ -1102,7 +1102,7 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x) {} static inline void slab_free_hook_irq(struct kmem_cache *s, void *object) {} -#endif +#endif /* CONFIG_SLUB_DEBUG */ /* * Slab allocation and freeing @@ -3373,7 +3373,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, } #endif -#ifdef CONFIG_SLUB_DEBUG +#ifdef CONFIG_SYSFS static int count_inuse(struct page *page) { return page->inuse; @@ -3383,7 +3383,9 @@ static int count_total(struct page *page) { return page->objects; } +#endif +#ifdef CONFIG_SLUB_DEBUG static int validate_slab(struct kmem_cache *s, struct page *page, unsigned long *map) { @@ -3474,6 +3476,7 @@ static long validate_slab_cache(struct kmem_cache *s) kfree(map); return count; } +#endif #ifdef SLUB_RESILIENCY_TEST static void resiliency_test(void) @@ -3532,9 +3535,12 @@ static void resiliency_test(void) validate_slab_cache(kmalloc_caches[9]); } #else +#ifdef CONFIG_SYSFS static void resiliency_test(void) {}; #endif +#endif +#ifdef CONFIG_DEBUG /* * Generate lists of code addresses where slabcache objects are allocated * and freed. 
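/*
 * Note, not part of the patch itself: these location lists back the
 * alloc_calls and free_calls sysfs files shown further below, whose
 * handlers return -ENOSYS unless the cache was created with
 * SLAB_STORE_USER.
 */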
@@ -3763,7 +3769,9 @@ static int list_locations(struct kmem_cache *s, char *buf, len += sprintf(buf, "No data\n"); return len; } +#endif +#ifdef CONFIG_SYSFS enum slab_stat_type { SL_ALL, /* All slabs */ SL_PARTIAL, /* Only partially allocated slabs */ @@ -3816,6 +3824,8 @@ static ssize_t show_slab_objects(struct kmem_cache *s, } } + down_read(&slub_lock); +#ifdef CONFIG_SLUB_DEBUG if (flags & SO_ALL) { for_each_node_state(node, N_NORMAL_MEMORY) { struct kmem_cache_node *n = get_node(s, node); @@ -3832,7 +3842,9 @@ static ssize_t show_slab_objects(struct kmem_cache *s, nodes[node] += x; } - } else if (flags & SO_PARTIAL) { + } else +#endif + if (flags & SO_PARTIAL) { for_each_node_state(node, N_NORMAL_MEMORY) { struct kmem_cache_node *n = get_node(s, node); @@ -3857,6 +3869,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s, return x + sprintf(buf + x, "\n"); } +#ifdef CONFIG_SLUB_DEBUG static int any_slab_objects(struct kmem_cache *s) { int node; @@ -3872,6 +3885,7 @@ static int any_slab_objects(struct kmem_cache *s) } return 0; } +#endif #define to_slab_attr(n) container_of(n, struct slab_attribute, attr) #define to_slab(n) container_of(n, struct kmem_cache, kobj); @@ -3973,11 +3987,13 @@ static ssize_t aliases_show(struct kmem_cache *s, char *buf) } SLAB_ATTR_RO(aliases); +#ifdef CONFIG_SLUB_DEBUG static ssize_t slabs_show(struct kmem_cache *s, char *buf) { return show_slab_objects(s, buf, SO_ALL); } SLAB_ATTR_RO(slabs); +#endif static ssize_t partial_show(struct kmem_cache *s, char *buf) { @@ -4003,6 +4019,7 @@ static ssize_t objects_partial_show(struct kmem_cache *s, char *buf) } SLAB_ATTR_RO(objects_partial); +#ifdef CONFIG_SLUB_DEBUG static ssize_t total_objects_show(struct kmem_cache *s, char *buf) { return show_slab_objects(s, buf, SO_ALL|SO_TOTAL); @@ -4055,6 +4072,7 @@ static ssize_t failslab_store(struct kmem_cache *s, const char *buf, } SLAB_ATTR(failslab); #endif +#endif static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf) { @@ -4091,6 +4109,7 @@ static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf) } SLAB_ATTR_RO(destroy_by_rcu); +#ifdef CONFIG_SLUB_DEBUG static ssize_t red_zone_show(struct kmem_cache *s, char *buf) { return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE)); @@ -4166,6 +4185,7 @@ static ssize_t validate_store(struct kmem_cache *s, return ret; } SLAB_ATTR(validate); +#endif static ssize_t shrink_show(struct kmem_cache *s, char *buf) { @@ -4186,6 +4206,7 @@ static ssize_t shrink_store(struct kmem_cache *s, } SLAB_ATTR(shrink); +#ifdef CONFIG_SLUB_DEBUG static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf) { if (!(s->flags & SLAB_STORE_USER)) @@ -4201,6 +4222,7 @@ static ssize_t free_calls_show(struct kmem_cache *s, char *buf) return list_locations(s, buf, TRACK_FREE); } SLAB_ATTR_RO(free_calls); +#endif #ifdef CONFIG_NUMA static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf) @@ -4307,25 +4329,33 @@ static struct attribute *slab_attrs[] = { &min_partial_attr.attr, &objects_attr.attr, &objects_partial_attr.attr, +#ifdef CONFIG_SLUB_DEBUG &total_objects_attr.attr, &slabs_attr.attr, +#endif &partial_attr.attr, &cpu_slabs_attr.attr, &ctor_attr.attr, &aliases_attr.attr, &align_attr.attr, +#ifdef CONFIG_SLUB_DEBUG &sanity_checks_attr.attr, &trace_attr.attr, +#endif &hwcache_align_attr.attr, &reclaim_account_attr.attr, &destroy_by_rcu_attr.attr, +#ifdef CONFIG_SLUB_DEBUG &red_zone_attr.attr, &poison_attr.attr, &store_user_attr.attr, &validate_attr.attr, +#endif &shrink_attr.attr, +#ifdef 
CONFIG_SLUB_DEBUG &alloc_calls_attr.attr, &free_calls_attr.attr, +#endif #ifdef CONFIG_ZONE_DMA &cache_dma_attr.attr, #endif @@ -4608,7 +4638,7 @@ static int __init slab_sysfs_init(void) } __initcall(slab_sysfs_init); -#endif +#endif /* CONFIG_SYSFS */ /* * The /proc/slabinfo ABI From a5a84755c5900416dce0166ac825866aad8048ef Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Oct 2010 13:57:27 -0500 Subject: [PATCH 26/27] slub: Move functions to reduce #ifdefs There is a lot of #ifdef/#endifs that can be avoided if functions would be in different places. Move them around and reduce #ifdef. Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slub.c | 299 ++++++++++++++++++++++++++---------------------------- 1 file changed, 142 insertions(+), 157 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index be4d66231c6..fe536d3474d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3476,71 +3476,6 @@ static long validate_slab_cache(struct kmem_cache *s) kfree(map); return count; } -#endif - -#ifdef SLUB_RESILIENCY_TEST -static void resiliency_test(void) -{ - u8 *p; - - BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || SLUB_PAGE_SHIFT < 10); - - printk(KERN_ERR "SLUB resiliency testing\n"); - printk(KERN_ERR "-----------------------\n"); - printk(KERN_ERR "A. Corruption after allocation\n"); - - p = kzalloc(16, GFP_KERNEL); - p[16] = 0x12; - printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer" - " 0x12->0x%p\n\n", p + 16); - - validate_slab_cache(kmalloc_caches[4]); - - /* Hmmm... The next two are dangerous */ - p = kzalloc(32, GFP_KERNEL); - p[32 + sizeof(void *)] = 0x34; - printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab" - " 0x34 -> -0x%p\n", p); - printk(KERN_ERR - "If allocated object is overwritten then not detectable\n\n"); - - validate_slab_cache(kmalloc_caches[5]); - p = kzalloc(64, GFP_KERNEL); - p += 64 + (get_cycles() & 0xff) * sizeof(void *); - *p = 0x56; - printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n", - p); - printk(KERN_ERR - "If allocated object is overwritten then not detectable\n\n"); - validate_slab_cache(kmalloc_caches[6]); - - printk(KERN_ERR "\nB. Corruption after free\n"); - p = kzalloc(128, GFP_KERNEL); - kfree(p); - *p = 0x78; - printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p); - validate_slab_cache(kmalloc_caches[7]); - - p = kzalloc(256, GFP_KERNEL); - kfree(p); - p[50] = 0x9a; - printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", - p); - validate_slab_cache(kmalloc_caches[8]); - - p = kzalloc(512, GFP_KERNEL); - kfree(p); - p[512] = 0xab; - printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p); - validate_slab_cache(kmalloc_caches[9]); -} -#else -#ifdef CONFIG_SYSFS -static void resiliency_test(void) {}; -#endif -#endif - -#ifdef CONFIG_DEBUG /* * Generate lists of code addresses where slabcache objects are allocated * and freed. @@ -3771,6 +3706,68 @@ static int list_locations(struct kmem_cache *s, char *buf, } #endif +#ifdef SLUB_RESILIENCY_TEST +static void resiliency_test(void) +{ + u8 *p; + + BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || SLUB_PAGE_SHIFT < 10); + + printk(KERN_ERR "SLUB resiliency testing\n"); + printk(KERN_ERR "-----------------------\n"); + printk(KERN_ERR "A. Corruption after allocation\n"); + + p = kzalloc(16, GFP_KERNEL); + p[16] = 0x12; + printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer" + " 0x12->0x%p\n\n", p + 16); + + validate_slab_cache(kmalloc_caches[4]); + + /* Hmmm... 
The next two are dangerous */ + p = kzalloc(32, GFP_KERNEL); + p[32 + sizeof(void *)] = 0x34; + printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab" + " 0x34 -> -0x%p\n", p); + printk(KERN_ERR + "If allocated object is overwritten then not detectable\n\n"); + + validate_slab_cache(kmalloc_caches[5]); + p = kzalloc(64, GFP_KERNEL); + p += 64 + (get_cycles() & 0xff) * sizeof(void *); + *p = 0x56; + printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n", + p); + printk(KERN_ERR + "If allocated object is overwritten then not detectable\n\n"); + validate_slab_cache(kmalloc_caches[6]); + + printk(KERN_ERR "\nB. Corruption after free\n"); + p = kzalloc(128, GFP_KERNEL); + kfree(p); + *p = 0x78; + printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p); + validate_slab_cache(kmalloc_caches[7]); + + p = kzalloc(256, GFP_KERNEL); + kfree(p); + p[50] = 0x9a; + printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", + p); + validate_slab_cache(kmalloc_caches[8]); + + p = kzalloc(512, GFP_KERNEL); + kfree(p); + p[512] = 0xab; + printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p); + validate_slab_cache(kmalloc_caches[9]); +} +#else +#ifdef CONFIG_SYSFS +static void resiliency_test(void) {}; +#endif +#endif + #ifdef CONFIG_SYSFS enum slab_stat_type { SL_ALL, /* All slabs */ @@ -3987,14 +3984,6 @@ static ssize_t aliases_show(struct kmem_cache *s, char *buf) } SLAB_ATTR_RO(aliases); -#ifdef CONFIG_SLUB_DEBUG -static ssize_t slabs_show(struct kmem_cache *s, char *buf) -{ - return show_slab_objects(s, buf, SO_ALL); -} -SLAB_ATTR_RO(slabs); -#endif - static ssize_t partial_show(struct kmem_cache *s, char *buf) { return show_slab_objects(s, buf, SO_PARTIAL); @@ -4019,61 +4008,6 @@ static ssize_t objects_partial_show(struct kmem_cache *s, char *buf) } SLAB_ATTR_RO(objects_partial); -#ifdef CONFIG_SLUB_DEBUG -static ssize_t total_objects_show(struct kmem_cache *s, char *buf) -{ - return show_slab_objects(s, buf, SO_ALL|SO_TOTAL); -} -SLAB_ATTR_RO(total_objects); - -static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf) -{ - return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE)); -} - -static ssize_t sanity_checks_store(struct kmem_cache *s, - const char *buf, size_t length) -{ - s->flags &= ~SLAB_DEBUG_FREE; - if (buf[0] == '1') - s->flags |= SLAB_DEBUG_FREE; - return length; -} -SLAB_ATTR(sanity_checks); - -static ssize_t trace_show(struct kmem_cache *s, char *buf) -{ - return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE)); -} - -static ssize_t trace_store(struct kmem_cache *s, const char *buf, - size_t length) -{ - s->flags &= ~SLAB_TRACE; - if (buf[0] == '1') - s->flags |= SLAB_TRACE; - return length; -} -SLAB_ATTR(trace); - -#ifdef CONFIG_FAILSLAB -static ssize_t failslab_show(struct kmem_cache *s, char *buf) -{ - return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB)); -} - -static ssize_t failslab_store(struct kmem_cache *s, const char *buf, - size_t length) -{ - s->flags &= ~SLAB_FAILSLAB; - if (buf[0] == '1') - s->flags |= SLAB_FAILSLAB; - return length; -} -SLAB_ATTR(failslab); -#endif -#endif - static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf) { return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT)); @@ -4110,6 +4044,48 @@ static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf) SLAB_ATTR_RO(destroy_by_rcu); #ifdef CONFIG_SLUB_DEBUG +static ssize_t slabs_show(struct kmem_cache *s, char *buf) +{ + return show_slab_objects(s, buf, SO_ALL); +} +SLAB_ATTR_RO(slabs); + 
+static ssize_t total_objects_show(struct kmem_cache *s, char *buf) +{ + return show_slab_objects(s, buf, SO_ALL|SO_TOTAL); +} +SLAB_ATTR_RO(total_objects); + +static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf) +{ + return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE)); +} + +static ssize_t sanity_checks_store(struct kmem_cache *s, + const char *buf, size_t length) +{ + s->flags &= ~SLAB_DEBUG_FREE; + if (buf[0] == '1') + s->flags |= SLAB_DEBUG_FREE; + return length; +} +SLAB_ATTR(sanity_checks); + +static ssize_t trace_show(struct kmem_cache *s, char *buf) +{ + return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE)); +} + +static ssize_t trace_store(struct kmem_cache *s, const char *buf, + size_t length) +{ + s->flags &= ~SLAB_TRACE; + if (buf[0] == '1') + s->flags |= SLAB_TRACE; + return length; +} +SLAB_ATTR(trace); + static ssize_t red_zone_show(struct kmem_cache *s, char *buf) { return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE)); @@ -4185,6 +4161,39 @@ static ssize_t validate_store(struct kmem_cache *s, return ret; } SLAB_ATTR(validate); + +static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf) +{ + if (!(s->flags & SLAB_STORE_USER)) + return -ENOSYS; + return list_locations(s, buf, TRACK_ALLOC); +} +SLAB_ATTR_RO(alloc_calls); + +static ssize_t free_calls_show(struct kmem_cache *s, char *buf) +{ + if (!(s->flags & SLAB_STORE_USER)) + return -ENOSYS; + return list_locations(s, buf, TRACK_FREE); +} +SLAB_ATTR_RO(free_calls); +#endif /* CONFIG_SLUB_DEBUG */ + +#ifdef CONFIG_FAILSLAB +static ssize_t failslab_show(struct kmem_cache *s, char *buf) +{ + return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB)); +} + +static ssize_t failslab_store(struct kmem_cache *s, const char *buf, + size_t length) +{ + s->flags &= ~SLAB_FAILSLAB; + if (buf[0] == '1') + s->flags |= SLAB_FAILSLAB; + return length; +} +SLAB_ATTR(failslab); #endif static ssize_t shrink_show(struct kmem_cache *s, char *buf) @@ -4206,24 +4215,6 @@ static ssize_t shrink_store(struct kmem_cache *s, } SLAB_ATTR(shrink); -#ifdef CONFIG_SLUB_DEBUG -static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf) -{ - if (!(s->flags & SLAB_STORE_USER)) - return -ENOSYS; - return list_locations(s, buf, TRACK_ALLOC); -} -SLAB_ATTR_RO(alloc_calls); - -static ssize_t free_calls_show(struct kmem_cache *s, char *buf) -{ - if (!(s->flags & SLAB_STORE_USER)) - return -ENOSYS; - return list_locations(s, buf, TRACK_FREE); -} -SLAB_ATTR_RO(free_calls); -#endif - #ifdef CONFIG_NUMA static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf) { @@ -4329,30 +4320,24 @@ static struct attribute *slab_attrs[] = { &min_partial_attr.attr, &objects_attr.attr, &objects_partial_attr.attr, -#ifdef CONFIG_SLUB_DEBUG - &total_objects_attr.attr, - &slabs_attr.attr, -#endif &partial_attr.attr, &cpu_slabs_attr.attr, &ctor_attr.attr, &aliases_attr.attr, &align_attr.attr, -#ifdef CONFIG_SLUB_DEBUG - &sanity_checks_attr.attr, - &trace_attr.attr, -#endif &hwcache_align_attr.attr, &reclaim_account_attr.attr, &destroy_by_rcu_attr.attr, + &shrink_attr.attr, #ifdef CONFIG_SLUB_DEBUG + &total_objects_attr.attr, + &slabs_attr.attr, + &sanity_checks_attr.attr, + &trace_attr.attr, &red_zone_attr.attr, &poison_attr.attr, &store_user_attr.attr, &validate_attr.attr, -#endif - &shrink_attr.attr, -#ifdef CONFIG_SLUB_DEBUG &alloc_calls_attr.attr, &free_calls_attr.attr, #endif From 92a5bbc11ff2442a54b2f1d313088c245828ef4e Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 6 Oct 2010 16:58:16 +0300 Subject: [PATCH 27/27] 
SLUB: Fix memory hotplug with !NUMA This patch fixes the following build breakage when memory hotplug is enabled on UMA configurations: /home/test/linux-2.6/mm/slub.c: In function 'kmem_cache_init': /home/test/linux-2.6/mm/slub.c:3031:2: error: 'slab_memory_callback' undeclared (first use in this function) /home/test/linux-2.6/mm/slub.c:3031:2: note: each undeclared identifier is reported only once for each function it appears in make[2]: *** [mm/slub.o] Error 1 make[1]: *** [mm] Error 2 make: *** [sub-make] Error 2 Reported-by: Zimny Lech Acked-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index fe536d3474d..8fd5401bb07 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2851,7 +2851,7 @@ int kmem_cache_shrink(struct kmem_cache *s) } EXPORT_SYMBOL(kmem_cache_shrink); -#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG) +#if defined(CONFIG_MEMORY_HOTPLUG) static int slab_mem_going_offline_callback(void *arg) { struct kmem_cache *s;
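/*
 * Note, not part of the patch itself: after the earlier bootstrap rework,
 * kmem_cache_init() registers the notifier regardless of CONFIG_NUMA,
 * roughly:
 *
 *	void __init kmem_cache_init(void)
 *	{
 *		...
 *		hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
 *		...
 *	}
 *
 * so slab_memory_callback() and helpers like this one must be built
 * whenever CONFIG_MEMORY_HOTPLUG is enabled, which is what dropping the
 * CONFIG_NUMA test above achieves.
 */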