Index: linux-2.6.19-mm1/mm/slub.c
===================================================================
--- linux-2.6.19-mm1.orig/mm/slub.c	2006-12-14 20:20:17.843144756 -0800
+++ linux-2.6.19-mm1/mm/slub.c	2006-12-14 23:56:06.571279992 -0800
@@ -1,10 +1,18 @@
 /*
- * Generic Slabifier for the allocator abstraction framework.
+ * Uncached Slab allocator SLUB.
+ *
+ * This allocator uses slabs of objects as caches and does not manage
+ * lists of cached objects like the regular Linux SLAB allocator.
+ *
  *
  * The allocator synchronizes using slab based locks and only
  * uses a centralized list lock to manage the pool of partial slabs.
  *
  * (C) 2006 Silicon Graphics Inc., Christoph Lameter
+ *
+ * TODO:
+ *	- NUMA per node partial slab management
+ *	- Performance tests.
  */

 #include
@@ -23,7 +31,7 @@
  * Enabling SLAB_DEBUG results in internal consistency checks
  * being enabled.
  */
-#undef SLAB_DEBUG
+#define SLAB_DEBUG

 /*
  * SLAB_DEBUG_KFREE enabled checking for double frees. In order to do this
@@ -39,9 +47,9 @@
  * statistics are only kept per slab and thus one will not be able to
  * separate out the uses of various slabs.
  */
-#ifndef SLAB_DEBUG
+//#ifndef SLAB_DEBUG
 #define SLAB_MERGE
-#endif
+//#endif

 /*
  * Set of flags that will prohibit slab merging
@@ -58,12 +66,11 @@
 #define ARCH_SLAB_MINALIGN sizeof(void *)
 #endif

-#ifdef CONFIG_NUMA
-
 /* We need to bootstrap the slab with the active slabs in a special way */
 #define ACTIVE_SLAB_NR kmalloc_index(sizeof(struct active_slab))
 #define ACTIVE_SLAB_SLAB &kmalloc_caches[ACTIVE_SLAB_NR - KMALLOC_SHIFT_LOW]

+#ifdef CONFIG_NUMA
 #define ACTIVE_SLAB(__s,__cpu) ((__s)->active[__cpu])
 #else
 #define ACTIVE_SLAB(__s,__cpu) (&(__s)->active[__cpu])
@@ -116,17 +123,17 @@ void unregister_slab(struct kmem_cache *
  * 1. slab_lock(page)
  * 2. slab->list_lock
  *
- * The slub assigns one slab for allocation to each processor.
+ * SLUB assigns one "active" slab for allocation to each processor.
  * Allocations only occur from these active slabs.
  *
- * If a cpu slab is active then a workqueue thread checks every 10
+ * If a slab is active then a workqueue thread checks every few
  * seconds if the cpu slab is still in use. The cpu slab is pushed back
  * to the list if inactive [only needed for SMP].
  *
- * Leftover slabs with free elements are kept on a partial list.
+ * Slabs with free and used objects are kept on a partial list.
  * There is no list for full slabs. If an object in a full slab is
  * freed then the slab will show up again on the partial lists.
- * Otherwise there is no need to track filled up slabs.
+ * Otherwise there is no need to track full slabs.
  *
  * Slabs are freed when they become empty. Teardown and setup is
  * minimal so we rely on the page allocators per cpu caches for
@@ -231,6 +238,15 @@ static __always_inline void slab_unlock(
 #endif
 }

+static __always_inline int slab_trylock(struct page *page)
+{
+#ifdef CONFIG_SMP
+	return bit_spin_trylock(PG_locked, &page->flags);
+#else
+	return 1;
+#endif
+}
+
 /*
  * Management of partially allocated slabs
  */
@@ -259,7 +275,7 @@ static void __always_inline remove_parti
 static __always_inline int lock_and_del_slab(struct kmem_cache *s,
 							struct page *page)
 {
-	if (bit_spin_trylock(PG_locked, &page->flags)) {
+	if (slab_trylock(page)) {
 		list_del(&page->lru);
 		s->nr_partial--;
 		return 1;
@@ -292,7 +308,8 @@ static struct page *get_partial(struct k
 {
 	struct page *page;

-	/* Racy check. If we mistakenly see no partial slabs then we
+	/*
+	 * Racy check. If we mistakenly see no partial slabs then we
 	 * just allocate an empty slab. If we mistakenly try to get a
 	 * partial slab then get_partials() will return NULL.
 	 */
@@ -301,13 +318,16 @@ static struct page *get_partial(struct k

 	spin_lock(&s->list_lock);
+	/* First find a partial slab that fits the preferred NUMA node */
 	page = numa_partial(s, flags, node);
 	if (page)
 		goto out;

+	/* If we cannot fall back then fail */
 	if (NUMA_BUILD && !(flags & __GFP_THISNODE))
 		goto out;

+	/* Pick any partial slab */
 	list_for_each_entry(page, &s->partial, lru)
 		if (likely(lock_and_del_slab(s, page)))
 			goto out;
@@ -406,9 +426,6 @@ void check_free_chain(struct kmem_cache
 #endif
 }

-/*
- * Operations on slabs
- */
 static void discard_slab(struct kmem_cache *s, struct page *page)
 {
 	atomic_long_dec(&s->nr_slabs);
@@ -429,6 +446,9 @@ static struct page *new_slab(struct kmem
 {
 	struct page *page;

+	if (flags & __GFP_NO_GROW)
+		return NULL;
+
 	page = allocate_slab(s, flags & GFP_LEVEL_MASK, node);
 	if (!page)
 		return NULL;
@@ -480,7 +500,7 @@ static void __always_inline putback_slab
 /*
  * Remove the currently active slab
  */
-static void __always_inline deactivate_slab(struct active_slab *a)
+static void deactivate_slab(struct active_slab *a)
 {
 	struct page *page = a->page;
 	struct kmem_cache *s = a->slab;
@@ -531,7 +551,9 @@ static void flush_active(void *d)
 	if (likely(a->page)) {
 		slab_lock(a->page);
 		deactivate_slab(a);
+#ifdef CONFIG_SMP
 		a->flush_active = 0;
+#endif
 	}
 }

@@ -561,22 +583,13 @@ void check_flush_active(struct work_stru
 	}
 	local_irq_enable();
 }
+#endif

 static void drain_all(struct kmem_cache *s)
 {
 	on_each_cpu(flush_active, s , 1, 1);
 }

-#else
-static void drain_all(struct kmem_cache *s)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	flush_active(s);
-	local_irq_restore(flags);
-}
-#endif

 static __always_inline void *allocate(struct kmem_cache *s,
 						gfp_t gfpflags, int node)
@@ -590,19 +603,26 @@ static __always_inline void *allocate(st
 	if (unlikely(!a->page))
 		goto new_slab;

+	/*
+	 * Check NUMA conditions if they exist. This is
+	 * optimized away for kmem_cache_alloc().
+	 */
+	if (unlikely(node != -1 && page_to_nid(a->page) != node)) {
+		slab_lock(a->page);
+		deactivate_slab(a);
+		goto new_slab;
+	}
+
 	if (likely(a->nr_free))
 		goto get_object;

 	slab_lock(a->page);
-	if (node != -1 && page_to_nid(a->page) != node)
-		goto switch_slabs;
-
 	check_free_chain(s, a->page);
-	if (a->page->freelist)
+	if (likely(a->page->freelist))
 		goto get_freelist;

-switch_slabs:
 	deactivate_slab(a);
+
 new_slab:
 	a->page = get_partial(s, gfpflags, node);
 	if (unlikely(!a->page)) {
@@ -611,7 +631,7 @@ new_slab:
 		if (flags & __GFP_WAIT)
 			local_irq_enable();

-		page = new_slab(s, flags, node);
+		page = new_slab(s, gfpflags, node);

 		if (flags & __GFP_WAIT)
 			local_irq_disable();
@@ -665,7 +685,7 @@ get_object:
 	a->freelist = object[a->page->offset];

 #ifdef CONFIG_SMP
-	if (!a->flush_active && keventd_up()) {
+	if (unlikely(!a->flush_active && keventd_up())) {
 		a->flush_active = 1;
 		schedule_delayed_work(&a->flush, 2 * HZ);
 	}
@@ -755,9 +775,7 @@ void kmem_cache_free(struct kmem_cache *
 		a->nr_free++;
 		object[s->offset] = a->freelist;
 		a->freelist = object;
-out:
-		local_irq_restore(flags);
-		return;
+		goto out;
 	}

 	if (unlikely(PageSlabsingle(page))) {
@@ -772,11 +790,8 @@ out:
 	page->freelist = object;
 	page->inuse--;

-	if (likely(PageActive(page) || (page->inuse && prior))) {
-out_unlock:
-		slab_unlock(page);
-		goto out;
-	}
+	if (likely(PageActive(page) || (page->inuse && prior)))
+		goto out_unlock;

 	if (!prior) {
 		/*
@@ -784,16 +799,20 @@ out_unlock:
 		 * object now. So move to the partial list.
 		 */
 		add_partial(s, page);
-		goto out_unlock;
+out_unlock:
+		slab_unlock(page);
+		goto out;
 	}

 	/*
-	 * All object have been freed.
+	 * Slab is empty.
 	 */
 	remove_partial(s, page);
 	slab_unlock(page);
 	discard_slab(s, page);
-	goto out;
+out:
+	local_irq_restore(flags);
+	return;

 #ifdef SLAB_DEBUG_KFREE
 double_free:
@@ -1333,7 +1352,7 @@ static struct kmem_cache *get_slab(size_
 	struct kmem_cache *s;
 	size_t realsize;

-	BUG_ON(size < 0);
+	BUG_ON(index < 0);

 	if (!(flags & __GFP_DMA))
 		return &kmalloc_caches[index];
@@ -1343,7 +1362,7 @@ static struct kmem_cache *get_slab(size_
 		return s;

 	/* Dynamically create dma cache */
-	s = kmalloc(sizeof(struct kmem_cache), flags & ~(__GFP_DMA));
+	s = kmalloc(sizeof(struct kmem_cache), flags & ~__GFP_DMA);
 	if (!s)
 		panic("Unable to allocate memory for dma cache\n");
@@ -1401,7 +1420,6 @@ EXPORT_SYMBOL(kfree);
 void __init kmem_cache_init(void)
 {
 	int i;
-	char *bootname = "kmalloc";

 	/*
 	 * NUMA Bootstrap only works if the slab for the active_slab
@@ -1411,6 +1429,7 @@ void __init kmem_cache_init(void)
 	kmem_cache_open(ACTIVE_SLAB_SLAB, "active_slab",
 		1 << ACTIVE_SLAB_NR,
 		ARCH_KMALLOC_MINALIGN, SLAB_PANIC, NULL, NULL);
+	slab_state = PARTIAL;

 	/* Power of two sized caches */
@@ -1418,7 +1437,7 @@ void __init kmem_cache_init(void)
 		if (i != ACTIVE_SLAB_NR)
 			kmem_cache_open(
 				&kmalloc_caches[i - KMALLOC_SHIFT_LOW],
-				bootname, 1 << i,
+				"kmalloc", 1 << i,
 				ARCH_KMALLOC_MINALIGN, SLAB_PANIC, NULL, NULL);

 #ifdef KMALLOC_EXTRA
@@ -1435,9 +1454,14 @@ void __init kmem_cache_init(void)
 	slab_state = UP;

 	/* We can provide the correct kmalloc names now that the caches are up */
-	for (i = 0; i < KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW; i++)
-		kmalloc_caches[i].name = kasprintf(GFP_KERNEL, "kmalloc-%d",
+	for (i = 0; i <= KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW; i++) {
+		char *name = kasprintf(GFP_KERNEL, "kmalloc-%d",
 				kmalloc_caches[i].size);
+
+		BUG_ON(!name);
+		kmalloc_caches[i].name = name;
+	}
+
 	printk(KERN_INFO "Kmalloc cache initialized: Caches=%d"
 		" Min_order=%d.\n", KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW
 				+ KMALLOC_EXTRAS,
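
---

For review convenience: the free pointer scheme that allocate() and kmem_cache_free() manipulate above (object[a->page->offset] on the alloc side, object[s->offset] on the free side) is a linked list threaded through the free objects themselves, so a slab needs no external metadata per object. Below is a minimal standalone userspace model of that pop/push pair. It is a sketch only: slab_init/slab_alloc/slab_free, SLAB_SIZE, OBJ_SIZE and OFFSET are made-up names and assumed values for illustration, not code from this patch.

#include <stdio.h>

#define SLAB_SIZE 4096			/* assume one page per slab */
#define OBJ_SIZE  64			/* assumed fixed object size */
#define OFFSET    0			/* word index of the free pointer in an object */

struct slab {
	void *freelist;			/* first free object, NULL when exhausted */
	int inuse;			/* handed-out objects, like page->inuse */
	char mem[SLAB_SIZE];
};

/* Thread a free chain through every object, as slab setup would. */
static void slab_init(struct slab *s)
{
	char *p = s->mem;

	s->inuse = 0;
	s->freelist = p;
	for (; p + OBJ_SIZE < s->mem + SLAB_SIZE; p += OBJ_SIZE)
		((void **)p)[OFFSET] = p + OBJ_SIZE;
	((void **)p)[OFFSET] = NULL;	/* last object terminates the chain */
}

/* Pop one object: mirrors freelist = object[offset] in the patch. */
static void *slab_alloc(struct slab *s)
{
	void **object = s->freelist;

	if (!object)
		return NULL;		/* slab exhausted: caller needs another slab */
	s->freelist = object[OFFSET];
	s->inuse++;
	return object;
}

/* Push one object back: mirrors object[offset] = freelist; freelist = object. */
static void slab_free(struct slab *s, void *x)
{
	void **object = x;

	object[OFFSET] = s->freelist;
	s->freelist = object;
	s->inuse--;
}

int main(void)
{
	static struct slab s;
	void *a, *b;

	slab_init(&s);
	a = slab_alloc(&s);
	b = slab_alloc(&s);
	printf("allocated %p and %p, inuse=%d\n", a, b, s.inuse);
	slab_free(&s, a);
	printf("freed %p, head of freelist is now %p\n", a, s.freelist);
	return 0;
}

The kernel version performs the same two pointer operations under the slab lock; what the patch adds around them is the per-cpu active slab fast path and the partial list transitions shown in the kmem_cache_free() hunks: add_partial() when a full slab sees its first free (!prior), discard_slab() when the last object comes back.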