Index: linux-2.6.19-mm1/mm/slub.c
===================================================================
--- linux-2.6.19-mm1.orig/mm/slub.c	2006-12-14 20:20:17.843144756 -0800
+++ linux-2.6.19-mm1/mm/slub.c	2006-12-14 23:56:06.571279992 -0800
@@ -1,10 +1,18 @@
 /*
- * Generic Slabifier for the allocator abstraction framework.
+ * Uncached Slab allocator SLUB.
+ *
+ * This allocator uses slabs of objects as caches and does not manage
+ * lists of cached objects like the regular Linux SLAB allocator.
+ *
  *
  * The allocator synchronizes using slab based locks and only
  * uses a centralized list lock to manage the pool of partial slabs.
  *
  * (C) 2006 Silicon Graphics Inc., Christoph Lameter
+ *
+ * TODO:
+ *	- NUMA per node partial slab management
+ *	- Performance tests.
  */

 #include
@@ -23,7 +31,7 @@
  * Enabling SLAB_DEBUG results in internal consistency checks
  * being enabled.
  */
-#undef SLAB_DEBUG
+#define SLAB_DEBUG

 /*
  * SLAB_DEBUG_KFREE enabled checking for double frees. In order to do this
@@ -39,9 +47,9 @@
  * statistics are only kept per slab and thus one will not be able to
  * separate out the uses of various slabs.
  */
-#ifndef SLAB_DEBUG
+//#ifndef SLAB_DEBUG
 #define SLAB_MERGE
-#endif
+//#endif

 /*
  * Set of flags that will prohibit slab merging
@@ -58,12 +66,11 @@
 #define ARCH_SLAB_MINALIGN sizeof(void *)
 #endif

-#ifdef CONFIG_NUMA
-
 /* We need to bootstrap the slab with the active slabs in a special way */
 #define ACTIVE_SLAB_NR kmalloc_index(sizeof(struct active_slab))
 #define ACTIVE_SLAB_SLAB &kmalloc_caches[ACTIVE_SLAB_NR - KMALLOC_SHIFT_LOW]

+#ifdef CONFIG_NUMA
 #define ACTIVE_SLAB(__s,__cpu) ((__s)->active[__cpu])
 #else
 #define ACTIVE_SLAB(__s,__cpu) (&(__s)->active[__cpu])
@@ -116,17 +123,17 @@ void unregister_slab(struct kmem_cache *
  * 1. slab_lock(page)
  * 2. slab->list_lock
  *
- * The slub assigns one slab for allocation to each processor.
+ * SLUB assigns one "active" slab for allocation to each processor.
  * Allocations only occur from these active slabs.
  *
- * If a cpu slab is active then a workqueue thread checks every 10
+ * If a slab is active then a workqueue thread checks every few
  * seconds if the cpu slab is still in use. The cpu slab is pushed back
  * to the list if inactive [only needed for SMP].
  *
- * Leftover slabs with free elements are kept on a partial list.
+ * Slabs with free and used objects are kept on a partial list.
  * There is no list for full slabs. If an object in a full slab is
  * freed then the slab will show up again on the partial lists.
- * Otherwise there is no need to track filled up slabs.
+ * Otherwise there is no need to track full slabs.
  *
  * Slabs are freed when they become empty. Teardown and setup is
  * minimal so we rely on the page allocators per cpu caches for
@@ -231,6 +238,15 @@ static __always_inline void slab_unlock(
 #endif
 }

+static __always_inline int slab_trylock(struct page *page)
+{
+#ifdef CONFIG_SMP
+	return bit_spin_trylock(PG_locked, &page->flags);
+#else
+	return 1;
+#endif
+}
+
 /*
  * Management of partially allocated slabs
  */
@@ -259,7 +275,7 @@ static void __always_inline remove_parti
 static __always_inline int lock_and_del_slab(struct kmem_cache *s,
 							struct page *page)
 {
-	if (bit_spin_trylock(PG_locked, &page->flags)) {
+	if (slab_trylock(page)) {
 		list_del(&page->lru);
 		s->nr_partial--;
 		return 1;
@@ -292,7 +308,8 @@ static struct page *get_partial(struct k
 {
 	struct page *page;

-	/* Racy check. If we mistakenly see no partial slabs then we
+	/*
+	 * Racy check. If we mistakenly see no partial slabs then we
 	 * just allocate an empty slab. If we mistakenly try to get a
 	 * partial slab then get_partials() will return NULL.
 	 */
@@ -301,13 +318,16 @@ static struct page *get_partial(struct k

 	spin_lock(&s->list_lock);
+	/* First find a partial slab that fits the preferred NUMA node */
 	page = numa_partial(s, flags, node);
 	if (page)
 		goto out;

+	/* If we cannot fall back then fail */
 	if (NUMA_BUILD && !(flags & __GFP_THISNODE))
 		goto out;

+	/* Pick any partial slab */
 	list_for_each_entry(page, &s->partial, lru)
 		if (likely(lock_and_del_slab(s, page)))
 			goto out;
@@ -406,9 +426,6 @@ void check_free_chain(struct kmem_cache
 #endif
 }

-/*
- * Operations on slabs
- */
 static void discard_slab(struct kmem_cache *s, struct page *page)
 {
 	atomic_long_dec(&s->nr_slabs);
@@ -429,6 +446,9 @@ static struct page *new_slab(struct kmem
 {
 	struct page *page;

+	if (flags & __GFP_NO_GROW)
+		return NULL;
+
 	page = allocate_slab(s, flags & GFP_LEVEL_MASK, node);
 	if (!page)
 		return NULL;
@@ -480,7 +500,7 @@ static void __always_inline putback_slab
 /*
  * Remove the currently active slab
  */
-static void __always_inline deactivate_slab(struct active_slab *a)
+static void deactivate_slab(struct active_slab *a)
 {
 	struct page *page = a->page;
 	struct kmem_cache *s = a->slab;
@@ -531,7 +551,9 @@ static void flush_active(void *d)
 	if (likely(a->page)) {
 		slab_lock(a->page);
 		deactivate_slab(a);
+#ifdef CONFIG_SMP
 		a->flush_active = 0;
+#endif
 	}
 }

@@ -561,22 +583,13 @@ void check_flush_active(struct work_stru
 	}
 	local_irq_enable();
 }
+#endif

 static void drain_all(struct kmem_cache *s)
 {
 	on_each_cpu(flush_active, s , 1, 1);
 }

-#else
-static void drain_all(struct kmem_cache *s)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	flush_active(s);
-	local_irq_restore(flags);
-}
-#endif

 static __always_inline void *allocate(struct kmem_cache *s,
 						gfp_t gfpflags, int node)
@@ -590,19 +603,26 @@ static __always_inline void *allocate(st
 	if (unlikely(!a->page))
 		goto new_slab;

+	/*
+	 * Check NUMA conditions if they exist. This is
+	 * optimized away for kmem_cache_alloc().
+	 */
+	if (unlikely(node != -1 && page_to_nid(a->page) != node)) {
+		slab_lock(a->page);
+		deactivate_slab(a);
+		goto new_slab;
+	}
+
 	if (likely(a->nr_free))
 		goto get_object;

 	slab_lock(a->page);
-	if (node != -1 && page_to_nid(a->page) != node)
-		goto switch_slabs;
-
 	check_free_chain(s, a->page);
-	if (a->page->freelist)
+	if (likely(a->page->freelist))
 		goto get_freelist;

-switch_slabs:
 	deactivate_slab(a);
+
 new_slab:
 	a->page = get_partial(s, gfpflags, node);
 	if (unlikely(!a->page)) {
@@ -611,7 +631,7 @@ new_slab:
 		if (flags & __GFP_WAIT)
 			local_irq_enable();

-		page = new_slab(s, flags, node);
+		page = new_slab(s, gfpflags, node);

 		if (flags & __GFP_WAIT)
 			local_irq_disable();
@@ -665,7 +685,7 @@ get_object:
 	a->freelist = object[a->page->offset];

 #ifdef CONFIG_SMP
-	if (!a->flush_active && keventd_up()) {
+	if (unlikely(!a->flush_active && keventd_up())) {
 		a->flush_active = 1;
 		schedule_delayed_work(&a->flush, 2 * HZ);
 	}
@@ -755,9 +775,7 @@ void kmem_cache_free(struct kmem_cache *
 		a->nr_free++;
 		object[s->offset] = a->freelist;
 		a->freelist = object;
-out:
-		local_irq_restore(flags);
-		return;
+		goto out;
 	}

 	if (unlikely(PageSlabsingle(page))) {
@@ -772,11 +790,8 @@ out:
 	page->freelist = object;
 	page->inuse--;

-	if (likely(PageActive(page) || (page->inuse && prior))) {
-out_unlock:
-		slab_unlock(page);
-		goto out;
-	}
+	if (likely(PageActive(page) || (page->inuse && prior)))
+		goto out_unlock;

 	if (!prior) {
 		/*
@@ -784,16 +799,20 @@ out_unlock:
 		 * object now. So move to the partial list.
 		 */
 		add_partial(s, page);
-		goto out_unlock;
+out_unlock:
+		slab_unlock(page);
+		goto out;
 	}

 	/*
-	 * All object have been freed.
+	 * Slab is empty.
 	 */
 	remove_partial(s, page);
 	slab_unlock(page);
 	discard_slab(s, page);
-	goto out;
+out:
+	local_irq_restore(flags);
+	return;

 #ifdef SLAB_DEBUG_KFREE
 double_free:
@@ -1333,7 +1352,7 @@ static struct kmem_cache *get_slab(size_
 	struct kmem_cache *s;
 	size_t realsize;

-	BUG_ON(size < 0);
+	BUG_ON(index < 0);

 	if (!(flags & __GFP_DMA))
 		return &kmalloc_caches[index];
@@ -1343,7 +1362,7 @@ static struct kmem_cache *get_slab(size_
 		return s;

 	/* Dynamically create dma cache */
-	s = kmalloc(sizeof(struct kmem_cache), flags & ~(__GFP_DMA));
+	s = kmalloc(sizeof(struct kmem_cache), flags & ~__GFP_DMA);
 	if (!s)
 		panic("Unable to allocate memory for dma cache\n");
@@ -1401,7 +1420,6 @@ EXPORT_SYMBOL(kfree);
 void __init kmem_cache_init(void)
 {
 	int i;
-	char *bootname = "kmalloc";

 	/*
 	 * NUMA Bootstrap only works if the slab for the active_slab
@@ -1411,6 +1429,7 @@ void __init kmem_cache_init(void)
 	kmem_cache_open(ACTIVE_SLAB_SLAB, "active_slab",
 		1 << ACTIVE_SLAB_NR,
 		ARCH_KMALLOC_MINALIGN, SLAB_PANIC, NULL, NULL);
+	slab_state = PARTIAL;

 	/* Power of two sized caches */
@@ -1418,7 +1437,7 @@ void __init kmem_cache_init(void)
 		if (i != ACTIVE_SLAB_NR)
 			kmem_cache_open(
 				&kmalloc_caches[i - KMALLOC_SHIFT_LOW],
-				bootname, 1 << i,
+				"kmalloc", 1 << i,
 				ARCH_KMALLOC_MINALIGN, SLAB_PANIC, NULL, NULL);

 #ifdef KMALLOC_EXTRA
@@ -1435,9 +1454,14 @@ void __init kmem_cache_init(void)
 	slab_state = UP;

 	/* We can provide the correct kmalloc names now that the caches are up */
-	for (i = 0; i < KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW; i++)
-		kmalloc_caches[i].name = kasprintf(GFP_KERNEL, "kmalloc-%d",
+	for (i = 0; i <= KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW; i++) {
+		char *name = kasprintf(GFP_KERNEL, "kmalloc-%d",
 				kmalloc_caches[i].size);
+
+		BUG_ON(!name);
+		kmalloc_caches[i].name = name;
+	}
+
 	printk(KERN_INFO "Kmalloc cache initialized: Caches=%d"
 		" Min_order=%d.\n", KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW
 				+ KMALLOC_EXTRAS,
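
---

For review convenience: the free pointer scheme that allocate() and kmem_cache_free() manipulate above (object[a->page->offset] on the alloc side, object[s->offset] on the free side) is a linked list threaded through the free objects themselves, so a slab needs no external metadata per object. Below is a minimal standalone userspace model of that pop/push pair. It is a sketch only: slab_init/slab_alloc/slab_free, SLAB_SIZE, OBJ_SIZE and OFFSET are made-up names and assumed values for illustration, not code from this patch.

#include <stdio.h>

#define SLAB_SIZE 4096			/* assume one page per slab */
#define OBJ_SIZE  64			/* assumed fixed object size */
#define OFFSET    0			/* word index of the free pointer in an object */

struct slab {
	void *freelist;			/* first free object, NULL when exhausted */
	int inuse;			/* handed-out objects, like page->inuse */
	char mem[SLAB_SIZE];
};

/* Thread a free chain through every object, as slab setup would. */
static void slab_init(struct slab *s)
{
	char *p = s->mem;

	s->inuse = 0;
	s->freelist = p;
	for (; p + OBJ_SIZE < s->mem + SLAB_SIZE; p += OBJ_SIZE)
		((void **)p)[OFFSET] = p + OBJ_SIZE;
	((void **)p)[OFFSET] = NULL;	/* last object terminates the chain */
}

/* Pop one object: mirrors freelist = object[offset] in the patch. */
static void *slab_alloc(struct slab *s)
{
	void **object = s->freelist;

	if (!object)
		return NULL;		/* slab exhausted: caller needs another slab */
	s->freelist = object[OFFSET];
	s->inuse++;
	return object;
}

/* Push one object back: mirrors object[offset] = freelist; freelist = object. */
static void slab_free(struct slab *s, void *x)
{
	void **object = x;

	object[OFFSET] = s->freelist;
	s->freelist = object;
	s->inuse--;
}

int main(void)
{
	static struct slab s;
	void *a, *b;

	slab_init(&s);
	a = slab_alloc(&s);
	b = slab_alloc(&s);
	printf("allocated %p and %p, inuse=%d\n", a, b, s.inuse);
	slab_free(&s, a);
	printf("freed %p, head of freelist is now %p\n", a, s.freelist);
	return 0;
}

The kernel version performs the same two pointer operations under the slab lock; what the patch adds around them is the per-cpu active slab fast path and the partial list transitions shown in the kmem_cache_free() hunks: add_partial() when a full slab sees its first free (!prior), discard_slab() when the last object comes back.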