Enhance VM statistics by marking pages by type

The current statistics for slab, pagecache and pagetable use are kept in a
per-cpu array in order to avoid taking locks when updating the counters.
However, this means that the counters are only meaningful when summed over
all processors. The slab counter merely records that some processor
performed a slab allocation; it cannot tell us how many slab pages were
allocated on a specific node or in a specific zone. The same is true for
the page cache and page table counters.

The following patch moves the accounting into the page allocator. Pages
are requested with a type (slab, pagetable, pagecache) in the gfp mask,
and the page allocator counts the pages as they are allocated and freed.
That is possible without additional locking cost because the page
allocator already has to take the zone lock in order to access the free
lists. As a result we get accurate per-zone statistics for slab cache,
page table and page cache use, since the counters are now protected by
the zone lock.

A nice side effect is that code size shrinks, because the calls to
inc_page_state() are no longer needed.

Only i386, x86_64 and ia64 are supported right now. The other
architectures would need to add __GFP_PAGETABLE to their page table
allocations in order to get correct statistics.

Signed-off-by: Christoph Lameter

Index: linux-2.6.15-rc3/include/linux/pagemap.h
===================================================================
--- linux-2.6.15-rc3.orig/include/linux/pagemap.h	2005-11-28 19:51:27.000000000 -0800
+++ linux-2.6.15-rc3/include/linux/pagemap.h	2005-11-30 14:05:01.000000000 -0800
@@ -53,12 +53,12 @@ void release_pages(struct page **pages,
 
 static inline struct page *page_cache_alloc(struct address_space *x)
 {
-	return alloc_pages(mapping_gfp_mask(x), 0);
+	return alloc_pages(mapping_gfp_mask(x)|__GFP_PAGECACHE, 0);
 }
 
 static inline struct page *page_cache_alloc_cold(struct address_space *x)
 {
-	return alloc_pages(mapping_gfp_mask(x)|__GFP_COLD, 0);
+	return alloc_pages(mapping_gfp_mask(x)|__GFP_PAGECACHE|__GFP_COLD, 0);
 }
 
 typedef int filler_t(void *, struct page *);
Index: linux-2.6.15-rc3/mm/slab.c
===================================================================
--- linux-2.6.15-rc3.orig/mm/slab.c	2005-11-30 13:20:29.000000000 -0800
+++ linux-2.6.15-rc3/mm/slab.c	2005-11-30 14:05:01.000000000 -0800
@@ -1221,7 +1221,7 @@ static void *kmem_getpages(kmem_cache_t
 	int i;
 
 	flags |= cachep->gfpflags;
-	page = alloc_pages_node(nodeid, flags, cachep->gfporder);
+	page = alloc_pages_node(nodeid, flags | __GFP_SLAB, cachep->gfporder);
 	if (!page)
 		return NULL;
 	addr = page_address(page);
Index: linux-2.6.15-rc3/include/asm-x86_64/pgalloc.h
===================================================================
--- linux-2.6.15-rc3.orig/include/asm-x86_64/pgalloc.h	2005-11-28 19:51:27.000000000 -0800
+++ linux-2.6.15-rc3/include/asm-x86_64/pgalloc.h	2005-11-30 14:05:01.000000000 -0800
@@ -20,7 +20,7 @@ static inline void pmd_populate(struct m
 
 static inline pmd_t *get_pmd(void)
 {
-	return (pmd_t *)get_zeroed_page(GFP_KERNEL);
+	return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_PAGETABLE);
 }
 
 static inline void pmd_free(pmd_t *pmd)
@@ -31,12 +31,12 @@ static inline void pmd_free(pmd_t *pmd)
 
 static inline pmd_t *pmd_alloc_one (struct mm_struct *mm, unsigned long addr)
 {
-	return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+	return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT|__GFP_PAGETABLE);
 }
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+	return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT|__GFP_PAGETABLE);
 }
 
 static inline void pud_free (pud_t *pud)
@@ -48,7 +48,7 @@ static inline void pud_free (pud_t *pud)
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	unsigned boundary;
-	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
+	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_PAGETABLE);
 	if (!pgd)
 		return NULL;
 	/*
@@ -72,12 +72,12 @@ static inline void pgd_free(pgd_t *pgd)
 
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
-	return (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+	return (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT|__GFP_PAGETABLE);
 }
 
 static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	void *p = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+	void *p = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT|__GFP_PAGETABLE);
 	if (!p)
 		return NULL;
 	return virt_to_page(p);
Index: linux-2.6.15-rc3/include/asm-ia64/pgalloc.h
===================================================================
--- linux-2.6.15-rc3.orig/include/asm-ia64/pgalloc.h	2005-11-28 19:51:27.000000000 -0800
+++ linux-2.6.15-rc3/include/asm-ia64/pgalloc.h	2005-11-30 14:05:01.000000000 -0800
@@ -52,7 +52,7 @@ static inline void *pgtable_quicklist_al
 		preempt_enable();
 	} else {
 		preempt_enable();
-		ret = (unsigned long *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+		ret = (unsigned long *)__get_free_page(GFP_KERNEL | __GFP_ZERO | __GFP_PAGETABLE);
 	}
 
 	return ret;
Index: linux-2.6.15-rc3/arch/i386/mm/pgtable.c
===================================================================
--- linux-2.6.15-rc3.orig/arch/i386/mm/pgtable.c	2005-11-30 14:04:43.000000000 -0800
+++ linux-2.6.15-rc3/arch/i386/mm/pgtable.c	2005-11-30 14:05:01.000000000 -0800
@@ -164,9 +164,9 @@ struct page *pte_alloc_one(struct mm_str
 	struct page *pte;
 
 #ifdef CONFIG_HIGHPTE
-	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO|__GFP_PAGETABLE, 0);
+	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO|__GFP_PAGETABLE, 0);
 #else
-	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO|__GFP_PAGETABLE, 0);
 #endif
 	return pte;
 }
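
---

For reference, a sketch of what the accounting side in the page allocator
could look like. The page_alloc.c part of the patch is not included in this
excerpt, so everything below is an illustration under stated assumptions,
not the actual implementation: the flag bit values, the pagetype enum, the
nr_type_pages array and the per-page type field are all made-up names. The
point it demonstrates is the one the description relies on: the allocator
already holds zone->lock while touching the free lists, so bumping a
per-zone counter there needs no extra synchronization, and the free path
can decrement the right counter provided the type chosen at allocation
time is remembered in struct page (the real patch presumably encodes this
in the page flags).

/*
 * Illustrative sketch only; names and flag values are assumptions,
 * not taken from the patch.
 */
#include <stdio.h>

#define __GFP_SLAB	0x40000u	/* assumed bit values */
#define __GFP_PAGETABLE	0x80000u
#define __GFP_PAGECACHE	0x100000u

enum pagetype { PT_OTHER, PT_SLAB, PT_PAGETABLE, PT_PAGECACHE, NR_PAGETYPES };

struct zone {
	/* free lists and zone->lock elided */
	unsigned long nr_type_pages[NR_PAGETYPES];	/* protected by zone->lock */
};

struct page {
	struct zone *zone;
	unsigned int order;
	enum pagetype type;	/* set at alloc so the free path can decrement */
};

static enum pagetype gfp_to_pagetype(unsigned int gfp_mask)
{
	if (gfp_mask & __GFP_SLAB)
		return PT_SLAB;
	if (gfp_mask & __GFP_PAGETABLE)
		return PT_PAGETABLE;
	if (gfp_mask & __GFP_PAGECACHE)
		return PT_PAGECACHE;
	return PT_OTHER;
}

/* Both helpers run with zone->lock already held by the allocator. */
static void account_alloc(struct page *page, unsigned int gfp_mask)
{
	page->type = gfp_to_pagetype(gfp_mask);
	page->zone->nr_type_pages[page->type] += 1UL << page->order;
}

static void account_free(struct page *page)
{
	page->zone->nr_type_pages[page->type] -= 1UL << page->order;
}

int main(void)
{
	struct zone z = { { 0 } };
	struct page p = { &z, 0, PT_OTHER };

	account_alloc(&p, __GFP_PAGETABLE);	/* e.g. from pte_alloc_one() */
	printf("pagetable pages in zone: %lu\n", z.nr_type_pages[PT_PAGETABLE]);
	account_free(&p);
	return 0;
}

Compared with the per-cpu scheme this trades a cheap unsynchronized
increment for an exact per-zone figure, which costs nothing extra here
because the zone lock is taken anyway.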