Index: linux-2.6.15-rc2/mm/swap_state.c =================================================================== --- linux-2.6.15-rc2.orig/mm/swap_state.c 2005-11-20 03:25:03.000000000 +0000 +++ linux-2.6.15-rc2/mm/swap_state.c 2005-11-22 21:03:51.000000000 +0000 @@ -84,7 +84,7 @@ static int __add_to_swap_cache(struct pa SetPageSwapCache(page); set_page_private(page, entry.val); total_swapcache_pages++; - pagecache_acct(1); + pagecache_acct(1, page); } write_unlock_irq(&swapper_space.tree_lock); radix_tree_preload_end(); @@ -129,7 +129,7 @@ void __delete_from_swap_cache(struct pag set_page_private(page, 0); ClearPageSwapCache(page); total_swapcache_pages--; - pagecache_acct(-1); + pagecache_acct(-1, page); INC_CACHE_INFO(del_total); } Index: linux-2.6.15-rc2/include/linux/pagemap.h =================================================================== --- linux-2.6.15-rc2.orig/include/linux/pagemap.h 2005-11-20 03:25:03.000000000 +0000 +++ linux-2.6.15-rc2/include/linux/pagemap.h 2005-11-22 21:03:51.000000000 +0000 @@ -99,49 +99,12 @@ int add_to_page_cache_lru(struct page *p extern void remove_from_page_cache(struct page *page); extern void __remove_from_page_cache(struct page *page); -extern atomic_t nr_pagecache; - -#ifdef CONFIG_SMP - -#define PAGECACHE_ACCT_THRESHOLD max(16, NR_CPUS * 2) -DECLARE_PER_CPU(long, nr_pagecache_local); - /* - * pagecache_acct implements approximate accounting for pagecache. - * vm_enough_memory() do not need high accuracy. Writers will keep - * an offset in their per-cpu arena and will spill that into the - * global count whenever the absolute value of the local count - * exceeds the counter's threshold. - * - * MUST be protected from preemption. - * current protection is mapping->page_lock. + * pagecache_acct implements accounting for pagecache. 
*/ -static inline void pagecache_acct(int count) -{ - long *local; - - local = &__get_cpu_var(nr_pagecache_local); - *local += count; - if (*local > PAGECACHE_ACCT_THRESHOLD || *local < -PAGECACHE_ACCT_THRESHOLD) { - atomic_add(*local, &nr_pagecache); - *local = 0; - } -} - -#else - -static inline void pagecache_acct(int count) +static inline void pagecache_acct(int count, struct page *page) { - atomic_add(count, &nr_pagecache); -} -#endif - -static inline unsigned long get_page_cache_size(void) -{ - int ret = atomic_read(&nr_pagecache); - if (unlikely(ret < 0)) - ret = 0; - return ret; + atomic_add(count, &page_zone(page)->nr_pagecache); } /* Index: linux-2.6.15-rc2/mm/filemap.c =================================================================== --- linux-2.6.15-rc2.orig/mm/filemap.c 2005-11-20 03:25:03.000000000 +0000 +++ linux-2.6.15-rc2/mm/filemap.c 2005-11-22 21:03:51.000000000 +0000 @@ -115,7 +115,7 @@ void __remove_from_page_cache(struct pag radix_tree_delete(&mapping->page_tree, page->index); page->mapping = NULL; mapping->nrpages--; - pagecache_acct(-1); + pagecache_acct(-1, page); } void remove_from_page_cache(struct page *page) @@ -390,7 +390,7 @@ int add_to_page_cache(struct page *page, page->mapping = mapping; page->index = offset; mapping->nrpages++; - pagecache_acct(1); + pagecache_acct(1, page); } write_unlock_irq(&mapping->tree_lock); radix_tree_preload_end(); Index: linux-2.6.15-rc2/include/linux/mmzone.h =================================================================== --- linux-2.6.15-rc2.orig/include/linux/mmzone.h 2005-11-20 03:25:03.000000000 +0000 +++ linux-2.6.15-rc2/include/linux/mmzone.h 2005-11-22 21:05:50.000000000 +0000 @@ -160,6 +160,9 @@ struct zone { unsigned long pages_scanned; /* since last reclaim */ int all_unreclaimable; /* All pages pinned */ + /* Count pages in the page cache */ + atomic_t nr_pagecache; + /* * Does the allocator try to reclaim pages from the zone as soon * as it fails a watermark_ok() in __alloc_pages? 
@@ -325,9 +328,10 @@ typedef struct pglist_data { extern struct pglist_data *pgdat_list; void __get_zone_counts(unsigned long *active, unsigned long *inactive, - unsigned long *free, struct pglist_data *pgdat); + unsigned long *free, unsigned long *pagecache, + struct pglist_data *pgdat); void get_zone_counts(unsigned long *active, unsigned long *inactive, - unsigned long *free); + unsigned long *free, unsigned long *pagecache); void build_all_zonelists(void); void wakeup_kswapd(struct zone *zone, int order); int zone_watermark_ok(struct zone *z, int order, unsigned long mark, Index: linux-2.6.15-rc2/mm/page_alloc.c =================================================================== --- linux-2.6.15-rc2.orig/mm/page_alloc.c 2005-11-20 03:25:03.000000000 +0000 +++ linux-2.6.15-rc2/mm/page_alloc.c 2005-11-22 21:03:51.000000000 +0000 @@ -1225,7 +1225,8 @@ void __mod_page_state(unsigned long offs EXPORT_SYMBOL(__mod_page_state); void __get_zone_counts(unsigned long *active, unsigned long *inactive, - unsigned long *free, struct pglist_data *pgdat) + unsigned long *free, unsigned long *pagecache, + struct pglist_data *pgdat) { struct zone *zones = pgdat->node_zones; int i; @@ -1237,23 +1238,27 @@ void __get_zone_counts(unsigned long *ac *active += zones[i].nr_active; *inactive += zones[i].nr_inactive; *free += zones[i].free_pages; + *pagecache += atomic_read(&zones[i].nr_pagecache); } } void get_zone_counts(unsigned long *active, - unsigned long *inactive, unsigned long *free) + unsigned long *inactive, unsigned long *free, + unsigned long *pagecache) { struct pglist_data *pgdat; *active = 0; *inactive = 0; *free = 0; + *pagecache = 0; for_each_pgdat(pgdat) { - unsigned long l, m, n; - __get_zone_counts(&l, &m, &n, pgdat); + unsigned long l, m, n, p = 0; + __get_zone_counts(&l, &m, &n, &p, pgdat); *active += l; *inactive += m; *free += n; + *pagecache += p; } } @@ -1302,6 +1307,7 @@ void show_free_areas(void) unsigned long active; unsigned long inactive; unsigned long free; 
+ unsigned long nr_pagecache; struct zone *zone; for_each_zone(zone) { @@ -1331,20 +1337,21 @@ void show_free_areas(void) } get_page_state(&ps); - get_zone_counts(&active, &inactive, &free); + get_zone_counts(&active, &inactive, &free, &nr_pagecache); printk("Free pages: %11ukB (%ukB HighMem)\n", K(nr_free_pages()), K(nr_free_highpages())); printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu " - "unstable:%lu free:%u slab:%lu mapped:%lu pagetables:%lu\n", + "unstable:%lu free:%u pagecache:%lu slab:%lu mapped:%lu pagetables:%lu\n", active, inactive, ps.nr_dirty, ps.nr_writeback, ps.nr_unstable, nr_free_pages(), + nr_pagecache, ps.nr_slab, ps.nr_mapped, ps.nr_page_table_pages); Index: linux-2.6.15-rc2/fs/proc/proc_misc.c =================================================================== --- linux-2.6.15-rc2.orig/fs/proc/proc_misc.c 2005-11-20 03:25:03.000000000 +0000 +++ linux-2.6.15-rc2/fs/proc/proc_misc.c 2005-11-22 21:03:51.000000000 +0000 @@ -124,13 +124,14 @@ static int meminfo_read_proc(char *page, unsigned long inactive; unsigned long active; unsigned long free; + unsigned long pagecache; unsigned long committed; unsigned long allowed; struct vmalloc_info vmi; long cached; get_page_state(&ps); - get_zone_counts(&active, &inactive, &free); + get_zone_counts(&active, &inactive, &free, &pagecache); /* * display in kilobytes. 
@@ -142,7 +143,7 @@ static int meminfo_read_proc(char *page, allowed = ((totalram_pages - hugetlb_total_pages()) * sysctl_overcommit_ratio / 100) + total_swap_pages; - cached = get_page_cache_size() - total_swapcache_pages - i.bufferram; + cached = pagecache - total_swapcache_pages - i.bufferram; if (cached < 0) cached = 0; Index: linux-2.6.15-rc2/mm/mmap.c =================================================================== --- linux-2.6.15-rc2.orig/mm/mmap.c 2005-11-20 03:25:03.000000000 +0000 +++ linux-2.6.15-rc2/mm/mmap.c 2005-11-22 21:03:51.000000000 +0000 @@ -93,9 +93,14 @@ int __vm_enough_memory(long pages, int c return 0; if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) { - unsigned long n; + unsigned long n, pfree, pagecache; - free = get_page_cache_size(); + get_zone_counts(&n, &n, &pfree, &pagecache); + + if (!cap_sys_admin) + pfree -= pfree / 32; + + free = pagecache + pfree; free += nr_swap_pages; /* @@ -115,17 +120,6 @@ int __vm_enough_memory(long pages, int c if (free > pages) return 0; - /* - * nr_free_pages() is very expensive on large systems, - * only call if we're about to fail. 
- */ - n = nr_free_pages(); - if (!cap_sys_admin) - n -= n / 32; - free += n; - - if (free > pages) - return 0; vm_unacct_memory(pages); return -ENOMEM; } Index: linux-2.6.15-rc2/drivers/base/node.c =================================================================== --- linux-2.6.15-rc2.orig/drivers/base/node.c 2005-11-20 03:25:03.000000000 +0000 +++ linux-2.6.15-rc2/drivers/base/node.c 2005-11-22 21:03:51.000000000 +0000 @@ -43,10 +43,11 @@ static ssize_t node_read_meminfo(struct unsigned long inactive; unsigned long active; unsigned long free; + unsigned long pagecache = 0; si_meminfo_node(&i, nid); get_page_state_node(&ps, nid); - __get_zone_counts(&active, &inactive, &free, NODE_DATA(nid)); + __get_zone_counts(&active, &inactive, &free, &pagecache, NODE_DATA(nid)); /* Check for negative values in these approximate counters */ if ((long)ps.nr_dirty < 0) @@ -71,6 +72,7 @@ static ssize_t node_read_meminfo(struct "Node %d Dirty: %8lu kB\n" "Node %d Writeback: %8lu kB\n" "Node %d Mapped: %8lu kB\n" + "Node %d Pagecache: %8lu kB\n" "Node %d Slab: %8lu kB\n", nid, K(i.totalram), nid, K(i.freeram), @@ -83,6 +85,7 @@ static ssize_t node_read_meminfo(struct nid, K(i.freeram - i.freehigh), nid, K(ps.nr_dirty), nid, K(ps.nr_writeback), nid, K(ps.nr_mapped), + nid, K(pagecache), nid, K(ps.nr_slab)); n += hugetlb_report_node_meminfo(nid, buf + n); Index: linux-2.6.15-rc2/mm/readahead.c =================================================================== --- linux-2.6.15-rc2.orig/mm/readahead.c 2005-11-20 03:25:03.000000000 +0000 +++ linux-2.6.15-rc2/mm/readahead.c 2005-11-22 21:03:51.000000000 +0000 @@ -565,7 +565,8 @@ unsigned long max_sane_readahead(unsigne unsigned long active; unsigned long inactive; unsigned long free; + unsigned long pagecache = 0; - __get_zone_counts(&active, &inactive, &free, NODE_DATA(numa_node_id())); + __get_zone_counts(&active, &inactive, &free, &pagecache, NODE_DATA(numa_node_id())); return min(nr, (inactive + free) / 2); }