Completely stop reclaim when all pages of a zone are unreclaimable. Currently we still keep on scanning small portions of memory after we have established that all pages of a zone are unreclaimable (because they are pinned in some fashion). We continue scanning so that we can determine that pages have become reclaimable again because they were unpinned. However, this scanning may cause lots of unnecessary overhead. This is in particular a problem on NUMA platforms where the pinning of a large amount of memory may fully pin memory on a number of nodes. If we assume that pages are mainly unpinned when they are freed then we can take NR_FILE_MAPPED as a counter that would have to change if pages are unpinned. If we store the number of mapped pages at the time of the determination that all pages are pinned then we can simply check against that counter and not scan as long as NR_FILE_MAPPED does not change. Signed-off-by: Christoph Lameter Index: linux-2.6.18-mm2/include/linux/mmzone.h =================================================================== --- linux-2.6.18-mm2.orig/include/linux/mmzone.h 2006-10-02 13:59:31.216251272 -0500 +++ linux-2.6.18-mm2/include/linux/mmzone.h 2006-10-02 14:30:37.580251356 -0500 @@ -220,6 +220,12 @@ struct zone { unsigned long pages_scanned; /* since last reclaim */ int all_unreclaimable; /* All pages pinned */ + /* + * The number of mapped pages in use when we determined that the + * zone is unreclaimable. + */ + unsigned long last_mapped; + /* The accumulated number of activities that may cause page aging, * that is, make some pages closer to the tail of inactive_list. 
*/ Index: linux-2.6.18-mm2/mm/vmscan.c =================================================================== --- linux-2.6.18-mm2.orig/mm/vmscan.c 2006-10-02 13:59:31.226017741 -0500 +++ linux-2.6.18-mm2/mm/vmscan.c 2006-10-02 14:35:48.052272373 -0500 @@ -975,8 +975,10 @@ static unsigned long shrink_zones(int pr if (zone->prev_priority > priority) zone->prev_priority = priority; - if (zone->all_unreclaimable && priority != DEF_PRIORITY) - continue; /* Let kswapd poll it */ + if (zone->all_unreclaimable && (priority != DEF_PRIORITY + || zone->last_mapped <= + zone_page_state(zone, NR_FILE_MAPPED))) + continue; sc->all_unreclaimable = 0; @@ -1144,7 +1146,9 @@ loop_again: if (!populated_zone(zone)) continue; - if (zone->all_unreclaimable && priority != DEF_PRIORITY) + if (zone->all_unreclaimable && (priority != DEF_PRIORITY + || zone->last_mapped <= + zone_page_state(zone, NR_FILE_MAPPED))) continue; if (!zone_watermark_ok(zone, order, zone->pages_high, @@ -1177,7 +1181,9 @@ scan: if (!populated_zone(zone)) continue; - if (zone->all_unreclaimable && priority != DEF_PRIORITY) + if (zone->all_unreclaimable && (priority != DEF_PRIORITY + || zone->last_mapped <= + zone_page_state(zone, NR_FILE_MAPPED))) continue; if (!zone_watermark_ok(zone, order, zone->pages_high, @@ -1196,8 +1202,11 @@ scan: if (zone->all_unreclaimable) continue; if (nr_slab == 0 && zone->pages_scanned >= - (zone->nr_active + zone->nr_inactive) * 6) + (zone->nr_active + zone->nr_inactive) * 6) { zone->all_unreclaimable = 1; + zone->last_mapped = zone_page_state(zone, + NR_FILE_MAPPED); + } /* * If we've done a decent amount of scanning and * the reclaim ratio is low, start doing writepage @@ -1349,8 +1358,10 @@ static unsigned long shrink_all_zones(un if (!populated_zone(zone)) continue; - if (zone->all_unreclaimable && prio != DEF_PRIORITY) - continue; + if (zone->all_unreclaimable && (prio != DEF_PRIORITY + || zone->last_mapped <= + zone_page_state(zone, NR_FILE_MAPPED))) + continue; /* For pass = 0 we 
don't shrink the active list */ if (pass > 0) {