Reclaim: defer writeout of dirty pages to a laundry list

Dirty pages met by shrink_page_list() are no longer written out in place.
They are put on a caller-supplied laundry list, and do_laundry() later writes
them out and tries to free them.  The per-node reclaim loop (the hunks at
loop_again:) runs do_laundry() with PF_MEMALLOC cleared and then throttles on
writeout.  The put-back loop of shrink_inactive_list() is split out into
release_lru_pages() so that do_laundry() can share it.

Changes made during review:

 1. shrink_page_list(): three callers pass laundry == NULL, which made the
    list insertion dereference NULL.  Dirty pages are now kept when there is
    no laundry list.
 2. shrink_page_list(): use list_add() instead of list_move().
    NOTE(review): this relies on the page having been unlinked from page_list
    with list_del() at the top of the loop, exactly as do_laundry() (a copy
    of that loop) does.  Confirm against the tree.
 3. do_laundry(): a page that was locked successfully but turned out to be
    mapped or clean jumped to "keep" and stayed locked forever.
 4. do_laundry(): pages that release_lru_pages() put back on the LRU were
    released with __pagevec_release_nonlru() while holding lru_lock.  They
    are now released with pagevec_release() after the lock is dropped, as
    shrink_inactive_list() does.
 5. shrink_inactive_list(): the loop exits with lru_lock held, so the
    spin_unlock() after the loop must stay.

Open question: the reclaim loop sets PF_MEMALLOC unconditionally after
do_laundry() although kswapd() no longer sets it at startup.  Confirm which
of the two is intended.

Whitespace in context lines was reconstructed; if a hunk is rejected, retry
with "patch -l".

---
 mm/vmscan.c |  228 ++++++++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 175 insertions(+), 53 deletions(-)

Index: linux-2.6/mm/vmscan.c
===================================================================
--- linux-2.6.orig/mm/vmscan.c	2007-08-19 18:42:18.000000000 -0700
+++ linux-2.6/mm/vmscan.c	2007-08-19 18:43:47.000000000 -0700
@@ -423,7 +423,7 @@ cannot_free:
  * shrink_page_list() returns the number of reclaimed pages
  */
 static unsigned long shrink_page_list(struct list_head *page_list,
-					struct scan_control *sc)
+		struct scan_control *sc, struct list_head *laundry)
 {
 	LIST_HEAD(ret_pages);
 	struct pagevec freed_pvec;
@@ -504,27 +504,18 @@ static unsigned long shrink_page_list(st
 			if (!sc->may_writepage)
 				goto keep_locked;
 
-			/* Page is dirty, try to write it out here */
-			switch(pageout(page, mapping)) {
-			case PAGE_KEEP:
-				goto keep_locked;
-			case PAGE_ACTIVATE:
-				goto activate_locked;
-			case PAGE_SUCCESS:
-				if (PageWriteback(page) || PageDirty(page))
-					goto keep;
-				/*
-				 * A synchronous write - probably a ramdisk.  Go
-				 * ahead and try to reclaim the page.
-				 */
-				if (TestSetPageLocked(page))
-					goto keep;
-				if (PageDirty(page) || PageWriteback(page))
-					goto keep_locked;
-				mapping = page_mapping(page);
-			case PAGE_CLEAN:
-				; /* try to free the page below */
-			}
+			/*
+			 * Some callers pass a NULL laundry list.  They have
+			 * nowhere to defer the writeout to, so keep the dirty
+			 * page rather than dereference NULL.
+			 */
+			if (!laundry)
+				goto keep_locked;
+
+			/* Will be handled later */
+			list_add(&page->lru, laundry);
+			unlock_page(page);
+			continue;
 		}
 
 		/*
@@ -581,6 +572,144 @@ keep:
 	return nr_reclaimed;
 }
 
+/*
+ * Put back any unfreeable pages.
+ *
+ * Every page on @page_list is returned to the active or inactive LRU list
+ * of @zone, depending on PageActive(), and is added to @pvec.
+ *
+ * The caller must hold zone->lru_lock with interrupts disabled.  Whenever
+ * @pvec fills up the lock is dropped with spin_unlock_irq(), the pagevec is
+ * released, and the lock is re-taken with spin_lock_irq(); it is held again
+ * on return.  Pages remaining in @pvec are left for the caller to release.
+ */
+void release_lru_pages(struct zone *zone, struct pagevec *pvec,
+		struct list_head *page_list)
+{
+	struct page *page;
+
+	while (!list_empty(page_list)) {
+		page = lru_to_page(page_list);
+		VM_BUG_ON(PageLRU(page));
+		SetPageLRU(page);
+		list_del(&page->lru);
+		if (PageActive(page))
+			add_page_to_active_list(zone, page);
+		else
+			add_page_to_inactive_list(zone, page);
+		if (!pagevec_add(pvec, page)) {
+			spin_unlock_irq(&zone->lru_lock);
+			__pagevec_release(pvec);
+			spin_lock_irq(&zone->lru_lock);
+		}
+	}
+}
+
+/*
+ * do_laundry() works through the pages on @page_list (the laundry list that
+ * shrink_page_list() fills with dirty pages): each page that is still dirty
+ * and unmapped is written out with pageout() and, if it ends up clean, freed.
+ *
+ * Pages that cannot be freed are put back on the LRU lists of @zone.  The
+ * function takes zone->lru_lock itself for that, so the caller must not hold
+ * it.
+ *
+ * Returns the number of pages reclaimed.
+ */
+unsigned long do_laundry(struct zone *zone, struct list_head *page_list)
+{
+	LIST_HEAD(ret_pages);
+	struct pagevec freed_pvec;
+	int pgactivate = 0;
+	unsigned long nr_reclaimed = 0;
+
+	cond_resched();
+
+	pagevec_init(&freed_pvec, 1);
+	while (!list_empty(page_list)) {
+		struct address_space *mapping;
+		struct page *page;
+
+		cond_resched();
+
+		page = lru_to_page(page_list);
+		list_del(&page->lru);
+
+		if (TestSetPageLocked(page))
+			goto keep;
+
+		/* The page lock is held from here on: drop it on every exit */
+		if (page_mapped(page) || !PageDirty(page))
+			goto keep_locked;
+
+		mapping = page_mapping(page);
+
+		/* Page is dirty, try to write it out here */
+		switch(pageout(page, mapping)) {
+		case PAGE_KEEP:
+			goto keep_locked;
+		case PAGE_ACTIVATE:
+			goto activate_locked;
+		case PAGE_SUCCESS:
+			if (PageWriteback(page) || PageDirty(page))
+				goto keep;
+			/*
+			 * A synchronous write - probably a ramdisk.  Go
+			 * ahead and try to reclaim the page.
+			 */
+			if (TestSetPageLocked(page))
+				goto keep;
+			if (PageDirty(page) || PageWriteback(page))
+				goto keep_locked;
+			mapping = page_mapping(page);
+			/* fall through */
+		case PAGE_CLEAN:
+			; /* try to free the page below */
+		}
+
+		if (PagePrivate(page)) {
+			if (!try_to_release_page(page, GFP_KERNEL))
+				goto activate_locked;
+			if (!mapping && page_count(page) == 1)
+				goto free_it;
+		}
+
+		if (!mapping || !remove_mapping(mapping, page))
+			goto keep_locked;
+
+free_it:
+		unlock_page(page);
+		nr_reclaimed++;
+		if (!pagevec_add(&freed_pvec, page))
+			__pagevec_release_nonlru(&freed_pvec);
+		continue;
+
+activate_locked:
+		SetPageActive(page);
+		pgactivate++;
+keep_locked:
+		unlock_page(page);
+keep:
+		list_add(&page->lru, &ret_pages);
+		VM_BUG_ON(PageLRU(page));
+	}
+	if (pagevec_count(&freed_pvec))
+		__pagevec_release_nonlru(&freed_pvec);
+	count_vm_events(PGACTIVATE, pgactivate);
+
+	/* Put the pages that could not be freed back on the LRU */
+	pagevec_init(&freed_pvec, 1);
+	spin_lock_irq(&zone->lru_lock);
+	release_lru_pages(zone, &freed_pvec, &ret_pages);
+	spin_unlock_irq(&zone->lru_lock);
+	/*
+	 * The remaining pages are on the LRU now, so they go through
+	 * pagevec_release() as in shrink_inactive_list(), outside lru_lock.
+	 */
+	pagevec_release(&freed_pvec);
+	return nr_reclaimed;
+}
+
 /* LRU Isolation modes. */
 #define ISOLATE_INACTIVE 0	/* Isolate inactive pages. */
 #define ISOLATE_ACTIVE 1	/* Isolate active pages. */
@@ -753,7 +882,7 @@ static unsigned long clear_active_flags(
  * of reclaimed pages
  */
 static unsigned long shrink_inactive_list(unsigned long max_scan,
-				struct zone *zone, struct scan_control *sc)
+		struct zone *zone, struct scan_control *sc, struct list_head *laundry)
 {
 	LIST_HEAD(page_list);
 	struct pagevec pvec;
@@ -765,7 +894,6 @@ static unsigned long shrink_inactive_lis
 	lru_add_drain();
 	spin_lock_irq(&zone->lru_lock);
 	do {
-		struct page *page;
 		unsigned long nr_taken;
 		unsigned long nr_scan;
 		unsigned long nr_freed;
@@ -785,7 +913,7 @@ static unsigned long shrink_inactive_lis
 		spin_unlock_irq(&zone->lru_lock);
 
 		nr_scanned += nr_scan;
-		nr_freed = shrink_page_list(&page_list, sc);
+		nr_freed = shrink_page_list(&page_list, sc, laundry);
 		nr_reclaimed += nr_freed;
 		local_irq_disable();
 		if (current_is_kswapd()) {
@@ -798,27 +926,10 @@ static unsigned long shrink_inactive_lis
 		if (nr_taken == 0)
 			goto done;
 
-		spin_lock(&zone->lru_lock);
-		/*
-		 * Put back any unfreeable pages.
-		 */
-		while (!list_empty(&page_list)) {
-			page = lru_to_page(&page_list);
-			VM_BUG_ON(PageLRU(page));
-			SetPageLRU(page);
-			list_del(&page->lru);
-			if (PageActive(page))
-				add_page_to_active_list(zone, page);
-			else
-				add_page_to_inactive_list(zone, page);
-			if (!pagevec_add(&pvec, page)) {
-				spin_unlock_irq(&zone->lru_lock);
-				__pagevec_release(&pvec);
-				spin_lock_irq(&zone->lru_lock);
-			}
-		}
+		spin_lock_irq(&zone->lru_lock);
+		release_lru_pages(zone, &pvec, &page_list);
   	} while (nr_scanned < max_scan);
 	spin_unlock(&zone->lru_lock);
 done:
 	local_irq_enable();
 	pagevec_release(&pvec);
@@ -1007,7 +1118,7 @@ force_reclaim_mapped:
  * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
  */
 static unsigned long shrink_zone(int priority, struct zone *zone,
-				struct scan_control *sc)
+		struct scan_control *sc, struct list_head *laundry)
 {
 	unsigned long nr_active;
 	unsigned long nr_inactive;
@@ -1049,12 +1160,10 @@ static unsigned long shrink_zone(int pri
 					(unsigned long)sc->swap_cluster_max);
 			nr_inactive -= nr_to_scan;
 			nr_reclaimed += shrink_inactive_list(nr_to_scan, zone,
-								sc);
+						sc, laundry);
 		}
 	}
 
-	throttle_vm_writeout(sc->gfp_mask);
-
 	atomic_dec(&zone->reclaim_in_progress);
 	return nr_reclaimed;
 }
@@ -1098,7 +1207,13 @@ static unsigned long shrink_zones(int pr
 
 		sc->all_unreclaimable = 0;
 
-		nr_reclaimed += shrink_zone(priority, zone, sc);
+		nr_reclaimed += shrink_zone(priority, zone, sc, NULL);
+
+		/*
+		 * Not sure if we need this but is certainly good to slow
+		 * things down here.
+		 */
+		throttle_vm_writeout(GFP_KERNEL);
 	}
 	return nr_reclaimed;
 }
@@ -1305,6 +1420,7 @@ loop_again:
 		for (i = 0; i <= end_zone; i++) {
 			struct zone *zone = pgdat->node_zones + i;
 			int nr_slab;
+			LIST_HEAD(laundry);
 
 			if (!populated_zone(zone))
 				continue;
@@ -1318,12 +1434,18 @@ loop_again:
 			temp_priority[i] = priority;
 			sc.nr_scanned = 0;
 			note_zone_scanning_priority(zone, priority);
-			nr_reclaimed += shrink_zone(priority, zone, &sc);
+			nr_reclaimed += shrink_zone(priority, zone, &sc, &laundry);
 			reclaim_state->reclaimed_slab = 0;
 			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
 						lru_pages);
 			nr_reclaimed += reclaim_state->reclaimed_slab;
 			total_scanned += sc.nr_scanned;
+			if (!list_empty(&laundry)) {
+				current->flags &= ~PF_MEMALLOC;
+				nr_reclaimed += do_laundry(zone, &laundry);
+				current->flags |= PF_MEMALLOC;
+				throttle_vm_writeout(GFP_KERNEL);
+			}
 			if (zone->all_unreclaimable)
 				continue;
 			if (nr_slab == 0 && zone->pages_scanned >=
@@ -1420,7 +1542,7 @@ static int kswapd(void *p)
 	 * us from recursively trying to free more memory as we're
 	 * trying to free the first piece of memory in the first place).
 	 */
-	tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
+	tsk->flags |= PF_SWAPWRITE | PF_KSWAPD;
 	set_freezable();
 
 	order = 0;
@@ -1516,7 +1638,7 @@ static unsigned long shrink_all_zones(un
 			zone->nr_scan_inactive = 0;
 			nr_to_scan = min(nr_pages,
 				zone_page_state(zone, NR_INACTIVE));
-			ret += shrink_inactive_list(nr_to_scan, zone, sc);
+			ret += shrink_inactive_list(nr_to_scan, zone, sc, NULL);
 			if (ret >= nr_pages)
 				return ret;
 		}
@@ -1757,7 +1879,7 @@ static int __zone_reclaim(struct zone *z
 		priority = ZONE_RECLAIM_PRIORITY;
 		do {
 			note_zone_scanning_priority(zone, priority);
-			nr_reclaimed += shrink_zone(priority, zone, &sc);
+			nr_reclaimed += shrink_zone(priority, zone, &sc, NULL);
 			priority--;
 		} while (priority >= 0 && nr_reclaimed < nr_pages);
 	}