Index: linux-2.6.17-rc3/mm/mprotect.c =================================================================== --- linux-2.6.17-rc3.orig/mm/mprotect.c 2006-04-30 21:09:45.025662626 -0700 +++ linux-2.6.17-rc3/mm/mprotect.c 2006-04-30 21:14:51.497846120 -0700 @@ -45,6 +45,7 @@ ptent = pte_modify(ptep_get_and_clear(mm, addr, pte), newprot); set_pte_at(mm, addr, pte, ptent); lazy_mmu_prot_update(ptent); +#ifdef CONFIG_MIGRATION } else if (!pte_file(oldpte)) { swp_entry_t entry = pte_to_swp_entry(oldpte); @@ -57,6 +58,7 @@ set_pte_at(mm, addr, pte, swp_entry_to_pte(entry)); } +#endif } } while (pte++, addr += PAGE_SIZE, addr != end); Index: linux-2.6.17-rc3/mm/rmap.c =================================================================== --- linux-2.6.17-rc3.orig/mm/rmap.c 2006-04-30 21:09:47.506953885 -0700 +++ linux-2.6.17-rc3/mm/rmap.c 2006-04-30 21:13:01.372847279 -0700 @@ -596,6 +596,7 @@ spin_unlock(&mmlist_lock); } dec_mm_counter(mm, anon_rss); +#ifdef CONFIG_MIGRATION } else { /* * Store the pfn of the page in a special migration @@ -604,17 +605,21 @@ */ BUG_ON(!migration); entry = make_migration_entry(page, pte_write(pteval)); +#endif } set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); BUG_ON(pte_file(*pte)); - } else if (!migration) - dec_mm_counter(mm, file_rss); - else { + } else +#ifdef CONFIG_MIGRATION + if (migration) { /* Establish migration entry for a file page */ swp_entry_t entry; entry = make_migration_entry(page, pte_write(pteval)); set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); - } + } else +#endif + dec_mm_counter(mm, file_rss); + page_remove_rmap(page); page_cache_release(page); Index: linux-2.6.17-rc3/include/linux/swap.h =================================================================== --- linux-2.6.17-rc3.orig/include/linux/swap.h 2006-04-30 21:09:45.018827113 -0700 +++ linux-2.6.17-rc3/include/linux/swap.h 2006-04-30 21:19:54.977051834 -0700 @@ -32,7 +32,7 @@ #ifndef CONFIG_MIGRATION #define MAX_SWAPFILES (1 << MAX_SWAPFILES_SHIFT) 
#else -/* Use last entry for page migration swap entries */ +/* Use last two entries for page migration swap entries */ #define MAX_SWAPFILES ((1 << MAX_SWAPFILES_SHIFT)-2) #define SWP_MIGRATION_READ MAX_SWAPFILES #define SWP_MIGRATION_WRITE (MAX_SWAPFILES + 1) Index: linux-2.6.17-rc3/mm/migrate.c =================================================================== --- linux-2.6.17-rc3.orig/mm/migrate.c 2006-04-30 21:09:47.505977383 -0700 +++ linux-2.6.17-rc3/mm/migrate.c 2006-04-30 22:19:18.651956027 -0700 @@ -28,7 +28,7 @@ #include "internal.h" /* The maximum number of pages to take off the LRU for migration */ -#define MIGRATE_CHUNK_SIZE 256 +#define MIGRATE_CHUNK_SIZE 1024 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) @@ -121,8 +121,7 @@ } /* - * Restore a potential migration pte to a working pte entry for - * anonymous pages. + * Restore a potential migration pte to a working pte entry */ static void remove_migration_pte(struct vm_area_struct *vma, unsigned long addr, struct page *old, struct page *new) @@ -181,9 +180,8 @@ } /* - * Note that remove_file_migration_ptes will only work on regular mappings - * specialized other mappings will simply be unmapped and do not use - * migration entries. + * Note that remove_file_migration_ptes will only work on regular mappings, + * Nonlinear mappings do not use migration entries. */ static void remove_file_migration_ptes(struct page *old, struct page *new) { @@ -269,9 +267,6 @@ page = migration_entry_to_page(entry); - /* Pages with migration entries are always locked */ - BUG_ON(!PageLocked(page)); - get_page(page); pte_unmap_unlock(ptep, ptl); wait_on_page_locked(page); @@ -282,7 +277,7 @@ } /* - * Remove or replace the page in the mapping. + * Replace the page in the mapping. 
 *
 * The number of remaining references must be:
 * 1 for anonymous pages without a mapping
@@ -466,19 +461,20 @@
 }
 EXPORT_SYMBOL(buffer_migrate_page);
 
+/*
+ * Default handling if a filesystem does not provide a migration function.
+ */
 static int fallback_migrate_page(struct address_space *mapping,
 	struct page *newpage, struct page *page)
 {
-	/*
-	 * Default handling if a filesystem does not provide
-	 * a migration function. We can only migrate clean
-	 * pages so try to write out any dirty pages first.
-	 */
 	if (PageDirty(page)) {
 		/*
-		 * Remove the migration entries because pageout() may
-		 * unlock which may result in migration entries pointing
-		 * to unlocked pages.
+		 * A dirty page may imply that the underlying filesystem has
+		 * the page on some queue. So the page must be clean for
+		 * migration. Writeout may mean we lose the lock and the
+		 * page state is no longer what we checked for earlier.
+		 * At this point we know that the migration attempt cannot
+		 * be successful.
 		 */
 		remove_migration_ptes(page, page);
@@ -490,7 +486,7 @@
 	}
 
 	/*
-	 * Buffers are managed in a filesystem specific way.
+	 * Buffers may be managed in a filesystem specific way.
 	 * We must have no buffers or drop them.
 	 */
 	if (page_has_buffers(page) &&
@@ -501,11 +497,101 @@
 }
 
 /*
+ * Move a page to a page from the free list in "to".
+ * The page is locked and all ptes have been successfully removed.
+ *
+ * The new page will have replaced the old page if this function
+ * is successful.
+ */
+static int move_page(struct page *page, struct list_head *to)
+{
+	struct page *newpage;
+	struct address_space *mapping;
+	int rc;
+
+	newpage = lru_to_page(to);
+	lock_page(newpage);
+
+	/* Prepare mapping for the new page.*/
+	newpage->index = page->index;
+	newpage->mapping = page->mapping;
+
+	mapping = page_mapping(page);
+	if (!mapping)
+		rc = migrate_page(mapping, newpage, page);
+
+	else if (mapping->a_ops->migratepage)
+		/*
+		 * Most pages have a mapping and most filesystems
+		 * should provide a migration function. Anonymous
+		 * pages are part of swap space which also has its
+		 * own migration function. This is the most common
+		 * path for page migration.
+		 */
+		rc = mapping->a_ops->migratepage(mapping,
+						newpage, page);
+	else
+		rc = fallback_migrate_page(mapping, newpage, page);
+
+	if (!rc)
+		remove_migration_ptes(page, newpage);
+	else
+		newpage->mapping = NULL;
+
+	unlock_page(newpage);
+
+	if (!rc)
+		move_to_lru(newpage);
+	return rc;
+}
+
+/*
+ * Obtain the lock on page, remove all ptes and migrate the page
+ *
+ * The function attempts to obtain the lock if the page has not
+ * already been locked.
+ */
+static int unmap_and_move(struct page *page, struct list_head *to, int force)
+{
+	int rc = -EAGAIN;
+
+	if (TestSetPageLocked(page)) {
+		if (!force)
+			return -EAGAIN;
+		lock_page(page);
+	}
+
+	if (PageWriteback(page)) {
+		if (!force)
+			goto unlock;
+		wait_on_page_writeback(page);
+	}
+
+	/*
+	 * Establish migration ptes or remove ptes
+	 */
+	if (try_to_unmap(page, 1) != SWAP_FAIL) {
+		if (!page_mapped(page))
+			rc = move_page(page, to);
+		else
+			rc = -EAGAIN;
+	} else
+		/* A vma has VM_LOCKED set -> permanent failure */
+		rc = -EPERM;
+
+	if (rc)
+		remove_migration_ptes(page, page);
+unlock:
+	unlock_page(page);
+	return rc;
+}
+
+/*
+ * migrate_pages
+ *
+ * Two lists are passed to this function. The first list
+ * contains the pages isolated from the LRU to be migrated.
- * The second list contains new pages that the pages isolated + * The second list contains new pages that the isolated pages * can be moved to. * * The function returns after 10 attempts or if no pages @@ -532,97 +618,33 @@ retry = 0; list_for_each_entry_safe(page, page2, from, lru) { - struct page *newpage = NULL; - struct address_space *mapping; + + if (list_empty(to)) + break; cond_resched(); - rc = 0; if (page_count(page) == 1) /* page was freed from under us. So we are done. */ - goto next; - - if (to && list_empty(to)) - break; - - /* - * Skip locked pages during the first two passes to give the - * functions holding the lock time to release the page. Later we - * use lock_page() to have a higher chance of acquiring the - * lock. - */ - rc = -EAGAIN; - if (pass > 2) - lock_page(page); - else - if (TestSetPageLocked(page)) - goto next; - - /* - * Only wait on writeback if we have already done a pass where - * we we may have triggered writeouts for lots of pages. - */ - if (pass > 0) - wait_on_page_writeback(page); - else - if (PageWriteback(page)) - goto unlock_page; - - /* - * Establish migration ptes or remove ptes - */ - rc = -EPERM; - if (try_to_unmap(page, 1) == SWAP_FAIL) - /* A vma has VM_LOCKED set -> permanent failure */ - goto unlock_page; - - rc = -EAGAIN; - if (page_mapped(page)) - goto unlock_page; - - newpage = lru_to_page(to); - lock_page(newpage); - /* Prepare mapping for the new page.*/ - newpage->index = page->index; - newpage->mapping = page->mapping; - - /* - * Pages are properly locked and writeback is complete. - * Try to migrate the page. - */ - mapping = page_mapping(page); - if (!mapping) - rc = migrate_page(mapping, newpage, page); - - else if (mapping->a_ops->migratepage) + rc = 0; + else { /* - * Most pages have a mapping and most filesystems - * should provide a migration function. Anonymous - * pages are part of swap space which also has its - * own migration function. This is the most common - * path for page migration. 
+			 * Wait for the page to become unlocked if we have already done
+			 * a couple of passes.
 			 */
-			rc = mapping->a_ops->migratepage(mapping,
-							newpage, page);
-		else
-			rc = fallback_migrate_page(mapping, newpage, page);
-
-		if (!rc)
-			remove_migration_ptes(page, newpage);
-
-		unlock_page(newpage);
-
-unlock_page:
-		if (rc)
-			remove_migration_ptes(page, page);
+			if (pass > 2)
+				wait_on_page_locked(page);
+			/*
+			 * Only wait on writeback if we have already done a pass where
+			 * we may have triggered writeouts for lots of pages.
+			 */
+			if (pass > 0)
+				wait_on_page_writeback(page);
 
-		unlock_page(page);
+			rc = unmap_and_move(page, to, pass > 5);
+		}
 
-next:
 		if (rc) {
-			if (newpage)
-				newpage->mapping = NULL;
-
 			if (rc == -EAGAIN)
 				retry++;
 			else {
@@ -630,13 +652,8 @@
 				list_move(&page->lru, failed);
 				nr_failed++;
 			}
-		} else {
-			if (newpage) {
-				/* Successful migration. Return page to LRU */
-				move_to_lru(newpage);
-			}
+		} else
 			list_move(&page->lru, moved);
-		}
 	}
 	if (retry && pass++ < 10)
 		goto redo;