Do not invoke OOM killer for constrained allocations Add some code to detect policies that do not include all nodes in the system. For those policies, invoke the page allocator with the newly added __GFP_NO_OOM_KILLER flag. Allocations will then not invoke the OOM killer (finding no page just means that the system has no available page for that particular process) but will terminate the application instead. This may interfere with the cpusets mechanism for killing processes through the OOM killer. This could be fixed by setting __GFP_NO_OOM_KILLER if __alloc_pages detects that the active cpuset is constraining allocations. Signed-off-by: Christoph Lameter Index: linux-2.6.16-rc2/mm/page_alloc.c =================================================================== --- linux-2.6.16-rc2.orig/mm/page_alloc.c 2006-02-02 22:03:08.000000000 -0800 +++ linux-2.6.16-rc2/mm/page_alloc.c 2006-02-06 17:07:41.000000000 -0800 @@ -1011,6 +1011,13 @@ rebalance: if (page) goto got_pg; + if (gfp_mask & __GFP_NO_OOM_KILLER) + /* + * Process uses constrained allocations. + * Terminate the application instead. 
+ */ + return NULL; + out_of_memory(gfp_mask, order); goto restart; } Index: linux-2.6.16-rc2/mm/mempolicy.c =================================================================== --- linux-2.6.16-rc2.orig/mm/mempolicy.c 2006-02-02 22:03:08.000000000 -0800 +++ linux-2.6.16-rc2/mm/mempolicy.c 2006-02-06 17:07:41.000000000 -0800 @@ -161,6 +161,11 @@ static struct mempolicy *mpol_new(int mo if (!policy) return ERR_PTR(-ENOMEM); atomic_set(&policy->refcnt, 1); + + policy->gfp_flags = 0; + if (!nodes_equal(*nodes, node_online_map)) + policy->gfp_flags |= __GFP_NO_OOM_KILLER; + switch (mode) { case MPOL_INTERLEAVE: policy->v.nodes = *nodes; @@ -1219,6 +1224,7 @@ alloc_page_vma(gfp_t gfp, struct vm_area cpuset_update_task_memory_state(); + gfp |= pol->gfp_flags; if (unlikely(pol->policy == MPOL_INTERLEAVE)) { unsigned nid; @@ -1255,6 +1261,7 @@ struct page *alloc_pages_current(gfp_t g cpuset_update_task_memory_state(); if (!pol || in_interrupt()) pol = &default_policy; + gfp |= pol->gfp_flags; if (pol->policy == MPOL_INTERLEAVE) return alloc_page_interleave(gfp, order, interleave_nodes(pol)); return __alloc_pages(gfp, order, zonelist_policy(gfp, pol)); @@ -1590,6 +1597,9 @@ void mpol_rebind_policy(struct mempolicy if (nodes_equal(*mpolmask, *newmask)) return; + if (!nodes_equal(*newmask, node_online_map)) + pol->gfp_flags |= __GFP_NO_OOM_KILLER; + switch (pol->policy) { case MPOL_DEFAULT: break; Index: linux-2.6.16-rc2/include/linux/mempolicy.h =================================================================== --- linux-2.6.16-rc2.orig/include/linux/mempolicy.h 2006-02-02 22:03:08.000000000 -0800 +++ linux-2.6.16-rc2/include/linux/mempolicy.h 2006-02-06 17:07:41.000000000 -0800 @@ -62,6 +62,7 @@ struct vm_area_struct; struct mempolicy { atomic_t refcnt; short policy; /* See MPOL_* above */ + gfp_t gfp_flags; /* flags ORed into gfp_flags for each allocation */ union { struct zonelist *zonelist; /* bind */ short preferred_node; /* preferred */ Index: 
linux-2.6.16-rc2/include/linux/gfp.h =================================================================== --- linux-2.6.16-rc2.orig/include/linux/gfp.h 2006-02-02 22:03:08.000000000 -0800 +++ linux-2.6.16-rc2/include/linux/gfp.h 2006-02-06 17:07:41.000000000 -0800 @@ -47,6 +47,7 @@ struct vm_area_struct; #define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on success */ #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */ #define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */ +#define __GFP_NO_OOM_KILLER ((__force gfp_t)0x40000u) /* Terminate process, do not call OOM killer */ #define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))