ia64: count dirtied ptes per thread

Adds a per-thread dirty_pte_count to the ia64 struct thread_info, bumps it
from the ia64 dirty-bit fault handler and from handle_pte_fault(), sets the
new TIF_PTE_DIRTY flag from the fault handler, and reports the counter in
the task_state() output of fs/proc/array.c.

dirty_pte_count is placed directly in front of flags so that the fault
handler can fetch and store both with a single ld8/st8 pair; the handler
addresses the flag as bit TIF_PTE_DIRTY + 32 of that 8-byte word.

Changes relative to the previous revision of this patch:
 - dirty_bit: the flag was set with "dep.z r20=1, TIF_PTE_DIRTY + 32, 1".
   dep.z zeroes every other bit of the destination, so the value just
   loaded from [r19] (counter and all other thread flags) was thrown away
   and the following st8 wrote back only the new bit.  Use the merge form
   "dep r20=-1,r20,..." so the loaded word is preserved.
 - Dropped the hunk that only inserted a stray ':' into the separator
   comment above "0x2000 Entry 8".
 - /proc output: dirty_pte_count is __u32, print it with %u instead of %d.
 - Removed stray "*/" from two "//" comments; name the flag TIF_PTE_DIRTY.

NOTE(review), not addressed here:
 - The handler updates flags with a plain ld8/modify/st8 sequence.  If flags
   can be modified concurrently (another CPU, or code interrupted by this
   fault), such an update would be lost -- confirm or make this atomic.
 - Label 1 is also the continuation point after a nested TLB miss
   (r30=1f).  Confirm that the nested miss path preserves r19 and that
   [r19] is still addressable when execution resumes at 1.
 - fs/proc/array.c and mm/memory.c are not ia64-specific, but only the ia64
   thread_info gains dirty_pte_count in this patch; other architectures
   presumably fail to build -- confirm and guard or generalise.
 - The 32-bit counter is incremented inside the 64-bit word shared with
   flags, so a wrap would carry into flags bit 0.

Index: linux-2.6.16-rc3/arch/ia64/kernel/ivt.S
===================================================================
--- linux-2.6.16-rc3.orig/arch/ia64/kernel/ivt.S	2006-02-12 16:27:25.000000000 -0800
+++ linux-2.6.16-rc3/arch/ia64/kernel/ivt.S	2006-02-17 18:16:00.000000000 -0800
@@ -12,6 +12,7 @@
  *
  * 00/08/23	Asit Mallick TLB handling for SMP
  * 00/12/20	David Mosberger-Tang DTLB/ITLB handler now uses virtual PT.
+ * 06/02/20	Christoph Lameter Count dirty ptes.
  */
 /*
  * This file defines the interruption vector table used by the CPU.
@@ -547,23 +548,31 @@ ENTRY(dirty_bit)
 	 * normally do this without additional TLB misses.  In case the necessary virtual
 	 * page table TLB entry isn't present, we take a nested TLB miss hit where we look
 	 * up the physical address of the L3 PTE and then continue at label 1 below.
+	 *
+	 * We also count the number of pte dirties in the thread_info structure
+	 * to help us decide when to throttle a process that is dirtying too much
+	 * memory.
 	 */
 	mov r16=cr.ifa				// get the address that caused the fault
 	movl r30=1f				// load continuation point in case of nested fault
+	mov r28=ar.ccv				// save ar.ccv
+	mov r19=IA64_KR(CURRENT)
+	mov r29=b0				// save b0 in case of nested fault
 	;;
 	thash r17=r16				// compute virtual address of L3 PTE
-	mov r29=b0				// save b0 in case of nested fault
 	mov r31=pr				// save pr
+	adds r19=TI_DIRTY_PTE_COUNT+IA64_TASK_SIZE,r19	// address of dirty pte count (flags follow it)
 #ifdef CONFIG_SMP
-	mov r28=ar.ccv				// save ar.ccv
 	;;
 1:	ld8 r18=[r17]
-	;;					// avoid RAW on r18
+	ld8 r20=[r19]				// dirty pte count (low 32 bits) + flags (high 32 bits)
+	mov r24=PAGE_SHIFT<<2
+	;;					// avoid RAW on r18 and r20
 	mov ar.ccv=r18				// set compare value for cmpxchg
+	dep r20=-1,r20,TIF_PTE_DIRTY+32,1	// set TIF_PTE_DIRTY, keep count and other flags
 	or r25=_PAGE_D|_PAGE_A,r18		// set the dirty and accessed bits
 	;;
 	cmpxchg8.acq r26=[r17],r25,ar.ccv
-	mov r24=PAGE_SHIFT<<2
 	;;
 	cmp.eq p6,p7=r26,r18
 	;;
@@ -576,22 +585,28 @@ ENTRY(dirty_bit)
 	dv_serialize_data
 
 	ld8 r18=[r17]				// read PTE again
+(p6)	add r20=1,r20				// count the pte only if our cmpxchg installed it
 	;;
+	st8 [r19]=r20				// save new dirty pte count + flags
 	cmp.eq p6,p7=r18,r25			// is it same as the newly installed
+	mov ar.ccv=r28				// restore ar.ccv
 	;;
 (p7)	ptc.l r16,r24
-	mov b0=r29				// restore b0
-	mov ar.ccv=r28
 #else
 	;;
 1:	ld8 r18=[r17]
+	ld8 r20=[r19]				// dirty pte count (low 32 bits) + flags (high 32 bits)
 	;;					// avoid RAW on r18
+	add r20=1,r20				// increment number of dirty ptes
 	or r18=_PAGE_D|_PAGE_A,r18		// set the dirty and accessed bits
-	mov b0=r29				// restore b0
 	;;
+	dep r20=-1,r20,TIF_PTE_DIRTY+32,1	// set TIF_PTE_DIRTY, keep count and other flags
+	;;
+	st8 [r19]=r20				// save new dirty pte count + flags
 	st8 [r17]=r18				// store back updated PTE
 	itc.d r18				// install updated PTE
 #endif
+	mov b0=r29				// restore b0
 	mov pr=r31,-1				// restore pr
 	rfi
 END(dirty_bit)
Index: linux-2.6.16-rc3/arch/ia64/kernel/asm-offsets.c
===================================================================
--- linux-2.6.16-rc3.orig/arch/ia64/kernel/asm-offsets.c	2006-02-12 16:27:25.000000000 -0800
+++ linux-2.6.16-rc3/arch/ia64/kernel/asm-offsets.c	2006-02-17 18:16:00.000000000 -0800
@@ -37,6 +37,7 @@ void foo(void)
 
 	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
 	DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
+	DEFINE(TI_DIRTY_PTE_COUNT, offsetof(struct thread_info, dirty_pte_count));
 
 	BLANK();
 
Index: linux-2.6.16-rc3/include/asm-ia64/thread_info.h
===================================================================
--- linux-2.6.16-rc3.orig/include/asm-ia64/thread_info.h	2006-02-12 16:27:25.000000000 -0800
+++ linux-2.6.16-rc3/include/asm-ia64/thread_info.h	2006-02-17 18:16:00.000000000 -0800
@@ -24,10 +24,11 @@
 struct thread_info {
 	struct task_struct *task;	/* XXX not really needed, except for dup_task_struct() */
 	struct exec_domain *exec_domain;/* execution domain */
+	__u32 dirty_pte_count;		/* Number of outstanding dirty pages */
 	__u32 flags;			/* thread_info flags (see TIF_*) */
+	__s32 preempt_count;		/* 0=premptable, <0=BUG; will also serve as bh-counter */
 	__u32 cpu;			/* current CPU */
 	mm_segment_t addr_limit;	/* user-level address space limit */
-	int preempt_count;		/* 0=premptable, <0=BUG; will also serve as bh-counter */
 	struct restart_block restart_block;
 	struct {
 		int signo;
@@ -48,6 +49,7 @@ struct thread_info {
 	.cpu		= 0,			\
 	.addr_limit	= KERNEL_DS,		\
 	.preempt_count	= 0,			\
+	.dirty_pte_count = 0,			\
 	.restart_block = {			\
 		.fn = do_no_restart_syscall,	\
 	},					\
@@ -94,6 +96,7 @@ struct thread_info {
 #define TIF_MEMDIE		17
 #define TIF_MCA_INIT		18	/* this task is processing MCA or INIT */
 #define TIF_DB_DISABLED		19	/* debug trap disabled for fsyscall */
+#define TIF_PTE_DIRTY		20	/* At least one pte was dirtied */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
@@ -105,6 +108,7 @@ struct thread_info {
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_MCA_INIT		(1 << TIF_MCA_INIT)
 #define _TIF_DB_DISABLED	(1 << TIF_DB_DISABLED)
+#define _TIF_PTE_DIRTY		(1 << TIF_PTE_DIRTY)
 
 /* "work to do on user-return" bits */
 #define TIF_ALLWORK_MASK	(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SIGDELAYED)
Index: linux-2.6.16-rc3/fs/proc/array.c
===================================================================
--- linux-2.6.16-rc3.orig/fs/proc/array.c	2006-02-12 16:27:25.000000000 -0800
+++ linux-2.6.16-rc3/fs/proc/array.c	2006-02-17 18:16:00.000000000 -0800
@@ -171,14 +171,16 @@ static inline char * task_state(struct t
 		"PPid:\t%d\n"
 		"TracerPid:\t%d\n"
 		"Uid:\t%d\t%d\t%d\t%d\n"
-		"Gid:\t%d\t%d\t%d\t%d\n",
+		"Gid:\t%d\t%d\t%d\t%d\n"
+		"Dirty:\t%u Pages\n",
 		get_task_state(p),
 		(p->sleep_avg/1024)*100/(1020000000/1024),
 		p->tgid,
 		p->pid, pid_alive(p) ? p->group_leader->real_parent->tgid : 0,
 		pid_alive(p) && p->ptrace ? p->parent->pid : 0,
 		p->uid, p->euid, p->suid, p->fsuid,
-		p->gid, p->egid, p->sgid, p->fsgid);
+		p->gid, p->egid, p->sgid, p->fsgid,
+		p->thread_info->dirty_pte_count);
 	read_unlock(&tasklist_lock);
 	task_lock(p);
 	rcu_read_lock();
Index: linux-2.6.16-rc3/mm/memory.c
===================================================================
--- linux-2.6.16-rc3.orig/mm/memory.c	2006-02-12 16:27:25.000000000 -0800
+++ linux-2.6.16-rc3/mm/memory.c	2006-02-17 18:43:07.000000000 -0800
@@ -2221,6 +2221,7 @@ static inline int handle_pte_fault(struc
 			return do_wp_page(mm, vma, address,
 					pte, pmd, ptl, entry);
 		entry = pte_mkdirty(entry);
+		current->thread_info->dirty_pte_count++;
 	}
 	entry = pte_mkyoung(entry);
 	if (!pte_same(old_entry, entry)) {