diff -urpN -X /home/fletch/.diff.exclude 851-mbind_part2/arch/i386/mm/hugetlbpage.c 860-per_node_rss/arch/i386/mm/hugetlbpage.c
--- 851-mbind_part2/arch/i386/mm/hugetlbpage.c	Tue Jun 24 21:29:16 2003
+++ 860-per_node_rss/arch/i386/mm/hugetlbpage.c	Wed Aug 13 20:51:52 2003
@@ -61,6 +61,27 @@ static struct page *alloc_fresh_huge_pag
 
 void free_huge_page(struct page *page);
 
+#ifdef CONFIG_NUMA
+
+static inline void huge_inc_rss(struct mm_struct *mm, struct page *page)
+{
+	mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+	mm->pernode_rss[page_to_nid(page)] += (HPAGE_SIZE / PAGE_SIZE);
+}
+
+static inline void huge_dec_rss(struct mm_struct *mm, struct page *page)
+{
+	mm->rss -= (HPAGE_SIZE / PAGE_SIZE);
+	mm->pernode_rss[page_to_nid(page)] -= (HPAGE_SIZE / PAGE_SIZE);
+}
+
+#else /* !CONFIG_NUMA */
+
+#define huge_inc_rss(mm, page)	((mm)->rss += (HPAGE_SIZE / PAGE_SIZE))
+#define huge_dec_rss(mm, page)	((mm)->rss -= (HPAGE_SIZE / PAGE_SIZE))
+
+#endif /* CONFIG_NUMA */
+
 static struct page *alloc_hugetlb_page(void)
 {
 	int i;
@@ -105,7 +126,7 @@ static void set_huge_pte(struct mm_struc
 {
 	pte_t entry;
 
-	mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+	huge_inc_rss(mm, page);
 	if (write_access) {
 		entry =
 		    pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
@@ -145,7 +166,7 @@ int copy_hugetlb_page_range(struct mm_st
 		ptepage = pte_page(entry);
 		get_page(ptepage);
 		set_pte(dst_pte, entry);
-		dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+		huge_inc_rss(dst, ptepage);
 		addr += HPAGE_SIZE;
 	}
 	return 0;
@@ -314,8 +335,8 @@ void unmap_hugepage_range(struct vm_area
 		page = pte_page(*pte);
 		huge_page_release(page);
 		pte_clear(pte);
+		huge_dec_rss(mm, page);
 	}
-	mm->rss -= (end - start) >> PAGE_SHIFT;
 	flush_tlb_range(vma, start, end);
 }
 
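
Note: the one-shot "mm->rss -= (end - start) >> PAGE_SHIFT" removed above
and the new per-pte huge_dec_rss() calls are arithmetically equivalent,
because the unmap loop walks [start, end) in HPAGE_SIZE strides.  A quick
user-space check of that arithmetic follows; the DEMO_* constants are
invented stand-ins for the i386 values, and none of this is part of the
patch itself.

/* per-pte decrements over a huge-page range sum to the old bulk value */
#include <assert.h>

#define DEMO_PAGE_SHIFT	12			/* 4kB base pages */
#define DEMO_PAGE_SIZE	(1UL << DEMO_PAGE_SHIFT)
#define DEMO_HPAGE_SIZE	(1UL << 22)		/* 4MB huge pages */

int main(void)
{
	unsigned long start = 0, end = 8 * DEMO_HPAGE_SIZE;
	unsigned long addr, per_pte = 0;

	for (addr = start; addr < end; addr += DEMO_HPAGE_SIZE)
		per_pte += DEMO_HPAGE_SIZE / DEMO_PAGE_SIZE;

	assert(per_pte == (end - start) >> DEMO_PAGE_SHIFT);
	return 0;
}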
diff -urpN -X /home/fletch/.diff.exclude 851-mbind_part2/fs/binfmt_aout.c 860-per_node_rss/fs/binfmt_aout.c
--- 851-mbind_part2/fs/binfmt_aout.c	Tue Aug  5 20:01:42 2003
+++ 860-per_node_rss/fs/binfmt_aout.c	Wed Aug 13 20:51:52 2003
@@ -310,7 +310,7 @@ static int load_aout_binary(struct linux
 		(current->mm->start_brk = N_BSSADDR(ex));
 	current->mm->free_area_cache = TASK_UNMAPPED_BASE;
 
-	current->mm->rss = 0;
+	zero_rss(current->mm);
 	current->mm->mmap = NULL;
 	compute_creds(bprm);
  	current->flags &= ~PF_FORKNOEXEC;
diff -urpN -X /home/fletch/.diff.exclude 851-mbind_part2/fs/binfmt_elf.c 860-per_node_rss/fs/binfmt_elf.c
--- 851-mbind_part2/fs/binfmt_elf.c	Tue Aug  5 20:01:54 2003
+++ 860-per_node_rss/fs/binfmt_elf.c	Wed Aug 13 20:51:52 2003
@@ -634,7 +634,7 @@ static int load_elf_binary(struct linux_
 
 	/* Do this so that we can load the interpreter, if need be.  We will
 	   change some of these later */
-	current->mm->rss = 0;
+	zero_rss(current->mm);
 	current->mm->free_area_cache = TASK_UNMAPPED_BASE;
 	retval = setup_arg_pages(bprm);
 	if (retval < 0) {
diff -urpN -X /home/fletch/.diff.exclude 851-mbind_part2/fs/binfmt_flat.c 860-per_node_rss/fs/binfmt_flat.c
--- 851-mbind_part2/fs/binfmt_flat.c	Wed Aug 13 20:24:28 2003
+++ 860-per_node_rss/fs/binfmt_flat.c	Wed Aug 13 20:51:52 2003
@@ -643,7 +643,7 @@ static int load_flat_file(struct linux_b
 		current->mm->start_brk = datapos + data_len + bss_len;
 		current->mm->brk = (current->mm->start_brk + 3) & ~3;
 		current->mm->context.end_brk = memp + ksize((void *) memp) - stack_len;
-		current->mm->rss = 0;
+		zero_rss(current->mm);
 	}
 
 	if (flags & FLAT_FLAG_KTRACE)
diff -urpN -X /home/fletch/.diff.exclude 851-mbind_part2/fs/binfmt_som.c 860-per_node_rss/fs/binfmt_som.c
--- 851-mbind_part2/fs/binfmt_som.c	Thu Feb 13 11:08:11 2003
+++ 860-per_node_rss/fs/binfmt_som.c	Wed Aug 13 20:51:52 2003
@@ -259,7 +259,7 @@ load_som_binary(struct linux_binprm * bp
 	create_som_tables(bprm);
 
 	current->mm->start_stack = bprm->p;
-	current->mm->rss = 0;
+	zero_rss(current->mm);
 
 #if 0
 	printk("(start_brk) %08lx\n" , (unsigned long) current->mm->start_brk);
diff -urpN -X /home/fletch/.diff.exclude 851-mbind_part2/fs/exec.c 860-per_node_rss/fs/exec.c
--- 851-mbind_part2/fs/exec.c	Wed Aug 13 20:29:24 2003
+++ 860-per_node_rss/fs/exec.c	Wed Aug 13 20:51:52 2003
@@ -321,7 +321,7 @@ void put_dirty_page(struct task_struct *
 	set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, prot))));
 	pte_chain = page_add_rmap(page, pte, pte_chain);
 	pte_unmap(pte);
-	tsk->mm->rss++;
+	inc_rss(tsk->mm, page);
 	spin_unlock(&tsk->mm->page_table_lock);
 
 	/* no need for flush_tlb */
diff -urpN -X /home/fletch/.diff.exclude 851-mbind_part2/fs/proc/task_mmu.c 860-per_node_rss/fs/proc/task_mmu.c
--- 851-mbind_part2/fs/proc/task_mmu.c	Tue Jun 24 21:29:23 2003
+++ 860-per_node_rss/fs/proc/task_mmu.c	Wed Aug 13 20:51:52 2003
@@ -3,6 +3,22 @@
 #include <linux/seq_file.h>
 #include <asm/uaccess.h>
 
+#ifdef CONFIG_NUMA
+char *task_mem_pernode(struct mm_struct *mm, char *buffer)
+{
+	int nid;
+
+	for (nid = 0; nid < MAX_NUMNODES; nid++) {
+		buffer += sprintf(buffer, "VmRSS-node_%d:\t%8lu kB\n",
+			nid, mm->pernode_rss[nid] << (PAGE_SHIFT-10));
+	}
+
+	return buffer;
+}
+#else /* !CONFIG_NUMA */
+#define task_mem_pernode(mm, buffer)	(buffer)
+#endif /* CONFIG_NUMA */
+
 char *task_mem(struct mm_struct *mm, char *buffer)
 {
 	unsigned long data = 0, stack = 0, exec = 0, lib = 0;
@@ -39,6 +55,7 @@ char *task_mem(struct mm_struct *mm, cha
 		mm->rss << (PAGE_SHIFT-10),
 		data - stack, stack,
 		exec - lib, lib);
+	buffer = task_mem_pernode(mm, buffer);
 	up_read(&mm->mmap_sem);
 	return buffer;
 }
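
Note: task_mem() is what generates /proc/<pid>/status, so with CONFIG_NUMA
the per-node lines become directly visible to user space.  A minimal reader
sketch; it assumes only the "VmRSS-node_%d" format added above and is not
part of the patch.

#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[256];
	FILE *fp = fopen("/proc/self/status", "r");

	if (!fp) {
		perror("fopen");
		return 1;
	}
	/* echo just the VmRSS-node_<N> lines from task_mem_pernode() */
	while (fgets(line, sizeof(line), fp))
		if (!strncmp(line, "VmRSS-node_", 11))
			fputs(line, stdout);
	fclose(fp);
	return 0;
}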
diff -urpN -X /home/fletch/.diff.exclude 851-mbind_part2/include/asm-generic/tlb.h 860-per_node_rss/include/asm-generic/tlb.h
--- 851-mbind_part2/include/asm-generic/tlb.h	Fri May 30 19:02:20 2003
+++ 860-per_node_rss/include/asm-generic/tlb.h	Wed Aug 13 20:51:52 2003
@@ -39,7 +39,6 @@ struct mmu_gather {
 	unsigned int		nr;	/* set to ~0U means fast mode */
 	unsigned int		need_flush;/* Really unmapped some ptes? */
 	unsigned int		fullmm; /* non-zero means full mm flush */
-	unsigned long		freed;
 	struct page *		pages[FREE_PTE_NR];
 };
 
@@ -60,7 +59,6 @@ tlb_gather_mmu(struct mm_struct *mm, uns
 	tlb->nr = num_online_cpus() > 1 ? 0U : ~0U;
 
 	tlb->fullmm = full_mm_flush;
-	tlb->freed = 0;
 
 	return tlb;
 }
@@ -85,13 +83,6 @@ tlb_flush_mmu(struct mmu_gather *tlb, un
 static inline void
 tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 {
-	int freed = tlb->freed;
-	struct mm_struct *mm = tlb->mm;
-	int rss = mm->rss;
-
-	if (rss < freed)
-		freed = rss;
-	mm->rss = rss - freed;
 	tlb_flush_mmu(tlb, start, end);
 
 	/* keep the page table cache within bounds */
diff -urpN -X /home/fletch/.diff.exclude 851-mbind_part2/include/linux/mm.h 860-per_node_rss/include/linux/mm.h
--- 851-mbind_part2/include/linux/mm.h	Wed Aug 13 20:29:24 2003
+++ 860-per_node_rss/include/linux/mm.h	Wed Aug 13 20:51:52 2003
@@ -612,6 +612,39 @@ extern struct page * follow_page(struct 
 extern int remap_page_range(struct vm_area_struct *vma, unsigned long from,
 		unsigned long to, unsigned long size, pgprot_t prot);
 
+/*
+ * Given a struct page, determine which node's memory it is from.
+ * TODO: There's probably a more efficient way to do this...
+ */
+static inline int page_to_nid(struct page *page)
+{
+	return pfn_to_nid(page_to_pfn(page));
+}
+
+#ifdef CONFIG_NUMA
+static inline void zero_rss(struct mm_struct *mm)
+{
+	mm->rss = 0;
+	memset(mm->pernode_rss, 0, sizeof(mm->pernode_rss));
+}
+
+static inline void inc_rss(struct mm_struct *mm, struct page *page)
+{
+	mm->rss++;
+	mm->pernode_rss[page_to_nid(page)]++;
+}
+
+static inline void dec_rss(struct mm_struct *mm, struct page *page)
+{
+	mm->rss--;
+	mm->pernode_rss[page_to_nid(page)]--;
+}
+#else /* !CONFIG_NUMA */
+#define zero_rss(mm)		((mm)->rss = 0)
+#define inc_rss(mm, page)	((mm)->rss++)
+#define dec_rss(mm, page)	((mm)->rss--)
+#endif /* CONFIG_NUMA */
+
 #ifndef CONFIG_DEBUG_PAGEALLOC
 static inline void
 kernel_map_pages(struct page *page, int numpages, int enable)
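
Note: the !CONFIG_NUMA fallbacks above discard their page argument at
preprocessing time, so a side effect in that expression would compile but
silently never run on non-NUMA builds.  Every call site in this patch
passes a plain variable, which is safe.  A tiny user-space mock of the
pitfall; mm_mock and side_effect() are invented for the demo:

#include <assert.h>

/* the !CONFIG_NUMA variant from the hunk above */
#define inc_rss(mm, page)	((mm)->rss++)

struct mm_mock { unsigned long rss; };

static int evaluated;
static int side_effect(void) { evaluated = 1; return 0; }

int main(void)
{
	struct mm_mock mm = { 0 };

	/* the second argument vanishes during macro expansion... */
	inc_rss(&mm, side_effect());
	assert(mm.rss == 1);
	/* ...so side_effect() was never actually called */
	assert(evaluated == 0);
	return 0;
}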
diff -urpN -X /home/fletch/.diff.exclude 851-mbind_part2/include/linux/sched.h 860-per_node_rss/include/linux/sched.h
--- 851-mbind_part2/include/linux/sched.h	Wed Aug 13 20:51:45 2003
+++ 860-per_node_rss/include/linux/sched.h	Wed Aug 13 20:51:52 2003
@@ -205,7 +205,7 @@ struct mm_struct {
 	atomic_t mm_count;			/* How many references to "struct mm_struct" (users count as 1) */
 	int map_count;				/* number of VMAs */
 	struct rw_semaphore mmap_sem;
-	spinlock_t page_table_lock;		/* Protects task page tables and mm->rss */
+	spinlock_t page_table_lock;		/* Protects task page tables and RSS data */
 
 	struct list_head mmlist;		/* List of all active mm's.  These are globally strung
 						 * together off init_mm.mmlist, and are protected
@@ -215,7 +215,11 @@ struct mm_struct {
 	unsigned long start_code, end_code, start_data, end_data;
 	unsigned long start_brk, brk, start_stack;
 	unsigned long arg_start, arg_end, env_start, env_end;
-	unsigned long rss, total_vm, locked_vm;
+	unsigned long total_vm, locked_vm;
+	unsigned long rss;
+#ifdef CONFIG_NUMA
+	unsigned long pernode_rss[MAX_NUMNODES];
+#endif
 	unsigned long def_flags;
 	unsigned long cpu_vm_mask;
 	unsigned long swap_address;
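
Note: the reworded lock comment is the contract the new helpers depend on.
mm->rss and pernode_rss[] are plain counters, so they only stay consistent
with each other because every inc_rss()/dec_rss() call site in this patch
already runs under page_table_lock.  A user-space mock of the two-counter
discipline; the pthread mutex and all mock names are stand-ins, not patch
code:

#include <assert.h>
#include <pthread.h>

#define MOCK_NUMNODES	4	/* stand-in for MAX_NUMNODES */

struct mm_mock {
	pthread_mutex_t page_table_lock;	/* spinlock stand-in */
	unsigned long rss;
	unsigned long pernode_rss[MOCK_NUMNODES];
};

/* mirrors inc_rss(): two counters updated in one critical section */
static void inc_rss_mock(struct mm_mock *mm, int nid)
{
	mm->rss++;
	mm->pernode_rss[nid]++;
}

int main(void)
{
	struct mm_mock mm = { PTHREAD_MUTEX_INITIALIZER };

	pthread_mutex_lock(&mm.page_table_lock);
	inc_rss_mock(&mm, 2);		/* page known to live on node 2 */
	pthread_mutex_unlock(&mm.page_table_lock);

	assert(mm.rss == 1);
	assert(mm.pernode_rss[2] == 1);
	return 0;
}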
diff -urpN -X /home/fletch/.diff.exclude 851-mbind_part2/kernel/fork.c 860-per_node_rss/kernel/fork.c
--- 851-mbind_part2/kernel/fork.c	Wed Aug 13 20:29:31 2003
+++ 860-per_node_rss/kernel/fork.c	Wed Aug 13 20:51:52 2003
@@ -235,7 +235,7 @@ static inline int dup_mmap(struct mm_str
 	mm->mmap_cache = NULL;
 	mm->free_area_cache = TASK_UNMAPPED_BASE;
 	mm->map_count = 0;
-	mm->rss = 0;
+	zero_rss(mm);
 	mm->cpu_vm_mask = 0;
 	pprev = &mm->mmap;
 
diff -urpN -X /home/fletch/.diff.exclude 851-mbind_part2/mm/fremap.c 860-per_node_rss/mm/fremap.c
--- 851-mbind_part2/mm/fremap.c	Wed Aug 13 20:29:24 2003
+++ 860-per_node_rss/mm/fremap.c	Wed Aug 13 20:51:52 2003
@@ -36,7 +36,7 @@ static inline int zap_pte(struct mm_stru
 					set_page_dirty(page);
 				page_remove_rmap(page, ptep);
 				page_cache_release(page);
-				mm->rss--;
+				dec_rss(mm, page);
 			}
 		}
 		return 1;
@@ -93,7 +93,7 @@ int install_page(struct mm_struct *mm, s
 
 	flush = zap_pte(mm, vma, addr, pte);
 
-	mm->rss++;
+	inc_rss(mm, page);
 	flush_icache_page(vma, page);
 	set_pte(pte, mk_pte(page, prot));
 	pte_chain = page_add_rmap(page, pte, pte_chain);
diff -urpN -X /home/fletch/.diff.exclude 851-mbind_part2/mm/memory.c 860-per_node_rss/mm/memory.c
--- 851-mbind_part2/mm/memory.c	Wed Aug 13 20:51:19 2003
+++ 860-per_node_rss/mm/memory.c	Wed Aug 13 20:51:52 2003
@@ -332,7 +332,7 @@ skip_copy_pte_range:
 					pte = pte_mkclean(pte);
 				pte = pte_mkold(pte);
 				get_page(page);
-				dst->rss++;
+				inc_rss(dst, page);
 
 				set_pte(dst_pte, pte);
 				pte_chain = page_add_rmap(page, dst_pte,
@@ -424,7 +424,14 @@ zap_pte_range(struct mmu_gather *tlb, pm
 					if (page->mapping && pte_young(pte) &&
 							!PageSwapCache(page))
 						mark_page_accessed(page);
-					tlb->freed++;
+					/*
+					 * Decrement the RSS while we still
+					 * have the page handy: the per-page
+					 * (rather than batched) update lets
+					 * the NUMA per-node accounting see
+					 * which node each page came from.
+					 */
+					dec_rss(tlb->mm, page);
 					page_remove_rmap(page, ptep);
 					tlb_remove_page(tlb, page);
 				}
@@ -1050,7 +1057,7 @@ static int do_wp_page(struct mm_struct *
 	page_table = pte_offset_map(pmd, address);
 	if (pte_same(*page_table, pte)) {
 		if (PageReserved(old_page))
-			++mm->rss;
+			inc_rss(mm, new_page);
 		page_remove_rmap(old_page, page_table);
 		break_cow(vma, new_page, address, page_table);
 		SetPageAnon(new_page);
@@ -1284,7 +1291,7 @@ static int do_swap_page(struct mm_struct
 	if (vm_swap_full())
 		remove_exclusive_swap_page(page);
 
-	mm->rss++;
+	inc_rss(mm, page);
 	pte = mk_pte(page, vma->vm_page_prot);
 	if (write_access && can_share_swap_page(page))
 		pte = pte_mkdirty(pte_mkwrite(pte));
@@ -1354,7 +1361,7 @@ do_anonymous_page(struct mm_struct *mm, 
 			ret = VM_FAULT_MINOR;
 			goto out;
 		}
-		mm->rss++;
+		inc_rss(mm, page);
 		entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
 		lru_cache_add_active(page);
 		mark_page_accessed(page);
@@ -1470,7 +1477,7 @@ retry:
 	 */
 	/* Only go through if we didn't race with anybody else... */
 	if (pte_none(*page_table)) {
-		++mm->rss;
+		inc_rss(mm, new_page);
 		flush_icache_page(vma, new_page);
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		if (write_access)
diff -urpN -X /home/fletch/.diff.exclude 851-mbind_part2/mm/mmap.c 860-per_node_rss/mm/mmap.c
--- 851-mbind_part2/mm/mmap.c	Wed Aug 13 20:51:42 2003
+++ 860-per_node_rss/mm/mmap.c	Wed Aug 13 20:51:52 2003
@@ -1402,7 +1402,7 @@ void exit_mmap(struct mm_struct *mm)
 	vma = mm->mmap;
 	mm->mmap = mm->mmap_cache = NULL;
 	mm->mm_rb = RB_ROOT;
-	mm->rss = 0;
+	zero_rss(mm);
 	mm->total_vm = 0;
 	mm->locked_vm = 0;
 
diff -urpN -X /home/fletch/.diff.exclude 851-mbind_part2/mm/rmap.c 860-per_node_rss/mm/rmap.c
--- 851-mbind_part2/mm/rmap.c	Wed Aug 13 20:29:24 2003
+++ 860-per_node_rss/mm/rmap.c	Wed Aug 13 20:51:53 2003
@@ -623,7 +623,7 @@ static int try_to_unmap_one(struct page 
 	if (pte_dirty(pte))
 		set_page_dirty(page);
 
-	mm->rss--;
+	dec_rss(mm, page);
 	page_cache_release(page);
 	ret = SWAP_SUCCESS;
 
diff -urpN -X /home/fletch/.diff.exclude 851-mbind_part2/mm/swapfile.c 860-per_node_rss/mm/swapfile.c
--- 851-mbind_part2/mm/swapfile.c	Wed Aug 13 20:51:42 2003
+++ 860-per_node_rss/mm/swapfile.c	Wed Aug 13 20:51:53 2003
@@ -386,7 +386,7 @@ static void
 unuse_pte(struct vm_area_struct *vma, unsigned long address, pte_t *dir,
 	swp_entry_t entry, struct page *page, struct pte_chain **pte_chainp)
 {
-	vma->vm_mm->rss++;
+	inc_rss(vma->vm_mm, page);
 	get_page(page);
 	set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot)));
 	SetPageAnon(page);