From: Rik van Riel The patch below (softly) enforces RLIMIT_RSS in the 2.6 kernel; it has been tested by Pavel and seems to work ok for his workload. --- include/linux/init_task.h | 2 ++ include/linux/sched.h | 1 + include/linux/swap.h | 4 ++-- kernel/sys.c | 8 ++++++++ mm/rmap.c | 18 +++++++++++++++++- mm/vmscan.c | 12 ++++++++---- 6 files changed, 38 insertions(+), 7 deletions(-) diff -puN include/linux/init_task.h~vm-rss-limit-enforcement include/linux/init_task.h --- 25/include/linux/init_task.h~vm-rss-limit-enforcement 2004-01-29 17:44:04.000000000 -0800 +++ 25-akpm/include/linux/init_task.h 2004-01-29 19:11:49.000000000 -0800 @@ -2,6 +2,7 @@ #define _LINUX__INIT_TASK_H #include <linux/file.h> +#include <linux/resource.h> #define INIT_FILES \ { \ @@ -41,6 +42,7 @@ .page_table_lock = SPIN_LOCK_UNLOCKED, \ .mmlist = LIST_HEAD_INIT(name.mmlist), \ .default_kioctx = INIT_KIOCTX(name.default_kioctx, name), \ + .rlimit_rss = RLIM_INFINITY \ } #define INIT_SIGNALS(sig) { \ diff -puN include/linux/sched.h~vm-rss-limit-enforcement include/linux/sched.h --- 25/include/linux/sched.h~vm-rss-limit-enforcement 2004-01-29 17:44:04.000000000 -0800 +++ 25-akpm/include/linux/sched.h 2004-01-29 19:12:09.000000000 -0800 @@ -205,6 +205,7 @@ struct mm_struct { unsigned long arg_start, arg_end, env_start, env_end; unsigned long rss, total_vm, locked_vm; unsigned long def_flags; + unsigned long rlimit_rss; cpumask_t cpu_vm_mask; unsigned long saved_auxv[40]; /* for /proc/PID/auxv */ diff -puN include/linux/swap.h~vm-rss-limit-enforcement include/linux/swap.h --- 25/include/linux/swap.h~vm-rss-limit-enforcement 2004-01-29 17:44:04.000000000 -0800 +++ 25-akpm/include/linux/swap.h 2004-01-29 17:44:04.000000000 -0800 @@ -179,7 +179,7 @@ extern int vm_swappiness; /* linux/mm/rmap.c */ #ifdef CONFIG_MMU -int FASTCALL(page_referenced(struct page *)); +int FASTCALL(page_referenced(struct page *, int *)); struct pte_chain *FASTCALL(page_add_rmap(struct page *, pte_t *, struct pte_chain *)); void 
FASTCALL(page_remove_rmap(struct page *, pte_t *)); @@ -188,7 +188,7 @@ int FASTCALL(try_to_unmap(struct page *) /* linux/mm/shmem.c */ extern int shmem_unuse(swp_entry_t entry, struct page *page); #else -#define page_referenced(page) TestClearPageReferenced(page) +#define page_referenced(page, _x) TestClearPageReferenced(page) #define try_to_unmap(page) SWAP_FAIL #endif /* CONFIG_MMU */ diff -puN kernel/sys.c~vm-rss-limit-enforcement kernel/sys.c --- 25/kernel/sys.c~vm-rss-limit-enforcement 2004-01-29 17:44:04.000000000 -0800 +++ 25-akpm/kernel/sys.c 2004-01-29 19:11:51.000000000 -0800 @@ -1308,6 +1308,14 @@ asmlinkage long sys_setrlimit(unsigned i if (retval) return retval; + /* The rlimit is specified in bytes, convert to pages for mm. */ + if (resource == RLIMIT_RSS && current->mm) { + unsigned long pages = RLIM_INFINITY; + if (new_rlim.rlim_cur != RLIM_INFINITY) + pages = new_rlim.rlim_cur >> PAGE_SHIFT; + current->mm->rlimit_rss = pages; + } + *old_rlim = new_rlim; return 0; } diff -puN mm/rmap.c~vm-rss-limit-enforcement mm/rmap.c --- 25/mm/rmap.c~vm-rss-limit-enforcement 2004-01-29 17:44:04.000000000 -0800 +++ 25-akpm/mm/rmap.c 2004-01-29 19:12:20.000000000 -0800 @@ -104,6 +104,7 @@ pte_chain_encode(struct pte_chain *pte_c /** * page_referenced - test if the page was referenced * @page: the page to test + * @rsslimit: set if the process(es) using the page is(are) over RSS limit. * * Quick test_and_clear_referenced for all mappings to a page, * returns the number of processes which referenced the page. @@ -111,9 +112,13 @@ pte_chain_encode(struct pte_chain *pte_c * * If the page has a single-entry pte_chain, collapse that back to a PageDirect * representation. This way, it's only done under memory pressure. + * + * The pte_chain_lock() is sufficient to pin down mm_structs while we examine + * them. 
*/ -int page_referenced(struct page * page) +int page_referenced(struct page *page, int *rsslimit) { + struct mm_struct * mm; struct pte_chain *pc; int referenced = 0; @@ -127,10 +132,17 @@ int page_referenced(struct page * page) pte_t *pte = rmap_ptep_map(page->pte.direct); if (ptep_test_and_clear_young(pte)) referenced++; + + mm = ptep_to_mm(pte); + if (mm->rss > mm->rlimit_rss) + *rsslimit = 1; rmap_ptep_unmap(pte); } else { int nr_chains = 0; + /* We clear it if any task using the page is under its limit. */ + *rsslimit = 1; + /* Check all the page tables mapping this page. */ for (pc = page->pte.chain; pc; pc = pte_chain_next(pc)) { int i; @@ -142,6 +154,10 @@ int page_referenced(struct page * page) p = rmap_ptep_map(pte_paddr); if (ptep_test_and_clear_young(p)) referenced++; + + mm = ptep_to_mm(p); + if (mm->rss < mm->rlimit_rss) + *rsslimit = 0; rmap_ptep_unmap(p); nr_chains++; } diff -puN mm/vmscan.c~vm-rss-limit-enforcement mm/vmscan.c --- 25/mm/vmscan.c~vm-rss-limit-enforcement 2004-01-29 17:44:04.000000000 -0800 +++ 25-akpm/mm/vmscan.c 2004-01-29 19:12:39.000000000 -0800 @@ -250,6 +250,7 @@ shrink_list(struct list_head *page_list, LIST_HEAD(ret_pages); struct pagevec freed_pvec; int pgactivate = 0; + int over_rsslimit = 0; int ret = 0; cond_resched(); @@ -278,8 +279,8 @@ shrink_list(struct list_head *page_list, goto keep_locked; pte_chain_lock(page); - referenced = page_referenced(page); - if (referenced && page_mapping_inuse(page)) { + referenced = page_referenced(page, &over_rsslimit); + if (referenced && page_mapping_inuse(page) && !over_rsslimit) { /* In active use or really unfreeable. Activate it. 
*/ pte_chain_unlock(page); goto activate_locked; @@ -597,6 +598,7 @@ refill_inactive_zone(struct zone *zone, long mapped_ratio; long distress; long swap_tendency; + int over_rsslimit = 0; lru_add_drain(); pgmoved = 0; @@ -657,13 +659,15 @@ refill_inactive_zone(struct zone *zone, list_del(&page->lru); if (page_mapped(page)) { pte_chain_lock(page); - if (page_mapped(page) && page_referenced(page)) { + if (page_mapped(page) && + page_referenced(page, &over_rsslimit) && + !over_rsslimit) { pte_chain_unlock(page); list_add(&page->lru, &l_active); continue; } pte_chain_unlock(page); - if (!reclaim_mapped) { + if (!reclaim_mapped && !over_rsslimit) { list_add(&page->lru, &l_active); continue; } _