diff -urN linux-2.4.16-preempt/Documentation/Configure.help linux/Documentation/Configure.help
--- linux-2.4.16-preempt/Documentation/Configure.help	Mon Nov 26 15:58:35 2001
+++ linux/Documentation/Configure.help	Tue Nov 27 23:13:16 2001
@@ -277,6 +277,18 @@
   system where throughput is more important than interactive response,
   such as a server system.  Say N if you are unsure.
 
+Break Selected Locks
+CONFIG_LOCK_BREAK
+  This option will break certain locks in high-latency regions
+  throughout the kernel.  It is intended for use in conjunction with
+  the preemptible kernel (CONFIG_PREEMPT).  Since in-kernel preemption
+  cannot occur while locks are held, temporarily releasing and then
+  reacquiring long-held locks will further improve system response.
+
+  Say Y if you are compiling for a system with strict latency
+  requirements such as an embedded, real-time, or audio processing
+  system.  Say N otherwise.
+
 Kernel math emulation
 CONFIG_MATH_EMULATION
   Linux can emulate a math coprocessor (used for floating point
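
The help text above describes the core idea; the sketch below (illustrative only, not part of the patch, with made-up function and lock names) shows the shape of the transformation applied throughout the files that follow: when a reschedule is pending while a spinlock is held, drop the lock so preemption becomes possible again, schedule, retake the lock, and revalidate anything derived from the protected data.

/*
 * Illustrative sketch only -- hypothetical names, not added anywhere
 * by this patch.
 */
#include <linux/sched.h>
#include <linux/spinlock.h>

static spinlock_t example_lock = SPIN_LOCK_UNLOCKED;

static void example_scan(void)
{
	spin_lock(&example_lock);
restart:
	/* ... walk a long list, one entry per pass ... */
	if (current->need_resched) {
		spin_unlock(&example_lock);	/* preemption possible again */
		__set_current_state(TASK_RUNNING);
		schedule();
		spin_lock(&example_lock);
		goto restart;	/* protected state may have changed */
	}
	spin_unlock(&example_lock);
}
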
diff -urN linux-2.4.16-preempt/arch/i386/config.in linux/arch/i386/config.in
--- linux-2.4.16-preempt/arch/i386/config.in	Mon Nov 26 15:58:16 2001
+++ linux/arch/i386/config.in	Tue Nov 27 23:13:16 2001
@@ -171,6 +171,9 @@
 bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
 bool 'Symmetric multi-processing support' CONFIG_SMP
 bool 'Preemptible Kernel' CONFIG_PREEMPT
+if [ "$CONFIG_PREEMPT" = "y" ]; then
+   bool 'Break selected locks' CONFIG_LOCK_BREAK
+fi
 if [ "$CONFIG_SMP" != "y" ]; then
    bool 'Local APIC support on uniprocessors' CONFIG_X86_UP_APIC
    dep_bool 'IO-APIC support on uniprocessors' CONFIG_X86_UP_IOAPIC $CONFIG_X86_UP_APIC
diff -urN linux-2.4.16-preempt/drivers/char/mem.c linux/drivers/char/mem.c
--- linux-2.4.16-preempt/drivers/char/mem.c	Mon Nov 26 15:57:59 2001
+++ linux/drivers/char/mem.c	Tue Nov 27 23:13:16 2001
@@ -364,7 +364,7 @@
 		if (count > size)
 			count = size;
 
-		zap_page_range(mm, addr, count);
+		zap_page_range(mm, addr, count, ZPR_NORMAL);
         	zeromap_page_range(addr, count, PAGE_COPY);
 
 		size -= count;
diff -urN linux-2.4.16-preempt/drivers/char/tty_io.c linux/drivers/char/tty_io.c
--- linux-2.4.16-preempt/drivers/char/tty_io.c	Mon Nov 26 15:57:59 2001
+++ linux/drivers/char/tty_io.c	Tue Nov 27 23:13:16 2001
@@ -722,6 +722,7 @@
 			ret = -ERESTARTSYS;
 			if (signal_pending(current))
 				break;
+			debug_lock_break(551);
 			if (current->need_resched)
 				schedule();
 		}
diff -urN linux-2.4.16-preempt/fs/buffer.c linux/fs/buffer.c
--- linux-2.4.16-preempt/fs/buffer.c	Mon Nov 26 15:57:34 2001
+++ linux/fs/buffer.c	Tue Nov 27 23:14:13 2001
@@ -254,7 +254,6 @@
 	while (next && --nr >= 0) {
 		struct buffer_head *bh = next;
 		next = bh->b_next_free;
-
 		if (!buffer_locked(bh)) {
 			if (refile)
 				__refile_buffer(bh);
@@ -262,7 +261,11 @@
 		}
 		if (dev && bh->b_dev != dev)
 			continue;
-
+		if (conditional_schedule_needed()) {
+			debug_lock_break(1);
+			spin_unlock(&lru_list_lock);
+			return -EAGAIN;
+		}
 		get_bh(bh);
 		spin_unlock(&lru_list_lock);
 		wait_on_buffer (bh);
@@ -672,6 +675,13 @@
 			/* Not hashed? */
 			if (!bh->b_pprev)
 				continue;
+			if (conditional_schedule_needed()) {
+				debug_lock_break(2); /* bkl is held too */
+				get_bh(bh);
+				break_spin_lock_and_resched(&lru_list_lock);
+				put_bh(bh);
+				slept = 1;
+			}
 			if (buffer_locked(bh)) {
 				get_bh(bh);
 				spin_unlock(&lru_list_lock);
@@ -823,6 +833,8 @@
 	struct buffer_head *bh;
 	struct inode tmp;
 	int err = 0, err2;
+
+	DEFINE_LOCK_COUNT();
 	
 	INIT_LIST_HEAD(&tmp.i_dirty_buffers);
 	
@@ -844,6 +856,12 @@
 				spin_lock(&lru_list_lock);
 			}
 		}
+		/* haven't hit this code path ... */
+		debug_lock_break(551);
+		if (TEST_LOCK_COUNT(32)) {
+			RESET_LOCK_COUNT();
+			break_spin_lock(&lru_list_lock);
+		}
 	}
 
 	while (!list_empty(&tmp.i_dirty_buffers)) {
@@ -873,6 +891,7 @@
 	struct inode tmp;
 	int err = 0, err2;
 	
+	DEFINE_LOCK_COUNT();
 	INIT_LIST_HEAD(&tmp.i_dirty_data_buffers);
 	
 	spin_lock(&lru_list_lock);
@@ -904,9 +923,14 @@
 		if (!buffer_uptodate(bh))
 			err = -EIO;
 		brelse(bh);
+		debug_lock_break(1);
+		if (TEST_LOCK_COUNT(32)) {
+			RESET_LOCK_COUNT();
+			conditional_schedule();
+		}
 		spin_lock(&lru_list_lock);
 	}
-	
+
 	spin_unlock(&lru_list_lock);
 	err2 = osync_inode_data_buffers(inode);
 
@@ -933,6 +957,8 @@
 	struct list_head *list;
 	int err = 0;
 
+	DEFINE_LOCK_COUNT();
+
 	spin_lock(&lru_list_lock);
 	
  repeat:
@@ -940,6 +966,17 @@
 	for (list = inode->i_dirty_buffers.prev; 
 	     bh = BH_ENTRY(list), list != &inode->i_dirty_buffers;
 	     list = bh->b_inode_buffers.prev) {
+		/* untested code path ... */
+		debug_lock_break(551);
+
+		if (TEST_LOCK_COUNT(32)) {
+			RESET_LOCK_COUNT();
+			if (conditional_schedule_needed()) {
+				break_spin_lock(&lru_list_lock);
+				goto repeat;
+			}
+		}
+
 		if (buffer_locked(bh)) {
 			get_bh(bh);
 			spin_unlock(&lru_list_lock);
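
One point worth noting about the first fs/buffer.c hunk above: when a reschedule is pending, the scan now gives up, drops lru_list_lock, and returns -EAGAIN instead of finishing the walk, handing the retry decision to its caller. The caller-side handling is not shown in this excerpt; it would look roughly like the hypothetical loop below (names invented for illustration).

/* Hypothetical names, for illustration only -- the real caller is not
 * part of this excerpt. */
static int example_buffer_scan(void);	/* returns 0 or -EAGAIN */

static void example_wait_for_buffers(void)
{
	while (example_buffer_scan() == -EAGAIN)
		;	/* lock was dropped and scheduling had its chance; retry */
}
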
diff -urN linux-2.4.16-preempt/fs/dcache.c linux/fs/dcache.c
--- linux-2.4.16-preempt/fs/dcache.c	Mon Nov 26 15:57:34 2001
+++ linux/fs/dcache.c	Tue Nov 27 23:13:16 2001
@@ -320,11 +320,24 @@
  
 void prune_dcache(int count)
 {
+	DEFINE_LOCK_COUNT();
+
 	spin_lock(&dcache_lock);
+
+redo:
 	for (;;) {
 		struct dentry *dentry;
 		struct list_head *tmp;
 
+		if (TEST_LOCK_COUNT(100)) {
+			RESET_LOCK_COUNT();
+			debug_lock_break(1);
+			if (conditional_schedule_needed()) {
+				break_spin_lock(&dcache_lock);
+				goto redo;
+			}
+		}
+
 		tmp = dentry_unused.prev;
 
 		if (tmp == &dentry_unused)
@@ -480,6 +493,8 @@
 	struct list_head *next;
 	int found = 0;
 
+	DEFINE_LOCK_COUNT();
+
 	spin_lock(&dcache_lock);
 repeat:
 	next = this_parent->d_subdirs.next;
@@ -493,6 +508,12 @@
 			list_add(&dentry->d_lru, dentry_unused.prev);
 			found++;
 		}
+		if (TEST_LOCK_COUNT(500) && found > 10) {
+			debug_lock_break(1);
+			if (conditional_schedule_needed())
+				goto out;
+			RESET_LOCK_COUNT();
+		}
 		/*
 		 * Descend a level if the d_subdirs list is non-empty.
 		 */
@@ -517,6 +538,7 @@
 #endif
 		goto resume;
 	}
+out:
 	spin_unlock(&dcache_lock);
 	return found;
 }
diff -urN linux-2.4.16-preempt/fs/ext3/inode.c linux/fs/ext3/inode.c
--- linux-2.4.16-preempt/fs/ext3/inode.c	Mon Nov 26 15:57:38 2001
+++ linux/fs/ext3/inode.c	Tue Nov 27 23:13:16 2001
@@ -1627,6 +1627,8 @@
 	}
 
 	for (p = first; p < last; p++) {
+		debug_lock_break(1); /* bkl is held */
+		conditional_schedule();
 		nr = le32_to_cpu(*p);
 		if (nr) {
 			/* accumulate blocks to free if they're contiguous */
@@ -1691,6 +1693,8 @@
 
 			/* Go read the buffer for the next level down */
 			bh = bread(inode->i_dev, nr, inode->i_sb->s_blocksize);
+			debug_lock_break(1);
+			conditional_schedule();
 
 			/*
 			 * A read failure? Report error and clear slot
diff -urN linux-2.4.16-preempt/fs/ext3/namei.c linux/fs/ext3/namei.c
--- linux-2.4.16-preempt/fs/ext3/namei.c	Mon Nov 26 15:57:38 2001
+++ linux/fs/ext3/namei.c	Tue Nov 27 23:13:16 2001
@@ -157,6 +157,8 @@
 		if ((bh = bh_use[ra_ptr++]) == NULL)
 			goto next;
 		wait_on_buffer(bh);
+		debug_lock_break(1);
+		conditional_schedule();
 		if (!buffer_uptodate(bh)) {
 			/* read error, skip block & hope for the best */
 			brelse(bh);
diff -urN linux-2.4.16-preempt/fs/inode.c linux/fs/inode.c
--- linux-2.4.16-preempt/fs/inode.c	Mon Nov 26 15:57:34 2001
+++ linux/fs/inode.c	Tue Nov 27 23:13:16 2001
@@ -567,6 +567,12 @@
 		if (tmp == head)
 			break;
 		inode = list_entry(tmp, struct inode, i_list);
+
+		debug_lock_break(2); /* bkl is also held */
+		atomic_inc(&inode->i_count);
+		break_spin_lock_and_resched(&inode_lock);
+		atomic_dec(&inode->i_count);
+
 		if (inode->i_sb != sb)
 			continue;
 		invalidate_inode_buffers(inode);
@@ -668,8 +674,11 @@
 	int count;
 	struct inode * inode;
 
+	DEFINE_LOCK_COUNT();
+
 	spin_lock(&inode_lock);
 
+free_unused:
 	count = 0;
 	entry = inode_unused.prev;
 	while (entry != &inode_unused)
@@ -692,6 +701,14 @@
 		count++;
 		if (!--goal)
 			break;
+		if (TEST_LOCK_COUNT(32)) {
+			RESET_LOCK_COUNT();
+			debug_lock_break(1);
+			if (conditional_schedule_needed()) {
+				break_spin_lock(&inode_lock);
+				goto free_unused;
+			}
+		}
 	}
 	inodes_stat.nr_unused -= count;
 	spin_unlock(&inode_lock);
diff -urN linux-2.4.16-preempt/fs/jbd/commit.c linux/fs/jbd/commit.c
--- linux-2.4.16-preempt/fs/jbd/commit.c	Mon Nov 26 15:57:38 2001
+++ linux/fs/jbd/commit.c	Tue Nov 27 23:23:47 2001
@@ -211,6 +211,9 @@
 				__journal_remove_journal_head(bh);
 				refile_buffer(bh);
 				__brelse(bh);
+				debug_lock_break(2);
+				if (conditional_schedule_needed())
+					break;
 			}
 		}
 		if (bufs == ARRAY_SIZE(wbuf)) {
@@ -234,8 +237,7 @@
 		journal_brelse_array(wbuf, bufs);
 		lock_journal(journal);
 		spin_lock(&journal_datalist_lock);
-		if (bufs)
-			goto write_out_data_locked;
+		goto write_out_data_locked;
 	}
 
 	/*
@@ -271,6 +273,14 @@
 	 */
 	while ((jh = commit_transaction->t_async_datalist)) {
 		struct buffer_head *bh = jh2bh(jh);
+		if (conditional_schedule_needed()) {
+			debug_lock_break(551);
+			spin_unlock(&journal_datalist_lock);
+			unlock_journal(journal);
+			lock_journal(journal);
+			spin_lock(&journal_datalist_lock);
+			continue;
+		}
 		if (buffer_locked(bh)) {
 			spin_unlock(&journal_datalist_lock);
 			unlock_journal(journal);
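
In the hunk above, the new code only drops journal_datalist_lock and cycles the journal lock without calling schedule() itself. Under CONFIG_PREEMPT that is sufficient: in the preempt patch, releasing the last held spinlock re-enables preemption, and a pending reschedule runs right there. Roughly (a from-memory sketch of the preempt patch's unlock path; exact names vary between preempt patch revisions, and none of this is added by the patch shown here):

#define spin_unlock(lock)	do {		\
	_raw_spin_unlock(lock);			\
	preempt_enable();			\
} while (0)
/* preempt_enable() decrements current->preempt_count and, if it hits
 * zero with need_resched set, calls preempt_schedule() -- so dropping
 * the lock is itself the scheduling point. */
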
diff -urN linux-2.4.16-preempt/include/linux/lock_break.h linux/include/linux/lock_break.h
--- linux-2.4.16-preempt/include/linux/lock_break.h	Wed Dec 31 19:00:00 1969
+++ linux/include/linux/lock_break.h	Tue Nov 27 23:13:16 2001
@@ -0,0 +1,84 @@
+/*
+ * include/linux/lock_break.h - lock breaking routines
+ *
+ * Since in-kernel preemption cannot occur while a lock is held,
+ * we can just drop and reacquire long-held locks when they are
+ * in a natural quiescent state to further lower system latency.
+ *
+ * (C) 2001 Robert Love
+ *
+ */
+
+#ifndef _LINUX_LOCK_BREAK_H
+#define _LINUX_LOCK_BREAK_H
+
+#include <linux/compiler.h>
+
+/*
+ * Setting this to 1 instructs debug_lock_break() to report when the
+ * expected lock count does not equal the actual count.  If the count
+ * is higher than expected, we are not dropping enough locks; if it is
+ * 0, the break is wasted effort since the kernel is already
+ * preemptible at that point.
+ */
+#ifndef DEBUG_LOCK_BREAK
+#define DEBUG_LOCK_BREAK 0
+#endif
+
+#ifdef CONFIG_LOCK_BREAK
+
+#define conditional_schedule_needed() (unlikely(current->need_resched))
+
+/*
+ * Setting the task's state to TASK_RUNNING is pure paranoia against a
+ * task that has failed to put itself to sleep properly.  We should
+ * test without it.
+ */
+#define unconditional_schedule() do { \
+	__set_current_state(TASK_RUNNING); \
+	schedule(); \
+} while(0)
+
+#define conditional_schedule() do { \
+	if (conditional_schedule_needed()) \
+		unconditional_schedule(); \
+} while(0)
+
+#define break_spin_lock(n) do { \
+	spin_unlock(n); \
+	spin_lock(n); \
+} while(0)
+
+#define break_spin_lock_and_resched(n) do { \
+	spin_unlock(n); \
+	conditional_schedule(); \
+	spin_lock(n); \
+} while(0)
+
+#if DEBUG_LOCK_BREAK
+#define debug_lock_break(n) do { \
+	if (current->preempt_count != n) \
+		printk(KERN_ERR "lock_break: %s:%d: count was %d not %d\n", \
+			__FILE__, __LINE__, current->preempt_count, n); \
+} while(0)
+#else
+#define debug_lock_break(n)
+#endif
+
+#define DEFINE_LOCK_COUNT() int _lock_break_count = 0
+#define TEST_LOCK_COUNT(n) (++_lock_break_count > (n))
+#define RESET_LOCK_COUNT() _lock_break_count = 0
+
+#else
+#define unconditional_schedule()
+#define conditional_schedule()
+#define conditional_schedule_needed() 0
+#define break_spin_lock(n)
+#define break_spin_lock_and_resched(n)
+#define debug_lock_break(n)
+#define DEFINE_LOCK_COUNT()
+#define TEST_LOCK_COUNT(n) 0
+#define RESET_LOCK_COUNT()
+#endif
+
+#endif /* _LINUX_LOCK_BREAK_H */
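
Taken together, the macros above support the counted idiom used by fs/buffer.c, fs/dcache.c, fs/inode.c, and mm/filemap.c: count iterations of a long scan and, every N entries, drop the lock if (and only if) a reschedule is pending, then restart the walk. A usage sketch follows (hypothetical function and lock names; not part of the patch):

#include <linux/sched.h>	/* pulls in lock_break.h with this patch */
#include <linux/spinlock.h>
#include <linux/list.h>

static spinlock_t foo_lock = SPIN_LOCK_UNLOCKED;

static void foo_scan(struct list_head *head)
{
	struct list_head *p;
	DEFINE_LOCK_COUNT();

	spin_lock(&foo_lock);
restart:
	for (p = head->next; p != head; p = p->next) {
		/* ... examine one entry ... */
		if (TEST_LOCK_COUNT(32)) {
			RESET_LOCK_COUNT();
			debug_lock_break(1);	/* expect depth 1: foo_lock */
			if (conditional_schedule_needed()) {
				break_spin_lock(&foo_lock);
				goto restart;	/* list may have changed */
			}
		}
	}
	spin_unlock(&foo_lock);
}
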
diff -urN linux-2.4.16-preempt/include/linux/mm.h linux/include/linux/mm.h
--- linux-2.4.16-preempt/include/linux/mm.h	Mon Nov 26 15:57:38 2001
+++ linux/include/linux/mm.h	Tue Nov 27 23:13:16 2001
@@ -121,6 +121,9 @@
  */
 extern pgprot_t protection_map[16];
 
+#define ZPR_MAX_BYTES (256 * PAGE_SIZE)
+#define ZPR_NORMAL 0 /* perform zap_page_range request in one walk */
+#define ZPR_PARTITION 1 /* partition into a series of smaller operations */
 
 /*
  * These are the virtual MM functions - opening of an area, closing and
@@ -404,7 +407,7 @@
 extern void shmem_lock(struct file * file, int lock);
 extern int shmem_zero_setup(struct vm_area_struct *);
 
-extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size);
+extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, int actions);
 extern int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma);
 extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size, pgprot_t prot);
 extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot);
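
For scale: with 4 KB pages, ZPR_MAX_BYTES is 1 MB, so a ZPR_PARTITION request is carved into chunks of at most 256 pages, and the page_table_lock taken inside do_zap_page_range() (see mm/memory.c below) is released between chunks; ZPR_NORMAL keeps the old single-walk behaviour. A hypothetical caller, for illustration only:

#include <linux/mm.h>
#include <linux/sched.h>

/* Sketch only: unmapping a 64 MB region in partitioned mode becomes
 * 64 calls of at most ZPR_MAX_BYTES (1 MB, i.e. 256 pages) each,
 * giving preemption a chance between chunks. */
static void example_unmap(struct mm_struct *mm, unsigned long start)
{
	zap_page_range(mm, start, 64 << 20, ZPR_PARTITION);
}
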
diff -urN linux-2.4.16-preempt/include/linux/sched.h linux/include/linux/sched.h
--- linux-2.4.16-preempt/include/linux/sched.h	Mon Nov 26 15:57:38 2001
+++ linux/include/linux/sched.h	Tue Nov 27 23:13:16 2001
@@ -26,6 +26,7 @@
 #include <linux/signal.h>
 #include <linux/securebits.h>
 #include <linux/fs_struct.h>
+#include <linux/lock_break.h>
 
 struct exec_domain;
 
diff -urN linux-2.4.16-preempt/kernel/exit.c linux/kernel/exit.c
--- linux-2.4.16-preempt/kernel/exit.c	Mon Nov 26 15:57:38 2001
+++ linux/kernel/exit.c	Tue Nov 27 23:13:16 2001
@@ -190,6 +190,8 @@
 			}
 			i++;
 			set >>= 1;
+			debug_lock_break(1);
+			conditional_schedule();
 		}
 	}
 }
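
The loop above (the fd-bitmap walk in kernel/exit.c, apparently reached with only the big kernel lock held, which is what the debug_lock_break(1) annotation asserts) shows why an expected depth of exactly 1 matters: calling schedule() is legal with the BKL held, since 2.4's schedule() releases the kernel lock before the switch and reacquires it afterwards, but it would not be legal with any spinlock still held. With CONFIG_LOCK_BREAK enabled, the two added lines expand to roughly:

	/* debug_lock_break(1): compiled away unless DEBUG_LOCK_BREAK=1,
	 * in which case it printk()s if current->preempt_count != 1 */
	if (unlikely(current->need_resched)) {	/* conditional_schedule() */
		__set_current_state(TASK_RUNNING);
		schedule();	/* drops and retakes the BKL across the switch */
	}
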
diff -urN linux-2.4.16-preempt/mm/filemap.c linux/mm/filemap.c
--- linux-2.4.16-preempt/mm/filemap.c	Mon Nov 26 15:57:38 2001
+++ linux/mm/filemap.c	Tue Nov 27 23:16:19 2001
@@ -296,6 +296,7 @@
 
 			page_cache_release(page);
 
+			/* we hit this with lock depth of 1 or 2 */
 			if (current->need_resched) {
 				__set_current_state(TASK_RUNNING);
 				schedule();
@@ -406,6 +407,8 @@
 		}
 
 		page_cache_release(page);
+
+		debug_lock_break(551);
 		if (current->need_resched) {
 			__set_current_state(TASK_RUNNING);
 			schedule();
@@ -594,12 +597,16 @@
 		list_del(&page->list);
 		list_add(&page->list, &mapping->locked_pages);
 
-		if (!PageDirty(page))
-			continue;
-
 		page_cache_get(page);
 		spin_unlock(&pagecache_lock);
 
+		/* BKL is held ... */
+		debug_lock_break(1);
+		conditional_schedule();
+
+		if (!PageDirty(page))
+			goto clean;
+
 		lock_page(page);
 
 		if (PageDirty(page)) {
@@ -607,7 +614,7 @@
 			writepage(page);
 		} else
 			UnlockPage(page);
-
+clean:
 		page_cache_release(page);
 		spin_lock(&pagecache_lock);
 	}
@@ -623,14 +630,28 @@
  */
 void filemap_fdatawait(struct address_space * mapping)
 {
+	DEFINE_LOCK_COUNT();
+
 	spin_lock(&pagecache_lock);
 
+restart:
         while (!list_empty(&mapping->locked_pages)) {
 		struct page *page = list_entry(mapping->locked_pages.next, struct page, list);
 
 		list_del(&page->list);
 		list_add(&page->list, &mapping->clean_pages);
-
+
+		if (TEST_LOCK_COUNT(32)) {
+			RESET_LOCK_COUNT();
+			debug_lock_break(2);
+			if (conditional_schedule_needed()) {
+				page_cache_get(page);
+				break_spin_lock_and_resched(&pagecache_lock);
+				page_cache_release(page);
+				goto restart;
+			}
+		}
+
 		if (!PageLocked(page))
 			continue;
 
@@ -894,6 +915,7 @@
 	 * the hash-list needs a held write-lock.
 	 */
 repeat:
+	break_spin_lock(&pagecache_lock);
 	page = __find_page_nolock(mapping, offset, hash);
 	if (page) {
 		page_cache_get(page);
@@ -2055,6 +2077,8 @@
 		address += PAGE_SIZE;
 		pte++;
 	} while (address && (address < end));
+	debug_lock_break(1);
+	break_spin_lock(&vma->vm_mm->page_table_lock);
 	return error;
 }
 
@@ -2085,6 +2109,9 @@
 		address = (address + PMD_SIZE) & PMD_MASK;
 		pmd++;
 	} while (address && (address < end));
+
+	debug_lock_break(1);
+	break_spin_lock(&vma->vm_mm->page_table_lock);
 	return error;
 }
 
@@ -2443,7 +2470,7 @@
 	if (vma->vm_flags & VM_LOCKED)
 		return -EINVAL;
 
-	zap_page_range(vma->vm_mm, start, end - start);
+	zap_page_range(vma->vm_mm, start, end - start, ZPR_PARTITION);
 	return 0;
 }
 
diff -urN linux-2.4.16-preempt/mm/memory.c linux/mm/memory.c
--- linux-2.4.16-preempt/mm/memory.c	Mon Nov 26 15:57:38 2001
+++ linux/mm/memory.c	Tue Nov 27 23:13:16 2001
@@ -355,7 +355,8 @@
 /*
  * remove user pages in a given range.
  */
-void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size)
+void do_zap_page_range(struct mm_struct *mm, unsigned long address,
+			      unsigned long size)
 {
 	mmu_gather_t *tlb;
 	pgd_t * dir;
@@ -397,6 +398,20 @@
 	spin_unlock(&mm->page_table_lock);
 }
 
+void zap_page_range(struct mm_struct *mm, unsigned long address,
+		    unsigned long size, int actions)
+{
+	while (size) {
+		unsigned long chunk = size;
+		
+		if (actions & ZPR_PARTITION && chunk > ZPR_MAX_BYTES)
+			chunk = ZPR_MAX_BYTES;
+		do_zap_page_range(mm, address, chunk);
+
+		address += chunk;
+		size -= chunk;
+	}
+}
 
 /*
  * Do a quick page-table lookup for a single page. 
@@ -705,11 +720,15 @@
 	return 0;
 }
 
-static inline void zeromap_pte_range(pte_t * pte, unsigned long address,
-                                     unsigned long size, pgprot_t prot)
+static inline void zeromap_pte_range(struct mm_struct *mm, pte_t * pte,
+				     unsigned long address, unsigned long size,
+				     pgprot_t prot)
 {
 	unsigned long end;
 
+	debug_lock_break(1);
+	break_spin_lock(&mm->page_table_lock);
+
 	address &= ~PMD_MASK;
 	end = address + size;
 	if (end > PMD_SIZE)
@@ -737,7 +756,7 @@
 		pte_t * pte = pte_alloc(mm, pmd, address);
 		if (!pte)
 			return -ENOMEM;
-		zeromap_pte_range(pte, address, end - address, prot);
+		zeromap_pte_range(mm, pte, address, end - address, prot);
 		address = (address + PMD_SIZE) & PMD_MASK;
 		pmd++;
 	} while (address && (address < end));
@@ -971,7 +990,7 @@
 
 		/* mapping wholly truncated? */
 		if (mpnt->vm_pgoff >= pgoff) {
-			zap_page_range(mm, start, len);
+			zap_page_range(mm, start, len, ZPR_NORMAL);
 			continue;
 		}
 
@@ -984,7 +1003,7 @@
 		/* Ok, partially affected.. */
 		start += diff << PAGE_SHIFT;
 		len = (len - diff) << PAGE_SHIFT;
-		zap_page_range(mm, start, len);
+		zap_page_range(mm, start, len, ZPR_NORMAL);
 	} while ((mpnt = mpnt->vm_next_share) != NULL);
 }
 
diff -urN linux-2.4.16-preempt/mm/mmap.c linux/mm/mmap.c
--- linux-2.4.16-preempt/mm/mmap.c	Mon Nov 26 15:57:38 2001
+++ linux/mm/mmap.c	Tue Nov 27 23:13:16 2001
@@ -569,7 +569,7 @@
 	fput(file);
 
 	/* Undo any partial mapping done by a device driver. */
-	zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start);
+	zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start, ZPR_NORMAL);
 free_vma:
 	kmem_cache_free(vm_area_cachep, vma);
 	return error;
@@ -967,7 +967,7 @@
 		remove_shared_vm_struct(mpnt);
 		mm->map_count--;
 
-		zap_page_range(mm, st, size);
+		zap_page_range(mm, st, size, ZPR_PARTITION);
 
 		/*
 		 * Fix the mapping, and free the old area if it wasn't reused.
@@ -1127,7 +1127,7 @@
 		}
 		mm->map_count--;
 		remove_shared_vm_struct(mpnt);
-		zap_page_range(mm, start, size);
+		zap_page_range(mm, start, size, ZPR_PARTITION);
 		if (mpnt->vm_file)
 			fput(mpnt->vm_file);
 		kmem_cache_free(vm_area_cachep, mpnt);
diff -urN linux-2.4.16-preempt/mm/mremap.c linux/mm/mremap.c
--- linux-2.4.16-preempt/mm/mremap.c	Mon Nov 26 15:57:38 2001
+++ linux/mm/mremap.c	Tue Nov 27 23:13:16 2001
@@ -118,7 +118,7 @@
 	flush_cache_range(mm, new_addr, new_addr + len);
 	while ((offset += PAGE_SIZE) < len)
 		move_one_page(mm, new_addr + offset, old_addr + offset);
-	zap_page_range(mm, new_addr, len);
+	zap_page_range(mm, new_addr, len, ZPR_NORMAL);
 	return -1;
 }
 
diff -urN linux-2.4.16-preempt/mm/swapfile.c linux/mm/swapfile.c
--- linux-2.4.16-preempt/mm/swapfile.c	Mon Nov 26 15:57:38 2001
+++ linux/mm/swapfile.c	Tue Nov 27 23:13:16 2001
@@ -696,6 +696,7 @@
 		 * interactive performance.  Interruptible check on
 		 * signal_pending() would be nice, but changes the spec?
 		 */
+		debug_lock_break(551);
 		if (current->need_resched)
 			schedule();
 	}
@@ -1121,6 +1122,13 @@
 		if (swap_info[i].flags != SWP_USED)
 			continue;
 		for (j = 0; j < swap_info[i].max; ++j) {
+			if (conditional_schedule_needed()) {
+				debug_lock_break(551);
+				swap_list_unlock();
+				debug_lock_break(551);
+				unconditional_schedule();
+				swap_list_lock();
+			}
 			switch (swap_info[i].swap_map[j]) {
 				case 0:
 				case SWAP_MAP_BAD:
diff -urN linux-2.4.16-preempt/mm/vmscan.c linux/mm/vmscan.c
--- linux-2.4.16-preempt/mm/vmscan.c	Mon Nov 26 15:57:38 2001
+++ linux/mm/vmscan.c	Tue Nov 27 23:13:16 2001
@@ -158,6 +158,8 @@
 	pte_t * pte;
 	unsigned long pmd_end;
 
+	DEFINE_LOCK_COUNT();
+
 	if (pmd_none(*dir))
 		return count;
 	if (pmd_bad(*dir)) {
@@ -182,6 +184,14 @@
 					address += PAGE_SIZE;
 					break;
 				}
+				/* we reach this with a lock depth of 1 or 2 */
+#if 0
+				if (TEST_LOCK_COUNT(4)) {
+					if (conditional_schedule_needed())
+						return count;
+					RESET_LOCK_COUNT();
+				}
+#endif
 			}
 		}
 		address += PAGE_SIZE;
@@ -215,6 +225,11 @@
 		count = swap_out_pmd(mm, vma, pmd, address, end, count, classzone);
 		if (!count)
 			break;
+		/* lock depth can be 1 or 2 */
+#if 0
+		if (conditional_schedule_needed())
+			return count;
+#endif
 		address = (address + PMD_SIZE) & PMD_MASK;
 		pmd++;
 	} while (address && (address < end));
@@ -240,6 +255,11 @@
 		count = swap_out_pgd(mm, vma, pgdir, address, end, count, classzone);
 		if (!count)
 			break;
+		/* lock depth can be 1 or 2 */
+#if 0
+		if (conditional_schedule_needed())
+			return count;
+#endif
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		pgdir++;
 	} while (address && (address < end));
@@ -262,6 +282,8 @@
 	 * and ptes.
 	 */
 	spin_lock(&mm->page_table_lock);
+
+continue_scan:
 	address = mm->swap_address;
 	if (address == TASK_SIZE || swap_mm != mm) {
 		/* We raced: don't count this mm but try again */
@@ -278,6 +300,13 @@
 			vma = vma->vm_next;
 			if (!vma)
 				break;
+			/* we reach this with a lock depth of 1 or 2 */
+#if 0
+			if (conditional_schedule_needed()) {
+				break_spin_lock(&mm->page_table_lock);
+				goto continue_scan;
+			}
+#endif
 			if (!count)
 				goto out_unlock;
 			address = vma->vm_start;
@@ -299,6 +328,7 @@
 
 	counter = mmlist_nr;
 	do {
+		/* lock depth can be 0 or 1 */
 		if (unlikely(current->need_resched)) {
 			__set_current_state(TASK_RUNNING);
 			schedule();
@@ -344,6 +374,7 @@
 	while (--max_scan >= 0 && (entry = inactive_list.prev) != &inactive_list) {
 		struct page * page;
 
+		/* lock depth is 1 or 2 */
 		if (unlikely(current->need_resched)) {
 			spin_unlock(&pagemap_lru_lock);
 			__set_current_state(TASK_RUNNING);
@@ -624,8 +655,11 @@
 
 	for (i = pgdat->nr_zones-1; i >= 0; i--) {
 		zone = pgdat->node_zones + i;
+		debug_lock_break(0);
+#ifndef CONFIG_PREEMPT
 		if (unlikely(current->need_resched))
 			schedule();
+#endif
 		if (!zone->need_balance)
 			continue;
 		if (!try_to_free_pages(zone, GFP_KSWAPD, 0)) {
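
Two notes on the mm/vmscan.c changes above. The swap-out breaks are left inside #if 0 because, as the comments say, these paths can be entered at a lock depth of 1 or 2, and dropping or bailing out of only one lock does not make the kernel preemptible while a second is still held; the breaks stay disabled until the real depths are confirmed. Confirming them is what the debug_lock_break() annotations are for: building a file with DEBUG_LOCK_BREAK set to 1 makes every annotation warn when current->preempt_count differs from the expected value. (The final kswapd hunk, with its expected depth of 0, simply drops the explicit need_resched check under CONFIG_PREEMPT, where preemption already happens as soon as no locks are held.) A sketch of turning the checks on, relying on the #ifndef guard in lock_break.h:

/* At the top of the file being audited, before sched.h (which pulls in
 * lock_break.h with this patch): */
#define DEBUG_LOCK_BREAK 1
#include <linux/sched.h>

	/* ... then, at an instrumented site ... */
	debug_lock_break(2);	/* printk()s unless preempt_count == 2 */
	conditional_schedule();
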