diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/Kconfig current/arch/i386/Kconfig
--- reference/arch/i386/Kconfig	2004-04-30 12:59:52.000000000 -0700
+++ current/arch/i386/Kconfig	2004-05-01 15:01:04.000000000 -0700
@@ -422,6 +422,54 @@ config X86_OOSTORE
 	depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR
 	default y
 
+config X86_4G
+	bool "4 GB kernel-space and 4 GB user-space virtual memory support"
+	help
+          This option is only useful for systems that have more than 1 GB
+          of RAM.
+
+          The default kernel VM layout leaves 1 GB of virtual memory for
+          kernel-space mappings, and 3 GB of VM for user-space applications.
+          This option raises both the kernel-space VM and the user-space VM
+          to 4 GB.
+
+          The cost of this option is additional TLB flushes done at
+          system-entry points that transition from user-mode into kernel-mode,
+          i.e. system calls, page faults, and IRQs that interrupt user-mode
+          code. There's also additional overhead in kernel operations that copy
+          memory to/from user-space. This overhead is hard to quantify and
+          depends on the workload - it can be anything from no visible overhead
+          to 20-30%. A good rule of thumb is to expect a runtime overhead
+          of about 20%.
+
+          The upside is the greatly increased kernel-space VM, which more than
+          quadruples the maximum amount of RAM supported. Kernels compiled with
+          this option boot on 64 GB of RAM and still have more than 3.1 GB of
+          'lowmem' left. Another bonus is that highmem IO bouncing decreases
+          when used with drivers that still rely on bounce-buffers.
+
+          There's also a 33% increase in user-space VM size - database
+          applications might see a boost from this.
+
+          But the cost of the TLB flushes and the runtime overhead have to be
+          weighed against the benefits offered by the larger VM spaces. The
+          dividing line depends on the actual workload - there might be 4 GB
+          systems that benefit from this option. Systems with less than 4 GB
+          of RAM will rarely see a benefit - but it's not out of the
+          question; the exact circumstances have to be considered.
+
+config X86_SWITCH_PAGETABLES
+	def_bool X86_4G
+
+config X86_4G_VM_LAYOUT
+	def_bool X86_4G
+
+config X86_UACCESS_INDIRECT
+	def_bool X86_4G
+
+config X86_HIGH_ENTRY
+	def_bool X86_4G
+
 config HPET_TIMER
 	bool "HPET Timer Support"
 	help
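
The 20% rule of thumb in the help text above can be checked directly: the
added cost is per kernel entry, so a tight syscall loop exposes it. A minimal
user-space sketch (assumes a Linux/glibc build environment; SYS_getpid is
invoked raw because glibc may cache getpid(), and clock_gettime() may need
-lrt on older systems):

/* syscall-cost.c - crude per-syscall overhead probe; compare a
 * 3G/1G kernel against a 4G/4G kernel built with X86_4G=y.
 */
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>

#define ITERS 1000000L

int main(void)
{
	struct timespec t0, t1;
	long i, ns;

	clock_gettime(CLOCK_MONOTONIC, &t0);
	for (i = 0; i < ITERS; i++)
		syscall(SYS_getpid);	/* cheapest real kernel entry */
	clock_gettime(CLOCK_MONOTONIC, &t1);

	ns = (t1.tv_sec - t0.tv_sec) * 1000000000L
		+ (t1.tv_nsec - t0.tv_nsec);
	printf("%ld ns per syscall\n", ns / ITERS);
	return 0;
}
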
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/boot/setup.S current/arch/i386/boot/setup.S
--- reference/arch/i386/boot/setup.S	2004-04-07 14:53:56.000000000 -0700
+++ current/arch/i386/boot/setup.S	2004-05-01 15:01:04.000000000 -0700
@@ -156,7 +156,7 @@ cmd_line_ptr:	.long 0			# (Header versio
 					# can be located anywhere in
 					# low memory 0x10000 or higher.
 
-ramdisk_max:	.long MAXMEM-1		# (Header version 0x0203 or later)
+ramdisk_max:	.long __MAXMEM-1	# (Header version 0x0203 or later)
 					# The highest safe address for
 					# the contents of an initrd
 
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/kernel/Makefile current/arch/i386/kernel/Makefile
--- reference/arch/i386/kernel/Makefile	2004-04-30 12:55:07.000000000 -0700
+++ current/arch/i386/kernel/Makefile	2004-05-01 15:01:04.000000000 -0700
@@ -7,7 +7,7 @@ extra-y := head.o init_task.o vmlinux.ld
 obj-y	:= process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
 		ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_i386.o \
 		pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
-		doublefault.o
+		doublefault.o entry_trampoline.o
 
 obj-y				+= cpu/
 obj-y				+= timers/
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/kernel/acpi/boot.c current/arch/i386/kernel/acpi/boot.c
--- reference/arch/i386/kernel/acpi/boot.c	2004-04-07 14:53:56.000000000 -0700
+++ current/arch/i386/kernel/acpi/boot.c	2004-05-01 15:01:04.000000000 -0700
@@ -438,7 +438,7 @@ acpi_scan_rsdp (
 	 * RSDP signature.
 	 */
 	for (offset = 0; offset < length; offset += 16) {
-		if (strncmp((char *) (start + offset), "RSD PTR ", sig_len))
+		if (strncmp((char *) __va(start + offset), "RSD PTR ", sig_len))
 			continue;
 		return (start + offset);
 	}
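
The one-liner above matters because acpi_scan_rsdp() is handed a physical
address range (the BIOS area), and strncmp() must run on the kernel's virtual
alias of it, not on the raw physical value. A sketch of the lowmem translation
involved, using the classic 3G/1G constant (assumed; the actual PAGE_OFFSET
depends on the configured VM split, which is what exposes bugs like this one):

/* Simplified forms of the asm/page.h lowmem helpers (PAGE_OFFSET
 * value assumed - it depends on the chosen VM layout):
 */
#define PAGE_OFFSET	0xc0000000UL

#define __pa(v)	((unsigned long)(v) - PAGE_OFFSET)
#define __va(p)	((void *)((unsigned long)(p) + PAGE_OFFSET))

/* e.g. scanning for "RSD PTR " at physical 0xE0000 must
 * dereference __va(0xE0000), i.e. (void *)0xC00E0000 here.
 */
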
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/kernel/acpi/sleep.c current/arch/i386/kernel/acpi/sleep.c
--- reference/arch/i386/kernel/acpi/sleep.c	2003-10-01 11:34:28.000000000 -0700
+++ current/arch/i386/kernel/acpi/sleep.c	2004-05-01 15:01:04.000000000 -0700
@@ -19,13 +19,29 @@ extern void zap_low_mappings(void);
 
 extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
 
-static void init_low_mapping(pgd_t *pgd, int pgd_limit)
+static void map_low(pgd_t *pgd_base, unsigned long start, unsigned long end)
 {
-	int pgd_ofs = 0;
-
-	while ((pgd_ofs < pgd_limit) && (pgd_ofs + USER_PTRS_PER_PGD < PTRS_PER_PGD)) {
-		set_pgd(pgd, *(pgd+USER_PTRS_PER_PGD));
-		pgd_ofs++, pgd++;
+	unsigned long vaddr;
+	pmd_t *pmd;
+	pgd_t *pgd;
+	int i, j;
+
+	pgd = pgd_base;
+
+	for (i = 0; i < PTRS_PER_PGD; pgd++, i++) {
+		vaddr = i*PGDIR_SIZE;
+		if (end && (vaddr >= end))
+			break;
+		pmd = pmd_offset(pgd, 0);
+		for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
+			vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
+			if (end && (vaddr >= end))
+				break;
+			if (vaddr < start)
+				continue;
+			set_pmd(pmd, __pmd(_KERNPG_TABLE + _PAGE_PSE +
+								vaddr - start));
+		}
 	}
 }
 
@@ -39,7 +55,9 @@ int acpi_save_state_mem (void)
 {
 	if (!acpi_wakeup_address)
 		return 1;
-	init_low_mapping(swapper_pg_dir, USER_PTRS_PER_PGD);
+	if (!cpu_has_pse)
+		return 1;
+	map_low(swapper_pg_dir, 0, LOW_MAPPINGS_SIZE);
 	memcpy((void *) acpi_wakeup_address, &wakeup_start, &wakeup_end - &wakeup_start);
 	acpi_copy_wakeup_routine(acpi_wakeup_address);
 
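
map_low() above replaces the old pgd-copying trick with explicitly built
identity mappings: for every large page between start and end it computes the
covering pgd/pmd slot and installs a _PAGE_PSE entry. The index arithmetic,
stand-alone (shift values assumed from asm/pgtable.h; with PAE a pgd slot maps
1 GB and a pmd slot 2 MB, without PAE the pmd level is folded and both shifts
are 22):

#include <stdio.h>

#define PGDIR_SHIFT	30			/* PAE: 1 GB per pgd slot */
#define PMD_SHIFT	21			/* PAE: 2 MB per pmd slot */
#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
#define PMD_SIZE	(1UL << PMD_SHIFT)

int main(void)
{
	unsigned long vaddr = 0x00c00000;	/* 12 MB */
	unsigned long i = vaddr / PGDIR_SIZE;	/* pgd index -> 0 */
	unsigned long j = (vaddr % PGDIR_SIZE) / PMD_SIZE;	/* -> 6 */

	/* the loop invariant in map_low(): */
	printf("%d\n", vaddr == i*PGDIR_SIZE + j*PMD_SIZE);	/* 1 */
	return 0;
}
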
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/kernel/asm-offsets.c current/arch/i386/kernel/asm-offsets.c
--- reference/arch/i386/kernel/asm-offsets.c	2004-04-07 14:53:56.000000000 -0700
+++ current/arch/i386/kernel/asm-offsets.c	2004-05-01 15:01:04.000000000 -0700
@@ -31,5 +31,19 @@ void foo(void)
 	DEFINE(RT_SIGFRAME_sigcontext,
 	       offsetof (struct rt_sigframe, uc.uc_mcontext));
 
+	DEFINE(TI_task, offsetof (struct thread_info, task));
+	DEFINE(TI_exec_domain, offsetof (struct thread_info, exec_domain));
+	DEFINE(TI_flags, offsetof (struct thread_info, flags));
+	DEFINE(TI_preempt_count, offsetof (struct thread_info, preempt_count));
+	DEFINE(TI_addr_limit, offsetof (struct thread_info, addr_limit));
+	DEFINE(TI_real_stack, offsetof (struct thread_info, real_stack));
+	DEFINE(TI_virtual_stack, offsetof (struct thread_info, virtual_stack));
+	DEFINE(TI_user_pgd, offsetof (struct thread_info, user_pgd));
+
+	DEFINE(FIX_ENTRY_TRAMPOLINE_0_addr,
+			__fix_to_virt(FIX_ENTRY_TRAMPOLINE_0));
+	DEFINE(FIX_VSYSCALL_addr, __fix_to_virt(FIX_VSYSCALL));
 	DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
+	DEFINE(task_thread_db7,
+		offsetof (struct task_struct, thread.debugreg[7]));
 }
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/kernel/cpu/common.c current/arch/i386/kernel/cpu/common.c
--- reference/arch/i386/kernel/cpu/common.c	2004-01-15 10:41:00.000000000 -0800
+++ current/arch/i386/kernel/cpu/common.c	2004-05-01 15:01:04.000000000 -0700
@@ -514,12 +514,16 @@ void __init cpu_init (void)
 	set_tss_desc(cpu,t);
 	cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
 	load_TR_desc();
-	load_LDT(&init_mm.context);
+	if (cpu)
+		load_LDT(&init_mm.context);
 
 	/* Set up doublefault TSS pointer in the GDT */
 	__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
 	cpu_gdt_table[cpu][GDT_ENTRY_DOUBLEFAULT_TSS].b &= 0xfffffdff;
 
+	if (cpu)
+		trap_init_virtual_GDT();
+
 	/* Clear %fs and %gs. */
 	asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
 
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/kernel/cpu/intel.c current/arch/i386/kernel/cpu/intel.c
--- reference/arch/i386/kernel/cpu/intel.c	2004-03-11 14:33:36.000000000 -0800
+++ current/arch/i386/kernel/cpu/intel.c	2004-05-01 15:01:04.000000000 -0700
@@ -10,6 +10,7 @@
 #include <asm/processor.h>
 #include <asm/msr.h>
 #include <asm/uaccess.h>
+#include <asm/desc.h>
 
 #include "cpu.h"
 
@@ -19,8 +20,6 @@
 #include <mach_apic.h>
 #endif
 
-extern int trap_init_f00f_bug(void);
-
 #ifdef CONFIG_X86_INTEL_USERCOPY
 /*
  * Alignment at which movsl is preferred for bulk memory copies.
@@ -165,7 +164,7 @@ static void __init init_intel(struct cpu
 
 		c->f00f_bug = 1;
 		if ( !f00f_workaround_enabled ) {
-			trap_init_f00f_bug();
+			trap_init_virtual_IDT();
 			printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
 			f00f_workaround_enabled = 1;
 		}
@@ -250,6 +249,12 @@ static void __init init_intel(struct cpu
 	/* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */
 	if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
 		clear_bit(X86_FEATURE_SEP, c->x86_capability);
+	/*
+	 * FIXME: SEP is disabled for 4G/4G for now:
+	 */
+#ifdef CONFIG_X86_HIGH_ENTRY
+	clear_bit(X86_FEATURE_SEP, c->x86_capability);
+#endif
 
 	/* Names for the Pentium II/Celeron processors 
 	   detectable only by also checking the cache size.
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/kernel/doublefault.c current/arch/i386/kernel/doublefault.c
--- reference/arch/i386/kernel/doublefault.c	2004-04-30 11:23:02.000000000 -0700
+++ current/arch/i386/kernel/doublefault.c	2004-05-01 15:01:04.000000000 -0700
@@ -8,12 +8,13 @@
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/desc.h>
+#include <asm/fixmap.h>
 
 #define DOUBLEFAULT_STACKSIZE (1024)
 static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE];
 #define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE)
 
-#define ptr_ok(x) ((x) > 0xc0000000 && (x) < 0xc1000000)
+#define ptr_ok(x) (((x) > __PAGE_OFFSET && (x) < (__PAGE_OFFSET + 0x01000000)) || ((x) >= FIXADDR_START))
 
 static void doublefault_fn(void)
 {
@@ -39,8 +40,8 @@ static void doublefault_fn(void)
 
 			printk("eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n",
 				t->eax, t->ebx, t->ecx, t->edx);
-			printk("esi = %08lx, edi = %08lx\n",
-				t->esi, t->edi);
+			printk("esi = %08lx, edi = %08lx, ebp = %08lx\n",
+				t->esi, t->edi, t->ebp);
 		}
 	}
 
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/kernel/entry.S current/arch/i386/kernel/entry.S
--- reference/arch/i386/kernel/entry.S	2004-05-01 10:25:42.000000000 -0700
+++ current/arch/i386/kernel/entry.S	2004-05-01 15:01:04.000000000 -0700
@@ -43,8 +43,9 @@
 #include <linux/config.h>
 #include <linux/linkage.h>
 #include <asm/thread_info.h>
+#include <asm/asm_offsets.h>
 #include <asm/errno.h>
 #include <asm/segment.h>
 #include <asm/smp.h>
 #include <asm/page.h>
 #include "irq_vectors.h"
@@ -99,7 +101,102 @@ TSS_ESP0_OFFSET = (4 - 0x200)
 #define resume_kernel		restore_all
 #endif
 
-#define SAVE_ALL \
+#ifdef CONFIG_X86_HIGH_ENTRY
+
+#ifdef CONFIG_X86_SWITCH_PAGETABLES
+
+#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
+/*
+ * If the task is preempted in __SWITCH_KERNELSPACE and moved to another cpu,
+ * __switch_to repoints %esp to the appropriate virtual stack; but %ebp is
+ * left stale, so we must check whether to repeat the real stack calculation.
+ */
+#define repeat_if_esp_changed				\
+	xorl %esp, %ebp;				\
+	testl $-THREAD_SIZE, %ebp;			\
+	jnz 0b
+#else
+#define repeat_if_esp_changed
+#endif
+
+/* clobbers ebx, edx and ebp */
+
+#define __SWITCH_KERNELSPACE				\
+	cmpl $0xff000000, %esp;				\
+	jb 1f;						\
+							\
+	/*						\
+	 * switch pagetables and load the real stack,	\
+	 * keep the stack offset:			\
+	 */						\
+							\
+	movl $swapper_pg_dir-__PAGE_OFFSET, %edx;	\
+							\
+	/* GET_THREAD_INFO(%ebp) intermixed */		\
+0:							\
+	movl %esp, %ebp;				\
+	movl %esp, %ebx;				\
+	andl $(-THREAD_SIZE), %ebp;				\
+	andl $(THREAD_SIZE-1), %ebx;				\
+	orl TI_real_stack(%ebp), %ebx;			\
+	repeat_if_esp_changed;				\
+							\
+	movl %edx, %cr3;				\
+	movl %ebx, %esp;				\
+1:
+
+#endif
+
+
+#define __SWITCH_USERSPACE \
+	/* interrupted any of the user return paths? */	\
+							\
+	movl EIP(%esp), %eax;				\
+							\
+	cmpl $int80_ret_start_marker, %eax;		\
+	jb 33f; /* nope - continue with sysexit check */\
+	cmpl $int80_ret_end_marker, %eax;		\
+	jb 22f; /* yes - switch to virtual stack */	\
+33:							\
+	cmpl $sysexit_ret_start_marker, %eax;		\
+	jb 44f; /* nope - continue with user check */	\
+	cmpl $sysexit_ret_end_marker, %eax;		\
+	jb 22f; /* yes - switch to virtual stack */	\
+	/* return to userspace? */			\
+44:							\
+	movl EFLAGS(%esp),%ecx;				\
+	movb CS(%esp),%cl;				\
+	testl $(VM_MASK | 3),%ecx;			\
+	jz 2f;						\
+22:							\
+	/*						\
+	 * switch to the virtual stack, then switch to	\
+	 * the userspace pagetables.			\
+	 */						\
+							\
+	GET_THREAD_INFO(%ebp);				\
+	movl TI_virtual_stack(%ebp), %edx;		\
+	movl TI_user_pgd(%ebp), %ecx;			\
+							\
+	movl %esp, %ebx;				\
+	andl $(THREAD_SIZE-1), %ebx;				\
+	orl %ebx, %edx;					\
+int80_ret_start_marker:					\
+	movl %edx, %esp; 				\
+	movl %ecx, %cr3;				\
+							\
+	__RESTORE_ALL;					\
+int80_ret_end_marker:					\
+2:
+
+#else /* !CONFIG_X86_HIGH_ENTRY */
+
+#define __SWITCH_KERNELSPACE
+#define __SWITCH_USERSPACE
+
+#endif
+
+#define __SAVE_ALL \
 	cld; \
 	pushl %es; \
 	pushl %ds; \
@@ -112,10 +209,9 @@ TSS_ESP0_OFFSET = (4 - 0x200)
 	pushl %ebx; \
 	movl $(__USER_DS), %edx; \
 	movl %edx, %ds; \
-	movl %edx, %es; \
-        STACK_OVERFLOW_TEST
+	movl %edx, %es;
 
-#define RESTORE_INT_REGS \
+#define __RESTORE_INT_REGS \
 	popl %ebx;	\
 	popl %ecx;	\
 	popl %edx;	\
@@ -124,29 +220,28 @@ TSS_ESP0_OFFSET = (4 - 0x200)
 	popl %ebp;	\
 	popl %eax
 
-#define RESTORE_REGS	\
-	RESTORE_INT_REGS; \
-1:	popl %ds;	\
-2:	popl %es;	\
+#define __RESTORE_REGS	\
+	__RESTORE_INT_REGS; \
+111:	popl %ds;	\
+222:	popl %es;	\
 .section .fixup,"ax";	\
-3:	movl $0,(%esp);	\
-	jmp 1b;		\
-4:	movl $0,(%esp);	\
-	jmp 2b;		\
+444:	movl $0,(%esp);	\
+	jmp 111b;	\
+555:	movl $0,(%esp);	\
+	jmp 222b;	\
 .previous;		\
 .section __ex_table,"a";\
 	.align 4;	\
-	.long 1b,3b;	\
-	.long 2b,4b;	\
+	.long 111b,444b;\
+	.long 222b,555b;\
 .previous
 
-
-#define RESTORE_ALL	\
-	RESTORE_REGS	\
+#define __RESTORE_ALL	\
+	__RESTORE_REGS	\
 	addl $4, %esp;	\
-1:	iret;		\
+333:	iret;		\
 .section .fixup,"ax";   \
-2:	sti;		\
+666:	sti;		\
 	movl $(__USER_DS), %edx; \
 	movl %edx, %ds; \
 	movl %edx, %es; \
@@ -155,10 +250,19 @@ TSS_ESP0_OFFSET = (4 - 0x200)
 .previous;		\
 .section __ex_table,"a";\
 	.align 4;	\
-	.long 1b,2b;	\
+	.long 333b,666b;\
 .previous
 
+#define SAVE_ALL \
+	__SAVE_ALL;					\
+	__SWITCH_KERNELSPACE;				\
+	STACK_OVERFLOW_TEST;
+
+#define RESTORE_ALL					\
+	__SWITCH_USERSPACE;				\
+	__RESTORE_ALL;
 
+.section .entry.text,"ax"
 
 ENTRY(lcall7)
 	pushfl			# We get a different stack layout with call
@@ -176,7 +280,7 @@ do_lcall:
 	movl %edx,EIP(%ebp)	# Now we move them to their "normal" places
 	movl %ecx,CS(%ebp)	#
 	GET_THREAD_INFO_WITH_ESP(%ebp)	# GET_THREAD_INFO
-	movl TI_EXEC_DOMAIN(%ebp), %edx	# Get the execution domain
+	movl TI_exec_domain(%ebp), %edx	# Get the execution domain
 	call *4(%edx)		# Call the lcall7 handler for the domain
 	addl $4, %esp
 	popl %eax
@@ -221,7 +325,7 @@ ENTRY(resume_userspace)
  	cli				# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
-	movl TI_FLAGS(%ebp), %ecx
+	movl TI_flags(%ebp), %ecx
 	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done on
 					# int/exception return?
 	jne work_pending
@@ -229,18 +333,18 @@ ENTRY(resume_userspace)
 
 #ifdef CONFIG_PREEMPT
 ENTRY(resume_kernel)
-	cmpl $0,TI_PRE_COUNT(%ebp)	# non-zero preempt_count ?
+	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
 	jnz restore_all
 need_resched:
-	movl TI_FLAGS(%ebp), %ecx	# need_resched set ?
+	movl TI_flags(%ebp), %ecx	# need_resched set ?
 	testb $_TIF_NEED_RESCHED, %cl
 	jz restore_all
 	testl $IF_MASK,EFLAGS(%esp)     # interrupts off (exception path) ?
 	jz restore_all
-	movl $PREEMPT_ACTIVE,TI_PRE_COUNT(%ebp)
+	movl $PREEMPT_ACTIVE,TI_preempt_count(%ebp)
 	sti
 	call schedule
-	movl $0,TI_PRE_COUNT(%ebp)
+	movl $0,TI_preempt_count(%ebp)
 	cli
 	jmp need_resched
 #endif
@@ -259,37 +363,50 @@ sysenter_past_esp:
 	pushl $(__USER_CS)
 	pushl $SYSENTER_RETURN
 
-/*
- * Load the potential sixth argument from user stack.
- * Careful about security.
- */
-	cmpl $__PAGE_OFFSET-3,%ebp
-	jae syscall_fault
-1:	movl (%ebp),%ebp
-.section __ex_table,"a"
-	.align 4
-	.long 1b,syscall_fault
-.previous
-
 	pushl %eax
 	SAVE_ALL
 	GET_THREAD_INFO(%ebp)
 	cmpl $(nr_syscalls), %eax
 	jae syscall_badsys
 
-	testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_FLAGS(%ebp)
+	testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
 	jnz syscall_trace_entry
 	call *sys_call_table(,%eax,4)
 	movl %eax,EAX(%esp)
 	cli
-	movl TI_FLAGS(%ebp), %ecx
+	movl TI_flags(%ebp), %ecx
 	testw $_TIF_ALLWORK_MASK, %cx
 	jne syscall_exit_work
+
+#ifdef CONFIG_X86_SWITCH_PAGETABLES
+
+	GET_THREAD_INFO(%ebp)
+	movl TI_virtual_stack(%ebp), %edx
+	movl TI_user_pgd(%ebp), %ecx
+	movl %esp, %ebx
+	andl $(THREAD_SIZE-1), %ebx
+	orl %ebx, %edx
+sysexit_ret_start_marker:
+	movl %edx, %esp
+	movl %ecx, %cr3
+#endif
+	/*
+	 * Only %ebx is not restored by the userspace sysenter vsyscall
+	 * code; it is assumed to be callee-saved.
+	 */
+	movl EBX(%esp), %ebx
+
 /* if something modifies registers it must also disable sysexit */
+
 	movl EIP(%esp), %edx
 	movl OLDESP(%esp), %ecx
+
 	sti
 	sysexit
+#ifdef CONFIG_X86_SWITCH_PAGETABLES
+sysexit_ret_end_marker:
+	nop
+#endif
 
 
 	# system call handler stub
@@ -300,7 +417,7 @@ ENTRY(system_call)
 	cmpl $(nr_syscalls), %eax
 	jae syscall_badsys
 					# system call tracing in operation
-	testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_FLAGS(%ebp)
+	testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
 	jnz syscall_trace_entry
 syscall_call:
 	call *sys_call_table(,%eax,4)
@@ -309,7 +426,7 @@ syscall_exit:
 	cli				# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
-	movl TI_FLAGS(%ebp), %ecx
+	movl TI_flags(%ebp), %ecx
 	testw $_TIF_ALLWORK_MASK, %cx	# current->work
 	jne syscall_exit_work
 restore_all:
@@ -319,7 +436,7 @@ restore_all:
 	testl $(VM_MASK | 3), %eax
 	jz resume_kernelX		# returning to kernel or vm86-space
 
-	cmpl $0,TI_PRE_COUNT(%ebx)	# non-zero preempt_count ?
+	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
 	jz resume_kernelX
 
         int $3
@@ -338,7 +455,7 @@ work_resched:
 	cli				# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
-	movl TI_FLAGS(%ebp), %ecx
+	movl TI_flags(%ebp), %ecx
 	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done other
 					# than syscall tracing?
 	jz restore_all
@@ -353,6 +470,22 @@ work_notifysig:				# deal with pending s
 					# vm86-space
 	xorl %edx, %edx
 	call do_notify_resume
+
+#ifdef CONFIG_X86_HIGH_ENTRY
+	/*
+	 * Reload db7 if necessary:
+	 */
+	movl TI_flags(%ebp), %ecx
+	testb $_TIF_DB7, %cl
+	jnz work_db7
+
+	jmp restore_all
+
+work_db7:
+	movl TI_task(%ebp), %edx;
+	movl task_thread_db7(%edx), %edx;
+	movl %edx, %db7;
+#endif
 	jmp restore_all
 
 	ALIGN
@@ -408,7 +541,7 @@ syscall_badsys:
  */
 .data
 ENTRY(interrupt)
-.text
+.previous
 
 vector=0
 ENTRY(irq_entries_start)
@@ -418,7 +551,7 @@ ENTRY(irq_entries_start)
 	jmp common_interrupt
 .data
 	.long 1b
-.text
+.previous
 vector=vector+1
 .endr
 
@@ -459,12 +592,17 @@ error_code:
 	movl ES(%esp), %edi		# get the function address
 	movl %eax, ORIG_EAX(%esp)
 	movl %ecx, ES(%esp)
-	movl %esp, %edx
 	pushl %esi			# push the error code
-	pushl %edx			# push the pt_regs pointer
 	movl $(__USER_DS), %edx
 	movl %edx, %ds
 	movl %edx, %es
+
+/* clobbers edx, ebx and ebp */
+	__SWITCH_KERNELSPACE
+
+	leal 4(%esp), %edx		# prepare pt_regs
+	pushl %edx			# push pt_regs
+
 	call *%edi
 	addl $8, %esp
 	jmp ret_from_exception
@@ -555,7 +693,7 @@ nmi_stack_correct:
 	pushl %edx
 	call do_nmi
 	addl $8, %esp
-	RESTORE_ALL
+	jmp restore_all
 
 nmi_stack_fixup:
 	FIX_STACK(12,nmi_stack_correct, 1)
@@ -632,6 +770,8 @@ ENTRY(spurious_interrupt_bug)
 	pushl $do_spurious_interrupt_bug
 	jmp error_code
 
+.previous
+
 .data
 ENTRY(sys_call_table)
 	.long sys_restart_syscall	/* 0 - old "setup()" system call, used for restarting */
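
Both __SWITCH_KERNELSPACE and __SWITCH_USERSPACE above rest on one invariant:
the real and the virtual kernel stacks are THREAD_SIZE-sized, THREAD_SIZE-
aligned blocks, so a stack pointer can be carried from one to the other by
keeping only its low bits. What the andl/orl pairs compute, as C (a sketch
assuming the usual 8 KB two-page stacks):

#include <assert.h>
#include <stdio.h>

#define THREAD_SIZE 8192UL	/* both stacks are this size and alignment */

/* Re-base a stack pointer onto another aligned stack while keeping
 * its offset - the andl/orl pair in __SWITCH_KERNELSPACE/USERSPACE:
 */
static unsigned long rebase_esp(unsigned long esp, unsigned long new_stack)
{
	unsigned long base   = esp & ~(THREAD_SIZE - 1); /* thread_info */
	unsigned long offset = esp &  (THREAD_SIZE - 1); /* andl in asm */

	assert((new_stack & (THREAD_SIZE - 1)) == 0);
	(void)base;	/* the 'andl $-THREAD_SIZE' half of the trick */
	return new_stack | offset;			 /* orl in asm  */
}

int main(void)
{
	/* virtual-stack esp 0xfe001f80 onto real stack 0xc1804000: */
	printf("%#lx\n", rebase_esp(0xfe001f80UL, 0xc1804000UL));
	return 0;	/* prints 0xc1805f80 */
}
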
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/kernel/entry_trampoline.c current/arch/i386/kernel/entry_trampoline.c
--- reference/arch/i386/kernel/entry_trampoline.c	1969-12-31 16:00:00.000000000 -0800
+++ current/arch/i386/kernel/entry_trampoline.c	2004-05-01 15:01:04.000000000 -0700
@@ -0,0 +1,73 @@
+/*
+ * linux/arch/i386/kernel/entry_trampoline.c
+ *
+ * (C) Copyright 2003 Ingo Molnar
+ *
+ * This file contains the support code needed for the 4G/4G VM split
+ */
+
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/highmem.h>
+#include <asm/desc.h>
+#include <asm/atomic_kmap.h>
+
+extern char __entry_tramp_start, __entry_tramp_end, __start___entry_text;
+
+void __init init_entry_mappings(void)
+{
+#ifdef CONFIG_X86_HIGH_ENTRY
+	void *tramp;
+
+	/*
+	 * We need a high IDT and GDT for the 4G/4G split:
+	 */
+	trap_init_virtual_IDT();
+
+	__set_fixmap(FIX_ENTRY_TRAMPOLINE_0, __pa((unsigned long)&__entry_tramp_start), PAGE_KERNEL);
+	__set_fixmap(FIX_ENTRY_TRAMPOLINE_1, __pa((unsigned long)&__entry_tramp_start) + PAGE_SIZE, PAGE_KERNEL);
+	tramp = (void *)fix_to_virt(FIX_ENTRY_TRAMPOLINE_0);
+
+	printk("mapped 4G/4G trampoline to %p.\n", tramp);
+	BUG_ON((void *)&__start___entry_text != tramp);
+	/*
+	 * Virtual kernel stack:
+	 */
+	BUG_ON(__kmap_atomic_vaddr(KM_VSTACK0) & (THREAD_SIZE-1));
+	BUG_ON(sizeof(struct desc_struct)*NR_CPUS*GDT_ENTRIES > 2*PAGE_SIZE);
+	BUG_ON((unsigned int)&__entry_tramp_end - (unsigned int)&__entry_tramp_start > 2*PAGE_SIZE);
+
+	/*
+	 * set up the initial thread's virtual stack related
+	 * fields:
+	 */
+	current->thread.stack_page0 = virt_to_page((char *)current->thread_info);
+	current->thread.stack_page1 = virt_to_page((char *)current->thread_info + PAGE_SIZE);
+	current->thread_info->virtual_stack = (void *)__kmap_atomic_vaddr(KM_VSTACK0);
+
+	__kunmap_atomic_type(KM_VSTACK0);
+	__kunmap_atomic_type(KM_VSTACK1);
+	__kmap_atomic(current->thread.stack_page0, KM_VSTACK0);
+	__kmap_atomic(current->thread.stack_page1, KM_VSTACK1);
+
+#endif
+	current->thread_info->real_stack = (void *)current->thread_info;
+	current->thread_info->user_pgd = NULL;
+	current->thread.esp0 = (unsigned long)current->thread_info->real_stack + THREAD_SIZE;
+}
+
+
+
+void __init entry_trampoline_setup(void)
+{
+	/*
+	 * Old IRQ entries set up by the boot code will still hang
+	 * around - they are a sign of hw trouble anyway; now they'll
+	 * produce a double fault message.
+	 */
+	trap_init_virtual_GDT();
+}
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/kernel/i386_ksyms.c current/arch/i386/kernel/i386_ksyms.c
--- reference/arch/i386/kernel/i386_ksyms.c	2004-04-07 14:53:56.000000000 -0700
+++ current/arch/i386/kernel/i386_ksyms.c	2004-05-01 15:01:04.000000000 -0700
@@ -93,7 +93,6 @@ EXPORT_SYMBOL_NOVERS(__down_failed_inter
 EXPORT_SYMBOL_NOVERS(__down_failed_trylock);
 EXPORT_SYMBOL_NOVERS(__up_wakeup);
 /* Networking helper routines. */
-EXPORT_SYMBOL(csum_partial_copy_generic);
 /* Delay loops */
 EXPORT_SYMBOL(__ndelay);
 EXPORT_SYMBOL(__udelay);
@@ -107,13 +106,17 @@ EXPORT_SYMBOL_NOVERS(__get_user_4);
 EXPORT_SYMBOL(strpbrk);
 EXPORT_SYMBOL(strstr);
 
+#if !defined(CONFIG_X86_UACCESS_INDIRECT)
 EXPORT_SYMBOL(strncpy_from_user);
-EXPORT_SYMBOL(__strncpy_from_user);
+EXPORT_SYMBOL(__direct_strncpy_from_user);
 EXPORT_SYMBOL(clear_user);
 EXPORT_SYMBOL(__clear_user);
 EXPORT_SYMBOL(__copy_from_user_ll);
 EXPORT_SYMBOL(__copy_to_user_ll);
 EXPORT_SYMBOL(strnlen_user);
+#else /* CONFIG_X86_UACCESS_INDIRECT */
+EXPORT_SYMBOL(direct_csum_partial_copy_generic);
+#endif
 
 EXPORT_SYMBOL(dma_alloc_coherent);
 EXPORT_SYMBOL(dma_free_coherent);
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/kernel/i387.c current/arch/i386/kernel/i387.c
--- reference/arch/i386/kernel/i387.c	2004-02-18 14:56:47.000000000 -0800
+++ current/arch/i386/kernel/i387.c	2004-05-01 15:01:04.000000000 -0700
@@ -218,6 +218,7 @@ void set_fpu_mxcsr( struct task_struct *
 static int convert_fxsr_to_user( struct _fpstate __user *buf,
 					struct i387_fxsave_struct *fxsave )
 {
+	struct _fpreg tmp[8]; /* 80 bytes scratch area */
 	unsigned long env[7];
 	struct _fpreg __user *to;
 	struct _fpxreg *from;
@@ -234,23 +235,25 @@ static int convert_fxsr_to_user( struct 
 	if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) )
 		return 1;
 
-	to = &buf->_st[0];
+	to = tmp;
 	from = (struct _fpxreg *) &fxsave->st_space[0];
 	for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
 		unsigned long *t = (unsigned long *)to;
 		unsigned long *f = (unsigned long *)from;
 
-		if (__put_user(*f, t) ||
-				__put_user(*(f + 1), t + 1) ||
-				__put_user(from->exponent, &to->exponent))
-			return 1;
+		*t = *f;
+		*(t + 1) = *(f+1);
+		to->exponent = from->exponent;
 	}
+	if (copy_to_user(buf->_st, tmp, sizeof(struct _fpreg [8])))
+		return 1;
 	return 0;
 }
 
 static int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave,
 					  struct _fpstate __user *buf )
 {
+	struct _fpreg tmp[8]; /* 80 bytes scratch area */
 	unsigned long env[7];
 	struct _fpxreg *to;
 	struct _fpreg __user *from;
@@ -258,6 +261,8 @@ static int convert_fxsr_from_user( struc
 
 	if ( __copy_from_user( env, buf, 7 * sizeof(long) ) )
 		return 1;
+	if (copy_from_user(tmp, buf->_st, sizeof(struct _fpreg [8])))
+		return 1;
 
 	fxsave->cwd = (unsigned short)(env[0] & 0xffff);
 	fxsave->swd = (unsigned short)(env[1] & 0xffff);
@@ -269,15 +274,14 @@ static int convert_fxsr_from_user( struc
 	fxsave->fos = env[6];
 
 	to = (struct _fpxreg *) &fxsave->st_space[0];
-	from = &buf->_st[0];
+	from = tmp;
 	for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
 		unsigned long *t = (unsigned long *)to;
 		unsigned long *f = (unsigned long *)from;
 
-		if (__get_user(*t, f) ||
-				__get_user(*(t + 1), f + 1) ||
-				__get_user(to->exponent, &from->exponent))
-			return 1;
+		*t = *f;
+		*(t + 1) = *(f + 1);
+		to->exponent = from->exponent;
 	}
 	return 0;
 }
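
Both conversion routines above now follow one pattern: do the register-format
shuffling entirely on an on-stack scratch array, then move the whole 80-byte
block with a single copy_to_user()/copy_from_user() rather than 24 tiny
__put_user()/__get_user() calls - exactly the accesses that
CONFIG_X86_UACCESS_INDIRECT makes expensive. The same pattern reappears in the
sigcontext changes further down. Distilled (the struct and function names here
are invented for illustration):

#include <linux/errno.h>
#include <asm/uaccess.h>

/* made-up 10-byte-register image, analogous to struct _fpreg */
struct reg80 { unsigned long lo, hi; unsigned short exponent; };

static int export_regs(struct reg80 __user *ubuf, const struct reg80 *kregs)
{
	struct reg80 tmp[8];	/* scratch area, built in kernel space */
	int i;

	for (i = 0; i < 8; i++)
		tmp[i] = kregs[i];	/* per-register conversion goes here */

	/* one bulk user access instead of one per field: */
	return copy_to_user(ubuf, tmp, sizeof(tmp)) ? -EFAULT : 0;
}
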
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/kernel/init_task.c current/arch/i386/kernel/init_task.c
--- reference/arch/i386/kernel/init_task.c	2004-04-07 14:53:56.000000000 -0700
+++ current/arch/i386/kernel/init_task.c	2004-05-01 15:01:04.000000000 -0700
@@ -26,7 +26,7 @@ EXPORT_SYMBOL(init_mm);
  */
 union thread_union init_thread_union 
 	__attribute__((__section__(".data.init_task"))) =
-		{ INIT_THREAD_INFO(init_task) };
+		{ INIT_THREAD_INFO(init_task, init_thread_union) };
 
 /*
  * Initial task structure.
@@ -44,5 +44,5 @@ EXPORT_SYMBOL(init_task);
  * section. Since TSS's are completely CPU-local, we want them
  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
  */ 
-struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = INIT_TSS };
+struct tss_struct init_tss[NR_CPUS] __attribute__((__section__(".data.tss"))) = { [0 ... NR_CPUS-1] = INIT_TSS };
 
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/kernel/ldt.c current/arch/i386/kernel/ldt.c
--- reference/arch/i386/kernel/ldt.c	2003-10-01 11:40:40.000000000 -0700
+++ current/arch/i386/kernel/ldt.c	2004-05-01 15:01:04.000000000 -0700
@@ -2,7 +2,7 @@
  * linux/kernel/ldt.c
  *
  * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
- * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ * Copyright (C) 1999, 2003 Ingo Molnar <mingo@redhat.com>
  */
 
 #include <linux/errno.h>
@@ -18,6 +18,8 @@
 #include <asm/system.h>
 #include <asm/ldt.h>
 #include <asm/desc.h>
+#include <linux/highmem.h>
+#include <asm/atomic_kmap.h>
 
 #ifdef CONFIG_SMP /* avoids "defined but not used" warning */
 static void flush_ldt(void *null)
@@ -29,34 +31,31 @@ static void flush_ldt(void *null)
 
 static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
 {
-	void *oldldt;
-	void *newldt;
-	int oldsize;
+	int oldsize, newsize, i;
 
 	if (mincount <= pc->size)
 		return 0;
+	/*
+	 * LDT got larger - reallocate if necessary.
+	 */
 	oldsize = pc->size;
 	mincount = (mincount+511)&(~511);
-	if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
-		newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
-	else
-		newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
-
-	if (!newldt)
-		return -ENOMEM;
-
-	if (oldsize)
-		memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE);
-	oldldt = pc->ldt;
-	memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE);
-	pc->ldt = newldt;
-	wmb();
+	newsize = mincount*LDT_ENTRY_SIZE;
+	for (i = 0; i < newsize; i += PAGE_SIZE) {
+		int nr = i/PAGE_SIZE;
+		BUG_ON(i >= 64*1024);
+		if (!pc->ldt_pages[nr]) {
+			pc->ldt_pages[nr] = alloc_page(GFP_HIGHUSER);
+			if (!pc->ldt_pages[nr])
+				return -ENOMEM;
+			clear_highpage(pc->ldt_pages[nr]);
+		}
+	}
 	pc->size = mincount;
-	wmb();
-
 	if (reload) {
 #ifdef CONFIG_SMP
 		cpumask_t mask;
+
 		preempt_disable();
 		load_LDT(pc);
 		mask = cpumask_of_cpu(smp_processor_id());
@@ -67,21 +66,20 @@ static int alloc_ldt(mm_context_t *pc, i
 		load_LDT(pc);
 #endif
 	}
-	if (oldsize) {
-		if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
-			vfree(oldldt);
-		else
-			kfree(oldldt);
-	}
 	return 0;
 }
 
 static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
 {
-	int err = alloc_ldt(new, old->size, 0);
-	if (err < 0)
+	int i, err, size = old->size, nr_pages = (size*LDT_ENTRY_SIZE + PAGE_SIZE-1)/PAGE_SIZE;
+
+	err = alloc_ldt(new, size, 0);
+	if (err < 0) {
+		new->size = 0;
 		return err;
-	memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
+	}
+	for (i = 0; i < nr_pages; i++)
+		copy_user_highpage(new->ldt_pages[i], old->ldt_pages[i], 0);
 	return 0;
 }
 
@@ -96,6 +94,7 @@ int init_new_context(struct task_struct 
 
 	init_MUTEX(&mm->context.sem);
 	mm->context.size = 0;
+	memset(mm->context.ldt_pages, 0, sizeof(struct page *) * MAX_LDT_PAGES);
 	old_mm = current->mm;
 	if (old_mm && old_mm->context.size > 0) {
 		down(&old_mm->context.sem);
@@ -107,23 +106,21 @@ int init_new_context(struct task_struct 
 
 /*
  * No need to lock the MM as we are the last user
+ * Do not touch the LDT register; we are already
+ * in the next thread.
  */
 void destroy_context(struct mm_struct *mm)
 {
-	if (mm->context.size) {
-		if (mm == current->active_mm)
-			clear_LDT();
-		if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
-			vfree(mm->context.ldt);
-		else
-			kfree(mm->context.ldt);
-		mm->context.size = 0;
-	}
+	int i, nr_pages = (mm->context.size*LDT_ENTRY_SIZE + PAGE_SIZE-1) / PAGE_SIZE;
+
+	for (i = 0; i < nr_pages; i++)
+		__free_page(mm->context.ldt_pages[i]);
+	mm->context.size = 0;
 }
 
 static int read_ldt(void __user * ptr, unsigned long bytecount)
 {
-	int err;
+	int err, i;
 	unsigned long size;
 	struct mm_struct * mm = current->mm;
 
@@ -138,8 +135,25 @@ static int read_ldt(void __user * ptr, u
 		size = bytecount;
 
 	err = 0;
-	if (copy_to_user(ptr, mm->context.ldt, size))
-		err = -EFAULT;
+	/*
+	 * This is necessary just in case we got here straight from a
+	 * context-switch where the ptes were set but no TLB flush
+	 * was done yet. We'd rather avoid a TLB flush in the
+	 * context-switch path and do it here instead.
+	 */
+	__flush_tlb_global();
+
+	for (i = 0; i < size; i += PAGE_SIZE) {
+		int nr = i / PAGE_SIZE, bytes;
+		char *kaddr = kmap(mm->context.ldt_pages[nr]);
+
+		bytes = size - i;
+		if (bytes > PAGE_SIZE)
+			bytes = PAGE_SIZE;
+		if (copy_to_user(ptr + i, kaddr, bytes))
+			err = -EFAULT;
+		kunmap(mm->context.ldt_pages[nr]);
+	}
 	up(&mm->context.sem);
 	if (err < 0)
 		return err;
@@ -158,7 +172,7 @@ static int read_default_ldt(void __user 
 
 	err = 0;
 	address = &default_ldt[0];
-	size = 5*sizeof(struct desc_struct);
+	size = 5*LDT_ENTRY_SIZE;
 	if (size > bytecount)
 		size = bytecount;
 
@@ -200,7 +214,15 @@ static int write_ldt(void __user * ptr, 
 			goto out_unlock;
 	}
 
-	lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt);
+	/*
+	 * No rescheduling allowed from this point to the install.
+	 *
+	 * We do a TLB flush for the same reason as in the read_ldt() path.
+	 */
+	preempt_disable();
+	__flush_tlb_global();
+	lp = (__u32 *) ((ldt_info.entry_number << 3) +
+			(char *) __kmap_atomic_vaddr(KM_LDT_PAGE0));
 
    	/* Allow LDTs to be cleared by the user. */
    	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
@@ -221,6 +243,7 @@ install:
 	*lp	= entry_1;
 	*(lp+1)	= entry_2;
 	error = 0;
+	preempt_enable();
 
 out_unlock:
 	up(&mm->context.sem);
@@ -248,3 +271,26 @@ asmlinkage int sys_modify_ldt(int func, 
 	}
 	return ret;
 }
+
+/*
+ * load one particular LDT into the current CPU
+ */
+void load_LDT_nolock(mm_context_t *pc, int cpu)
+{
+	struct page **pages = pc->ldt_pages;
+	int count = pc->size;
+	int nr_pages, i;
+
+	if (likely(!count)) {
+		pages = &default_ldt_page;
+		count = 5;
+	}
+	nr_pages = (count*LDT_ENTRY_SIZE + PAGE_SIZE-1) / PAGE_SIZE;
+
+	for (i = 0; i < nr_pages; i++) {
+		__kunmap_atomic_type(KM_LDT_PAGE0 - i);
+		__kmap_atomic(pages[i], KM_LDT_PAGE0 - i);
+	}
+	set_ldt_desc(cpu, (void *)__kmap_atomic_vaddr(KM_LDT_PAGE0), count);
+	load_LDT_desc();
+}
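
The page arithmetic used throughout the new LDT code, in isolation: entry
counts are rounded up to 512-entry chunks, and with 8-byte descriptors a chunk
is exactly one 4 KB page; the BUG_ON(i >= 64*1024) above caps the table at the
architectural 8192-entry / 16-page maximum. A stand-alone check (constant
values assumed from the headers):

#include <stdio.h>

#define LDT_ENTRY_SIZE	8
#define PAGE_SIZE	4096

int main(void)
{
	int mincount = 600;			/* caller's request */
	int count = (mincount + 511) & ~511;	/* chunked -> 1024  */
	int nr_pages = (count * LDT_ENTRY_SIZE + PAGE_SIZE - 1) / PAGE_SIZE;

	printf("%d entries, %d page(s)\n", count, nr_pages);	/* 1024, 2 */
	return 0;
}
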
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/kernel/mpparse.c current/arch/i386/kernel/mpparse.c
--- reference/arch/i386/kernel/mpparse.c	2004-04-07 14:53:56.000000000 -0700
+++ current/arch/i386/kernel/mpparse.c	2004-05-01 15:01:04.000000000 -0700
@@ -675,7 +675,7 @@ void __init get_smp_config (void)
 		 * Read the physical hardware table.  Anything here will
 		 * override the defaults.
 		 */
-		if (!smp_read_mpc((void *)mpf->mpf_physptr)) {
+		if (!smp_read_mpc((void *)phys_to_virt(mpf->mpf_physptr))) {
 			smp_found_config = 0;
 			printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
 			printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/kernel/process.c current/arch/i386/kernel/process.c
--- reference/arch/i386/kernel/process.c	2004-04-30 11:23:02.000000000 -0700
+++ current/arch/i386/kernel/process.c	2004-05-01 15:01:04.000000000 -0700
@@ -46,6 +46,7 @@
 #include <asm/i387.h>
 #include <asm/irq.h>
 #include <asm/desc.h>
+#include <asm/atomic_kmap.h>
 #ifdef CONFIG_MATH_EMULATION
 #include <asm/math_emu.h>
 #endif
@@ -303,6 +304,9 @@ void flush_thread(void)
 	struct task_struct *tsk = current;
 
 	memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
+#ifdef CONFIG_X86_HIGH_ENTRY
+	clear_thread_flag(TIF_DB7);
+#endif
 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));	
 	/*
 	 * Forget coprocessor state..
@@ -316,9 +320,8 @@ void release_thread(struct task_struct *
 	if (dead_task->mm) {
 		// temporary debugging check
 		if (dead_task->mm->context.size) {
-			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
+			printk("WARNING: dead process %8s still has LDT? <%d>\n",
 					dead_task->comm,
-					dead_task->mm->context.ldt,
 					dead_task->mm->context.size);
 			BUG();
 		}
@@ -353,7 +356,17 @@ int copy_thread(int nr, unsigned long cl
 	p->thread.esp = (unsigned long) childregs;
 	p->thread.esp0 = (unsigned long) (childregs+1);
 
+	/*
+	 * Get the two stack pages for the virtual stack.
+	 *
+	 * IMPORTANT: this code relies on the fact that the task
+	 * structure is an 8K aligned piece of physical memory.
+	 */
+	p->thread.stack_page0 = virt_to_page((unsigned long)p->thread_info);
+	p->thread.stack_page1 = virt_to_page((unsigned long)p->thread_info + PAGE_SIZE);
+
 	p->thread.eip = (unsigned long) ret_from_fork;
+	p->thread_info->real_stack = p->thread_info;
 
 	savesegment(fs,p->thread.fs);
 	savesegment(gs,p->thread.gs);
@@ -505,10 +518,41 @@ struct task_struct fastcall * __switch_t
 
 	__unlazy_fpu(prev_p);
 
+#ifdef CONFIG_X86_HIGH_ENTRY
+	/*
+	 * Set the ptes of the virtual stack. (NOTE: a one-page TLB flush is
+	 * needed because otherwise NMIs could interrupt the
+	 * user-return code with a virtual stack and stale TLBs.)
+	 */
+	__kunmap_atomic_type(KM_VSTACK0);
+	__kunmap_atomic_type(KM_VSTACK1);
+	__kmap_atomic(next->stack_page0, KM_VSTACK0);
+	__kmap_atomic(next->stack_page1, KM_VSTACK1);
+
+	/*
+	 * NOTE: here we rely on the task being the stack as well
+	 */
+	next_p->thread_info->virtual_stack =
+			(void *)__kmap_atomic_vaddr(KM_VSTACK0);
+
+#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
+	/*
+	 * If next was preempted on entry from userspace to kernel,
+	 * and now it's on a different cpu, we need to adjust %esp.
+	 * This assumes that entry.S does not copy %esp while on the
+	 * virtual stack (with interrupts enabled): which is so,
+	 * except within __SWITCH_KERNELSPACE itself.
+	 */
+	if (unlikely(next->esp >= TASK_SIZE)) {
+		next->esp &= THREAD_SIZE - 1;
+		next->esp |= (unsigned long) next_p->thread_info->virtual_stack;
+	}
+#endif
+#endif
 	/*
 	 * Reload esp0, LDT and the page table pointer:
 	 */
-	load_esp0(tss, next);
+	load_virtual_esp0(tss, next_p);
 
 	/*
 	 * Load the per-thread Thread-Local Storage descriptor.
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/kernel/reboot.c current/arch/i386/kernel/reboot.c
--- reference/arch/i386/kernel/reboot.c	2004-01-15 10:41:00.000000000 -0800
+++ current/arch/i386/kernel/reboot.c	2004-05-01 15:01:04.000000000 -0700
@@ -155,12 +155,11 @@ void machine_real_restart(unsigned char 
 	CMOS_WRITE(0x00, 0x8f);
 	spin_unlock_irqrestore(&rtc_lock, flags);
 
-	/* Remap the kernel at virtual address zero, as well as offset zero
-	   from the kernel segment.  This assumes the kernel segment starts at
-	   virtual address PAGE_OFFSET. */
-
-	memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
-		sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS);
+	/*
+	 * Remap the first 16 MB of RAM (which includes the kernel image)
+	 * at virtual address zero:
+	 */
+	setup_identity_mappings(swapper_pg_dir, 0, LOW_MAPPINGS_SIZE);
 
 	/*
 	 * Use `swapper_pg_dir' as our page directory.
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/kernel/signal.c current/arch/i386/kernel/signal.c
--- reference/arch/i386/kernel/signal.c	2004-04-30 11:23:02.000000000 -0700
+++ current/arch/i386/kernel/signal.c	2004-05-01 15:01:04.000000000 -0700
@@ -129,28 +129,29 @@ sys_sigaltstack(const stack_t __user *us
  */
 
 static int
-restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax)
+restore_sigcontext(struct pt_regs *regs,
+		struct sigcontext __user *__sc, int *peax)
 {
-	unsigned int err = 0;
+	struct sigcontext scratch; /* 88 bytes of scratch area */
 
 	/* Always make any pending restarted system calls return -EINTR */
 	current_thread_info()->restart_block.fn = do_no_restart_syscall;
 
-#define COPY(x)		err |= __get_user(regs->x, &sc->x)
+	if (copy_from_user(&scratch, __sc, sizeof(scratch)))
+		return -EFAULT;
+
+#define COPY(x)		regs->x = scratch.x
 
 #define COPY_SEG(seg)							\
-	{ unsigned short tmp;						\
-	  err |= __get_user(tmp, &sc->seg);				\
+	{ unsigned short tmp = scratch.seg;				\
 	  regs->x##seg = tmp; }
 
 #define COPY_SEG_STRICT(seg)						\
-	{ unsigned short tmp;						\
-	  err |= __get_user(tmp, &sc->seg);				\
+	{ unsigned short tmp = scratch.seg;				\
 	  regs->x##seg = tmp|3; }
 
 #define GET_SEG(seg)							\
-	{ unsigned short tmp;						\
-	  err |= __get_user(tmp, &sc->seg);				\
+	{ unsigned short tmp = scratch.seg;				\
 	  loadsegment(seg,tmp); }
 
 #define	FIX_EFLAGS	(X86_EFLAGS_AC | X86_EFLAGS_OF | X86_EFLAGS_DF | \
@@ -173,27 +174,23 @@ restore_sigcontext(struct pt_regs *regs,
 	COPY_SEG_STRICT(ss);
 	
 	{
-		unsigned int tmpflags;
-		err |= __get_user(tmpflags, &sc->eflags);
+		unsigned int tmpflags = scratch.eflags;
 		regs->eflags = (regs->eflags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
 		regs->orig_eax = -1;		/* disable syscall checks */
 	}
 
 	{
-		struct _fpstate __user * buf;
-		err |= __get_user(buf, &sc->fpstate);
+		struct _fpstate * buf = scratch.fpstate;
 		if (buf) {
 			if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
-				goto badframe;
-			err |= restore_i387(buf);
+				return -EFAULT;
+			if (restore_i387(buf))
+				return -EFAULT;
 		}
 	}
 
-	err |= __get_user(*peax, &sc->eax);
-	return err;
-
-badframe:
-	return 1;
+	*peax = scratch.eax;
+	return 0;
 }
 
 asmlinkage int sys_sigreturn(unsigned long __unused)
@@ -271,46 +268,47 @@ badframe:
  */
 
 static int
-setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
+setup_sigcontext(struct sigcontext __user *__sc, struct _fpstate __user *fpstate,
 		 struct pt_regs *regs, unsigned long mask)
 {
-	int tmp, err = 0;
+	struct sigcontext sc; /* 88 bytes of scratch area */
+	int tmp;
 
 	tmp = 0;
 	__asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp));
-	err |= __put_user(tmp, (unsigned int *)&sc->gs);
+	*(unsigned int *)&sc.gs = tmp;
 	__asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp));
-	err |= __put_user(tmp, (unsigned int *)&sc->fs);
-
-	err |= __put_user(regs->xes, (unsigned int *)&sc->es);
-	err |= __put_user(regs->xds, (unsigned int *)&sc->ds);
-	err |= __put_user(regs->edi, &sc->edi);
-	err |= __put_user(regs->esi, &sc->esi);
-	err |= __put_user(regs->ebp, &sc->ebp);
-	err |= __put_user(regs->esp, &sc->esp);
-	err |= __put_user(regs->ebx, &sc->ebx);
-	err |= __put_user(regs->edx, &sc->edx);
-	err |= __put_user(regs->ecx, &sc->ecx);
-	err |= __put_user(regs->eax, &sc->eax);
-	err |= __put_user(current->thread.trap_no, &sc->trapno);
-	err |= __put_user(current->thread.error_code, &sc->err);
-	err |= __put_user(regs->eip, &sc->eip);
-	err |= __put_user(regs->xcs, (unsigned int *)&sc->cs);
-	err |= __put_user(regs->eflags, &sc->eflags);
-	err |= __put_user(regs->esp, &sc->esp_at_signal);
-	err |= __put_user(regs->xss, (unsigned int *)&sc->ss);
+	*(unsigned int *)&sc.fs = tmp;
+	*(unsigned int *)&sc.es = regs->xes;
+	*(unsigned int *)&sc.ds = regs->xds;
+	sc.edi = regs->edi;
+	sc.esi = regs->esi;
+	sc.ebp = regs->ebp;
+	sc.esp = regs->esp;
+	sc.ebx = regs->ebx;
+	sc.edx = regs->edx;
+	sc.ecx = regs->ecx;
+	sc.eax = regs->eax;
+	sc.trapno = current->thread.trap_no;
+	sc.err = current->thread.error_code;
+	sc.eip = regs->eip;
+	*(unsigned int *)&sc.cs = regs->xcs;
+	sc.eflags = regs->eflags;
+	sc.esp_at_signal = regs->esp;
+	*(unsigned int *)&sc.ss = regs->xss;
 
 	tmp = save_i387(fpstate);
 	if (tmp < 0)
-	  err = 1;
-	else
-	  err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate);
+		return 1;
+	sc.fpstate = tmp ? fpstate : NULL;
 
 	/* non-iBCS2 extensions.. */
-	err |= __put_user(mask, &sc->oldmask);
-	err |= __put_user(current->thread.cr2, &sc->cr2);
+	sc.oldmask = mask;
+	sc.cr2 = current->thread.cr2;
 
-	return err;
+	if (copy_to_user(__sc, &sc, sizeof(sc)))
+		return 1;
+	return 0;
 }
 
 /*
@@ -448,7 +446,7 @@ static void setup_rt_frame(int sig, stru
 	/* Create the ucontext.  */
 	err |= __put_user(0, &frame->uc.uc_flags);
 	err |= __put_user(0, &frame->uc.uc_link);
-	err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+	err |= __put_user(current->sas_ss_sp, (unsigned long *)&frame->uc.uc_stack.ss_sp);
 	err |= __put_user(sas_ss_flags(regs->esp),
 			  &frame->uc.uc_stack.ss_flags);
 	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/kernel/smp.c current/arch/i386/kernel/smp.c
--- reference/arch/i386/kernel/smp.c	2004-04-30 12:55:07.000000000 -0700
+++ current/arch/i386/kernel/smp.c	2004-05-01 15:01:04.000000000 -0700
@@ -327,10 +327,12 @@ asmlinkage void smp_invalidate_interrupt
 		 
 	if (flush_mm == cpu_tlbstate[cpu].active_mm) {
 		if (cpu_tlbstate[cpu].state == TLBSTATE_OK) {
+#ifndef CONFIG_X86_SWITCH_PAGETABLES
 			if (flush_va == FLUSH_ALL)
 				local_flush_tlb();
 			else
 				__flush_tlb_one(flush_va);
+#endif
 		} else
 			leave_mm(cpu);
 	}
@@ -396,21 +398,6 @@ static void flush_tlb_others(cpumask_t c
 	spin_unlock(&tlbstate_lock);
 }
 	
-void flush_tlb_current_task(void)
-{
-	struct mm_struct *mm = current->mm;
-	cpumask_t cpu_mask;
-
-	preempt_disable();
-	cpu_mask = mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
-
-	local_flush_tlb();
-	if (!cpus_empty(cpu_mask))
-		flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
-	preempt_enable();
-}
-
 void flush_tlb_mm (struct mm_struct * mm)
 {
 	cpumask_t cpu_mask;
@@ -442,7 +429,10 @@ void flush_tlb_page(struct vm_area_struc
 
 	if (current->active_mm == mm) {
 		if(current->mm)
-			__flush_tlb_one(va);
+#ifndef CONFIG_X86_SWITCH_PAGETABLES
+			__flush_tlb_one(va)
+#endif
+				;
 		 else
 		 	leave_mm(smp_processor_id());
 	}
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/kernel/sysenter.c current/arch/i386/kernel/sysenter.c
--- reference/arch/i386/kernel/sysenter.c	2003-10-01 11:34:29.000000000 -0700
+++ current/arch/i386/kernel/sysenter.c	2004-05-01 15:01:04.000000000 -0700
@@ -18,13 +18,18 @@
 #include <asm/msr.h>
 #include <asm/pgtable.h>
 #include <asm/unistd.h>
+#include <linux/highmem.h>
 
 extern asmlinkage void sysenter_entry(void);
 
 void enable_sep_cpu(void *info)
 {
 	int cpu = get_cpu();
+#ifdef CONFIG_X86_HIGH_ENTRY
+	struct tss_struct *tss = (struct tss_struct *) __fix_to_virt(FIX_TSS_0) + cpu;
+#else
 	struct tss_struct *tss = init_tss + cpu;
+#endif
 
 	tss->ss1 = __KERNEL_CS;
 	tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
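
The expression '(struct tss_struct *) __fix_to_virt(FIX_TSS_0) + cpu' above is
easy to misread: the cast binds before the addition, so 'cpu' is scaled by
sizeof(struct tss_struct), stepping through the per-CPU TSS array inside the
fixmap window. A two-line demonstration (the struct size is invented):

#include <stdio.h>

struct tss { char pad[0x80]; };	/* stand-in; the real size differs */

int main(void)
{
	struct tss *t = (struct tss *) 0x1000 + 2;

	printf("%p\n", (void *) t);	/* 0x1000 + 2*0x80 = 0x1100 */
	return 0;
}
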
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/kernel/traps.c current/arch/i386/kernel/traps.c
--- reference/arch/i386/kernel/traps.c	2004-05-01 10:18:58.000000000 -0700
+++ current/arch/i386/kernel/traps.c	2004-05-01 15:01:04.000000000 -0700
@@ -55,12 +55,8 @@
 
 #include "mach_traps.h"
 
-asmlinkage int system_call(void);
-asmlinkage void lcall7(void);
-asmlinkage void lcall27(void);
-
-struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
-		{ 0, 0 }, { 0, 0 } };
+struct desc_struct default_ldt[] __attribute__((__section__(".data.default_ldt"))) = { { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } };
+struct page *default_ldt_page;
 
 /* Do we ignore FPU interrupts ? */
 char ignore_fpu_irq = 0;
@@ -109,6 +105,7 @@ static void set_intr_usr_gate(unsigned i
  */
 void breakpoint(void)
 {
+	init_entry_mappings();
         set_intr_usr_gate(3,&int3); /* disable ints on trap */
 	set_intr_gate(1,&debug);
 	set_intr_gate(14,&page_fault);
@@ -280,9 +277,11 @@ void show_registers(struct pt_regs *regs
 
 		eip = (u8 *)regs->eip - 43;
 		for (i = 0; i < 64; i++, eip++) {
-			unsigned char c;
+			unsigned char c = 0xff;
+
+			if ((user_mode(regs) && get_user(c, eip)) ||
+			    (!user_mode(regs) && __direct_get_user(c, eip))) {
 
-			if (eip < (u8 *)PAGE_OFFSET || __get_user(c, eip)) {
 				printk(" Bad EIP value.");
 				break;
 			}
@@ -308,16 +307,14 @@ static void handle_BUG(struct pt_regs *r
 
 	eip = regs->eip;
 
-	if (eip < PAGE_OFFSET)
-		goto no_bug;
-	if (__get_user(ud2, (unsigned short *)eip))
+	if (__direct_get_user(ud2, (unsigned short *)eip))
 		goto no_bug;
 	if (ud2 != 0x0b0f)
 		goto no_bug;
-	if (__get_user(line, (unsigned short *)(eip + 2)))
+	if (__direct_get_user(line, (unsigned short *)(eip + 2)))
 		goto bug;
-	if (__get_user(file, (char **)(eip + 4)) ||
-		(unsigned long)file < PAGE_OFFSET || __get_user(c, file))
+	if (__direct_get_user(file, (char **)(eip + 4)) ||
+			__direct_get_user(c, file))
 		file = "<bad filename>";
 
 	printk("------------[ cut here ]------------\n");
@@ -644,10 +641,18 @@ asmlinkage void do_debug(struct pt_regs 
 	if (regs->eflags & X86_EFLAGS_IF)
 		local_irq_enable();
 
-	/* Mask out spurious debug traps due to lazy DR7 setting */
+	/*
+	 * Mask out spurious debug traps due to lazy DR7 setting or
+	 * due to 4G/4G kernel mode:
+	 */
 	if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
 		if (!tsk->thread.debugreg[7])
 			goto clear_dr7;
+		if (!user_mode(regs)) {
+			// restore upon return-to-userspace:
+			set_thread_flag(TIF_DB7);
+			goto clear_dr7;
+		}
 	}
 
 	if (regs->eflags & VM_MASK)
@@ -911,19 +916,53 @@ asmlinkage void math_emulate(long arg)
 
 #endif /* CONFIG_MATH_EMULATION */
 
-#ifdef CONFIG_X86_F00F_BUG
-void __init trap_init_f00f_bug(void)
+void __init trap_init_virtual_IDT(void)
 {
-	__set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO);
-
 	/*
-	 * Update the IDT descriptor and reload the IDT so that
-	 * it uses the read-only mapped virtual address.
+	 * Map the IDT into the fixmap area and point idt_descr
+	 * at that alias, so the IDT ends up at the same virtual
+	 * address in every pagetable.
 	 */
-	idt_descr.address = fix_to_virt(FIX_F00F_IDT);
+	__set_fixmap(FIX_IDT, __pa(&idt_table), PAGE_KERNEL_RO);
+	idt_descr.address = __fix_to_virt(FIX_IDT);
+
 	__asm__ __volatile__("lidt %0" : : "m" (idt_descr));
 }
+
+void __init trap_init_virtual_GDT(void)
+{
+	int cpu = smp_processor_id();
+	struct Xgt_desc_struct *gdt_desc = cpu_gdt_descr + cpu;
+	struct Xgt_desc_struct tmp_desc = {0, 0};
+	struct tss_struct * t;
+
+	__asm__ __volatile__("sgdt %0": "=m" (tmp_desc): :"memory");
+
+#ifdef CONFIG_X86_HIGH_ENTRY
+	if (!cpu) {
+		__set_fixmap(FIX_GDT_0, __pa(cpu_gdt_table), PAGE_KERNEL);
+		__set_fixmap(FIX_GDT_1, __pa(cpu_gdt_table) + PAGE_SIZE, PAGE_KERNEL);
+		__set_fixmap(FIX_TSS_0, __pa(init_tss), PAGE_KERNEL);
+		__set_fixmap(FIX_TSS_1, __pa(init_tss) + 1*PAGE_SIZE, PAGE_KERNEL);
+		__set_fixmap(FIX_TSS_2, __pa(init_tss) + 2*PAGE_SIZE, PAGE_KERNEL);
+		__set_fixmap(FIX_TSS_3, __pa(init_tss) + 3*PAGE_SIZE, PAGE_KERNEL);
+	}
+
+	gdt_desc->address = __fix_to_virt(FIX_GDT_0) + sizeof(cpu_gdt_table[0]) * cpu;
+#else
+	gdt_desc->address = (unsigned long)cpu_gdt_table[cpu];
 #endif
+	__asm__ __volatile__("lgdt %0": : "m" (*gdt_desc));
+
+#ifdef CONFIG_X86_HIGH_ENTRY
+	t = (struct tss_struct *) __fix_to_virt(FIX_TSS_0) + cpu;
+#else
+	t = init_tss + cpu;
+#endif
+	set_tss_desc(cpu, t);
+	cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
+	load_TR_desc();
+}
 
 #define _set_gate(gate_addr,type,dpl,addr,seg) \
 do { \
@@ -950,17 +989,17 @@ void set_intr_gate(unsigned int n, void 
 	_set_gate(idt_table+n,14,0,addr,__KERNEL_CS);
 }
 
-static void __init set_trap_gate(unsigned int n, void *addr)
+void __init set_trap_gate(unsigned int n, void *addr)
 {
 	_set_gate(idt_table+n,15,0,addr,__KERNEL_CS);
 }
 
-static void __init set_system_gate(unsigned int n, void *addr)
+void __init set_system_gate(unsigned int n, void *addr)
 {
 	_set_gate(idt_table+n,15,3,addr,__KERNEL_CS);
 }
 
-static void __init set_call_gate(void *a, void *addr)
+void __init set_call_gate(void *a, void *addr)
 {
 	_set_gate(a,12,3,addr,__KERNEL_CS);
 }
@@ -988,6 +1027,7 @@ void __init trap_init(void)
 #ifdef CONFIG_X86_LOCAL_APIC
 	init_apic_mappings();
 #endif
+	init_entry_mappings();
 
 	set_trap_gate(0,&divide_error);
 	set_intr_gate(1,&debug);
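
trap_init_virtual_IDT() and trap_init_virtual_GDT() work because fixmap slots
sit at the very top of the virtual address space, a region the 4G/4G entry
code keeps mapped in the user pagetables as well - so the CPU can still walk
the IDT/GDT/TSS after the %cr3 switch. The index-to-address scheme, sketched
(FIXADDR_TOP value assumed; see asm/fixmap.h):

#define PAGE_SHIFT	12
#define FIXADDR_TOP	0xfffff000UL	/* assumed top of the fixmap area */

/* slots are handed out downward from the top, one page per index: */
#define __fix_to_virt(idx)	(FIXADDR_TOP - ((idx) << PAGE_SHIFT))

This is why FIX_IDT, FIX_GDT_0/1 and FIX_TSS_0..3 yield compile-time-constant
virtual addresses that are identical in every pagetable.
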
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/kernel/vm86.c current/arch/i386/kernel/vm86.c
--- reference/arch/i386/kernel/vm86.c	2004-03-11 14:33:37.000000000 -0800
+++ current/arch/i386/kernel/vm86.c	2004-05-01 15:01:04.000000000 -0700
@@ -125,7 +125,7 @@ struct pt_regs * fastcall save_v86_state
 	tss = init_tss + get_cpu();
 	current->thread.esp0 = current->thread.saved_esp0;
 	current->thread.sysenter_cs = __KERNEL_CS;
-	load_esp0(tss, &current->thread);
+	load_virtual_esp0(tss, current);
 	current->thread.saved_esp0 = 0;
 	put_cpu();
 
@@ -305,7 +305,7 @@ static void do_sys_vm86(struct kernel_vm
 	tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0;
 	if (cpu_has_sep)
 		tsk->thread.sysenter_cs = 0;
-	load_esp0(tss, &tsk->thread);
+	load_virtual_esp0(tss, tsk);
 	put_cpu();
 
 	tsk->thread.screen_bitmap = info->screen_bitmap;
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/kernel/vmlinux.lds.S current/arch/i386/kernel/vmlinux.lds.S
--- reference/arch/i386/kernel/vmlinux.lds.S	2004-04-30 11:23:02.000000000 -0700
+++ current/arch/i386/kernel/vmlinux.lds.S	2004-05-01 15:01:04.000000000 -0700
@@ -5,13 +5,17 @@
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/thread_info.h>
 
+#include <linux/config.h>
+#include <asm/page.h>
+#include <asm/asm_offsets.h>
+
 OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
 OUTPUT_ARCH(i386)
 ENTRY(startup_32)
 jiffies = jiffies_64;
 SECTIONS
 {
-  . = 0xC0000000 + 0x100000;
+  . = __PAGE_OFFSET + 0x100000;
   /* read-only */
   _text = .;			/* Text and read-only data */
   .text : {
@@ -21,6 +25,19 @@ SECTIONS
 	*(.gnu.warning)
 	} = 0x9090
 
+#ifdef CONFIG_X86_4G
+  . = ALIGN(PAGE_SIZE_asm);
+  __entry_tramp_start = .;
+  . = FIX_ENTRY_TRAMPOLINE_0_addr;
+  __start___entry_text = .;
+  .entry.text : AT (__entry_tramp_start) { *(.entry.text) }
+  __entry_tramp_end = __entry_tramp_start + SIZEOF(.entry.text);
+  . = __entry_tramp_end;
+  . = ALIGN(PAGE_SIZE_asm);
+#else
+  .entry.text : { *(.entry.text) }
+#endif
+
   _etext = .;			/* End of text section */
 
   . = ALIGN(16);		/* Exception table */
@@ -36,15 +53,12 @@ SECTIONS
 	CONSTRUCTORS
 	}
 
-  . = ALIGN(4096);
+  . = ALIGN(PAGE_SIZE_asm);
   __nosave_begin = .;
   .data_nosave : { *(.data.nosave) }
-  . = ALIGN(4096);
+  . = ALIGN(PAGE_SIZE_asm);
   __nosave_end = .;
 
-  . = ALIGN(4096);
-  .data.page_aligned : { *(.data.idt) }
-
   . = ALIGN(32);
   .data.cacheline_aligned : { *(.data.cacheline_aligned) }
 
@@ -54,7 +68,7 @@ SECTIONS
   .data.init_task : { *(.data.init_task) }
 
   /* will be freed after init */
-  . = ALIGN(4096);		/* Init code and data */
+  . = ALIGN(PAGE_SIZE_asm);		/* Init code and data */
   __init_begin = .;
   .init.text : { 
 	_sinittext = .;
@@ -93,7 +107,7 @@ SECTIONS
      from .altinstructions and .eh_frame */
   .exit.text : { *(.exit.text) }
   .exit.data : { *(.exit.data) }
-  . = ALIGN(4096);
+  . = ALIGN(PAGE_SIZE_asm);
   __initramfs_start = .;
   .init.ramfs : { *(.init.ramfs) }
   __initramfs_end = .;
@@ -101,10 +115,22 @@ SECTIONS
   __per_cpu_start = .;
   .data.percpu  : { *(.data.percpu) }
   __per_cpu_end = .;
-  . = ALIGN(4096);
+  . = ALIGN(PAGE_SIZE_asm);
   __init_end = .;
   /* freed after init ends here */
-	
+
+  . = ALIGN(PAGE_SIZE_asm);
+  .data.page_aligned_tss : { *(.data.tss) }
+
+  . = ALIGN(PAGE_SIZE_asm);
+  .data.page_aligned_default_ldt : { *(.data.default_ldt) }
+
+  . = ALIGN(PAGE_SIZE_asm);
+  .data.page_aligned_idt : { *(.data.idt) }
+
+  . = ALIGN(PAGE_SIZE_asm);
+  .data.page_aligned_gdt : { *(.data.gdt) }
+
   __bss_start = .;		/* BSS */
   .bss : {
 	*(.bss.page_aligned)
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/kernel/vsyscall-sysenter.S current/arch/i386/kernel/vsyscall-sysenter.S
--- reference/arch/i386/kernel/vsyscall-sysenter.S	2004-04-07 14:53:56.000000000 -0700
+++ current/arch/i386/kernel/vsyscall-sysenter.S	2004-05-01 15:01:04.000000000 -0700
@@ -12,6 +12,11 @@
 	.type __kernel_vsyscall,@function
 __kernel_vsyscall:
 .LSTART_vsyscall:
+	cmpl $192, %eax
+	jne 1f
+	int $0x80
+	ret
+1:
 	push %ecx
 .Lpush_ecx:
 	push %edx
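
The new eax == 192 check diverts one syscall to int $0x80: on i386, 192 is
__NR_mmap2, the common six-argument syscall, and the sixth argument travels in
%ebp - exactly the user-stack load that this series removed from the sysenter
path in entry.S above. A user-space illustration of the six-argument case
(i386-specific; a sketch only):

#include <sys/syscall.h>
#include <unistd.h>

/* mmap2's sixth argument (pgoff) is what forces %ebp into service
 * in the register-passing convention of int $0x80:
 */
long mmap2_raw(void *addr, unsigned long len, int prot, int flags,
	       int fd, unsigned long pgoff)
{
	return syscall(SYS_mmap2, addr, len, prot, flags, fd, pgoff);
}
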
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/kernel/vsyscall.lds current/arch/i386/kernel/vsyscall.lds
--- reference/arch/i386/kernel/vsyscall.lds	2003-06-05 14:35:02.000000000 -0700
+++ current/arch/i386/kernel/vsyscall.lds	2004-05-01 15:01:04.000000000 -0700
@@ -5,7 +5,7 @@
  */
 
 /* This must match <asm/fixmap.h>.  */
-VSYSCALL_BASE = 0xffffe000;
+VSYSCALL_BASE = 0xffffd000;
 
 SECTIONS
 {
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/lib/checksum.S current/arch/i386/lib/checksum.S
--- reference/arch/i386/lib/checksum.S	2002-12-09 18:45:52.000000000 -0800
+++ current/arch/i386/lib/checksum.S	2004-05-01 15:01:04.000000000 -0700
@@ -280,14 +280,14 @@ unsigned int csum_partial_copy_generic (
 	.previous
 
 .align 4
-.globl csum_partial_copy_generic
+.globl direct_csum_partial_copy_generic
 				
 #ifndef CONFIG_X86_USE_PPRO_CHECKSUM
 
 #define ARGBASE 16		
 #define FP		12
 		
-csum_partial_copy_generic:
+direct_csum_partial_copy_generic:
 	subl  $4,%esp	
 	pushl %edi
 	pushl %esi
@@ -422,7 +422,7 @@ DST(	movb %cl, (%edi)	)
 
 #define ARGBASE 12
 		
-csum_partial_copy_generic:
+direct_csum_partial_copy_generic:
 	pushl %ebx
 	pushl %edi
 	pushl %esi
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/lib/getuser.S current/arch/i386/lib/getuser.S
--- reference/arch/i386/lib/getuser.S	2002-12-09 18:45:40.000000000 -0800
+++ current/arch/i386/lib/getuser.S	2004-05-01 15:01:04.000000000 -0700
@@ -9,6 +9,7 @@
  * return value.
  */
 #include <asm/thread_info.h>
+#include <asm/asm_offsets.h>
 
 
 /*
@@ -28,7 +29,7 @@
 .globl __get_user_1
 __get_user_1:
 	GET_THREAD_INFO(%edx)
-	cmpl TI_ADDR_LIMIT(%edx),%eax
+	cmpl TI_addr_limit(%edx),%eax
 	jae bad_get_user
 1:	movzbl (%eax),%edx
 	xorl %eax,%eax
@@ -40,7 +41,7 @@ __get_user_2:
 	addl $1,%eax
 	jc bad_get_user
 	GET_THREAD_INFO(%edx)
-	cmpl TI_ADDR_LIMIT(%edx),%eax
+	cmpl TI_addr_limit(%edx),%eax
 	jae bad_get_user
 2:	movzwl -1(%eax),%edx
 	xorl %eax,%eax
@@ -52,7 +53,7 @@ __get_user_4:
 	addl $3,%eax
 	jc bad_get_user
 	GET_THREAD_INFO(%edx)
-	cmpl TI_ADDR_LIMIT(%edx),%eax
+	cmpl TI_addr_limit(%edx),%eax
 	jae bad_get_user
 3:	movl -3(%eax),%edx
 	xorl %eax,%eax
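
TI_ADDR_LIMIT was a hand-maintained offset; TI_addr_limit comes from the generated <asm/asm_offsets.h>, so the assembly can no longer drift out of sync with the C struct layout. A sketch of the usual generation trick (the struct here is a stand-in, not the real thread_info): the file is compiled with -S, and a sed pass over the resulting assembly turns the "->" markers into #defines.

#include <stddef.h>

struct example_thread_info {
	void		*task;
	void		*exec_domain;
	unsigned long	flags;
	unsigned long	addr_limit;
};

#define DEFINE(sym, val) \
	__asm__ __volatile__ ("\n->" #sym " %0" : : "i" (val))

void make_offsets(void)
{
	DEFINE(TI_addr_limit,
	       offsetof(struct example_thread_info, addr_limit));
}
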
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/lib/usercopy.c current/arch/i386/lib/usercopy.c
--- reference/arch/i386/lib/usercopy.c	2004-05-01 15:01:05.000000000 -0700
+++ current/arch/i386/lib/usercopy.c	2004-05-01 15:01:04.000000000 -0700
@@ -76,7 +76,7 @@ do {									   \
  * and returns @count.
  */
 long
-__strncpy_from_user(char *dst, const char __user *src, long count)
+__direct_strncpy_from_user(char *dst, const char __user *src, long count)
 {
 	long res;
 	__do_strncpy_from_user(dst, src, count, res);
@@ -102,7 +102,7 @@ __strncpy_from_user(char *dst, const cha
  * and returns @count.
  */
 long
-strncpy_from_user(char *dst, const char __user *src, long count)
+direct_strncpy_from_user(char *dst, const char __user *src, long count)
 {
 	long res = -EFAULT;
 	if (access_ok(VERIFY_READ, src, 1))
@@ -147,7 +147,7 @@ do {									\
  * On success, this will be zero.
  */
 unsigned long
-clear_user(void __user *to, unsigned long n)
+direct_clear_user(void __user *to, unsigned long n)
 {
 	might_sleep();
 	if (access_ok(VERIFY_WRITE, to, n))
@@ -167,7 +167,7 @@ clear_user(void __user *to, unsigned lon
  * On success, this will be zero.
  */
 unsigned long
-__clear_user(void __user *to, unsigned long n)
+__direct_clear_user(void __user *to, unsigned long n)
 {
 	__do_clear_user(to, n);
 	return n;
@@ -184,7 +184,7 @@ __clear_user(void __user *to, unsigned l
  * On exception, returns 0.
  * If the string is too long, returns a value greater than @n.
  */
-long strnlen_user(const char __user *s, long n)
+long direct_strnlen_user(const char __user *s, long n)
 {
 	unsigned long mask = -__addr_ok(s);
 	unsigned long res, tmp;
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/math-emu/fpu_system.h current/arch/i386/math-emu/fpu_system.h
--- reference/arch/i386/math-emu/fpu_system.h	2002-12-09 18:45:53.000000000 -0800
+++ current/arch/i386/math-emu/fpu_system.h	2004-05-01 15:01:04.000000000 -0700
@@ -15,6 +15,7 @@
 #include <linux/sched.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <asm/atomic_kmap.h>
 
 /* This sets the pointer FPU_info to point to the argument part
    of the stack frame of math_emulate() */
@@ -22,7 +23,7 @@
 
 /* s is always from a cpu register, and the cpu does bounds checking
  * during register load --> no further bounds checks needed */
-#define LDT_DESCRIPTOR(s)	(((struct desc_struct *)current->mm->context.ldt)[(s) >> 3])
+#define LDT_DESCRIPTOR(s)	(((struct desc_struct *)__kmap_atomic_vaddr(KM_LDT_PAGE0))[(s) >> 3])
 #define SEG_D_SIZE(x)		((x).b & (3 << 21))
 #define SEG_G_BIT(x)		((x).b & (1 << 23))
 #define SEG_GRANULARITY(x)	(((x).b & (1 << 23)) ? 4096 : 1)
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/mm/fault.c current/arch/i386/mm/fault.c
--- reference/arch/i386/mm/fault.c	2004-04-30 12:55:07.000000000 -0700
+++ current/arch/i386/mm/fault.c	2004-05-01 15:01:04.000000000 -0700
@@ -27,6 +27,7 @@
 #include <asm/pgalloc.h>
 #include <asm/hardirq.h>
 #include <asm/desc.h>
+#include <asm/tlbflush.h>
 
 extern void die(const char *,struct pt_regs *,long);
 
@@ -104,8 +105,17 @@ static inline unsigned long get_segment_
 	if (seg & (1<<2)) {
 		/* Must lock the LDT while reading it. */
 		down(&current->mm->context.sem);
+#if 1
+		/* horrible hack for 4/4 disabled kernels.
+		   I'm not quite sure what the TLB flush is good for;
+		   it's mindlessly copied from the read_ldt code. */
+		__flush_tlb_global();
+		desc = kmap(current->mm->context.ldt_pages[(seg&~7)/PAGE_SIZE]);
+		desc = (void *)desc + ((seg & ~7) % PAGE_SIZE);
+#else
 		desc = current->mm->context.ldt;
 		desc = (void *)desc + (seg & ~7);
+#endif
 	} else {
 		/* Must disable preemption while reading the GDT. */
 		desc = (u32 *)&cpu_gdt_table[get_cpu()];
@@ -118,6 +128,9 @@ static inline unsigned long get_segment_
 		 (desc[1] & 0xff000000);
 
 	if (seg & (1<<2)) { 
+#if 1
+		kunmap((void *)((unsigned long)desc & PAGE_MASK));
+#endif
 		up(&current->mm->context.sem);
 	} else
 		put_cpu();
@@ -243,6 +256,19 @@ asmlinkage void do_page_fault(struct pt_
 	 * (error_code & 4) == 0, and that the fault was not a
 	 * protection error (error_code & 1) == 0.
 	 */
+#ifdef CONFIG_X86_4G
+	/*
+	 * On 4/4 all kernel faults are either bugs, vmalloc or prefetch
+	 */
+	if (unlikely((regs->xcs & 3) == 0)) {
+		if (error_code & 3)
+			goto bad_area_nosemaphore;
+
+		/* If it's vm86 fall through */
+		if (!(regs->eflags & VM_MASK))
+			goto vmalloc_fault;
+	}
+#else
 	if (unlikely(address >= TASK_SIZE)) { 
 		if (!(error_code & 5))
 			goto vmalloc_fault;
@@ -252,6 +278,7 @@ asmlinkage void do_page_fault(struct pt_
 		 */
 		goto bad_area_nosemaphore;
 	} 
+#endif
 
 	mm = tsk->mm;
 
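With separate kernel page tables, a fault taken in kernel mode can no longer be an ordinary fault on a user mapping, so the classification collapses to the three cases named in the comment. A condensed user-space rendering of the new test (0x00020000 is the EFLAGS VM bit, i.e. VM_MASK; error_code bit 0 is a protection violation, bit 1 a write; 0x60/0x73 are the usual i386 kernel/user CS values):

#include <stdio.h>

enum fault_class { FC_BAD_AREA, FC_VMALLOC, FC_NORMAL };

static enum fault_class classify_fault(unsigned long xcs,
				       unsigned long eflags,
				       unsigned long error_code)
{
	if ((xcs & 3) == 0) {			/* faulted in kernel mode */
		if (error_code & 3)		/* protection or write fault */
			return FC_BAD_AREA;	/* -> bad_area_nosemaphore */
		if (!(eflags & 0x00020000))	/* not vm86 */
			return FC_VMALLOC;	/* -> vmalloc_fault */
	}
	return FC_NORMAL;			/* normal user-VM fault path */
}

int main(void)
{
	printf("write, kernel mode: %d\n", classify_fault(0x60, 0, 2));
	printf("read,  kernel mode: %d\n", classify_fault(0x60, 0, 0));
	printf("read,  user mode:   %d\n", classify_fault(0x73, 0, 4));
	return 0;
}
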
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/mm/init.c current/arch/i386/mm/init.c
--- reference/arch/i386/mm/init.c	2004-04-30 11:23:03.000000000 -0700
+++ current/arch/i386/mm/init.c	2004-05-01 15:01:04.000000000 -0700
@@ -40,125 +40,13 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
+#include <asm/desc.h>
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 unsigned long highstart_pfn, highend_pfn;
 
 static int do_test_wp_bit(void);
 
-/*
- * Creates a middle page table and puts a pointer to it in the
- * given global directory entry. This only returns the gd entry
- * in non-PAE compilation mode, since the middle layer is folded.
- */
-static pmd_t * __init one_md_table_init(pgd_t *pgd)
-{
-	pmd_t *pmd_table;
-		
-#ifdef CONFIG_X86_PAE
-	pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
-	set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
-	if (pmd_table != pmd_offset(pgd, 0)) 
-		BUG();
-#else
-	pmd_table = pmd_offset(pgd, 0);
-#endif
-
-	return pmd_table;
-}
-
-/*
- * Create a page table and place a pointer to it in a middle page
- * directory entry.
- */
-static pte_t * __init one_page_table_init(pmd_t *pmd)
-{
-	if (pmd_none(*pmd)) {
-		pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
-		set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
-		if (page_table != pte_offset_kernel(pmd, 0))
-			BUG();	
-
-		return page_table;
-	}
-	
-	return pte_offset_kernel(pmd, 0);
-}
-
-/*
- * This function initializes a certain range of kernel virtual memory 
- * with new bootmem page tables, everywhere page tables are missing in
- * the given range.
- */
-
-/*
- * NOTE: The pagetables are allocated contiguous on the physical space 
- * so we can cache the place of the first one and move around without 
- * checking the pgd every time.
- */
-static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
-{
-	pgd_t *pgd;
-	pmd_t *pmd;
-	int pgd_idx, pmd_idx;
-	unsigned long vaddr;
-
-	vaddr = start;
-	pgd_idx = pgd_index(vaddr);
-	pmd_idx = pmd_index(vaddr);
-	pgd = pgd_base + pgd_idx;
-
-	for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
-		if (pgd_none(*pgd)) 
-			one_md_table_init(pgd);
-
-		pmd = pmd_offset(pgd, vaddr);
-		for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
-			if (pmd_none(*pmd)) 
-				one_page_table_init(pmd);
-
-			vaddr += PMD_SIZE;
-		}
-		pmd_idx = 0;
-	}
-}
-
-/*
- * This maps the physical memory to kernel virtual address space, a total 
- * of max_low_pfn pages, by creating page tables starting from address 
- * PAGE_OFFSET.
- */
-static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
-{
-	unsigned long pfn;
-	pgd_t *pgd;
-	pmd_t *pmd;
-	pte_t *pte;
-	int pgd_idx, pmd_idx, pte_ofs;
-
-	pgd_idx = pgd_index(PAGE_OFFSET);
-	pgd = pgd_base + pgd_idx;
-	pfn = 0;
-
-	for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
-		pmd = one_md_table_init(pgd);
-		if (pfn >= max_low_pfn)
-			continue;
-		for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) {
-			/* Map with big pages if possible, otherwise create normal page tables. */
-			if (cpu_has_pse) {
-				set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
-				pfn += PTRS_PER_PTE;
-			} else {
-				pte = one_page_table_init(pmd);
-
-				for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++)
-					set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
-			}
-		}
-	}	
-}
-
 static inline int page_kills_ppro(unsigned long pagenr)
 {
 	if (pagenr >= 0x70000 && pagenr <= 0x7003F)
@@ -206,11 +94,8 @@ static inline int page_is_ram(unsigned l
 	return 0;
 }
 
-#ifdef CONFIG_HIGHMEM
 pte_t *kmap_pte;
-pgprot_t kmap_prot;
 
-EXPORT_SYMBOL(kmap_prot);
 EXPORT_SYMBOL(kmap_pte);
 
 #define kmap_get_fixmap_pte(vaddr)					\
@@ -218,29 +103,7 @@ EXPORT_SYMBOL(kmap_pte);
 
 void __init kmap_init(void)
 {
-	unsigned long kmap_vstart;
-
-	/* cache the first kmap pte */
-	kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
-	kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
-
-	kmap_prot = PAGE_KERNEL;
-}
-
-void __init permanent_kmaps_init(pgd_t *pgd_base)
-{
-	pgd_t *pgd;
-	pmd_t *pmd;
-	pte_t *pte;
-	unsigned long vaddr;
-
-	vaddr = PKMAP_BASE;
-	page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
-
-	pgd = swapper_pg_dir + pgd_index(vaddr);
-	pmd = pmd_offset(pgd, vaddr);
-	pte = pte_offset_kernel(pmd, vaddr);
-	pkmap_page_table = pte;	
+	kmap_pte = kmap_get_fixmap_pte(__fix_to_virt(FIX_KMAP_BEGIN));
 }
 
 void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
@@ -255,6 +118,8 @@ void __init one_highpage_init(struct pag
 		SetPageReserved(page);
 }
 
+#ifdef CONFIG_HIGHMEM
+
 #ifndef CONFIG_DISCONTIGMEM
 void __init set_highmem_pages_init(int bad_ppro) 
 {
@@ -266,12 +131,9 @@ void __init set_highmem_pages_init(int b
 #else
 extern void set_highmem_pages_init(int);
 #endif /* !CONFIG_DISCONTIGMEM */
-
 #else
-#define kmap_init() do { } while (0)
-#define permanent_kmaps_init(pgd_base) do { } while (0)
-#define set_highmem_pages_init(bad_ppro) do { } while (0)
-#endif /* CONFIG_HIGHMEM */
+# define set_highmem_pages_init(bad_ppro) do { } while (0)
+#endif
 
 unsigned long __PAGE_KERNEL = _PAGE_KERNEL;
 
@@ -281,30 +143,125 @@ unsigned long __PAGE_KERNEL = _PAGE_KERN
 extern void __init remap_numa_kva(void);
 #endif
 
-static void __init pagetable_init (void)
+static __init void prepare_pagetables(pgd_t *pgd_base, unsigned long address)
+{
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pgd = pgd_base + pgd_index(address);
+	pmd = pmd_offset(pgd, address);
+	if (!pmd_present(*pmd)) {
+		pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+		set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)));
+	}
+}
+
+static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
 {
 	unsigned long vaddr;
-	pgd_t *pgd_base = swapper_pg_dir;
 
+	for (vaddr = start; vaddr != end; vaddr += PAGE_SIZE)
+		prepare_pagetables(pgd_base, vaddr);
+}
+
+void setup_identity_mappings(pgd_t *pgd_base, unsigned long start, unsigned long end)
+{
+	unsigned long vaddr;
+	pgd_t *pgd;
+	int i, j, k;
+	pmd_t *pmd;
+	pte_t *pte, *pte_base;
+
+	pgd = pgd_base;
+
+	for (i = 0; i < PTRS_PER_PGD; pgd++, i++) {
+		vaddr = i*PGDIR_SIZE;
+		if (end && (vaddr >= end))
+			break;
+		pmd = pmd_offset(pgd, 0);
+		for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
+			vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
+			if (end && (vaddr >= end))
+				break;
+			if (vaddr < start)
+				continue;
+			if (cpu_has_pse) {
+				unsigned long __pe;
+
+				set_in_cr4(X86_CR4_PSE);
+				boot_cpu_data.wp_works_ok = 1;
+				__pe = _KERNPG_TABLE + _PAGE_PSE + vaddr - start;
+				/* Make it "global" too if supported */
+				if (cpu_has_pge) {
+					set_in_cr4(X86_CR4_PGE);
+#if !defined(CONFIG_X86_SWITCH_PAGETABLES)
+					__pe += _PAGE_GLOBAL;
+					__PAGE_KERNEL |= _PAGE_GLOBAL;
+#endif
+				}
+				set_pmd(pmd, __pmd(__pe));
+				continue;
+			}
+			if (!pmd_present(*pmd))
+				pte_base = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+			else
+				pte_base = (pte_t *) page_address(pmd_page(*pmd));
+			pte = pte_base;
+			for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
+				vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
+				if (end && (vaddr >= end))
+					break;
+				if (vaddr < start)
+					continue;
+				*pte = mk_pte_phys(vaddr-start, PAGE_KERNEL);
+			}
+			set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base)));
+		}
+	}
+}
+
+static void __init pagetable_init (void)
+{
+	unsigned long vaddr, end;
+	pgd_t *pgd_base;
 #ifdef CONFIG_X86_PAE
 	int i;
-	/* Init entries of the first-level page table to the zero page */
-	for (i = 0; i < PTRS_PER_PGD; i++)
-		set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
 #endif
 
-	/* Enable PSE if available */
-	if (cpu_has_pse) {
-		set_in_cr4(X86_CR4_PSE);
-	}
+	/*
+	 * This can be zero as well - no problem, in that case we exit
+	 * the loops anyway due to the PTRS_PER_* conditions.
+	 */
+	end = (unsigned long)__va(max_low_pfn*PAGE_SIZE);
 
-	/* Enable PGE if available */
-	if (cpu_has_pge) {
-		set_in_cr4(X86_CR4_PGE);
-		__PAGE_KERNEL |= _PAGE_GLOBAL;
+	pgd_base = swapper_pg_dir;
+#ifdef CONFIG_X86_PAE
+	/*
+	 * It causes too many problems if there's no proper pmd set up
+	 * for all 4 entries of the PGD - so we allocate all of them.
+	 * PAE systems will not miss this extra 4-8K anyway ...
+	 */
+	for (i = 0; i < PTRS_PER_PGD; i++) {
+		pmd_t *pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+		set_pgd(pgd_base + i, __pgd(__pa(pmd) + 0x1));
 	}
+#endif
+	/*
+	 * Set up lowmem-sized identity mappings at PAGE_OFFSET:
+	 */
+	setup_identity_mappings(pgd_base, PAGE_OFFSET, end);
 
-	kernel_physical_mapping_init(pgd_base);
+	/*
+	 * Add flat-mode identity-mappings - SMP needs it when
+	 * starting up on an AP from real-mode. (In the non-PAE
+	 * case we already have these mappings through head.S.)
+	 * All user-space mappings are explicitly cleared after
+	 * SMP startup.
+	 */
+#if defined(CONFIG_SMP) && defined(CONFIG_X86_PAE)
+	setup_identity_mappings(pgd_base, 0, 16*1024*1024);
+#endif
 	remap_numa_kva();
 
 	/*
@@ -312,38 +269,64 @@ static void __init pagetable_init (void)
 	 * created - mappings will be set by set_fixmap():
 	 */
 	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
-	page_table_range_init(vaddr, 0, pgd_base);
+	fixrange_init(vaddr, 0, pgd_base);
 
-	permanent_kmaps_init(pgd_base);
+#ifdef CONFIG_HIGHMEM
+	{
+		pgd_t *pgd;
+		pmd_t *pmd;
+		pte_t *pte;
 
-#ifdef CONFIG_X86_PAE
-	/*
-	 * Add low memory identity-mappings - SMP needs it when
-	 * starting up on an AP from real-mode. In the non-PAE
-	 * case we already have these mappings through head.S.
-	 * All user-space mappings are explicitly cleared after
-	 * SMP startup.
-	 */
-	pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
+		/*
+		 * Permanent kmaps:
+		 */
+		vaddr = PKMAP_BASE;
+		fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
+
+		pgd = swapper_pg_dir + pgd_index(vaddr);
+		pmd = pmd_offset(pgd, vaddr);
+		pte = pte_offset_kernel(pmd, vaddr);
+		pkmap_page_table = pte;
+	}
 #endif
 }
 
-void zap_low_mappings (void)
+/*
+ * Clear kernel pagetables in a PMD_SIZE-aligned range.
+ */
+static void clear_mappings(pgd_t *pgd_base, unsigned long start, unsigned long end)
 {
-	int i;
+	unsigned long vaddr;
+	pgd_t *pgd;
+	pmd_t *pmd;
+	int i, j;
+
+	pgd = pgd_base;
+
+	for (i = 0; i < PTRS_PER_PGD; pgd++, i++) {
+		vaddr = i*PGDIR_SIZE;
+		if (end && (vaddr >= end))
+			break;
+		pmd = pmd_offset(pgd, 0);
+		for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
+			vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
+			if (end && (vaddr >= end))
+				break;
+			if (vaddr < start)
+				continue;
+			pmd_clear(pmd);
+		}
+	}
+	flush_tlb_all();
+}
+
+void zap_low_mappings(void)
+{
+	printk("zapping low mappings.\n");
 	/*
 	 * Zap initial low-memory mappings.
-	 *
-	 * Note that "pgd_clear()" doesn't do it for
-	 * us, because pgd_clear() is a no-op on i386.
 	 */
-	for (i = 0; i < USER_PTRS_PER_PGD; i++)
-#ifdef CONFIG_X86_PAE
-		set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
-#else
-		set_pgd(swapper_pg_dir+i, __pgd(0));
-#endif
-	flush_tlb_all();
+	clear_mappings(swapper_pg_dir, 0, 16*1024*1024);
 }
 
 #ifndef CONFIG_DISCONTIGMEM
@@ -393,7 +376,15 @@ void __init paging_init(void)
 		set_in_cr4(X86_CR4_PAE);
 #endif
 	__flush_tlb_all();
-
+	/*
+	 * Subtle. SMP is doing its boot stuff late (because it has to
+	 * fork idle threads) - but it also needs low mappings for the
+	 * protected-mode entry to work. We zap these entries only after
+	 * the WP-bit has been tested.
+	 */
+#ifndef CONFIG_SMP
+	zap_low_mappings();
+#endif
 	kmap_init();
 	zone_sizes_init();
 }
@@ -512,22 +503,18 @@ void __init mem_init(void)
 	if (boot_cpu_data.wp_works_ok < 0)
 		test_wp_bit();
 
-	/*
-	 * Subtle. SMP is doing it's boot stuff late (because it has to
-	 * fork idle threads) - but it also needs low mappings for the
-	 * protected-mode entry to work. We zap these entries only after
-	 * the WP-bit has been tested.
-	 */
-#ifndef CONFIG_SMP
-	zap_low_mappings();
-#endif
+	entry_trampoline_setup();
+	default_ldt_page = virt_to_page(default_ldt);
+	load_LDT(&init_mm.context);
 }
 
-kmem_cache_t *pgd_cache;
-kmem_cache_t *pmd_cache;
+kmem_cache_t *pgd_cache, *pmd_cache, *kpmd_cache;
 
 void __init pgtable_cache_init(void)
 {
+	void (*ctor)(void *, kmem_cache_t *, unsigned long);
+	void (*dtor)(void *, kmem_cache_t *, unsigned long);
+
 	if (PTRS_PER_PMD > 1) {
 		pmd_cache = kmem_cache_create("pmd",
 					PTRS_PER_PMD*sizeof(pmd_t),
@@ -537,13 +524,36 @@ void __init pgtable_cache_init(void)
 					NULL);
 		if (!pmd_cache)
 			panic("pgtable_cache_init(): cannot create pmd cache");
-	}
+
+		if (TASK_SIZE > PAGE_OFFSET) {
+			kpmd_cache = kmem_cache_create("kpmd",
+					PTRS_PER_PMD*sizeof(pmd_t),
+					PTRS_PER_PMD*sizeof(pmd_t),
+					SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN,
+					kpmd_ctor,
+					NULL);
+			if (!kpmd_cache)
+				panic("pgtable_cache_init(): "
+						"cannot create kpmd cache");
+		}
+	}
+
+	if (PTRS_PER_PMD == 1 || TASK_SIZE <= PAGE_OFFSET)
+		ctor = pgd_ctor;
+	else
+		ctor = NULL;
+
+	if (PTRS_PER_PMD == 1 && TASK_SIZE <= PAGE_OFFSET)
+		dtor = pgd_dtor;
+	else
+		dtor = NULL;
+
 	pgd_cache = kmem_cache_create("pgd",
 				PTRS_PER_PGD*sizeof(pgd_t),
 				PTRS_PER_PGD*sizeof(pgd_t),
 				0,
-				pgd_ctor,
-				PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
+				ctor,
+				dtor);
 	if (!pgd_cache)
 		panic("pgtable_cache_init(): Cannot create pgd cache");
 }
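
setup_identity_mappings() maps [start, end) at the given virtual offset, using 4 MB PSE entries whenever the CPU supports them; each pmd entry is then simply "physical address plus flags". A small demo of the values the PSE branch computes, assuming the usual i386 flag encoding (_KERNPG_TABLE == 0x063, i.e. PRESENT|RW|ACCESSED|DIRTY, and _PAGE_PSE == 0x080):

#include <stdio.h>

#define PMD_SIZE	(4UL << 20)	/* non-PAE: 4 MB per pmd slot */
#define KERNPG_TABLE	0x063UL		/* assumed flag encoding */
#define PAGE_PSE	0x080UL

int main(void)
{
	unsigned long start = 0x02000000UL;	/* PAGE_OFFSET, 4G/4G layout */
	unsigned long vaddr;

	for (vaddr = start; vaddr < start + 4 * PMD_SIZE; vaddr += PMD_SIZE)
		printf("vaddr %08lx -> pmd entry %08lx\n",
		       vaddr, KERNPG_TABLE + PAGE_PSE + vaddr - start);
	return 0;
}
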
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/mm/pageattr.c current/arch/i386/mm/pageattr.c
--- reference/arch/i386/mm/pageattr.c	2004-04-30 11:23:03.000000000 -0700
+++ current/arch/i386/mm/pageattr.c	2004-05-01 15:01:04.000000000 -0700
@@ -71,7 +71,7 @@ static void set_pmd_pte(pte_t *kpte, uns
 	unsigned long flags;
 
 	set_pte_atomic(kpte, pte); 	/* change init_mm */
-	if (PTRS_PER_PMD > 1)
+	if (PTRS_PER_PMD > 1 || TASK_SIZE > PAGE_OFFSET)
 		return;
 
 	spin_lock_irqsave(&pgd_lock, flags);
diff -purN -X /home/mbligh/.diff.exclude reference/arch/i386/mm/pgtable.c current/arch/i386/mm/pgtable.c
--- reference/arch/i386/mm/pgtable.c	2004-04-30 11:23:03.000000000 -0700
+++ current/arch/i386/mm/pgtable.c	2004-05-01 15:01:04.000000000 -0700
@@ -21,6 +21,7 @@
 #include <asm/e820.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
+#include <asm/atomic_kmap.h>
 
 void show_mem(void)
 {
@@ -157,11 +158,20 @@ void pmd_ctor(void *pmd, kmem_cache_t *c
 	memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
 }
 
+void kpmd_ctor(void *__pmd, kmem_cache_t *cache, unsigned long flags)
+{
+	pmd_t *kpmd, *pmd;
+	kpmd = pmd_offset(&swapper_pg_dir[PTRS_PER_PGD-1],
+				(PTRS_PER_PMD - NR_SHARED_PMDS)*PMD_SIZE);
+	pmd = (pmd_t *)__pmd + (PTRS_PER_PMD - NR_SHARED_PMDS);
+
+	memset(__pmd, 0, (PTRS_PER_PMD - NR_SHARED_PMDS)*sizeof(pmd_t));
+	memcpy(pmd, kpmd, NR_SHARED_PMDS*sizeof(pmd_t));
+}
+
 /*
- * List of all pgd's needed for non-PAE so it can invalidate entries
- * in both cached and uncached pgd's; not needed for PAE since the
- * kernel pmd is shared. If PAE were not to share the pmd a similar
- * tactic would be needed. This is essentially codepath-based locking
+ * List of all pgd's needed so it can invalidate entries in both cached
+ * and uncached pgd's. This is essentially codepath-based locking
  * against pageattr.c; it is the unique case in which a valid change
  * of kernel pagetables can't be lazily synchronized by vmalloc faults.
  * vmalloc faults work because attached pagetables are never freed.
@@ -194,26 +204,54 @@ static inline void pgd_list_del(pgd_t *p
 		next->private = (unsigned long)pprev;
 }
 
-void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
+/*
+ * This is not that hard to figure out.
+ * (a) PTRS_PER_PMD == 1 means non-PAE.
+ * (b) PTRS_PER_PMD > 1 means PAE.
+ * (c) TASK_SIZE > PAGE_OFFSET means XKVA.
+ * (d) TASK_SIZE <= PAGE_OFFSET means non-XKVA.
+ *
+ * Do *NOT* back out the preconstruction like the patch I'm cleaning
+ * up after this very instant did, or at all, for that matter.
+ * This is never called when PTRS_PER_PMD > 1 && TASK_SIZE > PAGE_OFFSET.
+ * -- wli
+ */
+void pgd_ctor(void *__pgd, kmem_cache_t *cache, unsigned long unused)
 {
+	pgd_t *pgd = (pgd_t *)__pgd;
 	unsigned long flags;
 
-	if (PTRS_PER_PMD == 1)
-		spin_lock_irqsave(&pgd_lock, flags);
-
-	memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
-			swapper_pg_dir + USER_PTRS_PER_PGD,
-			(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
-
-	if (PTRS_PER_PMD > 1)
-		return;
-
-	pgd_list_add(pgd);
-	spin_unlock_irqrestore(&pgd_lock, flags);
-	memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
-}
-
-/* never called when PTRS_PER_PMD > 1 */
+	if (PTRS_PER_PMD == 1) {
+		if (TASK_SIZE <= PAGE_OFFSET)
+			spin_lock_irqsave(&pgd_lock, flags);
+		else
+			memcpy(&pgd[PTRS_PER_PGD - NR_SHARED_PMDS],
+				&swapper_pg_dir[PTRS_PER_PGD - NR_SHARED_PMDS],
+				NR_SHARED_PMDS * sizeof(pgd_t));
+	}
+
+	if (TASK_SIZE <= PAGE_OFFSET)
+		memcpy(pgd + USER_PTRS_PER_PGD,
+			swapper_pg_dir + USER_PTRS_PER_PGD,
+			(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+
+	if (PTRS_PER_PMD > 1)
+		return;
+
+	if (TASK_SIZE > PAGE_OFFSET)
+		memset(pgd, 0, (PTRS_PER_PGD - NR_SHARED_PMDS)*sizeof(pgd_t));
+	else {
+		list_add(&virt_to_page(pgd)->lru, &pgd_list);
+		spin_unlock_irqrestore(&pgd_lock, flags);
+		memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
+	}
+}
+
+/*
+ * Never called when PTRS_PER_PMD > 1 || TASK_SIZE > PAGE_OFFSET:
+ * with PAE we would list_del() multiple times, and for non-PAE
+ * with XKVA all the AGP pgd shootdown code is unnecessary.
+ */
 void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
 {
 	unsigned long flags; /* can be called from interrupt context */
@@ -223,6 +261,12 @@ void pgd_dtor(void *pgd, kmem_cache_t *c
 	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
+/*
+ * See the comments above pgd_ctor() wrt. preconstruction.
+ * Do *NOT* memcpy() here. If you do, you back out important
+ * anti-cache-pollution code.
+ *
+ */
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	int i;
@@ -231,15 +275,33 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	if (PTRS_PER_PMD == 1 || !pgd)
 		return pgd;
 
+	/*
+	 * In the 4G userspace case alias the top 16 MB virtual
+	 * memory range into the user mappings as well (these
+	 * include the trampoline and CPU data structures).
+	 */
 	for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
-		pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
+		kmem_cache_t *cache;
+		pmd_t *pmd;
+
+		if (TASK_SIZE > PAGE_OFFSET && i == USER_PTRS_PER_PGD - 1)
+			cache = kpmd_cache;
+		else
+			cache = pmd_cache;
+
+		pmd = kmem_cache_alloc(cache, GFP_KERNEL);
 		if (!pmd)
 			goto out_oom;
 		set_pgd(&pgd[i], __pgd(1 + __pa((u64)((u32)pmd))));
 	}
-	return pgd;
 
+	return pgd;
 out_oom:
+	/*
+	 * we don't have to handle the kpmd_cache here, since it's the
+	 * last allocation: either it failed, and then there is nothing
+	 * of it to free, or it succeeded, and then the whole operation
+	 * succeeds.
 	for (i--; i >= 0; i--)
 		kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
 	kmem_cache_free(pgd_cache, pgd);
@@ -250,10 +312,28 @@ void pgd_free(pgd_t *pgd)
 {
 	int i;
 
+ 	/* in the non-PAE case, clear_page_tables() clears user pgd entries */
+	if (PTRS_PER_PMD == 1)
+		goto out_free;
+
 	/* in the PAE case user pgd entries are overwritten before usage */
-	if (PTRS_PER_PMD > 1)
-		for (i = 0; i < USER_PTRS_PER_PGD; ++i)
-			kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
-	/* in the non-PAE case, clear_page_tables() clears user pgd entries */
+	for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
+		kmem_cache_t *cache;
+		pmd_t *pmd = __va(pgd_val(pgd[i]) - 1);
+
+		/*
+		 * only userspace pmd's are cleared for us
+		 * by mm/memory.c; it's a slab cache invariant
+		 * that we must keep the kernel pmd slab
+		 * separate at all times, else we'll have bad pmd's.
+		 */
+		if (TASK_SIZE > PAGE_OFFSET && i == USER_PTRS_PER_PGD - 1)
+			cache = kpmd_cache;
+		else
+			cache = pmd_cache;
+
+		kmem_cache_free(cache, pmd);
+	}
+out_free:
 	kmem_cache_free(pgd_cache, pgd);
 }
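
A quick check of the pgd layout that pgd_ctor()/kpmd_ctor() maintain in the non-PAE XKVA case: the top NR_SHARED_PMDS slots are copied from swapper_pg_dir and shared by every mm, and everything below them is per-process user space. With PKMAP_BASE at 0xff000000 and 4 MB pmds that works out to 4 shared slots:

#include <stdio.h>

#define PTRS_PER_PGD	1024
#define PMD_SIZE	(4UL << 20)	/* non-PAE */
#define PKMAP_BASE	0xff000000UL
#define NR_SHARED_PMDS	((0xffffffffUL - PKMAP_BASE + 1) / PMD_SIZE)

int main(void)
{
	printf("NR_SHARED_PMDS = %lu\n", NR_SHARED_PMDS);
	printf("user slots:    0 .. %lu\n",
	       PTRS_PER_PGD - NR_SHARED_PMDS - 1);
	printf("shared slots:  %lu .. %d (copied from swapper_pg_dir)\n",
	       PTRS_PER_PGD - NR_SHARED_PMDS, PTRS_PER_PGD - 1);
	return 0;
}
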
diff -purN -X /home/mbligh/.diff.exclude reference/drivers/block/scsi_ioctl.c current/drivers/block/scsi_ioctl.c
--- reference/drivers/block/scsi_ioctl.c	2004-04-30 11:23:13.000000000 -0700
+++ current/drivers/block/scsi_ioctl.c	2004-05-01 15:01:04.000000000 -0700
@@ -232,7 +232,7 @@ static int sg_scsi_ioctl(request_queue_t
 		return -EFAULT;
 	if (in_len > PAGE_SIZE || out_len > PAGE_SIZE)
 		return -EINVAL;
-	if (get_user(opcode, sic->data))
+	if (get_user(opcode, (int *)sic->data))
 		return -EFAULT;
 
 	bytes = max(in_len, out_len);
diff -purN -X /home/mbligh/.diff.exclude reference/include/asm-i386/atomic_kmap.h current/include/asm-i386/atomic_kmap.h
--- reference/include/asm-i386/atomic_kmap.h	1969-12-31 16:00:00.000000000 -0800
+++ current/include/asm-i386/atomic_kmap.h	2004-05-01 15:01:04.000000000 -0700
@@ -0,0 +1,95 @@
+/*
+ * atomic_kmap.h: temporary virtual kernel memory mappings
+ *
+ * Copyright (C) 2003 Ingo Molnar <mingo@redhat.com>
+ */
+
+#ifndef _ASM_ATOMIC_KMAP_H
+#define _ASM_ATOMIC_KMAP_H
+
+#ifdef __KERNEL__
+
+#include <linux/config.h>
+#include <asm/tlbflush.h>
+
+#ifdef CONFIG_DEBUG_HIGHMEM
+#define HIGHMEM_DEBUG 1
+#else
+#define HIGHMEM_DEBUG 0
+#endif
+
+extern pte_t *kmap_pte;
+#define kmap_prot PAGE_KERNEL
+
+#define PKMAP_BASE (0xff000000UL)
+#define NR_SHARED_PMDS ((0xffffffff-PKMAP_BASE+1)/PMD_SIZE)
+
+static inline unsigned long __kmap_atomic_vaddr(enum km_type type)
+{
+	enum fixed_addresses idx;
+
+	idx = type + KM_TYPE_NR*smp_processor_id();
+	return __fix_to_virt(FIX_KMAP_BEGIN + idx);
+}
+
+static inline void *__kmap_atomic_noflush(struct page *page, enum km_type type)
+{
+	enum fixed_addresses idx;
+	unsigned long vaddr;
+
+	idx = type + KM_TYPE_NR*smp_processor_id();
+	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+	/*
+	 * NOTE: entries that rely on some secondary TLB-flush
+	 * effect must not be global:
+	 */
+	set_pte(kmap_pte-idx, mk_pte(page, PAGE_KERNEL));
+
+	return (void*) vaddr;
+}
+
+static inline void *__kmap_atomic(struct page *page, enum km_type type)
+{
+	enum fixed_addresses idx;
+	unsigned long vaddr;
+
+	idx = type + KM_TYPE_NR*smp_processor_id();
+	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+#if HIGHMEM_DEBUG
+	BUG_ON(!pte_none(*(kmap_pte-idx)));
+#else
+	/*
+	 * Performance optimization - do not flush if the new
+	 * pte is the same as the old one:
+	 */
+	if (pte_val(*(kmap_pte-idx)) == pte_val(mk_pte(page, kmap_prot)))
+		return (void *) vaddr;
+#endif
+	set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
+	__flush_tlb_one(vaddr);
+
+	return (void*) vaddr;
+}
+
+static inline void __kunmap_atomic(void *kvaddr, enum km_type type)
+{
+#if HIGHMEM_DEBUG
+	unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
+	enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
+
+	BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx));
+	/*
+	 * force other mappings to Oops if they try to access
+	 * this pte without first remapping it
+	 */
+	pte_clear(kmap_pte-idx);
+	__flush_tlb_one(vaddr);
+#endif
+}
+
+#define __kunmap_atomic_type(type) \
+		__kunmap_atomic((void *)__kmap_atomic_vaddr(type), (type))
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_ATOMIC_KMAP_H */
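
__kmap_atomic_vaddr() gives every (cpu, slot) pair a fixed virtual address, which is what lets the LDT, the virtual stack and the entry trampoline live at compile-time-known locations. With the standard i386 definition __fix_to_virt(x) == FIXADDR_TOP - (x << PAGE_SHIFT) the addresses can be computed offline; KM_TYPE_NR works out to 34 with the enum this patch puts in kmap_types.h, while FIX_KMAP_BEGIN == 16 below is only an illustrative assumption (the real value follows from the fixed_addresses enum):

#include <stdio.h>

#define FIXADDR_TOP	0xffffe000UL
#define PAGE_SHIFT	12
#define KM_TYPE_NR	34		/* from the new km_type enum */
#define FIX_KMAP_BEGIN	16		/* assumption, for illustration */

static unsigned long fix_to_virt(unsigned long idx)
{
	return FIXADDR_TOP - (idx << PAGE_SHIFT);
}

int main(void)
{
	int cpu, type;

	for (cpu = 0; cpu < 2; cpu++)
		for (type = 0; type < 3; type++)
			printf("cpu %d, slot %d -> %08lx\n", cpu, type,
			       fix_to_virt(FIX_KMAP_BEGIN + type +
					   KM_TYPE_NR * cpu));
	return 0;
}
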
diff -purN -X /home/mbligh/.diff.exclude reference/include/asm-i386/checksum.h current/include/asm-i386/checksum.h
--- reference/include/asm-i386/checksum.h	2003-11-24 16:12:32.000000000 -0800
+++ current/include/asm-i386/checksum.h	2004-05-01 15:01:04.000000000 -0700
@@ -25,7 +25,7 @@ asmlinkage unsigned int csum_partial(con
  * better 64-bit) boundary
  */
 
-asmlinkage unsigned int csum_partial_copy_generic( const char *src, char *dst, int len, int sum,
+asmlinkage unsigned int direct_csum_partial_copy_generic( const char *src, char *dst, int len, int sum,
 						   int *src_err_ptr, int *dst_err_ptr);
 
 /*
@@ -39,14 +39,19 @@ static __inline__
 unsigned int csum_partial_copy_nocheck ( const char *src, char *dst,
 					int len, int sum)
 {
-	return csum_partial_copy_generic ( src, dst, len, sum, NULL, NULL);
+	/*
+	 * The direct function is OK for kernel-space => kernel-space copies:
+	 */
+	return direct_csum_partial_copy_generic ( src, dst, len, sum, NULL, NULL);
 }
 
 static __inline__
 unsigned int csum_partial_copy_from_user ( const char *src, char *dst,
 						int len, int sum, int *err_ptr)
 {
-	return csum_partial_copy_generic ( src, dst, len, sum, err_ptr, NULL);
+	if (copy_from_user(dst, src, len))
+		*err_ptr = -EFAULT;
+	return csum_partial(dst, len, sum);
 }
 
 /*
@@ -172,11 +177,26 @@ static __inline__ unsigned short int csu
  *	Copy and checksum to user
  */
 #define HAVE_CSUM_COPY_USER
-static __inline__ unsigned int csum_and_copy_to_user(const char *src, char *dst,
+static __inline__ unsigned int direct_csum_and_copy_to_user(const char *src, char *dst,
 				    int len, int sum, int *err_ptr)
 {
 	if (access_ok(VERIFY_WRITE, dst, len))
-		return csum_partial_copy_generic(src, dst, len, sum, NULL, err_ptr);
+		return direct_csum_partial_copy_generic(src, dst, len, sum, NULL, err_ptr);
+
+	if (len)
+		*err_ptr = -EFAULT;
+
+	return -1; /* invalid checksum */
+}
+
+static __inline__ unsigned int csum_and_copy_to_user(const char *src, char *dst,
+				    int len, int sum, int *err_ptr)
+{
+	if (access_ok(VERIFY_WRITE, dst, len)) {
+		if (copy_to_user(dst, src, len))
+			*err_ptr = -EFAULT;
+		return csum_partial(src, len, sum);
+	}
 
 	if (len)
 		*err_ptr = -EFAULT;
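
With indirect uaccess the copy and the checksum can no longer be fused into a single pass over user memory, so the user-space variants above become plain copy_to_user()/copy_from_user() followed by csum_partial() on the kernel-side buffer; the result is identical because the Internet checksum depends only on the copied bytes, at the price of touching the data twice. A self-contained sketch of the ones'-complement sum itself (not the kernel's optimized assembly):

#include <stdio.h>

/* 16-bit ones'-complement checksum of a buffer (even length assumed) */
static unsigned short csum16(const unsigned char *buf, int len)
{
	unsigned long sum = 0;
	int i;

	for (i = 0; i < len; i += 2)
		sum += (buf[i] << 8) | buf[i + 1];
	while (sum >> 16)			/* fold carries back in */
		sum = (sum & 0xffff) + (sum >> 16);
	return (unsigned short)~sum;
}

int main(void)
{
	unsigned char data[] = { 0x45, 0x00, 0x00, 0x3c, 0x1c, 0x46 };

	printf("checksum: %04x\n", csum16(data, (int)sizeof(data)));
	return 0;
}
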
diff -purN -X /home/mbligh/.diff.exclude reference/include/asm-i386/desc.h current/include/asm-i386/desc.h
--- reference/include/asm-i386/desc.h	2003-02-27 11:17:19.000000000 -0800
+++ current/include/asm-i386/desc.h	2004-05-01 15:01:04.000000000 -0700
@@ -21,6 +21,13 @@ struct Xgt_desc_struct {
 
 extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS];
 
+extern void trap_init_virtual_IDT(void);
+extern void trap_init_virtual_GDT(void);
+
+asmlinkage int system_call(void);
+asmlinkage void lcall7(void);
+asmlinkage void lcall27(void);
+
 #define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (GDT_ENTRY_TSS*8))
 #define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (GDT_ENTRY_LDT*8))
 
@@ -30,6 +37,7 @@ extern struct Xgt_desc_struct idt_descr,
  */
 extern struct desc_struct default_ldt[];
 extern void set_intr_gate(unsigned int irq, void * addr);
+extern void set_trap_gate(unsigned int n, void *addr);
 
 #define _set_tssldt_desc(n,addr,limit,type) \
 __asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
@@ -90,31 +98,8 @@ static inline void load_TLS(struct threa
 #undef C
 }
 
-static inline void clear_LDT(void)
-{
-	int cpu = get_cpu();
-
-	set_ldt_desc(cpu, &default_ldt[0], 5);
-	load_LDT_desc();
-	put_cpu();
-}
-
-/*
- * load one particular LDT into the current CPU
- */
-static inline void load_LDT_nolock(mm_context_t *pc, int cpu)
-{
-	void *segments = pc->ldt;
-	int count = pc->size;
-
-	if (likely(!count)) {
-		segments = &default_ldt[0];
-		count = 5;
-	}
-		
-	set_ldt_desc(cpu, segments, count);
-	load_LDT_desc();
-}
+extern struct page *default_ldt_page;
+extern void load_LDT_nolock(mm_context_t *pc, int cpu);
 
 static inline void load_LDT(mm_context_t *pc)
 {
@@ -123,6 +108,6 @@ static inline void load_LDT(mm_context_t
 	put_cpu();
 }
 
-#endif /* !__ASSEMBLY__ */
 
+#endif /* !__ASSEMBLY__ */
 #endif
diff -purN -X /home/mbligh/.diff.exclude reference/include/asm-i386/fixmap.h current/include/asm-i386/fixmap.h
--- reference/include/asm-i386/fixmap.h	2004-03-11 14:35:15.000000000 -0800
+++ current/include/asm-i386/fixmap.h	2004-05-01 15:01:04.000000000 -0700
@@ -18,17 +18,15 @@
 #include <asm/acpi.h>
 #include <asm/apicdef.h>
 #include <asm/page.h>
-#ifdef CONFIG_HIGHMEM
 #include <linux/threads.h>
 #include <asm/kmap_types.h>
-#endif
 
 /*
  * Here we define all the compile-time 'special' virtual
  * addresses. The point is to have a constant address at
  * compile time, but to set the physical address only
- * in the boot process. We allocate these special addresses
- * from the end of virtual memory (0xfffff000) backwards.
+ * in the boot process. We allocate these special addresses
+ * from the end of virtual memory (0xffffe000) backwards.
  * Also this lets us do fail-safe vmalloc(), we
  * can guarantee that these special addresses and
  * vmalloc()-ed addresses never overlap.
@@ -41,11 +39,20 @@
  * TLB entries of such buffers will not be flushed across
  * task switches.
  */
+
+/*
+ * On UP we currently have no trace of the fixmap mechanism:
+ * no page table allocations, etc. This might change in the
+ * future; framebuffers for the console driver(s), say, could
+ * be fix-mapped.
+ */
 enum fixed_addresses {
 	FIX_HOLE,
 	FIX_VSYSCALL,
 #ifdef CONFIG_X86_LOCAL_APIC
 	FIX_APIC_BASE,	/* local (CPU) APIC) -- required for SMP or not */
+#else
+	FIX_VSTACK_HOLE_1,
 #endif
 #ifdef CONFIG_X86_IO_APIC
 	FIX_IO_APIC_BASE_0,
@@ -57,16 +64,21 @@ enum fixed_addresses {
 	FIX_LI_PCIA,	/* Lithium PCI Bridge A */
 	FIX_LI_PCIB,	/* Lithium PCI Bridge B */
 #endif
-#ifdef CONFIG_X86_F00F_BUG
-	FIX_F00F_IDT,	/* Virtual mapping for IDT */
-#endif
+	FIX_IDT,
+	FIX_GDT_1,
+	FIX_GDT_0,
+	FIX_TSS_3,
+	FIX_TSS_2,
+	FIX_TSS_1,
+	FIX_TSS_0,
+	FIX_ENTRY_TRAMPOLINE_1,
+	FIX_ENTRY_TRAMPOLINE_0,
 #ifdef CONFIG_X86_CYCLONE_TIMER
 	FIX_CYCLONE_TIMER, /*cyclone timer register*/
+	FIX_VSTACK_HOLE_2,
 #endif 
-#ifdef CONFIG_HIGHMEM
 	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */
 	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
-#endif
 #ifdef CONFIG_ACPI_BOOT
 	FIX_ACPI_BEGIN,
 	FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
@@ -98,12 +110,15 @@ extern void __set_fixmap (enum fixed_add
 		__set_fixmap(idx, 0, __pgprot(0))
 
 /*
- * used by vmalloc.c.
+ * used by vmalloc.c and various other places.
  *
  * Leave one empty page between vmalloc'ed areas and
  * the start of the fixmap.
+ *
+ * IMPORTANT: don't change FIXADDR_TOP without adjusting KM_VSTACK0
+ * and KM_VSTACK1 so that the virtual stack is 8K aligned.
  */
-#define FIXADDR_TOP	(0xfffff000UL)
+#define FIXADDR_TOP	(0xffffe000UL)
 #define __FIXADDR_SIZE	(__end_of_permanent_fixed_addresses << PAGE_SHIFT)
 #define FIXADDR_START	(FIXADDR_TOP - __FIXADDR_SIZE)
 
diff -purN -X /home/mbligh/.diff.exclude reference/include/asm-i386/highmem.h current/include/asm-i386/highmem.h
--- reference/include/asm-i386/highmem.h	2003-10-14 15:50:32.000000000 -0700
+++ current/include/asm-i386/highmem.h	2004-05-01 15:01:04.000000000 -0700
@@ -25,26 +25,19 @@
 #include <linux/threads.h>
 #include <asm/kmap_types.h>
 #include <asm/tlbflush.h>
+#include <asm/atomic_kmap.h>
 
 /* declarations for highmem.c */
 extern unsigned long highstart_pfn, highend_pfn;
 
-extern pte_t *kmap_pte;
-extern pgprot_t kmap_prot;
 extern pte_t *pkmap_page_table;
-
-extern void kmap_init(void);
+extern void kmap_init(void) __init;
 
 /*
  * Right now we initialize only a single pte table. It can be extended
  * easily, subsequent pte tables have to be allocated in one physical
  * chunk of RAM.
  */
-#if NR_CPUS <= 32
-#define PKMAP_BASE (0xff800000UL)
-#else
-#define PKMAP_BASE (0xff600000UL)
-#endif
 #ifdef CONFIG_X86_PAE
 #define LAST_PKMAP 512
 #else
diff -purN -X /home/mbligh/.diff.exclude reference/include/asm-i386/kmap_types.h current/include/asm-i386/kmap_types.h
--- reference/include/asm-i386/kmap_types.h	2004-05-01 10:22:44.000000000 -0700
+++ current/include/asm-i386/kmap_types.h	2004-05-01 15:01:04.000000000 -0700
@@ -3,29 +3,37 @@
 
 #include <linux/config.h>
 
-#ifdef CONFIG_DEBUG_HIGHMEM
-# define D(n) __KM_FENCE_##n ,
-#else
-# define D(n)
-#endif
-
 enum km_type {
-D(0)	KM_BOUNCE_READ,
-D(1)	KM_SKB_SUNRPC_DATA,
-D(2)	KM_SKB_DATA_SOFTIRQ,
-D(3)	KM_USER0,
-D(4)	KM_USER1,
-D(5)	KM_BIO_SRC_IRQ,
-D(6)	KM_BIO_DST_IRQ,
-D(7)	KM_PTE0,
-D(8)	KM_PTE1,
-D(9)	KM_IRQ0,
-D(10)	KM_IRQ1,
-D(11)	KM_SOFTIRQ0,
-D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
+	/*
+	 * IMPORTANT: don't move these 3 entries, and only add entries in
+	 * pairs: the 4G/4G virtual stack must be 8K aligned on each cpu.
+	 */
+	KM_BOUNCE_READ,
+	KM_VSTACK1,
+	KM_VSTACK0,
+  
+	KM_LDT_PAGE15,
+	KM_LDT_PAGE0 = KM_LDT_PAGE15 + 16-1,
+	KM_USER_COPY,
+	KM_VSTACK_HOLE,
+	KM_SKB_SUNRPC_DATA,
+	KM_SKB_DATA_SOFTIRQ,
+	KM_USER0,
+	KM_USER1,
+	KM_BIO_SRC_IRQ,
+	KM_BIO_DST_IRQ,
+	KM_PTE0,
+	KM_PTE1,
+	KM_IRQ0,
+	KM_IRQ1,
+	KM_SOFTIRQ0,
+	KM_SOFTIRQ1,
+	KM_FILLER,
+	/*
+	 * Add new entries in pairs:
+	 * the 4G/4G virtual stack must be 8K aligned on each cpu.
+	 */
+	KM_TYPE_NR
 };
 
-#undef D
-
 #endif
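
The pairing rule exists because KM_VSTACK1/KM_VSTACK0 are two consecutive fixmap pages holding the per-cpu virtual kernel stack, and fixmap addresses grow downwards with the index: the stack base (the KM_VSTACK0 page) is 8K aligned only if FIXADDR_TOP is 8K aligned and every per-cpu block starts at an even index, hence the even KM_TYPE_NR (34 with the enum above). A checker, with FIX_KMAP_BEGIN == 16 assumed as before (an odd value would trip the assert, which is exactly what the comments warn about):

#include <assert.h>
#include <stdio.h>

#define FIXADDR_TOP	0xffffe000UL
#define PAGE_SHIFT	12
#define KM_TYPE_NR	34
#define FIX_KMAP_BEGIN	16		/* assumption, for illustration */
#define KM_VSTACK0	2		/* lower page of the 8K stack */

int main(void)
{
	int cpu;

	for (cpu = 0; cpu < 32; cpu++) {
		unsigned long idx = FIX_KMAP_BEGIN + KM_VSTACK0 +
				    (unsigned long)KM_TYPE_NR * cpu;
		unsigned long base = FIXADDR_TOP - (idx << PAGE_SHIFT);

		assert((base & 8191) == 0);	/* 8K-aligned stack base */
	}
	printf("virtual stacks 8K-aligned for all 32 cpus\n");
	return 0;
}
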
diff -purN -X /home/mbligh/.diff.exclude reference/include/asm-i386/mmu.h current/include/asm-i386/mmu.h
--- reference/include/asm-i386/mmu.h	2002-12-09 18:46:11.000000000 -0800
+++ current/include/asm-i386/mmu.h	2004-05-01 15:01:04.000000000 -0700
@@ -8,10 +8,13 @@
  *
  * cpu_vm_mask is used to optimize ldt flushing.
  */
+
+#define MAX_LDT_PAGES 16
+
 typedef struct { 
 	int size;
 	struct semaphore sem;
-	void *ldt;
+	struct page *ldt_pages[MAX_LDT_PAGES];
 } mm_context_t;
 
 #endif
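
MAX_LDT_PAGES follows from the architecture: an i386 LDT holds at most 8192 descriptors of 8 bytes each, i.e. 64 KB, which is 16 pages of 4 KB:

#include <stdio.h>

#define LDT_ENTRIES	8192	/* architectural maximum */
#define LDT_ENTRY_SIZE	8	/* bytes per segment descriptor */
#define PAGE_SIZE	4096

int main(void)
{
	printf("MAX_LDT_PAGES = %d\n",
	       LDT_ENTRIES * LDT_ENTRY_SIZE / PAGE_SIZE);	/* 16 */
	return 0;
}
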
diff -purN -X /home/mbligh/.diff.exclude reference/include/asm-i386/mmu_context.h current/include/asm-i386/mmu_context.h
--- reference/include/asm-i386/mmu_context.h	2003-10-01 11:41:15.000000000 -0700
+++ current/include/asm-i386/mmu_context.h	2004-05-01 15:01:04.000000000 -0700
@@ -29,6 +29,10 @@ static inline void switch_mm(struct mm_s
 {
 	int cpu = smp_processor_id();
 
+#ifdef CONFIG_X86_SWITCH_PAGETABLES
+	if (tsk->mm)
+		tsk->thread_info->user_pgd = (void *)__pa(tsk->mm->pgd);
+#endif
 	if (likely(prev != next)) {
 		/* stop flush ipis for the previous mm */
 		cpu_clear(cpu, prev->cpu_vm_mask);
@@ -39,12 +43,14 @@ static inline void switch_mm(struct mm_s
 		cpu_set(cpu, next->cpu_vm_mask);
 
 		/* Re-load page tables */
+#if !defined(CONFIG_X86_SWITCH_PAGETABLES)
 		load_cr3(next->pgd);
+#endif
 
 		/*
 		 * load the LDT, if the LDT is different:
 		 */
-		if (unlikely(prev->context.ldt != next->context.ldt))
+		if (unlikely(prev->context.size + next->context.size))
 			load_LDT_nolock(&next->context, cpu);
 	}
 #ifdef CONFIG_SMP
@@ -56,7 +62,9 @@ static inline void switch_mm(struct mm_s
 			/* We were in lazy tlb mode and leave_mm disabled 
 			 * tlb flush IPI delivery. We must reload %cr3.
 			 */
+#if !defined(CONFIG_X86_SWITCH_PAGETABLES)
 			load_cr3(next->pgd);
+#endif
 			load_LDT_nolock(&next->context, cpu);
 		}
 	}
@@ -67,6 +75,6 @@ static inline void switch_mm(struct mm_s
 	asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0))
 
 #define activate_mm(prev, next) \
-	switch_mm((prev),(next),NULL)
+	switch_mm((prev),(next),current)
 
 #endif
diff -purN -X /home/mbligh/.diff.exclude reference/include/asm-i386/page.h current/include/asm-i386/page.h
--- reference/include/asm-i386/page.h	2003-04-09 11:48:05.000000000 -0700
+++ current/include/asm-i386/page.h	2004-05-01 15:01:04.000000000 -0700
@@ -1,6 +1,8 @@
 #ifndef _I386_PAGE_H
 #define _I386_PAGE_H
 
+#include <linux/config.h>
+
 /* PAGE_SHIFT determines the page size */
 #define PAGE_SHIFT	12
 #define PAGE_SIZE	(1UL << PAGE_SHIFT)
@@ -9,11 +11,10 @@
 #define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1))
 #define LARGE_PAGE_SIZE (1UL << PMD_SHIFT)
 
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
-
 #include <linux/config.h>
 
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
 #ifdef CONFIG_X86_USE_3DNOW
 
 #include <asm/mmx.h>
@@ -88,8 +89,19 @@ typedef struct { unsigned long pgprot; }
  *
  * If you want more physical memory than this then see the CONFIG_HIGHMEM4G
  * and CONFIG_HIGHMEM64G options in the kernel configuration.
+ *
+ * Note: on PAE the kernel must never go below 32 MB, we use the
+ * first 8 entries of the 2-level boot pgd for PAE magic.
  */
 
+#ifdef CONFIG_X86_4G_VM_LAYOUT
+#define __PAGE_OFFSET		(0x02000000)
+#define TASK_SIZE		(0xff000000)
+#else
+#define __PAGE_OFFSET		(0xc0000000)
+#define TASK_SIZE		(0xc0000000)
+#endif
+
 /*
  * This much address space is reserved for vmalloc() and iomap()
  * as well as fixmap mappings.
@@ -114,16 +126,10 @@ static __inline__ int get_order(unsigned
 
 #endif /* __ASSEMBLY__ */
 
-#ifdef __ASSEMBLY__
-#define __PAGE_OFFSET		(0xC0000000)
-#else
-#define __PAGE_OFFSET		(0xC0000000UL)
-#endif
-
-
 #define PAGE_OFFSET		((unsigned long)__PAGE_OFFSET)
 #define VMALLOC_RESERVE		((unsigned long)__VMALLOC_RESERVE)
-#define MAXMEM			(-__PAGE_OFFSET-__VMALLOC_RESERVE)
+#define __MAXMEM		(-__PAGE_OFFSET-__VMALLOC_RESERVE)
+#define MAXMEM			((unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE))
 #define __pa(x)			((unsigned long)(x)-PAGE_OFFSET)
 #define __va(x)			((void *)((unsigned long)(x)+PAGE_OFFSET))
 #define pfn_to_kaddr(pfn)      __va((pfn) << PAGE_SHIFT)
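
The numbers behind the two layouts, for reference: the 4G/4G configuration stretches user VM from 0xc0000000 to 0xff000000 bytes (the top 16 MB stay reserved for the shared trampoline/fixmap area), while the kernel, now on its own page tables, starts its direct mapping at a PAGE_OFFSET of only 32 MB:

#include <stdio.h>

int main(void)
{
	unsigned long task_44 = 0xff000000UL;	/* TASK_SIZE, 4G/4G */
	unsigned long task_31 = 0xc0000000UL;	/* TASK_SIZE, default 3/1 */

	printf("4/4 user VM: %4lu MB (PAGE_OFFSET 0x02000000)\n",
	       task_44 >> 20);
	printf("3/1 user VM: %4lu MB (PAGE_OFFSET 0xc0000000)\n",
	       task_31 >> 20);
	return 0;
}
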
diff -purN -X /home/mbligh/.diff.exclude reference/include/asm-i386/pgtable.h current/include/asm-i386/pgtable.h
--- reference/include/asm-i386/pgtable.h	2004-05-01 10:22:44.000000000 -0700
+++ current/include/asm-i386/pgtable.h	2004-05-01 15:01:04.000000000 -0700
@@ -32,16 +32,23 @@
 #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
 extern unsigned long empty_zero_page[1024];
 extern pgd_t swapper_pg_dir[1024];
-extern kmem_cache_t *pgd_cache;
-extern kmem_cache_t *pmd_cache;
+extern kmem_cache_t *pgd_cache, *pmd_cache, *kpmd_cache;
 extern spinlock_t pgd_lock;
 extern struct page *pgd_list;
 
 void pmd_ctor(void *, kmem_cache_t *, unsigned long);
+void kpmd_ctor(void *, kmem_cache_t *, unsigned long);
 void pgd_ctor(void *, kmem_cache_t *, unsigned long);
 void pgd_dtor(void *, kmem_cache_t *, unsigned long);
 void pgtable_cache_init(void);
 void paging_init(void);
+void setup_identity_mappings(pgd_t *pgd_base, unsigned long start, unsigned long end);
+
+/*
+ * The size of the low 1:1 mappings we use during bootup,
+ * SMP-boot and ACPI-sleep:
+ */
+#define LOW_MAPPINGS_SIZE (16*1024*1024)
 
 #endif /* !__ASSEMBLY__ */
 
@@ -51,6 +58,11 @@ void paging_init(void);
  * newer 3-level PAE-mode page tables.
  */
 #ifndef __ASSEMBLY__
+
+extern void set_system_gate(unsigned int n, void *addr);
+extern void init_entry_mappings(void);
+extern void entry_trampoline_setup(void);
+
 #ifdef CONFIG_X86_PAE
 # include <asm/pgtable-3level.h>
 #else
@@ -63,7 +75,12 @@ void paging_init(void);
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
-#define USER_PTRS_PER_PGD	(TASK_SIZE/PGDIR_SIZE)
+#if defined(CONFIG_X86_PAE) && defined(CONFIG_X86_4G_VM_LAYOUT)
+# define USER_PTRS_PER_PGD	4
+#else
+# define USER_PTRS_PER_PGD	((TASK_SIZE/PGDIR_SIZE) + ((TASK_SIZE % PGDIR_SIZE) + PGDIR_SIZE-1)/PGDIR_SIZE)
+#endif
+
 #define FIRST_USER_PGD_NR	0
 
 #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
@@ -239,6 +256,7 @@ static inline void ptep_mkdirty(pte_t *p
 
 #define mk_pte(page, pgprot)	pfn_pte(page_to_pfn(page), (pgprot))
 #define mk_pte_huge(entry) ((entry).pte_low |= _PAGE_PRESENT | _PAGE_PSE)
+#define mk_pte_phys(physpage, pgprot) pfn_pte((physpage) >> PAGE_SHIFT, pgprot)
 
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
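
The new USER_PTRS_PER_PGD formula rounds up, since TASK_SIZE is no longer necessarily a multiple of PGDIR_SIZE. With PGDIR_SHIFT being 22 without PAE and 30 with PAE, the three interesting cases evaluate as follows (matching the explicit 4 for the PAE plus 4G layout):

#include <stdio.h>

static unsigned long user_ptrs(unsigned long task_size,
			       unsigned long pgdir_size)
{
	return task_size / pgdir_size +
	       (task_size % pgdir_size + pgdir_size - 1) / pgdir_size;
}

int main(void)
{
	printf("non-PAE, 3/1 layout: %4lu\n",
	       user_ptrs(0xc0000000UL, 1UL << 22));	/* 768 */
	printf("non-PAE, 4/4 layout: %4lu\n",
	       user_ptrs(0xff000000UL, 1UL << 22));	/* 1020 */
	printf("PAE,     4/4 layout: %4lu\n",
	       user_ptrs(0xff000000UL, 1UL << 30));	/* 4 */
	return 0;
}
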
diff -purN -X /home/mbligh/.diff.exclude reference/include/asm-i386/processor.h current/include/asm-i386/processor.h
--- reference/include/asm-i386/processor.h	2004-04-30 12:59:14.000000000 -0700
+++ current/include/asm-i386/processor.h	2004-05-01 15:01:04.000000000 -0700
@@ -291,11 +291,6 @@ extern unsigned int machine_submodel_id;
 extern unsigned int BIOS_revision;
 extern unsigned int mca_pentium_flag;
 
-/*
- * User space process size: 3GB (default).
- */
-#define TASK_SIZE	(PAGE_OFFSET)
-
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */
@@ -408,6 +403,7 @@ struct tss_struct {
 struct thread_struct {
 /* cached TLS descriptors. */
 	struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
+	void *stack_page0, *stack_page1;
 	unsigned long	esp0;
 	unsigned long	sysenter_cs;
 	unsigned long	eip;
@@ -451,7 +447,8 @@ struct thread_struct {
 	.io_bitmap	= { [ 0 ... IO_BITMAP_LONGS] = ~0 },		\
 }
 
-static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread)
+static inline void
+load_esp0(struct tss_struct *tss, struct thread_struct *thread)
 {
 	tss->esp0 = thread->esp0;
 	/* This can only happen when SEP is enabled, no need to test "SEP"arately */
@@ -487,6 +484,23 @@ extern void prepare_to_copy(struct task_
  */
 extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
 
+#ifdef CONFIG_X86_HIGH_ENTRY
+#define virtual_esp0(tsk) \
+	((unsigned long)(tsk)->thread_info->virtual_stack + ((tsk)->thread.esp0 - (unsigned long)(tsk)->thread_info->real_stack))
+#else
+# define virtual_esp0(tsk) ((tsk)->thread.esp0)
+#endif
+
+#define load_virtual_esp0(tss, task)					\
+	do {								\
+		tss->esp0 = virtual_esp0(task);				\
+		if (likely(cpu_has_sep) && unlikely(tss->ss1 != task->thread.sysenter_cs)) {	\
+			tss->ss1 = task->thread.sysenter_cs;		\
+			wrmsr(MSR_IA32_SYSENTER_CS,			\
+				task->thread.sysenter_cs, 0);		\
+		}							\
+	} while (0)
+
 extern unsigned long thread_saved_pc(struct task_struct *tsk);
 void show_trace(struct task_struct *task, unsigned long *stack);
 
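virtual_esp0() rebases the TSS esp0 value from the task's real (lowmem) stack into the fixed high-virtual alias of that stack, so the entry trampoline can run before the pagetable switch. The arithmetic is a plain offset translation; all addresses in this sketch are fabricated:

#include <stdio.h>

int main(void)
{
	unsigned long real_stack    = 0xc1234000UL;	/* made up */
	unsigned long virtual_stack = 0xfffda000UL;	/* made up */
	unsigned long esp0          = 0xc1235f54UL;	/* into real stack */

	printf("virtual esp0 = 0x%08lx\n",
	       virtual_stack + (esp0 - real_stack));
	return 0;
}
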
diff -purN -X /home/mbligh/.diff.exclude reference/include/asm-i386/string.h current/include/asm-i386/string.h
--- reference/include/asm-i386/string.h	2004-02-18 14:57:17.000000000 -0800
+++ current/include/asm-i386/string.h	2004-05-01 15:01:04.000000000 -0700
@@ -58,6 +58,29 @@ __asm__ __volatile__(
 return dest;
 }
 
+/*
+ * This is a more generic variant of strncpy() suitable for
+ * implementing string-access routines with all sorts of return
+ * code semantics. It's used by mm/usercopy.c.
+ */
+static inline size_t strncpy_count(char * dest,const char *src,size_t count)
+{
+	__asm__ __volatile__(
+
+	"1:\tdecl %0\n\t"
+	"js 2f\n\t"
+	"lodsb\n\t"
+	"stosb\n\t"
+	"testb %%al,%%al\n\t"
+	"jne 1b\n\t"
+	"2:"
+	"incl %0"
+	: "=c" (count)
+	:"S" (src),"D" (dest),"0" (count) : "memory");
+
+	return count;
+}
+
 static inline char * strcat(char * dest,const char * src)
 {
 int d0, d1, d2, d3;
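
The strncpy_count() asm above has semantics worth spelling out: it copies at most count bytes including the terminating NUL, returning count - strlen(src) when the whole string fits and 0 when the budget runs out first (having copied exactly count bytes, unterminated). A portable C rendering, for reading rather than for use:

#include <stdio.h>
#include <stddef.h>

static size_t strncpy_count_c(char *dest, const char *src, size_t count)
{
	size_t n = count;

	while (n--)
		if ((*dest++ = *src++) == '\0')
			return n + 1;	/* == count - strlen(src) */
	return 0;			/* budget exhausted before the NUL */
}

int main(void)
{
	char buf[8];

	printf("%zu\n", strncpy_count_c(buf, "abc", sizeof(buf)));	/* 5 */
	return 0;
}
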
diff -purN -X /home/mbligh/.diff.exclude reference/include/asm-i386/thread_info.h current/include/asm-i386/thread_info.h
--- reference/include/asm-i386/thread_info.h	2004-04-30 11:23:48.000000000 -0700
+++ current/include/asm-i386/thread_info.h	2004-05-01 15:01:04.000000000 -0700
@@ -37,6 +37,7 @@ struct thread_info {
 					 	   0-0xBFFFFFFF for user-thead
 						   0-0xFFFFFFFF for kernel-thread
 						*/
+	void *real_stack, *virtual_stack, *user_pgd;
 	struct restart_block    restart_block;
 
 	unsigned long           previous_esp;   /* ESP of the previous stack in case
@@ -45,18 +46,6 @@ struct thread_info {
 	__u8			supervisor_stack[0];
 };
 
-#else /* !__ASSEMBLY__ */
-
-/* offsets into the thread_info struct for assembly code access */
-#define TI_TASK		0x00000000
-#define TI_EXEC_DOMAIN	0x00000004
-#define TI_FLAGS	0x00000008
-#define TI_STATUS	0x0000000C
-#define TI_CPU		0x00000010
-#define TI_PRE_COUNT	0x00000014
-#define TI_ADDR_LIMIT	0x00000018
-#define TI_RESTART_BLOCK 0x000001C
-
 #endif
 
 #define PREEMPT_ACTIVE		0x4000000
@@ -74,7 +63,7 @@ struct thread_info {
  */
 #ifndef __ASSEMBLY__
 
-#define INIT_THREAD_INFO(tsk)			\
+#define INIT_THREAD_INFO(tsk, thread_info)	\
 {						\
 	.task		= &tsk,			\
 	.exec_domain	= &default_exec_domain,	\
@@ -85,6 +74,7 @@ struct thread_info {
 	.restart_block = {			\
 		.fn = do_no_restart_syscall,	\
 	},					\
+	.real_stack	= &thread_info,		\
 }
 
 #define init_thread_info	(init_thread_union.thread_info)
@@ -151,6 +141,7 @@ static inline unsigned long current_stac
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 #define TIF_SINGLESTEP		4	/* restore singlestep on return to user mode */
 #define TIF_IRET		5	/* return with iret */
+#define	TIF_DB7			6	/* has debug registers */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 
@@ -160,6 +151,7 @@ static inline unsigned long current_stac
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
 #define _TIF_SINGLESTEP		(1<<TIF_SINGLESTEP)
 #define _TIF_IRET		(1<<TIF_IRET)
+#define	_TIF_DB7		(1<<TIF_DB7)
 #define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 
diff -purN -X /home/mbligh/.diff.exclude reference/include/asm-i386/tlbflush.h current/include/asm-i386/tlbflush.h
--- reference/include/asm-i386/tlbflush.h	2002-12-09 18:46:24.000000000 -0800
+++ current/include/asm-i386/tlbflush.h	2004-05-01 15:01:04.000000000 -0700
@@ -85,22 +85,28 @@ extern unsigned long pgkern_mask;
 
 static inline void flush_tlb_mm(struct mm_struct *mm)
 {
+#ifndef CONFIG_X86_SWITCH_PAGETABLES
 	if (mm == current->active_mm)
 		__flush_tlb();
+#endif
 }
 
 static inline void flush_tlb_page(struct vm_area_struct *vma,
 	unsigned long addr)
 {
+#ifndef CONFIG_X86_SWITCH_PAGETABLES
 	if (vma->vm_mm == current->active_mm)
 		__flush_tlb_one(addr);
+#endif
 }
 
 static inline void flush_tlb_range(struct vm_area_struct *vma,
 	unsigned long start, unsigned long end)
 {
+#ifndef CONFIG_X86_SWITCH_PAGETABLES
 	if (vma->vm_mm == current->active_mm)
 		__flush_tlb();
+#endif
 }
 
 #else
@@ -111,11 +117,10 @@ static inline void flush_tlb_range(struc
 	__flush_tlb()
 
 extern void flush_tlb_all(void);
-extern void flush_tlb_current_task(void);
 extern void flush_tlb_mm(struct mm_struct *);
 extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
 
-#define flush_tlb()	flush_tlb_current_task()
+#define flush_tlb()	flush_tlb_all()
 
 static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long start, unsigned long end)
 {
diff -purN -X /home/mbligh/.diff.exclude reference/include/asm-i386/uaccess.h current/include/asm-i386/uaccess.h
--- reference/include/asm-i386/uaccess.h	2004-05-01 15:01:05.000000000 -0700
+++ current/include/asm-i386/uaccess.h	2004-05-01 15:03:05.000000000 -0700
@@ -26,7 +26,7 @@
 
 
 #define KERNEL_DS	MAKE_MM_SEG(0xFFFFFFFFUL)
-#define USER_DS		MAKE_MM_SEG(PAGE_OFFSET)
+#define USER_DS		MAKE_MM_SEG(TASK_SIZE)
 
 #define get_ds()	(KERNEL_DS)
 #define get_fs()	(current_thread_info()->addr_limit)
@@ -149,6 +149,45 @@ extern void __get_user_4(void);
 		:"=a" (ret),"=d" (x) \
 		:"0" (ptr))
 
+extern int get_user_size(unsigned int size, void *val, const void *ptr);
+extern int put_user_size(unsigned int size, const void *val, void *ptr);
+extern int zero_user_size(unsigned int size, void *ptr);
+extern int copy_str_fromuser_size(unsigned int size, void *val, const void *ptr);
+extern int strlen_fromuser_size(unsigned int size, const void *ptr);
+
+
+# define indirect_get_user(x,ptr)					\
+({	int __ret_gu,__val_gu;						\
+	__typeof__(ptr) __ptr_gu = (ptr);				\
+	__ret_gu = get_user_size(sizeof(*__ptr_gu), &__val_gu,__ptr_gu) ? -EFAULT : 0;\
+	(x) = (__typeof__(*__ptr_gu))__val_gu;				\
+	__ret_gu;							\
+})
+#define indirect_put_user(x,ptr)					\
+({									\
+	__typeof__(*(ptr)) *__ptr_pu = (ptr), __x_pu = (x);		\
+	put_user_size(sizeof(*__ptr_pu), &__x_pu, __ptr_pu) ? -EFAULT : 0; \
+})
+#define __indirect_put_user indirect_put_user
+#define __indirect_get_user indirect_get_user
+
+#define indirect_copy_from_user(to,from,n) get_user_size(n,to,from)
+#define indirect_copy_to_user(to,from,n) put_user_size(n,from,to)
+
+#define __indirect_copy_from_user indirect_copy_from_user
+#define __indirect_copy_to_user indirect_copy_to_user
+
+#define indirect_strncpy_from_user(dst, src, count) \
+		copy_str_fromuser_size(count, dst, src)
+
+extern int strlen_fromuser_size(unsigned int size, const void *ptr);
+#define indirect_strnlen_user(str, n) strlen_fromuser_size(n, str)
+#define indirect_strlen_user(str) indirect_strnlen_user(str, ~0UL >> 1)
+
+extern int zero_user_size(unsigned int size, void *ptr);
+
+#define indirect_clear_user(mem, len) zero_user_size(len, mem)
+#define __indirect_clear_user clear_user
 
 /* Careful: we have to cast the result to the type of the pointer for sign reasons */
 /**
@@ -168,7 +207,7 @@ extern void __get_user_4(void);
  * Returns zero on success, or -EFAULT on error.
  * On error, the variable @x is set to zero.
  */
-#define get_user(x,ptr)							\
+#define direct_get_user(x,ptr)						\
 ({	int __ret_gu,__val_gu;						\
 	switch(sizeof (*(ptr))) {					\
 	case 1:  __get_user_x(1,__ret_gu,__val_gu,ptr); break;		\
@@ -198,7 +237,7 @@ extern void __put_user_bad(void);
  *
  * Returns zero on success, or -EFAULT on error.
  */
-#define put_user(x,ptr)							\
+#define direct_put_user(x,ptr)						\
   __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
 
 
@@ -222,7 +261,7 @@ extern void __put_user_bad(void);
  * Returns zero on success, or -EFAULT on error.
  * On error, the variable @x is set to zero.
  */
-#define __get_user(x,ptr) \
+#define __direct_get_user(x,ptr) \
   __get_user_nocheck((x),(ptr),sizeof(*(ptr)))
 
 
@@ -245,7 +284,7 @@ extern void __put_user_bad(void);
  *
  * Returns zero on success, or -EFAULT on error.
  */
-#define __put_user(x,ptr) \
+#define __direct_put_user(x,ptr) \
   __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
 
 #define __put_user_nocheck(x,ptr,size)				\
@@ -396,7 +435,7 @@ unsigned long __copy_from_user_ll(void *
  * On success, this will be zero.
  */
 static inline unsigned long
-__copy_to_user(void __user *to, const void *from, unsigned long n)
+__direct_copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 	if (__builtin_constant_p(n)) {
 		unsigned long ret;
@@ -434,7 +473,7 @@ __copy_to_user(void __user *to, const vo
  * data to the requested size using zero bytes.
  */
 static inline unsigned long
-__copy_from_user(void *to, const void __user *from, unsigned long n)
+__direct_copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	if (__builtin_constant_p(n)) {
 		unsigned long ret;
@@ -468,11 +507,11 @@ __copy_from_user(void *to, const void __
  * On success, this will be zero.
  */
 static inline unsigned long
-copy_to_user(void __user *to, const void *from, unsigned long n)
+direct_copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 	might_sleep();
 	if (access_ok(VERIFY_WRITE, to, n))
-		n = __copy_to_user(to, from, n);
+		n = __direct_copy_to_user(to, from, n);
 	return n;
 }
 
@@ -493,11 +532,11 @@ copy_to_user(void __user *to, const void
  * data to the requested size using zero bytes.
  */
 static inline unsigned long
-copy_from_user(void *to, const void __user *from, unsigned long n)
+direct_copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	might_sleep();
 	if (access_ok(VERIFY_READ, from, n))
-		n = __copy_from_user(to, from, n);
+		n = __direct_copy_from_user(to, from, n);
 	else
 		memset(to, 0, n);
 	return n;
@@ -520,10 +559,68 @@ long __strncpy_from_user(char *dst, cons
  * If there is a limit on the length of a valid string, you may wish to
  * consider using strnlen_user() instead.
  */
-#define strlen_user(str) strnlen_user(str, ~0UL >> 1)
 
-long strnlen_user(const char __user *str, long n);
-unsigned long clear_user(void __user *mem, unsigned long len);
-unsigned long __clear_user(void __user *mem, unsigned long len);
+long direct_strncpy_from_user(char *dst, const char *src, long count);
+long __direct_strncpy_from_user(char *dst, const char *src, long count);
+#define direct_strlen_user(str) direct_strnlen_user(str, ~0UL >> 1)
+long direct_strnlen_user(const char *str, long n);
+unsigned long direct_clear_user(void *mem, unsigned long len);
+unsigned long __direct_clear_user(void *mem, unsigned long len);
+
+extern int indirect_uaccess;
+
+#ifdef CONFIG_X86_UACCESS_INDIRECT
+
+/*
+ * Return code and zeroing semantics:
+ *
+ * __clear_user          0                      <-> bytes not done
+ * clear_user            0                      <-> bytes not done
+ * __copy_to_user        0                      <-> bytes not done
+ * copy_to_user          0                      <-> bytes not done
+ * __copy_from_user      0                      <-> bytes not done, zero rest
+ * copy_from_user        0                      <-> bytes not done, zero rest
+ * __get_user            0                      <-> -EFAULT
+ * get_user              0                      <-> -EFAULT
+ * __put_user            0                      <-> -EFAULT
+ * put_user              0                      <-> -EFAULT
+ * strlen_user           strlen + 1             <-> 0
+ * strnlen_user          strlen + 1 (or n+1)    <-> 0
+ * strncpy_from_user     strlen (or n)          <-> -EFAULT
+ *
+ */
+
+#define __clear_user(mem,len) __indirect_clear_user(mem,len)
+#define clear_user(mem,len) indirect_clear_user(mem,len)
+#define __copy_to_user(to,from,n) __indirect_copy_to_user(to,from,n)
+#define copy_to_user(to,from,n) indirect_copy_to_user(to,from,n)
+#define __copy_from_user(to,from,n) __indirect_copy_from_user(to,from,n)
+#define copy_from_user(to,from,n) indirect_copy_from_user(to,from,n)
+#define __get_user(val,ptr) __indirect_get_user(val,ptr)
+#define get_user(val,ptr) indirect_get_user(val,ptr)
+#define __put_user(val,ptr) __indirect_put_user(val,ptr)
+#define put_user(val,ptr) indirect_put_user(val,ptr)
+#define strlen_user(str) indirect_strlen_user(str)
+#define strnlen_user(src,count) indirect_strnlen_user(src,count)
+#define strncpy_from_user(dst,src,count) \
+			indirect_strncpy_from_user(dst,src,count)
+
+#else
+
+#define __clear_user __direct_clear_user
+#define clear_user direct_clear_user
+#define __copy_to_user __direct_copy_to_user
+#define copy_to_user direct_copy_to_user
+#define __copy_from_user __direct_copy_from_user
+#define copy_from_user direct_copy_from_user
+#define __get_user __direct_get_user
+#define get_user direct_get_user
+#define __put_user __direct_put_user
+#define put_user direct_put_user
+#define strlen_user direct_strlen_user
+#define strnlen_user direct_strnlen_user
+#define strncpy_from_user direct_strncpy_from_user
+
+#endif /* CONFIG_X86_UACCESS_INDIRECT */
 
 #endif /* __i386_UACCESS_H */
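The net effect of the block above: with CONFIG_X86_UACCESS_INDIRECT every uaccess primitive resolves to an indirect_* wrapper that reaches user memory through an explicit page-table walk (mm/usercopy.c, below) instead of dereferencing the user pointer through the flat segment. A minimal sketch of what such a wrapper amounts to, built on get_user_size() from mm/usercopy.c - illustrative only, the real indirect_get_user is defined elsewhere in the patch:

	/*
	 * Sketch, not the patch's actual wrapper.  A nonzero
	 * get_user_size() return means bytes were left uncopied,
	 * which maps to -EFAULT per the semantics table above.
	 */
	#define indirect_get_user(x,ptr)				\
	({								\
		__typeof__(*(ptr)) __x = 0;				\
		int __err = get_user_size(sizeof(__x), &__x,		\
					(const void *)(ptr));		\
		(x) = __x;						\
		__err ? -EFAULT : 0;					\
	})
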
diff -purN -X /home/mbligh/.diff.exclude reference/mm/Makefile current/mm/Makefile
--- reference/mm/Makefile	2004-05-01 10:28:28.000000000 -0700
+++ current/mm/Makefile	2004-05-01 15:01:04.000000000 -0700
@@ -13,5 +13,6 @@ obj-y			:= bootmem.o filemap.o mempool.o
 			   $(mmu-y)
 
 obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o
+obj-$(CONFIG_X86_4G)	+= usercopy.o
 obj-$(CONFIG_HUGETLBFS)	+= hugetlb.o
 obj-$(CONFIG_NUMA) 	+= mempolicy.o
diff -purN -X /home/mbligh/.diff.exclude reference/mm/memory.c current/mm/memory.c
--- reference/mm/memory.c	2004-05-01 10:28:28.000000000 -0700
+++ current/mm/memory.c	2004-05-01 15:01:04.000000000 -0700
@@ -108,7 +108,8 @@ static inline void free_one_pmd(struct m
 	pte_free_tlb(tlb, page);
 }
 
-static inline void free_one_pgd(struct mmu_gather *tlb, pgd_t * dir)
+static inline void free_one_pgd(struct mmu_gather *tlb, pgd_t * dir,
+							int pgd_idx)
 {
 	int j;
 	pmd_t * pmd;
@@ -122,8 +123,11 @@ static inline void free_one_pgd(struct m
 	}
 	pmd = pmd_offset(dir, 0);
 	pgd_clear(dir);
-	for (j = 0; j < PTRS_PER_PMD ; j++)
+	for (j = 0; j < PTRS_PER_PMD ; j++) {
+		if (pgd_idx * PGDIR_SIZE + j * PMD_SIZE >= TASK_SIZE)
+			break;
 		free_one_pmd(tlb, pmd+j);
+	}
 	pmd_free_tlb(tlb, pmd);
 }
 
@@ -136,11 +140,13 @@ static inline void free_one_pgd(struct m
 void clear_page_tables(struct mmu_gather *tlb, unsigned long first, int nr)
 {
 	pgd_t * page_dir = tlb->mm->pgd;
+	int pgd_idx = first;
 
 	page_dir += first;
 	do {
-		free_one_pgd(tlb, page_dir);
+		free_one_pgd(tlb, page_dir, pgd_idx);
 		page_dir++;
+		pgd_idx++;
 	} while (--nr);
 }
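
The pgd_idx/TASK_SIZE clamp above matters once CONFIG_X86_4G_VM_LAYOUT moves user space into pgd slots that previously held only kernel mappings: the last user pgd can then also cover mappings at and above TASK_SIZE, and those must survive process teardown. A worked example with PAE page-table geometry (the TASK_SIZE value is hypothetical; the real one depends on the layout set up elsewhere in the patch):

	/*
	 * PAE: PGDIR_SIZE = 1 GB, PMD_SIZE = 2 MB, PTRS_PER_PMD = 512.
	 * Assume TASK_SIZE = 0xff000000 (4 GB minus a 16 MB
	 * kernel-private area).  In the last pgd (pgd_idx = 3,
	 * base 0xc0000000) the check
	 *
	 *	pgd_idx * PGDIR_SIZE + j * PMD_SIZE >= TASK_SIZE
	 *	0xc0000000 + j * 0x200000 >= 0xff000000
	 *
	 * trips at j = 504, so the top 8 pmds (16 MB) are skipped
	 * rather than freed with the process.
	 */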
 
@@ -432,7 +438,7 @@ static void zap_pmd_range(struct mmu_gat
 		unsigned long size, struct zap_details *details)
 {
 	pmd_t * pmd;
-	unsigned long end;
+	unsigned long end, pgd_boundary;
 
 	if (pgd_none(*dir))
 		return;
@@ -443,8 +449,9 @@ static void zap_pmd_range(struct mmu_gat
 	}
 	pmd = pmd_offset(dir, address);
 	end = address + size;
-	if (end > ((address + PGDIR_SIZE) & PGDIR_MASK))
-		end = ((address + PGDIR_SIZE) & PGDIR_MASK);
+	pgd_boundary = ((address + PGDIR_SIZE) & PGDIR_MASK);
+	if (pgd_boundary && (end > pgd_boundary))
+		end = pgd_boundary;
 	do {
 		zap_pte_range(tlb, pmd, address, end - address, details);
 		address = (address + PMD_SIZE) & PMD_MASK; 
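
The pgd_boundary test above fixes a 32-bit wrap-around that the enlarged user space exposes: for an address inside the topmost pgd, (address + PGDIR_SIZE) & PGDIR_MASK overflows past 4 GB and masks to 0, so the old unconditional comparison clamped end down to 0 and the zap loop misbehaved. A minimal demonstration, assuming the non-PAE 4 MB PGDIR_SIZE:

	/* non-PAE: PGDIR_SIZE = 0x400000, PGDIR_MASK = 0xffc00000 */
	unsigned long address = 0xffdff000;	/* in the last pgd */
	unsigned long pgd_boundary =
			(address + PGDIR_SIZE) & PGDIR_MASK;
	/*
	 * 0xffdff000 + 0x400000 truncates to 0x001ff000, which masks
	 * to 0.  The old code set end = 0 here; the new code sees
	 * pgd_boundary == 0 and leaves end alone.
	 */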
diff -purN -X /home/mbligh/.diff.exclude reference/mm/slab.c current/mm/slab.c
--- reference/mm/slab.c	2004-05-01 10:21:04.000000000 -0700
+++ current/mm/slab.c	2004-05-01 15:01:04.000000000 -0700
@@ -1327,10 +1327,13 @@ next:
 			 * the cache that's used by kmalloc(24), otherwise
 			 * the creation of further caches will BUG().
 			 */
-			cachep->array[smp_processor_id()] = &initarray_generic.cache;
+			cachep->array[smp_processor_id()] =
+					&initarray_generic.cache;
 			g_cpucache_up = PARTIAL;
 		} else {
-			cachep->array[smp_processor_id()] = kmalloc(sizeof(struct arraycache_init),GFP_KERNEL);
+			cachep->array[smp_processor_id()] =
+				kmalloc(sizeof(struct arraycache_init),
+					GFP_KERNEL);
 		}
 		BUG_ON(!ac_data(cachep));
 		ac_data(cachep)->avail = 0;
@@ -1344,7 +1347,7 @@ next:
 	} 
 
 	cachep->lists.next_reap = jiffies + REAPTIMEOUT_LIST3 +
-					((unsigned long)cachep)%REAPTIMEOUT_LIST3;
+				((unsigned long)cachep)%REAPTIMEOUT_LIST3;
 
 	/* Need the semaphore to access the chain. */
 	down(&cache_chain_sem);
@@ -1357,16 +1360,24 @@ next:
 		list_for_each(p, &cache_chain) {
 			kmem_cache_t *pc = list_entry(p, kmem_cache_t, next);
 			char tmp;
-			/* This happens when the module gets unloaded and doesn't
-			   destroy its slab cache and noone else reuses the vmalloc
-			   area of the module. Print a warning. */
-			if (__get_user(tmp,pc->name)) { 
-				printk("SLAB: cache with size %d has lost its name\n", 
-					pc->objsize); 
+
+			/*
+			 * This happens when the module gets unloaded and
+			 * doesn't destroy its slab cache and no one else
+			 * reuses
+			 * the vmalloc area of the module. Print a warning.
+			 */
+#ifdef CONFIG_X86_UACCESS_INDIRECT
+			if (__direct_get_user(tmp,pc->name)) {
+#else
+			if (__get_user(tmp,pc->name)) {
+#endif
+				printk("SLAB: cache with size %d has lost its "
+						"name\n", pc->objsize);
 				continue; 
 			} 	
 			if (!strcmp(pc->name,name)) { 
-				printk("kmem_cache_create: duplicate cache %s\n",name); 
+				printk("kmem_cache_create: duplicate "
+						"cache %s\n", name);
 				up(&cache_chain_sem); 
 				unlock_cpu_hotplug();
 				BUG(); 
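
The switch to __direct_get_user() above is deliberate, not a rename: this probe dereferences a possibly-stale kernel (vmalloc) pointer and relies on the exception-table fixup to absorb the fault. The indirect uaccess path resolves addresses through the user page tables (see rw_vm() below), so it can only ever reach user VM and would always fail on a kernel pointer. The direct variant keeps the old single-instruction probe; schematically:

	char tmp;
	/* probe a kernel pointer that may point into an unloaded
	 * module's vmalloc area; must bypass the page-table based
	 * indirect path */
	if (__direct_get_user(tmp, pc->name))
		printk("name page is gone\n");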
diff -purN -X /home/mbligh/.diff.exclude reference/mm/usercopy.c current/mm/usercopy.c
--- reference/mm/usercopy.c	1969-12-31 16:00:00.000000000 -0800
+++ current/mm/usercopy.c	2004-05-01 15:01:04.000000000 -0700
@@ -0,0 +1,276 @@
+/*
+ * linux/mm/usercopy.c
+ *
+ * (C) Copyright 2003 Ingo Molnar
+ *
+ * Generic implementation of all the user-VM access functions, without
+ * relying on being able to access the VM directly.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/smp_lock.h>
+#include <linux/ptrace.h>
+#include <linux/interrupt.h>
+
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
+#include <asm/atomic_kmap.h>
+
+/*
+ * Look up the page backing the given user address and pin it.
+ * Returns NULL on a bad address, or when called in atomic context
+ * and the page would have to be faulted in.
+ */
+static inline struct page *pin_page(unsigned long addr, int write)
+{
+	struct mm_struct *mm = current->mm ? : &init_mm;
+	struct page *page = NULL;
+	int ret;
+
+	spin_lock(&mm->page_table_lock);
+	/*
+	 * Do a quick atomic lookup first - this is the fastpath.
+	 */
+	page = follow_page(mm, addr, write);
+	if (likely(page != NULL)) {
+		if (!PageReserved(page))
+			get_page(page);
+		spin_unlock(&mm->page_table_lock);
+		return page;
+	}
+
+	/*
+	 * No luck - bad address or need to fault in the page:
+	 */
+	spin_unlock(&mm->page_table_lock);
+
+	/*
+	 * In the context of filemap_copy_from_user(), we are not allowed
+	 * to sleep.  We must fail this usercopy attempt and allow
+	 * filemap_copy_from_user() to recover: drop its atomic kmap and use
+	 * a sleeping kmap instead.
+	 */
+	if (in_atomic())
+		return NULL;
+
+	down_read(&mm->mmap_sem);
+	ret = get_user_pages(current, mm, addr, 1, write, 0, &page, NULL);
+	up_read(&mm->mmap_sem);
+	if (ret <= 0)
+		return NULL;
+	return page;
+}
+
+static inline void unpin_page(struct page *page)
+{
+	put_page(page);
+}
+
+/*
+ * Access the current task's user address space.
+ * The source/target buffer must be in kernel space.
+ * Does not walk the page tables directly - pages are looked up and
+ * faulted in via pin_page()/get_user_pages().
+ * Returns the number of bytes NOT transferred.
+ */
+static int rw_vm(unsigned long addr, void *buf, int len, int write)
+{
+	if (!len)
+		return 0;
+
+	/* ignore errors, just check how much was successfully transferred */
+	while (len) {
+		struct page *page = NULL;
+		int bytes, offset;
+		void *maddr;
+
+		page = pin_page(addr, write);
+		if (!page)
+			break;
+
+		bytes = len;
+		offset = addr & (PAGE_SIZE-1);
+		if (bytes > PAGE_SIZE-offset)
+			bytes = PAGE_SIZE-offset;
+
+		maddr = kmap_atomic(page, KM_USER_COPY);
+
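+/*
+ * Word-sized transfers go through a single typed access, so 1-, 4-
+ * and 8-byte get_user()/put_user() operations remain single
+ * loads/stores through the kmap rather than byte-wise memcpy.
+ */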
+#define HANDLE_TYPE(type) \
+	case sizeof(type): *(type *)(maddr+offset) = *(type *)(buf); break;
+
+		if (write) {
+			switch (bytes) {
+			HANDLE_TYPE(char);
+			HANDLE_TYPE(int);
+			HANDLE_TYPE(long long);
+			default:
+				memcpy(maddr + offset, buf, bytes);
+			}
+		} else {
+#undef HANDLE_TYPE
+#define HANDLE_TYPE(type) \
+	case sizeof(type): *(type *)(buf) = *(type *)(maddr+offset); break;
+			switch (bytes) {
+			HANDLE_TYPE(char);
+			HANDLE_TYPE(int);
+			HANDLE_TYPE(long long);
+			default:
+				memcpy(buf, maddr + offset, bytes);
+			}
+#undef HANDLE_TYPE
+		}
+		kunmap_atomic(maddr, KM_USER_COPY);
+		unpin_page(page);
+		len -= bytes;
+		buf += bytes;
+		addr += bytes;
+	}
+
+	return len;
+}
+
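+/*
+ * String/zeroing variant of rw_vm().  'copy' selects the operation:
+ *
+ *   copy == 0: strnlen() of the user string ('buf0' is unused)
+ *   copy == 1: strncpy from user space into 'buf0'
+ *   copy == 2: zero 'len' bytes of user space ('buf0' is unused)
+ *
+ * Returns the number of bytes left undone, or -EFAULT on a bad
+ * address.
+ */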
+static int str_vm(unsigned long addr, void *buf0, int len, int copy)
+{
+	struct mm_struct *mm = current->mm ? : &init_mm;
+	struct page *page;
+	void *buf = buf0;
+
+	if (!len)
+		return len;
+
+	down_read(&mm->mmap_sem);
+	/* ignore errors, just check how much was successfully transferred */
+	while (len) {
+		int bytes, ret, offset, left, copied;
+		char *maddr;
+
+		ret = get_user_pages(current, mm, addr, 1, copy == 2,
+					0, &page, NULL);
+		if (ret <= 0) {
+			up_read(&mm->mmap_sem);
+			return -EFAULT;
+		}
+
+		bytes = len;
+		offset = addr & (PAGE_SIZE-1);
+		if (bytes > PAGE_SIZE-offset)
+			bytes = PAGE_SIZE-offset;
+
+		maddr = kmap_atomic(page, KM_USER_COPY);
+		if (copy == 2) {
+			memset(maddr + offset, 0, bytes);
+			copied = bytes;
+			left = 0;
+		} else if (copy == 1) {
+			left = strncpy_count(buf, maddr + offset, bytes);
+			copied = bytes - left;
+		} else {
+			copied = strnlen(maddr + offset, bytes);
+			left = bytes - copied;
+		}
+		BUG_ON(bytes < 0 || copied < 0);
+		kunmap_atomic(maddr, KM_USER_COPY);
+		page_cache_release(page);
+		len -= copied;
+		buf += copied;
+		addr += copied;
+		if (left)
+			break;
+	}
+	up_read(&mm->mmap_sem);
+
+	return len;
+}
+
+/*
+ * Copies memory from user space (ptr) into kernel space (val).
+ *
+ * Returns the number of bytes not copied; the uncopied tail of
+ * 'val' is zeroed.
+ */
+int get_user_size(unsigned int size, void *val, const void *ptr)
+{
+	int ret;
+
+	if (unlikely(segment_eq(get_fs(), KERNEL_DS)))
+		ret = __direct_copy_from_user(val, ptr, size);
+	else
+		ret = rw_vm((unsigned long)ptr, val, size, 0);
+	if (ret)
+		/*
+		 * Zero the rest:
+		 */
+		memset(val + size - ret, 0, ret);
+	return ret;
+}
+
+/*
+ * Copies memory from kernel space (val) into user space (ptr).
+ *
+ * Returns the number of bytes not copied.
+ */
+int put_user_size(unsigned int size, const void *val, void *ptr)
+{
+	if (unlikely(segment_eq(get_fs(), KERNEL_DS)))
+		return __direct_copy_to_user(ptr, val, size);
+	else
+		return rw_vm((unsigned long)ptr, (void *)val, size, 1);
+}
+
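+/*
+ * Copies a NUL-terminated string from user space (ptr) into kernel
+ * space (val).
+ *
+ * Returns the length of the copied string, like strncpy_from_user(),
+ * or -EFAULT on a bad address.
+ */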
+int copy_str_fromuser_size(unsigned int size, void *val, const void *ptr)
+{
+	int copied, left;
+
+	if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
+		left = strncpy_count(val, ptr, size);
+		copied = size - left;
+		BUG_ON(copied < 0);
+
+		return copied;
+	}
+	left = str_vm((unsigned long)ptr, val, size, 1);
+	if (left < 0)
+		return left;
+	copied = size - left;
+	BUG_ON(copied < 0);
+
+	return copied;
+}
+
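+/*
+ * Returns the length of the user string including the terminating
+ * NUL (i.e. strlen + 1, like strnlen_user()), or 0 on a bad address.
+ */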
+int strlen_fromuser_size(unsigned int size, const void *ptr)
+{
+	int copied, left;
+
+	if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
+		copied = strnlen(ptr, size) + 1;
+		BUG_ON(copied < 0);
+
+		return copied;
+	}
+	left = str_vm((unsigned long)ptr, NULL, size, 0);
+	if (left < 0)
+		return 0;
+	copied = size - left + 1;
+	BUG_ON(copied < 0);
+
+	return copied;
+}
+
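+/*
+ * Zeroes 'size' bytes of user space.
+ *
+ * Returns the number of bytes that could not be zeroed, like
+ * clear_user().
+ */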
+int zero_user_size(unsigned int size, void *ptr)
+{
+	int left;
+
+	if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
+		memset(ptr, 0, size);
+		return 0;
+	}
+	left = str_vm((unsigned long)ptr, NULL, size, 2);
+	if (left < 0)
+		return size;
+	return left;
+}
+
+EXPORT_SYMBOL(get_user_size);
+EXPORT_SYMBOL(put_user_size);
+EXPORT_SYMBOL(zero_user_size);
+EXPORT_SYMBOL(copy_str_fromuser_size);
+EXPORT_SYMBOL(strlen_fromuser_size);
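
End to end, a get_user() on a CONFIG_X86_4G kernel bottoms out in get_user_size() -> rw_vm() -> pin_page(): the target page is looked up through the process page tables (and faulted in if absent), pinned, mapped with an atomic kmap, and the data copied through that kernel mapping. A minimal sketch of a 4-byte fetch using only the functions above (example_fetch_u32 is a hypothetical helper, not part of the patch):

	int example_fetch_u32(const void *uptr, unsigned int *val)
	{
		/*
		 * Nonzero return = bytes left uncopied; get_user_size()
		 * has already zeroed the uncopied tail of *val.
		 */
		if (get_user_size(sizeof(*val), val, uptr))
			return -EFAULT;
		return 0;
	}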