CREDITS | 2 Documentation/Configure.help | 11 ++++ Documentation/preempt-locking.txt | 104 ++++++++++++++++++++++++++++++++++++++ MAINTAINERS | 8 ++ arch/alpha/kernel/process.c | 1 arch/i386/config.in | 8 ++ arch/i386/kernel/cpuid.c | 4 + arch/i386/kernel/entry.S | 49 +++++++++++++++++ arch/i386/kernel/i387.c | 3 + arch/i386/kernel/ioport.c | 5 + arch/i386/kernel/irq.c | 15 ++++- arch/i386/kernel/ldt.c | 2 arch/i386/kernel/microcode.c | 3 + arch/i386/kernel/msr.c | 15 +++-- arch/i386/kernel/mtrr.c | 6 ++ arch/i386/kernel/smp.c | 29 ++++++++-- arch/i386/kernel/traps.c | 2 arch/i386/lib/dec_and_lock.c | 1 arch/i386/mm/init.c | 2 arch/mips/config-shared.in | 1 arch/mips/kernel/i8259.c | 1 arch/mips/kernel/irq.c | 29 ++++++++++ arch/mips/mm/extable.c | 1 arch/ppc/config.in | 2 arch/ppc/kernel/entry.S | 40 ++++++++++++++ arch/ppc/kernel/irq.c | 52 ++++++++++++++++--- arch/ppc/kernel/mk_defs.c | 3 + arch/ppc/kernel/open_pic.c | 9 ++- arch/ppc/kernel/setup.c | 14 +++++ arch/ppc/kernel/temp.c | 8 ++ arch/ppc/lib/dec_and_lock.c | 1 arch/ppc/mm/init.c | 6 ++ arch/ppc/mm/tlb.c | 16 +++++ drivers/ieee1394/csr.c | 1 drivers/sound/sound_core.c | 1 fs/adfs/map.c | 1 fs/exec.c | 2 fs/fat/cache.c | 1 fs/nfsd/nfssvc.c | 1 fs/nls/nls_base.c | 1 include/asm-i386/desc.h | 5 + include/asm-i386/hardirq.h | 14 +++-- include/asm-i386/highmem.h | 7 ++ include/asm-i386/hw_irq.h | 19 +++++- include/asm-i386/i387.h | 3 - include/asm-i386/pgalloc.h | 12 ++++ include/asm-i386/smplock.h | 14 +++++ include/asm-i386/softirq.h | 11 ++-- include/asm-i386/spinlock.h | 18 +++--- include/asm-i386/system.h | 7 ++ include/asm-mips/smplock.h | 15 +++++ include/asm-mips/softirq.h | 3 + include/asm-mips/system.h | 14 +++++ include/asm-ppc/dma.h | 1 include/asm-ppc/hardirq.h | 9 ++- include/asm-ppc/highmem.h | 6 +- include/asm-ppc/hw_irq.h | 6 ++ include/asm-ppc/mmu_context.h | 4 + include/asm-ppc/pgalloc.h | 9 +++ include/asm-ppc/smplock.h | 14 +++++ include/asm-ppc/softirq.h | 13 ++++ include/linux/brlock.h 
| 10 +-- include/linux/dcache.h | 56 +++++++++++--------- include/linux/fs_struct.h | 13 +++- include/linux/highmem.h | 14 ----- include/linux/sched.h | 12 ++++ include/linux/smp_lock.h | 2 include/linux/spinlock.h | 82 +++++++++++++++++++++++++++-- include/linux/tqueue.h | 31 ++++++----- kernel/exit.c | 9 ++- kernel/fork.c | 7 ++ kernel/ksyms.c | 3 + kernel/sched.c | 48 ++++++++++++++++- kernel/softirq.c | 13 +++- kernel/sys.c | 2 lib/dec_and_lock.c | 1 mm/slab.c | 5 + net/core/dev.c | 11 +++- net/core/skbuff.c | 30 ++++++---- net/socket.c | 2 net/sunrpc/pmap_clnt.c | 1 81 files changed, 858 insertions(+), 159 deletions(-) diff -urN linux-2.4.22/arch/alpha/kernel/process.c linux/arch/alpha/kernel/process.c --- linux-2.4.22/arch/alpha/kernel/process.c 2003-08-25 07:44:39.000000000 -0400 +++ linux/arch/alpha/kernel/process.c 2003-09-23 18:56:11.000000000 -0400 @@ -186,6 +186,7 @@ args.mode = mode; args.restart_cmd = restart_cmd; #ifdef CONFIG_SMP + preempt_disable(); smp_call_function(common_shutdown_1, &args, 1, 0); #endif common_shutdown_1(&args); diff -urN linux-2.4.22/arch/i386/config.in linux/arch/i386/config.in --- linux-2.4.22/arch/i386/config.in 2003-08-25 07:44:39.000000000 -0400 +++ linux/arch/i386/config.in 2003-09-23 18:56:12.000000000 -0400 @@ -221,6 +221,7 @@ bool 'Math emulation' CONFIG_MATH_EMULATION bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR bool 'Symmetric multi-processing support' CONFIG_SMP +bool 'Preemptible Kernel' CONFIG_PREEMPT if [ "$CONFIG_SMP" != "y" ]; then bool 'Local APIC support on uniprocessors' CONFIG_X86_UP_APIC dep_bool 'IO-APIC support on uniprocessors' CONFIG_X86_UP_IOAPIC $CONFIG_X86_UP_APIC @@ -253,9 +254,12 @@ fi fi -if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then - define_bool CONFIG_HAVE_DEC_LOCK y +if [ "$CONFIG_SMP" = "y" -o "$CONFIG_PREEMPT" = "y" ]; then + if [ "$CONFIG_X86_CMPXCHG" = "y" ]; then + define_bool CONFIG_HAVE_DEC_LOCK y + fi fi + endmenu mainmenu_option next_comment diff 
-urN linux-2.4.22/arch/i386/kernel/cpuid.c linux/arch/i386/kernel/cpuid.c --- linux-2.4.22/arch/i386/kernel/cpuid.c 2001-10-11 12:04:57.000000000 -0400 +++ linux/arch/i386/kernel/cpuid.c 2003-09-23 18:56:12.000000000 -0400 @@ -60,7 +60,8 @@ static inline void do_cpuid(int cpu, u32 reg, u32 *data) { struct cpuid_command cmd; - + + preempt_disable(); if ( cpu == smp_processor_id() ) { cpuid(reg, &data[0], &data[1], &data[2], &data[3]); } else { @@ -70,6 +71,7 @@ smp_call_function(cpuid_smp_cpuid, &cmd, 1, 1); } + preempt_enable(); } #else /* ! CONFIG_SMP */ diff -urN linux-2.4.22/arch/i386/kernel/entry.S linux/arch/i386/kernel/entry.S --- linux-2.4.22/arch/i386/kernel/entry.S 2003-06-13 10:51:29.000000000 -0400 +++ linux/arch/i386/kernel/entry.S 2003-09-23 18:56:12.000000000 -0400 @@ -73,7 +73,7 @@ * these are offsets into the task-struct. */ state = 0 -flags = 4 +preempt_count = 4 sigpending = 8 addr_limit = 12 exec_domain = 16 @@ -81,8 +81,28 @@ tsk_ptrace = 24 processor = 52 +/* These are offsets into the irq_stat structure + * There is one per cpu and it is aligned to 32 + * byte boundry (we put that here as a shift count) + */ +irq_array_shift = CONFIG_X86_L1_CACHE_SHIFT + +irq_stat_local_irq_count = 4 +irq_stat_local_bh_count = 8 + ENOSYS = 38 +#ifdef CONFIG_SMP +#define GET_CPU_INDX movl processor(%ebx),%eax; \ + shll $irq_array_shift,%eax +#define GET_CURRENT_CPU_INDX GET_CURRENT(%ebx); \ + GET_CPU_INDX +#define CPU_INDX (,%eax) +#else +#define GET_CPU_INDX +#define GET_CURRENT_CPU_INDX GET_CURRENT(%ebx) +#define CPU_INDX +#endif #define SAVE_ALL \ cld; \ @@ -255,12 +275,30 @@ ALIGN ENTRY(ret_from_intr) GET_CURRENT(%ebx) +#ifdef CONFIG_PREEMPT + cli + decl preempt_count(%ebx) +#endif ret_from_exception: movl EFLAGS(%esp),%eax # mix EFLAGS and CS movb CS(%esp),%al testl $(VM_MASK | 3),%eax # return to VM86 mode or non-supervisor? 
jne ret_from_sys_call +#ifdef CONFIG_PREEMPT + cmpl $0,preempt_count(%ebx) + jnz restore_all + cmpl $0,need_resched(%ebx) + jz restore_all + movl SYMBOL_NAME(irq_stat)+irq_stat_local_bh_count CPU_INDX,%ecx + addl SYMBOL_NAME(irq_stat)+irq_stat_local_irq_count CPU_INDX,%ecx + jnz restore_all + incl preempt_count(%ebx) + sti + call SYMBOL_NAME(preempt_schedule) + jmp ret_from_intr +#else jmp restore_all +#endif ALIGN reschedule: @@ -297,6 +335,9 @@ GET_CURRENT(%ebx) call *%edi addl $8,%esp +#ifdef CONFIG_PREEMPT + cli +#endif jmp ret_from_exception ENTRY(coprocessor_error) @@ -316,12 +357,18 @@ movl %cr0,%eax testl $0x4,%eax # EM (math emulation bit) jne device_not_available_emulate +#ifdef CONFIG_PREEMPT + cli +#endif call SYMBOL_NAME(math_state_restore) jmp ret_from_exception device_not_available_emulate: pushl $0 # temporary storage for ORIG_EIP call SYMBOL_NAME(math_emulate) addl $4,%esp +#ifdef CONFIG_PREEMPT + cli +#endif jmp ret_from_exception ENTRY(debug) diff -urN linux-2.4.22/arch/i386/kernel/i387.c linux/arch/i386/kernel/i387.c --- linux-2.4.22/arch/i386/kernel/i387.c 2003-08-25 07:44:39.000000000 -0400 +++ linux/arch/i386/kernel/i387.c 2003-09-23 18:56:12.000000000 -0400 @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -89,6 +90,8 @@ { struct task_struct *tsk = current; + preempt_disable(); + if (tsk->flags & PF_USEDFPU) { __save_init_fpu(tsk); return; diff -urN linux-2.4.22/arch/i386/kernel/ioport.c linux/arch/i386/kernel/ioport.c --- linux-2.4.22/arch/i386/kernel/ioport.c 2003-06-13 10:51:29.000000000 -0400 +++ linux/arch/i386/kernel/ioport.c 2003-09-23 18:56:12.000000000 -0400 @@ -55,7 +55,7 @@ asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int turn_on) { struct thread_struct * t = ¤t->thread; - struct tss_struct * tss = init_tss + smp_processor_id(); + struct tss_struct * tss; if ((from + num <= from) || (from + num > IO_BITMAP_SIZE*32)) return -EINVAL; @@ -66,6 +66,8 @@ * IO bitmap up. 
ioperm() is much less timing critical than clone(), * this is why we delay this operation until now: */ + preempt_disable(); + tss = init_tss + smp_processor_id(); if (!t->ioperm) { /* * just in case ... @@ -84,6 +86,7 @@ memcpy(tss->io_bitmap, t->io_bitmap, IO_BITMAP_BYTES); tss->bitmap = IO_BITMAP_OFFSET; /* Activate it in the TSS */ } + preempt_enable(); return 0; } diff -urN linux-2.4.22/arch/i386/kernel/irq.c linux/arch/i386/kernel/irq.c --- linux-2.4.22/arch/i386/kernel/irq.c 2002-11-28 18:53:09.000000000 -0500 +++ linux/arch/i386/kernel/irq.c 2003-09-23 18:56:12.000000000 -0400 @@ -283,9 +283,11 @@ show("wait_on_irq"); count = ~0; } + preempt_disable(); __sti(); SYNC_OTHER_CORES(cpu); __cli(); + preempt_enable_no_resched(); if (irqs_running()) continue; if (global_irq_lock) @@ -359,8 +361,9 @@ __save_flags(flags); if (flags & (1 << EFLAGS_IF_SHIFT)) { - int cpu = smp_processor_id(); + int cpu; __cli(); + cpu = smp_processor_id(); if (!local_irq_count(cpu)) get_irqlock(cpu); } @@ -368,11 +371,14 @@ void __global_sti(void) { - int cpu = smp_processor_id(); + int cpu; + preempt_disable(); + cpu = smp_processor_id(); if (!local_irq_count(cpu)) release_irqlock(cpu); __sti(); + preempt_enable(); } /* @@ -387,13 +393,15 @@ int retval; int local_enabled; unsigned long flags; - int cpu = smp_processor_id(); + int cpu; __save_flags(flags); local_enabled = (flags >> EFLAGS_IF_SHIFT) & 1; /* default to local */ retval = 2 + local_enabled; + preempt_disable(); + cpu = smp_processor_id(); /* check for global flags if we're not in an interrupt */ if (!local_irq_count(cpu)) { if (local_enabled) @@ -401,6 +409,7 @@ if (global_irq_holder == cpu) retval = 0; } + preempt_enable(); return retval; } diff -urN linux-2.4.22/arch/i386/kernel/ldt.c linux/arch/i386/kernel/ldt.c --- linux-2.4.22/arch/i386/kernel/ldt.c 2001-10-17 17:46:29.000000000 -0400 +++ linux/arch/i386/kernel/ldt.c 2003-09-23 18:56:12.000000000 -0400 @@ -92,6 +92,7 @@ * the GDT index of the LDT is allocated 
dynamically, and is * limited by MAX_LDT_DESCRIPTORS. */ + preempt_disable(); down_write(&mm->mmap_sem); if (!mm->context.segments) { void * segments = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE); @@ -144,6 +145,7 @@ out_unlock: up_write(&mm->mmap_sem); + preempt_enable(); out: return error; } diff -urN linux-2.4.22/arch/i386/kernel/microcode.c linux/arch/i386/kernel/microcode.c --- linux-2.4.22/arch/i386/kernel/microcode.c 2003-06-13 10:51:29.000000000 -0400 +++ linux/arch/i386/kernel/microcode.c 2003-09-23 18:56:12.000000000 -0400 @@ -182,11 +182,14 @@ int i, error = 0, err; struct microcode *m; + preempt_disable(); if (smp_call_function(do_update_one, NULL, 1, 1) != 0) { printk(KERN_ERR "microcode: IPI timeout, giving up\n"); + preempt_enable(); return -EIO; } do_update_one(NULL); + preempt_enable(); for (i=0; imm; - unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id()); + unsigned long cpu_mask; + preempt_disable(); + cpu_mask = mm->cpu_vm_mask & ~(1UL << smp_processor_id()); local_flush_tlb(); if (cpu_mask) flush_tlb_others(cpu_mask, mm, FLUSH_ALL); + preempt_enable(); } void flush_tlb_mm (struct mm_struct * mm) { - unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id()); + unsigned long cpu_mask; + preempt_disable(); + cpu_mask = mm->cpu_vm_mask & ~(1UL << smp_processor_id()); if (current->active_mm == mm) { if (current->mm) local_flush_tlb(); @@ -452,13 +463,16 @@ } if (cpu_mask) flush_tlb_others(cpu_mask, mm, FLUSH_ALL); + preempt_enable(); } void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) { struct mm_struct *mm = vma->vm_mm; - unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id()); + unsigned long cpu_mask; + preempt_disable(); + cpu_mask = mm->cpu_vm_mask & ~(1UL << smp_processor_id()); if (current->active_mm == mm) { if(current->mm) __flush_tlb_one(va); @@ -468,6 +482,7 @@ if (cpu_mask) flush_tlb_others(cpu_mask, mm, va); + preempt_enable(); } static inline void do_flush_tlb_all_local(void) @@ 
-486,9 +501,11 @@ void flush_tlb_all(void) { + preempt_disable(); smp_call_function (flush_tlb_all_ipi,0,1,1); do_flush_tlb_all_local(); + preempt_enable(); } /* @@ -572,7 +589,7 @@ static void stop_this_cpu (void * dummy) { /* - * Remove this CPU: + * Remove this CPU: assumes preemption is disabled */ clear_bit(smp_processor_id(), &cpu_online_map); __cli(); diff -urN linux-2.4.22/arch/i386/kernel/traps.c linux/arch/i386/kernel/traps.c --- linux-2.4.22/arch/i386/kernel/traps.c 2002-11-28 18:53:09.000000000 -0500 +++ linux/arch/i386/kernel/traps.c 2003-09-23 18:56:12.000000000 -0400 @@ -751,6 +751,8 @@ * * Careful.. There are problems with IBM-designed IRQ13 behaviour. * Don't touch unless you *really* know how it works. + * + * Must be called with kernel preemption disabled. */ asmlinkage void math_state_restore(struct pt_regs regs) { diff -urN linux-2.4.22/arch/i386/lib/dec_and_lock.c linux/arch/i386/lib/dec_and_lock.c --- linux-2.4.22/arch/i386/lib/dec_and_lock.c 2000-07-07 21:20:16.000000000 -0400 +++ linux/arch/i386/lib/dec_and_lock.c 2003-09-23 18:56:12.000000000 -0400 @@ -8,6 +8,7 @@ */ #include +#include #include int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) diff -urN linux-2.4.22/arch/i386/mm/init.c linux/arch/i386/mm/init.c --- linux-2.4.22/arch/i386/mm/init.c 2003-06-13 10:51:29.000000000 -0400 +++ linux/arch/i386/mm/init.c 2003-09-23 18:56:12.000000000 -0400 @@ -46,6 +46,7 @@ int do_check_pgt_cache(int low, int high) { int freed = 0; + preempt_disable(); if(pgtable_cache_size > high) { do { if (pgd_quicklist) { @@ -62,6 +63,7 @@ } } while(pgtable_cache_size > low); } + preempt_enable(); return freed; } diff -urN linux-2.4.22/arch/mips/config-shared.in linux/arch/mips/config-shared.in --- linux-2.4.22/arch/mips/config-shared.in 2003-08-25 07:44:39.000000000 -0400 +++ linux/arch/mips/config-shared.in 2003-09-23 18:56:12.000000000 -0400 @@ -867,6 +867,7 @@ define_bool CONFIG_HOTPLUG_PCI n fi +dep_bool 'Preemptible Kernel' CONFIG_PREEMPT 
$CONFIG_NEW_IRQ bool 'System V IPC' CONFIG_SYSVIPC bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT bool 'Sysctl support' CONFIG_SYSCTL diff -urN linux-2.4.22/arch/mips/kernel/i8259.c linux/arch/mips/kernel/i8259.c --- linux-2.4.22/arch/mips/kernel/i8259.c 2003-08-25 07:44:40.000000000 -0400 +++ linux/arch/mips/kernel/i8259.c 2003-09-23 18:56:12.000000000 -0400 @@ -8,6 +8,7 @@ * Copyright (C) 1992 Linus Torvalds * Copyright (C) 1994 - 2000 Ralf Baechle */ +#include #include #include #include diff -urN linux-2.4.22/arch/mips/kernel/irq.c linux/arch/mips/kernel/irq.c --- linux-2.4.22/arch/mips/kernel/irq.c 2003-08-25 07:44:40.000000000 -0400 +++ linux/arch/mips/kernel/irq.c 2003-09-23 18:56:12.000000000 -0400 @@ -8,6 +8,8 @@ * Copyright (C) 1992 Linus Torvalds * Copyright (C) 1994 - 2000 Ralf Baechle */ + +#include #include #include #include @@ -19,11 +21,13 @@ #include #include #include -#include +#include +#include #include #include #include +#include /* * Controller mappings for all interrupt sources: @@ -429,6 +433,8 @@ struct irqaction * action; unsigned int status; + preempt_disable(); + kstat.irqs[cpu][irq]++; spin_lock(&desc->lock); desc->handler->ack(irq); @@ -490,6 +496,27 @@ if (softirq_pending(cpu)) do_softirq(); + +#if defined(CONFIG_PREEMPT) + while (--current->preempt_count == 0) { + db_assert(intr_off()); + db_assert(!in_interrupt()); + + if (current->need_resched == 0) { + break; + } + + current->preempt_count ++; + sti(); + if (user_mode(regs)) { + schedule(); + } else { + preempt_schedule(); + } + cli(); + } +#endif + return 1; } diff -urN linux-2.4.22/arch/mips/mm/extable.c linux/arch/mips/mm/extable.c --- linux-2.4.22/arch/mips/mm/extable.c 2002-11-28 18:53:10.000000000 -0500 +++ linux/arch/mips/mm/extable.c 2003-09-23 18:56:12.000000000 -0400 @@ -3,6 +3,7 @@ */ #include #include +#include #include #include diff -urN linux-2.4.22/arch/ppc/config.in linux/arch/ppc/config.in --- linux-2.4.22/arch/ppc/config.in 2003-08-25 07:44:40.000000000 
-0400 +++ linux/arch/ppc/config.in 2003-09-23 18:56:12.000000000 -0400 @@ -125,6 +125,8 @@ bool ' Distribute interrupts on all CPUs by default' CONFIG_IRQ_ALL_CPUS fi +bool 'Preemptible kernel support' CONFIG_PREEMPT + if [ "$CONFIG_6xx" = "y" -a "$CONFIG_8260" = "n" ];then bool 'AltiVec Support' CONFIG_ALTIVEC bool 'Thermal Management Support' CONFIG_TAU diff -urN linux-2.4.22/arch/ppc/kernel/entry.S linux/arch/ppc/kernel/entry.S --- linux-2.4.22/arch/ppc/kernel/entry.S 2003-08-25 07:44:40.000000000 -0400 +++ linux/arch/ppc/kernel/entry.S 2003-09-23 18:56:12.000000000 -0400 @@ -274,6 +274,46 @@ */ cmpi 0,r3,0 beq restore +#ifdef CONFIG_PREEMPT + lwz r3,PREEMPT_COUNT(r2) + cmpi 0,r3,1 + bge ret_from_except + lwz r5,_MSR(r1) + andi. r5,r5,MSR_PR + bne do_signal_ret + lwz r5,NEED_RESCHED(r2) + cmpi 0,r5,0 + beq ret_from_except + lis r3,irq_stat@h + ori r3,r3,irq_stat@l +#ifdef CONFIG_SMP + lwz r5,CPU(r2) + rlwinm r5,r5,5,0,26 + add r3,r3,r5 +#endif + lwz r5,4(r3) + lwz r3,8(r3) + add r3,r3,r5 + cmpi 0,r3,0 + bne ret_from_except + lwz r3,PREEMPT_COUNT(r2) + addi r3,r3,1 + stw r3,PREEMPT_COUNT(r2) + mfmsr r0 + ori r0,r0,MSR_EE + mtmsr r0 + sync + bl preempt_schedule + mfmsr r0 + rlwinm r0,r0,0,17,15 + mtmsr r0 + sync + lwz r3,PREEMPT_COUNT(r2) + subi r3,r3,1 + stw r3,PREEMPT_COUNT(r2) + li r3,1 + b ret_from_intercept +#endif /* CONFIG_PREEMPT */ .globl ret_from_except ret_from_except: lwz r3,_MSR(r1) /* Returning to user mode? 
*/ diff -urN linux-2.4.22/arch/ppc/kernel/irq.c linux/arch/ppc/kernel/irq.c --- linux-2.4.22/arch/ppc/kernel/irq.c 2003-08-25 07:44:40.000000000 -0400 +++ linux/arch/ppc/kernel/irq.c 2003-09-23 18:56:12.000000000 -0400 @@ -538,6 +538,34 @@ return 1; /* lets ret_from_int know we can do checks */ } +#ifdef CONFIG_PREEMPT +int +preempt_intercept(struct pt_regs *regs) +{ + int ret; + + preempt_disable(); + + switch(regs->trap) { + case 0x500: + ret = do_IRQ(regs); + break; +#ifndef CONFIG_4xx + case 0x900: +#else + case 0x1000: +#endif + ret = timer_interrupt(regs); + break; + default: + BUG(); + } + + preempt_enable(); + return ret; +} +#endif /* CONFIG_PREEMPT */ + unsigned long probe_irq_on (void) { return 0; @@ -634,11 +662,13 @@ show("wait_on_irq"); count = ~0; } + preempt_disable(); __sti(); /* don't worry about the lock race Linus found * on intel here. -- Cort */ __cli(); + preempt_enable_no_resched(); if (atomic_read(&global_irq_count)) continue; if (global_irq_lock) @@ -714,6 +744,8 @@ global_irq_holder = cpu; } +#define EFLAGS_IF_SHIFT 15 + /* * A global "cli()" while in an interrupt context * turns into just a local cli(). 
Interrupts @@ -731,9 +763,10 @@ unsigned long flags; __save_flags(flags); - if (flags & (1 << 15)) { - int cpu = smp_processor_id(); + if (flags & (1 << EFLAGS_IF_SHIFT)) { + int cpu; __cli(); + cpu = smp_processor_id(); if (!local_irq_count(cpu)) get_irqlock(cpu); } @@ -741,11 +774,14 @@ void __global_sti(void) { - int cpu = smp_processor_id(); + int cpu; + preempt_disable(); + cpu = smp_processor_id(); if (!local_irq_count(cpu)) release_irqlock(cpu); __sti(); + preempt_enable(); } /* @@ -760,19 +796,23 @@ int retval; int local_enabled; unsigned long flags; + int cpu; __save_flags(flags); - local_enabled = (flags >> 15) & 1; + local_enabled = (flags >> EFLAGS_IF_SHIFT) & 1; /* default to local */ retval = 2 + local_enabled; /* check for global flags if we're not in an interrupt */ - if (!local_irq_count(smp_processor_id())) { + preempt_disable(); + cpu = smp_processor_id(); + if (!local_irq_count(cpu)) { if (local_enabled) retval = 1; - if (global_irq_holder == (unsigned char) smp_processor_id()) + if (global_irq_holder == cpu) retval = 0; } + preempt_enable(); return retval; } diff -urN linux-2.4.22/arch/ppc/kernel/mk_defs.c linux/arch/ppc/kernel/mk_defs.c --- linux-2.4.22/arch/ppc/kernel/mk_defs.c 2003-08-25 07:44:40.000000000 -0400 +++ linux/arch/ppc/kernel/mk_defs.c 2003-09-23 18:56:12.000000000 -0400 @@ -39,6 +39,9 @@ DEFINE(SIGPENDING, offsetof(struct task_struct, sigpending)); DEFINE(THREAD, offsetof(struct task_struct, thread)); DEFINE(MM, offsetof(struct task_struct, mm)); +#ifdef CONFIG_PREEMPT + DEFINE(PREEMPT_COUNT, offsetof(struct task_struct, preempt_count)); +#endif DEFINE(ACTIVE_MM, offsetof(struct task_struct, active_mm)); DEFINE(TASK_STRUCT_SIZE, sizeof(struct task_struct)); DEFINE(KSP, offsetof(struct thread_struct, ksp)); diff -urN linux-2.4.22/arch/ppc/kernel/open_pic.c linux/arch/ppc/kernel/open_pic.c --- linux-2.4.22/arch/ppc/kernel/open_pic.c 2003-08-25 07:44:40.000000000 -0400 +++ linux/arch/ppc/kernel/open_pic.c 2003-09-23 
18:56:12.000000000 -0400 @@ -594,19 +594,24 @@ void __init do_openpic_setup_cpu(void) { int i; - u32 msk = 1 << smp_hw_index[smp_processor_id()]; +#ifdef CONFIG_IRQ_ALL_CPUS + u32 msk; +#endif /* CONFIG_IRQ_ALL_CPUS */ spin_lock(&openpic_setup_lock); #ifdef CONFIG_IRQ_ALL_CPUS + msk = 1 << smp_hw_index[smp_processor_id()]; + /* let the openpic know we want intrs. default affinity * is 0xffffffff until changed via /proc * That's how it's done on x86. If we want it differently, then * we should make sure we also change the default values of irq_affinity * in irq.c. */ - for (i = 0; i < NumSources; i++) + for (i = 0; i < NumSources; i++) { openpic_mapirq(i, msk, ~0U); + } #endif /* CONFIG_IRQ_ALL_CPUS */ openpic_set_priority(0); diff -urN linux-2.4.22/arch/ppc/kernel/setup.c linux/arch/ppc/kernel/setup.c --- linux-2.4.22/arch/ppc/kernel/setup.c 2003-08-25 07:44:40.000000000 -0400 +++ linux/arch/ppc/kernel/setup.c 2003-09-23 18:56:12.000000000 -0400 @@ -502,6 +502,20 @@ strcpy(cmd_line, CONFIG_CMDLINE); #endif /* CONFIG_CMDLINE */ +#ifdef CONFIG_PREEMPT + /* Override the irq routines for external & timer interrupts here, + * as the MMU has only been minimally setup at this point and + * there are no protections on page zero. 
+ */ + { + extern int preempt_intercept(struct pt_regs *); + + do_IRQ_intercept = (unsigned long) &preempt_intercept; + timer_interrupt_intercept = (unsigned long) &preempt_intercept; + + } +#endif /* CONFIG_PREEMPT */ + platform_init(r3, r4, r5, r6, r7); if (ppc_md.progress) diff -urN linux-2.4.22/arch/ppc/kernel/temp.c linux/arch/ppc/kernel/temp.c --- linux-2.4.22/arch/ppc/kernel/temp.c 2003-08-25 07:44:40.000000000 -0400 +++ linux/arch/ppc/kernel/temp.c 2003-09-23 18:56:12.000000000 -0400 @@ -138,7 +138,7 @@ static void tau_timeout(void * info) { - unsigned long cpu = smp_processor_id(); + unsigned long cpu; unsigned long flags; int size; int shrink; @@ -146,6 +146,8 @@ /* disabling interrupts *should* be okay */ save_flags(flags); cli(); + cpu = smp_processor_id(); + #ifndef CONFIG_TAU_INT TAUupdate(cpu); #endif @@ -191,13 +193,15 @@ static void tau_timeout_smp(unsigned long unused) { - /* schedule ourselves to be run again */ mod_timer(&tau_timer, jiffies + shrink_timer) ; + + preempt_disable(); #ifdef CONFIG_SMP smp_call_function(tau_timeout, NULL, 1, 0); #endif tau_timeout(NULL); + preempt_enable(); } /* diff -urN linux-2.4.22/arch/ppc/lib/dec_and_lock.c linux/arch/ppc/lib/dec_and_lock.c --- linux-2.4.22/arch/ppc/lib/dec_and_lock.c 2001-11-16 13:10:08.000000000 -0500 +++ linux/arch/ppc/lib/dec_and_lock.c 2003-09-23 18:56:12.000000000 -0400 @@ -1,4 +1,5 @@ #include +#include #include #include #include diff -urN linux-2.4.22/arch/ppc/mm/init.c linux/arch/ppc/mm/init.c --- linux-2.4.22/arch/ppc/mm/init.c 2003-08-25 07:44:40.000000000 -0400 +++ linux/arch/ppc/mm/init.c 2003-09-23 18:56:12.000000000 -0400 @@ -101,6 +101,9 @@ int do_check_pgt_cache(int low, int high) { int freed = 0; + + preempt_disable(); + if (pgtable_cache_size > high) { do { if (pgd_quicklist) { @@ -113,6 +116,9 @@ } } while (pgtable_cache_size > low); } + + preempt_enable(); + return freed; } diff -urN linux-2.4.22/arch/ppc/mm/tlb.c linux/arch/ppc/mm/tlb.c --- linux-2.4.22/arch/ppc/mm/tlb.c 
2003-08-25 07:44:40.000000000 -0400 +++ linux/arch/ppc/mm/tlb.c 2003-09-23 18:57:47.000000000 -0400 @@ -58,11 +58,14 @@ * we can and should dispense with flush_tlb_all(). * -- paulus. */ + + preempt_disable(); local_flush_tlb_range(&init_mm, TASK_SIZE, ~0UL); #ifdef CONFIG_SMP smp_send_tlb_invalidate(0); #endif /* CONFIG_SMP */ + preempt_enable(); } /* @@ -73,8 +76,10 @@ void local_flush_tlb_mm(struct mm_struct *mm) { + preempt_disable(); if (Hash == 0) { _tlbia(); + preempt_enable(); return; } @@ -88,6 +93,7 @@ #ifdef CONFIG_SMP smp_send_tlb_invalidate(0); #endif + preempt_enable(); } void @@ -97,8 +103,10 @@ pmd_t *pmd; pte_t *pte; + preempt_disable(); if (Hash == 0) { _tlbie(vmaddr); + preempt_enable(); return; } mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; @@ -111,6 +119,7 @@ #ifdef CONFIG_SMP smp_send_tlb_invalidate(0); #endif + preempt_enable(); } @@ -127,13 +136,17 @@ unsigned long pmd_end; unsigned int ctx = mm->context; + preempt_disable(); if (Hash == 0) { _tlbia(); + preempt_enable(); return; } start &= PAGE_MASK; - if (start >= end) + if (start >= end) { + preempt_enable(); return; + } pmd = pmd_offset(pgd_offset(mm, start), start); do { pmd_end = (start + PGDIR_SIZE) & PGDIR_MASK; @@ -156,4 +169,5 @@ #ifdef CONFIG_SMP smp_send_tlb_invalidate(0); #endif + preempt_enable(); } diff -urN linux-2.4.22/CREDITS linux/CREDITS --- linux-2.4.22/CREDITS 2003-08-25 07:44:39.000000000 -0400 +++ linux/CREDITS 2003-09-23 18:56:13.000000000 -0400 @@ -999,8 +999,8 @@ N: Nigel Gamble E: nigel@nrg.org -E: nigel@sgi.com D: Interrupt-driven printer driver +D: Preemptible kernel S: 120 Alley Way S: Mountain View, California 94040 S: USA diff -urN linux-2.4.22/Documentation/Configure.help linux/Documentation/Configure.help --- linux-2.4.22/Documentation/Configure.help 2003-08-25 07:44:39.000000000 -0400 +++ linux/Documentation/Configure.help 2003-09-23 18:56:13.000000000 -0400 @@ -287,6 +287,17 @@ If you have a system with several CPUs, you do not need to say Y here: the 
local APIC will be used automatically. +Preemptible Kernel +CONFIG_PREEMPT + This option reduces the latency of the kernel when reacting to + real-time or interactive events by allowing a low priority process to + be preempted even if it is in kernel mode executing a system call. + This allows applications to run more reliably even when the system is + under load. + + Say Y here if you are building a kernel for a desktop, embedded or + real-time system. Say N if you are unsure. + Kernel math emulation CONFIG_MATH_EMULATION Linux can emulate a math coprocessor (used for floating point diff -urN linux-2.4.22/Documentation/preempt-locking.txt linux/Documentation/preempt-locking.txt --- linux-2.4.22/Documentation/preempt-locking.txt 1969-12-31 19:00:00.000000000 -0500 +++ linux/Documentation/preempt-locking.txt 2003-09-23 18:56:13.000000000 -0400 @@ -0,0 +1,104 @@ + Proper Locking Under a Preemptible Kernel: + Keeping Kernel Code Preempt-Safe + Robert Love + Last Updated: 22 Jan 2002 + + +INTRODUCTION + + +A preemptible kernel creates new locking issues. The issues are the same as +those under SMP: concurrency and reentrancy. Thankfully, the Linux preemptible +kernel model leverages existing SMP locking mechanisms. Thus, the kernel +requires explicit additional locking for very few additional situations. + +This document is for all kernel hackers. Developing code in the kernel +requires protecting these situations. + + +RULE #1: Per-CPU data structures need explicit protection + + +Two similar problems arise. An example code snippet: + + struct this_needs_locking tux[NR_CPUS]; + tux[smp_processor_id()] = some_value; + /* task is preempted here... */ + something = tux[smp_processor_id()]; + +First, since the data is per-CPU, it may not have explicit SMP locking, but +require it otherwise. Second, when a preempted task is finally rescheduled, +the previous value of smp_processor_id may not equal the current. 
You must +protect these situations by disabling preemption around them. + + +RULE #2: CPU state must be protected. + + +Under preemption, the state of the CPU must be protected. This is arch- +dependent, but includes CPU structures and state not preserved over a context +switch. For example, on x86, entering and exiting FPU mode is now a critical +section that must occur while preemption is disabled. Think what would happen +if the kernel is executing a floating-point instruction and is then preempted. +Remember, the kernel does not save FPU state except for user tasks. Therefore, +upon preemption, the FPU registers will be sold to the lowest bidder. Thus, +preemption must be disabled around such regions. + +Note, some FPU functions are already explicitly preempt safe. For example, +kernel_fpu_begin and kernel_fpu_end will disable and enable preemption. +However, math_state_restore must be called with preemption disabled. + + +RULE #3: Lock acquire and release must be performed by the same task + + +A lock acquired in one task must be released by the same task. This +means you can't do oddball things like acquire a lock and go off to +play while another task releases it. If you want to do something +like this, acquire and release the lock in the same code path and +have the caller wait on an event by the other task. + + +SOLUTION + + +Data protection under preemption is achieved by disabling preemption for the +duration of the critical region. + +preempt_enable() decrement the preempt counter +preempt_disable() increment the preempt counter +preempt_enable_no_resched() decrement, but do not immediately preempt +preempt_get_count() return the preempt counter + +The functions are nestable. In other words, you can call preempt_disable +n-times in a code path, and preemption will not be reenabled until the n-th +call to preempt_enable. The preempt statements define to nothing if +preemption is not enabled.
+ +Note that you do not need to explicitly prevent preemption if you are holding +any locks or interrupts are disabled, since preemption is implicitly disabled +in those cases. + +Example: + + cpucache_t *cc; /* this is per-CPU */ + preempt_disable(); + cc = cc_data(searchp); + if (cc && cc->avail) { + __free_block(searchp, cc_entry(cc), cc->avail); + cc->avail = 0; + } + preempt_enable(); + return 0; + +Notice how the preemption statements must encompass every reference of the +critical variables. Another example: + + int buf[NR_CPUS]; + set_cpu_val(buf); + if (buf[smp_processor_id()] == -1) printk(KERN_INFO "wee!\n"); + spin_lock(&buf_lock); + /* ... */ + +This code is not preempt-safe, but see how easily we can fix it by simply +moving the spin_lock up two lines. diff -urN linux-2.4.22/drivers/ieee1394/csr.c linux/drivers/ieee1394/csr.c --- linux-2.4.22/drivers/ieee1394/csr.c 2003-08-25 07:44:41.000000000 -0400 +++ linux/drivers/ieee1394/csr.c 2003-09-23 18:56:13.000000000 -0400 @@ -18,6 +18,7 @@ */ #include +#include #include /* needed for MODULE_PARM */ #include diff -urN linux-2.4.22/drivers/sound/sound_core.c linux/drivers/sound/sound_core.c --- linux-2.4.22/drivers/sound/sound_core.c 2001-09-30 15:26:08.000000000 -0400 +++ linux/drivers/sound/sound_core.c 2003-09-23 18:56:13.000000000 -0400 @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include diff -urN linux-2.4.22/fs/adfs/map.c linux/fs/adfs/map.c --- linux-2.4.22/fs/adfs/map.c 2001-10-25 16:53:53.000000000 -0400 +++ linux/fs/adfs/map.c 2003-09-23 18:56:13.000000000 -0400 @@ -12,6 +12,7 @@ #include #include #include +#include #include "adfs.h" diff -urN linux-2.4.22/fs/exec.c linux/fs/exec.c --- linux-2.4.22/fs/exec.c 2003-08-25 07:44:43.000000000 -0400 +++ linux/fs/exec.c 2003-09-23 18:56:13.000000000 -0400 @@ -450,8 +450,8 @@ active_mm = current->active_mm; current->mm = mm; current->active_mm = mm; - task_unlock(current); activate_mm(active_mm, mm); + task_unlock(current);
mm_release(); if (old_mm) { if (active_mm != old_mm) BUG(); diff -urN linux-2.4.22/fs/fat/cache.c linux/fs/fat/cache.c --- linux-2.4.22/fs/fat/cache.c 2001-10-12 16:48:42.000000000 -0400 +++ linux/fs/fat/cache.c 2003-09-23 18:56:13.000000000 -0400 @@ -14,6 +14,7 @@ #include #include #include +#include #if 0 # define PRINTK(x) printk x diff -urN linux-2.4.22/fs/nfsd/nfssvc.c linux/fs/nfsd/nfssvc.c --- linux-2.4.22/fs/nfsd/nfssvc.c 2002-11-28 18:53:15.000000000 -0500 +++ linux/fs/nfsd/nfssvc.c 2003-09-23 18:56:13.000000000 -0400 @@ -250,6 +250,7 @@ svc_exit_thread(rqstp); /* Release module */ + unlock_kernel(); MOD_DEC_USE_COUNT; } diff -urN linux-2.4.22/fs/nls/nls_base.c linux/fs/nls/nls_base.c --- linux-2.4.22/fs/nls/nls_base.c 2002-08-02 20:39:45.000000000 -0400 +++ linux/fs/nls/nls_base.c 2003-09-23 18:56:13.000000000 -0400 @@ -18,6 +18,7 @@ #ifdef CONFIG_KMOD #include #endif +#include #include static struct nls_table *tables; diff -urN linux-2.4.22/include/asm-i386/desc.h linux/include/asm-i386/desc.h --- linux-2.4.22/include/asm-i386/desc.h 2001-07-26 16:40:32.000000000 -0400 +++ linux/include/asm-i386/desc.h 2003-09-23 18:56:14.000000000 -0400 @@ -71,9 +71,12 @@ static inline void clear_LDT(void) { - int cpu = smp_processor_id(); + int cpu; + preempt_disable(); + cpu = smp_processor_id(); set_ldt_desc(cpu, &default_ldt[0], 5); __load_LDT(cpu); + preempt_enable(); } /* diff -urN linux-2.4.22/include/asm-i386/hardirq.h linux/include/asm-i386/hardirq.h --- linux-2.4.22/include/asm-i386/hardirq.h 2001-11-22 14:46:19.000000000 -0500 +++ linux/include/asm-i386/hardirq.h 2003-09-23 18:56:14.000000000 -0400 @@ -19,12 +19,16 @@ /* * Are we in an interrupt context? Either doing bottom half - * or hardware interrupt processing? + * or hardware interrupt processing? Note the preempt check, + * this is both a bugfix and an optimization. If we are + * preemptible, we cannot be in an interrupt. 
*/ -#define in_interrupt() ({ int __cpu = smp_processor_id(); \ - (local_irq_count(__cpu) + local_bh_count(__cpu) != 0); }) +#define in_interrupt() (preempt_is_disabled() && \ + ({unsigned long __cpu = smp_processor_id(); \ + (local_irq_count(__cpu) + local_bh_count(__cpu) != 0); })) -#define in_irq() (local_irq_count(smp_processor_id()) != 0) +#define in_irq() (preempt_is_disabled() && \ + (local_irq_count(smp_processor_id()) != 0)) #ifndef CONFIG_SMP @@ -36,6 +40,8 @@ #define synchronize_irq() barrier() +#define release_irqlock(cpu) do { } while (0) + #else #include diff -urN linux-2.4.22/include/asm-i386/highmem.h linux/include/asm-i386/highmem.h --- linux-2.4.22/include/asm-i386/highmem.h 2003-06-13 10:51:38.000000000 -0400 +++ linux/include/asm-i386/highmem.h 2003-09-23 18:56:14.000000000 -0400 @@ -91,6 +91,7 @@ enum fixed_addresses idx; unsigned long vaddr; + preempt_disable(); if (page < highmem_start_page) return page_address(page); @@ -112,8 +113,10 @@ unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); - if (vaddr < FIXADDR_START) // FIXME + if (vaddr < FIXADDR_START) { // FIXME + preempt_enable(); return; + } if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx)) out_of_line_bug(); @@ -125,6 +128,8 @@ pte_clear(kmap_pte-idx); __flush_tlb_one(vaddr); #endif + + preempt_enable(); } #endif /* __KERNEL__ */ diff -urN linux-2.4.22/include/asm-i386/hw_irq.h linux/include/asm-i386/hw_irq.h --- linux-2.4.22/include/asm-i386/hw_irq.h 2003-08-25 07:44:43.000000000 -0400 +++ linux/include/asm-i386/hw_irq.h 2003-09-23 18:56:14.000000000 -0400 @@ -95,6 +95,18 @@ #define __STR(x) #x #define STR(x) __STR(x) +#define GET_CURRENT \ + "movl %esp, %ebx\n\t" \ + "andl $-8192, %ebx\n\t" + +#ifdef CONFIG_PREEMPT +#define BUMP_LOCK_COUNT \ + GET_CURRENT \ + "incl 4(%ebx)\n\t" +#else +#define BUMP_LOCK_COUNT +#endif + #define SAVE_ALL \ "cld\n\t" \ "pushl %es\n\t" \ @@ -108,15 +120,12 @@ "pushl %ebx\n\t" \ "movl $" 
STR(__KERNEL_DS) ",%edx\n\t" \ "movl %edx,%ds\n\t" \ - "movl %edx,%es\n\t" + "movl %edx,%es\n\t" \ + BUMP_LOCK_COUNT #define IRQ_NAME2(nr) nr##_interrupt(void) #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) -#define GET_CURRENT \ - "movl %esp, %ebx\n\t" \ - "andl $-8192, %ebx\n\t" - /* * SMP has a few special interrupts for IPI messages */ diff -urN linux-2.4.22/include/asm-i386/i387.h linux/include/asm-i386/i387.h --- linux-2.4.22/include/asm-i386/i387.h 2002-08-02 20:39:45.000000000 -0400 +++ linux/include/asm-i386/i387.h 2003-09-23 18:56:14.000000000 -0400 @@ -12,6 +12,7 @@ #define __ASM_I386_I387_H #include +#include #include #include #include @@ -24,7 +25,7 @@ extern void restore_fpu( struct task_struct *tsk ); extern void kernel_fpu_begin(void); -#define kernel_fpu_end() stts() +#define kernel_fpu_end() do { stts(); preempt_enable(); } while(0) #define unlazy_fpu( tsk ) do { \ diff -urN linux-2.4.22/include/asm-i386/pgalloc.h linux/include/asm-i386/pgalloc.h --- linux-2.4.22/include/asm-i386/pgalloc.h 2003-08-25 07:44:43.000000000 -0400 +++ linux/include/asm-i386/pgalloc.h 2003-09-23 18:56:14.000000000 -0400 @@ -75,20 +75,26 @@ { unsigned long *ret; + preempt_disable(); if ((ret = pgd_quicklist) != NULL) { pgd_quicklist = (unsigned long *)(*ret); ret[0] = 0; pgtable_cache_size--; - } else + preempt_enable(); + } else { + preempt_enable(); ret = (unsigned long *)get_pgd_slow(); + } return (pgd_t *)ret; } static inline void free_pgd_fast(pgd_t *pgd) { + preempt_disable(); *(unsigned long *)pgd = (unsigned long) pgd_quicklist; pgd_quicklist = (unsigned long *) pgd; pgtable_cache_size++; + preempt_enable(); } static inline void free_pgd_slow(pgd_t *pgd) @@ -119,19 +125,23 @@ { unsigned long *ret; + preempt_disable(); if ((ret = (unsigned long *)pte_quicklist) != NULL) { pte_quicklist = (unsigned long *)(*ret); ret[0] = ret[1]; pgtable_cache_size--; } + preempt_enable(); return (pte_t *)ret; } static inline void pte_free_fast(pte_t *pte) { + preempt_disable(); *(unsigned 
long *)pte = (unsigned long) pte_quicklist; pte_quicklist = (unsigned long *) pte; pgtable_cache_size++; + preempt_enable(); } static __inline__ void pte_free_slow(pte_t *pte) diff -urN linux-2.4.22/include/asm-i386/smplock.h linux/include/asm-i386/smplock.h --- linux-2.4.22/include/asm-i386/smplock.h 2003-06-13 10:51:38.000000000 -0400 +++ linux/include/asm-i386/smplock.h 2003-09-23 18:56:14.000000000 -0400 @@ -14,7 +14,15 @@ extern spinlock_cacheline_t kernel_flag_cacheline; #define kernel_flag kernel_flag_cacheline.lock +#ifdef CONFIG_SMP #define kernel_locked() spin_is_locked(&kernel_flag) +#else +#ifdef CONFIG_PREEMPT +#define kernel_locked() preempt_get_count() +#else +#define kernel_locked() 1 +#endif +#endif /* * Release global kernel lock and global interrupt lock @@ -46,6 +54,11 @@ */ static __inline__ void lock_kernel(void) { +#ifdef CONFIG_PREEMPT + if (current->lock_depth == -1) + spin_lock(&kernel_flag); + ++current->lock_depth; +#else #if 1 if (!++current->lock_depth) spin_lock(&kernel_flag); @@ -58,6 +71,7 @@ :"=m" (__dummy_lock(&kernel_flag)), "=m" (current->lock_depth)); #endif +#endif } static __inline__ void unlock_kernel(void) diff -urN linux-2.4.22/include/asm-i386/softirq.h linux/include/asm-i386/softirq.h --- linux-2.4.22/include/asm-i386/softirq.h 2002-08-02 20:39:45.000000000 -0400 +++ linux/include/asm-i386/softirq.h 2003-09-23 18:56:14.000000000 -0400 @@ -5,14 +5,15 @@ #include #define __cpu_bh_enable(cpu) \ - do { barrier(); local_bh_count(cpu)--; } while (0) + do { barrier(); local_bh_count(cpu)--; preempt_enable(); } while (0) #define cpu_bh_disable(cpu) \ - do { local_bh_count(cpu)++; barrier(); } while (0) + do { preempt_disable(); local_bh_count(cpu)++; barrier(); } while (0) #define local_bh_disable() cpu_bh_disable(smp_processor_id()) #define __local_bh_enable() __cpu_bh_enable(smp_processor_id()) -#define in_softirq() (local_bh_count(smp_processor_id()) != 0) +#define in_softirq() ( preempt_is_disabled() && \ + 
(local_bh_count(smp_processor_id()) != 0)) /* * NOTE: this assembly code assumes: @@ -22,7 +23,7 @@ * If you change the offsets in irq_stat then you have to * update this code as well. */ -#define local_bh_enable() \ +#define _local_bh_enable() \ do { \ unsigned int *ptr = &local_bh_count(smp_processor_id()); \ \ @@ -45,4 +46,6 @@ /* no registers clobbered */ ); \ } while (0) +#define local_bh_enable() do { _local_bh_enable(); preempt_enable(); } while (0) + #endif /* __ASM_SOFTIRQ_H */ diff -urN linux-2.4.22/include/asm-i386/spinlock.h linux/include/asm-i386/spinlock.h --- linux-2.4.22/include/asm-i386/spinlock.h 2002-11-28 18:53:15.000000000 -0500 +++ linux/include/asm-i386/spinlock.h 2003-09-23 18:56:14.000000000 -0400 @@ -77,7 +77,7 @@ :"=m" (lock->lock) : : "memory" -static inline void spin_unlock(spinlock_t *lock) +static inline void _raw_spin_unlock(spinlock_t *lock) { #if SPINLOCK_DEBUG if (lock->magic != SPINLOCK_MAGIC) @@ -97,7 +97,7 @@ :"=q" (oldval), "=m" (lock->lock) \ :"0" (oldval) : "memory" -static inline void spin_unlock(spinlock_t *lock) +static inline void _raw_spin_unlock(spinlock_t *lock) { char oldval = 1; #if SPINLOCK_DEBUG @@ -113,7 +113,7 @@ #endif -static inline int spin_trylock(spinlock_t *lock) +static inline int _raw_spin_trylock(spinlock_t *lock) { char oldval; __asm__ __volatile__( @@ -123,7 +123,7 @@ return oldval > 0; } -static inline void spin_lock(spinlock_t *lock) +static inline void _raw_spin_lock(spinlock_t *lock) { #if SPINLOCK_DEBUG __label__ here; @@ -179,7 +179,7 @@ */ /* the spinlock helpers are in arch/i386/kernel/semaphore.c */ -static inline void read_lock(rwlock_t *rw) +static inline void _raw_read_lock(rwlock_t *rw) { #if SPINLOCK_DEBUG if (rw->magic != RWLOCK_MAGIC) @@ -188,7 +188,7 @@ __build_read_lock(rw, "__read_lock_failed"); } -static inline void write_lock(rwlock_t *rw) +static inline void _raw_write_lock(rwlock_t *rw) { #if SPINLOCK_DEBUG if (rw->magic != RWLOCK_MAGIC) @@ -197,10 +197,10 @@ 
__build_write_lock(rw, "__write_lock_failed"); } -#define read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory") -#define write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory") +#define _raw_read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory") +#define _raw_write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory") -static inline int write_trylock(rwlock_t *lock) +static inline int _raw_write_trylock(rwlock_t *lock) { atomic_t *count = (atomic_t *)lock; if (atomic_sub_and_test(RW_LOCK_BIAS, count)) diff -urN linux-2.4.22/include/asm-i386/system.h linux/include/asm-i386/system.h --- linux-2.4.22/include/asm-i386/system.h 2003-08-25 07:44:43.000000000 -0400 +++ linux/include/asm-i386/system.h 2003-09-23 18:56:14.000000000 -0400 @@ -326,6 +326,13 @@ #define __save_and_cli(x) do { __save_flags(x); __cli(); } while(0); #define __save_and_sti(x) do { __save_flags(x); __sti(); } while(0); +#define irqs_disabled() \ +({ \ + unsigned long flags; \ + __save_flags(flags); \ + !(flags & (1<<9)); \ +}) + /* For spinlocks etc */ #if 0 #define local_irq_save(x) __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory") diff -urN linux-2.4.22/include/asm-mips/smplock.h linux/include/asm-mips/smplock.h --- linux-2.4.22/include/asm-mips/smplock.h 2003-08-25 07:44:44.000000000 -0400 +++ linux/include/asm-mips/smplock.h 2003-09-23 18:56:54.000000000 -0400 @@ -8,12 +8,21 @@ #ifndef __ASM_SMPLOCK_H #define __ASM_SMPLOCK_H +#include #include #include extern spinlock_t kernel_flag; +#ifdef CONFIG_SMP #define kernel_locked() spin_is_locked(&kernel_flag) +#else +#ifdef CONFIG_PREEMPT +#define kernel_locked() preempt_get_count() +#else +#define kernel_locked() 1 +#endif +#endif /* * Release global kernel lock and global interrupt lock @@ -45,8 +54,14 @@ */ extern __inline__ void lock_kernel(void) { +#ifdef CONFIG_PREEMPT + if 
(current->lock_depth == -1) + spin_lock(&kernel_flag); + ++current->lock_depth; +#else if (!++current->lock_depth) spin_lock(&kernel_flag); +#endif } extern __inline__ void unlock_kernel(void) diff -urN linux-2.4.22/include/asm-mips/softirq.h linux/include/asm-mips/softirq.h --- linux-2.4.22/include/asm-mips/softirq.h 2002-11-28 18:53:15.000000000 -0500 +++ linux/include/asm-mips/softirq.h 2003-09-23 18:56:14.000000000 -0400 @@ -15,6 +15,7 @@ static inline void cpu_bh_disable(int cpu) { + preempt_disable(); local_bh_count(cpu)++; barrier(); } @@ -23,6 +24,7 @@ { barrier(); local_bh_count(cpu)--; + preempt_enable(); } @@ -36,6 +38,7 @@ cpu = smp_processor_id(); \ if (!--local_bh_count(cpu) && softirq_pending(cpu)) \ do_softirq(); \ + preempt_enable(); \ } while (0) #define in_softirq() (local_bh_count(smp_processor_id()) != 0) diff -urN linux-2.4.22/include/asm-mips/system.h linux/include/asm-mips/system.h --- linux-2.4.22/include/asm-mips/system.h 2003-08-25 07:44:44.000000000 -0400 +++ linux/include/asm-mips/system.h 2003-09-23 18:56:14.000000000 -0400 @@ -337,4 +337,18 @@ #define die_if_kernel(msg, regs) \ __die_if_kernel(msg, regs, __FILE__ ":", __FUNCTION__, __LINE__) +extern __inline__ int intr_on(void) +{ + unsigned long flags; + save_flags(flags); + return flags & 1; +} + +extern __inline__ int intr_off(void) +{ + return ! 
intr_on(); +} + +#define irqs_disabled() intr_off() + #endif /* _ASM_SYSTEM_H */ diff -urN linux-2.4.22/include/asm-ppc/dma.h linux/include/asm-ppc/dma.h --- linux-2.4.22/include/asm-ppc/dma.h 2003-06-13 10:51:38.000000000 -0400 +++ linux/include/asm-ppc/dma.h 2003-09-23 18:56:14.000000000 -0400 @@ -11,6 +11,7 @@ #include #include #include +#include #include /* diff -urN linux-2.4.22/include/asm-ppc/hardirq.h linux/include/asm-ppc/hardirq.h --- linux-2.4.22/include/asm-ppc/hardirq.h 2003-08-25 07:44:44.000000000 -0400 +++ linux/include/asm-ppc/hardirq.h 2003-09-23 18:56:14.000000000 -0400 @@ -31,10 +31,12 @@ * Are we in an interrupt context? Either doing bottom half * or hardware interrupt processing? */ -#define in_interrupt() ({ int __cpu = smp_processor_id(); \ - (local_irq_count(__cpu) + local_bh_count(__cpu) != 0); }) +#define in_interrupt() (preempt_is_disabled() && \ + ({ unsigned long __cpu = smp_processor_id(); \ + (local_irq_count(__cpu) + local_bh_count(__cpu) != 0); })) -#define in_irq() (local_irq_count(smp_processor_id()) != 0) +#define in_irq() (preempt_is_disabled() && \ + (local_irq_count(smp_processor_id()) != 0)) #ifndef CONFIG_SMP @@ -45,6 +47,7 @@ #define hardirq_exit(cpu) (local_irq_count(cpu)--) #define synchronize_irq() do { } while (0) +#define release_irqlock(cpu) do { } while (0) #else /* CONFIG_SMP */ diff -urN linux-2.4.22/include/asm-ppc/highmem.h linux/include/asm-ppc/highmem.h --- linux-2.4.22/include/asm-ppc/highmem.h 2003-08-25 07:44:44.000000000 -0400 +++ linux/include/asm-ppc/highmem.h 2003-09-23 18:56:14.000000000 -0400 @@ -84,6 +84,7 @@ unsigned int idx; unsigned long vaddr; + preempt_disable(); if (page < highmem_start_page) return page_address(page); @@ -105,8 +106,10 @@ unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; unsigned int idx = type + KM_TYPE_NR*smp_processor_id(); - if (vaddr < KMAP_FIX_BEGIN) // FIXME + if (vaddr < KMAP_FIX_BEGIN) { // FIXME + preempt_enable(); return; + } if (vaddr != KMAP_FIX_BEGIN + 
idx * PAGE_SIZE) BUG(); @@ -118,6 +121,7 @@ pte_clear(kmap_pte+idx); flush_tlb_page(0, vaddr); #endif + preempt_enable(); } #endif /* __KERNEL__ */ diff -urN linux-2.4.22/include/asm-ppc/hw_irq.h linux/include/asm-ppc/hw_irq.h --- linux-2.4.22/include/asm-ppc/hw_irq.h 2003-06-13 10:51:38.000000000 -0400 +++ linux/include/asm-ppc/hw_irq.h 2003-09-23 18:56:14.000000000 -0400 @@ -20,6 +20,12 @@ #define __save_and_cli(flags) ({__save_flags(flags);__cli();}) #define __save_and_sti(flags) ({__save_flags(flags);__sti();}) +#define mfmsr() ({unsigned int rval; \ + asm volatile("mfmsr %0" : "=r" (rval)); rval;}) +#define mtmsr(v) asm volatile("mtmsr %0" : : "r" (v)) + +#define irqs_disabled() ((mfmsr() & MSR_EE) == 0) + extern void do_lost_interrupts(unsigned long); #define mask_irq(irq) ({if (irq_desc[irq].handler && irq_desc[irq].handler->disable) irq_desc[irq].handler->disable(irq);}) diff -urN linux-2.4.22/include/asm-ppc/mmu_context.h linux/include/asm-ppc/mmu_context.h --- linux-2.4.22/include/asm-ppc/mmu_context.h 2003-06-13 10:51:38.000000000 -0400 +++ linux/include/asm-ppc/mmu_context.h 2003-09-23 18:56:15.000000000 -0400 @@ -155,6 +155,10 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, int cpu) { +#ifdef CONFIG_PREEMPT + if (preempt_get_count() == 0) + BUG(); +#endif tsk->thread.pgdir = next->pgd; get_mmu_context(next); set_context(next->context, next->pgd); diff -urN linux-2.4.22/include/asm-ppc/pgalloc.h linux/include/asm-ppc/pgalloc.h --- linux-2.4.22/include/asm-ppc/pgalloc.h 2003-06-13 10:51:38.000000000 -0400 +++ linux/include/asm-ppc/pgalloc.h 2003-09-23 18:56:15.000000000 -0400 @@ -65,20 +65,26 @@ { unsigned long *ret; + preempt_disable(); if ((ret = pgd_quicklist) != NULL) { pgd_quicklist = (unsigned long *)(*ret); ret[0] = 0; pgtable_cache_size--; + preempt_enable(); - } else + } else { + preempt_enable(); ret = (unsigned long *)get_pgd_slow(); + } return (pgd_t *)ret; } extern __inline__ void free_pgd_fast(pgd_t 
*pgd) { + preempt_disable(); *(unsigned long **)pgd = pgd_quicklist; pgd_quicklist = (unsigned long *) pgd; pgtable_cache_size++; + preempt_enable(); } extern __inline__ void free_pgd_slow(pgd_t *pgd) @@ -117,19 +123,23 @@ { unsigned long *ret; + preempt_disable(); if ((ret = pte_quicklist) != NULL) { pte_quicklist = (unsigned long *)(*ret); ret[0] = 0; pgtable_cache_size--; } + preempt_enable(); return (pte_t *)ret; } extern __inline__ void pte_free_fast(pte_t *pte) { + preempt_disable(); *(unsigned long **)pte = pte_quicklist; pte_quicklist = (unsigned long *) pte; pgtable_cache_size++; + preempt_enable(); } extern __inline__ void pte_free_slow(pte_t *pte) diff -urN linux-2.4.22/include/asm-ppc/smplock.h linux/include/asm-ppc/smplock.h --- linux-2.4.22/include/asm-ppc/smplock.h 2003-06-13 10:51:38.000000000 -0400 +++ linux/include/asm-ppc/smplock.h 2003-09-23 18:56:15.000000000 -0400 @@ -12,7 +12,15 @@ extern spinlock_t kernel_flag; +#ifdef CONFIG_SMP #define kernel_locked() spin_is_locked(&kernel_flag) +#else +#ifdef CONFIG_PREEMPT +#define kernel_locked() preempt_get_count() +#else +#define kernel_locked() 1 +#endif +#endif /* * Release global kernel lock and global interrupt lock @@ -44,8 +52,14 @@ */ static __inline__ void lock_kernel(void) { +#ifdef CONFIG_PREEMPT + if (current->lock_depth == -1) + spin_lock(&kernel_flag); + ++current->lock_depth; +#else if (!++current->lock_depth) spin_lock(&kernel_flag); +#endif } static __inline__ void unlock_kernel(void) diff -urN linux-2.4.22/include/asm-ppc/softirq.h linux/include/asm-ppc/softirq.h --- linux-2.4.22/include/asm-ppc/softirq.h 2003-06-13 10:51:38.000000000 -0400 +++ linux/include/asm-ppc/softirq.h 2003-09-23 18:56:15.000000000 -0400 @@ -7,6 +7,7 @@ #define local_bh_disable() \ do { \ + preempt_disable(); \ local_bh_count(smp_processor_id())++; \ barrier(); \ } while (0) @@ -15,9 +16,10 @@ do { \ barrier(); \ local_bh_count(smp_processor_id())--; \ + preempt_enable(); \ } while (0) -#define 
local_bh_enable() \ +#define _local_bh_enable() \ do { \ if (!--local_bh_count(smp_processor_id()) \ && softirq_pending(smp_processor_id())) { \ @@ -25,7 +27,14 @@ } \ } while (0) -#define in_softirq() (local_bh_count(smp_processor_id()) != 0) +#define local_bh_enable() \ +do { \ + _local_bh_enable(); \ + preempt_enable(); \ +} while (0) + +#define in_softirq() (preempt_is_disabled() && \ + (local_bh_count(smp_processor_id()) != 0)) #endif /* __ASM_SOFTIRQ_H */ #endif /* __KERNEL__ */ diff -urN linux-2.4.22/include/linux/brlock.h linux/include/linux/brlock.h --- linux-2.4.22/include/linux/brlock.h 2002-11-28 18:53:15.000000000 -0500 +++ linux/include/linux/brlock.h 2003-09-23 18:56:15.000000000 -0400 @@ -171,11 +171,11 @@ } #else -# define br_read_lock(idx) ((void)(idx)) -# define br_read_unlock(idx) ((void)(idx)) -# define br_write_lock(idx) ((void)(idx)) -# define br_write_unlock(idx) ((void)(idx)) -#endif +# define br_read_lock(idx) ({ (void)(idx); preempt_disable(); }) +# define br_read_unlock(idx) ({ (void)(idx); preempt_enable(); }) +# define br_write_lock(idx) ({ (void)(idx); preempt_disable(); }) +# define br_write_unlock(idx) ({ (void)(idx); preempt_enable(); }) +#endif /* CONFIG_SMP */ /* * Now enumerate all of the possible sw/hw IRQ protected diff -urN linux-2.4.22/include/linux/dcache.h linux/include/linux/dcache.h --- linux-2.4.22/include/linux/dcache.h 2002-11-28 18:53:15.000000000 -0500 +++ linux/include/linux/dcache.h 2003-09-23 18:56:15.000000000 -0400 @@ -127,31 +127,6 @@ extern spinlock_t dcache_lock; -/** - * d_drop - drop a dentry - * @dentry: dentry to drop - * - * d_drop() unhashes the entry from the parent - * dentry hashes, so that it won't be found through - * a VFS lookup any more. Note that this is different - * from deleting the dentry - d_delete will try to - * mark the dentry negative if possible, giving a - * successful _negative_ lookup, while d_drop will - * just make the cache lookup fail. 
- * - * d_drop() is used mainly for stuff that wants - * to invalidate a dentry for some reason (NFS - * timeouts or autofs deletes). - */ - -static __inline__ void d_drop(struct dentry * dentry) -{ - spin_lock(&dcache_lock); - list_del(&dentry->d_hash); - INIT_LIST_HEAD(&dentry->d_hash); - spin_unlock(&dcache_lock); -} - static __inline__ int dname_external(struct dentry *d) { return d->d_name.name != d->d_iname; @@ -276,3 +251,34 @@ #endif /* __KERNEL__ */ #endif /* __LINUX_DCACHE_H */ + +#if !defined(__LINUX_DCACHE_H_INLINES) && defined(_TASK_STRUCT_DEFINED) +#define __LINUX_DCACHE_H_INLINES + +#ifdef __KERNEL__ +/** + * d_drop - drop a dentry + * @dentry: dentry to drop + * + * d_drop() unhashes the entry from the parent + * dentry hashes, so that it won't be found through + * a VFS lookup any more. Note that this is different + * from deleting the dentry - d_delete will try to + * mark the dentry negative if possible, giving a + * successful _negative_ lookup, while d_drop will + * just make the cache lookup fail. + * + * d_drop() is used mainly for stuff that wants + * to invalidate a dentry for some reason (NFS + * timeouts or autofs deletes). 
+ */ + +static __inline__ void d_drop(struct dentry * dentry) +{ + spin_lock(&dcache_lock); + list_del(&dentry->d_hash); + INIT_LIST_HEAD(&dentry->d_hash); + spin_unlock(&dcache_lock); +} +#endif +#endif diff -urN linux-2.4.22/include/linux/fs_struct.h linux/include/linux/fs_struct.h --- linux-2.4.22/include/linux/fs_struct.h 2001-07-13 18:10:44.000000000 -0400 +++ linux/include/linux/fs_struct.h 2003-09-23 18:56:15.000000000 -0400 @@ -20,6 +20,15 @@ extern void exit_fs(struct task_struct *); extern void set_fs_altroot(void); +struct fs_struct *copy_fs_struct(struct fs_struct *old); +void put_fs_struct(struct fs_struct *fs); + +#endif +#endif + +#if !defined(_LINUX_FS_STRUCT_H_INLINES) && defined(_TASK_STRUCT_DEFINED) +#define _LINUX_FS_STRUCT_H_INLINES +#ifdef __KERNEL__ /* * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. * It can block. Requires the big lock held. @@ -65,9 +74,5 @@ mntput(old_pwdmnt); } } - -struct fs_struct *copy_fs_struct(struct fs_struct *old); -void put_fs_struct(struct fs_struct *fs); - #endif #endif diff -urN linux-2.4.22/include/linux/highmem.h linux/include/linux/highmem.h --- linux-2.4.22/include/linux/highmem.h 2003-08-25 07:44:44.000000000 -0400 +++ linux/include/linux/highmem.h 2003-09-23 18:56:15.000000000 -0400 @@ -33,18 +33,8 @@ { unsigned long addr; - __save_flags(*flags); + local_irq_save(*flags); - /* - * could be low - */ - if (!PageHighMem(bh->b_page)) - return bh->b_data; - - /* - * it's a highmem page - */ - __cli(); addr = (unsigned long) kmap_atomic(bh->b_page, KM_BH_IRQ); if (addr & ~PAGE_MASK) @@ -58,7 +48,7 @@ unsigned long ptr = (unsigned long) buffer & PAGE_MASK; kunmap_atomic((void *) ptr, KM_BH_IRQ); - __restore_flags(*flags); + local_irq_restore(*flags); } #else /* CONFIG_HIGHMEM */ diff -urN linux-2.4.22/include/linux/sched.h linux/include/linux/sched.h --- linux-2.4.22/include/linux/sched.h 2003-06-13 10:51:39.000000000 -0400 +++ linux/include/linux/sched.h 2003-09-23 18:56:15.000000000 
-0400 @@ -91,6 +91,7 @@ #define TASK_UNINTERRUPTIBLE 2 #define TASK_ZOMBIE 4 #define TASK_STOPPED 8 +#define PREEMPT_ACTIVE 0x4000000 #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) @@ -147,6 +148,9 @@ #define MAX_SCHEDULE_TIMEOUT LONG_MAX extern signed long FASTCALL(schedule_timeout(signed long timeout)); asmlinkage void schedule(void); +#ifdef CONFIG_PREEMPT +asmlinkage void preempt_schedule(void); +#endif extern int schedule_task(struct tq_struct *task); extern void flush_scheduled_tasks(void); @@ -285,7 +289,7 @@ * offsets of these are hardcoded elsewhere - touch with care */ volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ - unsigned long flags; /* per process flags, defined below */ + int preempt_count; /* 0 => preemptable, <0 => BUG */ int sigpending; mm_segment_t addr_limit; /* thread address space: 0-0xBFFFFFFF for user-thead @@ -327,6 +331,7 @@ struct mm_struct *active_mm; struct list_head local_pages; unsigned int allocation_order, nr_local_pages; + unsigned long flags; /* task state */ struct linux_binfmt *binfmt; @@ -956,5 +961,10 @@ __cond_resched(); } +#define _TASK_STRUCT_DEFINED +#include +#include +#include + #endif /* __KERNEL__ */ #endif diff -urN linux-2.4.22/include/linux/smp_lock.h linux/include/linux/smp_lock.h --- linux-2.4.22/include/linux/smp_lock.h 2001-11-22 14:46:27.000000000 -0500 +++ linux/include/linux/smp_lock.h 2003-09-23 18:56:15.000000000 -0400 @@ -3,7 +3,7 @@ #include -#ifndef CONFIG_SMP +#if !defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT) #define lock_kernel() do { } while(0) #define unlock_kernel() do { } while(0) diff -urN linux-2.4.22/include/linux/spinlock.h linux/include/linux/spinlock.h --- linux-2.4.22/include/linux/spinlock.h 2002-11-28 18:53:15.000000000 -0500 +++ linux/include/linux/spinlock.h 2003-09-23 18:56:15.000000000 -0400 @@ -2,6 +2,7 @@ #define __LINUX_SPINLOCK_H #include +#include /* * These are the generic versions of the spinlocks and read-write 
@@ -62,8 +63,10 @@ #if (DEBUG_SPINLOCKS < 1) +#ifndef CONFIG_PREEMPT #define atomic_dec_and_lock(atomic,lock) atomic_dec_and_test(atomic) #define ATOMIC_DEC_AND_LOCK +#endif /* * Your basic spinlocks, allowing only a single CPU anywhere @@ -80,11 +83,11 @@ #endif #define spin_lock_init(lock) do { } while(0) -#define spin_lock(lock) (void)(lock) /* Not "unused variable". */ +#define _raw_spin_lock(lock) (void)(lock) /* Not "unused variable". */ #define spin_is_locked(lock) (0) -#define spin_trylock(lock) ({1; }) +#define _raw_spin_trylock(lock) ({1; }) #define spin_unlock_wait(lock) do { } while(0) -#define spin_unlock(lock) do { } while(0) +#define _raw_spin_unlock(lock) do { } while(0) #elif (DEBUG_SPINLOCKS < 2) @@ -144,13 +147,78 @@ #endif #define rwlock_init(lock) do { } while(0) -#define read_lock(lock) (void)(lock) /* Not "unused variable". */ -#define read_unlock(lock) do { } while(0) -#define write_lock(lock) (void)(lock) /* Not "unused variable". */ -#define write_unlock(lock) do { } while(0) +#define _raw_read_lock(lock) (void)(lock) /* Not "unused variable". */ +#define _raw_read_unlock(lock) do { } while(0) +#define _raw_write_lock(lock) (void)(lock) /* Not "unused variable". */ +#define _raw_write_unlock(lock) do { } while(0) #endif /* !SMP */ +#ifdef CONFIG_PREEMPT + +#define preempt_get_count() (current->preempt_count) +#define preempt_is_disabled() (preempt_get_count() != 0) + +#define preempt_disable() \ +do { \ + ++current->preempt_count; \ + barrier(); \ +} while (0) + +#define preempt_enable_no_resched() \ +do { \ + --current->preempt_count; \ + barrier(); \ +} while (0) + +#define preempt_enable() \ +do { \ + --current->preempt_count; \ + barrier(); \ + if (unlikely(current->preempt_count < current->need_resched)) \ + preempt_schedule(); \ +} while (0) + +#define spin_lock(lock) \ +do { \ + preempt_disable(); \ + _raw_spin_lock(lock); \ +} while(0) + +#define spin_trylock(lock) ({preempt_disable(); _raw_spin_trylock(lock) ? 
\ + 1 : ({preempt_enable(); 0;});}) +#define spin_unlock(lock) \ +do { \ + _raw_spin_unlock(lock); \ + preempt_enable(); \ +} while (0) + +#define read_lock(lock) ({preempt_disable(); _raw_read_lock(lock);}) +#define read_unlock(lock) ({_raw_read_unlock(lock); preempt_enable();}) +#define write_lock(lock) ({preempt_disable(); _raw_write_lock(lock);}) +#define write_unlock(lock) ({_raw_write_unlock(lock); preempt_enable();}) +#define write_trylock(lock) ({preempt_disable();_raw_write_trylock(lock) ? \ + 1 : ({preempt_enable(); 0;});}) + +#else + +#define preempt_get_count() (0) +#define preempt_is_disabled() (1) +#define preempt_disable() do { } while (0) +#define preempt_enable_no_resched() do {} while(0) +#define preempt_enable() do { } while (0) + +#define spin_lock(lock) _raw_spin_lock(lock) +#define spin_trylock(lock) _raw_spin_trylock(lock) +#define spin_unlock(lock) _raw_spin_unlock(lock) + +#define read_lock(lock) _raw_read_lock(lock) +#define read_unlock(lock) _raw_read_unlock(lock) +#define write_lock(lock) _raw_write_lock(lock) +#define write_unlock(lock) _raw_write_unlock(lock) +#define write_trylock(lock) _raw_write_trylock(lock) +#endif + /* "lock on reference count zero" */ #ifndef ATOMIC_DEC_AND_LOCK #include diff -urN linux-2.4.22/include/linux/tqueue.h linux/include/linux/tqueue.h --- linux-2.4.22/include/linux/tqueue.h 2001-11-22 14:46:19.000000000 -0500 +++ linux/include/linux/tqueue.h 2003-09-23 18:56:15.000000000 -0400 @@ -94,6 +94,22 @@ extern spinlock_t tqueue_lock; /* + * Call all "bottom halfs" on a given list. + */ + +extern void __run_task_queue(task_queue *list); + +static inline void run_task_queue(task_queue *list) +{ + if (TQ_ACTIVE(*list)) + __run_task_queue(list); +} + +#endif /* _LINUX_TQUEUE_H */ + +#if !defined(_LINUX_TQUEUE_H_INLINES) && defined(_TASK_STRUCT_DEFINED) +#define _LINUX_TQUEUE_H_INLINES +/* * Queue a task on a tq. Return non-zero if it was successfully * added. 
*/ @@ -109,17 +125,4 @@ } return ret; } - -/* - * Call all "bottom halfs" on a given list. - */ - -extern void __run_task_queue(task_queue *list); - -static inline void run_task_queue(task_queue *list) -{ - if (TQ_ACTIVE(*list)) - __run_task_queue(list); -} - -#endif /* _LINUX_TQUEUE_H */ +#endif diff -urN linux-2.4.22/kernel/exit.c linux/kernel/exit.c --- linux-2.4.22/kernel/exit.c 2002-11-28 18:53:15.000000000 -0500 +++ linux/kernel/exit.c 2003-09-23 18:56:15.000000000 -0400 @@ -282,7 +282,9 @@ current->mm = NULL; /* active_mm is still 'mm' */ atomic_inc(&mm->mm_count); + preempt_disable(); enter_lazy_tlb(mm, current, smp_processor_id()); + preempt_enable(); return mm; } @@ -313,8 +315,8 @@ /* more a memory barrier than a real lock */ task_lock(tsk); tsk->mm = NULL; - task_unlock(tsk); enter_lazy_tlb(mm, current, smp_processor_id()); + task_unlock(tsk); mmput(mm); } } @@ -435,6 +437,11 @@ tsk->flags |= PF_EXITING; del_timer_sync(&tsk->real_timer); + if (unlikely(preempt_get_count())) + printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", + current->comm, current->pid, + preempt_get_count()); + fake_volatile: #ifdef CONFIG_BSD_PROCESS_ACCT acct_process(code); diff -urN linux-2.4.22/kernel/fork.c linux/kernel/fork.c --- linux-2.4.22/kernel/fork.c 2003-08-25 07:44:44.000000000 -0400 +++ linux/kernel/fork.c 2003-09-23 18:56:15.000000000 -0400 @@ -687,6 +687,13 @@ if (p->binfmt && p->binfmt->module) __MOD_INC_USE_COUNT(p->binfmt->module); +#ifdef CONFIG_PREEMPT + /* + * Continue with preemption disabled as part of the context + * switch, so start with preempt_count set to 1. 
+ */ + p->preempt_count = 1; +#endif p->did_exec = 0; p->swappable = 0; p->state = TASK_UNINTERRUPTIBLE; diff -urN linux-2.4.22/kernel/ksyms.c linux/kernel/ksyms.c --- linux-2.4.22/kernel/ksyms.c 2003-08-25 07:44:44.000000000 -0400 +++ linux/kernel/ksyms.c 2003-09-23 18:56:16.000000000 -0400 @@ -456,6 +456,9 @@ EXPORT_SYMBOL(interruptible_sleep_on); EXPORT_SYMBOL(interruptible_sleep_on_timeout); EXPORT_SYMBOL(schedule); +#ifdef CONFIG_PREEMPT +EXPORT_SYMBOL(preempt_schedule); +#endif EXPORT_SYMBOL(schedule_timeout); #if CONFIG_SMP EXPORT_SYMBOL(set_cpus_allowed); diff -urN linux-2.4.22/kernel/sched.c linux/kernel/sched.c --- linux-2.4.22/kernel/sched.c 2003-08-25 07:44:44.000000000 -0400 +++ linux/kernel/sched.c 2003-09-23 18:56:16.000000000 -0400 @@ -489,7 +489,7 @@ task_lock(prev); task_release_cpu(prev); mb(); - if (prev->state == TASK_RUNNING) + if (task_on_runqueue(prev)) goto needs_resched; out_unlock: @@ -519,7 +519,7 @@ goto out_unlock; spin_lock_irqsave(&runqueue_lock, flags); - if ((prev->state == TASK_RUNNING) && !task_has_cpu(prev)) + if (task_on_runqueue(prev) && !task_has_cpu(prev)) reschedule_idle(prev); spin_unlock_irqrestore(&runqueue_lock, flags); goto out_unlock; @@ -532,6 +532,7 @@ asmlinkage void schedule_tail(struct task_struct *prev) { __schedule_tail(prev); + preempt_enable(); } /* @@ -551,9 +552,10 @@ struct list_head *tmp; int this_cpu, c; - spin_lock_prefetch(&runqueue_lock); + preempt_disable(); + BUG_ON(!current->active_mm); need_resched_back: prev = current; @@ -581,6 +583,14 @@ move_last_runqueue(prev); } +#ifdef CONFIG_PREEMPT + /* + * entering from preempt_schedule, off a kernel preemption, + * go straight to picking the next task. 
+ */ + if (unlikely(preempt_get_count() & PREEMPT_ACTIVE)) + goto treat_like_run; +#endif switch (prev->state) { case TASK_INTERRUPTIBLE: if (signal_pending(prev)) { @@ -591,6 +601,9 @@ del_from_runqueue(prev); case TASK_RUNNING:; } +#ifdef CONFIG_PREEMPT + treat_like_run: +#endif prev->need_resched = 0; /* @@ -699,9 +712,31 @@ reacquire_kernel_lock(current); if (current->need_resched) goto need_resched_back; + preempt_enable_no_resched(); return; } +#ifdef CONFIG_PREEMPT +/* + * this is the entry point to schedule() from in-kernel preemption + */ +asmlinkage void preempt_schedule(void) +{ + if (unlikely(irqs_disabled())) + return; + +need_resched: + current->preempt_count += PREEMPT_ACTIVE; + schedule(); + current->preempt_count -= PREEMPT_ACTIVE; + + /* we could miss a preemption opportunity between schedule and now */ + barrier(); + if (unlikely(current->need_resched)) + goto need_resched; +} +#endif /* CONFIG_PREEMPT */ + /* * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just wake everything * up. 
If it's an exclusive wakeup (nr_exclusive == small +ve number) then we wake all the @@ -1365,6 +1400,13 @@ sched_data->curr = current; sched_data->last_schedule = get_cycles(); clear_bit(current->processor, &wait_init_idle); +#ifdef CONFIG_PREEMPT + /* + * fix up the preempt_count for non-CPU0 idle threads + */ + if (current->processor) + current->preempt_count = 0; +#endif } extern void init_timervecs (void); diff -urN linux-2.4.22/kernel/softirq.c linux/kernel/softirq.c --- linux-2.4.22/kernel/softirq.c 2002-11-28 18:53:15.000000000 -0500 +++ linux/kernel/softirq.c 2003-09-23 18:56:16.000000000 -0400 @@ -60,7 +60,7 @@ asmlinkage void do_softirq() { - int cpu = smp_processor_id(); + int cpu; __u32 pending; unsigned long flags; __u32 mask; @@ -70,6 +70,8 @@ local_irq_save(flags); + cpu = smp_processor_id(); + pending = softirq_pending(cpu); if (pending) { @@ -151,10 +153,11 @@ void __tasklet_schedule(struct tasklet_struct *t) { - int cpu = smp_processor_id(); + int cpu; unsigned long flags; local_irq_save(flags); + cpu = smp_processor_id(); t->next = tasklet_vec[cpu].list; tasklet_vec[cpu].list = t; cpu_raise_softirq(cpu, TASKLET_SOFTIRQ); @@ -175,10 +178,11 @@ static void tasklet_action(struct softirq_action *a) { - int cpu = smp_processor_id(); + int cpu; struct tasklet_struct *list; local_irq_disable(); + cpu = smp_processor_id(); list = tasklet_vec[cpu].list; tasklet_vec[cpu].list = NULL; local_irq_enable(); @@ -209,10 +213,11 @@ static void tasklet_hi_action(struct softirq_action *a) { - int cpu = smp_processor_id(); + int cpu; struct tasklet_struct *list; local_irq_disable(); + cpu = smp_processor_id(); list = tasklet_hi_vec[cpu].list; tasklet_hi_vec[cpu].list = NULL; local_irq_enable(); diff -urN linux-2.4.22/kernel/sys.c linux/kernel/sys.c --- linux-2.4.22/kernel/sys.c 2003-08-25 07:44:44.000000000 -0400 +++ linux/kernel/sys.c 2003-09-23 18:56:16.000000000 -0400 @@ -320,6 +320,7 @@ notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL); 
printk(KERN_EMERG "System halted.\n"); machine_halt(); + unlock_kernel(); do_exit(0); break; @@ -327,6 +328,7 @@ notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL); printk(KERN_EMERG "Power down.\n"); machine_power_off(); + unlock_kernel(); do_exit(0); break; diff -urN linux-2.4.22/lib/dec_and_lock.c linux/lib/dec_and_lock.c --- linux-2.4.22/lib/dec_and_lock.c 2001-10-03 12:11:26.000000000 -0400 +++ linux/lib/dec_and_lock.c 2003-09-23 18:56:16.000000000 -0400 @@ -1,5 +1,6 @@ #include #include +#include #include /* diff -urN linux-2.4.22/MAINTAINERS linux/MAINTAINERS --- linux-2.4.22/MAINTAINERS 2003-08-25 07:44:39.000000000 -0400 +++ linux/MAINTAINERS 2003-09-23 18:56:16.000000000 -0400 @@ -1465,6 +1465,14 @@ M: mostrows@styx.uwaterloo.ca S: Maintained +PREEMPTIBLE KERNEL +P: Robert M. Love +M: rml@tech9.net +L: linux-kernel@vger.kernel.org +L: kpreempt-tech@lists.sourceforge.net +W: http://tech9.net/rml/linux +S: Supported + PROMISE DC4030 CACHING DISK CONTROLLER DRIVER P: Peter Denison M: promise@pnd-pc.demon.co.uk diff -urN linux-2.4.22/mm/slab.c linux/mm/slab.c --- linux-2.4.22/mm/slab.c 2003-06-13 10:51:39.000000000 -0400 +++ linux/mm/slab.c 2003-09-23 18:56:16.000000000 -0400 @@ -49,7 +49,8 @@ * constructors and destructors are called without any locking. * Several members in kmem_cache_t and slab_t never change, they * are accessed without any locking. - * The per-cpu arrays are never accessed from the wrong cpu, no locking. + * The per-cpu arrays are never accessed from the wrong cpu, no locking, + * and local interrupts are disabled so slab code is preempt-safe. * The non-constant members are protected with a per-cache irq spinlock. 
* * Further notes from the original documentation: @@ -858,12 +859,14 @@ */ static void smp_call_function_all_cpus(void (*func) (void *arg), void *arg) { + preempt_disable(); local_irq_disable(); func(arg); local_irq_enable(); if (smp_call_function(func, arg, 1, 1)) BUG(); + preempt_enable(); } typedef struct ccupdate_struct_s { diff -urN linux-2.4.22/net/core/dev.c linux/net/core/dev.c --- linux-2.4.22/net/core/dev.c 2003-08-25 07:44:44.000000000 -0400 +++ linux/net/core/dev.c 2003-09-23 18:56:16.000000000 -0400 @@ -1093,9 +1093,15 @@ int cpu = smp_processor_id(); if (dev->xmit_lock_owner != cpu) { + /* + * The spin_lock effectively does a preempt lock, but + * we are about to drop that... + */ + preempt_disable(); spin_unlock(&dev->queue_lock); spin_lock(&dev->xmit_lock); dev->xmit_lock_owner = cpu; + preempt_enable(); if (!netif_queue_stopped(dev)) { if (netdev_nit) @@ -1274,7 +1280,7 @@ int netif_rx(struct sk_buff *skb) { - int this_cpu = smp_processor_id(); + int this_cpu; struct softnet_data *queue; unsigned long flags; @@ -1284,9 +1290,10 @@ /* The code is rearranged so that the path is the most short when CPU is congested, but is still operating. 
*/ - queue = &softnet_data[this_cpu]; local_irq_save(flags); + this_cpu = smp_processor_id(); + queue = &softnet_data[this_cpu]; netdev_rx_stat[this_cpu].total++; if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { diff -urN linux-2.4.22/net/core/skbuff.c linux/net/core/skbuff.c --- linux-2.4.22/net/core/skbuff.c 2003-08-25 07:44:44.000000000 -0400 +++ linux/net/core/skbuff.c 2003-09-23 18:56:16.000000000 -0400 @@ -111,33 +111,37 @@ static __inline__ struct sk_buff *skb_head_from_pool(void) { - struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list; + struct sk_buff_head *list; + struct sk_buff *skb = NULL; + unsigned long flags; - if (skb_queue_len(list)) { - struct sk_buff *skb; - unsigned long flags; + local_irq_save(flags); - local_irq_save(flags); + list = &skb_head_pool[smp_processor_id()].list; + + if (skb_queue_len(list)) skb = __skb_dequeue(list); - local_irq_restore(flags); - return skb; - } - return NULL; + + local_irq_restore(flags); + return skb; } static __inline__ void skb_head_to_pool(struct sk_buff *skb) { - struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list; + struct sk_buff_head *list; + unsigned long flags; - if (skb_queue_len(list) < sysctl_hot_list_len) { - unsigned long flags; + local_irq_save(flags); + list = &skb_head_pool[smp_processor_id()].list; - local_irq_save(flags); + if (skb_queue_len(list) < sysctl_hot_list_len) { __skb_queue_head(list, skb); local_irq_restore(flags); return; } + + local_irq_restore(flags); kmem_cache_free(skbuff_head_cache, skb); } diff -urN linux-2.4.22/net/socket.c linux/net/socket.c --- linux-2.4.22/net/socket.c 2003-08-25 07:44:44.000000000 -0400 +++ linux/net/socket.c 2003-09-23 18:56:16.000000000 -0400 @@ -132,7 +132,7 @@ static struct net_proto_family *net_families[NPROTO]; -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) static atomic_t net_family_lockct = ATOMIC_INIT(0); static spinlock_t net_family_lock = SPIN_LOCK_UNLOCKED; diff -urN 
linux-2.4.22/net/sunrpc/pmap_clnt.c linux/net/sunrpc/pmap_clnt.c --- linux-2.4.22/net/sunrpc/pmap_clnt.c 2002-08-02 20:39:46.000000000 -0400 +++ linux/net/sunrpc/pmap_clnt.c 2003-09-23 18:56:16.000000000 -0400 @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include