linux/arch/x86/kernel
Alexander van Heukelum 2e04bc7656 i386: fix return to 16-bit stack from NMI handler
Returning to a task with a 16-bit stack requires special care: the iret
instruction does not restore the high word of esp in that case. The
espfix code fixes this, but currently is not invoked on NMIs. This means
that a running task gets the upper word of esp clobbered due to intervening
NMIs. To reproduce, compile and run the following program with the nmi
watchdog enabled (nmi_watchdog=2 on the command line). Using gdb you can
see that the high bits of esp contain garbage, while the low bits are
still correct.

This patch puts the espfix code back into the NMI code path.

The patch is slightly complicated due to the irqtrace infrastructure not
being NMI-safe. The NMI return path cannot call TRACE_IRQS_IRET.
Otherwise, the tail of the normal iret-code is correct for the nmi code
path too. To be able to share this code-path, the TRACE_IRQS_IRET was
moved up a bit. The espfix code exists after the TRACE_IRQS_IRET, but
this code explicitly disables interrupts. This short interrupts-off
section is now not traced anymore. The return-to-kernel path now always
includes the preliminary test to decide if the espfix code should be
called. This is never the case, but doing it this way keeps the patch as
simple as possible and the few extra instructions should not affect
timing in any significant way.

 #define _GNU_SOURCE
 #include <stdio.h>
 #include <sys/types.h>
 #include <sys/mman.h>
 #include <unistd.h>
 #include <sys/syscall.h>
 #include <asm/ldt.h>

/*
 * Invoke the raw SYS_modify_ldt system call, forwarding func, ptr and
 * bytecount unchanged, and hand the syscall() result back as an int.
 */
int modify_ldt(int func, void *ptr, unsigned long bytecount)
{
        long rc;

        rc = syscall(SYS_modify_ldt, func, ptr, bytecount);
        return rc;
}

/*
 * Placement of the memory behind the test segment: main() asks mmap() for
 * SEGLIMIT+1 bytes at SEGBASEADDR. This address is assumed to be usable.
 */
 #define SEGBASEADDR 0x10000
 #define SEGLIMIT 0x20000

/*
 * Descriptor for LDT entry 0, describing a 16-bit segment (seg_32bit = 0)
 * based at SEGBASEADDR. main() passes it to modify_ldt() and then loads the
 * resulting selector into ss to run on a 16-bit stack.
 */
struct user_desc desc = {
        .entry_number = 0,
        .base_addr = SEGBASEADDR,
        .limit = SEGLIMIT,
        .seg_32bit = 0,
        .contents = 0, /* NOTE(review): presumably selects a plain data segment; confirm against modify_ldt(2) */
        .read_exec_only = 0,
        .limit_in_pages = 0, /* presumably means .limit is counted in bytes, not pages; TODO confirm */
        .seg_not_present = 0,
        .useable = 1
};

/*
 * Reproducer for esp corruption on a 16-bit stack.
 *
 * Maps the memory behind the LDT segment described by desc, installs the
 * descriptor, then 1000 times: switches to the 16-bit stack, sets the high
 * word of esp to a known value, spins for a while, and executes ud2 if esp
 * no longer equals the copy saved in edx.
 *
 * Returns 0 when all iterations complete, 1 when the setup fails.
 */
int main(void)
{
        setvbuf(stdout, NULL, _IONBF, 0);

        /*
         * Map the memory backing the segment (SEGLIMIT+1 bytes).
         * mmap() signals failure with MAP_FAILED, not NULL. Without
         * MAP_FIXED the address is only a hint, so also make sure the
         * mapping really sits at the base address used in desc.
         */
        char *pointer = mmap((void *)SEGBASEADDR, SEGLIMIT+1,
                        PROT_EXEC|PROT_READ|PROT_WRITE,
                        MAP_SHARED|MAP_ANONYMOUS, -1, 0);
        if (pointer == MAP_FAILED) {
                printf("could not map space\n");
                return 1;
        }
        if (pointer != (char *)SEGBASEADDR) {
                printf("could not map space at %p\n", (void *)SEGBASEADDR);
                munmap(pointer, SEGLIMIT+1);
                return 1;
        }

        /* write ldt, new mode */
        int err = modify_ldt(0x11, &desc, sizeof(desc));
        if (err) {
                printf("error modifying ldt: %i\n", err);
                return 1;
        }

        for (int i=0; i<1000; i++) {
                /*
                 * All general-purpose registers are saved by pusha and
                 * restored by popa, so the asm needs no operands.
                 */
                asm volatile (
                        "pusha\n\t"
                        "mov %ss, %eax\n\t" /* preserve ss:esp */
                        "mov %esp, %ebp\n\t"
                        "push $7\n\t" /* index 0, ldt, user mode */
                        "push $65536-4096\n\t" /* esp */
                        "lss (%esp), %esp\n\t" /* switch to new stack */
                        "push %eax\n\t" /* save old ss:esp on new stack */
                        "push %ebp\n\t"
                        "add $17*65536, %esp\n\t" /* set high bits */
                        "mov %esp, %edx\n\t"

                        "mov $10000000, %ecx\n\t" /* wait... */
                        "1: loop 1b\n\t" /* ... a bit */

                        "cmp %esp, %edx\n\t"
                        "je 1f\n\t"
                        "ud2\n\t" /* esp changed inexplicably! */
                        "1:\n\t"
                        "sub $17*65536, %esp\n\t" /* restore high bits */
                        "lss (%esp), %esp\n\t" /* restore old ss:esp */
                        "popa\n\t");

                printf("\rx%ix", i);
        }

        return 0;
}

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Acked-by: Stas Sergeev <stsp@aknet.ru>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2009-06-17 21:35:09 -07:00
..
acpi PM/ACPI/x86: Fix sparse warning in arch/x86/kernel/acpi/sleep.c 2009-06-12 21:32:29 +02:00
apic x86, ioapic: Don't call disconnect_bsp_APIC if no APIC present 2009-06-17 20:24:39 +02:00
cpu x86: nmi: Add Intel processor 0x6f4 to NMI perfctr1 workaround 2009-06-17 18:20:39 +02:00
.gitignore
alternative.c x86: expand irq-off region in text_poke() 2009-03-10 16:24:23 +01:00
amd_iommu.c amd-iommu: flush domain tlb when attaching a new device 2009-06-15 15:42:00 +02:00
amd_iommu_init.c amd-iommu: resume cleanup 2009-06-16 10:19:16 +02:00
aperture_64.c aperture_64.c: clarify that too small aperture is valid reason for this code 2008-11-28 15:24:39 +01:00
apm_32.c PM core: rename suspend and resume functions 2009-06-12 21:32:31 +02:00
asm-offsets.c
asm-offsets_32.c lguest: optimize by coding restore_flags and irq_enable in assembler. 2009-06-12 22:27:03 +09:30
asm-offsets_64.c x86, boot: make kernel_alignment adjustable; new bzImage fields 2009-05-11 17:44:39 -07:00
audit_64.c
bios_uv.c x86, UV: system table in bios accessed after unmap 2009-04-03 19:25:57 +02:00
bootflag.c
check.c x86: fix 64k corruption-check 2009-03-15 07:03:15 +01:00
cpuid.c Merge branch 'cpus4096-for-linus-3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip 2009-01-03 12:04:39 -08:00
crash.c x86: disable IOMMUs on kernel crash 2009-06-15 15:20:40 +02:00
crash_dump_32.c kdump: make elfcorehdr_addr independent of CONFIG_PROC_VMCORE 2008-10-20 08:52:39 -07:00
crash_dump_64.c kdump: make elfcorehdr_addr independent of CONFIG_PROC_VMCORE 2008-10-20 08:52:39 -07:00
doublefault_32.c x86: fix virt_addr_valid() with CONFIG_DEBUG_VIRTUAL=y, v2 2008-10-13 10:33:15 +02:00
ds.c x86, ds: support Core i7 2009-04-07 13:36:36 +02:00
ds_selftest.c x86, ds: fix buffer alignment in debug store selftest 2009-04-24 10:18:51 +02:00
ds_selftest.h x86, bts: cleanups 2009-03-13 11:57:22 +01:00
dumpstack.c Merge commit 'origin/master' into next 2009-03-30 14:04:53 +11:00
dumpstack.h x86: avoid multiple declaration of kstack_depth_to_print 2009-04-14 11:45:33 +02:00
dumpstack_32.c ftrace: print real return in dumpstack for function graph 2008-12-03 08:56:25 +01:00
dumpstack_64.c x86-64: Move current task from PDA to per-cpu and consolidate with 32-bit. 2009-01-19 00:38:58 +09:00
e820.c x86/pci: remove rounding quirk from e820_setup_gap() 2009-05-11 09:45:14 +02:00
early-quirks.c x86 early quirks: eliminate unused function 2009-04-08 14:16:32 +02:00
early_printk.c x86: properly __init-annotate recent early_printk additions 2009-03-13 02:37:18 +01:00
efi.c x86: correct the conversion of EFI memory types 2009-06-16 17:47:32 -07:00
efi_32.c
efi_64.c Merge branch 'core/percpu' into percpu-cpumask-x86-for-linus-2 2009-03-27 17:28:43 +01:00
efi_stub_32.S Merge branch 'x86/asm' into x86/mm 2009-02-25 08:27:46 +01:00
efi_stub_64.S x86: efi_stub_32,64 - add missing ENDPROCs 2009-02-24 18:08:40 +01:00
entry_32.S i386: fix return to 16-bit stack from NMI handler 2009-06-17 21:35:09 -07:00
entry_64.S Merge branch 'linus' into x86/mce3 2009-06-11 23:31:52 +02:00
ftrace.c x86/function-graph: fix constraint for recording old return value 2009-05-13 13:52:19 -04:00
geode_32.c
head.c x86, debug: remove EBDA debug printk 2008-12-12 11:08:42 +01:00
head32.c x86-32: use brk segment for allocating initial kernel pagetable 2009-03-14 17:23:47 -07:00
head64.c x86: add brk allocation for very, very early allocations 2009-03-14 15:37:14 -07:00
head_32.S x86, vmlinux.lds: unify .text output sections 2009-04-29 10:20:31 +02:00
head_64.S x86: head_64.S - use IDT_ENTRIES instead of hardcoded number 2009-02-24 18:08:38 +01:00
hpet.c x86: hpet: Mark per cpu interrupts IRQF_TIMER to prevent resume failure 2009-06-14 18:24:29 +02:00
i386_ksyms_32.c ftrace: rename FTRACE to FUNCTION_TRACER 2008-10-20 18:27:03 +02:00
i387.c x86, math-emu: fix init_fpu for task != current 2009-03-04 20:33:16 +01:00
i8237.c i8327: fix outb() parameter order 2009-02-10 13:13:23 +01:00
i8253.c clocksource: pass clocksource to read() callback 2009-04-21 13:41:47 -07:00
i8259.c x86: refactor x86_quirks support 2009-02-23 00:08:11 +01:00
init_task.c take init_fs to saner place 2008-12-31 18:07:42 -05:00
io_delay.c x86: io_delay.c cleanup 2009-03-21 16:57:04 +05:30
ioport.c x86-32: use non-lazy io bitmap context switching 2009-03-02 12:07:48 +01:00
irq.c Merge branch 'linus' into x86/mce3 2009-06-11 23:31:52 +02:00
irq_32.c Merge branch 'tj-percpu' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc into core/percpu 2009-02-24 21:52:45 +01:00
irq_64.c x86: unify do_IRQ() 2009-02-09 12:16:05 +01:00
irqinit.c Merge branch 'linus' into x86/mce3 2009-06-11 23:31:52 +02:00
k8.c x86, gart: fix gart detection for Fam11h CPUs 2008-10-28 17:10:27 +01:00
kdebugfs.c x86: kdebugfs.c cleanup 2009-03-21 16:55:45 +05:30
kgdb.c Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip 2009-06-10 15:49:10 -07:00
kprobes.c Merge branch 'tracing/core-v2' into tracing-for-linus 2009-04-02 00:49:02 +02:00
kvm.c Merge branch 'kvm-updates/2.6.31' of git://git.kernel.org/pub/scm/virt/kvm/kvm 2009-06-11 10:03:30 -07:00
kvmclock.c clocksource: pass clocksource to read() callback 2009-04-21 13:41:47 -07:00
ldt.c x86: ldt.c fix style problems 2009-01-02 17:46:24 +01:00
machine_kexec_32.c x86, kexec: fix crashdump panic with CONFIG_KEXEC_JUMP 2009-05-07 22:01:05 -07:00
machine_kexec_64.c x86, kexec: fix crashdump panic with CONFIG_KEXEC_JUMP 2009-05-07 22:01:05 -07:00
Makefile x86 module: merge the rest functions with macros 2009-06-12 21:47:01 +09:30
mca_32.c x86: refactor x86_quirks support 2009-02-23 00:08:11 +01:00
mfgpt_32.c cpumask: remove references to struct irqaction's mask field. 2009-03-30 22:05:14 +10:30
microcode_amd.c x86, microcode: Simplify vfree() use 2009-06-07 16:35:11 +02:00
microcode_core.c x86: microcode: use smp_call_function_single instead of set_cpus_allowed, cleanup of synchronization logic 2009-05-12 10:36:44 +02:00
microcode_intel.c x86: microcode: use smp_call_function_single instead of set_cpus_allowed, cleanup of synchronization logic 2009-05-12 10:36:44 +02:00
mmconf-fam10h_64.c x86: move various CPU initialization objects into .cpuinit.rodata 2009-03-12 13:13:07 +01:00
module.c module: cleanup FIXME comments about trimming exception table entries. 2009-06-12 21:47:05 +09:30
mpparse.c x86, irq: don't call mp_config_acpi_gsi() if update_mptable is not enabled 2009-05-18 09:33:29 +02:00
msr.c x86: msr.c fix style problems 2009-01-12 11:22:50 +01:00
olpc.c x86, olpc: fix model detection without OFW 2009-02-14 23:05:25 +01:00
paravirt-spinlocks.c x86: remove byte locks 2009-01-20 17:14:28 +01:00
paravirt.c Merge branch 'x86-xen-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip 2009-06-10 16:16:27 -07:00
paravirt_patch_32.c x86/pvops: add a paravirt_ident functions to allow special patching 2009-01-30 14:51:44 -08:00
paravirt_patch_64.c x86/pvops: add a paravirt_ident functions to allow special patching 2009-01-30 14:51:44 -08:00
pci-calgary_64.c x86: calgary: remove IOMMU_DEBUG 2009-04-14 13:03:36 +02:00
pci-dma.c amd-iommu: disable IOMMU hardware on shutdown 2009-06-15 15:20:40 +02:00
pci-gart_64.c x86: enable GART-IOMMU only after setting up protection methods 2009-06-06 09:42:09 +02:00
pci-nommu.c dma-mapping: replace all DMA_32BIT_MASK macro with DMA_BIT_MASK(32) 2009-04-07 08:31:11 -07:00
pci-swiotlb.c Merge commit 'v2.6.30-rc5' into core/iommu 2009-05-11 14:44:31 +02:00
pcspeaker.c x86: use platform_device_register_simple() 2008-09-22 12:58:36 +02:00
pmtimer_64.c
probe_roms_32.c x86: move mach-default/*.h files to asm/ 2009-01-29 14:16:51 +01:00
process.c Merge branch 'tracing-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip 2009-06-10 19:53:40 -07:00
process_32.c Merge branch 'tracing-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip 2009-06-10 19:53:40 -07:00
process_64.c Merge branch 'tracing-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip 2009-06-10 19:53:40 -07:00
ptrace.c x86, bts, mm: clean up buffer allocation 2009-04-24 10:18:52 +02:00
pvclock.c x86: pvclock: fix shadowed variable warning 2008-10-15 14:25:14 +02:00
quirks.c Merge branch 'linus' into x86/cpu 2009-06-07 12:22:15 +02:00
reboot.c x86: Add quirk for reboot stalls on a Dell Optiplex 360 2009-06-07 15:51:20 +02:00
reboot_fixups_32.c
relocate_kernel_32.S x86, kexec: fix kexec x86 coding style 2009-03-10 18:13:25 -07:00
relocate_kernel_64.S x86, kexec: x86_64: add kexec jump support for x86_64 2009-03-10 18:13:25 -07:00
rtc.c x86: rtc.c cleanup 2009-03-21 16:56:37 +05:30
scx200_32.c
setup.c x86: handle initrd that extends into unusable memory 2009-06-11 15:19:13 -07:00
setup_percpu.c Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip 2009-06-10 16:13:20 -07:00
signal.c Merge branch 'linus' into x86/mce3 2009-06-11 23:31:52 +02:00
smp.c Merge branch 'linus' into x86/mce3 2009-06-11 23:31:52 +02:00
smpboot.c x86: make zap_low_mapping could be used early 2009-06-12 13:50:24 +03:00
stacktrace.c x86/stacktrace: return 0 instead of -1 for stack ops 2009-05-14 23:19:09 -04:00
step.c
sys_i386_32.c
sys_x86_64.c Merge branches 'x86/alternatives', 'x86/cleanups', 'x86/commandline', 'x86/crashdump', 'x86/debug', 'x86/defconfig', 'x86/doc', 'x86/exports', 'x86/fpu', 'x86/gart', 'x86/idle', 'x86/mm', 'x86/mtrr', 'x86/nmi-watchdog', 'x86/oprofile', 'x86/paravirt', 'x86/reboot', 'x86/sparse-fixes', 'x86/tsc', 'x86/urgent' and 'x86/vmalloc' into x86-v28-for-linus-phase1 2008-10-06 18:17:07 +02:00
syscall_64.c x86: Fix ASM_X86__ header guards 2008-10-22 22:55:23 -07:00
syscall_table_32.S Merge branch 'core/signal' into perfcounters/core 2009-04-30 21:16:49 +02:00
tce_64.c
test_nx.c
test_rodata.c
time_32.c x86: refactor x86_quirks support 2009-02-23 00:08:11 +01:00
time_64.c cpumask: remove references to struct irqaction's mask field. 2009-03-30 22:05:14 +10:30
tlb_uv.c Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip 2009-06-10 16:15:14 -07:00
tls.c
tls.h
topology.c x86: topology.c cleanup 2009-03-21 16:55:24 +05:30
trampoline.c x86: change static allocation of trampoline area 2008-12-08 13:49:45 +01:00
trampoline_32.S x86: use _types.h headers in asm where available 2009-02-13 11:35:01 -08:00
trampoline_64.S x86: use _types.h headers in asm where available 2009-02-13 11:35:01 -08:00
traps.c Merge branch 'linus' into x86/mce3 2009-06-11 23:31:52 +02:00
tsc.c Merge branch 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip 2009-06-10 16:15:59 -07:00
tsc_sync.c x86: clean up arch/x86/kernel/tsc_sync.c a bit 2009-05-07 09:32:10 +02:00
uv_irq.c x86, apic: remove genapic.h 2009-02-17 17:52:44 +01:00
uv_sysfs.c x86: prevent /sys/firmware/sgi_uv from being created on non-uv systems 2009-04-08 14:58:10 +02:00
uv_time.c uv_time: add parameter to uv_read_rtc() 2009-04-22 17:41:25 +02:00
verify_cpu_64.S
visws_quirks.c x86: convert obsolete irq_desc_t typedef to struct irq_desc 2009-03-11 09:49:01 +01:00
vm86_32.c Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip 2009-06-10 16:15:14 -07:00
vmi_32.c Merge commit 'origin/master' into for-linus/xen/master 2009-04-07 13:34:16 -07:00
vmiclock_32.c clocksource: pass clocksource to read() callback 2009-04-21 13:41:47 -07:00
vmlinux.lds.S x86: Provide _sdata in the vmlinux.lds.S file 2009-06-12 09:21:33 +02:00
vsmp_64.c Revert "x86: don't compile vsmp_64 for 32bit" 2009-03-25 21:34:28 +01:00
vsyscall_64.c x86: move rdtsc_barrier() into the TSC vread method 2009-05-28 14:15:54 +02:00
x8664_ksyms_64.c x86: convert pda ops to wrappers around x86 percpu accessors 2009-01-16 14:20:22 +01:00
xsave.c x86-64: fix FPU corruption with signals and preemption 2009-04-20 14:33:00 -07:00