Save and restore segment registers on amd64 when entering and leaving

the kernel. Fill and read segment registers for mcontext and
signals. Handle traps caused by restoration of
invalidated selectors.

Implement user-mode creation and manipulation of the process-specific
LDT descriptors for amd64, see sysarch(2).

Implement support for the TSS I/O port access permission bitmap for amd64.

Context-switch LDT and TSS. Do not save and restore segment registers on
the context switch; that is now handled by the kernel enter/leave
trampolines. Remove segment restore code from the signal trampolines for
freebsd/amd64, freebsd/ia32 and linux/i386 for the same reason.

Implement amd64-specific compat shims for sysarch.

The Linuxolator is (temporarily?) switched to use gsbase for the thread_area pointer.

TODO:
Currently, gdb is not adapted to show segment registers from struct reg.
Also, no machine-dependent ptrace command is added to set segment
registers for the debugged process.

In collaboration with:	pho
Discussed with:	peter
Reviewed by:	jhb
Linuxolator tested by:	dchagin
This commit is contained in:
Konstantin Belousov 2009-04-01 13:09:26 +00:00
parent c11d6143ca
commit 2c66cccab7
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=190620
29 changed files with 1336 additions and 352 deletions

View file

@ -64,12 +64,17 @@ ENTRY(acpi_restorecpu)
/* Fetch PCB. */
movq WAKEUP_CTX(xpcb), %r11
/* Restore segment registers. */
mov WAKEUP_PCB(DS), %ds
mov WAKEUP_PCB(ES), %es
mov WAKEUP_XPCB(SS), %ss
mov WAKEUP_PCB(FS), %fs
mov WAKEUP_PCB(GS), %gs
/* Force kernel segment registers. */
movl $KDSEL, %eax
movw %ax, %ds
movl $KDSEL, %eax
movw %ax, %es
movl $KDSEL, %eax
movw %ax, %ss
movl $KUF32SEL, %eax
movw %ax, %fs
movl $KUG32SEL, %eax
movw %ax, %gs
movl $MSR_FSBASE, %ecx
movl WAKEUP_PCB(FSBASE), %eax

View file

@ -219,9 +219,7 @@ IDTVEC(cpustop)
movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
call cpustop_handler
POP_FRAME
iretq
jmp doreti
/*
* Executed by a CPU when it receives an IPI_SUSPEND from another CPU.
@ -251,6 +249,5 @@ IDTVEC(rendezvous)
call smp_rendezvous_action
movq lapic, %rax
movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
POP_FRAME /* Why not doreti? */
iretq
jmp doreti
#endif /* SMP */

View file

@ -75,8 +75,6 @@ ENTRY(cpu_throw)
1:
movq TD_PCB(%rdi),%r8 /* Old pcb */
movl PCPU(CPUID), %eax
movq PCB_FSBASE(%r8),%r9
movq PCB_GSBASE(%r8),%r10
/* release bit from old pm_active */
movq TD_PROC(%rdi), %rdx /* oldtd->td_proc */
movq P_VMSPACE(%rdx), %rdx /* proc->p_vmspace */
@ -110,28 +108,6 @@ ENTRY(cpu_switch)
movq %rbx,PCB_RBX(%r8)
movq %rax,PCB_RIP(%r8)
/*
* Reread fs and gs bases. Explicit fs segment register load
* by the usermode code may change actual fs base without
* updating pcb_{fs,gs}base.
*
* %rdx still contains the mtx, save %rdx around rdmsr.
*/
movq %rdx,%r11
movl $MSR_FSBASE,%ecx
rdmsr
shlq $32,%rdx
leaq (%rax,%rdx),%r9
movl $MSR_KGSBASE,%ecx
rdmsr
shlq $32,%rdx
leaq (%rax,%rdx),%r10
movq %r11,%rdx
testl $PCB_32BIT,PCB_FLAGS(%r8)
jnz store_seg
done_store_seg:
testl $PCB_DBREGS,PCB_FLAGS(%r8)
jnz store_dr /* static predict not taken */
done_store_dr:
@ -192,36 +168,47 @@ sw1:
testl $TDP_KTHREAD,TD_PFLAGS(%rsi)
jnz do_kthread
testl $PCB_32BIT,PCB_FLAGS(%r8)
jnz load_seg
done_load_seg:
/*
* Load ldt register
*/
movq TD_PROC(%rsi),%rcx
cmpq $0, P_MD+MD_LDT(%rcx)
jne do_ldt
xorl %eax,%eax
ld_ldt: lldt %ax
cmpq PCB_FSBASE(%r8),%r9
jz 1f
/* Restore userland %fs */
restore_fsbase:
movl $MSR_FSBASE,%ecx
/* Restore fs base in GDT */
movl PCB_FSBASE(%r8),%eax
movl PCB_FSBASE+4(%r8),%edx
wrmsr
1:
cmpq PCB_GSBASE(%r8),%r10
jz 2f
/* Restore userland %gs */
movl $MSR_KGSBASE,%ecx
movl PCB_GSBASE(%r8),%eax
movl PCB_GSBASE+4(%r8),%edx
wrmsr
2:
movq PCPU(FS32P),%rdx
movw %ax,2(%rdx)
shrl $16,%eax
movb %al,4(%rdx)
shrl $8,%eax
movb %al,7(%rdx)
do_tss:
/* Restore gs base in GDT */
movl PCB_GSBASE(%r8),%eax
movq PCPU(GS32P),%rdx
movw %ax,2(%rdx)
shrl $16,%eax
movb %al,4(%rdx)
shrl $8,%eax
movb %al,7(%rdx)
do_kthread:
/* Do we need to reload tss ? */
movq PCPU(TSSP),%rax
movq PCB_TSSP(%r8),%rdx
testq %rdx,%rdx
cmovzq PCPU(COMMONTSSP),%rdx
cmpq %rax,%rdx
jne do_tss
done_tss:
movq %r8,PCPU(RSP0)
movq %r8,PCPU(CURPCB)
/* Update the TSS_RSP0 pointer for the next interrupt */
movq PCPU(TSSP), %rax
movq %r8, PCPU(RSP0)
movq %r8, PCPU(CURPCB)
addq $COMMON_TSS_RSP0, %rax
movq %rsi, PCPU(CURTHREAD) /* into next thread */
movq %r8, (%rax)
movq %r8,COMMON_TSS_RSP0(%rdx)
movq %rsi,PCPU(CURTHREAD) /* into next thread */
/* Test if debug registers should be restored. */
testl $PCB_DBREGS,PCB_FLAGS(%r8)
@ -250,45 +237,6 @@ done_load_dr:
* We use jumps rather than call in order to avoid the stack.
*/
do_kthread:
/*
* Copy old fs/gsbase to new kthread pcb for future switches
* This maintains curpcb->pcb_[fg]sbase as caches of the MSR
*/
movq %r9,PCB_FSBASE(%r8)
movq %r10,PCB_GSBASE(%r8)
jmp do_tss
store_seg:
mov %gs,PCB_GS(%r8)
testl $PCB_GS32BIT,PCB_FLAGS(%r8)
jnz 2f
1: mov %ds,PCB_DS(%r8)
mov %es,PCB_ES(%r8)
mov %fs,PCB_FS(%r8)
jmp done_store_seg
2: movq PCPU(GS32P),%rax
movq (%rax),%rax
movq %rax,PCB_GS32SD(%r8)
jmp 1b
load_seg:
testl $PCB_GS32BIT,PCB_FLAGS(%r8)
jnz 2f
1: movl $MSR_GSBASE,%ecx
rdmsr
mov PCB_GS(%r8),%gs
wrmsr
mov PCB_DS(%r8),%ds
mov PCB_ES(%r8),%es
mov PCB_FS(%r8),%fs
jmp restore_fsbase
/* Restore userland %gs while preserving kernel gsbase */
2: movq PCPU(GS32P),%rax
movq PCB_GS32SD(%r8),%rcx
movq %rcx,(%rax)
jmp 1b
store_dr:
movq %dr7,%rax /* yes, do the save */
movq %dr0,%r15
@ -325,6 +273,29 @@ load_dr:
movq %r11,%dr6
movq %rax,%dr7
jmp done_load_dr
/*
 * do_tss: switch this CPU to a different TSS.
 *
 * In:    %rdx = address of the TSS to activate (the caller jumps here
 *               when it differs from PCPU(TSSP)).
 * Out:   PCPU(TSSP) updated, the per-CPU TSS descriptor rewritten and
 *        the task register reloaded; continues at done_tss.
 * Clobb: %rax, %rcx.  %rdx is preserved.
 *
 * The 64-bit base is scattered over the system segment descriptor
 * pointed to by PCPU(TSS): bytes 2-3 take bits 15:0, byte 4 bits 23:16,
 * byte 7 bits 31:24 and bytes 8-11 bits 63:32.
 */
do_tss:	movq	%rdx,PCPU(TSSP)
	movq	%rdx,%rcx		/* working copy of the base */
	movq	PCPU(TSS),%rax		/* %rax = TSS descriptor in the GDT */
	movw	%cx,2(%rax)		/* base 15:0 (16-bit store, so %cx) */
	shrq	$16,%rcx
	movb	%cl,4(%rax)		/* base 23:16 */
	shrq	$8,%rcx
	movb	%cl,7(%rax)		/* base 31:24 */
	shrq	$8,%rcx
	movl	%ecx,8(%rax)		/* base 63:32 */
	/*
	 * Rewrite the access byte so the descriptor is an available
	 * (non-busy) TSS again before it is reloaded with ltr.
	 */
	movb	$0x89,5(%rax)	/* unset busy */
	movl	$TSSSEL,%eax
	ltr	%ax
	jmp	done_tss
/*
 * do_ldt: install the process-specific LDT descriptor on this CPU.
 *
 * In:    %rcx = struct proc pointer of the incoming thread (the caller
 *               jumps here when P_MD+MD_LDT(%rcx) is non-zero).
 * Out:   %eax = LDTSEL; control continues at ld_ldt, which executes
 *        lldt %ax.
 * Clobb: %rax, %rdx.
 *
 * The 16-byte descriptor kept at P_MD+MD_LDT_SD in the proc is copied,
 * as two quadwords, into the slot pointed to by PCPU(LDT).
 */
do_ldt: movq PCPU(LDT),%rax
movq P_MD+MD_LDT_SD(%rcx),%rdx
movq %rdx,(%rax)
movq P_MD+MD_LDT_SD+8(%rcx),%rdx
movq %rdx,8(%rax)
movl $LDTSEL,%eax
jmp ld_ldt
END(cpu_switch)
/*
@ -398,12 +369,6 @@ ENTRY(savectx2)
movq (%rsp),%rax
movq %rax,PCB_RIP(%r8)
mov %ds,PCB_DS(%r8)
mov %es,PCB_ES(%r8)
mov %ss,XPCB_SS(%r8)
mov %fs,PCB_FS(%r8)
mov %gs,PCB_GS(%r8)
movq %rbx,PCB_RBX(%r8)
movq %rsp,PCB_RSP(%r8)
movq %rbp,PCB_RBP(%r8)

View file

@ -139,7 +139,11 @@ void
db_show_mdpcpu(struct pcpu *pc)
{
#if 0
db_printf("currentldt = 0x%x\n", pc->pc_currentldt);
#endif
db_printf("curpmap = %p\n", pc->pc_curpmap);
db_printf("tssp = %p\n", pc->pc_tssp);
db_printf("commontssp = %p\n", pc->pc_commontssp);
db_printf("rsp0 = 0x%lx\n", pc->pc_rsp0);
db_printf("gs32p = %p\n", pc->pc_gs32p);
db_printf("ldt = %p\n", pc->pc_ldt);
db_printf("tss = %p\n", pc->pc_tss);
}

View file

@ -69,12 +69,10 @@ static db_varfcn_t db_ss;
#define DB_OFFSET(x) (db_expr_t *)offsetof(struct trapframe, x)
struct db_variable db_regs[] = {
{ "cs", DB_OFFSET(tf_cs), db_frame },
#if 0
{ "ds", DB_OFFSET(tf_ds), db_frame },
{ "es", DB_OFFSET(tf_es), db_frame },
{ "fs", DB_OFFSET(tf_fs), db_frame },
{ "gs", DB_OFFSET(tf_gs), db_frame },
#endif
{ "ss", NULL, db_ss },
{ "rax", DB_OFFSET(tf_rax), db_frame },
{ "rcx", DB_OFFSET(tf_rcx), db_frame },
@ -94,7 +92,7 @@ struct db_variable db_regs[] = {
{ "r15", DB_OFFSET(tf_r15), db_frame },
{ "rip", DB_OFFSET(tf_rip), db_frame },
{ "rflags", DB_OFFSET(tf_rflags), db_frame },
#define DB_N_SHOW_REGS 20 /* Don't show registers after here. */
#define DB_N_SHOW_REGS 24 /* Don't show registers after here. */
{ "dr0", NULL, db_dr0 },
{ "dr1", NULL, db_dr1 },
{ "dr2", NULL, db_dr2 },
@ -357,7 +355,7 @@ db_nextframe(struct amd64_frame **fp, db_addr_t *ip, struct thread *td)
rbp = tf->tf_rbp;
switch (frame_type) {
case TRAP:
db_printf("--- trap %#lr", tf->tf_trapno);
db_printf("--- trap %#r", tf->tf_trapno);
break;
case SYSCALL:
db_printf("--- syscall");

View file

@ -42,6 +42,7 @@
#include <machine/asmacros.h>
#include <machine/psl.h>
#include <machine/trap.h>
#include <machine/specialreg.h>
#include "assym.s"
@ -99,7 +100,7 @@ MCOUNT_LABEL(btrap)
/* Traps that we leave interrupts disabled for.. */
#define TRAP_NOEN(a) \
subq $TF_RIP,%rsp; \
movq $(a),TF_TRAPNO(%rsp) ; \
movl $(a),TF_TRAPNO(%rsp) ; \
movq $0,TF_ADDR(%rsp) ; \
movq $0,TF_ERR(%rsp) ; \
jmp alltraps_noen
@ -111,7 +112,7 @@ IDTVEC(bpt)
/* Regular traps; The cpu does not supply tf_err for these. */
#define TRAP(a) \
subq $TF_RIP,%rsp; \
movq $(a),TF_TRAPNO(%rsp) ; \
movl $(a),TF_TRAPNO(%rsp) ; \
movq $0,TF_ADDR(%rsp) ; \
movq $0,TF_ERR(%rsp) ; \
jmp alltraps
@ -139,7 +140,7 @@ IDTVEC(xmm)
/* This group of traps have tf_err already pushed by the cpu */
#define TRAP_ERR(a) \
subq $TF_ERR,%rsp; \
movq $(a),TF_TRAPNO(%rsp) ; \
movl $(a),TF_TRAPNO(%rsp) ; \
movq $0,TF_ADDR(%rsp) ; \
jmp alltraps
IDTVEC(tss)
@ -164,6 +165,10 @@ alltraps:
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
jz alltraps_testi /* already running with kernel GS.base */
swapgs
movw %fs,TF_FS(%rsp)
movw %gs,TF_GS(%rsp)
movw %es,TF_ES(%rsp)
movw %ds,TF_DS(%rsp)
alltraps_testi:
testl $PSL_I,TF_RFLAGS(%rsp)
jz alltraps_pushregs
@ -185,6 +190,7 @@ alltraps_pushregs_no_rdi:
movq %r13,TF_R13(%rsp)
movq %r14,TF_R14(%rsp)
movq %r15,TF_R15(%rsp)
movl $TF_HASSEGS,TF_FLAGS(%rsp)
FAKE_MCOUNT(TF_RIP(%rsp))
#ifdef KDTRACE_HOOKS
/*
@ -193,7 +199,7 @@ alltraps_pushregs_no_rdi:
* interrupt. For all other trap types, just handle them in
* the usual way.
*/
cmpq $T_BPTFLT,TF_TRAPNO(%rsp)
cmpl $T_BPTFLT,TF_TRAPNO(%rsp)
jne calltrap
/* Check if there is no DTrace hook registered. */
@ -228,13 +234,17 @@ calltrap:
.type alltraps_noen,@function
alltraps_noen:
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
jz alltraps_pushregs /* already running with kernel GS.base */
jz 1f /* already running with kernel GS.base */
swapgs
1: movw %fs,TF_FS(%rsp)
movw %gs,TF_GS(%rsp)
movw %es,TF_ES(%rsp)
movw %ds,TF_DS(%rsp)
jmp alltraps_pushregs
IDTVEC(dblfault)
subq $TF_ERR,%rsp
movq $T_DOUBLEFLT,TF_TRAPNO(%rsp)
movl $T_DOUBLEFLT,TF_TRAPNO(%rsp)
movq $0,TF_ADDR(%rsp)
movq $0,TF_ERR(%rsp)
movq %rdi,TF_RDI(%rsp)
@ -252,6 +262,11 @@ IDTVEC(dblfault)
movq %r13,TF_R13(%rsp)
movq %r14,TF_R14(%rsp)
movq %r15,TF_R15(%rsp)
movw %fs,TF_FS(%rsp)
movw %gs,TF_GS(%rsp)
movw %es,TF_ES(%rsp)
movw %ds,TF_DS(%rsp)
movl $TF_HASSEGS,TF_FLAGS(%rsp)
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
jz 1f /* already running with kernel GS.base */
swapgs
@ -262,7 +277,7 @@ IDTVEC(dblfault)
IDTVEC(page)
subq $TF_ERR,%rsp
movq $T_PAGEFLT,TF_TRAPNO(%rsp)
movl $T_PAGEFLT,TF_TRAPNO(%rsp)
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
jz 1f /* already running with kernel GS.base */
swapgs
@ -270,6 +285,10 @@ IDTVEC(page)
movq %rdi,TF_RDI(%rsp) /* free up a GP register */
movq %cr2,%rdi /* preserve %cr2 before .. */
movq %rdi,TF_ADDR(%rsp) /* enabling interrupts. */
movw %fs,TF_FS(%rsp)
movw %gs,TF_GS(%rsp)
movw %es,TF_ES(%rsp)
movw %ds,TF_DS(%rsp)
testl $PSL_I,TF_RFLAGS(%rsp)
jz alltraps_pushregs_no_rdi
sti
@ -283,17 +302,19 @@ IDTVEC(page)
*/
IDTVEC(prot)
subq $TF_ERR,%rsp
movq $T_PROTFLT,TF_TRAPNO(%rsp)
movl $T_PROTFLT,TF_TRAPNO(%rsp)
movq $0,TF_ADDR(%rsp)
movq %rdi,TF_RDI(%rsp) /* free up a GP register */
leaq doreti_iret(%rip),%rdi
cmpq %rdi,TF_RIP(%rsp)
je 2f /* kernel but with user gsbase!! */
je 1f /* kernel but with user gsbase!! */
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
jz 1f /* already running with kernel GS.base */
2:
swapgs
1:
jz 2f /* already running with kernel GS.base */
1: swapgs
2: movw %fs,TF_FS(%rsp)
movw %gs,TF_GS(%rsp)
movw %es,TF_ES(%rsp)
movw %ds,TF_DS(%rsp)
testl $PSL_I,TF_RFLAGS(%rsp)
jz alltraps_pushregs_no_rdi
sti
@ -316,6 +337,10 @@ IDTVEC(fast_syscall)
movq %rcx,TF_RIP(%rsp) /* %rcx original value is in %r10 */
movq PCPU(SCRATCH_RSP),%r11 /* %r11 already saved */
movq %r11,TF_RSP(%rsp) /* user stack pointer */
movw %fs,TF_FS(%rsp)
movw %gs,TF_GS(%rsp)
movw %es,TF_ES(%rsp)
movw %ds,TF_DS(%rsp)
sti
movq $KUDSEL,TF_SS(%rsp)
movq $KUCSEL,TF_CS(%rsp)
@ -333,40 +358,11 @@ IDTVEC(fast_syscall)
movq %r13,TF_R13(%rsp) /* C preserved */
movq %r14,TF_R14(%rsp) /* C preserved */
movq %r15,TF_R15(%rsp) /* C preserved */
movl $TF_HASSEGS,TF_FLAGS(%rsp)
FAKE_MCOUNT(TF_RIP(%rsp))
movq %rsp, %rdi
call syscall
movq PCPU(CURPCB),%rax
testq $PCB_FULLCTX,PCB_FLAGS(%rax)
jne 3f
1: /* Check for and handle AST's on return to userland */
cli
movq PCPU(CURTHREAD),%rax
testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
je 2f
sti
movq %rsp, %rdi
call ast
jmp 1b
2: /* restore preserved registers */
MEXITCOUNT
movq TF_RDI(%rsp),%rdi /* bonus; preserve arg 1 */
movq TF_RSI(%rsp),%rsi /* bonus: preserve arg 2 */
movq TF_RDX(%rsp),%rdx /* return value 2 */
movq TF_RAX(%rsp),%rax /* return value 1 */
movq TF_RBX(%rsp),%rbx /* C preserved */
movq TF_RBP(%rsp),%rbp /* C preserved */
movq TF_R12(%rsp),%r12 /* C preserved */
movq TF_R13(%rsp),%r13 /* C preserved */
movq TF_R14(%rsp),%r14 /* C preserved */
movq TF_R15(%rsp),%r15 /* C preserved */
movq TF_RFLAGS(%rsp),%r11 /* original %rflags */
movq TF_RIP(%rsp),%rcx /* original %rip */
movq TF_RSP(%rsp),%r9 /* user stack pointer */
movq %r9,%rsp /* original %rsp */
swapgs
sysretq
3: /* Requested full context restore, use doreti for that */
andq $~PCB_FULLCTX,PCB_FLAGS(%rax)
MEXITCOUNT
jmp doreti
@ -405,7 +401,7 @@ IDTVEC(fast_syscall32)
IDTVEC(nmi)
subq $TF_RIP,%rsp
movq $(T_NMI),TF_TRAPNO(%rsp)
movl $(T_NMI),TF_TRAPNO(%rsp)
movq $0,TF_ADDR(%rsp)
movq $0,TF_ERR(%rsp)
movq %rdi,TF_RDI(%rsp)
@ -423,6 +419,11 @@ IDTVEC(nmi)
movq %r13,TF_R13(%rsp)
movq %r14,TF_R14(%rsp)
movq %r15,TF_R15(%rsp)
movw %fs,TF_FS(%rsp)
movw %gs,TF_GS(%rsp)
movw %es,TF_ES(%rsp)
movw %ds,TF_DS(%rsp)
movl $TF_HASSEGS,TF_FLAGS(%rsp)
xorl %ebx,%ebx
testb $SEL_RPL_MASK,TF_CS(%rsp)
jnz nmi_fromuserspace
@ -515,9 +516,7 @@ outofnmi:
nocallchain:
#endif
testl %ebx,%ebx
jz nmi_kernelexit
swapgs
jmp nmi_restoreregs
jnz doreti_exit
nmi_kernelexit:
/*
* Put back the preserved MSR_GSBASE value.
@ -633,7 +632,55 @@ doreti_ast:
*/
doreti_exit:
MEXITCOUNT
movq TF_RDI(%rsp),%rdi
movq PCPU(CURTHREAD),%r8
movq TD_PCB(%r8),%r8
/*
* Do not reload segment registers for kernel.
* Since we do not reload segments registers with sane
* values on kernel entry, descriptors referenced by
* segments registers may be not valid. This is fatal
* for the usermode, but is innocent for the kernel.
*/
testb $SEL_RPL_MASK,TF_CS(%rsp)
jz ld_regs
testl $TF_HASSEGS,TF_FLAGS(%rsp)
je set_segs
do_segs:
/* Restore %fs and fsbase */
movw TF_FS(%rsp),%ax
.globl ld_fs
ld_fs: movw %ax,%fs
cmpw $KUF32SEL,%ax
jne 1f
movl $MSR_FSBASE,%ecx
movl PCB_FSBASE(%r8),%eax
movl PCB_FSBASE+4(%r8),%edx
wrmsr
1:
/* Restore %gs and gsbase */
movw TF_GS(%rsp),%si
pushfq
cli
movl $MSR_GSBASE,%ecx
rdmsr
.globl ld_gs
ld_gs: movw %si,%gs
wrmsr
popfq
cmpw $KUG32SEL,%si
jne 1f
movl $MSR_KGSBASE,%ecx
movl PCB_GSBASE(%r8),%eax
movl PCB_GSBASE+4(%r8),%edx
wrmsr
1: .globl ld_es
ld_es: movw TF_ES(%rsp),%es
.globl ld_ds
ld_ds: movw TF_DS(%rsp),%ds
ld_regs:movq TF_RDI(%rsp),%rdi
movq TF_RSI(%rsp),%rsi
movq TF_RDX(%rsp),%rdx
movq TF_RCX(%rsp),%rcx
@ -657,6 +704,14 @@ doreti_exit:
doreti_iret:
iretq
/*
 * set_segs: supply default user segment selectors for a trapframe that
 * does not carry them.
 *
 * Reached from doreti_exit when the frame returns to a non-kernel %cs
 * and TF_HASSEGS is clear in TF_FLAGS.  Stores KUDSEL into the saved
 * %ds and %es slots, KUF32SEL into %fs and KUG32SEL into %gs, then
 * resumes at do_segs, which loads the registers from the frame.
 *
 * Clobb: %ax.
 */
set_segs:
movw $KUDSEL,%ax
movw %ax,TF_DS(%rsp)
movw %ax,TF_ES(%rsp)
movw $KUF32SEL,TF_FS(%rsp)
movw $KUG32SEL,TF_GS(%rsp)
jmp do_segs
/*
* doreti_iret_fault. Alternative return code for
* the case where we get a fault in the doreti_exit code
@ -671,7 +726,12 @@ doreti_iret_fault:
testl $PSL_I,TF_RFLAGS(%rsp)
jz 1f
sti
1: movq %rdi,TF_RDI(%rsp)
1: movw %fs,TF_FS(%rsp)
movw %gs,TF_GS(%rsp)
movw %es,TF_ES(%rsp)
movw %ds,TF_DS(%rsp)
movl $TF_HASSEGS,TF_FLAGS(%rsp)
movq %rdi,TF_RDI(%rsp)
movq %rsi,TF_RSI(%rsp)
movq %rdx,TF_RDX(%rsp)
movq %rcx,TF_RCX(%rsp)
@ -686,11 +746,48 @@ doreti_iret_fault:
movq %r13,TF_R13(%rsp)
movq %r14,TF_R14(%rsp)
movq %r15,TF_R15(%rsp)
movq $T_PROTFLT,TF_TRAPNO(%rsp)
movl $T_PROTFLT,TF_TRAPNO(%rsp)
movq $0,TF_ERR(%rsp) /* XXX should be the error code */
movq $0,TF_ADDR(%rsp)
FAKE_MCOUNT(TF_RIP(%rsp))
jmp calltrap
/*
 * ds_load_fault: recovery path for a bad saved %ds selector.
 *
 * NOTE(review): presumably the trap handler redirects execution here
 * when the load at ld_ds faults; that wiring is not visible in this
 * file -- confirm against the C trap code.
 *
 * Converts the trapframe at %rsp into a T_PROTFLT frame whose error
 * code is the offending selector (zero-extended to 32 bits), replaces
 * the saved %ds with KUDSEL so the next return attempt uses a default
 * selector, and hands the frame to calltrap.
 *
 * Clobb: %edx.
 */
ALIGN_TEXT
.globl ds_load_fault
ds_load_fault:
movl $T_PROTFLT,TF_TRAPNO(%rsp)
movzwl TF_DS(%rsp),%edx
movl %edx,TF_ERR(%rsp)
movw $KUDSEL,TF_DS(%rsp)
jmp calltrap
/*
 * es_load_fault: recovery path for a bad saved %es selector.
 *
 * NOTE(review): presumably entered when the load at ld_es faults; the
 * redirection is not visible in this file -- confirm against the C
 * trap code.
 *
 * Marks the trapframe at %rsp as T_PROTFLT, records the offending
 * selector (zero-extended to 32 bits) as the error code, replaces the
 * saved %es with KUDSEL, and hands the frame to calltrap.
 *
 * Clobb: %edx.
 */
ALIGN_TEXT
.globl es_load_fault
es_load_fault:
movl $T_PROTFLT,TF_TRAPNO(%rsp)
movzwl TF_ES(%rsp),%edx
movl %edx,TF_ERR(%rsp)
movw $KUDSEL,TF_ES(%rsp)
jmp calltrap
/*
 * fs_load_fault: recovery path for a bad saved %fs selector.
 *
 * NOTE(review): presumably entered when the load at ld_fs faults; the
 * redirection is not visible in this file -- confirm against the C
 * trap code.
 *
 * Marks the trapframe at %rsp as T_PROTFLT, records the offending
 * selector (zero-extended to 32 bits) as the error code, replaces the
 * saved %fs with KUF32SEL, and hands the frame to calltrap.
 *
 * Clobb: %edx.
 */
ALIGN_TEXT
.globl fs_load_fault
fs_load_fault:
movl $T_PROTFLT,TF_TRAPNO(%rsp)
movzwl TF_FS(%rsp),%edx
movl %edx,TF_ERR(%rsp)
movw $KUF32SEL,TF_FS(%rsp)
jmp calltrap
/*
 * gs_load_fault: recovery path for a bad saved %gs selector.
 *
 * NOTE(review): presumably entered when the load at ld_gs faults; the
 * redirection is not visible in this file -- confirm against the C
 * trap code.
 *
 * In doreti_exit the ld_gs load sits between a pushfq and its matching
 * popfq, so a fault there leaves the saved flags on the stack.  The
 * popfq below removes that word (and restores the flags) so that %rsp
 * points at the trapframe again before the TF_* offsets are used.
 *
 * The frame is then marked T_PROTFLT, the offending selector
 * (zero-extended to 32 bits) becomes the error code, the saved %gs is
 * replaced with KUG32SEL, and the frame is handed to calltrap.
 *
 * Clobb: %edx, flags.
 */
ALIGN_TEXT
.globl gs_load_fault
gs_load_fault:
popfq
movl $T_PROTFLT,TF_TRAPNO(%rsp)
movzwl TF_GS(%rsp),%edx
movl %edx,TF_ERR(%rsp)
movw $KUG32SEL,TF_GS(%rsp)
jmp calltrap
#ifdef HWPMC_HOOKS
ENTRY(end_exceptions)
#endif

View file

@ -79,6 +79,10 @@ ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active));
ASSYM(P_MD, offsetof(struct proc, p_md));
ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt));
ASSYM(MD_LDT_SD, offsetof(struct mdproc, md_ldt_sd));
ASSYM(TD_LOCK, offsetof(struct thread, td_lock));
ASSYM(TD_FLAGS, offsetof(struct thread, td_flags));
ASSYM(TD_PCB, offsetof(struct thread, td_pcb));
@ -132,16 +136,13 @@ ASSYM(PCB_RBX, offsetof(struct pcb, pcb_rbx));
ASSYM(PCB_RIP, offsetof(struct pcb, pcb_rip));
ASSYM(PCB_FSBASE, offsetof(struct pcb, pcb_fsbase));
ASSYM(PCB_GSBASE, offsetof(struct pcb, pcb_gsbase));
ASSYM(PCB_DS, offsetof(struct pcb, pcb_ds));
ASSYM(PCB_ES, offsetof(struct pcb, pcb_es));
ASSYM(PCB_FS, offsetof(struct pcb, pcb_fs));
ASSYM(PCB_GS, offsetof(struct pcb, pcb_gs));
ASSYM(PCB_DR0, offsetof(struct pcb, pcb_dr0));
ASSYM(PCB_DR1, offsetof(struct pcb, pcb_dr1));
ASSYM(PCB_DR2, offsetof(struct pcb, pcb_dr2));
ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3));
ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6));
ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
ASSYM(PCB_TSSP, offsetof(struct pcb, pcb_tssp));
ASSYM(PCB_DBREGS, PCB_DBREGS);
ASSYM(PCB_32BIT, PCB_32BIT);
ASSYM(PCB_GS32BIT, PCB_GS32BIT);
@ -193,7 +194,13 @@ ASSYM(TF_CS, offsetof(struct trapframe, tf_cs));
ASSYM(TF_RFLAGS, offsetof(struct trapframe, tf_rflags));
ASSYM(TF_RSP, offsetof(struct trapframe, tf_rsp));
ASSYM(TF_SS, offsetof(struct trapframe, tf_ss));
ASSYM(TF_DS, offsetof(struct trapframe, tf_ds));
ASSYM(TF_ES, offsetof(struct trapframe, tf_es));
ASSYM(TF_FS, offsetof(struct trapframe, tf_fs));
ASSYM(TF_GS, offsetof(struct trapframe, tf_gs));
ASSYM(TF_FLAGS, offsetof(struct trapframe, tf_flags));
ASSYM(TF_SIZE, sizeof(struct trapframe));
ASSYM(TF_HASSEGS, TF_HASSEGS);
ASSYM(SIGF_HANDLER, offsetof(struct sigframe, sf_ahu.sf_handler));
ASSYM(SIGF_UC, offsetof(struct sigframe, sf_uc));
@ -215,7 +222,11 @@ ASSYM(PC_SCRATCH_RSP, offsetof(struct pcpu, pc_scratch_rsp));
ASSYM(PC_CURPMAP, offsetof(struct pcpu, pc_curpmap));
ASSYM(PC_TSSP, offsetof(struct pcpu, pc_tssp));
ASSYM(PC_RSP0, offsetof(struct pcpu, pc_rsp0));
ASSYM(PC_FS32P, offsetof(struct pcpu, pc_fs32p));
ASSYM(PC_GS32P, offsetof(struct pcpu, pc_gs32p));
ASSYM(PC_LDT, offsetof(struct pcpu, pc_ldt));
ASSYM(PC_COMMONTSSP, offsetof(struct pcpu, pc_commontssp));
ASSYM(PC_TSS, offsetof(struct pcpu, pc_tss));
ASSYM(LA_VER, offsetof(struct LAPIC, version));
ASSYM(LA_TPR, offsetof(struct LAPIC, tpr));
@ -230,6 +241,10 @@ ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL));
ASSYM(KUCSEL, GSEL(GUCODE_SEL, SEL_UPL));
ASSYM(KUDSEL, GSEL(GUDATA_SEL, SEL_UPL));
ASSYM(KUC32SEL, GSEL(GUCODE32_SEL, SEL_UPL));
ASSYM(KUF32SEL, GSEL(GUFS32_SEL, SEL_UPL));
ASSYM(KUG32SEL, GSEL(GUGS32_SEL, SEL_UPL));
ASSYM(TSSSEL, GSEL(GPROC0_SEL, SEL_KPL));
ASSYM(LDTSEL, GSEL(GUSERLDT_SEL, SEL_KPL));
ASSYM(SEL_RPL_MASK, SEL_RPL_MASK);
ASSYM(MSR_GSBASE, MSR_GSBASE);

View file

@ -159,7 +159,7 @@ extern vm_offset_t ksym_start, ksym_end;
#define ICH_PMBASE 0x400
#define ICH_SMI_EN ICH_PMBASE + 0x30
int _udatasel, _ucodesel, _ucode32sel;
int _udatasel, _ucodesel, _ucode32sel, _ufssel, _ugssel;
int cold = 1;
@ -192,6 +192,8 @@ struct mtx icu_lock;
struct mem_range_softc mem_range_softc;
struct mtx dt_lock; /* lock for GDT and LDT */
static void
cpu_startup(dummy)
void *dummy;
@ -278,7 +280,7 @@ cpu_startup(dummy)
* Send an interrupt to process.
*
* Stack is set up to allow sigcode stored
* at top to call routine, followed by kcall
* at top to call routine, followed by call
* to sigreturn routine below. After sigreturn
* resets the signal mask, the stack, and the
* frame pointer, it returns to the user
@ -316,6 +318,8 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
get_fpcontext(td, &sf.sf_uc.uc_mcontext);
fpstate_drop(td);
sf.sf_uc.uc_mcontext.mc_fsbase = td->td_pcb->pcb_fsbase;
sf.sf_uc.uc_mcontext.mc_gsbase = td->td_pcb->pcb_gsbase;
/* Allocate space for the signal handler context. */
if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
@ -370,6 +374,11 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
regs->tf_rip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
regs->tf_rflags &= ~(PSL_T | PSL_D);
regs->tf_cs = _ucodesel;
regs->tf_ds = _udatasel;
regs->tf_es = _udatasel;
regs->tf_fs = _ufssel;
regs->tf_gs = _ugssel;
regs->tf_flags = TF_HASSEGS;
PROC_LOCK(p);
mtx_lock(&psp->ps_mtx);
}
@ -401,9 +410,16 @@ sigreturn(td, uap)
ksiginfo_t ksi;
error = copyin(uap->sigcntxp, &uc, sizeof(uc));
if (error != 0)
if (error != 0) {
printf("sigreturn (pid %d): copyin failed\n", p->p_pid);
return (error);
}
ucp = &uc;
if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) {
printf("sigreturn (pid %d): mc_flags %x\n", p->p_pid,
ucp->uc_mcontext.mc_flags);
return (EINVAL);
}
regs = td->td_frame;
rflags = ucp->uc_mcontext.mc_rflags;
/*
@ -420,7 +436,8 @@ sigreturn(td, uap)
* one less debugger trap, so allowing it is fairly harmless.
*/
if (!EFL_SECURE(rflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) {
printf("sigreturn: rflags = 0x%lx\n", rflags);
printf("sigreturn (pid %d): rflags = 0x%lx\n", p->p_pid,
rflags);
return (EINVAL);
}
@ -431,7 +448,7 @@ sigreturn(td, uap)
*/
cs = ucp->uc_mcontext.mc_cs;
if (!CS_SECURE(cs)) {
printf("sigreturn: cs = 0x%x\n", cs);
printf("sigreturn (pid %d): cs = 0x%x\n", p->p_pid, cs);
ksiginfo_init_trap(&ksi);
ksi.ksi_signo = SIGBUS;
ksi.ksi_code = BUS_OBJERR;
@ -442,9 +459,13 @@ sigreturn(td, uap)
}
ret = set_fpcontext(td, &ucp->uc_mcontext);
if (ret != 0)
if (ret != 0) {
printf("sigreturn (pid %d): set_fpcontext\n", p->p_pid);
return (ret);
}
bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs));
td->td_pcb->pcb_fsbase = ucp->uc_mcontext.mc_fsbase;
td->td_pcb->pcb_gsbase = ucp->uc_mcontext.mc_gsbase;
PROC_LOCK(p);
#if defined(COMPAT_43)
@ -738,22 +759,16 @@ exec_setregs(td, entry, stack, ps_strings)
{
struct trapframe *regs = td->td_frame;
struct pcb *pcb = td->td_pcb;
mtx_lock(&dt_lock);
if (td->td_proc->p_md.md_ldt != NULL)
user_ldt_free(td);
else
mtx_unlock(&dt_lock);
critical_enter();
wrmsr(MSR_FSBASE, 0);
wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
pcb->pcb_fsbase = 0;
pcb->pcb_gsbase = 0;
critical_exit();
pcb->pcb_flags &= ~(PCB_32BIT | PCB_GS32BIT);
load_ds(_udatasel);
load_es(_udatasel);
load_fs(_udatasel);
load_gs(_udatasel);
pcb->pcb_ds = _udatasel;
pcb->pcb_es = _udatasel;
pcb->pcb_fs = _udatasel;
pcb->pcb_gs = _udatasel;
pcb->pcb_initial_fpucw = __INITIAL_FPUCW__;
bzero((char *)regs, sizeof(struct trapframe));
@ -763,6 +778,11 @@ exec_setregs(td, entry, stack, ps_strings)
regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
regs->tf_ss = _udatasel;
regs->tf_cs = _ucodesel;
regs->tf_ds = _udatasel;
regs->tf_es = _udatasel;
regs->tf_fs = _ufssel;
regs->tf_gs = _ugssel;
regs->tf_flags = TF_HASSEGS;
/*
* Reset the hardware debug registers if they were in use.
@ -1380,12 +1400,12 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
/*
* make gdt memory segments
*/
gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0];
for (x = 0; x < NGDT; x++) {
if (x != GPROC0_SEL && x != (GPROC0_SEL + 1))
if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) &&
x != GUSERLDT_SEL && x != (GUSERLDT_SEL) + 1)
ssdtosd(&gdt_segs[x], &gdt[x]);
}
gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0];
ssdtosyssd(&gdt_segs[GPROC0_SEL],
(struct system_segment_descriptor *)&gdt[GPROC0_SEL]);
@ -1403,6 +1423,10 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
PCPU_SET(curthread, &thread0);
PCPU_SET(curpcb, thread0.td_pcb);
PCPU_SET(tssp, &common_tss[0]);
PCPU_SET(commontssp, &common_tss[0]);
PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);
PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]);
PCPU_SET(fs32p, &gdt[GUFS32_SEL]);
PCPU_SET(gs32p, &gdt[GUGS32_SEL]);
/*
@ -1415,6 +1439,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
*/
mutex_init();
mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS);
mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF);
/* exceptions */
for (x = 0; x < NIDT; x++)
@ -1503,7 +1528,8 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
common_tss[0].tss_ist2 = (long) np;
/* Set the IO permission bitmap (empty due to tss seg limit) */
common_tss[0].tss_iobase = sizeof(struct amd64tss);
common_tss[0].tss_iobase = sizeof(struct amd64tss) +
IOPAGES * PAGE_SIZE;
gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
ltr(gsel_tss);
@ -1531,10 +1557,12 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
_ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
_udatasel = GSEL(GUDATA_SEL, SEL_UPL);
_ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL);
_ufssel = GSEL(GUFS32_SEL, SEL_UPL);
_ugssel = GSEL(GUGS32_SEL, SEL_UPL);
load_ds(_udatasel);
load_es(_udatasel);
load_fs(_udatasel);
load_fs(_ufssel);
/* setup proc 0's pcb */
thread0.td_pcb->pcb_flags = 0;
@ -1656,6 +1684,17 @@ fill_regs(struct thread *td, struct reg *regs)
regs->r_rflags = tp->tf_rflags;
regs->r_rsp = tp->tf_rsp;
regs->r_ss = tp->tf_ss;
if (tp->tf_flags & TF_HASSEGS) {
regs->r_ds = tp->tf_ds;
regs->r_es = tp->tf_es;
regs->r_fs = tp->tf_fs;
regs->r_gs = tp->tf_gs;
} else {
regs->r_ds = 0;
regs->r_es = 0;
regs->r_fs = 0;
regs->r_gs = 0;
}
return (0);
}
@ -1689,6 +1728,13 @@ set_regs(struct thread *td, struct reg *regs)
tp->tf_rflags = rflags;
tp->tf_rsp = regs->r_rsp;
tp->tf_ss = regs->r_ss;
if (0) { /* XXXKIB */
tp->tf_ds = regs->r_ds;
tp->tf_es = regs->r_es;
tp->tf_fs = regs->r_fs;
tp->tf_gs = regs->r_gs;
tp->tf_flags = TF_HASSEGS;
}
td->td_pcb->pcb_flags |= PCB_FULLCTX;
return (0);
}
@ -1808,8 +1854,15 @@ get_mcontext(struct thread *td, mcontext_t *mcp, int flags)
mcp->mc_cs = tp->tf_cs;
mcp->mc_rsp = tp->tf_rsp;
mcp->mc_ss = tp->tf_ss;
mcp->mc_ds = tp->tf_ds;
mcp->mc_es = tp->tf_es;
mcp->mc_fs = tp->tf_fs;
mcp->mc_gs = tp->tf_gs;
mcp->mc_flags = tp->tf_flags;
mcp->mc_len = sizeof(*mcp);
get_fpcontext(td, mcp);
mcp->mc_fsbase = td->td_pcb->pcb_fsbase;
mcp->mc_gsbase = td->td_pcb->pcb_gsbase;
return (0);
}
@ -1827,7 +1880,8 @@ set_mcontext(struct thread *td, const mcontext_t *mcp)
int ret;
tp = td->td_frame;
if (mcp->mc_len != sizeof(*mcp))
if (mcp->mc_len != sizeof(*mcp) ||
(mcp->mc_flags & ~_MC_FLAG_MASK) != 0)
return (EINVAL);
rflags = (mcp->mc_rflags & PSL_USERCHANGE) |
(tp->tf_rflags & ~PSL_USERCHANGE);
@ -1853,6 +1907,17 @@ set_mcontext(struct thread *td, const mcontext_t *mcp)
tp->tf_rflags = rflags;
tp->tf_rsp = mcp->mc_rsp;
tp->tf_ss = mcp->mc_ss;
tp->tf_flags = mcp->mc_flags;
if (tp->tf_flags & TF_HASSEGS) {
tp->tf_ds = mcp->mc_ds;
tp->tf_es = mcp->mc_es;
tp->tf_fs = mcp->mc_fs;
tp->tf_gs = mcp->mc_gs;
}
if (mcp->mc_flags & _MC_HASBASES) {
td->td_pcb->pcb_fsbase = mcp->mc_fsbase;
td->td_pcb->pcb_gsbase = mcp->mc_gsbase;
}
td->td_pcb->pcb_flags |= PCB_FULLCTX;
return (0);
}

View file

@ -101,8 +101,6 @@ extern pt_entry_t *KPTphys;
/* SMP page table page */
extern pt_entry_t *SMPpt;
extern int _udatasel;
struct pcb stoppcbs[MAXCPU];
struct xpcb *stopxpcbs = NULL;
@ -463,7 +461,8 @@ init_secondary(void)
/* Init tss */
common_tss[cpu] = common_tss[0];
common_tss[cpu].tss_rsp0 = 0; /* not used until after switch */
common_tss[cpu].tss_iobase = sizeof(struct amd64tss);
common_tss[cpu].tss_iobase = sizeof(struct amd64tss) +
IOPAGES * PAGE_SIZE;
common_tss[cpu].tss_ist1 = (long)&doublefault_stack[PAGE_SIZE];
/* The NMI stack runs on IST2. */
@ -472,12 +471,13 @@ init_secondary(void)
/* Prepare private GDT */
gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu];
ssdtosyssd(&gdt_segs[GPROC0_SEL],
(struct system_segment_descriptor *)&gdt[NGDT * cpu + GPROC0_SEL]);
for (x = 0; x < NGDT; x++) {
if (x != GPROC0_SEL && x != (GPROC0_SEL + 1))
if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) &&
x != GUSERLDT_SEL && x != (GUSERLDT_SEL + 1))
ssdtosd(&gdt_segs[x], &gdt[NGDT * cpu + x]);
}
ssdtosyssd(&gdt_segs[GPROC0_SEL],
(struct system_segment_descriptor *)&gdt[NGDT * cpu + GPROC0_SEL]);
ap_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
ap_gdt.rd_base = (long) &gdt[NGDT * cpu];
lgdt(&ap_gdt); /* does magic intra-segment return */
@ -491,8 +491,14 @@ init_secondary(void)
pc->pc_prvspace = pc;
pc->pc_curthread = 0;
pc->pc_tssp = &common_tss[cpu];
pc->pc_commontssp = &common_tss[cpu];
pc->pc_rsp0 = 0;
pc->pc_tss = (struct system_segment_descriptor *)&gdt[NGDT * cpu +
GPROC0_SEL];
pc->pc_fs32p = &gdt[NGDT * cpu + GUFS32_SEL];
pc->pc_gs32p = &gdt[NGDT * cpu + GUGS32_SEL];
pc->pc_ldt = (struct system_segment_descriptor *)&gdt[NGDT * cpu +
GUSERLDT_SEL];
/* Save the per-cpu pointer for use by the NMI handler. */
np->np_pcpu = (register_t) pc;
@ -601,7 +607,7 @@ init_secondary(void)
load_cr4(rcr4() | CR4_PGE);
load_ds(_udatasel);
load_es(_udatasel);
load_fs(_udatasel);
load_fs(_ufssel);
mtx_unlock_spin(&ap_boot_mtx);
/* wait until all the AP's are up */

View file

@ -36,16 +36,39 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/sysproto.h>
#include <machine/specialreg.h>
#include <machine/sysarch.h>
#include <machine/pcb.h>
#include <sys/uio.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h> /* for kernel_map */
#include <vm/vm_extern.h>
#include <machine/frame.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/specialreg.h>
#include <machine/sysarch.h>
#include <machine/tss.h>
#include <machine/vmparam.h>
#include <security/audit/audit.h>
int max_ldt_segment = 1024;
#define LD_PER_PAGE 512
#define NULL_LDT_BASE ((caddr_t)NULL)
#ifdef notyet
#ifdef SMP
static void set_user_ldt_rv(struct vmspace *vmsp);
#endif
#endif
static void user_ldt_derefl(struct proc_ldt *pldt);
#ifndef _SYS_SYSPROTO_H_
struct sysarch_args {
int op;
@ -53,6 +76,83 @@ struct sysarch_args {
};
#endif
/*
 * Handle the I386_GET_LDT and I386_SET_LDT sysarch operations.
 *
 * td        - calling thread, passed through to amd64_get_ldt() and
 *             amd64_set_ldt().
 * uap       - sysarch arguments; uap->op selects the operation and
 *             uap->parms points to a struct i386_ldt_args.
 * uap_space - UIO_USERSPACE if uap->parms is a user address that must
 *             be copied in; any other value makes uap->parms be used
 *             directly as a kernel pointer.
 *
 * Returns 0 or an errno value: the copyin() error, EINVAL when the
 * descriptor count is outside 1..max_ldt_segment, ENOMEM when the
 * temporary descriptor buffer cannot be allocated, or whatever
 * amd64_get_ldt()/amd64_set_ldt() return.  An op other than the two
 * handled here falls through the switch and returns 0.
 */
int
sysarch_ldt(struct thread *td, struct sysarch_args *uap, int uap_space)
{
struct i386_ldt_args *largs, la;
struct user_segment_descriptor *lp;
int error = 0;
/*
* XXXKIB check that the BSM generation code knows to encode
* the op argument.
*/
AUDIT_ARG(cmd, uap->op);
/* Obtain the argument block, copying it in if it lives in userspace. */
if (uap_space == UIO_USERSPACE) {
error = copyin(uap->parms, &la, sizeof(struct i386_ldt_args));
if (error != 0)
return (error);
largs = &la;
} else
largs = (struct i386_ldt_args *)uap->parms;
/* The count is validated for both the get and the set operation. */
if (largs->num > max_ldt_segment || largs->num <= 0)
return (EINVAL);
switch (uap->op) {
case I386_GET_LDT:
error = amd64_get_ldt(td, largs);
break;
case I386_SET_LDT:
if (largs->descs != NULL) {
/*
* Stage the caller's descriptors in a temporary kernel
* buffer; it is freed again whether or not the copyin
* and the set succeed.
*/
lp = (struct user_segment_descriptor *)
kmem_alloc(kernel_map, largs->num *
sizeof(struct user_segment_descriptor));
if (lp == NULL) {
error = ENOMEM;
break;
}
error = copyin(largs->descs, lp, largs->num *
sizeof(struct user_segment_descriptor));
if (error == 0)
error = amd64_set_ldt(td, largs, lp);
kmem_free(kernel_map, (vm_offset_t)lp, largs->num *
sizeof(struct user_segment_descriptor));
} else {
/* No descriptor array supplied: pass NULL through. */
error = amd64_set_ldt(td, largs, NULL);
}
break;
}
return (error);
}
/*
 * Store a 32-bit base address into the user segment descriptor that
 * the current CPU's pcpu gs32p field points to.
 *
 * td   - thread the base belongs to; nothing is done unless it is
 *        curthread.
 * base - 32-bit base; the low 24 bits go to sd_lobase and the top
 *        8 bits to sd_hibase.
 *
 * The per-CPU pointer is fetched and the descriptor written inside a
 * critical section (presumably so the thread cannot move to another
 * CPU between the two steps -- confirm).
 */
void
update_gdt_gsbase(struct thread *td, uint32_t base)
{
struct user_segment_descriptor *sd;
if (td != curthread)
return;
critical_enter();
sd = PCPU_GET(gs32p);
sd->sd_lobase = base & 0xffffff;
sd->sd_hibase = (base >> 24) & 0xff;
critical_exit();
}
/*
 * Store a 32-bit base address into the user segment descriptor that
 * the current CPU's pcpu fs32p field points to.
 *
 * td   - thread the base belongs to; nothing is done unless it is
 *        curthread.
 * base - 32-bit base; the low 24 bits go to sd_lobase and the top
 *        8 bits to sd_hibase.
 *
 * The per-CPU pointer is fetched and the descriptor written inside a
 * critical section (presumably so the thread cannot move to another
 * CPU between the two steps -- confirm).
 */
void
update_gdt_fsbase(struct thread *td, uint32_t base)
{
struct user_segment_descriptor *sd;
if (td != curthread)
return;
critical_enter();
sd = PCPU_GET(fs32p);
sd->sd_lobase = base & 0xffffff;
sd->sd_hibase = (base >> 24) & 0xff;
critical_exit();
}
int
sysarch(td, uap)
struct thread *td;
@ -62,8 +162,36 @@ sysarch(td, uap)
struct pcb *pcb = curthread->td_pcb;
uint32_t i386base;
uint64_t a64base;
struct i386_ioperm_args iargs;
switch(uap->op) {
if (uap->op == I386_GET_LDT || uap->op == I386_SET_LDT)
return (sysarch_ldt(td, uap, UIO_USERSPACE));
/*
* XXXKIB check that the BSM generation code knows to encode
* the op argument.
*/
AUDIT_ARG(cmd, uap->op);
switch (uap->op) {
case I386_GET_IOPERM:
case I386_SET_IOPERM:
if ((error = copyin(uap->parms, &iargs,
sizeof(struct i386_ioperm_args))) != 0)
return (error);
break;
default:
break;
}
switch (uap->op) {
case I386_GET_IOPERM:
error = amd64_get_ioperm(td, &iargs);
if (error == 0)
error = copyout(&iargs, uap->parms,
sizeof(struct i386_ioperm_args));
break;
case I386_SET_IOPERM:
error = amd64_set_ioperm(td, &iargs);
break;
case I386_GET_FSBASE:
i386base = pcb->pcb_fsbase;
error = copyout(&i386base, uap->parms, sizeof(i386base));
@ -71,10 +199,9 @@ sysarch(td, uap)
case I386_SET_FSBASE:
error = copyin(uap->parms, &i386base, sizeof(i386base));
if (!error) {
critical_enter();
wrmsr(MSR_FSBASE, i386base);
pcb->pcb_fsbase = i386base;
critical_exit();
td->td_frame->tf_fs = _ufssel;
update_gdt_fsbase(td, i386base);
}
break;
case I386_GET_GSBASE:
@ -84,10 +211,9 @@ sysarch(td, uap)
case I386_SET_GSBASE:
error = copyin(uap->parms, &i386base, sizeof(i386base));
if (!error) {
critical_enter();
wrmsr(MSR_KGSBASE, i386base);
pcb->pcb_gsbase = i386base;
critical_exit();
td->td_frame->tf_gs = _ugssel;
update_gdt_gsbase(td, i386base);
}
break;
case AMD64_GET_FSBASE:
@ -98,13 +224,10 @@ sysarch(td, uap)
error = copyin(uap->parms, &a64base, sizeof(a64base));
if (!error) {
if (a64base < VM_MAXUSER_ADDRESS) {
critical_enter();
wrmsr(MSR_FSBASE, a64base);
pcb->pcb_fsbase = a64base;
critical_exit();
} else {
td->td_frame->tf_fs = _ufssel;
} else
error = EINVAL;
}
}
break;
@ -116,13 +239,10 @@ sysarch(td, uap)
error = copyin(uap->parms, &a64base, sizeof(a64base));
if (!error) {
if (a64base < VM_MAXUSER_ADDRESS) {
critical_enter();
wrmsr(MSR_KGSBASE, a64base);
pcb->pcb_gsbase = a64base;
critical_exit();
} else {
td->td_frame->tf_gs = _ugssel;
} else
error = EINVAL;
}
}
break;
@ -132,3 +252,424 @@ sysarch(td, uap)
}
return (error);
}
/*
 * Grant or revoke user-mode access to a range of I/O ports for the
 * thread td by editing its private I/O permission bitmap.
 *
 * uap->start and uap->length give the port range and uap->enable selects
 * grant (bit cleared) or revoke (bit set).  The first call for a thread
 * allocates a private TSS followed by the bitmap, initialised to "all
 * ports denied", and switches the current CPU over to it.
 *
 * Returns 0 on success; the error from priv_check()/securelevel_gt() if
 * the caller is not allowed; EINVAL if the range is out of bounds or
 * wraps around; ENOMEM if the TSS cannot be allocated.
 */
int
amd64_set_ioperm(td, uap)
	struct thread *td;
	struct i386_ioperm_args *uap;
{
	int i, error;
	char *iomap;
	struct amd64tss *tssp;
	struct system_segment_descriptor *tss_sd;
	struct pcb *pcb;

	if ((error = priv_check(td, PRIV_IO)) != 0)
		return (error);
	if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
		return (error);
	/*
	 * Reject ranges whose end wraps around as well as ranges that
	 * extend past the bitmap; without the first test a huge length
	 * could make start + length look small and pass the second.
	 */
	if (uap->start > uap->start + uap->length ||
	    uap->start + uap->length > IOPAGES * PAGE_SIZE * NBBY)
		return (EINVAL);

	/*
	 * XXX
	 * While this is restricted to root, we should probably figure out
	 * whether any other driver is using this i/o address, as so not to
	 * cause confusion.  This probably requires a global 'usage registry'.
	 */
	pcb = td->td_pcb;
	if (pcb->pcb_tssp == NULL) {
		tssp = (struct amd64tss *)kmem_alloc(kernel_map,
		    ctob(IOPAGES+1));
		if (tssp == NULL)
			return (ENOMEM);
		/* The bitmap starts right after the TSS structure. */
		iomap = (char *)&tssp[1];
		/*
		 * Deny every port, including the one extra byte that
		 * follows the bitmap.  Filling in u_long units rounded
		 * the "+ 1" away and left that byte untouched.
		 */
		memset(iomap, 0xff, ctob(IOPAGES) + 1);
		critical_enter();
		/* Takes care of tss_rsp0. */
		memcpy(tssp, &common_tss[PCPU_GET(cpuid)],
		    sizeof(struct amd64tss));
		tssp->tss_iobase = sizeof(*tssp);
		pcb->pcb_tssp = tssp;
		/* Point this CPU's TSS descriptor at the new TSS and load it. */
		tss_sd = PCPU_GET(tss);
		tss_sd->sd_lobase = (u_long)tssp & 0xffffff;
		tss_sd->sd_hibase = ((u_long)tssp >> 24) & 0xfffffffffful;
		tss_sd->sd_type = SDT_SYSTSS;
		ltr(GSEL(GPROC0_SEL, SEL_KPL));
		PCPU_SET(tssp, tssp);
		critical_exit();
	} else
		iomap = (char *)&pcb->pcb_tssp[1];

	/* A clear bit permits access to the port, a set bit denies it. */
	for (i = uap->start; i < uap->start + uap->length; i++) {
		if (uap->enable)
			iomap[i >> 3] &= ~(1 << (i & 7));
		else
			iomap[i >> 3] |= (1 << (i & 7));
	}
	return (error);
}
/*
 * Report the I/O permission state of port uap->start for thread td.
 *
 * On return uap->enable is non-zero when the port's bitmap bit is clear,
 * and uap->length is the number of consecutive ports, beginning at
 * uap->start, that share that state.  A thread without a private TSS
 * reports a length of 0.
 *
 * Returns EINVAL when uap->start lies beyond the bitmap, otherwise 0.
 */
int
amd64_get_ioperm(td, uap)
	struct thread *td;
	struct i386_ioperm_args *uap;
{
	char *bitmap;
	int bit, first;

	if (uap->start >= IOPAGES * PAGE_SIZE * NBBY)
		return (EINVAL);

	/* No private TSS means no bitmap has been set up for this thread. */
	if (td->td_pcb->pcb_tssp == NULL) {
		uap->length = 0;
		return (0);
	}

	/* The bitmap starts right after the TSS structure. */
	bitmap = (char *)&td->td_pcb->pcb_tssp[1];

	bit = uap->start;
	first = (bitmap[bit >> 3] >> (bit & 7)) & 1;
	uap->enable = !first;
	uap->length = 1;

	/* Extend the run for as long as the following bits match the first. */
	for (bit = uap->start + 1; bit < IOPAGES * PAGE_SIZE * NBBY; bit++) {
		if (((bitmap[bit >> 3] >> (bit & 7)) & 1) != first)
			break;
		uap->length++;
	}
	return (0);
}
/*
 * Update the GDT entry pointing to the LDT to point to the LDT of the
 * current process.
 *
 * The LDT system-segment descriptor cached in mdp->md_ldt_sd is copied
 * into the slot that PCPU_GET(ldt) refers to, after which lldt() is
 * issued with the GUSERLDT_SEL selector.
 */
void
set_user_ldt(struct mdproc *mdp)
{
/* The descriptor store and the lldt() share one critical section. */
critical_enter();
*PCPU_GET(ldt) = mdp->md_ldt_sd;
lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
critical_exit();
}
#ifdef notyet
#ifdef SMP
static void
set_user_ldt_rv(struct vmspace *vmsp)
{
struct thread *td;
td = curthread;
if (vmsp != td->td_proc->p_vmspace)
return;
set_user_ldt(&td->td_proc->p_md);
}
#endif
#endif
struct proc_ldt *
user_ldt_alloc(struct proc *p, int force)
{
struct proc_ldt *pldt, *new_ldt;
struct mdproc *mdp;
struct soft_segment_descriptor sldt;
mtx_assert(&dt_lock, MA_OWNED);
mdp = &p->p_md;
if (!force && mdp->md_ldt != NULL)
return (mdp->md_ldt);
mtx_unlock(&dt_lock);
new_ldt = malloc(sizeof(struct proc_ldt), M_SUBPROC, M_WAITOK);
new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map,
max_ldt_segment * sizeof(struct user_segment_descriptor));
if (new_ldt->ldt_base == NULL) {
FREE(new_ldt, M_SUBPROC);
mtx_lock(&dt_lock);
return (NULL);
}
new_ldt->ldt_refcnt = 1;
sldt.ssd_base = (uint64_t)new_ldt->ldt_base;
sldt.ssd_limit = max_ldt_segment *
sizeof(struct user_segment_descriptor) - 1;
sldt.ssd_type = SDT_SYSLDT;
sldt.ssd_dpl = SEL_KPL;
sldt.ssd_p = 1;
sldt.ssd_long = 0;
sldt.ssd_def32 = 0;
sldt.ssd_gran = 0;
mtx_lock(&dt_lock);
pldt = mdp->md_ldt;
if (pldt != NULL && !force) {
kmem_free(kernel_map, (vm_offset_t)new_ldt->ldt_base,
max_ldt_segment * sizeof(struct user_segment_descriptor));
free(new_ldt, M_SUBPROC);
return (pldt);
}
mdp->md_ldt = new_ldt;
if (pldt != NULL) {
bcopy(pldt->ldt_base, new_ldt->ldt_base, max_ldt_segment *
sizeof(struct user_segment_descriptor));
user_ldt_derefl(pldt);
}
ssdtosyssd(&sldt, &p->p_md.md_ldt_sd);
if (p == curproc)
set_user_ldt(mdp);
return (mdp->md_ldt);
}
/*
 * Detach the LDT from td's process and drop the process's reference to
 * it.
 *
 * dt_lock must be held on entry.  It is released on every path: directly
 * when there is no LDT, and through user_ldt_deref() otherwise.
 */
void
user_ldt_free(struct thread *td)
{
	struct mdproc *mdp;
	struct proc_ldt *old;

	mtx_assert(&dt_lock, MA_OWNED);

	mdp = &td->td_proc->p_md;
	old = mdp->md_ldt;
	if (old == NULL) {
		mtx_unlock(&dt_lock);
		return;
	}

	mdp->md_ldt = NULL;
	bzero(&mdp->md_ldt_sd, sizeof(mdp->md_ldt_sd));
	/* Load the null selector when the caller is the affected thread. */
	if (td == curthread)
		lldt(GSEL(GNULL_SEL, SEL_KPL));
	user_ldt_deref(old);
}
/*
 * Drop one reference to pldt.  When the last reference goes away the
 * descriptor table and the proc_ldt structure are both released.
 * The dt_lock mutex is not touched here.
 */
static void
user_ldt_derefl(struct proc_ldt *pldt)
{

	pldt->ldt_refcnt--;
	if (pldt->ldt_refcnt != 0)
		return;

	kmem_free(kernel_map, (vm_offset_t)pldt->ldt_base,
	    max_ldt_segment * sizeof(struct user_segment_descriptor));
	free(pldt, M_SUBPROC);
}
/*
 * Drop one reference to pldt and release dt_lock.
 * The caller must hold dt_lock on entry; it is no longer held on return.
 */
void
user_ldt_deref(struct proc_ldt *pldt)
{
mtx_assert(&dt_lock, MA_OWNED);
/* May free pldt if this was the last reference. */
user_ldt_derefl(pldt);
mtx_unlock(&dt_lock);
}
/*
* Note for the authors of compat layers (linux, etc): copyout() in
* the function below is not a problem since it presents data in
* arch-specific format (i.e. i386-specific in this case), not in
* the OS-specific one.
*/
/*
 * Copy a range of the calling process's LDT out to user space.
 *
 * uap->start is the first slot, uap->num the number of slots requested
 * and uap->descs the user buffer.  On success td->td_retval[0] holds the
 * number of descriptors copied.
 *
 * Returns EINVAL when the process has no LDT or the range does not fit
 * within max_ldt_segment slots, otherwise the result of copyout().
 */
int
amd64_get_ldt(td, uap)
	struct thread *td;
	struct i386_ldt_args *uap;
{
	struct user_segment_descriptor *src;
	struct proc_ldt *ldt;
	int count, rv;

#ifdef	DEBUG
	printf("amd64_get_ldt: start=%d num=%d descs=%p\n",
	    uap->start, uap->num, (void *)uap->descs);
#endif

	ldt = td->td_proc->p_md.md_ldt;
	if (ldt == NULL)
		return (EINVAL);

	count = min(uap->num, max_ldt_segment);
	if (uap->start > (unsigned int)max_ldt_segment ||
	    (unsigned int)count > (unsigned int)max_ldt_segment ||
	    (unsigned int)(uap->start + count) > (unsigned int)max_ldt_segment)
		return (EINVAL);

	src = &((struct user_segment_descriptor *)(ldt->ldt_base))[uap->start];
	rv = copyout(src, uap->descs,
	    count * sizeof(struct user_segment_descriptor));
	if (rv == 0)
		td->td_retval[0] = count;
	return (rv);
}
int
amd64_set_ldt(td, uap, descs)
struct thread *td;
struct i386_ldt_args *uap;
struct user_segment_descriptor *descs;
{
int error = 0, i;
int largest_ld;
struct mdproc *mdp = &td->td_proc->p_md;
struct proc_ldt *pldt;
struct user_segment_descriptor *dp;
struct proc *p;
#ifdef DEBUG
printf("amd64_set_ldt: start=%d num=%d descs=%p\n",
uap->start, uap->num, (void *)uap->descs);
#endif
p = td->td_proc;
if (descs == NULL) {
/* Free descriptors */
if (uap->start == 0 && uap->num == 0)
uap->num = max_ldt_segment;
if (uap->num <= 0)
return (EINVAL);
if ((pldt = mdp->md_ldt) == NULL ||
uap->start >= max_ldt_segment)
return (0);
largest_ld = uap->start + uap->num;
if (largest_ld > max_ldt_segment)
largest_ld = max_ldt_segment;
i = largest_ld - uap->start;
mtx_lock(&dt_lock);
bzero(&((struct user_segment_descriptor *)(pldt->ldt_base))
[uap->start], sizeof(struct user_segment_descriptor) * i);
mtx_unlock(&dt_lock);
return (0);
}
if (!(uap->start == LDT_AUTO_ALLOC && uap->num == 1)) {
/* verify range of descriptors to modify */
largest_ld = uap->start + uap->num;
if (uap->start >= max_ldt_segment ||
uap->num < 0 || largest_ld > max_ldt_segment)
return (EINVAL);
}
/* Check descriptors for access violations */
for (i = 0; i < uap->num; i++) {
dp = &descs[i];
switch (dp->sd_type) {
case SDT_SYSNULL: /* system null */
dp->sd_p = 0;
break;
case SDT_SYS286TSS:
case SDT_SYSLDT:
case SDT_SYS286BSY:
case SDT_SYS286CGT:
case SDT_SYSTASKGT:
case SDT_SYS286IGT:
case SDT_SYS286TGT:
case SDT_SYSNULL2:
case SDT_SYSTSS:
case SDT_SYSNULL3:
case SDT_SYSBSY:
case SDT_SYSCGT:
case SDT_SYSNULL4:
case SDT_SYSIGT:
case SDT_SYSTGT:
/* I can't think of any reason to allow a user proc
* to create a segment of these types. They are
* for OS use only.
*/
return (EACCES);
/*NOTREACHED*/
/* memory segment types */
case SDT_MEMEC: /* memory execute only conforming */
case SDT_MEMEAC: /* memory execute only accessed conforming */
case SDT_MEMERC: /* memory execute read conforming */
case SDT_MEMERAC: /* memory execute read accessed conforming */
/* Must be "present" if executable and conforming. */
if (dp->sd_p == 0)
return (EACCES);
break;
case SDT_MEMRO: /* memory read only */
case SDT_MEMROA: /* memory read only accessed */
case SDT_MEMRW: /* memory read write */
case SDT_MEMRWA: /* memory read write accessed */
case SDT_MEMROD: /* memory read only expand dwn limit */
case SDT_MEMRODA: /* memory read only expand dwn lim accessed */
case SDT_MEMRWD: /* memory read write expand dwn limit */
case SDT_MEMRWDA: /* memory read write expand dwn lim acessed */
case SDT_MEME: /* memory execute only */
case SDT_MEMEA: /* memory execute only accessed */
case SDT_MEMER: /* memory execute read */
case SDT_MEMERA: /* memory execute read accessed */
break;
default:
return(EINVAL);
/*NOTREACHED*/
}
/* Only user (ring-3) descriptors may be present. */
if ((dp->sd_p != 0) && (dp->sd_dpl != SEL_UPL))
return (EACCES);
}
if (uap->start == LDT_AUTO_ALLOC && uap->num == 1) {
/* Allocate a free slot */
mtx_lock(&dt_lock);
pldt = user_ldt_alloc(p, 0);
if (pldt == NULL) {
mtx_unlock(&dt_lock);
return (ENOMEM);
}
/*
* start scanning a bit up to leave room for NVidia and
* Wine, which still user the "Blat" method of allocation.
*/
i = 16;
dp = &((struct user_segment_descriptor *)(pldt->ldt_base))[i];
for (; i < max_ldt_segment; ++i, ++dp) {
if (dp->sd_type == SDT_SYSNULL)
break;
}
if (i >= max_ldt_segment) {
mtx_unlock(&dt_lock);
return (ENOSPC);
}
uap->start = i;
error = amd64_set_ldt_data(td, i, 1, descs);
mtx_unlock(&dt_lock);
} else {
largest_ld = uap->start + uap->num;
if (largest_ld > max_ldt_segment)
return (EINVAL);
mtx_lock(&dt_lock);
if (user_ldt_alloc(p, 0) != NULL) {
error = amd64_set_ldt_data(td, uap->start, uap->num,
descs);
}
mtx_unlock(&dt_lock);
}
if (error == 0)
td->td_retval[0] = uap->start;
return (error);
}
int
amd64_set_ldt_data(struct thread *td, int start, int num,
struct user_segment_descriptor *descs)
{
struct mdproc *mdp = &td->td_proc->p_md;
struct proc_ldt *pldt = mdp->md_ldt;
mtx_assert(&dt_lock, MA_OWNED);
/* Fill in range */
bcopy(descs,
&((struct user_segment_descriptor *)(pldt->ldt_base))[start],
num * sizeof(struct user_segment_descriptor));
return (0);
}

View file

@ -171,6 +171,52 @@ SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RW,
extern char *syscallnames[];
/* #define DEBUG 1 */
#ifdef DEBUG
/*
 * Debug aid: print diagnostics for a fault taken while loading a segment
 * register.
 *
 * segn is a short label for the register involved and frame is the
 * trapframe of the fault.  The routine prints the pid, the error code and
 * the current process's LDT base and reference count, emits a backtrace,
 * and then dumps every field of a second trapframe.
 */
static void
report_seg_fault(const char *segn, struct trapframe *frame)
{
struct proc_ldt *pldt;
struct trapframe *pf;
pldt = curproc->p_md.md_ldt;
printf("%d: %s load fault %lx %p %d\n",
curproc->p_pid, segn, frame->tf_err,
pldt != NULL ? pldt->ldt_base : NULL,
pldt != NULL ? pldt->ldt_refcnt : 0);
kdb_backtrace();
/*
 * The stack pointer saved in the faulting frame is treated as the
 * address of another trapframe.
 * NOTE(review): presumably the frame that was being restored when the
 * segment load faulted -- confirm against the return-to-user path.
 */
pf = (struct trapframe *)frame->tf_rsp;
printf("rdi %lx\n", pf->tf_rdi);
printf("rsi %lx\n", pf->tf_rsi);
printf("rdx %lx\n", pf->tf_rdx);
printf("rcx %lx\n", pf->tf_rcx);
printf("r8 %lx\n", pf->tf_r8);
printf("r9 %lx\n", pf->tf_r9);
printf("rax %lx\n", pf->tf_rax);
printf("rbx %lx\n", pf->tf_rbx);
printf("rbp %lx\n", pf->tf_rbp);
printf("r10 %lx\n", pf->tf_r10);
printf("r11 %lx\n", pf->tf_r11);
printf("r12 %lx\n", pf->tf_r12);
printf("r13 %lx\n", pf->tf_r13);
printf("r14 %lx\n", pf->tf_r14);
printf("r15 %lx\n", pf->tf_r15);
/* Segment selectors and bookkeeping fields of that frame. */
printf("fs %x\n", pf->tf_fs);
printf("gs %x\n", pf->tf_gs);
printf("es %x\n", pf->tf_es);
printf("ds %x\n", pf->tf_ds);
printf("tno %x\n", pf->tf_trapno);
printf("adr %lx\n", pf->tf_addr);
printf("flg %x\n", pf->tf_flags);
/* The remaining fields: err, rip, cs, rflags, rsp, ss. */
printf("err %lx\n", pf->tf_err);
printf("rip %lx\n", pf->tf_rip);
printf("cs %lx\n", pf->tf_cs);
printf("rfl %lx\n", pf->tf_rflags);
printf("rsp %lx\n", pf->tf_rsp);
printf("ss %lx\n", pf->tf_ss);
}
#endif
/*
* Exception, fault, and trap interface to the FreeBSD kernel.
* This common code is called from assembly language IDT gate entry
@ -258,6 +304,9 @@ trap(struct trapframe *frame)
*/
printf("kernel trap %d with interrupts disabled\n",
type);
#ifdef DEBUG
report_seg_fault("hlt", frame);
#endif
/*
* We shouldn't enable interrupts while holding a
* spin lock or servicing an NMI.
@ -470,6 +519,38 @@ trap(struct trapframe *frame)
frame->tf_rip = (long)doreti_iret_fault;
goto out;
}
if (frame->tf_rip == (long)ld_ds) {
#ifdef DEBUG
report_seg_fault("ds", frame);
#endif
frame->tf_rip = (long)ds_load_fault;
frame->tf_ds = _udatasel;
goto out;
}
if (frame->tf_rip == (long)ld_es) {
#ifdef DEBUG
report_seg_fault("es", frame);
#endif
frame->tf_rip = (long)es_load_fault;
frame->tf_es = _udatasel;
goto out;
}
if (frame->tf_rip == (long)ld_fs) {
#ifdef DEBUG
report_seg_fault("fs", frame);
#endif
frame->tf_rip = (long)fs_load_fault;
frame->tf_fs = _ufssel;
goto out;
}
if (frame->tf_rip == (long)ld_gs) {
#ifdef DEBUG
report_seg_fault("gs", frame);
#endif
frame->tf_rip = (long)gs_load_fault;
frame->tf_gs = _ugssel;
goto out;
}
if (PCPU_GET(curpcb)->pcb_onfault != NULL) {
frame->tf_rip =
(long)PCPU_GET(curpcb)->pcb_onfault;
@ -564,6 +645,9 @@ trap(struct trapframe *frame)
trapsignal(td, &ksi);
#ifdef DEBUG
{
register_t rg,rgk, rf;
if (type <= MAX_TRAP_MSG) {
uprintf("fatal process exception: %s",
trap_msg[type]);
@ -571,6 +655,17 @@ trap(struct trapframe *frame)
uprintf(", fault VA = 0x%lx", frame->tf_addr);
uprintf("\n");
}
rf = rdmsr(0xc0000100);
rg = rdmsr(0xc0000101);
rgk = rdmsr(0xc0000102);
uprintf("pid %d TRAP %d rip %lx err %lx addr %lx cs %lx ss %lx ds %x "
"es %x fs %x fsbase %lx %lx gs %x gsbase %lx %lx %lx\n",
curproc->p_pid, type, frame->tf_rip, frame->tf_err,
frame->tf_addr,
frame->tf_cs, frame->tf_ss, frame->tf_ds, frame->tf_es,
frame->tf_fs, td->td_pcb->pcb_fsbase, rf,
frame->tf_gs, td->td_pcb->pcb_gsbase, rg, rgk);
}
#endif
user:

View file

@ -71,6 +71,7 @@ __FBSDID("$FreeBSD$");
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/specialreg.h>
#include <machine/tss.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
@ -102,12 +103,24 @@ cpu_fork(td1, p2, td2, flags)
{
register struct proc *p1;
struct pcb *pcb2;
struct mdproc *mdp2;
struct mdproc *mdp1, *mdp2;
struct proc_ldt *pldt;
pmap_t pmap2;
p1 = td1->td_proc;
if ((flags & RFPROC) == 0)
if ((flags & RFPROC) == 0) {
if ((flags & RFMEM) == 0) {
/* unshare user LDT */
mdp1 = &p1->p_md;
mtx_lock(&dt_lock);
if ((pldt = mdp1->md_ldt) != NULL &&
pldt->ldt_refcnt > 1 &&
user_ldt_alloc(p1, 1) == NULL)
panic("could not copy LDT");
mtx_unlock(&dt_lock);
}
return;
}
/* Ensure that p1's pcb is up to date. */
fpuexit(td1);
@ -170,6 +183,32 @@ cpu_fork(td1, p2, td2, flags)
td2->td_md.md_spinlock_count = 1;
td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
/* As on i386, do not copy the I/O permission bitmap. */
pcb2->pcb_tssp = NULL;
/* Copy the LDT, if necessary. */
mdp1 = &td1->td_proc->p_md;
mdp2 = &p2->p_md;
mtx_lock(&dt_lock);
if (mdp1->md_ldt != NULL) {
if (flags & RFMEM) {
mdp1->md_ldt->ldt_refcnt++;
mdp2->md_ldt = mdp1->md_ldt;
bcopy(&mdp1->md_ldt_sd, &mdp2->md_ldt_sd, sizeof(struct
system_segment_descriptor));
} else {
mdp2->md_ldt = NULL;
mdp2->md_ldt = user_ldt_alloc(p2, 0);
if (mdp2->md_ldt == NULL)
panic("could not copy LDT");
amd64_set_ldt_data(td2, 0, max_ldt_segment,
(struct user_segment_descriptor *)
mdp1->md_ldt->ldt_base);
}
} else
mdp2->md_ldt = NULL;
mtx_unlock(&dt_lock);
/*
* Now, cpu_switch() can schedule the new process.
* pcb_rsp is loaded pointing to the cpu_switch() stack frame
@ -204,25 +243,49 @@ cpu_set_fork_handler(td, func, arg)
void
cpu_exit(struct thread *td)
{
/*
* If this process has a custom LDT, release it.
*/
mtx_lock(&dt_lock);
if (td->td_proc->p_md.md_ldt != 0)
user_ldt_free(td);
else
mtx_unlock(&dt_lock);
}
void
cpu_thread_exit(struct thread *td)
{
struct pcb *pcb;
if (td == PCPU_GET(fpcurthread))
fpudrop();
pcb = td->td_pcb;
/* Disable any hardware breakpoints. */
if (td->td_pcb->pcb_flags & PCB_DBREGS) {
if (pcb->pcb_flags & PCB_DBREGS) {
reset_dbregs();
td->td_pcb->pcb_flags &= ~PCB_DBREGS;
pcb->pcb_flags &= ~PCB_DBREGS;
}
}
/*
 * Release the private TSS and I/O permission bitmap attached to the
 * thread's pcb, if any, and clear the pointer so that a second call is
 * harmless.
 */
void
cpu_thread_clean(struct thread *td)
{
	struct pcb *pcb = td->td_pcb;

	if (pcb->pcb_tssp == NULL)
		return;

	/* Same size as the allocation: the bitmap pages plus one for the TSS. */
	kmem_free(kernel_map, (vm_offset_t)pcb->pcb_tssp,
	    ctob(IOPAGES + 1));
	pcb->pcb_tssp = NULL;
}
void
@ -247,6 +310,8 @@ cpu_thread_alloc(struct thread *td)
/*
 * Machine-dependent hook for freeing a thread; all of the work is done
 * by cpu_thread_clean().
 */
void
cpu_thread_free(struct thread *td)
{
cpu_thread_clean(td);
}
/*
@ -358,6 +423,11 @@ cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg,
((register_t)stack->ss_sp + stack->ss_size) & ~0x0f;
td->td_frame->tf_rsp -= 8;
td->td_frame->tf_rip = (register_t)entry;
td->td_frame->tf_ds = _udatasel;
td->td_frame->tf_es = _udatasel;
td->td_frame->tf_fs = _ufssel;
td->td_frame->tf_gs = _ugssel;
td->td_frame->tf_flags = TF_HASSEGS;
/*
* Pass the address of the mailbox for this kse to the uts
@ -375,25 +445,11 @@ cpu_set_user_tls(struct thread *td, void *tls_base)
#ifdef COMPAT_IA32
if (td->td_proc->p_sysent->sv_flags & SV_ILP32) {
if (td == curthread) {
critical_enter();
td->td_pcb->pcb_gsbase = (register_t)tls_base;
wrmsr(MSR_KGSBASE, td->td_pcb->pcb_gsbase);
critical_exit();
} else {
td->td_pcb->pcb_gsbase = (register_t)tls_base;
}
td->td_pcb->pcb_gsbase = (register_t)tls_base;
return (0);
}
#endif
if (td == curthread) {
critical_enter();
td->td_pcb->pcb_fsbase = (register_t)tls_base;
wrmsr(MSR_FSBASE, td->td_pcb->pcb_fsbase);
critical_exit();
} else {
td->td_pcb->pcb_fsbase = (register_t)tls_base;
}
td->td_pcb->pcb_fsbase = (register_t)tls_base;
return (0);
}

View file

@ -60,6 +60,11 @@ IDTVEC(int0x80_syscall)
movq %r13,TF_R13(%rsp)
movq %r14,TF_R14(%rsp)
movq %r15,TF_R15(%rsp)
movw %fs,TF_FS(%rsp)
movw %gs,TF_GS(%rsp)
movw %es,TF_ES(%rsp)
movw %ds,TF_DS(%rsp)
movl $TF_HASSEGS,TF_FLAGS(%rsp)
FAKE_MCOUNT(TF_RIP(%rsp))
movq %rsp, %rdi
call ia32_syscall

View file

@ -0,0 +1,71 @@
/*-
* Copyright (c) 2009 Konstantin Belousov
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_compat.h"
#include <sys/param.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <machine/cpu.h>
#include <machine/sysarch.h>
#include <compat/freebsd32/freebsd32_util.h>
#include <compat/freebsd32/freebsd32.h>
#include <compat/freebsd32/freebsd32_proto.h>
int
freebsd32_sysarch(struct thread *td, struct freebsd32_sysarch_args *uap)
{
struct sysarch_args uap1;
struct i386_ldt_args uapl;
struct i386_ldt_args32 uapl32;
int error;
if (uap->op == I386_SET_LDT || uap->op == I386_GET_LDT) {
if ((error = copyin(uap->parms, &uapl32, sizeof(uapl32))) != 0)
return (error);
uap1.op = uap->op;
uap1.parms = (char *)&uapl;
uapl.start = uapl32.start;
uapl.descs = (struct user_segment_descriptor *)(uintptr_t)
uapl32.descs;
uapl.num = uapl32.num;
return (sysarch_ldt(td, &uap1, UIO_SYSSPACE));
} else {
uap1.op = uap->op;
uap1.parms = uap->parms;
return (sysarch(td, &uap1));
}
}

View file

@ -85,9 +85,17 @@ fill_regs32(struct thread *td, struct reg32 *regs)
tp = td->td_frame;
pcb = td->td_pcb;
regs->r_fs = pcb->pcb_fs;
regs->r_es = pcb->pcb_es;
regs->r_ds = pcb->pcb_ds;
if (tp->tf_flags & TF_HASSEGS) {
regs->r_gs = tp->tf_gs;
regs->r_fs = tp->tf_fs;
regs->r_es = tp->tf_es;
regs->r_ds = tp->tf_ds;
} else {
regs->r_gs = _ugssel;
regs->r_fs = _ufssel;
regs->r_es = _udatasel;
regs->r_ds = _udatasel;
}
regs->r_edi = tp->tf_rdi;
regs->r_esi = tp->tf_rsi;
regs->r_ebp = tp->tf_rbp;
@ -100,7 +108,6 @@ fill_regs32(struct thread *td, struct reg32 *regs)
regs->r_eflags = tp->tf_rflags;
regs->r_esp = tp->tf_rsp;
regs->r_ss = tp->tf_ss;
regs->r_gs = pcb->pcb_gs;
return (0);
}
@ -114,14 +121,11 @@ set_regs32(struct thread *td, struct reg32 *regs)
if (!EFL_SECURE(regs->r_eflags, tp->tf_rflags) || !CS_SECURE(regs->r_cs))
return (EINVAL);
pcb = td->td_pcb;
#if 0
load_fs(regs->r_fs);
pcb->pcb_fs = regs->r_fs;
load_es(regs->r_es);
pcb->pcb_es = regs->r_es;
load_ds(regs->r_ds);
pcb->pcb_ds = regs->r_ds;
#endif
tp->tf_gs = regs->r_gs;
tp->tf_fs = regs->r_fs;
tp->tf_es = regs->r_es;
tp->tf_ds = regs->r_ds;
tp->tf_flags = TF_HASSEGS;
tp->tf_rdi = regs->r_edi;
tp->tf_rsi = regs->r_esi;
tp->tf_rbp = regs->r_ebp;
@ -134,10 +138,6 @@ set_regs32(struct thread *td, struct reg32 *regs)
tp->tf_rflags = regs->r_eflags;
tp->tf_rsp = regs->r_esp;
tp->tf_ss = regs->r_ss;
#if 0
load_gs(regs->r_gs);
pcb->pcb_gs = regs->r_gs;
#endif
return (0);
}
@ -166,7 +166,8 @@ fill_fpregs32(struct thread *td, struct fpreg32 *regs)
penv_87->en_fcs = td->td_frame->tf_cs;
penv_87->en_opcode = penv_xmm->en_opcode;
penv_87->en_foo = penv_xmm->en_rdp;
penv_87->en_fos = td->td_pcb->pcb_ds;
/* Entry into the kernel always sets TF_HASSEGS */
penv_87->en_fos = td->td_frame->tf_ds;
/* FPU registers */
for (i = 0; i < 8; ++i)

View file

@ -85,8 +85,6 @@ static void freebsd4_ia32_sendsig(sig_t, ksiginfo_t *, sigset_t *);
static void ia32_get_fpcontext(struct thread *td, struct ia32_mcontext *mcp);
static int ia32_set_fpcontext(struct thread *td, const struct ia32_mcontext *mcp);
extern int _ucode32sel, _udatasel;
#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
@ -134,10 +132,11 @@ ia32_get_mcontext(struct thread *td, struct ia32_mcontext *mcp, int flags)
PROC_LOCK(curthread->td_proc);
mcp->mc_onstack = sigonstack(tp->tf_rsp);
PROC_UNLOCK(curthread->td_proc);
mcp->mc_gs = td->td_pcb->pcb_gs;
mcp->mc_fs = td->td_pcb->pcb_fs;
mcp->mc_es = td->td_pcb->pcb_es;
mcp->mc_ds = td->td_pcb->pcb_ds;
/* Entry into kernel always sets TF_HASSEGS */
mcp->mc_gs = tp->tf_gs;
mcp->mc_fs = tp->tf_fs;
mcp->mc_es = tp->tf_es;
mcp->mc_ds = tp->tf_ds;
mcp->mc_edi = tp->tf_rdi;
mcp->mc_esi = tp->tf_rsi;
mcp->mc_ebp = tp->tf_rbp;
@ -158,6 +157,8 @@ ia32_get_mcontext(struct thread *td, struct ia32_mcontext *mcp, int flags)
mcp->mc_ss = tp->tf_ss;
mcp->mc_len = sizeof(*mcp);
ia32_get_fpcontext(td, mcp);
mcp->mc_fsbase = td->td_pcb->pcb_fsbase;
mcp->mc_gsbase = td->td_pcb->pcb_gsbase;
return (0);
}
@ -182,11 +183,11 @@ ia32_set_mcontext(struct thread *td, const struct ia32_mcontext *mcp)
ret = ia32_set_fpcontext(td, mcp);
if (ret != 0)
return (ret);
#if 0 /* XXX deal with load_fs() and friends */
tp->tf_gs = mcp->mc_gs;
tp->tf_fs = mcp->mc_fs;
tp->tf_es = mcp->mc_es;
tp->tf_ds = mcp->mc_ds;
#endif
tp->tf_flags = TF_HASSEGS;
tp->tf_rdi = mcp->mc_edi;
tp->tf_rsi = mcp->mc_esi;
tp->tf_rbp = mcp->mc_ebp;
@ -199,9 +200,6 @@ ia32_set_mcontext(struct thread *td, const struct ia32_mcontext *mcp)
tp->tf_rflags = rflags;
tp->tf_rsp = mcp->mc_esp;
tp->tf_ss = mcp->mc_ss;
#if 0 /* XXX deal with load_gs() and friends */
td->td_pcb->pcb_gs = mcp->mc_gs;
#endif
td->td_pcb->pcb_flags |= PCB_FULLCTX;
return (0);
}
@ -326,10 +324,6 @@ freebsd4_ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
sf.sf_uc.uc_mcontext.mc_gs = rgs();
sf.sf_uc.uc_mcontext.mc_fs = rfs();
__asm __volatile("mov %%es,%0" : "=rm" (sf.sf_uc.uc_mcontext.mc_es));
__asm __volatile("mov %%ds,%0" : "=rm" (sf.sf_uc.uc_mcontext.mc_ds));
sf.sf_uc.uc_mcontext.mc_edi = regs->tf_rdi;
sf.sf_uc.uc_mcontext.mc_esi = regs->tf_rsi;
sf.sf_uc.uc_mcontext.mc_ebp = regs->tf_rbp;
@ -345,6 +339,10 @@ freebsd4_ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
sf.sf_uc.uc_mcontext.mc_eflags = regs->tf_rflags;
sf.sf_uc.uc_mcontext.mc_esp = regs->tf_rsp;
sf.sf_uc.uc_mcontext.mc_ss = regs->tf_ss;
sf.sf_uc.uc_mcontext.mc_ds = regs->tf_ds;
sf.sf_uc.uc_mcontext.mc_es = regs->tf_es;
sf.sf_uc.uc_mcontext.mc_fs = regs->tf_fs;
sf.sf_uc.uc_mcontext.mc_gs = regs->tf_gs;
/* Allocate space for the signal handler context. */
if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
@ -394,10 +392,8 @@ freebsd4_ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
regs->tf_rflags &= ~(PSL_T | PSL_D);
regs->tf_cs = _ucode32sel;
regs->tf_ss = _udatasel;
load_ds(_udatasel);
td->td_pcb->pcb_ds = _udatasel;
load_es(_udatasel);
td->td_pcb->pcb_es = _udatasel;
regs->tf_ds = _udatasel;
regs->tf_es = _udatasel;
/* leave user %fs and %gs untouched */
PROC_LOCK(p);
mtx_lock(&psp->ps_mtx);
@ -441,10 +437,6 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
sf.sf_uc.uc_mcontext.mc_gs = rgs();
sf.sf_uc.uc_mcontext.mc_fs = rfs();
__asm __volatile("mov %%es,%0" : "=rm" (sf.sf_uc.uc_mcontext.mc_es));
__asm __volatile("mov %%ds,%0" : "=rm" (sf.sf_uc.uc_mcontext.mc_ds));
sf.sf_uc.uc_mcontext.mc_edi = regs->tf_rdi;
sf.sf_uc.uc_mcontext.mc_esi = regs->tf_rsi;
sf.sf_uc.uc_mcontext.mc_ebp = regs->tf_rbp;
@ -460,9 +452,15 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
sf.sf_uc.uc_mcontext.mc_eflags = regs->tf_rflags;
sf.sf_uc.uc_mcontext.mc_esp = regs->tf_rsp;
sf.sf_uc.uc_mcontext.mc_ss = regs->tf_ss;
sf.sf_uc.uc_mcontext.mc_ds = regs->tf_ds;
sf.sf_uc.uc_mcontext.mc_es = regs->tf_es;
sf.sf_uc.uc_mcontext.mc_fs = regs->tf_fs;
sf.sf_uc.uc_mcontext.mc_gs = regs->tf_gs;
sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
ia32_get_fpcontext(td, &sf.sf_uc.uc_mcontext);
fpstate_drop(td);
sf.sf_uc.uc_mcontext.mc_fsbase = td->td_pcb->pcb_fsbase;
sf.sf_uc.uc_mcontext.mc_gsbase = td->td_pcb->pcb_gsbase;
/* Allocate space for the signal handler context. */
if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
@ -514,11 +512,9 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
regs->tf_rflags &= ~(PSL_T | PSL_D);
regs->tf_cs = _ucode32sel;
regs->tf_ss = _udatasel;
load_ds(_udatasel);
td->td_pcb->pcb_ds = _udatasel;
load_es(_udatasel);
td->td_pcb->pcb_es = _udatasel;
/* leave user %fs and %gs untouched */
regs->tf_ds = _udatasel;
regs->tf_es = _udatasel;
/* XXXKIB leave user %fs and %gs untouched */
PROC_LOCK(p);
mtx_lock(&psp->ps_mtx);
}
@ -591,7 +587,6 @@ freebsd4_freebsd32_sigreturn(td, uap)
return (EINVAL);
}
/* Segment selectors restored by sigtramp.S */
regs->tf_rdi = ucp->uc_mcontext.mc_edi;
regs->tf_rsi = ucp->uc_mcontext.mc_esi;
regs->tf_rbp = ucp->uc_mcontext.mc_ebp;
@ -606,6 +601,10 @@ freebsd4_freebsd32_sigreturn(td, uap)
regs->tf_rflags = ucp->uc_mcontext.mc_eflags;
regs->tf_rsp = ucp->uc_mcontext.mc_esp;
regs->tf_ss = ucp->uc_mcontext.mc_ss;
regs->tf_ds = ucp->uc_mcontext.mc_ds;
regs->tf_es = ucp->uc_mcontext.mc_es;
regs->tf_fs = ucp->uc_mcontext.mc_fs;
regs->tf_gs = ucp->uc_mcontext.mc_gs;
PROC_LOCK(p);
td->td_sigmask = ucp->uc_sigmask;
@ -678,7 +677,6 @@ freebsd32_sigreturn(td, uap)
if (ret != 0)
return (ret);
/* Segment selectors restored by sigtramp.S */
regs->tf_rdi = ucp->uc_mcontext.mc_edi;
regs->tf_rsi = ucp->uc_mcontext.mc_esi;
regs->tf_rbp = ucp->uc_mcontext.mc_ebp;
@ -693,6 +691,11 @@ freebsd32_sigreturn(td, uap)
regs->tf_rflags = ucp->uc_mcontext.mc_eflags;
regs->tf_rsp = ucp->uc_mcontext.mc_esp;
regs->tf_ss = ucp->uc_mcontext.mc_ss;
regs->tf_ds = ucp->uc_mcontext.mc_ds;
regs->tf_es = ucp->uc_mcontext.mc_es;
regs->tf_fs = ucp->uc_mcontext.mc_fs;
regs->tf_gs = ucp->uc_mcontext.mc_gs;
regs->tf_flags = TF_HASSEGS;
PROC_LOCK(p);
td->td_sigmask = ucp->uc_sigmask;
@ -715,20 +718,14 @@ ia32_setregs(td, entry, stack, ps_strings)
struct trapframe *regs = td->td_frame;
struct pcb *pcb = td->td_pcb;
critical_enter();
wrmsr(MSR_FSBASE, 0);
wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
mtx_lock(&dt_lock);
if (td->td_proc->p_md.md_ldt != NULL)
user_ldt_free(td);
else
mtx_unlock(&dt_lock);
pcb->pcb_fsbase = 0;
pcb->pcb_gsbase = 0;
critical_exit();
load_ds(_udatasel);
load_es(_udatasel);
load_fs(_udatasel);
load_gs(_udatasel);
pcb->pcb_ds = _udatasel;
pcb->pcb_es = _udatasel;
pcb->pcb_fs = _udatasel;
pcb->pcb_gs = _udatasel;
pcb->pcb_initial_fpucw = __INITIAL_FPUCW_I386__;
bzero((char *)regs, sizeof(struct trapframe));
@ -738,6 +735,12 @@ ia32_setregs(td, entry, stack, ps_strings)
regs->tf_ss = _udatasel;
regs->tf_cs = _ucode32sel;
regs->tf_rbx = ps_strings;
regs->tf_ds = _udatasel;
regs->tf_es = _udatasel;
regs->tf_fs = _ufssel;
regs->tf_gs = _ugssel;
regs->tf_flags = TF_HASSEGS;
load_cr0(rcr0() | CR0_MP | CR0_TS);
fpstate_drop(td);

View file

@ -45,8 +45,6 @@ ia32_sigcode:
calll *IA32_SIGF_HANDLER(%esp)
leal IA32_SIGF_UC(%esp),%eax /* get ucontext */
pushl %eax
mov IA32_UC_ES(%eax),%es /* restore %es */
mov IA32_UC_DS(%eax),%ds /* restore %ds */
movl $SYS_sigreturn,%eax
pushl %eax /* junk to fake return addr. */
int $0x80 /* enter kernel with args */
@ -60,8 +58,6 @@ freebsd4_ia32_sigcode:
calll *IA32_SIGF_HANDLER(%esp)
leal IA32_SIGF_UC4(%esp),%eax/* get ucontext */
pushl %eax
mov IA32_UC4_ES(%eax),%es /* restore %es */
mov IA32_UC4_DS(%eax),%ds /* restore %ds */
movl $344,%eax /* 4.x SYS_sigreturn */
pushl %eax /* junk to fake return addr. */
int $0x80 /* enter kernel with args */

View file

@ -161,7 +161,12 @@
movq %r12,TF_R12(%rsp) ; \
movq %r13,TF_R13(%rsp) ; \
movq %r14,TF_R14(%rsp) ; \
movq %r15,TF_R15(%rsp)
movq %r15,TF_R15(%rsp) ; \
movw %fs,TF_FS(%rsp) ; \
movw %gs,TF_GS(%rsp) ; \
movw %es,TF_ES(%rsp) ; \
movw %ds,TF_DS(%rsp) ; \
movl $TF_HASSEGS,TF_FLAGS(%rsp)
#define POP_FRAME \
movq TF_RDI(%rsp),%rdi ; \

View file

@ -64,9 +64,13 @@ struct trapframe {
register_t tf_r13;
register_t tf_r14;
register_t tf_r15;
register_t tf_trapno;
uint32_t tf_trapno;
uint16_t tf_fs;
uint16_t tf_gs;
register_t tf_addr;
register_t tf_flags;
uint32_t tf_flags;
uint16_t tf_es;
uint16_t tf_ds;
/* below portion defined in hardware */
register_t tf_err;
register_t tf_rip;
@ -76,4 +80,7 @@ struct trapframe {
register_t tf_ss;
};
/*
 * Bit for tf_flags.  The register-save macro in this change stores it
 * right after writing %fs, %gs, %es and %ds into the frame, and the C
 * paths that fill tf_ds/tf_es/tf_fs/tf_gs by hand assign it as well.
 * NOTE(review): presumably it tells the return-to-user path that the
 * segment fields of the frame are valid -- confirm against the consumer.
 */
#define TF_HASSEGS 0x1
/* #define _MC_HASBASES 0x2 */
#endif /* _MACHINE_FRAME_H_ */

View file

@ -60,6 +60,11 @@ extern char sigcode[];
extern int szsigcode;
extern uint64_t *vm_page_dump;
extern int vm_page_dump_size;
/*
 * Segment selectors placed into user trapframes by the ia32 and Linux
 * exec/signal paths in this change: _udatasel goes to tf_ds, tf_es and
 * tf_ss, _ucode32sel to tf_cs, _ufssel to tf_fs and _ugssel to tf_gs.
 * NOTE(review): _ucodesel is presumably the 64-bit counterpart of
 * _ucode32sel; no user of it is visible here -- confirm.
 */
extern int _udatasel;
extern int _ucodesel;
extern int _ucode32sel;
extern int _ufssel;
extern int _ugssel;
typedef void alias_for_inthand_t(u_int cs, u_int ef, u_int esp, u_int ss);
struct thread;
@ -72,6 +77,14 @@ void busdma_swi(void);
void cpu_setregs(void);
void doreti_iret(void) __asm(__STRING(doreti_iret));
void doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault));
/*
 * Declarations whose assembler names are fixed with __asm(__STRING(...)),
 * the same way doreti_iret and doreti_iret_fault are declared.
 * NOTE(review): presumably ld_* label the instructions that reload the
 * user %ds/%es/%fs/%gs, and *_load_fault are the recovery points used
 * when such a load traps on an invalid selector -- confirm against the
 * assembly that defines these labels.
 */
void ld_ds(void) __asm(__STRING(ld_ds));
void ld_es(void) __asm(__STRING(ld_es));
void ld_fs(void) __asm(__STRING(ld_fs));
void ld_gs(void) __asm(__STRING(ld_gs));
void ds_load_fault(void) __asm(__STRING(ds_load_fault));
void es_load_fault(void) __asm(__STRING(es_load_fault));
void fs_load_fault(void) __asm(__STRING(fs_load_fault));
void gs_load_fault(void) __asm(__STRING(gs_load_fault));
void dump_add_page(vm_paddr_t);
void dump_drop_page(vm_paddr_t);
void initializecpu(void);

View file

@ -62,10 +62,6 @@ struct pcb {
#define PCB_32BIT 0x40 /* process has 32 bit context (segs etc) */
#define PCB_FULLCTX 0x80 /* full context restore on sysret */
u_int32_t pcb_ds;
u_int32_t pcb_es;
u_int32_t pcb_fs;
u_int32_t pcb_gs;
u_int64_t pcb_dr0;
u_int64_t pcb_dr1;
u_int64_t pcb_dr2;
@ -80,6 +76,8 @@ struct pcb {
/* 32-bit segment descriptor */
struct user_segment_descriptor pcb_gs32sd;
/* local tss, with i/o bitmap; NULL for common */
struct amd64tss *pcb_tssp;
};
struct xpcb {

View file

@ -62,12 +62,20 @@
char pc_monitorbuf[128] __aligned(128); /* cache line */ \
struct pcpu *pc_prvspace; /* Self-reference */ \
struct pmap *pc_curpmap; \
struct amd64tss *pc_tssp; \
struct amd64tss *pc_tssp; /* TSS segment active on CPU */ \
struct amd64tss *pc_commontssp;/* Common TSS for the CPU */ \
register_t pc_rsp0; \
register_t pc_scratch_rsp; /* User %rsp in syscall */ \
u_int pc_apic_id; \
u_int pc_acpi_id; /* ACPI CPU id */ \
struct user_segment_descriptor *pc_gs32p \
/* Pointer to the CPU %fs descriptor */ \
struct user_segment_descriptor *pc_fs32p; \
/* Pointer to the CPU %gs descriptor */ \
struct user_segment_descriptor *pc_gs32p; \
/* Pointer to the CPU LDT descriptor */ \
struct system_segment_descriptor *pc_ldt; \
/* Pointer to the CPU TSS descriptor */ \
struct system_segment_descriptor *pc_tss
PCPU_XEN_FIELDS
#ifdef _KERNEL

View file

@ -33,6 +33,13 @@
#ifndef _MACHINE_PROC_H_
#define _MACHINE_PROC_H_
#include <machine/segments.h>
/*
 * Per-process LDT record; struct mdproc refers to one through md_ldt.
 */
struct proc_ldt {
caddr_t ldt_base; /* presumably the descriptor table memory -- confirm */
int ldt_refcnt; /* presumably the number of holders; see user_ldt_deref() */
};
/*
* Machine-dependent part of the proc structure for AMD64.
*/
@ -42,6 +49,8 @@ struct mdthread {
};
/*
 * Machine-dependent per-process state.  Callers test md_ldt against NULL
 * before calling user_ldt_free(), so NULL means the process has no LDT.
 */
struct mdproc {
struct proc_ldt *md_ldt; /* (t) per-process ldt */
/* NOTE(review): presumably the system descriptor for md_ldt -- confirm. */
struct system_segment_descriptor md_ldt_sd;
};
#ifdef _KERNEL
@ -55,6 +64,18 @@ struct mdproc {
(char *)&td; \
} while (0)
/*
 * Per-process LDT support.
 *
 * Locking note: ia32_setregs() and exec_linux_setregs() take dt_lock,
 * call user_ldt_free() when md_ldt is non-NULL, and unlock dt_lock only
 * on the branch that skips the call.  user_ldt_free() therefore
 * presumably releases dt_lock itself -- confirm against its definition
 * before adding new callers.
 */
void set_user_ldt(struct mdproc *);
struct proc_ldt *user_ldt_alloc(struct proc *, int);
void user_ldt_free(struct thread *);
void user_ldt_deref(struct proc_ldt *);
struct sysarch_args;
int sysarch_ldt(struct thread *td, struct sysarch_args *uap, int uap_space);
int amd64_set_ldt_data(struct thread *td, int start, int num,
struct user_segment_descriptor *descs);
/* Mutex taken around the md_ldt check and user_ldt_free() in the exec paths. */
extern struct mtx dt_lock;
extern int max_ldt_segment;
#endif /* _KERNEL */
#endif /* !_MACHINE_PROC_H_ */

View file

@ -239,6 +239,9 @@ void ssdtosd(struct soft_segment_descriptor *ssdp,
struct user_segment_descriptor *sdp);
void ssdtosyssd(struct soft_segment_descriptor *ssdp,
struct system_segment_descriptor *sdp);
/*
 * NOTE(review): base is only 32 bits wide.  linux_set_thread_area()
 * passes info.base_addr to update_gdt_gsbase(); presumably these refresh
 * the base of the thread's 32-bit %gs/%fs descriptor -- confirm against
 * the definitions.
 */
void update_gdt_gsbase(struct thread *td, uint32_t base);
void update_gdt_fsbase(struct thread *td, uint32_t base);
#endif /* _KERNEL */
#endif /* !_MACHINE_SEGMENTS_H_ */

View file

@ -77,6 +77,15 @@ int amd64_set_fsbase(void *);
int amd64_set_gsbase(void *);
int sysarch(int, void *);
__END_DECLS
#else
/*
 * Prototypes for the LDT and I/O-permission handlers; each takes the
 * calling thread as its first argument.
 * NOTE(review): the argument structures keep their i386_ names although
 * the functions are amd64_*; presumably the layouts are shared with the
 * i386 sysarch interface -- confirm.  union descriptor is forward
 * declared but not used by the prototypes visible here.
 */
struct thread;
union descriptor;
int amd64_get_ldt(struct thread *, struct i386_ldt_args *);
int amd64_set_ldt(struct thread *, struct i386_ldt_args *,
struct user_segment_descriptor *);
int amd64_get_ioperm(struct thread *, struct i386_ioperm_args *);
int amd64_set_ioperm(struct thread *, struct i386_ioperm_args *);
#endif
#endif /* !_MACHINE_SYSARCH_H_ */

View file

@ -11,8 +11,6 @@
NON_GPROF_ENTRY(linux_sigcode)
call *LINUX_SIGF_HANDLER(%esp)
leal LINUX_SIGF_SC(%esp),%ebx /* linux scp */
mov LINUX_SC_ES(%ebx),%es
mov LINUX_SC_DS(%ebx),%ds
movl %esp, %ebx /* pass sigframe */
push %eax /* fake ret addr */
movl $LINUX_SYS_linux_sigreturn,%eax /* linux_sigreturn() */
@ -24,8 +22,6 @@ linux_rt_sigcode:
call *LINUX_RT_SIGF_HANDLER(%esp)
leal LINUX_RT_SIGF_UC(%esp),%ebx /* linux ucp */
leal LINUX_RT_SIGF_SC(%ebx),%ecx /* linux sigcontext */
mov LINUX_SC_ES(%ecx),%es
mov LINUX_SC_DS(%ecx),%ds
push %eax /* fake ret addr */
movl $LINUX_SYS_linux_rt_sigreturn,%eax /* linux_rt_sigreturn() */
int $0x80 /* enter kernel with args */

View file

@ -716,8 +716,8 @@ linux_clone(struct thread *td, struct linux_clone_args *args)
sd.sd_long, sd.sd_def32, sd.sd_gran);
#endif
td2->td_pcb->pcb_gsbase = (register_t)info.base_addr;
td2->td_pcb->pcb_gs32sd = sd;
td2->td_pcb->pcb_gs = GSEL(GUGS32_SEL, SEL_UPL);
/* XXXKIB td2->td_pcb->pcb_gs32sd = sd; */
td2->td_frame->tf_gs = GSEL(GUGS32_SEL, SEL_UPL);
td2->td_pcb->pcb_flags |= PCB_GS32BIT | PCB_32BIT;
}
}
@ -1359,12 +1359,9 @@ linux_set_thread_area(struct thread *td,
sd.sd_gran);
#endif
critical_enter();
td->td_pcb->pcb_gsbase = (register_t)info.base_addr;
td->td_pcb->pcb_gs32sd = *PCPU_GET(gs32p) = sd;
td->td_pcb->pcb_flags |= PCB_32BIT | PCB_GS32BIT;
wrmsr(MSR_KGSBASE, td->td_pcb->pcb_gsbase);
critical_exit();
update_gdt_gsbase(td, info.base_addr);
return (0);
}

View file

@ -290,7 +290,6 @@ elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
return 0;
}
extern int _ucodesel, _ucode32sel, _udatasel;
extern unsigned long linux_sznonrtsigcode;
static void
@ -360,13 +359,7 @@ linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
frame.sf_sc.uc_mcontext.sc_gs = rgs();
frame.sf_sc.uc_mcontext.sc_fs = rfs();
__asm __volatile("mov %%es,%0" :
"=rm" (frame.sf_sc.uc_mcontext.sc_es));
__asm __volatile("mov %%ds,%0" :
"=rm" (frame.sf_sc.uc_mcontext.sc_ds));
frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi;
frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi;
frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp;
@ -376,6 +369,10 @@ linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax;
frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip;
frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs;
frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
@ -413,11 +410,11 @@ linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
regs->tf_rflags &= ~(PSL_T | PSL_D);
regs->tf_cs = _ucode32sel;
regs->tf_ss = _udatasel;
load_ds(_udatasel);
td->td_pcb->pcb_ds = _udatasel;
load_es(_udatasel);
td->td_pcb->pcb_es = _udatasel;
/* leave user %fs and %gs untouched */
regs->tf_ds = _udatasel;
regs->tf_es = _udatasel;
regs->tf_fs = _ufssel;
regs->tf_gs = _ugssel;
regs->tf_flags = TF_HASSEGS;
PROC_LOCK(p);
mtx_lock(&psp->ps_mtx);
}
@ -495,10 +492,10 @@ linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
* Build the signal context to be used by sigreturn.
*/
frame.sf_sc.sc_mask = lmask.__bits[0];
frame.sf_sc.sc_gs = rgs();
frame.sf_sc.sc_fs = rfs();
__asm __volatile("mov %%es,%0" : "=rm" (frame.sf_sc.sc_es));
__asm __volatile("mov %%ds,%0" : "=rm" (frame.sf_sc.sc_ds));
frame.sf_sc.sc_gs = regs->tf_gs;
frame.sf_sc.sc_fs = regs->tf_fs;
frame.sf_sc.sc_es = regs->tf_es;
frame.sf_sc.sc_ds = regs->tf_ds;
frame.sf_sc.sc_edi = regs->tf_rdi;
frame.sf_sc.sc_esi = regs->tf_rsi;
frame.sf_sc.sc_ebp = regs->tf_rbp;
@ -535,11 +532,11 @@ linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
regs->tf_rflags &= ~(PSL_T | PSL_D);
regs->tf_cs = _ucode32sel;
regs->tf_ss = _udatasel;
load_ds(_udatasel);
td->td_pcb->pcb_ds = _udatasel;
load_es(_udatasel);
td->td_pcb->pcb_es = _udatasel;
/* leave user %fs and %gs untouched */
regs->tf_ds = _udatasel;
regs->tf_es = _udatasel;
regs->tf_fs = _ufssel;
regs->tf_gs = _ugssel;
regs->tf_flags = TF_HASSEGS;
PROC_LOCK(p);
mtx_lock(&psp->ps_mtx);
}
@ -624,7 +621,6 @@ linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
/*
* Restore signal context.
*/
/* Selectors were restored by the trampoline. */
regs->tf_rdi = frame.sf_sc.sc_edi;
regs->tf_rsi = frame.sf_sc.sc_esi;
regs->tf_rbp = frame.sf_sc.sc_ebp;
@ -634,6 +630,10 @@ linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
regs->tf_rax = frame.sf_sc.sc_eax;
regs->tf_rip = frame.sf_sc.sc_eip;
regs->tf_cs = frame.sf_sc.sc_cs;
regs->tf_ds = frame.sf_sc.sc_ds;
regs->tf_es = frame.sf_sc.sc_es;
regs->tf_fs = frame.sf_sc.sc_fs;
regs->tf_gs = frame.sf_sc.sc_gs;
regs->tf_rflags = eflags;
regs->tf_rsp = frame.sf_sc.sc_esp_at_signal;
regs->tf_ss = frame.sf_sc.sc_ss;
@ -722,7 +722,10 @@ linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
/*
* Restore signal context
*/
/* Selectors were restored by the trampoline. */
regs->tf_gs = context->sc_gs;
regs->tf_fs = context->sc_fs;
regs->tf_es = context->sc_es;
regs->tf_ds = context->sc_ds;
regs->tf_rdi = context->sc_edi;
regs->tf_rsi = context->sc_esi;
regs->tf_rbp = context->sc_ebp;
@ -827,27 +830,30 @@ exec_linux_setregs(td, entry, stack, ps_strings)
struct trapframe *regs = td->td_frame;
struct pcb *pcb = td->td_pcb;
mtx_lock(&dt_lock);
if (td->td_proc->p_md.md_ldt != NULL)
user_ldt_free(td);
else
mtx_unlock(&dt_lock);
critical_enter();
wrmsr(MSR_FSBASE, 0);
wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
pcb->pcb_fsbase = 0;
pcb->pcb_gsbase = 0;
critical_exit();
load_ds(_udatasel);
load_es(_udatasel);
load_fs(_udatasel);
load_gs(_udatasel);
pcb->pcb_ds = _udatasel;
pcb->pcb_es = _udatasel;
pcb->pcb_fs = _udatasel;
pcb->pcb_gs = _udatasel;
pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
bzero((char *)regs, sizeof(struct trapframe));
regs->tf_rip = entry;
regs->tf_rsp = stack;
regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
regs->tf_gs = _ugssel;
regs->tf_fs = _ufssel;
regs->tf_es = _udatasel;
regs->tf_ds = _udatasel;
regs->tf_ss = _udatasel;
regs->tf_flags = TF_HASSEGS;
regs->tf_cs = _ucode32sel;
regs->tf_rbx = ps_strings;
load_cr0(rcr0() | CR0_MP | CR0_TS);

View file

@ -232,6 +232,7 @@ amd64/ia32/ia32_reg.c optional compat_ia32
amd64/ia32/ia32_signal.c optional compat_ia32
amd64/ia32/ia32_sigtramp.S optional compat_ia32
amd64/ia32/ia32_syscall.c optional compat_ia32
amd64/ia32/ia32_misc.c optional compat_ia32
compat/freebsd32/freebsd32_ioctl.c optional compat_ia32
compat/freebsd32/freebsd32_misc.c optional compat_ia32
compat/freebsd32/freebsd32_syscalls.c optional compat_ia32