From: Danny Feng <dfeng@redhat.com> Date: Mon, 7 Jun 2010 09:14:52 -0400 Subject: [x86_64] implement vDSO randomization Message-id: <20100607091451.8239.23560.sendpatchset@danny.redhat> Patchwork-id: 25985 O-Subject: [PATCH RHEL5.6] x86_64: Add vDSO for x86-64 with gettimeofday/clock_gettime/getcpu Bugzilla: 459763 RH-Acked-by: Larry Woodman <lwoodman@redhat.com> RH-Acked-by: Prarit Bhargava <prarit@redhat.com> RHBZ#: https://bugzilla.redhat.com/show_bug.cgi?id=459763 Description: Randomization is not fully working on rhel-5 x86_64 kernel. There's no vDSO randomization for rhel5 x86_64 kernel. Upstream status: commit 2aae950b is the right fix. Since x86_64 doesn't have generic clocksource infrastructure, I had to implement x86_64 vDSO feature with old vxtime/xtime/... Brew build: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=2497692 Test status: download and install rh-tests-kernel-security-execshield-randomization-2.1-13.noarch.rpm run test, all pass. review and comments are welcome, thank you. diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 0b85c83..81bd2b3 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1856,7 +1856,7 @@ running once the system is up. usbhid.mousepoll= [USBHID] The interval which mice are to be polled at. - vdso= [IA-32] + vdso= [IA-32,x86_64] vdso=1: enable VDSO (default) vdso=0: disable VDSO mapping diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile index 9fd0063..421b9ee 100644 --- a/arch/x86_64/Makefile +++ b/arch/x86_64/Makefile @@ -67,6 +67,9 @@ libs-y += arch/x86_64/lib/ core-y += arch/x86_64/kernel/ \ arch/x86_64/mm/ \ arch/x86_64/crypto/ + +core-y += arch/x86_64/vdso/ + core-$(CONFIG_IA32_EMULATION) += arch/x86_64/ia32/ drivers-$(CONFIG_PCI) += arch/x86_64/pci/ drivers-$(CONFIG_OPROFILE) += arch/x86_64/oprofile/ diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c index e095608..ce1d768 100644 --- a/arch/x86_64/ia32/ia32_binfmt.c +++ b/arch/x86_64/ia32/ia32_binfmt.c @@ -33,6 +33,7 @@ int sysctl_vsyscall32 = 1; +#undef ARCH_DLINFO #define ARCH_DLINFO do { \ if (sysctl_vsyscall32) { \ NEW_AUX_ENT(AT_SYSINFO, (u32)(u64)VSYSCALL32_VSYSCALL); \ diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index cf41f55..daff4d4 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -213,6 +213,13 @@ SECTIONS from .altinstructions and .eh_frame */ .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) } .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) } + +/* vdso blob that is mapped into user space */ + vdso_start = . ; + .vdso : AT(ADDR(.vdso) - LOAD_OFFSET) { *(.vdso) } + . = ALIGN(4096); + vdso_end = .; + . = ALIGN(4096); __initramfs_start = .; .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) } diff --git a/arch/x86_64/kernel/vsyscall-xen.c b/arch/x86_64/kernel/vsyscall-xen.c index 0e422a2..615abd9 100644 --- a/arch/x86_64/kernel/vsyscall-xen.c +++ b/arch/x86_64/kernel/vsyscall-xen.c @@ -38,6 +38,7 @@ int __sysctl_vsyscall __section_sysctl_vsyscall = 1; seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; +int __vgetcpu_mode __section_vgetcpu_mode; #include <asm/unistd.h> diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 3b3eda4..3e04b35 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -894,3 +894,12 @@ void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size) return __alloc_bootmem_core(pgdat->bdata, size, SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0); } + +const char *arch_vma_name(struct vm_area_struct *vma) +{ + if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) + return "[vdso]"; + if (vma == &gate_vma) + return "[vsyscall]"; + return NULL; +} diff --git a/arch/x86_64/vdso/Makefile b/arch/x86_64/vdso/Makefile new file mode 100644 index 0000000..3cb986f --- /dev/null +++ b/arch/x86_64/vdso/Makefile @@ -0,0 +1,50 @@ +# +# x86-64 vDSO. +# + +# files to link into the vdso +# vdso-start.o has to be first +vobjs-y := vdso-start.o vdso-note.o vclock_gettime.o vgetcpu.o vvar.o + +# files to link into kernel +obj-y := vma.o vdso.o vdso-syms.o + +vobjs := $(foreach F,$(vobjs-y),$(obj)/$F) + +$(obj)/vdso.o: $(obj)/vdso.so + +targets += vdso.so vdso.lds $(vobjs-y) vdso-syms.o + +# The DSO images are built using a special linker script. +quiet_cmd_syscall = SYSCALL $@ + cmd_syscall = $(CC) -m elf_x86_64 -nostdlib $(SYSCFLAGS_$(@F)) \ + -Wl,-T,$(filter-out FORCE,$^) -o $@ + +export CPPFLAGS_vdso.lds += -P -C -U$(ARCH) + +vdso-flags = -fPIC -shared -Wl,-soname=linux-vdso.so.1 \ + $(call ld-option, -Wl$(comma)--hash-style=sysv) \ + -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 +SYSCFLAGS_vdso.so = $(vdso-flags) + +$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so + +$(obj)/vdso.so: $(src)/vdso.lds $(vobjs) FORCE + $(call if_changed,syscall) + +CF := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ + $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) + +$(obj)/vclock_gettime.o: CFLAGS = $(CF) +$(obj)/vgetcpu.o: CFLAGS = $(CF) + +# We also create a special relocatable object that should mirror the symbol +# table and layout of the linked DSO. With ld -R we can then refer to +# these symbols in the kernel code rather than hand-coded addresses. +extra-y += vdso-syms.o +$(obj)/built-in.o: $(obj)/vdso-syms.o +$(obj)/built-in.o: ld_flags += -R $(obj)/vdso-syms.o + +SYSCFLAGS_vdso-syms.o = -r -d +$(obj)/vdso-syms.o: $(src)/vdso.lds $(vobjs) FORCE + $(call if_changed,syscall) diff --git a/arch/x86_64/vdso/vclock_gettime.c b/arch/x86_64/vdso/vclock_gettime.c new file mode 100644 index 0000000..5e15d01 --- /dev/null +++ b/arch/x86_64/vdso/vclock_gettime.c @@ -0,0 +1,142 @@ +/* + * Copyright 2006 Andi Kleen, SUSE Labs. + * Subject to the GNU Public License, v.2 + * + * Fast user context implementation of clock_gettime and gettimeofday. + * + * The code should have no internal unresolved relocations. + * Check with readelf after changing. + * Also alternative() doesn't work. + */ + +#include <linux/kernel.h> +#include <linux/posix-timers.h> +#include <linux/time.h> +#include <linux/string.h> +#include <linux/clocksource.h> +#include <asm/vsyscall.h> +#include <asm/timex.h> +#include <asm/hpet.h> +#include <asm/unistd.h> +#include <asm/io.h> +#include "vextern.h" + +#define NS_SCALE 10 /* 2^10, carefully chosen */ + +static long vdso_fallback_gettime(long clock, struct timespec *ts) +{ + long ret; + asm("syscall" : "=a" (ret) : + "0" (__NR_clock_gettime),"D" (clock), "S" (ts) : "memory"); + return ret; +} + +static inline cycle_t vread_hpet(void) +{ + return readl((void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); +} + +static inline cycle_t vread_tsc(void) +{ + cycle_t ret = (cycle_t)get_cycles_sync(); + return ret; +} + +static inline long vgetns(void) +{ + cycles_t vread; + long cycle_last, mult; + + if (vdso_vxtime->mode == VXTIME_HPET) { + vread = vread_hpet(); + cycle_last = vdso_vxtime->last; + mult = vdso_vxtime->quot; + } else if (vdso_vxtime->mode == VXTIME_TSC) { + vread = vread_tsc(); + cycle_last = vdso_vxtime->last_tsc; + mult = vdso_vxtime->tsc_quot; + } + + return ((vread - cycle_last) * mult) >> + NS_SCALE; +} + +static noinline int do_realtime(struct timespec *ts) +{ + unsigned long seq, ns; + do { + seq = read_seqbegin(vdso_xtime_lock); + ts->tv_sec = vdso_xtime->tv_sec; + ts->tv_nsec = vdso_xtime->tv_nsec; + ns = vgetns(); + } while (unlikely(read_seqretry(vdso_xtime_lock, seq))); + timespec_add_ns(ts, ns); + return 0; +} + +/* Copy of the version in kernel/time.c which we cannot directly access */ +static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec) +{ + while (nsec >= NSEC_PER_SEC) { + nsec -= NSEC_PER_SEC; + ++sec; + } + while (nsec < 0) { + nsec += NSEC_PER_SEC; + --sec; + } + ts->tv_sec = sec; + ts->tv_nsec = nsec; +} + +static noinline int do_monotonic(struct timespec *ts) +{ + unsigned long seq, ns, secs; + do { + seq = read_seqbegin(vdso_xtime_lock); + secs = vdso_xtime->tv_sec; + ns = vdso_xtime->tv_nsec + vgetns(); + secs += wall_to_monotonic.tv_sec; + ns += wall_to_monotonic.tv_nsec; + } while (unlikely(read_seqretry(vdso_xtime_lock, seq))); + vset_normalized_timespec(ts, secs, ns); + return 0; +} + +int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) +{ + if (likely(sysctl_vsyscall && (vdso_vxtime->mode != VXTIME_KVM))) + switch (clock) { + case CLOCK_REALTIME: + return do_realtime(ts); + case CLOCK_MONOTONIC: + return do_monotonic(ts); + } + return vdso_fallback_gettime(clock, ts); +} +int clock_gettime(clockid_t, struct timespec *) + __attribute__((weak, alias("__vdso_clock_gettime"))); + +int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) +{ + long ret; + + if (likely(sysctl_vsyscall && (vdso_vxtime->mode != VXTIME_KVM))) { + BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != + offsetof(struct timespec, tv_nsec) || + sizeof(*tv) != sizeof(struct timespec)); + do_realtime((struct timespec *)tv); + tv->tv_usec /= 1000; + if (unlikely(tz != NULL)) { + /* This relies on gcc inlining the memcpy. We'll notice + if it ever fails to do so. */ + memcpy(tz, vdso_sys_tz, sizeof(struct timezone)); + } + return 0; + } + asm("syscall" : "=a" (ret) : + "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); + return ret; +} +int gettimeofday(struct timeval *, struct timezone *) + __attribute__((weak, alias("__vdso_gettimeofday"))); diff --git a/arch/x86_64/vdso/vdso-note.S b/arch/x86_64/vdso/vdso-note.S new file mode 100644 index 0000000..d980e30 --- /dev/null +++ b/arch/x86_64/vdso/vdso-note.S @@ -0,0 +1,25 @@ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include <linux/uts.h> +#include <linux/version.h> + +#define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type) \ + .section name, flags; \ + .balign 4; \ + .long 1f - 0f; /* name length */ \ + .long 3f - 2f; /* data length */ \ + .long type; /* note type */ \ +0: .asciz vendor; /* vendor name */ \ +1: .balign 4; \ +2: + +#define ASM_ELF_NOTE_END \ +3: .balign 4; /* pad out section */ \ + .previous + +ASM_ELF_NOTE_BEGIN(".note.kernel-version", "a", UTS_SYSNAME, 0) + .long LINUX_VERSION_CODE +ASM_ELF_NOTE_END diff --git a/arch/x86_64/vdso/vdso-start.S b/arch/x86_64/vdso/vdso-start.S new file mode 100644 index 0000000..2dc2cdb --- /dev/null +++ b/arch/x86_64/vdso/vdso-start.S @@ -0,0 +1,2 @@ + .globl vdso_kernel_start +vdso_kernel_start: diff --git a/arch/x86_64/vdso/vdso.S b/arch/x86_64/vdso/vdso.S new file mode 100644 index 0000000..92e80c1 --- /dev/null +++ b/arch/x86_64/vdso/vdso.S @@ -0,0 +1,2 @@ + .section ".vdso","a" + .incbin "arch/x86_64/vdso/vdso.so" diff --git a/arch/x86_64/vdso/vdso.lds b/arch/x86_64/vdso/vdso.lds new file mode 100644 index 0000000..96adae3 --- /dev/null +++ b/arch/x86_64/vdso/vdso.lds @@ -0,0 +1,77 @@ +/* + * Automatically generated C config: don't edit + * Linux kernel version: 2.6.18.4 + * Fri Jun 4 19:45:35 2010 + */ +/* + * Linker script for vsyscall DSO. The vsyscall page is an ELF shared + * object prelinked to its virtual address, and with only one read-only + * segment (that fits in one page). This script controls its layout. + */ +/* + * DO NOT MODIFY. + * + * This file was generated by /mnt/opt/work/rhel5-clean/kernel/Kbuild + * + */ +SECTIONS +{ + . = 0xffffffffff700000 + SIZEOF_HEADERS; + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + /* This linker script is used both with -r and with -shared. + For the layouts to match, we need to skip more than enough + space for the dynamic symbol table et al. If this amount + is insufficient, ld -shared will barf. Just increase it here. */ + . = 0xffffffffff700000 + 0x500; + .text : { *(.text) } :text + .text.ptr : { *(.text.ptr) } :text + . = 0xffffffffff700000 + 0x900; + .data : { *(.data) } :text + .bss : { *(.bss) } :text + .altinstructions : { *(.altinstructions) } :text + .altinstr_replacement : { *(.altinstr_replacement) } :text + .note : { *(.note.*) } :text :note + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .dynamic : { *(.dynamic) } :text :dynamic + .useless : { + *(.got.plt) *(.got) + *(.gnu.linkonce.d.*) + *(.dynbss) + *(.gnu.linkonce.b.*) + } :text +} +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */ +} + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + LINUX_2.6 { + global: + clock_gettime; + __vdso_clock_gettime; + gettimeofday; + __vdso_gettimeofday; + getcpu; + __vdso_getcpu; + local: *; + }; +} diff --git a/arch/x86_64/vdso/vdso.lds.S b/arch/x86_64/vdso/vdso.lds.S new file mode 100644 index 0000000..d12fa8f --- /dev/null +++ b/arch/x86_64/vdso/vdso.lds.S @@ -0,0 +1,77 @@ +/* + * Linker script for vsyscall DSO. The vsyscall page is an ELF shared + * object prelinked to its virtual address, and with only one read-only + * segment (that fits in one page). This script controls its layout. + */ +#include <asm/asm-offsets.h> +#include "voffset.h" + +#define VDSO_PRELINK 0xffffffffff700000 + +SECTIONS +{ + . = VDSO_PRELINK + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + /* This linker script is used both with -r and with -shared. + For the layouts to match, we need to skip more than enough + space for the dynamic symbol table et al. If this amount + is insufficient, ld -shared will barf. Just increase it here. */ + . = VDSO_PRELINK + VDSO_TEXT_OFFSET; + + .text : { *(.text) } :text + .text.ptr : { *(.text.ptr) } :text + . = VDSO_PRELINK + 0x900; + .data : { *(.data) } :text + .bss : { *(.bss) } :text + + .altinstructions : { *(.altinstructions) } :text + .altinstr_replacement : { *(.altinstr_replacement) } :text + + .note : { *(.note.*) } :text :note + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .dynamic : { *(.dynamic) } :text :dynamic + .useless : { + *(.got.plt) *(.got) + *(.gnu.linkonce.d.*) + *(.dynbss) + *(.gnu.linkonce.b.*) + } :text +} + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */ +} + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + LINUX_2.6 { + global: + clock_gettime; + __vdso_clock_gettime; + gettimeofday; + __vdso_gettimeofday; + getcpu; + __vdso_getcpu; + local: *; + }; +} diff --git a/arch/x86_64/vdso/vextern.h b/arch/x86_64/vdso/vextern.h new file mode 100644 index 0000000..680c7ca --- /dev/null +++ b/arch/x86_64/vdso/vextern.h @@ -0,0 +1,18 @@ +#ifndef VEXTERN +#include <asm/vsyscall.h> +#define VEXTERN(x) \ + extern typeof(x) *vdso_ ## x __attribute__((visibility("hidden"))); +#endif + +#define VMAGIC 0xfeedbabeabcdefabUL + +/* Any kernel variables used in the vDSO must be exported in the main + kernel's vmlinux.lds.S/vsyscall.h/proper __section and + put into vextern.h and be referenced as a pointer with vdso prefix. + The main kernel later fills in the values. */ + +VEXTERN(vxtime) +VEXTERN(xtime_lock) +VEXTERN(xtime) +VEXTERN(sys_tz) +VEXTERN(vgetcpu_mode) diff --git a/arch/x86_64/vdso/vgetcpu.c b/arch/x86_64/vdso/vgetcpu.c new file mode 100644 index 0000000..4787c5a --- /dev/null +++ b/arch/x86_64/vdso/vgetcpu.c @@ -0,0 +1,35 @@ +/* + * Copyright 2006 Andi Kleen, SUSE Labs. + * Subject to the GNU Public License, v.2 + * + * Fast user context implementation of getcpu() + */ + +#include <linux/kernel.h> +#include <linux/getcpu.h> +#include <linux/jiffies.h> +#include <linux/time.h> +#include <asm/vsyscall.h> +#include "vextern.h" + +long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) +{ + unsigned int dummy, p; + + if (*vdso_vgetcpu_mode == VGETCPU_RDTSCP) { + /* Load per CPU data from RDTSCP */ + rdtscp(dummy, dummy, p); + } else { + /* Load per CPU data from GDT */ + asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); + } + + if (cpu) + *cpu = p & 0xfff; + if (node) + *node = p >> 12; + return 0; +} + +long getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) + __attribute__((weak, alias("__vdso_getcpu"))); diff --git a/arch/x86_64/vdso/vma.c b/arch/x86_64/vdso/vma.c new file mode 100644 index 0000000..f0a0d1d --- /dev/null +++ b/arch/x86_64/vdso/vma.c @@ -0,0 +1,140 @@ +/* + * Set up the VMAs to tell the VM about the vDSO. + * Copyright 2007 Andi Kleen, SUSE Labs. + * Subject to the GPL, v.2 + */ +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/random.h> +#include <linux/vmalloc.h> +#include <asm/vsyscall.h> +#include <asm/proto.h> +#include "voffset.h" + +int vdso_enabled = 1; + +#define VEXTERN(x) extern typeof(__ ## x) *vdso_ ## x; +#include "vextern.h" +#undef VEXTERN + +extern char vdso_kernel_start[], vdso_start[], vdso_end[]; +extern unsigned short vdso_sync_cpuid; + +struct page **vdso_pages; + +static inline void *var_ref(void *vbase, char *var, char *name) +{ + unsigned offset = var - &vdso_kernel_start[0] + VDSO_TEXT_OFFSET; + void *p = vbase + offset; + if (*(void **)p != (void *)VMAGIC) { + printk("VDSO: variable %s broken\n", name); + vdso_enabled = 0; + } + return p; +} + +static int __init init_vdso_vars(void) +{ + int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE; + int i; + char *vbase; + + vdso_pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL); + if (!vdso_pages) + goto oom; + for (i = 0; i < npages; i++) { + struct page *p; + p = alloc_page(GFP_KERNEL); + if (!p) + goto oom; + vdso_pages[i] = p; + copy_page(page_address(p), vdso_start + i*PAGE_SIZE); + } + + vbase = vmap(vdso_pages, npages, 0, PAGE_KERNEL); + if (!vbase) + goto oom; + + if (memcmp(vbase, "\177ELF", 4)) { + printk("VDSO: I'm broken; not ELF\n"); + vdso_enabled = 0; + } + +#define V(x) *(typeof(x) *) var_ref(vbase, (char *)RELOC_HIDE(&x, 0), #x) +#define VEXTERN(x) \ + V(vdso_ ## x) = &__ ## x; +#include "vextern.h" +#undef VEXTERN + return 0; + + oom: + printk("Cannot allocate vdso\n"); + vdso_enabled = 0; + return -ENOMEM; +} +__initcall(init_vdso_vars); + +struct linux_binprm; + +/* Put the vdso above the (randomized) stack with another randomized offset. + This way there is no hole in the middle of address space. + To save memory make sure it is still in the same PTE as the stack top. + This doesn't give that many random bits */ +static unsigned long vdso_addr(unsigned long start, unsigned len) +{ + unsigned long addr, end; + unsigned offset; + end = (start + PMD_SIZE - 1) & PMD_MASK; + if (end >= TASK_SIZE64) + end = TASK_SIZE64; + end -= len; + /* This loses some more bits than a modulo, but is cheaper */ + offset = get_random_int() & (PTRS_PER_PTE - 1); + addr = start + (offset << PAGE_SHIFT); + if (addr >= end) + addr = end; + return addr; +} + +/* Setup a VMA at program startup for the vsyscall page. + Not called for compat tasks */ +int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack, + unsigned long start_code, unsigned long interp_map_addr) +{ + struct mm_struct *mm = current->mm; + unsigned long addr; + int ret; + unsigned len = round_up(vdso_end - vdso_start, PAGE_SIZE); + + if (!vdso_enabled) + return 0; + + down_write(&mm->mmap_sem); + addr = vdso_addr(mm->start_stack, len); + addr = get_unmapped_area(NULL, addr, len, 0, 0); + if (IS_ERR_VALUE(addr)) { + ret = addr; + goto up_fail; + } + + ret = install_special_mapping(mm, addr, len, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| + VM_ALWAYSDUMP, + vdso_pages); + if (ret) + goto up_fail; + + current->mm->context.vdso = (void *)addr; +up_fail: + up_write(&mm->mmap_sem); + return ret; +} + +static __init int vdso_setup(char *s) +{ + vdso_enabled = simple_strtoul(s, NULL, 0); + return 0; +} +__setup("vdso=", vdso_setup); diff --git a/arch/x86_64/vdso/voffset.h b/arch/x86_64/vdso/voffset.h new file mode 100644 index 0000000..5304204 --- /dev/null +++ b/arch/x86_64/vdso/voffset.h @@ -0,0 +1 @@ +#define VDSO_TEXT_OFFSET 0x500 diff --git a/arch/x86_64/vdso/vvar.c b/arch/x86_64/vdso/vvar.c new file mode 100644 index 0000000..cd2d82d --- /dev/null +++ b/arch/x86_64/vdso/vvar.c @@ -0,0 +1,11 @@ +/* Define pointer to external vDSO variables. + These are part of the vDSO. The kernel fills in the real addresses + at boot time. This is done because when the vdso is linked the + kernel isn't yet and we don't know the final addresses. */ +#include <linux/kernel.h> +#include <linux/time.h> +#include <asm/vsyscall.h> +#include <asm/timex.h> + +#define VEXTERN(x) typeof (__ ## x) *vdso_ ## x = (void *)VMAGIC; +#include "vextern.h" diff --git a/include/asm-x86_64/auxvec.h b/include/asm-x86_64/auxvec.h index 2403c4c..fc1f483 100644 --- a/include/asm-x86_64/auxvec.h +++ b/include/asm-x86_64/auxvec.h @@ -1,4 +1,6 @@ #ifndef __ASM_X86_64_AUXVEC_H #define __ASM_X86_64_AUXVEC_H +#define AT_SYSINFO_EHDR 33 + #endif diff --git a/include/asm-x86_64/elf.h b/include/asm-x86_64/elf.h index 67eff98..1e46ca9 100644 --- a/include/asm-x86_64/elf.h +++ b/include/asm-x86_64/elf.h @@ -163,6 +163,19 @@ extern int dump_task_fpu (struct task_struct *, elf_fpregset_t *); /* 1GB for 64bit, 8MB for 32bit */ #define STACK_RND_MASK (test_thread_flag(TIF_IA32) ? 0x7ff : 0x3fffff) +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 +struct linux_binprm; +extern int arch_setup_additional_pages(struct linux_binprm *bprm, + int executable_stack, unsigned long start_code, + unsigned long interp_map_addr); + +extern unsigned int vdso_enabled; + +#define ARCH_DLINFO \ +do if (vdso_enabled) { \ + NEW_AUX_ENT(AT_SYSINFO_EHDR,(unsigned long)current->mm->context.vdso);\ +} while (0) + #endif #define __HAVE_ARCH_RANDOMIZE_BRK diff --git a/include/asm-x86_64/mach-xen/asm/mmu.h b/include/asm-x86_64/mach-xen/asm/mmu.h index 2ac8252..ba26b62 100644 --- a/include/asm-x86_64/mach-xen/asm/mmu.h +++ b/include/asm-x86_64/mach-xen/asm/mmu.h @@ -20,6 +20,9 @@ typedef struct { unsigned has_foreign_mappings:1; struct list_head unpinned; #endif +#ifndef __GENKSYMS__ + void *vdso; +#endif } mm_context_t; #ifdef CONFIG_XEN diff --git a/include/asm-x86_64/mach-xen/asm/msr.h b/include/asm-x86_64/mach-xen/asm/msr.h index 912c4d1..20fb12d 100644 --- a/include/asm-x86_64/mach-xen/asm/msr.h +++ b/include/asm-x86_64/mach-xen/asm/msr.h @@ -66,6 +66,9 @@ #define rdtscl(low) \ __asm__ __volatile__ ("rdtsc" : "=a" (low) : : "edx") +#define rdtscp(low,high,aux) \ + asm volatile (".byte 0x0f,0x01,0xf9" : "=a" (low), "=d" (high), "=c" (aux)) + #define rdtscll(val) do { \ unsigned int __a,__d; \ asm volatile("rdtsc" : "=a" (__a), "=d" (__d)); \ diff --git a/include/asm-x86_64/mmu.h b/include/asm-x86_64/mmu.h index 5dc6ed7..f2362a7 100644 --- a/include/asm-x86_64/mmu.h +++ b/include/asm-x86_64/mmu.h @@ -14,7 +14,10 @@ typedef struct { void *ldt; rwlock_t ldtlock; int size; - struct semaphore sem; + struct semaphore sem; +#ifndef __GENKSYMS__ + void *vdso; +#endif } mm_context_t; #endif