From: Danny Feng <dfeng@redhat.com>
Date: Mon, 7 Jun 2010 09:14:52 -0400
Subject: [x86_64] implement vDSO randomization
Message-id: <20100607091451.8239.23560.sendpatchset@danny.redhat>
Patchwork-id: 25985
O-Subject: [PATCH RHEL5.6] x86_64: Add vDSO for x86-64 with
	gettimeofday/clock_gettime/getcpu
Bugzilla: 459763
RH-Acked-by: Larry Woodman <lwoodman@redhat.com>
RH-Acked-by: Prarit Bhargava <prarit@redhat.com>

RHBZ#:
https://bugzilla.redhat.com/show_bug.cgi?id=459763

Description:
Randomization is not fully working on the RHEL 5 x86_64 kernel: there is no
vDSO randomization on x86_64.

Upstream status:
Upstream commit 2aae950b is the corresponding fix. Since the x86_64 kernel here
doesn't have the generic clocksource infrastructure, I had to implement the
x86_64 vDSO on top of the old vxtime/xtime timekeeping.
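
For reference, a minimal userspace sketch (not part of the patch) of the two
calls the new vDSO serves. Whether libc routes them through the
__vdso_clock_gettime()/__vdso_gettimeofday() entry points or falls back to the
syscall depends on the userland; the vDSO also exports weak
clock_gettime/gettimeofday symbols. Build with -lrt on older glibc:

#include <stdio.h>
#include <time.h>
#include <sys/time.h>

int main(void)
{
	struct timespec ts;
	struct timeval tv;

	/* CLOCK_REALTIME/CLOCK_MONOTONIC take the vDSO fast path */
	if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0)
		printf("monotonic: %ld.%09ld\n", (long)ts.tv_sec, ts.tv_nsec);

	/* gettimeofday() goes through the same do_realtime() path */
	if (gettimeofday(&tv, NULL) == 0)
		printf("realtime:  %ld.%06ld\n",
		       (long)tv.tv_sec, (long)tv.tv_usec);
	return 0;
}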

Brew build:
http://brewweb.devel.redhat.com/brew/taskinfo?taskID=2497692

Test status:
Downloaded and installed
rh-tests-kernel-security-execshield-randomization-2.1-13.noarch.rpm
and ran the tests; all pass.
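
As a quick sanity check outside the test rpm (a sketch, not part of the formal
suite): print the [vdso] line from /proc/self/maps and run the binary several
times; with randomization working, the base address changes between runs.

#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/self/maps", "r");

	if (!f)
		return 1;
	/* the arch_vma_name() hunk below is what labels this mapping [vdso] */
	while (fgets(line, sizeof(line), f))
		if (strstr(line, "[vdso]"))
			fputs(line, stdout);
	fclose(f);
	return 0;
}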

Review and comments are welcome. Thank you.
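
One more note on the mechanism: the elf.h hunk below adds AT_SYSINFO_EHDR to
the auxiliary vector so userland can locate the vDSO image at its randomized
address. A sketch of reading it back; it assumes glibc's getauxval(), which is
newer than the RHEL 5 userland (older systems would parse /proc/self/auxv
instead):

#include <stdio.h>
#include <elf.h>
#include <sys/auxv.h>

int main(void)
{
	/* AT_SYSINFO_EHDR points at the ELF header of the [vdso] mapping */
	unsigned long ehdr = getauxval(AT_SYSINFO_EHDR);

	if (ehdr)
		printf("[vdso] ELF header at %#lx\n", ehdr);
	else
		puts("no AT_SYSINFO_EHDR in the auxv");
	return 0;
}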

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 0b85c83..81bd2b3 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1856,7 +1856,7 @@ running once the system is up.
 	usbhid.mousepoll=
 			[USBHID] The interval which mice are to be polled at.
 
-	vdso=		[IA-32]
+	vdso=		[IA-32,x86_64]
 			vdso=1: enable VDSO (default)
 			vdso=0: disable VDSO mapping
 
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index 9fd0063..421b9ee 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -67,6 +67,9 @@ libs-y 					+= arch/x86_64/lib/
 core-y					+= arch/x86_64/kernel/ \
 					   arch/x86_64/mm/ \
 					   arch/x86_64/crypto/
+
+core-y					+= arch/x86_64/vdso/
+
 core-$(CONFIG_IA32_EMULATION)		+= arch/x86_64/ia32/
 drivers-$(CONFIG_PCI)			+= arch/x86_64/pci/
 drivers-$(CONFIG_OPROFILE)		+= arch/x86_64/oprofile/
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
index e095608..ce1d768 100644
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -33,6 +33,7 @@
 
 int sysctl_vsyscall32 = 1;
 
+#undef ARCH_DLINFO
 #define ARCH_DLINFO do {  \
 	if (sysctl_vsyscall32) { \
 	NEW_AUX_ENT(AT_SYSINFO, (u32)(u64)VSYSCALL32_VSYSCALL); \
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index cf41f55..daff4d4 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -213,6 +213,13 @@ SECTIONS
      from .altinstructions and .eh_frame */
   .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) }
   .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) }
+
+/* vdso blob that is mapped into user space */
+   vdso_start = . ;
+   .vdso  : AT(ADDR(.vdso) - LOAD_OFFSET) { *(.vdso) }
+   . = ALIGN(4096);
+   vdso_end = .;
+
   . = ALIGN(4096);
   __initramfs_start = .;
   .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) }
diff --git a/arch/x86_64/kernel/vsyscall-xen.c b/arch/x86_64/kernel/vsyscall-xen.c
index 0e422a2..615abd9 100644
--- a/arch/x86_64/kernel/vsyscall-xen.c
+++ b/arch/x86_64/kernel/vsyscall-xen.c
@@ -38,6 +38,7 @@
 
 int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
 seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
+int __vgetcpu_mode __section_vgetcpu_mode;
 
 #include <asm/unistd.h>
 
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 3b3eda4..3e04b35 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -894,3 +894,12 @@ void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
 	return __alloc_bootmem_core(pgdat->bdata, size,
 			SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0);
 }
+
+const char *arch_vma_name(struct vm_area_struct *vma)
+{
+	if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
+		return "[vdso]";
+	if (vma == &gate_vma)
+		return "[vsyscall]";
+	return NULL;
+}
diff --git a/arch/x86_64/vdso/Makefile b/arch/x86_64/vdso/Makefile
new file mode 100644
index 0000000..3cb986f
--- /dev/null
+++ b/arch/x86_64/vdso/Makefile
@@ -0,0 +1,50 @@
+#
+# x86-64 vDSO.
+#
+
+# files to link into the vdso
+# vdso-start.o has to be first
+vobjs-y := vdso-start.o vdso-note.o vclock_gettime.o vgetcpu.o vvar.o
+
+# files to link into kernel
+obj-y := vma.o vdso.o vdso-syms.o
+
+vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
+
+$(obj)/vdso.o: $(obj)/vdso.so
+
+targets += vdso.so vdso.lds $(vobjs-y) vdso-syms.o
+
+# The DSO images are built using a special linker script.
+quiet_cmd_syscall = SYSCALL $@
+      cmd_syscall = $(CC) -m elf_x86_64 -nostdlib $(SYSCFLAGS_$(@F)) \
+		          -Wl,-T,$(filter-out FORCE,$^) -o $@
+
+export CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
+
+vdso-flags = -fPIC -shared -Wl,-soname=linux-vdso.so.1 \
+		 $(call ld-option, -Wl$(comma)--hash-style=sysv) \
+		-Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
+SYSCFLAGS_vdso.so = $(vdso-flags)
+
+$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so
+
+$(obj)/vdso.so: $(src)/vdso.lds $(vobjs) FORCE
+	$(call if_changed,syscall)
+
+CF := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
+       $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector)
+
+$(obj)/vclock_gettime.o: CFLAGS = $(CF)
+$(obj)/vgetcpu.o: CFLAGS = $(CF)
+
+# We also create a special relocatable object that should mirror the symbol
+# table and layout of the linked DSO.  With ld -R we can then refer to
+# these symbols in the kernel code rather than hand-coded addresses.
+extra-y += vdso-syms.o
+$(obj)/built-in.o: $(obj)/vdso-syms.o
+$(obj)/built-in.o: ld_flags += -R $(obj)/vdso-syms.o
+
+SYSCFLAGS_vdso-syms.o = -r -d
+$(obj)/vdso-syms.o: $(src)/vdso.lds $(vobjs) FORCE
+	$(call if_changed,syscall)
diff --git a/arch/x86_64/vdso/vclock_gettime.c b/arch/x86_64/vdso/vclock_gettime.c
new file mode 100644
index 0000000..5e15d01
--- /dev/null
+++ b/arch/x86_64/vdso/vclock_gettime.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright 2006 Andi Kleen, SUSE Labs.
+ * Subject to the GNU Public License, v.2
+ *
+ * Fast user context implementation of clock_gettime and gettimeofday.
+ *
+ * The code should have no internal unresolved relocations.
+ * Check with readelf after changing.
+ * Also alternative() doesn't work.
+ */
+
+#include <linux/kernel.h>
+#include <linux/posix-timers.h>
+#include <linux/time.h>
+#include <linux/string.h>
+#include <linux/clocksource.h>
+#include <asm/vsyscall.h>
+#include <asm/timex.h>
+#include <asm/hpet.h>
+#include <asm/unistd.h>
+#include <asm/io.h>
+#include "vextern.h"
+
+#define NS_SCALE	10 /* 2^10, carefully chosen */
+
+static long vdso_fallback_gettime(long clock, struct timespec *ts)
+{
+	long ret;
+	asm("syscall" : "=a" (ret) :
+	    "0" (__NR_clock_gettime),"D" (clock), "S" (ts) : "memory");
+	return ret;
+}
+
+static inline cycle_t vread_hpet(void)
+{
+	return readl((void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0);
+}
+
+static inline cycle_t vread_tsc(void)
+{
+	cycle_t ret = (cycle_t)get_cycles_sync();
+	return ret;
+}
+
+static inline long vgetns(void)
+{
+	cycles_t vread;
+	long cycle_last, mult;
+
+	if (vdso_vxtime->mode == VXTIME_HPET) {
+		vread = vread_hpet();
+		cycle_last = vdso_vxtime->last;
+		mult = vdso_vxtime->quot;
+	} else if (vdso_vxtime->mode == VXTIME_TSC) {
+		vread = vread_tsc();
+		cycle_last = vdso_vxtime->last_tsc;
+		mult = vdso_vxtime->tsc_quot;
+	}
+
+	return ((vread - cycle_last) * mult) >>
+			NS_SCALE;
+}
+
+static noinline int do_realtime(struct timespec *ts)
+{
+	unsigned long seq, ns;
+	do {
+		seq = read_seqbegin(vdso_xtime_lock);
+		ts->tv_sec = vdso_xtime->tv_sec;
+		ts->tv_nsec = vdso_xtime->tv_nsec;
+		ns = vgetns();
+	} while (unlikely(read_seqretry(vdso_xtime_lock, seq)));
+	timespec_add_ns(ts, ns);
+	return 0;
+}
+
+/* Copy of the version in kernel/time.c which we cannot directly access */
+static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
+{
+	while (nsec >= NSEC_PER_SEC) {
+		nsec -= NSEC_PER_SEC;
+		++sec;
+	}
+	while (nsec < 0) {
+		nsec += NSEC_PER_SEC;
+		--sec;
+	}
+	ts->tv_sec = sec;
+	ts->tv_nsec = nsec;
+}
+
+static noinline int do_monotonic(struct timespec *ts)
+{
+	unsigned long seq, ns, secs;
+	do {
+		seq = read_seqbegin(vdso_xtime_lock);
+		secs = vdso_xtime->tv_sec;
+		ns = vdso_xtime->tv_nsec + vgetns();
+		secs += wall_to_monotonic.tv_sec;
+		ns += wall_to_monotonic.tv_nsec;
+	} while (unlikely(read_seqretry(vdso_xtime_lock, seq)));
+	vset_normalized_timespec(ts, secs, ns);
+	return 0;
+}
+
+int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
+{
+	if (likely(sysctl_vsyscall && (vdso_vxtime->mode != VXTIME_KVM)))
+		switch (clock) {
+		case CLOCK_REALTIME:
+			return do_realtime(ts);
+		case CLOCK_MONOTONIC:
+			return do_monotonic(ts);
+		}
+	return vdso_fallback_gettime(clock, ts);
+}
+int clock_gettime(clockid_t, struct timespec *)
+	 __attribute__((weak, alias("__vdso_clock_gettime")));
+
+int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+	long ret;
+
+	if (likely(sysctl_vsyscall && (vdso_vxtime->mode != VXTIME_KVM))) {
+		BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
+			     offsetof(struct timespec, tv_nsec) ||
+			     sizeof(*tv) != sizeof(struct timespec));
+		do_realtime((struct timespec *)tv);
+		tv->tv_usec /= 1000;
+		if (unlikely(tz != NULL)) {
+			/* This relies on gcc inlining the memcpy. We'll notice
+			   if it ever fails to do so. */
+			memcpy(tz, vdso_sys_tz, sizeof(struct timezone));
+		}
+		return 0;
+	}
+	asm("syscall" : "=a" (ret) :
+	    "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
+	return ret;
+}
+int gettimeofday(struct timeval *, struct timezone *)
+	__attribute__((weak, alias("__vdso_gettimeofday")));
diff --git a/arch/x86_64/vdso/vdso-note.S b/arch/x86_64/vdso/vdso-note.S
new file mode 100644
index 0000000..d980e30
--- /dev/null
+++ b/arch/x86_64/vdso/vdso-note.S
@@ -0,0 +1,25 @@
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+
+#define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type)                         \
+        .section name, flags;                                                 \
+        .balign 4;                                                            \
+        .long 1f - 0f;          /* name length */                             \
+        .long 3f - 2f;          /* data length */                             \
+        .long type;             /* note type */                               \
+0:      .asciz vendor;          /* vendor name */                             \
+1:      .balign 4;                                                            \
+2:
+
+#define ASM_ELF_NOTE_END                                                      \
+3:      .balign 4;              /* pad out section */                         \
+        .previous
+
+ASM_ELF_NOTE_BEGIN(".note.kernel-version", "a", UTS_SYSNAME, 0)
+	.long LINUX_VERSION_CODE
+ASM_ELF_NOTE_END
diff --git a/arch/x86_64/vdso/vdso-start.S b/arch/x86_64/vdso/vdso-start.S
new file mode 100644
index 0000000..2dc2cdb
--- /dev/null
+++ b/arch/x86_64/vdso/vdso-start.S
@@ -0,0 +1,2 @@
+	.globl vdso_kernel_start
+vdso_kernel_start:
diff --git a/arch/x86_64/vdso/vdso.S b/arch/x86_64/vdso/vdso.S
new file mode 100644
index 0000000..92e80c1
--- /dev/null
+++ b/arch/x86_64/vdso/vdso.S
@@ -0,0 +1,2 @@
+	.section ".vdso","a"
+	.incbin "arch/x86_64/vdso/vdso.so"
diff --git a/arch/x86_64/vdso/vdso.lds b/arch/x86_64/vdso/vdso.lds
new file mode 100644
index 0000000..96adae3
--- /dev/null
+++ b/arch/x86_64/vdso/vdso.lds
@@ -0,0 +1,77 @@
+/*
+ * Automatically generated C config: don't edit
+ * Linux kernel version: 2.6.18.4
+ * Fri Jun  4 19:45:35 2010
+ */
+/*
+ * Linker script for vsyscall DSO.  The vsyscall page is an ELF shared
+ * object prelinked to its virtual address, and with only one read-only
+ * segment (that fits in one page).  This script controls its layout.
+ */
+/*
+ * DO NOT MODIFY.
+ *
+ * This file was generated by /mnt/opt/work/rhel5-clean/kernel/Kbuild
+ *
+ */
+SECTIONS
+{
+   . = 0xffffffffff700000 + SIZEOF_HEADERS;
+   .hash : { *(.hash) } :text
+   .gnu.hash : { *(.gnu.hash) }
+   .dynsym : { *(.dynsym) }
+   .dynstr : { *(.dynstr) }
+   .gnu.version : { *(.gnu.version) }
+   .gnu.version_d : { *(.gnu.version_d) }
+   .gnu.version_r : { *(.gnu.version_r) }
+   /* This linker script is used both with -r and with -shared.
+      For the layouts to match, we need to skip more than enough
+      space for the dynamic symbol table et al.  If this amount
+      is insufficient, ld -shared will barf.  Just increase it here.  */
+   . = 0xffffffffff700000 + 0x500;
+   .text : { *(.text) } :text
+   .text.ptr : { *(.text.ptr) } :text
+   . = 0xffffffffff700000 + 0x900;
+   .data : { *(.data) } :text
+   .bss : { *(.bss) } :text
+   .altinstructions : { *(.altinstructions) } :text
+   .altinstr_replacement : { *(.altinstr_replacement) } :text
+   .note : { *(.note.*) } :text :note
+   .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+   .eh_frame : { KEEP (*(.eh_frame)) } :text
+   .dynamic : { *(.dynamic) } :text :dynamic
+   .useless : {
+ *(.got.plt) *(.got)
+ *(.gnu.linkonce.d.*)
+ *(.dynbss)
+ *(.gnu.linkonce.b.*)
+  } :text
+}
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+  text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+  dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+  note PT_NOTE FLAGS(4); /* PF_R */
+  eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+  LINUX_2.6 {
+    global:
+ clock_gettime;
+ __vdso_clock_gettime;
+ gettimeofday;
+ __vdso_gettimeofday;
+ getcpu;
+ __vdso_getcpu;
+    local: *;
+  };
+}
diff --git a/arch/x86_64/vdso/vdso.lds.S b/arch/x86_64/vdso/vdso.lds.S
new file mode 100644
index 0000000..d12fa8f
--- /dev/null
+++ b/arch/x86_64/vdso/vdso.lds.S
@@ -0,0 +1,77 @@
+/*
+ * Linker script for vsyscall DSO.  The vsyscall page is an ELF shared
+ * object prelinked to its virtual address, and with only one read-only
+ * segment (that fits in one page).  This script controls its layout.
+ */
+#include <asm/asm-offsets.h>
+#include "voffset.h"
+
+#define VDSO_PRELINK 0xffffffffff700000
+
+SECTIONS
+{
+   . = VDSO_PRELINK + SIZEOF_HEADERS;
+
+   .hash           : { *(.hash) }               :text
+   .gnu.hash       : { *(.gnu.hash) }
+   .dynsym         : { *(.dynsym) }
+   .dynstr         : { *(.dynstr) }
+   .gnu.version    : { *(.gnu.version) }
+   .gnu.version_d  : { *(.gnu.version_d) }
+   .gnu.version_r  : { *(.gnu.version_r) }
+
+   /* This linker script is used both with -r and with -shared.
+      For the layouts to match, we need to skip more than enough
+      space for the dynamic symbol table et al.  If this amount
+      is insufficient, ld -shared will barf.  Just increase it here.  */
+   . = VDSO_PRELINK + VDSO_TEXT_OFFSET;
+
+   .text           : { *(.text) }               :text
+   .text.ptr       : { *(.text.ptr) }           :text
+   . = VDSO_PRELINK + 0x900;
+   .data           : { *(.data) }               :text
+   .bss            : { *(.bss) }                        :text
+
+   .altinstructions : { *(.altinstructions) }                   :text
+   .altinstr_replacement  : { *(.altinstr_replacement) }        :text
+
+   .note                  : { *(.note.*) }              :text :note
+   .eh_frame_hdr   : { *(.eh_frame_hdr) }       :text :eh_frame_hdr
+   .eh_frame       : { KEEP (*(.eh_frame)) }    :text
+   .dynamic        : { *(.dynamic) }            :text :dynamic
+   .useless        : {
+	*(.got.plt) *(.got)
+	*(.gnu.linkonce.d.*)
+	*(.dynbss)
+	*(.gnu.linkonce.b.*)
+  }                                            :text
+}
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+  text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+  dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+  note PT_NOTE FLAGS(4); /* PF_R */
+  eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+  LINUX_2.6 {
+    global:
+	clock_gettime;
+	__vdso_clock_gettime;
+	gettimeofday;
+	__vdso_gettimeofday;
+	getcpu;
+	__vdso_getcpu;
+    local: *;
+  };
+}
diff --git a/arch/x86_64/vdso/vextern.h b/arch/x86_64/vdso/vextern.h
new file mode 100644
index 0000000..680c7ca
--- /dev/null
+++ b/arch/x86_64/vdso/vextern.h
@@ -0,0 +1,18 @@
+#ifndef VEXTERN
+#include <asm/vsyscall.h>
+#define VEXTERN(x) \
+       extern typeof(x) *vdso_ ## x __attribute__((visibility("hidden")));
+#endif
+
+#define VMAGIC 0xfeedbabeabcdefabUL
+
+/* Any kernel variables used in the vDSO must be exported in the main
+   kernel's vmlinux.lds.S/vsyscall.h/proper __section and
+   put into vextern.h and be referenced as a pointer with vdso prefix.
+   The main kernel later fills in the values.   */
+
+VEXTERN(vxtime)
+VEXTERN(xtime_lock)
+VEXTERN(xtime)
+VEXTERN(sys_tz)
+VEXTERN(vgetcpu_mode)
diff --git a/arch/x86_64/vdso/vgetcpu.c b/arch/x86_64/vdso/vgetcpu.c
new file mode 100644
index 0000000..4787c5a
--- /dev/null
+++ b/arch/x86_64/vdso/vgetcpu.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2006 Andi Kleen, SUSE Labs.
+ * Subject to the GNU Public License, v.2
+ *
+ * Fast user context implementation of getcpu()
+ */
+
+#include <linux/kernel.h>
+#include <linux/getcpu.h>
+#include <linux/jiffies.h>
+#include <linux/time.h>
+#include <asm/vsyscall.h>
+#include "vextern.h"
+
+long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
+{
+	unsigned int dummy, p;
+
+	if (*vdso_vgetcpu_mode == VGETCPU_RDTSCP) {
+		/* Load per CPU data from RDTSCP */
+		rdtscp(dummy, dummy, p);
+	} else {
+		/* Load per CPU data from GDT */
+		asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
+	}
+
+	if (cpu)
+		*cpu = p & 0xfff;
+	if (node)
+		*node = p >> 12;
+	return 0;
+}
+
+long getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
+	__attribute__((weak, alias("__vdso_getcpu")));
diff --git a/arch/x86_64/vdso/vma.c b/arch/x86_64/vdso/vma.c
new file mode 100644
index 0000000..f0a0d1d
--- /dev/null
+++ b/arch/x86_64/vdso/vma.c
@@ -0,0 +1,140 @@
+/*
+ * Set up the VMAs to tell the VM about the vDSO.
+ * Copyright 2007 Andi Kleen, SUSE Labs.
+ * Subject to the GPL, v.2
+ */
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/random.h>
+#include <linux/vmalloc.h>
+#include <asm/vsyscall.h>
+#include <asm/proto.h>
+#include "voffset.h"
+
+int vdso_enabled = 1;
+
+#define VEXTERN(x) extern typeof(__ ## x) *vdso_ ## x;
+#include "vextern.h"
+#undef VEXTERN
+
+extern char vdso_kernel_start[], vdso_start[], vdso_end[];
+extern unsigned short vdso_sync_cpuid;
+
+struct page **vdso_pages;
+
+static inline void *var_ref(void *vbase, char *var, char *name)
+{
+	unsigned offset = var - &vdso_kernel_start[0] + VDSO_TEXT_OFFSET;
+	void *p = vbase + offset;
+	if (*(void **)p != (void *)VMAGIC) {
+		printk("VDSO: variable %s broken\n", name);
+		vdso_enabled = 0;
+	}
+	return p;
+}
+
+static int __init init_vdso_vars(void)
+{
+	int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE;
+	int i;
+	char *vbase;
+
+	vdso_pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL);
+	if (!vdso_pages)
+		goto oom;
+	for (i = 0; i < npages; i++) {
+		struct page *p;
+		p = alloc_page(GFP_KERNEL);
+		if (!p)
+			goto oom;
+		vdso_pages[i] = p;
+		copy_page(page_address(p), vdso_start + i*PAGE_SIZE);
+	}
+
+	vbase = vmap(vdso_pages, npages, 0, PAGE_KERNEL);
+	if (!vbase)
+		goto oom;
+
+	if (memcmp(vbase, "\177ELF", 4)) {
+		printk("VDSO: I'm broken; not ELF\n");
+		vdso_enabled = 0;
+	}
+
+#define V(x) *(typeof(x) *) var_ref(vbase, (char *)RELOC_HIDE(&x, 0), #x)
+#define VEXTERN(x) \
+	V(vdso_ ## x) = &__ ## x;
+#include "vextern.h"
+#undef VEXTERN
+	return 0;
+
+ oom:
+	printk("Cannot allocate vdso\n");
+	vdso_enabled = 0;
+	return -ENOMEM;
+}
+__initcall(init_vdso_vars);
+
+struct linux_binprm;
+
+/* Put the vdso above the (randomized) stack with another randomized offset.
+   This way there is no hole in the middle of address space.
+   To save memory make sure it is still in the same PTE as the stack top.
+   This doesn't give that many random bits */
+static unsigned long vdso_addr(unsigned long start, unsigned len)
+{
+	unsigned long addr, end;
+	unsigned offset;
+	end = (start + PMD_SIZE - 1) & PMD_MASK;
+	if (end >= TASK_SIZE64)
+		end = TASK_SIZE64;
+	end -= len;
+	/* This loses some more bits than a modulo, but is cheaper */
+	offset = get_random_int() & (PTRS_PER_PTE - 1);
+	addr = start + (offset << PAGE_SHIFT);
+	if (addr >= end)
+		addr = end;
+	return addr;
+}
+
+/* Setup a VMA at program startup for the vsyscall page.
+   Not called for compat tasks */
+int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack, 
+				unsigned long start_code, unsigned long interp_map_addr)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr;
+	int ret;
+	unsigned len = round_up(vdso_end - vdso_start, PAGE_SIZE);
+
+	if (!vdso_enabled)
+		return 0;
+
+	down_write(&mm->mmap_sem);
+	addr = vdso_addr(mm->start_stack, len);
+	addr = get_unmapped_area(NULL, addr, len, 0, 0);
+	if (IS_ERR_VALUE(addr)) {
+		ret = addr;
+		goto up_fail;
+	}
+
+	ret = install_special_mapping(mm, addr, len,
+				      VM_READ|VM_EXEC|
+				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+				      VM_ALWAYSDUMP,
+				      vdso_pages);
+	if (ret)
+		goto up_fail;
+
+	current->mm->context.vdso = (void *)addr;
+up_fail:
+	up_write(&mm->mmap_sem);
+	return ret;
+}
+
+static __init int vdso_setup(char *s)
+{
+	vdso_enabled = simple_strtoul(s, NULL, 0);
+	return 0;
+}
+__setup("vdso=", vdso_setup);
diff --git a/arch/x86_64/vdso/voffset.h b/arch/x86_64/vdso/voffset.h
new file mode 100644
index 0000000..5304204
--- /dev/null
+++ b/arch/x86_64/vdso/voffset.h
@@ -0,0 +1 @@
+#define VDSO_TEXT_OFFSET 0x500
diff --git a/arch/x86_64/vdso/vvar.c b/arch/x86_64/vdso/vvar.c
new file mode 100644
index 0000000..cd2d82d
--- /dev/null
+++ b/arch/x86_64/vdso/vvar.c
@@ -0,0 +1,11 @@
+/* Define pointer to external vDSO variables.
+   These are part of the vDSO. The kernel fills in the real addresses
+   at boot time. This is done because when the vdso is linked the
+   kernel isn't yet and we don't know the final addresses. */
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <asm/vsyscall.h>
+#include <asm/timex.h>
+
+#define VEXTERN(x) typeof (__ ## x) *vdso_ ## x = (void *)VMAGIC;
+#include "vextern.h"
diff --git a/include/asm-x86_64/auxvec.h b/include/asm-x86_64/auxvec.h
index 2403c4c..fc1f483 100644
--- a/include/asm-x86_64/auxvec.h
+++ b/include/asm-x86_64/auxvec.h
@@ -1,4 +1,6 @@
 #ifndef __ASM_X86_64_AUXVEC_H
 #define __ASM_X86_64_AUXVEC_H
 
+#define AT_SYSINFO_EHDR                33
+
 #endif
diff --git a/include/asm-x86_64/elf.h b/include/asm-x86_64/elf.h
index 67eff98..1e46ca9 100644
--- a/include/asm-x86_64/elf.h
+++ b/include/asm-x86_64/elf.h
@@ -163,6 +163,19 @@ extern int dump_task_fpu (struct task_struct *, elf_fpregset_t *);
 /* 1GB for 64bit, 8MB for 32bit */
 #define STACK_RND_MASK (test_thread_flag(TIF_IA32) ? 0x7ff : 0x3fffff)
 
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
+struct linux_binprm;
+extern int arch_setup_additional_pages(struct linux_binprm *bprm,
+				       int executable_stack, unsigned long start_code,
+				       unsigned long interp_map_addr);
+
+extern unsigned int vdso_enabled;
+
+#define ARCH_DLINFO                                            \
+do if (vdso_enabled) {                                         \
+	NEW_AUX_ENT(AT_SYSINFO_EHDR,(unsigned long)current->mm->context.vdso);\
+} while (0)
+
 #endif
 
 #define __HAVE_ARCH_RANDOMIZE_BRK
diff --git a/include/asm-x86_64/mach-xen/asm/mmu.h b/include/asm-x86_64/mach-xen/asm/mmu.h
index 2ac8252..ba26b62 100644
--- a/include/asm-x86_64/mach-xen/asm/mmu.h
+++ b/include/asm-x86_64/mach-xen/asm/mmu.h
@@ -20,6 +20,9 @@ typedef struct {
 	unsigned has_foreign_mappings:1;
 	struct list_head unpinned;
 #endif
+#ifndef __GENKSYMS__
+	void *vdso;
+#endif
 } mm_context_t;
 
 #ifdef CONFIG_XEN
diff --git a/include/asm-x86_64/mach-xen/asm/msr.h b/include/asm-x86_64/mach-xen/asm/msr.h
index 912c4d1..20fb12d 100644
--- a/include/asm-x86_64/mach-xen/asm/msr.h
+++ b/include/asm-x86_64/mach-xen/asm/msr.h
@@ -66,6 +66,9 @@
 #define rdtscl(low) \
      __asm__ __volatile__ ("rdtsc" : "=a" (low) : : "edx")
 
+#define rdtscp(low,high,aux) \
+        asm volatile (".byte 0x0f,0x01,0xf9" : "=a" (low), "=d" (high), "=c" (aux))
+
 #define rdtscll(val) do { \
      unsigned int __a,__d; \
      asm volatile("rdtsc" : "=a" (__a), "=d" (__d)); \
diff --git a/include/asm-x86_64/mmu.h b/include/asm-x86_64/mmu.h
index 5dc6ed7..f2362a7 100644
--- a/include/asm-x86_64/mmu.h
+++ b/include/asm-x86_64/mmu.h
@@ -14,7 +14,10 @@ typedef struct {
 	void *ldt;
 	rwlock_t ldtlock; 
 	int size;
-	struct semaphore sem; 
+	struct semaphore sem;
+#ifndef __GENKSYMS__
+	void *vdso;
+#endif
 } mm_context_t;
 
 #endif