From: Peter Zijlstra <pzijlstr@redhat.com>
Date: Thu, 7 May 2009 15:59:41 +0200
Subject: [x86] scale cyc_2_nsec according to CPU frequency
Message-id: 20090507140138.248038000@chello.nl
O-Subject: [PATCH 3/5] RHEL-5: x86: scale cyc_2_nsec according to CPU frequency
Bugzilla: 297731
RH-Acked-by: Brian Maly <bmaly@redhat.com>
RH-Acked-by: Rik van Riel <riel@redhat.com>
CVE: CVE-2007-3719

Backport of:

commit 53d517cdbaac704352b3d0c10fecb99e0b54572e
Author: Guillaume Chazarain <guichaz@yahoo.fr>
Date:   Wed Jan 30 13:30:06 2008 +0100

    x86: scale cyc_2_nsec according to CPU frequency

    scale the sched_clock() cyc_2_nsec scaling factor according to
    CPU frequency changes.

    [ mingo@elte.hu: simplified it and fixed it for SMP. ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra <pzijlstr@redhat.com>

diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index 711eae4..254fdfb 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -12,6 +12,7 @@
 #include <linux/dmi.h>
 #include <linux/acpi.h>
 #include <linux/delay.h>
+#include <linux/percpu.h>
 #include <asm/delay.h>
 #include <asm/tsc.h>
 #include <asm/delay.h>
@@ -89,18 +90,46 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable);
  *
  * -johnstul@us.ibm.com "math is hard, lets go shopping!"
  */
-static unsigned long cyc2ns_scale __read_mostly;
+static DEFINE_PER_CPU(unsigned long, cyc2ns);
 
 #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
 
-static inline void set_cyc2ns_scale(unsigned long cpu_khz)
+static inline unsigned long long __cycles_2_ns(unsigned long long cyc)
 {
-	cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
+	return cyc * __get_cpu_var(cyc2ns) >> CYC2NS_SCALE_FACTOR;
+}
+
+static inline void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
+{
+	unsigned long flags, prev_scale, *scale;
+	unsigned long long tsc_now, ns_now;
+
+	local_irq_save(flags);
+	sched_clock_idle_sleep_event();
+
+	scale = &per_cpu(cyc2ns, cpu);
+
+	rdtscll(tsc_now);
+	ns_now = __cycles_2_ns(tsc_now);
+
+	prev_scale = *scale;
+	if (cpu_khz)
+		*scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
+
+	sched_clock_idle_wakeup_event(0);
+	local_irq_restore(flags);
 }
 
 static inline unsigned long long cycles_2_ns(unsigned long long cyc)
 {
-	return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
+	unsigned long long ns;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	ns = __cycles_2_ns(cyc);
+	local_irq_restore(flags);
+
+	return ns;
 }
 
 #ifndef CONFIG_XEN
@@ -109,7 +138,8 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc)
  */
 unsigned long long sched_clock(void)
 {
-	unsigned long long this_offset;
+	unsigned long long clock = 0;
+	unsigned long flags;
 
 	/*
 	 * in the NUMA case we dont use the TSC as they are not
@@ -121,11 +151,14 @@ unsigned long long sched_clock(void)
 		/* no locking but a rare wrong value is not a big deal */
 		return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
 
+	local_irq_save(flags);
 	/* read the Time Stamp Counter: */
-	rdtscll(this_offset);
-
+	rdtscll(clock);
 	/* return the value in ns */
-	return cycles_2_ns(this_offset);
+	clock = __cycles_2_ns(clock);
+	local_irq_restore(flags);
+
+	return clock;
 }
 
 #endif
@@ -207,6 +240,7 @@ EXPORT_SYMBOL(recalibrate_cpu_khz);
 void tsc_init(void)
 {
 	u64 lpj;
+	int cpu;
 
 	if (!cpu_has_tsc || tsc_disable)
 		return;
@@ -221,7 +255,8 @@ void tsc_init(void)
 	       (unsigned long)cpu_khz / 1000,
 	       (unsigned long)cpu_khz % 1000);
 
-	set_cyc2ns_scale(cpu_khz);
+	for_each_possible_cpu(cpu)
+		set_cyc2ns_scale(cpu_khz, cpu);
 
 	lpj = ((u64)tsc_khz * 1000);
 	do_div(lpj, HZ);
@@ -301,7 +336,8 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
 					ref_freq, freq->new);
 		if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
 			tsc_khz = cpu_khz;
-			set_cyc2ns_scale(cpu_khz);
+			set_cyc2ns_scale(cpu_khz, get_cpu());
+			put_cpu();
 			/*
 			 * TSC based sched_clock turns
 			 * to junk w/ cpufreq
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 897d5a8..9727728 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -296,12 +296,46 @@ static void set_rtc_mmss(unsigned long nowtime)
 	spin_unlock(&rtc_lock);
 }
 
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+static DEFINE_PER_CPU(unsigned long, cyc2ns);
+
+#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
+
+static inline unsigned long long __cycles_2_ns(unsigned long long cyc)
 {
-	return (cyc * cyc2ns_scale) >> NS_SCALE;
+	return cyc * __get_cpu_var(cyc2ns) >> CYC2NS_SCALE_FACTOR;
+}
+
+static inline void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
+{
+	unsigned long flags, *scale;
+	unsigned long long tsc_now, ns_now;
+
+	local_irq_save(flags);
+	sched_clock_idle_sleep_event();
+
+	scale = &per_cpu(cyc2ns, cpu);
+
+	rdtscll(tsc_now);
+	ns_now = __cycles_2_ns(tsc_now);
+
+	if (cpu_khz)
+		*scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
+
+	sched_clock_idle_wakeup_event(0);
+	local_irq_restore(flags);
 }
 
-#define __cycles_2_ns(c) cycles_2_ns(c)
+static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+{
+	unsigned long long ns;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	ns = __cycles_2_ns(cyc);
+	local_irq_restore(flags);
+
+	return ns;
+}
 
 /* monotonic_clock(): returns # of nanoseconds passed since time_init()
  * Note: This function is required to return accurate
@@ -559,16 +593,9 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 	return IRQ_HANDLED;
 }
 
-static unsigned int cyc2ns_scale __read_mostly;
-
-static inline void set_cyc2ns_scale(unsigned long cpu_khz)
-{
-	cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz;
-}
-
 unsigned long long sched_clock(void)
 {
-	unsigned long a = 0;
+	unsigned long flags, clock = 0;
 
 #if 0
 	/* Don't do a HPET read here. Using TSC always is much faster
@@ -584,8 +611,12 @@ unsigned long long sched_clock(void)
 	   CPUs. But the errors should be too small to matter for
 	   scheduling purposes. */
-	rdtscll(a);
-	return cycles_2_ns(a);
+	local_irq_save(flags);
+	rdtscll(clock);
+	clock = __cycles_2_ns(clock);
+	local_irq_restore(flags);
+
+	return clock;
 }
 
 static unsigned long get_cmos_time(void)
@@ -775,7 +806,8 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 		vxtime.tsc_quot = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz;
 	}
 
-	set_cyc2ns_scale(tsc_khz_ref);
+	set_cyc2ns_scale(tsc_khz_ref, get_cpu());
+	put_cpu();
 
 	return 0;
 }
@@ -1039,6 +1071,7 @@ time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu)
 void __init time_init(void)
 {
 	unsigned int hypervisor_khz;
+	int cpu;
 
 	if (nohpet)
 		vxtime.hpet_address = 0;
@@ -1106,7 +1139,8 @@ void __init time_init(void)
 	vxtime.last_tsc = get_cycles_sync();
 	setup_irq(0, &irq0);
 
-	set_cyc2ns_scale(tsc_khz);
+	for_each_possible_cpu(cpu)
+		set_cyc2ns_scale(tsc_khz, cpu);
 
 	hotcpu_notifier(time_cpu_notifier, 0);
 	time_cpu_notifier(NULL, CPU_ONLINE, (void *)(long)smp_processor_id());
@@ -1153,6 +1187,7 @@ __cpuinit int unsynchronized_tsc(void)
 void time_init_gtod(void)
 {
 	char *timetype;
+	int cpu;
 
 	if (unsynchronized_tsc())
 		notsc = 1;
@@ -1196,7 +1231,8 @@ void time_init_gtod(void)
 	vxtime.tsc_quot = (NSEC_PER_MSEC << NS_SCALE) / tsc_khz;
 	vxtime.last_tsc = get_cycles_sync();
 
-	set_cyc2ns_scale(tsc_khz);
+	for_each_possible_cpu(cpu)
+		set_cyc2ns_scale(tsc_khz, cpu);
 }
 
 __setup("report_lost_ticks", time_setup);
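
---

A note on the arithmetic, not part of the patch: both files convert cycles to
nanoseconds with the same fixed-point scheme. The scale is
(10^6 << CYC2NS_SCALE_FACTOR) / cpu_khz, i.e. nanoseconds-per-cycle with 10
fractional bits, and a conversion is just cyc * scale >> CYC2NS_SCALE_FACTOR.
The minimal standalone sketch below shows why that scale must be recomputed on
frequency changes; the 2 GHz / 1 GHz khz values are made-up illustration
numbers, not taken from the patch.

#include <stdio.h>

#define CYC2NS_SCALE_FACTOR 10	/* 2^10, as in the patch */

/* ns-per-cycle as a fixed-point value: (10^6 ns per ms) / (cycles per ms) */
static unsigned long cyc2ns_scale(unsigned long cpu_khz)
{
	return (1000000UL << CYC2NS_SCALE_FACTOR) / cpu_khz;
}

static unsigned long long cycles_2_ns(unsigned long long cyc,
				      unsigned long scale)
{
	return (cyc * scale) >> CYC2NS_SCALE_FACTOR;
}

int main(void)
{
	unsigned long long cyc = 2000000ULL;	/* 2M TSC cycles */

	/* At 2 GHz one cycle is 0.5 ns, so 2M cycles ~= 1000000 ns. */
	printf("2 GHz: %llu ns\n", cycles_2_ns(cyc, cyc2ns_scale(2000000)));

	/*
	 * After a cpufreq transition to 1 GHz the same cycle count is
	 * worth twice as many nanoseconds. With the old single global
	 * cyc2ns_scale, every CPU still running at another frequency
	 * would convert with a stale factor; hence the patch makes the
	 * scale per-CPU and recomputes it from the cpufreq notifier.
	 */
	printf("1 GHz: %llu ns\n", cycles_2_ns(cyc, cyc2ns_scale(1000000)));

	return 0;
}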