From: Bhavna Sarathy <bnagendr@redhat.com> Date: Mon, 16 Aug 2010 16:20:59 -0400 Subject: [cpufreq] add APERF/MPERF support for AMD processors Message-id: <20100816162649.4233.9051.sendpatchset@localhost.localdomain> Patchwork-id: 27643 O-Subject: [RHEL5.6 PATCH] Add APERF/MPERF support for AMD processors Bugzilla: 621335 RH-Acked-by: Prarit Bhargava <prarit@redhat.com> RH-Acked-by: Andrew Jones <drjones@redhat.com> Resolves BZ 621335 Effective Frequency Reporting is a mechanism by which software can query the processor to obtain the effective, average frequency over a dynamically determined period of time. Starting with model 10 of Family 0x10, AMD processors support APERF/MPERF. Add support for identifying it and using it within cpufreq. Move the APERF/MPERF functions out of the acpi-cpufreq code and into their own file so they can easily be shared. Applies cleanly to the -211 kernel. Patch dependency note: please apply the "per core frequency" patch first, before applying this (aperf/mperf) patch, to avoid patching/fuzz issues, thanks. Upstream commit: x86, cpufreq: Add APERF/MPERF support for AMD processors From: Mark Langsdorf <mark.langsdorf@amd.com> (Upstream commit id: a2fed573f065e526bfd5cbf26e5491973d9e9aaa) Tested successfully on family 0x10 model 10 systems. Please review and ACK. 
Signed-off-by: Jarod Wilson <jarod@redhat.com> diff --git a/arch/i386/kernel/cpu/cpufreq/Makefile b/arch/i386/kernel/cpu/cpufreq/Makefile index c9fe427..89cbb5d 100644 --- a/arch/i386/kernel/cpu/cpufreq/Makefile +++ b/arch/i386/kernel/cpu/cpufreq/Makefile @@ -1,13 +1,13 @@ obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o -obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o +obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o mperf.o obj-$(CONFIG_X86_LONGHAUL) += longhaul.o obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o obj-$(CONFIG_SC520_CPUFREQ) += sc520_freq.o obj-$(CONFIG_X86_LONGRUN) += longrun.o obj-$(CONFIG_X86_GX_SUSPMOD) += gx-suspmod.o obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o -obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o +obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o mperf.o obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o obj-$(CONFIG_X86_SPEEDSTEP_SMI) += speedstep-smi.o diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c index 574702a..acf1812 100644 --- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -44,6 +44,8 @@ #include <asm/delay.h> #include <asm/uaccess.h> +#include "mperf.h" + #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg) MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski"); @@ -57,7 +59,6 @@ enum { }; #define INTEL_MSR_RANGE (0xffff) -#define CPUID_6_ECX_APERFMPERF_CAPABILITY (0x1) struct acpi_cpufreq_data { struct acpi_processor_performance *acpi_data; @@ -242,98 +243,6 @@ static u32 get_cur_val(cpumask_t mask) return cmd.val; } -/* - * Return the measured active (C0) frequency on this CPU since last call - * to this function. 
- * Input: cpu number - * Return: Average CPU frequency in terms of max frequency (zero on error) - * - * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance - * over a period of time, while CPU is in C0 state. - * IA32_MPERF counts at the rate of max advertised frequency - * IA32_APERF counts at the rate of actual CPU frequency - * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and - * no meaning should be associated with absolute values of these MSRs. - */ -static unsigned int get_measured_perf(unsigned int cpu) -{ - union { - struct { - u32 lo; - u32 hi; - } split; - u64 whole; - } aperf_cur, mperf_cur; - - cpumask_t saved_mask; - unsigned int perf_percent; - unsigned int retval; - - saved_mask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); - if (get_cpu() != cpu) { - /* We were not able to run on requested processor */ - put_cpu(); - return 0; - } - - rdmsr(MSR_IA32_APERF, aperf_cur.split.lo, aperf_cur.split.hi); - rdmsr(MSR_IA32_MPERF, mperf_cur.split.lo, mperf_cur.split.hi); - - wrmsr(MSR_IA32_APERF, 0,0); - wrmsr(MSR_IA32_MPERF, 0,0); - -#ifdef __i386__ - /* - * We dont want to do 64 bit divide with 32 bit kernel - * Get an approximate value. Return failure in case we cannot get - * an approximate value. 
-	 */
-	if (unlikely(aperf_cur.split.hi || mperf_cur.split.hi)) {
-		int shift_count;
-		u32 h;
-
-		h = max_t(u32, aperf_cur.split.hi, mperf_cur.split.hi);
-		shift_count = fls(h);
-
-		aperf_cur.whole >>= shift_count;
-		mperf_cur.whole >>= shift_count;
-	}
-
-	if (((unsigned long)(-1) / 100) < aperf_cur.split.lo) {
-		int shift_count = 7;
-		aperf_cur.split.lo >>= shift_count;
-		mperf_cur.split.lo >>= shift_count;
-	}
-
-	if (aperf_cur.split.lo && mperf_cur.split.lo)
-		perf_percent = (aperf_cur.split.lo * 100) / mperf_cur.split.lo;
-	else
-		perf_percent = 0;
-
-#else
-	if (unlikely(((unsigned long)(-1) / 100) < aperf_cur.whole)) {
-		int shift_count = 7;
-		aperf_cur.whole >>= shift_count;
-		mperf_cur.whole >>= shift_count;
-	}
-
-	if (aperf_cur.whole && mperf_cur.whole)
-		perf_percent = (aperf_cur.whole * 100) / mperf_cur.whole;
-	else
-		perf_percent = 0;
-
-#endif
-
-	retval = drv_data[cpu]->max_freq * perf_percent / 100;
-
-	put_cpu();
-	set_cpus_allowed(current, saved_mask);
-
-	dprintk("cpu %d: performance percent %d\n", cpu, perf_percent);
-	return retval;
-}
-
 static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 {
 	struct acpi_cpufreq_data *data = drv_data[cpu];
@@ -700,7 +609,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 		unsigned int ecx;
 		ecx = cpuid_ecx(6);
 		if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY)
-			acpi_cpufreq_driver.getavg = get_measured_perf;
+			acpi_cpufreq_driver.getavg = cpufreq_get_measured_perf;
 	}
 
 	dprintk("CPU%u - ACPI performance management activated.\n", cpu);
diff --git a/arch/i386/kernel/cpu/cpufreq/mperf.c b/arch/i386/kernel/cpu/cpufreq/mperf.c
new file mode 100644
index 0000000..bbbebb3
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/mperf.c
@@ -0,0 +1,120 @@
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/slab.h>
+
+#include "mperf.h"
+
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg)
+
+/*
+ * Return the measured active (C0) frequency on this CPU since last call
+ * to this function.
+ * Input: cpu number
+ * Return: Average CPU frequency in terms of max frequency (zero on error)
+ *
+ * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance
+ * over a period of time, while CPU is in C0 state.
+ * IA32_MPERF counts at the rate of max advertised frequency
+ * IA32_APERF counts at the rate of actual CPU frequency
+ * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
+ * no meaning should be associated with absolute values of these MSRs.
+ *
+ * Both counters are zeroed after being read, so each call reports the
+ * ratio accumulated since the previous call.
+ *
+ * The policy reference taken with cpufreq_cpu_get() is dropped on every
+ * exit path, and the caller's CPU affinity mask is restored even when the
+ * task could not be moved to the requested CPU.
+ */
+unsigned int cpufreq_get_measured_perf(unsigned int cpu)
+{
+	union {
+		struct {
+			u32 lo;
+			u32 hi;
+		} split;
+		u64 whole;
+	} aperf_cur, mperf_cur;
+
+	cpumask_t saved_mask;
+	unsigned int perf_percent;
+	unsigned int retval;
+	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+
+	/* No cpufreq policy for this CPU: there is no max_freq to scale */
+	if (!policy)
+		return 0;
+
+	saved_mask = current->cpus_allowed;
+	set_cpus_allowed(current, cpumask_of_cpu(cpu));
+	if (get_cpu() != cpu) {
+		/* We were not able to run on requested processor */
+		put_cpu();
+		set_cpus_allowed(current, saved_mask);
+		cpufreq_cpu_put(policy);
+		return 0;
+	}
+
+	rdmsr(MSR_IA32_APERF, aperf_cur.split.lo, aperf_cur.split.hi);
+	rdmsr(MSR_IA32_MPERF, mperf_cur.split.lo, mperf_cur.split.hi);
+
+	wrmsr(MSR_IA32_APERF, 0,0);
+	wrmsr(MSR_IA32_MPERF, 0,0);
+
+#ifdef __i386__
+	/*
+	 * We dont want to do 64 bit divide with 32 bit kernel
+	 * Get an approximate value. Return failure in case we cannot get
+	 * an approximate value.
+	 */
+	if (unlikely(aperf_cur.split.hi || mperf_cur.split.hi)) {
+		int shift_count;
+		u32 h;
+
+		h = max_t(u32, aperf_cur.split.hi, mperf_cur.split.hi);
+		shift_count = fls(h);
+
+		aperf_cur.whole >>= shift_count;
+		mperf_cur.whole >>= shift_count;
+	}
+
+	if (((unsigned long)(-1) / 100) < aperf_cur.split.lo) {
+		int shift_count = 7;
+		aperf_cur.split.lo >>= shift_count;
+		mperf_cur.split.lo >>= shift_count;
+	}
+
+	if (aperf_cur.split.lo && mperf_cur.split.lo)
+		perf_percent = (aperf_cur.split.lo * 100) / mperf_cur.split.lo;
+	else
+		perf_percent = 0;
+
+#else
+	if (unlikely(((unsigned long)(-1) / 100) < aperf_cur.whole)) {
+		int shift_count = 7;
+		aperf_cur.whole >>= shift_count;
+		mperf_cur.whole >>= shift_count;
+	}
+
+	if (aperf_cur.whole && mperf_cur.whole)
+		perf_percent = (aperf_cur.whole * 100) / mperf_cur.whole;
+	else
+		perf_percent = 0;
+
+#endif
+
+	retval = policy->cpuinfo.max_freq * perf_percent / 100;
+
+	put_cpu();
+	set_cpus_allowed(current, saved_mask);
+	cpufreq_cpu_put(policy);
+
+	dprintk("cpu %d: performance percent %d\n", cpu, perf_percent);
+	return retval;
+}
+
+EXPORT_SYMBOL_GPL(cpufreq_get_measured_perf);
+MODULE_LICENSE("GPL");
diff --git a/arch/i386/kernel/cpu/cpufreq/mperf.h b/arch/i386/kernel/cpu/cpufreq/mperf.h
new file mode 100644
index 0000000..c76fe9b
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/mperf.h
@@ -0,0 +1,10 @@
+/*
+ * (c) 2010 Advanced Micro Devices, Inc.
+ * Your use of this code is subject to the terms and conditions of the
+ * GNU general public license version 2.
See "COPYING" or + * http://www.gnu.org/licenses/gpl.html + */ + +unsigned int cpufreq_get_measured_perf(unsigned int); + +#define CPUID_6_ECX_APERFMPERF_CAPABILITY (0x1) diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index c32e28d..16344b7 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -50,6 +50,7 @@ #define BFX PFX "BIOS error: " #define VERSION "version 2.20.00" #include "powernow-k8.h" +#include "mperf.h" /* serialize freq changes */ static DEFINE_MUTEX(fidvid_mutex); @@ -70,6 +71,8 @@ static int cpu_family = CPU_OPTERON; /* core performance boost */ static bool cpb_capable, cpb_enabled; +static struct cpufreq_driver cpufreq_amd64_driver; + #ifndef CONFIG_SMP static cpumask_t cpu_core_map[1]; #endif @@ -1256,6 +1259,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { struct powernow_k8_data *data; struct init_on_cpu init_on_cpu; + struct cpuinfo_x86 *c = &cpu_data[pol->cpu]; int rc; if (!cpu_online(pol->cpu)) @@ -1328,6 +1332,14 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) return -EINVAL; } + /* Check for APERF/MPERF support in hardware */ + if (c->x86_vendor == X86_VENDOR_AMD && c->cpuid_level >= 6) { + unsigned int ecx; + ecx = cpuid_ecx(6); + if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY) + cpufreq_amd64_driver.getavg = cpufreq_get_measured_perf; + } + cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); if (cpu_family == CPU_HW_PSTATE) diff --git a/arch/x86_64/kernel/cpufreq/Makefile b/arch/x86_64/kernel/cpufreq/Makefile index 753ce1d..b5f4e84 100644 --- a/arch/x86_64/kernel/cpufreq/Makefile +++ b/arch/x86_64/kernel/cpufreq/Makefile @@ -4,8 +4,8 @@ SRCDIR := ../../../i386/kernel/cpu/cpufreq -obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o -obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o +obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o mperf.o +obj-$(CONFIG_X86_ACPI_CPUFREQ) += 
acpi-cpufreq.o mperf.o obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o @@ -15,3 +15,4 @@ speedstep-centrino-objs := ${SRCDIR}/speedstep-centrino.o acpi-cpufreq-objs := ${SRCDIR}/acpi-cpufreq.o p4-clockmod-objs := ${SRCDIR}/p4-clockmod.o speedstep-lib-objs := ${SRCDIR}/speedstep-lib.o +mperf-objs := ${SRCDIR}/mperf.o