From: Don Dugger <ddugger@redhat.com> Date: Fri, 3 Sep 2010 17:02:07 -0400 Subject: [xen] oprofile: support Intel's arch perfmon registers Message-id: <201009031702.o83H27H5001933@sobek.n0ano.com> Patchwork-id: 28122 O-Subject: [RHEL 5.6 PATCH 2/3 V2] BZ 538564: xenoprof: support Intel's architectural perfmon registers. Bugzilla: 538564 RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com> (This version 2 adds missing hunk from upstream to setup the perfmon counters for core_i7 cpu models) One benefit is that more perfmon counters can be used on Nehalem. Signed-off-by: Yang Zhang <yang.zhang@intel.com> Signed-off-by: Yang Xiaowei <xiaowei.yang@intel.com> Upstream status: CS 15f5cff84adf Brew build: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=2733669 Signed-off-by: Don Dugger <donald.d.dugger@intel.com> --- arch/x86/cpu/intel.c | 6 ++ arch/x86/oprofile/nmi_int.c | 30 ++++++--- arch/x86/oprofile/op_model_ppro.c | 124 +++++++++++++++++++++++++++++-------- arch/x86/oprofile/op_x86_model.h | 8 ++- include/asm-x86/cpufeature.h | 3 + 5 files changed, 131 insertions(+), 40 deletions(-) Signed-off-by: Jarod Wilson <jarod@redhat.com> diff --git a/arch/x86/cpu/intel.c b/arch/x86/cpu/intel.c index 5fcad24..c52f00d 100644 --- a/arch/x86/cpu/intel.c +++ b/arch/x86/cpu/intel.c @@ -118,6 +118,12 @@ static void __devinit init_intel(struct cpuinfo_x86 *c) select_idle_routine(c); l2 = init_intel_cacheinfo(c); + if (c->cpuid_level > 9) { + unsigned eax = cpuid_eax(10); + /* Check for version and the number of counters */ + if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) + set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability); + } /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 57047f3..5002098 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -317,6 +317,7 @@ static int __init 
ppro_init(char ** cpu_type) *cpu_type = "i386/core_2"; break; case 26: + arch_perfmon_setup_counters(); *cpu_type = "i386/core_i7"; break; case 28: @@ -324,9 +325,6 @@ static int __init ppro_init(char ** cpu_type) break; default: /* Unknown */ - printk("xenoprof: Initialization failed. " - "Intel processor model %d for P6 class family is not " - "supported\n", cpu_model); return 0; } @@ -334,10 +332,21 @@ static int __init ppro_init(char ** cpu_type) return 1; } +static int __init arch_perfmon_init(char **cpu_type) +{ + if (!cpu_has_arch_perfmon) + return 0; + *cpu_type = "i386/arch_perfmon"; + model = &op_arch_perfmon_spec; + arch_perfmon_setup_counters(); + return 1; +} + static int __init nmi_init(void) { __u8 vendor = current_cpu_data.x86_vendor; __u8 family = current_cpu_data.x86; + __u8 _model = current_cpu_data.x86_model; if (!cpu_has_apic) { printk("xenoprof: Initialization failed. No APIC\n"); @@ -391,21 +400,22 @@ static int __init nmi_init(void) switch (family) { /* Pentium IV */ case 0xf: - if (!p4_init(&cpu_type)) - return -ENODEV; + p4_init(&cpu_type); break; /* A P6-class processor */ case 6: - if (!ppro_init(&cpu_type)) - return -ENODEV; + ppro_init(&cpu_type); break; default: + break; + } + if (!cpu_type && !arch_perfmon_init(&cpu_type)) { printk("xenoprof: Initialization failed. 
" - "Intel processor family %d is not " - "supported\n", family); - return -ENODEV; + "Intel processor family %d model %d " + "is not supported\n", family, _model); + return -ENODEV; } break; diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 53ff218..b47ac7d 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -22,12 +22,24 @@ #include "op_x86_model.h" #include "op_counter.h" -#define NUM_COUNTERS 2 -#define NUM_CONTROLS 2 +/* + * Intel "Architectural Performance Monitoring" CPUID + * detection/enumeration details: + */ +union cpuid10_eax { + struct { + unsigned int version_id:8; + unsigned int num_counters:8; + unsigned int bit_width:8; + unsigned int mask_length:8; + } split; + unsigned int full; +}; -#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0) -#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0) -#define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) +static int num_counters = 2; +static int counter_width = 32; + +#define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1)))) #define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0) #define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0) @@ -40,15 +52,16 @@ #define CTRL_SET_UM(val, m) (val |= (m << 8)) #define CTRL_SET_EVENT(val, e) (val |= e) -static unsigned long reset_value[NUM_COUNTERS]; +static unsigned long reset_value[OP_MAX_COUNTER]; static void ppro_fill_in_addresses(struct op_msrs * const msrs) { - msrs->counters[0].addr = MSR_P6_PERFCTR0; - msrs->counters[1].addr = MSR_P6_PERFCTR1; - - msrs->controls[0].addr = MSR_P6_EVNTSEL0; - msrs->controls[1].addr = MSR_P6_EVNTSEL1; + int i; + + for (i = 0; i < num_counters; i++) + msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; + for (i = 0; i < num_counters; i++) + msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; } @@ -56,25 +69,41 @@ static void 
ppro_setup_ctrs(struct op_msrs const * const msrs) { unsigned int low, high; int i; + + if (cpu_has_arch_perfmon) { + union cpuid10_eax eax; + eax.full = cpuid_eax(0xa); + + /* + * For Core2 (family 6, model 15), don't reset the + * counter width: + */ + if (!(eax.split.version_id == 0 && + current_cpu_data.x86 == 6 && + current_cpu_data.x86_model == 15)) { + + if (counter_width < eax.split.bit_width) + counter_width = eax.split.bit_width; + } + } /* clear all counters */ - for (i = 0 ; i < NUM_CONTROLS; ++i) { + for (i = 0 ; i < num_counters; ++i) { CTRL_READ(low, high, msrs, i); CTRL_CLEAR(low); CTRL_WRITE(low, high, msrs, i); } /* avoid a false detection of ctr overflows in NMI handler */ - for (i = 0; i < NUM_COUNTERS; ++i) { - CTR_WRITE(1, msrs, i); - } + for (i = 0; i < num_counters; ++i) + wrmsrl(msrs->counters[i].addr, -1LL); /* enable active counters */ - for (i = 0; i < NUM_COUNTERS; ++i) { + for (i = 0; i < num_counters; ++i) { if (counter_config[i].enabled) { reset_value[i] = counter_config[i].count; - CTR_WRITE(counter_config[i].count, msrs, i); + wrmsrl(msrs->counters[i].addr, -reset_value[i]); CTRL_READ(low, high, msrs, i); CTRL_CLEAR(low); @@ -84,6 +113,8 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) CTRL_SET_UM(low, counter_config[i].unit_mask); CTRL_SET_EVENT(low, counter_config[i].event); CTRL_WRITE(low, high, msrs, i); + } else { + reset_value[i] = 0; } } } @@ -97,19 +128,19 @@ static int ppro_check_ctrs(unsigned int const cpu, struct op_msrs const * const msrs, struct cpu_user_regs * const regs) { - unsigned int low, high; + u64 val; int i; int ovf = 0; unsigned long eip = regs->eip; int mode = xenoprofile_get_mode(current, regs); - for (i = 0 ; i < NUM_COUNTERS; ++i) { + for (i = 0 ; i < num_counters; ++i) { if (!reset_value[i]) continue; - CTR_READ(low, high, msrs, i); - if (CTR_OVERFLOWED(low)) { + rdmsrl(msrs->counters[i].addr, val); + if (CTR_OVERFLOWED(val)) { xenoprof_log_event(current, regs, eip, mode, i); - 
CTR_WRITE(reset_value[i], msrs, i); + wrmsrl(msrs->counters[i].addr, -reset_value[i]); ovf = 1; } } @@ -127,7 +158,7 @@ static void ppro_start(struct op_msrs const * const msrs) unsigned int low,high; int i; - for (i = 0; i < NUM_COUNTERS; ++i) { + for (i = 0; i < num_counters; ++i) { if (reset_value[i]) { CTRL_READ(low, high, msrs, i); CTRL_SET_ACTIVE(low); @@ -142,7 +173,7 @@ static void ppro_stop(struct op_msrs const * const msrs) unsigned int low,high; int i; - for (i = 0; i < NUM_COUNTERS; ++i) { + for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; CTRL_READ(low, high, msrs, i); @@ -152,12 +183,51 @@ static void ppro_stop(struct op_msrs const * const msrs) } -struct op_x86_model_spec const op_ppro_spec = { - .num_counters = NUM_COUNTERS, - .num_controls = NUM_CONTROLS, +/* + * Architectural performance monitoring. + * + * Newer Intel CPUs (Core1+) have support for architectural + * events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details. + * The advantage of this is that it can be done without knowing about + * the specific CPU. + */ +void arch_perfmon_setup_counters(void) +{ + union cpuid10_eax eax; + + eax.full = cpuid_eax(0xa); + + /* Workaround for BIOS bugs in 6/15. 
Taken from perfmon2 */ + if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && + current_cpu_data.x86_model == 15) { + eax.split.version_id = 2; + eax.split.num_counters = 2; + eax.split.bit_width = 40; + } + + num_counters = min_t(u8, eax.split.num_counters, OP_MAX_COUNTER); + + op_arch_perfmon_spec.num_counters = num_counters; + op_arch_perfmon_spec.num_controls = num_counters; + op_ppro_spec.num_counters = num_counters; + op_ppro_spec.num_controls = num_counters; +} + +struct op_x86_model_spec op_ppro_spec = { + .num_counters = 2, + .num_controls = 2, .fill_in_addresses = &ppro_fill_in_addresses, .setup_ctrs = &ppro_setup_ctrs, .check_ctrs = &ppro_check_ctrs, .start = &ppro_start, .stop = &ppro_stop }; + +struct op_x86_model_spec op_arch_perfmon_spec = { + /* num_counters/num_controls filled in at runtime */ + .fill_in_addresses = &ppro_fill_in_addresses, + .setup_ctrs = &ppro_setup_ctrs, + .check_ctrs = &ppro_check_ctrs, + .start = &ppro_start, + .stop = &ppro_stop, +}; diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 2858e8b..2c0020e 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -32,8 +32,8 @@ struct pt_regs; * various x86 CPU model's perfctr support. 
*/ struct op_x86_model_spec { - unsigned int const num_counters; - unsigned int const num_controls; + unsigned int num_counters; + unsigned int num_controls; void (*fill_in_addresses)(struct op_msrs * const msrs); void (*setup_ctrs)(struct op_msrs const * const msrs); int (*check_ctrs)(unsigned int const cpu, @@ -43,9 +43,11 @@ struct op_x86_model_spec { void (*stop)(struct op_msrs const * const msrs); }; -extern struct op_x86_model_spec const op_ppro_spec; +extern struct op_x86_model_spec op_ppro_spec; +extern struct op_x86_model_spec op_arch_perfmon_spec; extern struct op_x86_model_spec const op_p4_spec; extern struct op_x86_model_spec const op_p4_ht2_spec; extern struct op_x86_model_spec const op_athlon_spec; +void arch_perfmon_setup_counters(void); #endif /* OP_X86_MODEL_H */ diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index d8d2350..baad8e2 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -73,6 +73,7 @@ #define X86_FEATURE_P3 (3*32+ 6) /* P3 */ #define X86_FEATURE_P4 (3*32+ 7) /* P4 */ #define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */ +#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ @@ -174,6 +175,8 @@ #define cpu_has_efer 1 #endif +#define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) + #endif /* __ASM_I386_CPUFEATURE_H */ /*