Sophie

Sophie

distrib > CentOS > 5 > x86_64 > by-pkgid > ea32411352494358b8d75a78402a4713 > files > 5783

kernel-2.6.18-238.19.1.el5.centos.plus.src.rpm

From: Don Dugger <ddugger@redhat.com>
Date: Fri, 3 Sep 2010 17:02:07 -0400
Subject: [xen] oprofile: support Intel's arch perfmon registers
Message-id: <201009031702.o83H27H5001933@sobek.n0ano.com>
Patchwork-id: 28122
O-Subject: [RHEL 5.6 PATCH 2/3 V2] BZ 538564: xenoprof: support Intel's
	architectural perfmon registers.
Bugzilla: 538564
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>

(This version 2 adds missing hunk from upstream to setup the
perfmon counters for core_i7 cpu models)

One benefit is that more perfmon counters can be used on Nehalem.

Signed-off-by: Yang Zhang <yang.zhang@intel.com>
Signed-off-by: Yang Xiaowei <xiaowei.yang@intel.com>

Upstream status: CS 15f5cff84adf

Brew build: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=2733669

Signed-off-by: Don Dugger <donald.d.dugger@intel.com>
---
 arch/x86/cpu/intel.c              |    6 ++
 arch/x86/oprofile/nmi_int.c       |   30 ++++++---
 arch/x86/oprofile/op_model_ppro.c |  124 +++++++++++++++++++++++++++++--------
 arch/x86/oprofile/op_x86_model.h  |    8 ++-
 include/asm-x86/cpufeature.h      |    3 +
 5 files changed, 131 insertions(+), 40 deletions(-)

Signed-off-by: Jarod Wilson <jarod@redhat.com>

diff --git a/arch/x86/cpu/intel.c b/arch/x86/cpu/intel.c
index 5fcad24..c52f00d 100644
--- a/arch/x86/cpu/intel.c
+++ b/arch/x86/cpu/intel.c
@@ -118,6 +118,12 @@ static void __devinit init_intel(struct cpuinfo_x86 *c)
 
 	select_idle_routine(c);
 	l2 = init_intel_cacheinfo(c);
+	if (c->cpuid_level > 9) {
+		unsigned eax = cpuid_eax(10);
+		/* Check for version and the number of counters */
+		if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
+			set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability);
+	}
 
 	/* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */
 	if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 57047f3..5002098 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -317,6 +317,7 @@ static int __init ppro_init(char ** cpu_type)
 		*cpu_type = "i386/core_2";
 		break;
 	case 26:
+		arch_perfmon_setup_counters();
 		*cpu_type = "i386/core_i7";
 		break;
 	case 28:
@@ -324,9 +325,6 @@ static int __init ppro_init(char ** cpu_type)
 		break;
 	default:
 		/* Unknown */
-		printk("xenoprof: Initialization failed. "
-		       "Intel processor model %d for P6 class family is not "
-		       "supported\n", cpu_model);
 		return 0;
 	}
 
@@ -334,10 +332,21 @@ static int __init ppro_init(char ** cpu_type)
 	return 1;
 }
 
+static int __init arch_perfmon_init(char **cpu_type)
+{
+	if (!cpu_has_arch_perfmon)
+		return 0;
+	*cpu_type = "i386/arch_perfmon";
+	model = &op_arch_perfmon_spec;
+	arch_perfmon_setup_counters();
+	return 1;
+}
+
 static int __init nmi_init(void)
 {
 	__u8 vendor = current_cpu_data.x86_vendor;
 	__u8 family = current_cpu_data.x86;
+	__u8 _model = current_cpu_data.x86_model;
  
 	if (!cpu_has_apic) {
 		printk("xenoprof: Initialization failed. No APIC\n");
@@ -391,21 +400,22 @@ static int __init nmi_init(void)
 			switch (family) {
 				/* Pentium IV */
 				case 0xf:
-					if (!p4_init(&cpu_type))
-						return -ENODEV;
+					p4_init(&cpu_type);
 					break;
 
 				/* A P6-class processor */
 				case 6:
-					if (!ppro_init(&cpu_type))
-						return -ENODEV;
+					ppro_init(&cpu_type);
 					break;
 
 				default:
+				break;
+			}
+			if (!cpu_type && !arch_perfmon_init(&cpu_type)) {
 				printk("xenoprof: Initialization failed. "
-				       "Intel processor family %d is not "
-				       "supported\n", family);
-					return -ENODEV;
+				       "Intel processor family %d model %d "
+				       "is not supported\n", family, _model);
+				return -ENODEV;
 			}
 			break;
 
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 53ff218..b47ac7d 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -22,12 +22,24 @@
 #include "op_x86_model.h"
 #include "op_counter.h"
 
-#define NUM_COUNTERS 2
-#define NUM_CONTROLS 2
+/*
+ * Intel "Architectural Performance Monitoring" CPUID
+ * detection/enumeration details:
+ */
+union cpuid10_eax {
+	struct {
+		unsigned int version_id:8;
+		unsigned int num_counters:8;
+		unsigned int bit_width:8;
+		unsigned int mask_length:8;
+	} split;
+	unsigned int full;
+};
 
-#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
-#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0)
-#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
+static int num_counters = 2;
+static int counter_width = 32;
+
+#define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1))))
 
 #define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
 #define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
@@ -40,15 +52,16 @@
 #define CTRL_SET_UM(val, m) (val |= (m << 8))
 #define CTRL_SET_EVENT(val, e) (val |= e)
 
-static unsigned long reset_value[NUM_COUNTERS];
+static unsigned long reset_value[OP_MAX_COUNTER];
  
 static void ppro_fill_in_addresses(struct op_msrs * const msrs)
 {
-	msrs->counters[0].addr = MSR_P6_PERFCTR0;
-	msrs->counters[1].addr = MSR_P6_PERFCTR1;
-	
-	msrs->controls[0].addr = MSR_P6_EVNTSEL0;
-	msrs->controls[1].addr = MSR_P6_EVNTSEL1;
+	int i;
+
+	for (i = 0; i < num_counters; i++)
+		msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
+	for (i = 0; i < num_counters; i++)
+		msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
 }
 
 
@@ -56,25 +69,41 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
 {
 	unsigned int low, high;
 	int i;
+	
+	if (cpu_has_arch_perfmon) {
+		union cpuid10_eax eax;
+		eax.full = cpuid_eax(0xa);
+
+		/*
+		 * For Core2 (family 6, model 15), don't reset the
+		 * counter width:
+		 */
+		if (!(eax.split.version_id == 0 &&
+			current_cpu_data.x86 == 6 &&
+				current_cpu_data.x86_model == 15)) {
+
+			if (counter_width < eax.split.bit_width)
+				counter_width = eax.split.bit_width;
+		}
+	}
 
 	/* clear all counters */
-	for (i = 0 ; i < NUM_CONTROLS; ++i) {
+	for (i = 0 ; i < num_counters; ++i) {
 		CTRL_READ(low, high, msrs, i);
 		CTRL_CLEAR(low);
 		CTRL_WRITE(low, high, msrs, i);
 	}
 	
 	/* avoid a false detection of ctr overflows in NMI handler */
-	for (i = 0; i < NUM_COUNTERS; ++i) {
-		CTR_WRITE(1, msrs, i);
-	}
+	for (i = 0; i < num_counters; ++i)
+		wrmsrl(msrs->counters[i].addr, -1LL);
 
 	/* enable active counters */
-	for (i = 0; i < NUM_COUNTERS; ++i) {
+	for (i = 0; i < num_counters; ++i) {
 		if (counter_config[i].enabled) {
 			reset_value[i] = counter_config[i].count;
 
-			CTR_WRITE(counter_config[i].count, msrs, i);
+			wrmsrl(msrs->counters[i].addr, -reset_value[i]);
 
 			CTRL_READ(low, high, msrs, i);
 			CTRL_CLEAR(low);
@@ -84,6 +113,8 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
 			CTRL_SET_UM(low, counter_config[i].unit_mask);
 			CTRL_SET_EVENT(low, counter_config[i].event);
 			CTRL_WRITE(low, high, msrs, i);
+		} else {
+			reset_value[i] = 0;
 		}
 	}
 }
@@ -97,19 +128,19 @@ static int ppro_check_ctrs(unsigned int const cpu,
                            struct op_msrs const * const msrs,
                            struct cpu_user_regs * const regs)
 {
-	unsigned int low, high;
+	u64 val;
 	int i;
 	int ovf = 0;
 	unsigned long eip = regs->eip;
 	int mode = xenoprofile_get_mode(current, regs);
 
-	for (i = 0 ; i < NUM_COUNTERS; ++i) {
+	for (i = 0 ; i < num_counters; ++i) {
 		if (!reset_value[i])
 			continue;
-		CTR_READ(low, high, msrs, i);
-		if (CTR_OVERFLOWED(low)) {
+		rdmsrl(msrs->counters[i].addr, val);
+		if (CTR_OVERFLOWED(val)) {
 			xenoprof_log_event(current, regs, eip, mode, i);
-			CTR_WRITE(reset_value[i], msrs, i);
+			wrmsrl(msrs->counters[i].addr, -reset_value[i]);
 			ovf = 1;
 		}
 	}
@@ -127,7 +158,7 @@ static void ppro_start(struct op_msrs const * const msrs)
 	unsigned int low,high;
 	int i;
 
-	for (i = 0; i < NUM_COUNTERS; ++i) {
+	for (i = 0; i < num_counters; ++i) {
 		if (reset_value[i]) {
 			CTRL_READ(low, high, msrs, i);
 			CTRL_SET_ACTIVE(low);
@@ -142,7 +173,7 @@ static void ppro_stop(struct op_msrs const * const msrs)
 	unsigned int low,high;
 	int i;
 
-	for (i = 0; i < NUM_COUNTERS; ++i) {
+	for (i = 0; i < num_counters; ++i) {
 		if (!reset_value[i])
 			continue;
 		CTRL_READ(low, high, msrs, i);
@@ -152,12 +183,51 @@ static void ppro_stop(struct op_msrs const * const msrs)
 }
 
 
-struct op_x86_model_spec const op_ppro_spec = {
-	.num_counters = NUM_COUNTERS,
-	.num_controls = NUM_CONTROLS,
+/*
+ * Architectural performance monitoring.
+ *
+ * Newer Intel CPUs (Core1+) have support for architectural
+ * events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details.
+ * The advantage of this is that it can be done without knowing about
+ * the specific CPU.
+ */
+void arch_perfmon_setup_counters(void)
+{
+	union cpuid10_eax eax;
+
+	eax.full = cpuid_eax(0xa);
+
+	/* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */
+	if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 &&
+	    current_cpu_data.x86_model == 15) {
+		eax.split.version_id = 2;
+		eax.split.num_counters = 2;
+		eax.split.bit_width = 40;
+	}
+
+	num_counters = min_t(u8, eax.split.num_counters, OP_MAX_COUNTER);
+
+	op_arch_perfmon_spec.num_counters = num_counters;
+	op_arch_perfmon_spec.num_controls = num_counters;
+	op_ppro_spec.num_counters = num_counters;
+	op_ppro_spec.num_controls = num_counters;
+}
+
+struct op_x86_model_spec op_ppro_spec = {
+	.num_counters = 2,
+	.num_controls = 2,
 	.fill_in_addresses = &ppro_fill_in_addresses,
 	.setup_ctrs = &ppro_setup_ctrs,
 	.check_ctrs = &ppro_check_ctrs,
 	.start = &ppro_start,
 	.stop = &ppro_stop
 };
+
+struct op_x86_model_spec op_arch_perfmon_spec = {
+	/* num_counters/num_controls filled in at runtime */
+	.fill_in_addresses = &ppro_fill_in_addresses,
+	.setup_ctrs = &ppro_setup_ctrs,
+	.check_ctrs = &ppro_check_ctrs,
+	.start = &ppro_start,
+	.stop = &ppro_stop,
+};
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 2858e8b..2c0020e 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -32,8 +32,8 @@ struct pt_regs;
  * various x86 CPU model's perfctr support.
  */
 struct op_x86_model_spec {
-	unsigned int const num_counters;
-	unsigned int const num_controls;
+	unsigned int num_counters;
+	unsigned int num_controls;
 	void (*fill_in_addresses)(struct op_msrs * const msrs);
 	void (*setup_ctrs)(struct op_msrs const * const msrs);
 	int (*check_ctrs)(unsigned int const cpu, 
@@ -43,9 +43,11 @@ struct op_x86_model_spec {
 	void (*stop)(struct op_msrs const * const msrs);
 };
 
-extern struct op_x86_model_spec const op_ppro_spec;
+extern struct op_x86_model_spec op_ppro_spec;
+extern struct op_x86_model_spec op_arch_perfmon_spec;
 extern struct op_x86_model_spec const op_p4_spec;
 extern struct op_x86_model_spec const op_p4_ht2_spec;
 extern struct op_x86_model_spec const op_athlon_spec;
 
+void arch_perfmon_setup_counters(void);
 #endif /* OP_X86_MODEL_H */
diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index d8d2350..baad8e2 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -73,6 +73,7 @@
 #define X86_FEATURE_P3		(3*32+ 6) /* P3 */
 #define X86_FEATURE_P4		(3*32+ 7) /* P4 */
 #define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
+#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */
@@ -174,6 +175,8 @@
 #define cpu_has_efer            1
 #endif
 
+#define cpu_has_arch_perfmon    boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
+
 #endif /* __ASM_I386_CPUFEATURE_H */
 
 /*