From: Kei Tokunaga <ktokunag@redhat.com> Subject: [RHEL5.1 PATCH 15/21] Cannot measure process time accurately on IA64 Date: Thu, 07 Jun 2007 03:44:43 -0400 Bugzilla: 240107 Message-Id: <4667B76B.9030200@redhat.com> Changelog: [xen] ia64: Cannot measure process time accurately bz240107 https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=240107 Backport of cset#14504 and 14508 Process time cannot be measured accurately on ia64 when running two or more CPU-intensive benchmark processes. The measured time appears to be at least two times lower than the native measurement. To fix this, the patch adds code to account for the CPU steal time of PV guests. Thanks, Kei rh bug 121137 # HG changeset patch # User awilliam@xenbuild2.aw # Date 1173372625 25200 # Node ID 9fbaf07d3f670b96d6a1653a002f5ca3db895fc8 # Parent 4e367aa898956466747417bff637a681b8b58202 [IA64] Add HYPERVISOR_vcpu_op For support of steal time accounting, only supports VCPUOP_register_runstate_memory_area right now. Signed-off-by: Atsushi SAKAI <sakaia@jp.fujitsu.com> --- linux-2.6.18-21.el5-gerd-order-kei/arch/ia64/xen/xcom_hcall.c | 18 ++++++++++ linux-2.6.18-21.el5-gerd-order-kei/include/asm-ia64/hypercall.h | 7 +++ linux-2.6.18-21.el5-gerd-order-kei/include/asm-ia64/xen/xcom_hcall.h | 2 + 3 files changed, 27 insertions(+) diff -puN arch/ia64/xen/xcom_hcall.c~14504-IA64_Add_HYPERVISOR_vcpu_op arch/ia64/xen/xcom_hcall.c --- linux-2.6.18-21.el5-gerd-order/arch/ia64/xen/xcom_hcall.c~14504-IA64_Add_HYPERVISOR_vcpu_op 2007-06-07 02:44:33.000000000 -0400 +++ linux-2.6.18-21.el5-gerd-order-kei/arch/ia64/xen/xcom_hcall.c 2007-06-07 02:44:33.000000000 -0400 @@ -32,6 +32,7 @@ #include <xen/interface/callback.h> #include <xen/interface/acm_ops.h> #include <xen/interface/hvm/params.h> +#include <xen/interface/vcpu.h> #include <asm/hypercall.h> #include <asm/page.h> #include <asm/uaccess.h> @@ -301,3 +302,20 @@ xencomm_hypercall_suspend(unsigned long return xencomm_arch_hypercall_suspend(xencomm_create_inline(&arg)); } + +long 
+xencomm_hypercall_vcpu_op(int cmd, int cpu, void *arg) +{ + switch (cmd) { + case VCPUOP_register_runstate_memory_area: + xencommize_memory_reservation((xen_memory_reservation_t *)arg); + break; + + default: + printk("%s: unknown vcpu op %d\n", __func__, cmd); + return -ENOSYS; + } + + return xencomm_arch_hypercall_vcpu_op(cmd, cpu, + xencomm_create_inline(arg)); +} diff -puN include/asm-ia64/hypercall.h~14504-IA64_Add_HYPERVISOR_vcpu_op include/asm-ia64/hypercall.h --- linux-2.6.18-21.el5-gerd-order/include/asm-ia64/hypercall.h~14504-IA64_Add_HYPERVISOR_vcpu_op 2007-06-07 02:44:33.000000000 -0400 +++ linux-2.6.18-21.el5-gerd-order-kei/include/asm-ia64/hypercall.h 2007-06-07 02:44:33.000000000 -0400 @@ -205,6 +205,12 @@ xencomm_arch_hypercall_hvm_op(int cmd, v return _hypercall2(unsigned long, hvm_op, cmd, arg); } +static inline long +xencomm_arch_hypercall_vcpu_op(int cmd, int cpu, void *arg) +{ + return _hypercall3(long, vcpu_op, cmd, cpu, arg); +} + static inline int HYPERVISOR_physdev_op(int cmd, void *arg) { @@ -383,5 +389,6 @@ HYPERVISOR_expose_p2m(unsigned long conv #endif #define HYPERVISOR_suspend xencomm_hypercall_suspend +#define HYPERVISOR_vcpu_op xencomm_hypercall_vcpu_op #endif /* __HYPERCALL_H__ */ diff -puN include/asm-ia64/xen/xcom_hcall.h~14504-IA64_Add_HYPERVISOR_vcpu_op include/asm-ia64/xen/xcom_hcall.h --- linux-2.6.18-21.el5-gerd-order/include/asm-ia64/xen/xcom_hcall.h~14504-IA64_Add_HYPERVISOR_vcpu_op 2007-06-07 02:44:33.000000000 -0400 +++ linux-2.6.18-21.el5-gerd-order-kei/include/asm-ia64/xen/xcom_hcall.h 2007-06-07 02:44:33.000000000 -0400 @@ -46,6 +46,8 @@ extern unsigned long xencomm_hypercall_h extern int xencomm_hypercall_suspend(unsigned long srec); +extern long xencomm_hypercall_vcpu_op(int cmd, int cpu, void *arg); + /* Using mini xencomm. 
*/ extern int xencomm_mini_hypercall_console_io(int cmd, int count, char *str); _ rh bug 121137 # HG changeset patch # User awilliam@xenbuild2.aw # Date 1173713999 21600 # Node ID 9ea0c5f469c871f3b6a387a61555a725cecdd1b6 # Parent 800f7904d6a4a3f5c0deaaa444289dff139ad610 [IA64] PV steal time accounting Signed-off-by: Atsushi SAKAI <sakaia@jp.fujitsu.com> --- linux-2.6.18-21.el5-gerd-order-kei/arch/ia64/kernel/time.c | 131 +++++++++++++ 1 file changed, 131 insertions(+) diff -puN arch/ia64/kernel/time.c~14508-IA64_PV_steal_time_accounting arch/ia64/kernel/time.c --- linux-2.6.18-21.el5-gerd-order/arch/ia64/kernel/time.c~14508-IA64_PV_steal_time_accounting 2007-06-07 02:44:33.000000000 -0400 +++ linux-2.6.18-21.el5-gerd-order-kei/arch/ia64/kernel/time.c 2007-06-07 02:44:33.000000000 -0400 @@ -29,6 +29,13 @@ #include <asm/sections.h> #include <asm/system.h> +#ifdef CONFIG_XEN +#include <linux/kernel_stat.h> +#include <linux/posix-timers.h> +#include <xen/interface/vcpu.h> +#include <asm/percpu.h> +#endif + extern unsigned long wall_jiffies; volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */ @@ -40,16 +47,109 @@ EXPORT_SYMBOL(last_cli_ip); #endif +#ifdef CONFIG_XEN +DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); +DEFINE_PER_CPU(unsigned long, processed_stolen_time); +DEFINE_PER_CPU(unsigned long, processed_blocked_time); +#define NS_PER_TICK (1000000000LL/HZ) +#endif + static struct time_interpolator itc_interpolator = { .shift = 16, .mask = 0xffffffffffffffffLL, .source = TIME_SOURCE_CPU }; +#ifdef CONFIG_XEN +static unsigned long +consider_steal_time(unsigned long new_itm, struct pt_regs *regs) +{ + unsigned long stolen, blocked, sched_time; + unsigned long delta_itm = 0, stolentick = 0; + int i, cpu = smp_processor_id(); + struct vcpu_runstate_info *runstate; + struct task_struct *p = current; + + runstate = &per_cpu(runstate, smp_processor_id()); + + do { + sched_time = runstate->state_entry_time; + mb(); + stolen = 
runstate->time[RUNSTATE_runnable] + + runstate->time[RUNSTATE_offline] - + per_cpu(processed_stolen_time, cpu); + blocked = runstate->time[RUNSTATE_blocked] - + per_cpu(processed_blocked_time, cpu); + mb(); + } while (sched_time != runstate->state_entry_time); + + /* + * Check for vcpu migration effect + * In this case, itc value is reversed. + * This causes huge stolen value. + * This function just checks and reject this effect. + */ + if (!time_after_eq(runstate->time[RUNSTATE_blocked], + per_cpu(processed_blocked_time, cpu))) + blocked = 0; + + if (!time_after_eq(runstate->time[RUNSTATE_runnable] + + runstate->time[RUNSTATE_offline], + per_cpu(processed_stolen_time, cpu))) + stolen = 0; + + if (!time_after(delta_itm + new_itm, ia64_get_itc())) + stolentick = ia64_get_itc() - delta_itm - new_itm; + + do_div(stolentick, NS_PER_TICK); + stolentick++; + + do_div(stolen, NS_PER_TICK); + + if (stolen > stolentick) + stolen = stolentick; + + stolentick -= stolen; + do_div(blocked, NS_PER_TICK); + + if (blocked > stolentick) + blocked = stolentick; + + if (stolen > 0 || blocked > 0) { + account_steal_time(NULL, jiffies_to_cputime(stolen)); + account_steal_time(idle_task(cpu), jiffies_to_cputime(blocked)); + run_local_timers(); + + if (rcu_pending(cpu)) + rcu_check_callbacks(cpu, user_mode(regs)); + + scheduler_tick(); + run_posix_cpu_timers(p); + delta_itm += local_cpu_data->itm_delta * (stolen + blocked); + + if (cpu == time_keeper_id) { + write_seqlock(&xtime_lock); + for(i = 0; i < stolen + blocked; i++) + do_timer(regs); + local_cpu_data->itm_next = delta_itm + new_itm; + write_sequnlock(&xtime_lock); + } else { + local_cpu_data->itm_next = delta_itm + new_itm; + } + per_cpu(processed_stolen_time,cpu) += NS_PER_TICK * stolen; + per_cpu(processed_blocked_time,cpu) += NS_PER_TICK * blocked; + } + return delta_itm; +} +#else +#define consider_steal_time(new_itm, regs) (0) +#endif + static irqreturn_t timer_interrupt (int irq, void *dev_id, struct pt_regs *regs) { 
unsigned long new_itm; + unsigned long delta_itm; /* XEN */ if (unlikely(cpu_is_offline(smp_processor_id()))) { return IRQ_HANDLED; @@ -65,6 +165,13 @@ timer_interrupt (int irq, void *dev_id, profile_tick(CPU_PROFILING, regs); + if (is_running_on_xen()) { + delta_itm = consider_steal_time(new_itm, regs); + new_itm += delta_itm; + if (time_after(new_itm, ia64_get_itc()) && delta_itm) + goto skip_process_time_accounting; + } + while (1) { update_process_times(user_mode(regs)); @@ -88,6 +195,8 @@ timer_interrupt (int irq, void *dev_id, break; } +skip_process_time_accounting: /* XEN */ + do { /* * If we're too close to the next clock tick for @@ -142,6 +251,25 @@ static int __init nojitter_setup(char *s __setup("nojitter", nojitter_setup); +#ifdef CONFIG_XEN +/* taken from i386/kernel/time-xen.c */ +static void init_missing_ticks_accounting(int cpu) +{ + struct vcpu_register_runstate_memory_area area; + struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu); + + memset(runstate, 0, sizeof(*runstate)); + + area.addr.v = runstate; + HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu, &area); + + per_cpu(processed_blocked_time, cpu) = runstate->time[RUNSTATE_blocked]; + per_cpu(processed_stolen_time, cpu) = runstate->time[RUNSTATE_runnable] + + runstate->time[RUNSTATE_offline]; +} +#else +#define init_missing_ticks_accounting(cpu) do {} while (0) +#endif void __devinit ia64_init_itm (void) @@ -225,6 +353,9 @@ ia64_init_itm (void) register_time_interpolator(&itc_interpolator); } + if (is_running_on_xen()) + init_missing_ticks_accounting(smp_processor_id()); + /* Setup the CPU local timer tick */ ia64_cpu_local_tick(); } _