From: Kei Tokunaga <ktokunag@redhat.com> Subject: [RHEL5.1 PATCH] Forced printk from recoverable MCA/INIT make printk/messages/console Date: Tue, 24 Apr 2007 19:05:25 -0400 Bugzilla: 219158 Message-Id: <462E8D35.9080509@redhat.com> Changelog: [ia64] MCA/INIT issues with printk/messages/console bz219158 https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=219158 This patch is a backport from upstream. http://www.gelato.unsw.edu.au/archives/linux-ia64/0609/19078.html This fixes three issues. 1) console_level remains a value of 15 after MCA/INIT This causes kernel messages that are usually suppressed (e.g. DEBUG level messages) to be displayed. This occurs inevitably after a MCA/INIT recovers. 2) oops_in_progress remains a value of 1 after MCA/INIT This causes any kernel messages not to be written to /var/log/messages once printk is called since klogd never be woken up. This occurs inevitably after a MCA/INIT recovers. 3) A lock of message buffer is unlocked after MCA/INIT This causes some problems regarding the message buffer since simultaneous printk-writes of multiple threads can occur. This occurs if a MCA/INIT is issued while printk is processing and the MCA/INIT recovers. So, this seldom occurs. Although Fujitsu has tested it on 2.6.18-13.el5, it applied to 2.6.18-16.el5, which is the latest today, cleanly. In the testing, we verified that issue #1 and #2 do not occur. Because of the rareness of issue #3, it is hard for us to produce a situation of #3, but from the fix's logic perspective, it is clear that issue #3 is also fixed. Please review and ACK! Thanks, Kei -- Keiichiro Tokunaga Fujitsu On-site Engineer --- linux-2.6.18-16.el5-bz219158-kei/arch/ia64/kernel/mca.c | 217 ++++++++++-- linux-2.6.18-16.el5-bz219158-kei/arch/ia64/kernel/mca_drv.c | 54 +- linux-2.6.18-16.el5-bz219158-kei/arch/ia64/kernel/mca_drv.h | 3 linux-2.6.18-16.el5-bz219158-kei/arch/ia64/kernel/salinfo.c | 4 4 files changed, 241 insertions(+), 37 deletions(-) diff -puN arch/ia64/kernel/mca.c~bz219158-MCA-INIT-printk arch/ia64/kernel/mca.c --- linux-2.6.18-16.el5-bz219158/arch/ia64/kernel/mca.c~bz219158-MCA-INIT-printk 2007-04-24 17:11:43.000000000 -0400 +++ linux-2.6.18-16.el5-bz219158-kei/arch/ia64/kernel/mca.c 2007-04-24 17:11:43.000000000 -0400 @@ -54,6 +54,9 @@ * * 2005-10-07 Keith Owens <kaos@sgi.com> * Add notify_die() hooks. + * + * 2006-09-15 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> + * Add printing support for MCA/INIT. */ #include <linux/types.h> #include <linux/init.h> @@ -141,11 +144,174 @@ extern void salinfo_log_wakeup(int type, static int mca_init __initdata; +/* + * limited & delayed printing support for MCA/INIT handler + */ + +#define mprintk(fmt...) ia64_mca_printk(fmt) + +#define MLOGBUF_SIZE (512+256*NR_CPUS) +#define MLOGBUF_MSGMAX 256 +static char mlogbuf[MLOGBUF_SIZE]; +static DEFINE_SPINLOCK(mlogbuf_wlock); /* mca context only */ +static DEFINE_SPINLOCK(mlogbuf_rlock); /* normal context only */ +static unsigned long mlogbuf_start; +static unsigned long mlogbuf_end; +static unsigned int mlogbuf_finished = 0; +static unsigned long mlogbuf_timestamp = 0; + +static int loglevel_save = -1; +#define BREAK_LOGLEVEL(__console_loglevel) \ + oops_in_progress = 1; \ + if (loglevel_save < 0) \ + loglevel_save = __console_loglevel; \ + __console_loglevel = 15; + +#define RESTORE_LOGLEVEL(__console_loglevel) \ + if (loglevel_save >= 0) { \ + __console_loglevel = loglevel_save; \ + loglevel_save = -1; \ + } \ + mlogbuf_finished = 0; \ + oops_in_progress = 0; + +/* + * Push messages into buffer, print them later if not urgent. + */ +void ia64_mca_printk(const char *fmt, ...) +{ + va_list args; + int printed_len; + char temp_buf[MLOGBUF_MSGMAX]; + char *p; + + va_start(args, fmt); + printed_len = vscnprintf(temp_buf, sizeof(temp_buf), fmt, args); + va_end(args); + + /* Copy the output into mlogbuf */ + if (oops_in_progress) { + /* mlogbuf was abandoned, use printk directly instead. */ + printk(temp_buf); + } else { + spin_lock(&mlogbuf_wlock); + for (p = temp_buf; *p; p++) { + unsigned long next = (mlogbuf_end + 1) % MLOGBUF_SIZE; + if (next != mlogbuf_start) { + mlogbuf[mlogbuf_end] = *p; + mlogbuf_end = next; + } else { + /* buffer full */ + break; + } + } + mlogbuf[mlogbuf_end] = '\0'; + spin_unlock(&mlogbuf_wlock); + } +} +EXPORT_SYMBOL(ia64_mca_printk); + +/* + * Print buffered messages. + * NOTE: call this after returning normal context. (ex. from salinfod) + */ +void ia64_mlogbuf_dump(void) +{ + char temp_buf[MLOGBUF_MSGMAX]; + char *p; + unsigned long index; + unsigned long flags; + unsigned int printed_len; + + /* Get output from mlogbuf */ + while (mlogbuf_start != mlogbuf_end) { + temp_buf[0] = '\0'; + p = temp_buf; + printed_len = 0; + + spin_lock_irqsave(&mlogbuf_rlock, flags); + + index = mlogbuf_start; + while (index != mlogbuf_end) { + *p = mlogbuf[index]; + index = (index + 1) % MLOGBUF_SIZE; + if (!*p) + break; + p++; + if (++printed_len >= MLOGBUF_MSGMAX - 1) + break; + } + *p = '\0'; + if (temp_buf[0]) + printk(temp_buf); + mlogbuf_start = index; + + mlogbuf_timestamp = 0; + spin_unlock_irqrestore(&mlogbuf_rlock, flags); + } +} +EXPORT_SYMBOL(ia64_mlogbuf_dump); + +/* + * Call this if system is going to down or if immediate flushing messages to + * console is required. (ex. recovery was failed, crash dump is going to be + * invoked, long-wait rendezvous etc.) + * NOTE: this should be called from monarch. + */ +static void ia64_mlogbuf_finish(int wait) +{ + BREAK_LOGLEVEL(console_loglevel); + + spin_lock_init(&mlogbuf_rlock); + ia64_mlogbuf_dump(); + printk(KERN_EMERG "mlogbuf_finish: printing switched to urgent mode, " + "MCA/INIT might be dodgy or fail.\n"); + + if (!wait) + return; + + /* wait for console */ + printk("Delaying for 5 seconds...\n"); + udelay(5*1000000); + + mlogbuf_finished = 1; +} + +/* + * Print buffered messages from INIT context. + */ +static void ia64_mlogbuf_dump_from_init(void) +{ + if (mlogbuf_finished) + return; + + if (mlogbuf_timestamp && (mlogbuf_timestamp + 30*HZ > jiffies)) { + printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT " + " and the system seems to be messed up.\n"); + ia64_mlogbuf_finish(0); + return; + } + + if (!spin_trylock(&mlogbuf_rlock)) { + printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT. " + "Generated messages other than stack dump will be " + "buffered to mlogbuf and will be printed later.\n"); + printk(KERN_ERR "INIT: If messages would not printed after " + "this INIT, wait 30sec and assert INIT again.\n"); + if (!mlogbuf_timestamp) + mlogbuf_timestamp = jiffies; + return; + } + spin_unlock(&mlogbuf_rlock); + ia64_mlogbuf_dump(); +} static void inline ia64_mca_spin(const char *func) { - printk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func); + if (monarch_cpu == smp_processor_id()) + ia64_mlogbuf_finish(0); + mprintk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func); while (1) cpu_relax(); } @@ -993,18 +1160,22 @@ ia64_wait_for_slaves(int monarch, const } if (!missing) goto all_in; - printk(KERN_INFO "OS %s slave did not rendezvous on cpu", type); + /* + * Maybe slave(s) dead. Print buffered messages immediately. + */ + ia64_mlogbuf_finish(0); + mprintk(KERN_INFO "OS %s slave did not rendezvous on cpu", type); for_each_online_cpu(c) { if (c == monarch) continue; if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) - printk(" %d", c); + mprintk(" %d", c); } - printk("\n"); + mprintk("\n"); return; all_in: - printk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type); + mprintk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type); return; } @@ -1032,10 +1203,8 @@ ia64_mca_handler(struct pt_regs *regs, s struct ia64_mca_notify_die nd = { .sos = sos, .monarch_cpu = &monarch_cpu }; - oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ - console_loglevel = 15; /* make sure printks make it to console */ - printk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d monarch=%ld\n", - sos->proc_state_param, cpu, sos->monarch); + mprintk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d " + "monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch); previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); monarch_cpu = cpu; @@ -1071,8 +1240,10 @@ ia64_mca_handler(struct pt_regs *regs, s rh->severity = sal_log_severity_corrected; ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA); sos->os_status = IA64_MCA_CORRECTED; -#ifdef CONFIG_KEXEC } else { + /* Dump buffered message to console */ + ia64_mlogbuf_finish(1); +#ifdef CONFIG_KEXEC kdump_in_progress = 1; /* In the case of (!recover), notify_die(DIE_MCA_MONARCH_LEAVE) will not return. A dump kernel will be booted. Need to set @@ -1319,6 +1490,15 @@ default_monarch_init_process(struct noti struct task_struct *g, *t; if (val != DIE_INIT_MONARCH_PROCESS) return NOTIFY_DONE; + + /* + * FIXME: mlogbuf will brim over with INIT stack dumps. + * To enable show_stack from INIT, we use oops_in_progress which should + * be used in real oops. This would cause something wrong after INIT. + */ + BREAK_LOGLEVEL(console_loglevel); + ia64_mlogbuf_dump_from_init(); + printk(KERN_ERR "Processes interrupted by INIT -"); for_each_online_cpu(c) { struct ia64_sal_os_state *s; @@ -1340,6 +1520,8 @@ default_monarch_init_process(struct noti } while_each_thread (g, t); read_unlock(&tasklist_lock); } + /* FIXME: This will not restore zapped printk locks. */ + RESTORE_LOGLEVEL(console_loglevel); return NOTIFY_DONE; } @@ -1371,12 +1553,9 @@ ia64_init_handler(struct pt_regs *regs, struct ia64_mca_notify_die nd = { .sos = sos, .monarch_cpu = &monarch_cpu }; - oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ - console_loglevel = 15; /* make sure printks make it to console */ - (void) notify_die(DIE_INIT_ENTER, "INIT", regs, (long)&nd, 0, 0); - printk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n", + mprintk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch); salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0); @@ -1389,7 +1568,7 @@ ia64_init_handler(struct pt_regs *regs, * fix their proms and get their customers updated. */ if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) { - printk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n", + mprintk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n", __FUNCTION__, cpu); atomic_dec(&slaves); sos->monarch = 1; @@ -1401,7 +1580,7 @@ ia64_init_handler(struct pt_regs *regs, * fix their proms and get their customers updated. */ if (sos->monarch && atomic_add_return(1, &monarchs) > 1) { - printk(KERN_WARNING "%s: Demoting cpu %d to slave.\n", + mprintk(KERN_WARNING "%s: Demoting cpu %d to slave.\n", __FUNCTION__, cpu); atomic_dec(&monarchs); sos->monarch = 0; @@ -1422,7 +1601,7 @@ ia64_init_handler(struct pt_regs *regs, if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, (long)&nd, 0, 0) == NOTIFY_STOP) ia64_mca_spin(__FUNCTION__); - printk("Slave on cpu %d returning to normal service.\n", cpu); + mprintk("Slave on cpu %d returning to normal service.\n", cpu); set_curr_task(cpu, previous_current); ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; atomic_dec(&slaves); @@ -1440,7 +1619,7 @@ ia64_init_handler(struct pt_regs *regs, * same serial line, the user will need some time to switch out of the BMC before * the dump begins. */ - printk("Delaying for 5 seconds...\n"); + mprintk("Delaying for 5 seconds...\n"); udelay(5*1000000); ia64_wait_for_slaves(cpu, "INIT"); /* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through @@ -1453,7 +1632,7 @@ ia64_init_handler(struct pt_regs *regs, if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, (long)&nd, 0, 0) == NOTIFY_STOP) ia64_mca_spin(__FUNCTION__); - printk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); + mprintk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); atomic_dec(&monarchs); set_curr_task(cpu, previous_current); monarch_cpu = -1; diff -puN arch/ia64/kernel/mca_drv.c~bz219158-MCA-INIT-printk arch/ia64/kernel/mca_drv.c --- linux-2.6.18-16.el5-bz219158/arch/ia64/kernel/mca_drv.c~bz219158-MCA-INIT-printk 2007-04-24 17:11:43.000000000 -0400 +++ linux-2.6.18-16.el5-bz219158-kei/arch/ia64/kernel/mca_drv.c 2007-04-24 17:11:43.000000000 -0400 @@ -79,14 +79,30 @@ static int fatal_mca(const char *fmt, ...) { va_list args; + char buf[256]; va_start(args, fmt); - vprintk(fmt, args); + vsnprintf(buf, sizeof(buf), fmt, args); va_end(args); + ia64_mca_printk(KERN_ALERT "MCA: %s\n", buf); return MCA_NOT_RECOVERED; } +static int +mca_recovered(const char *fmt, ...) +{ + va_list args; + char buf[256]; + + va_start(args, fmt); + vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + ia64_mca_printk(KERN_INFO "MCA: %s\n", buf); + + return MCA_RECOVERED; +} + /** * mca_page_isolate - isolate a poisoned page in order not to use it later * @paddr: poisoned memory location @@ -140,6 +156,7 @@ mca_page_isolate(unsigned long paddr) void mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr) { + ia64_mlogbuf_dump(); printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, " "iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n", raw_smp_processor_id(), current->pid, current->uid, @@ -440,7 +457,7 @@ recover_from_read_error(slidx_table_t *s /* Is target address valid? */ if (!pbci->tv) - return fatal_mca(KERN_ALERT "MCA: target address not valid\n"); + return fatal_mca("target address not valid"); /* * cpu read or memory-mapped io read @@ -458,7 +475,7 @@ recover_from_read_error(slidx_table_t *s /* Is minstate valid? */ if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate)) - return fatal_mca(KERN_ALERT "MCA: minstate not valid\n"); + return fatal_mca("minstate not valid"); psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr); psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr); @@ -492,13 +509,14 @@ recover_from_read_error(slidx_table_t *s psr2->bn = 1; psr2->i = 0; - return MCA_RECOVERED; + return mca_recovered("user memory corruption. " + "kill affected process - recovered."); } } - return fatal_mca(KERN_ALERT "MCA: kernel context not recovered," - " iip 0x%lx\n", pmsa->pmsa_iip); + return fatal_mca("kernel context not recovered, iip 0x%lx\n", + pmsa->pmsa_iip); } /** @@ -584,13 +602,13 @@ recover_from_processor_error(int platfor * The machine check is corrected. */ if (psp->cm == 1) - return MCA_RECOVERED; + return mca_recovered("machine check is already corrected."); /* * The error was not contained. Software must be reset. */ if (psp->us || psp->ci == 0) - return fatal_mca(KERN_ALERT "MCA: error not contained\n"); + return fatal_mca("error not contained"); /* * The cache check and bus check bits have four possible states @@ -601,22 +619,22 @@ recover_from_processor_error(int platfor * 1 1 Memory error, attempt recovery */ if (psp->bc == 0 || pbci == NULL) - return fatal_mca(KERN_ALERT "MCA: No bus check\n"); + return fatal_mca("No bus check"); /* * Sorry, we cannot handle so many. */ if (peidx_bus_check_num(peidx) > 1) - return fatal_mca(KERN_ALERT "MCA: Too many bus checks\n"); + return fatal_mca("Too many bus checks"); /* * Well, here is only one bus error. */ if (pbci->ib) - return fatal_mca(KERN_ALERT "MCA: Internal Bus error\n"); + return fatal_mca("Internal Bus error"); if (pbci->cc) - return fatal_mca(KERN_ALERT "MCA: Cache-cache error\n"); + return fatal_mca("Cache-cache error"); if (pbci->eb && pbci->bsi > 0) - return fatal_mca(KERN_ALERT "MCA: External bus check fatal status\n"); + return fatal_mca("External bus check fatal status"); /* * This is a local MCA and estimated as recoverble external bus error. @@ -628,7 +646,7 @@ recover_from_processor_error(int platfor /* * On account of strange SAL error record, we cannot recover. */ - return fatal_mca(KERN_ALERT "MCA: Strange SAL record\n"); + return fatal_mca("Strange SAL record"); } /** @@ -657,10 +675,10 @@ mca_try_to_recover(void *rec, struct ia6 /* Now, OS can recover when there is one processor error section */ if (n_proc_err > 1) - return fatal_mca(KERN_ALERT "MCA: Too Many Errors\n"); + return fatal_mca("Too Many Errors"); else if (n_proc_err == 0) - /* Weird SAL record ... We need not to recover */ - return fatal_mca(KERN_ALERT "MCA: Weird SAL record\n"); + /* Weird SAL record ... We can't do anything */ + return fatal_mca("Weird SAL record"); /* Make index of processor error section */ mca_make_peidx((sal_log_processor_info_t*) @@ -671,7 +689,7 @@ mca_try_to_recover(void *rec, struct ia6 /* Check whether MCA is global or not */ if (is_mca_global(&peidx, &pbci, sos)) - return fatal_mca(KERN_ALERT "MCA: global MCA\n"); + return fatal_mca("global MCA"); /* Try to recover a processor error */ return recover_from_processor_error(platform_err, &slidx, &peidx, diff -puN arch/ia64/kernel/mca_drv.h~bz219158-MCA-INIT-printk arch/ia64/kernel/mca_drv.h --- linux-2.6.18-16.el5-bz219158/arch/ia64/kernel/mca_drv.h~bz219158-MCA-INIT-printk 2007-04-24 17:11:43.000000000 -0400 +++ linux-2.6.18-16.el5-bz219158-kei/arch/ia64/kernel/mca_drv.h 2007-04-24 17:11:43.000000000 -0400 @@ -118,3 +118,6 @@ struct mca_table_entry { extern const struct mca_table_entry *search_mca_tables (unsigned long addr); extern int mca_recover_range(unsigned long); +extern void ia64_mca_printk(const char * fmt, ...) + __attribute__ ((format (printf, 1, 2))); +extern void ia64_mlogbuf_dump(void); diff -puN arch/ia64/kernel/salinfo.c~bz219158-MCA-INIT-printk arch/ia64/kernel/salinfo.c --- linux-2.6.18-16.el5-bz219158/arch/ia64/kernel/salinfo.c~bz219158-MCA-INIT-printk 2007-04-24 17:11:43.000000000 -0400 +++ linux-2.6.18-16.el5-bz219158-kei/arch/ia64/kernel/salinfo.c 2007-04-24 17:11:43.000000000 -0400 @@ -266,6 +266,7 @@ salinfo_log_wakeup(int type, u8 *buffer, /* Check for outstanding MCA/INIT records every minute (arbitrary) */ #define SALINFO_TIMER_DELAY (60*HZ) static struct timer_list salinfo_timer; +extern void ia64_mlogbuf_dump(void); static void salinfo_timeout_check(struct salinfo_data *data) @@ -283,6 +284,7 @@ salinfo_timeout_check(struct salinfo_dat static void salinfo_timeout (unsigned long arg) { + ia64_mlogbuf_dump(); salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA); salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_INIT); salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY; @@ -332,6 +334,8 @@ retry: if (cpu == -1) goto retry; + ia64_mlogbuf_dump(); + /* for next read, start checking at next CPU */ data->cpu_check = cpu; if (++data->cpu_check == NR_CPUS) _