From c4f3b63fe15b4629aa1ec163c95ab30423d0f76a Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Tue, 16 Oct 2007 23:26:09 -0700 Subject: [PATCH] softlockup: add a /proc tuning parameter Control the trigger limit for softlockup warnings. This is useful for debugging softlockups, by lowering the softlockup_thresh to identify possible softlockups earlier. This patch: 1. Adds a sysctl softlockup_thresh with valid values of 1-60s (Higher value to disable false positives) 2. Changes the softlockup printk to print the cpu softlockup time [akpm@linux-foundation.org: Fix various warnings and add definition of "two"] Signed-off-by: Ravikiran Thirumalai Signed-off-by: Shai Fultheim Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/kernel.txt | 8 ++++++++ include/linux/sched.h | 1 + kernel/softlockup.c | 7 +++++-- kernel/sysctl.c | 33 ++++++++++++++++++++++++++------- 4 files changed, 40 insertions(+), 9 deletions(-) diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 111fd28727e..8984a539627 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -320,6 +320,14 @@ kernel. This value defaults to SHMMAX. ============================================================== +softlockup_thresh: + +This value can be used to lower the softlockup tolerance +threshold. The default threshold is 10s. If a cpu is locked up +for 10s, the kernel complains. Valid values are 1-60s. + +============================================================== + tainted: Non-zero if the kernel has been tainted. Numeric values, which diff --git a/include/linux/sched.h b/include/linux/sched.h index 59738efff8a..e643357eda0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -261,6 +261,7 @@ extern void softlockup_tick(void); extern void spawn_softlockup_task(void); extern void touch_softlockup_watchdog(void); extern void touch_all_softlockup_watchdogs(void); +extern int softlockup_thresh; #else static inline void softlockup_tick(void) { diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 72c2561ff5f..edeeef3a6a3 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -24,6 +24,7 @@ static DEFINE_PER_CPU(unsigned long, print_timestamp); static DEFINE_PER_CPU(struct task_struct *, watchdog_task); static int did_panic; +int softlockup_thresh = 10; static int softlock_panic(struct notifier_block *this, unsigned long event, void *ptr) @@ -104,13 +105,15 @@ void softlockup_tick(void) wake_up_process(per_cpu(watchdog_task, this_cpu)); /* Warn about unreasonable 10+ seconds delays: */ - if (now <= (touch_timestamp + 10)) + if (now <= (touch_timestamp + softlockup_thresh)) return; per_cpu(print_timestamp, this_cpu) = touch_timestamp; spin_lock(&print_lock); - printk(KERN_ERR "BUG: soft lockup detected on CPU#%d!\n", this_cpu); + printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n", + this_cpu, now - touch_timestamp, + current->comm, current->pid); if (regs) show_regs(regs); else diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 5e63de0f9ee..dde3d53e8ad 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -80,6 +80,19 @@ extern int maps_protect; extern int sysctl_stat_interval; extern int audit_argv_kb; +/* Constants used for minimum and maximum */ +#ifdef CONFIG_DETECT_SOFTLOCKUP +static int one = 1; +static int sixty = 60; +#endif + +#ifdef CONFIG_MMU +static int two = 2; +#endif + +static int zero; +static int one_hundred = 100; + /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ static int maxolduid = 65535; static int minolduid; @@ -711,6 +724,19 @@ static ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif +#ifdef CONFIG_DETECT_SOFTLOCKUP + { + .ctl_name = CTL_UNNUMBERED, + .procname = "softlockup_thresh", + .data = &softlockup_thresh, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &sixty, + }, +#endif #ifdef CONFIG_COMPAT { .ctl_name = KERN_COMPAT_LOG, @@ -757,13 +783,6 @@ static ctl_table kern_table[] = { { .ctl_name = 0 } }; -/* Constants for minimum and maximum testing in vm_table. - We use these as one-element integer vectors. */ -static int zero; -static int two = 2; -static int one_hundred = 100; - - static ctl_table vm_table[] = { { .ctl_name = VM_OVERCOMMIT_MEMORY,