From: Konrad Rzeszutek <konradr@redhat.com> Subject: [RHEL 5.1] RHBZ #217583: cpu-hotplug operations along with make and module insertion script on JS21,causes system to drop to xmon. Date: Tue, 27 Feb 2007 12:01:09 -0500 Bugzilla: 217583 Message-Id: <20070227170109.GA2353@localhost.localdomain> Changelog: [cpu-hotplug] make and module insertion script cause a panic RHBZ#: ------ https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=217583 Description: ------------ During testing of RHEL5 Beta2, performing CPU hotplug operations while running other workloads (compiling the kernel, removing/re-inserting modules) causes a panic. The fix is to do away with lock_cpu_hotplug in the slab subsystem and to use cache_chain_mutex to protect against cpu hotplug events instead. The patch is in the upstream kernel. RHEL Version Found: ------------------ RHEL5 Beta2 Upstream Status: ---------------- Discussion about it: http://lkml.org/lkml/2006/10/27/238 Patch is in 2.6.21-rc1 Test Status: ------------ This has been tested on the affected platform and also regression tested on RHTS. If you have any specific boxes you think are affected by this, please e-mail me and I will run this kernel on them. Proposed Patch: --------------- This patch is based on 2.6.18-8.el5 diff -uNr linux-2.6.18.i686.orig/mm/slab.c linux-2.6.18.i686/mm/slab.c --- linux-2.6.18.i686.orig/mm/slab.c 2007-02-05 12:42:05.000000000 -0500 +++ linux-2.6.18.i686/mm/slab.c 2007-02-05 15:21:27.000000000 -0500 @@ -730,7 +730,10 @@ } #endif -/* Guard access to the cache-chain. */ +/* + * 1. Guard access to the cache-chain. + * 2. 
Protect sanity of cpu_online_map against cpu hotplug events + */ static DEFINE_MUTEX(cache_chain_mutex); static struct list_head cache_chain; @@ -1224,12 +1227,18 @@ kfree(shared); free_alien_cache(alien); } - mutex_unlock(&cache_chain_mutex); break; case CPU_ONLINE: + mutex_unlock(&cache_chain_mutex); start_cpu_timer(cpu); break; #ifdef CONFIG_HOTPLUG_CPU + case CPU_DOWN_PREPARE: + mutex_lock(&cache_chain_mutex); + break; + case CPU_DOWN_FAILED: + mutex_unlock(&cache_chain_mutex); + break; case CPU_DEAD: /* * Even if all the cpus of a node are down, we don't free the @@ -1240,8 +1249,8 @@ * gets destroyed at kmem_cache_destroy(). */ /* fall thru */ +#endif case CPU_UP_CANCELED: - mutex_lock(&cache_chain_mutex); list_for_each_entry(cachep, &cache_chain, next) { struct array_cache *nc; struct array_cache *shared; @@ -1302,11 +1311,9 @@ } mutex_unlock(&cache_chain_mutex); break; -#endif } return NOTIFY_OK; bad: - mutex_unlock(&cache_chain_mutex); return NOTIFY_BAD; } @@ -2054,11 +2061,9 @@ } /* - * Prevent CPUs from coming and going. - * lock_cpu_hotplug() nests outside cache_chain_mutex + * We use cache_chain_mutex to ensure a consistent view of + * cpu_online_map as well. 
Please see cpuup_callback */ - lock_cpu_hotplug(); - mutex_lock(&cache_chain_mutex); list_for_each_entry(pc, &cache_chain, next) { @@ -2263,7 +2268,6 @@ panic("kmem_cache_create(): failed to create slab `%s'\n", name); mutex_unlock(&cache_chain_mutex); - unlock_cpu_hotplug(); return cachep; } EXPORT_SYMBOL(kmem_cache_create); @@ -2381,6 +2385,7 @@ return nr_freed; } +/* Called with cache_chain_mutex held to protect against cpu hotplug */ static int __cache_shrink(struct kmem_cache *cachep) { int ret = 0, i = 0; @@ -2411,9 +2416,13 @@ */ int kmem_cache_shrink(struct kmem_cache *cachep) { + int ret; BUG_ON(!cachep || in_interrupt()); - return __cache_shrink(cachep); + mutex_lock(&cache_chain_mutex); + ret = __cache_shrink(cachep); + mutex_unlock(&cache_chain_mutex); + return ret; } EXPORT_SYMBOL(kmem_cache_shrink); @@ -2441,23 +2450,17 @@ BUG_ON(!cachep || in_interrupt()); - /* Don't let CPUs to come and go */ - lock_cpu_hotplug(); - /* Find the cache in the chain of caches. */ mutex_lock(&cache_chain_mutex); /* * the chain is never empty, cache_cache is never destroyed */ list_del(&cachep->next); - mutex_unlock(&cache_chain_mutex); if (__cache_shrink(cachep)) { slab_error(cachep, "Can't free all objects"); - mutex_lock(&cache_chain_mutex); list_add(&cachep->next, &cache_chain); mutex_unlock(&cache_chain_mutex); - unlock_cpu_hotplug(); return 1; } @@ -2477,7 +2480,7 @@ } } kmem_cache_free(&cache_cache, cachep); - unlock_cpu_hotplug(); + mutex_unlock(&cache_chain_mutex); return 0; } EXPORT_SYMBOL(kmem_cache_destroy); -- Konrad Rzeszutek 1-(978)-392-3903 or 1-(617)-693-1718 IBM on-site partner.