From: Chris Lalancette <clalance@redhat.com> Date: Thu, 5 Mar 2009 14:22:39 +0100 Subject: [xen] fix occasional deadlocks in Xen netfront Message-id: 49AFD21F.60401@redhat.com O-Subject: [RHEL5.4 PATCH]: Fix occasional deadlocks in Xen netfront Bugzilla: 480939 RH-Acked-by: Don Dutile <ddutile@redhat.com> RH-Acked-by: Rik van Riel <riel@redhat.com> RH-Acked-by: Justin M. Forbes <jforbes@redhat.com> All, Some time ago Jeremy Fitzhardinge discovered a couple of potential deadlocks in the Xen netfront code using lockdep on pvops; this was fixed in xen-unstable with c/s 14844 with an update based on comments by Herbert Xu in c/s 14851. Xensource is occasionally seeing deadlocks in our RHEL-4 and RHEL-5 kernels while booting up for automatic testing. The stack traces for these deadlocks look to be in the same code that was fixed in upstream xen-netfront.c. This patch is a backport of upstream xen-unstable c/s 14844 and 14851 to RHEL-5. Since the deadlock is difficult to hit, it hasn't been proven yet to fix the issue, but I think the locking fixes are correct, and thus I think we should take the patch. This should fix BZ 480939. Please review and ACK. 
-- Chris Lalancette diff --git a/drivers/xen/netfront/netfront.c b/drivers/xen/netfront/netfront.c index f037811..35279c2 100644 --- a/drivers/xen/netfront/netfront.c +++ b/drivers/xen/netfront/netfront.c @@ -599,14 +599,14 @@ static int network_open(struct net_device *dev) memset(&np->stats, 0, sizeof(np->stats)); - spin_lock(&np->rx_lock); + spin_lock_bh(&np->rx_lock); if (netif_carrier_ok(dev)) { network_alloc_rx_buffers(dev); np->rx.sring->rsp_event = np->rx.rsp_cons + 1; if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) netif_rx_schedule(dev); } - spin_unlock(&np->rx_lock); + spin_unlock_bh(&np->rx_lock); netif_start_queue(dev); @@ -1303,7 +1303,7 @@ static int netif_poll(struct net_device *dev, int *pbudget) int pages_flipped = 0; int err; - spin_lock(&np->rx_lock); + spin_lock(&np->rx_lock); /* no need for spin_lock_bh() in ->poll() */ if (unlikely(!netif_carrier_ok(dev))) { spin_unlock(&np->rx_lock); @@ -1511,7 +1511,7 @@ static void netif_release_rx_bufs_flip(struct netfront_info *np) skb_queue_head_init(&free_list); - spin_lock(&np->rx_lock); + spin_lock_bh(&np->rx_lock); for (id = 0; id < NET_RX_RING_SIZE; id++) { if ((ref = np->grant_rx_ref[id]) == GRANT_INVALID_REF) { @@ -1578,7 +1578,7 @@ static void netif_release_rx_bufs_flip(struct netfront_info *np) while ((skb = __skb_dequeue(&free_list)) != NULL) dev_kfree_skb(skb); - spin_unlock(&np->rx_lock); + spin_unlock_bh(&np->rx_lock); } static void netif_release_rx_bufs_copy(struct netfront_info *np) @@ -1729,8 +1729,8 @@ static int network_connect(struct net_device *dev) IPRINTK("device %s has %sing receive path.\n", dev->name, np->copying_receiver ? 
"copy" : "flipp"); + spin_lock_bh(&np->rx_lock); spin_lock_irq(&np->tx_lock); - spin_lock(&np->rx_lock); /* * Recovery procedure: @@ -1782,8 +1782,8 @@ static int network_connect(struct net_device *dev) network_tx_buf_gc(dev); network_alloc_rx_buffers(dev); - spin_unlock(&np->rx_lock); spin_unlock_irq(&np->tx_lock); + spin_unlock_bh(&np->rx_lock); return 0; } @@ -1842,7 +1842,7 @@ static ssize_t store_rxbuf_min(struct class_device *cd, if (target > RX_MAX_TARGET) target = RX_MAX_TARGET; - spin_lock(&np->rx_lock); + spin_lock_bh(&np->rx_lock); if (target > np->rx_max_target) np->rx_max_target = target; np->rx_min_target = target; @@ -1851,7 +1851,7 @@ static ssize_t store_rxbuf_min(struct class_device *cd, network_alloc_rx_buffers(netdev); - spin_unlock(&np->rx_lock); + spin_unlock_bh(&np->rx_lock); return len; } @@ -1885,7 +1885,7 @@ static ssize_t store_rxbuf_max(struct class_device *cd, if (target > RX_MAX_TARGET) target = RX_MAX_TARGET; - spin_lock(&np->rx_lock); + spin_lock_bh(&np->rx_lock); if (target < np->rx_min_target) np->rx_min_target = target; np->rx_max_target = target; @@ -1894,7 +1894,7 @@ static ssize_t store_rxbuf_max(struct class_device *cd, network_alloc_rx_buffers(netdev); - spin_unlock(&np->rx_lock); + spin_unlock_bh(&np->rx_lock); return len; } @@ -2072,11 +2072,11 @@ netdev_notify(struct notifier_block *this, unsigned long event, void *ptr) static void netif_disconnect_backend(struct netfront_info *info) { /* Stop old i/f to prevent errors whilst we rebuild the state. */ + spin_lock_bh(&info->rx_lock); spin_lock_irq(&info->tx_lock); - spin_lock(&info->rx_lock); netif_carrier_off(info->netdev); - spin_unlock(&info->rx_lock); spin_unlock_irq(&info->tx_lock); + spin_unlock_bh(&info->rx_lock); if (info->irq) unbind_from_irqhandler(info->irq, info->netdev);