From: Neil Horman <nhorman@redhat.com> Date: Wed, 8 Oct 2008 20:31:57 -0400 Subject: [net] sky2: fix hang resulting from link flap Message-id: 20081009003157.GA26552@hmsendeavour.rdu.redhat.com O-Subject: Re: [RHEL 5.3 PATCH] sky2: fix hang resulting from link flap (bz 461681) Bugzilla: 461681 On Wed, Oct 08, 2008 at 05:52:42PM -0400, Don Zickus wrote: > On Tue, Oct 07, 2008 at 10:47:56AM -0400, Neil Horman wrote: > > Hey all- > > This patch is a backport of upstream commits: > > 75e806838a3327d4ca9030e588d34de11b04f341 > > 32c2c30085324aef9699934295281cca0161ef7e > > > > They fix a sky2 hardware bug that results in a hang on a full rx fifo, which was > > causing the hang reported in bz 461681. Tested and verified by the reporter. > > > > Regards > > Neil > Its exactly the same as the posted version, but it includes the missing definition from upstream (which includes normalizing the definition name to SKY2_HW_RAM_BUFFER, and the flags field in the private data to hold those bits). Neil diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index 5fe8477..4a7d3d9 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -96,10 +96,6 @@ static int disable_msi = 0; module_param(disable_msi, int, 0); MODULE_PARM_DESC(disable_msi, "Disable Message Signaled Interrupt (MSI)"); -static int idle_timeout = 0; -module_param(idle_timeout, int, 0); -MODULE_PARM_DESC(idle_timeout, "Watchdog timer for lost interrupts (ms)"); - static const struct pci_device_id sky2_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, 0x9000) }, /* SK-9Sxx */ { PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, 0x9E00) }, /* SK-9Exx */ @@ -1266,6 +1262,8 @@ static int sky2_up(struct net_device *dev) if (ramsize > 0) { u32 rxspace; + hw->flags |= SKY2_HW_RAM_BUFFER; + pr_debug(PFX "%s: ram buffer %dK\n", dev->name, ramsize); if (ramsize < 16) rxspace = ramsize / 2; else @@ -1692,6 +1690,8 @@ static void sky2_link_up(struct sky2_port *sky2) netif_carrier_on(sky2->netdev); netif_wake_queue(sky2->netdev); + mod_timer(&hw->watchdog_timer, jiffies + 1); + /* Turn on link LED */ sky2_write8(hw, SK_REG(port, LNK_LED_REG), LINKLED_ON | LINKLED_BLINK_OFF | LINKLED_LINKSYNC_OFF); @@ -2367,25 +2367,72 @@ static void sky2_le_error(struct sky2_hw *hw, unsigned port, sky2_write32(hw, Q_ADDR(q, Q_CSR), BMU_CLR_IRQ_CHK); } -/* If idle then force a fake soft NAPI poll once a second - * to work around cases where sharing an edge triggered interrupt. - */ -static inline void sky2_idle_start(struct sky2_hw *hw) +static int sky2_rx_hung(struct net_device *dev) { - if (idle_timeout > 0) - mod_timer(&hw->idle_timer, - jiffies + msecs_to_jiffies(idle_timeout)); + struct sky2_port *sky2 = netdev_priv(dev); + struct sky2_hw *hw = sky2->hw; + unsigned port = sky2->port; + unsigned rxq = rxqaddr[port]; + u32 mac_rp = sky2_read32(hw, SK_REG(port, RX_GMF_RP)); + u8 mac_lev = sky2_read8(hw, SK_REG(port, RX_GMF_RLEV)); + u8 fifo_rp = sky2_read8(hw, Q_ADDR(rxq, Q_RP)); + u8 fifo_lev = sky2_read8(hw, Q_ADDR(rxq, Q_RL)); + + /* If idle and MAC or PCI is stuck */ + if (sky2->check.last == dev->last_rx && + ((mac_rp == sky2->check.mac_rp && + mac_lev != 0 && mac_lev >= sky2->check.mac_lev) || + /* Check if the PCI RX hang */ + (fifo_rp == sky2->check.fifo_rp && + fifo_lev != 0 && fifo_lev >= sky2->check.fifo_lev))) { + printk(KERN_DEBUG PFX "%s: hung mac %d:%d fifo %d (%d:%d)\n", + dev->name, mac_lev, mac_rp, fifo_lev, fifo_rp, + sky2_read8(hw, Q_ADDR(rxq, Q_WP))); + return 1; + } else { + sky2->check.last = dev->last_rx; + sky2->check.mac_rp = mac_rp; + sky2->check.mac_lev = mac_lev; + sky2->check.fifo_rp = fifo_rp; + sky2->check.fifo_lev = fifo_lev; + return 0; + } } -static void sky2_idle(unsigned long arg) +static void sky2_watchdog(unsigned long arg) { struct sky2_hw *hw = (struct sky2_hw *) arg; - struct net_device *dev = hw->dev[0]; + struct net_device *dev; - if (__netif_rx_schedule_prep(dev)) - __netif_rx_schedule(dev); + /* Check for lost IRQ once a second */ + if (sky2_read32(hw, B0_ISRC)) { + dev = hw->dev[0]; + if (__netif_rx_schedule_prep(dev)) + __netif_rx_schedule(dev); + } else { + int i, active = 0; + + for (i = 0; i < hw->ports; i++) { + dev = hw->dev[i]; + if (!netif_running(dev)) + continue; + ++active; + + /* For chips with Rx FIFO, check if stuck */ + if ((hw->flags & SKY2_HW_RAM_BUFFER) && + sky2_rx_hung(dev)) { + pr_info(PFX "%s: receiver hang detected\n", + dev->name); + schedule_work(&hw->restart_work); + return; + } + } + + if (active == 0) + return; + } - mod_timer(&hw->idle_timer, jiffies + msecs_to_jiffies(idle_timeout)); + mod_timer(&hw->watchdog_timer, round_jiffies(jiffies + HZ)); } /* Hardware/software error handling */ @@ -2668,8 +2715,6 @@ static void sky2_restart(void *d) dev_dbg(&hw->pdev->dev, "restarting\n"); - del_timer_sync(&hw->idle_timer); - rtnl_lock(); sky2_write32(hw, B0_IMSK, 0); sky2_read32(hw, B0_IMSK); @@ -2698,8 +2743,6 @@ static void sky2_restart(void *d) } } - sky2_idle_start(hw); - rtnl_unlock(); } @@ -3706,10 +3749,9 @@ static int __devinit sky2_probe(struct pci_dev *pdev, sky2_show_addr(dev1); } - setup_timer(&hw->idle_timer, sky2_idle, (unsigned long) hw); - INIT_WORK(&hw->restart_work, sky2_restart, hw); + setup_timer(&hw->watchdog_timer, sky2_watchdog, (unsigned long) hw); - sky2_idle_start(hw); + INIT_WORK(&hw->restart_work, sky2_restart, hw); pci_set_drvdata(pdev, hw); @@ -3744,7 +3786,7 @@ static void __devexit sky2_remove(struct pci_dev *pdev) if (!hw) return; - del_timer_sync(&hw->idle_timer); + del_timer_sync(&hw->watchdog_timer); flush_scheduled_work(); @@ -3785,7 +3827,6 @@ static int sky2_suspend(struct pci_dev *pdev, pm_message_t state) struct sky2_hw *hw = pci_get_drvdata(pdev); int i, wol = 0; - del_timer_sync(&hw->idle_timer); netif_poll_disable(hw->dev[0]); for (i = 0; i < hw->ports; i++) { @@ -3848,7 +3889,7 @@ static int sky2_resume(struct pci_dev *pdev) } netif_poll_enable(hw->dev[0]); - sky2_idle_start(hw); + return 0; out: dev_err(&pdev->dev, "resume failed (%d)\n", err); @@ -3862,7 +3903,6 @@ static void sky2_shutdown(struct pci_dev *pdev) struct sky2_hw *hw = pci_get_drvdata(pdev); int i, wol = 0; - del_timer_sync(&hw->idle_timer); netif_poll_disable(hw->dev[0]); for (i = 0; i < hw->ports; i++) { diff --git a/drivers/net/sky2.h b/drivers/net/sky2.h index 5efb5af..6906b81 100644 --- a/drivers/net/sky2.h +++ b/drivers/net/sky2.h @@ -1913,6 +1913,14 @@ struct sky2_port { u16 rx_tag; struct vlan_group *vlgrp; #endif + struct { + unsigned long last; + u32 mac_rp; + u8 mac_lev; + u8 fifo_rp; + u8 fifo_lev; + } check; + dma_addr_t rx_le_map; dma_addr_t tx_le_map; @@ -1933,6 +1941,15 @@ struct sky2_hw { void __iomem *regs; struct pci_dev *pdev; struct net_device *dev[2]; + unsigned long flags; +#define SKY2_HW_USE_MSI 0x00000001 +#define SKY2_HW_FIBRE_PHY 0x00000002 +#define SKY2_HW_GIGABIT 0x00000004 +#define SKY2_HW_NEWER_PHY 0x00000008 +#define SKY2_HW_RAM_BUFFER 0x00000010 +#define SKY2_HW_NEW_LE 0x00000020 /* new LSOv2 format */ +#define SKY2_HW_AUTO_TX_SUM 0x00000040 /* new IP decode for Tx */ +#define SKY2_HW_ADV_POWER_CTL 0x00000080 /* additional PHY power regs */ u8 chip_id; u8 chip_rev; @@ -1943,7 +1960,7 @@ struct sky2_hw { u32 st_idx; dma_addr_t st_dma; - struct timer_list idle_timer; + struct timer_list watchdog_timer; struct work_struct restart_work; int msi; wait_queue_head_t msi_wait;