Fix Broadcom NetXtreme II reboot hang 39/8839/2 6.1
author Pavel Boldin <pboldin@mirantis.com>
Sun, 28 Jun 2015 17:27:55 +0000 (13:27 -0400)
committer Pavel Boldin <pboldin@mirantis.com>
Mon, 29 Jun 2015 11:29:10 +0000 (14:29 +0300)
The Broadcom NetXtreme II (bnx2x) driver lacked the PCI shutdown handler
required to disable its devices. This led to a hang during reboot because
the still-enabled driver code attempted to use already-freed resources.

Change-Id: I8648f704a942883479c66ba62068870a70135ccd
Closes-Bug: #1467671

0006-mirantis-bnx2x-shutdown.patch [new file with mode: 0644]
kernel-lt-3.10.spec

diff --git a/0006-mirantis-bnx2x-shutdown.patch b/0006-mirantis-bnx2x-shutdown.patch
new file mode 100644 (file)
index 0000000..ea27393
--- /dev/null
@@ -0,0 +1,146 @@
+--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+@@ -12647,17 +12647,11 @@ init_one_exit:
+       return rc;
+ }
+-static void bnx2x_remove_one(struct pci_dev *pdev)
++static void __bnx2x_remove(struct pci_dev *pdev,
++                         struct net_device *dev,
++                         struct bnx2x *bp,
++                         bool remove_netdev)
+ {
+-      struct net_device *dev = pci_get_drvdata(pdev);
+-      struct bnx2x *bp;
+-
+-      if (!dev) {
+-              dev_err(&pdev->dev, "BAD net device from bnx2x_init_one\n");
+-              return;
+-      }
+-      bp = netdev_priv(dev);
+-
+       /* Delete storage MAC address */
+       if (!NO_FCOE(bp)) {
+               rtnl_lock();
+@@ -12670,7 +12664,15 @@ static void bnx2x_remove_one(struct pci_
+       bnx2x_dcbnl_update_applist(bp, true);
+ #endif
+-      unregister_netdev(dev);
++      /* Close the interface - either directly or implicitly */
++      if (remove_netdev) {
++              unregister_netdev(dev);
++      } else {
++              rtnl_lock();
++              if (netif_running(dev))
++                      bnx2x_close(dev);
++              rtnl_unlock();
++      }
+       /* Power on: we can't let PCI layer write to us while we are in D3 */
+       if (IS_PF(bp))
+@@ -12692,27 +12694,50 @@ static void bnx2x_remove_one(struct pci_
+       if (IS_VF(bp))
+               bnx2x_vfpf_release(bp);
+-      if (bp->regview)
+-              iounmap(bp->regview);
++      /* Assumes no further PCIe PM changes will occur */
++      if (system_state == SYSTEM_POWER_OFF) {
++              pci_wake_from_d3(pdev, bp->wol);
++              pci_set_power_state(pdev, PCI_D3hot);
++      }
++
++    if (remove_netdev) {
++        if (bp->regview)
++            iounmap(bp->regview);
++
++        /* for vf doorbells are part of the regview and were unmapped along with
++         * it. FW is only loaded by PF.
++         */
++        if (IS_PF(bp)) {
++            if (bp->doorbells)
++                iounmap(bp->doorbells);
++
++            bnx2x_release_firmware(bp);
++        }
++        bnx2x_free_mem_bp(bp);
++
++              free_netdev(dev);
++
++        if (atomic_read(&pdev->enable_cnt) == 1)
++            pci_release_regions(pdev);
++    }
+-      /* for vf doorbells are part of the regview and were unmapped along with
+-       * it. FW is only loaded by PF.
+-       */
+-      if (IS_PF(bp)) {
+-              if (bp->doorbells)
+-                      iounmap(bp->doorbells);
++      pci_disable_device(pdev);
+-              bnx2x_release_firmware(bp);
+-      }
+-      bnx2x_free_mem_bp(bp);
++      pci_set_drvdata(pdev, NULL);
++}
+-      free_netdev(dev);
++static void bnx2x_remove_one(struct pci_dev *pdev)
++{
++      struct net_device *dev = pci_get_drvdata(pdev);
++      struct bnx2x *bp;
+-      if (atomic_read(&pdev->enable_cnt) == 1)
+-              pci_release_regions(pdev);
++      if (!dev) {
++              dev_err(&pdev->dev, "BAD net device from bnx2x_init_one\n");
++              return;
++      }
++      bp = netdev_priv(dev);
+-      pci_disable_device(pdev);
+-      pci_set_drvdata(pdev, NULL);
++      __bnx2x_remove(pdev, dev, bp, true);
+ }
+ static int bnx2x_eeh_nic_unload(struct bnx2x *bp)
+@@ -12905,6 +12930,29 @@ static const struct pci_error_handlers b
+       .resume         = bnx2x_io_resume,
+ };
++static void bnx2x_shutdown(struct pci_dev *pdev)
++{
++      struct net_device *dev = pci_get_drvdata(pdev);
++      struct bnx2x *bp;
++
++      if (!dev)
++              return;
++
++      bp = netdev_priv(dev);
++      if (!bp)
++              return;
++
++      rtnl_lock();
++      netif_device_detach(dev);
++      rtnl_unlock();
++
++      /* Don't remove the netdevice, as there are scenarios which will cause
++       * the kernel to hang, e.g., when trying to remove bnx2i while the
++       * rootfs is mounted from SAN.
++       */
++      __bnx2x_remove(pdev, dev, bp, false);
++}
++
+ static struct pci_driver bnx2x_pci_driver = {
+       .name        = DRV_MODULE_NAME,
+       .id_table    = bnx2x_pci_tbl,
+@@ -12916,6 +12964,7 @@ static struct pci_driver bnx2x_pci_drive
+ #ifdef CONFIG_BNX2X_SRIOV
+       .sriov_configure = bnx2x_sriov_configure,
+ #endif
++      .shutdown    = bnx2x_shutdown,
+ };
+ static int __init bnx2x_init(void)
index fe733c81ef2f7d3ce22d483212f75c69c5780dc4..524dfbfaa5ecd5a67cf96b63e94733efd29813ad 100644 (file)
@@ -177,6 +177,7 @@ Patch2: 0002-netfilter-nf_conntrack-avoid-large-timeout-for-mid-s.patch
 Patch3: 0003-mirantis-hpsa-3-4-4-RH-1-el7-driver-update.patch
 Patch4: 0004-mirantis-hpsa-3-4-8-140-el7-driver-update.patch
 Patch5: 0005-mirantis-megaraid_sas-06.803.01.00-rc1.patch
+Patch6: 0006-mirantis-bnx2x-shutdown.patch
 
 # Do not package the source tarball.
 NoSource: 0
@@ -323,6 +324,7 @@ pushd linux-%{version}-%{release}.%{_target_cpu} > /dev/null
 %patch3 -p1
 %patch4 -p1
 %patch5 -p1
+%patch6 -p1
 popd > /dev/null
 
 %build
@@ -797,6 +799,14 @@ fi
 
 %changelog
 
+* Sun Jun 28 2015 Pavel Boldin <pboldin@mirantis.com> - 3.10.55-mira4
+- Fix the Broadcom NetXtreme II kernel hang on reboot by backporting the
+  following upstream commits:
+    * b030ed2fdc8a396dba71e4d550236a0f1bb38b40
+      bnx2x: Implement PCI shutdown
+    * d9aee591b0f06bd44cd577b757d3f267bc35fe4d
+      bnx2x: Don't release PCI bars on shutdown
+
 * Tue May 12 2015 Albert Siriy <asyriy@mirantis.net> - 3.10.55-mira3
 - Added patch 0005-mirantis-megaraid_sas-06.803.01.00-rc1.patch
 - to support Dell PERC RAID H310 controllers