From: Doug Ledford <dledford@redhat.com> Date: Tue, 14 Apr 2009 15:23:33 -0400 Subject: [openib] ipath: update driver to OFED 1.4.1-rc3 Message-id: 1239737023-31222-7-git-send-email-dledford@redhat.com O-Subject: [Patch RHEL5.4 06/16] [ipath] update driver to OFED 1.4.1-rc3 version Bugzilla: 230035 480696 Signed-off-by: Doug Ledford <dledford@redhat.com> diff --git a/drivers/infiniband/hw/ipath/Kconfig b/drivers/infiniband/hw/ipath/Kconfig index 2556762..3c7968f 100644 --- a/drivers/infiniband/hw/ipath/Kconfig +++ b/drivers/infiniband/hw/ipath/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_IPATH tristate "QLogic InfiniPath Driver" - depends on PCI_MSI && 64BIT && NET + depends on 64BIT && NET ---help--- This is a driver for QLogic InfiniPath host channel adapters, including InfiniBand verbs support. This driver allows these diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile index f19ef92..e3109fb 100644 --- a/drivers/infiniband/hw/ipath/Makefile +++ b/drivers/infiniband/hw/ipath/Makefile @@ -26,15 +26,19 @@ ib_ipath-y := \ ipath_sysfs.o \ ipath_uc.o \ ipath_ud.o \ + ipath_wc_pat.o \ ipath_user_pages.o \ ipath_user_sdma.o \ ipath_verbs_mcast.o \ - ipath_verbs.o + ipath_verbs.o \ + ipath_iba7220.o \ + ipath_sd7220.o \ + ipath_sd7220_img.o ib_ipath-y += ipath_iba6110.o ib_ipath-$(CONFIG_PCI_MSI) += ipath_iba6120.o -ib_ipath-$(CONFIG_PCI_MSI) += ipath_iba7220.o ipath_sd7220.o ipath_sd7220_img.o -ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o +ib_ipath-$(CONFIG_X86_64) += iowrite32_copy_x86_64.o ib_ipath-$(CONFIG_X86_64) += memcpy_cachebypass_x86_64.o +ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o diff --git a/drivers/infiniband/hw/ipath/iowrite32_copy_x86_64.S b/drivers/infiniband/hw/ipath/iowrite32_copy_x86_64.S new file mode 100644 index 0000000..6c659cf --- /dev/null +++ b/drivers/infiniband/hw/ipath/iowrite32_copy_x86_64.S @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2006, 2007 QLogic 
Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/** + * __iowrite32_copy - copy a memory block using dword multiple writes + * + * This is primarily for writing to the InfiniPath PIO buffers, which + * only support dword multiple writes, and thus can not use memcpy(). + * For this reason, we use nothing smaller than dword writes. + * It is also used as a fast copy routine in some places that have been + * measured to win over memcpy, and the performance delta matters. + * + * Count is number of dwords; might not be a qword multiple. 
+ */ + + .globl __iowrite32_copy + .p2align 4 +/* rdi destination, rsi source, rdx count */ +__iowrite32_copy: + movl %edx,%ecx + shrl $1,%ecx + andl $1,%edx + rep + movsq + movl %edx,%ecx + rep + movsd + ret diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h index 2fa012d..28cfe97 100644 --- a/drivers/infiniband/hw/ipath/ipath_common.h +++ b/drivers/infiniband/hw/ipath/ipath_common.h @@ -201,7 +201,6 @@ typedef enum _ipath_ureg { #define IPATH_RUNTIME_RCVHDR_COPY 0x8 #define IPATH_RUNTIME_MASTER 0x10 #define IPATH_RUNTIME_NODMA_RTAIL 0x80 -#define IPATH_RUNTIME_SPECIAL_TRIGGER 0x100 #define IPATH_RUNTIME_SDMA 0x200 #define IPATH_RUNTIME_FORCE_PIOAVAIL 0x400 #define IPATH_RUNTIME_PIO_REGSWAPPED 0x800 @@ -452,8 +451,6 @@ struct ipath_user_info { #define IPATH_CMD_SDMA_INFLIGHT 31 /* sdma inflight counter request */ #define IPATH_CMD_SDMA_COMPLETE 32 /* sdma completion counter request */ -#define IPATH_CMD_MAX 31 - /* * Poll types */ diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c index 29d1a82..261bf85 100644 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -82,7 +82,7 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited) wc->uqueue[head].opcode = entry->opcode; wc->uqueue[head].vendor_err = entry->vendor_err; wc->uqueue[head].byte_len = entry->byte_len; - wc->uqueue[head].imm_data = (__u32 __force)entry->imm_data; + wc->uqueue[head].ex.imm_data = (__u32 __force) entry->ex.imm_data; wc->uqueue[head].qp_num = entry->qp->qp_num; wc->uqueue[head].src_qp = entry->src_qp; wc->uqueue[head].wc_flags = entry->wc_flags; diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c index f4eaf13..6d49d2f 100644 --- a/drivers/infiniband/hw/ipath/ipath_diag.c +++ b/drivers/infiniband/hw/ipath/ipath_diag.c @@ -403,7 +403,7 @@ static ssize_t ipath_diagpkt_write(struct file *fp, goto 
bail; } /* - * - Want to skip check for l_state if using custom PBC, + * Want to skip check for l_state if using custom PBC, * because we might be trying to force an SM packet out. * first-cut, skip _all_ state checking in that case. */ @@ -476,13 +476,6 @@ static ssize_t ipath_diagpkt_write(struct file *fp, } else __iowrite32_copy(piobuf + 2, tmpbuf, clen); - if (dd->ipath_flags & IPATH_USE_SPCL_TRIG) { - u32 spcl_off = (pbufn > dd->ipath_piobcnt2k) ? - 2047 : 1023; - ipath_flush_wc(); - __raw_writel(0xaebecede, piobuf + spcl_off); - } - ipath_flush_wc(); ret = sizeof(dp); diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index b91d67e..39c3123 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 
* * This software is available to you under a choice of one of two @@ -39,6 +39,7 @@ #include <linux/netdevice.h> #include <linux/vmalloc.h> +#include "ipath_wc_pat.h" #include "ipath_kernel.h" #include "ipath_verbs.h" @@ -83,11 +84,6 @@ module_param_named(hol_timeout_ms, ipath_hol_timeout_ms, uint, S_IRUGO); MODULE_PARM_DESC(hol_timeout_ms, "duration of user app suspension after link failure"); -unsigned ipath_sdma_fetch_arb = 1; -EXPORT_SYMBOL_GPL(ipath_sdma_fetch_arb); -module_param_named(fetch_arb, ipath_sdma_fetch_arb, uint, S_IRUGO); -MODULE_PARM_DESC(fetch_arb, "IBA7220: change SDMA descriptor arbitration"); - unsigned ipath_linkrecovery = 1; module_param_named(linkrecovery, ipath_linkrecovery, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(linkrecovery, "enable workaround for link recovery issue"); @@ -160,23 +156,6 @@ static struct pci_driver ipath_driver = { .id_table = ipath_pci_tbl, }; -static void ipath_check_status(struct work_struct *work) -{ - struct ipath_devdata *dd = container_of(work, struct ipath_devdata, - status_work.work); - - /* - * If we're in the NOCABLE state, try again in another minute. 
- */ - if (*dd->ipath_statusp & IPATH_STATUS_IB_NOCABLE) { - schedule_delayed_work(&dd->status_work, HZ * STATUS_TIMEOUT); - return; - } - - if (!(*dd->ipath_statusp & IPATH_STATUS_IB_READY)) - dev_info(&dd->pcidev->dev, "IB link is not ACTIVE\n"); -} - static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev, u32 *bar0, u32 *bar1) { @@ -244,8 +223,6 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev) dd->pcidev = pdev; pci_set_drvdata(pdev, dd); - INIT_DELAYED_WORK(&dd->status_work, ipath_check_status); - list_add(&dd->ipath_list, &ipath_dev_list); bail_unlock: @@ -378,8 +355,8 @@ static void ipath_verify_pioperf(struct ipath_devdata *dd) * length 0, no dwords actually sent, and mark as VL15 * on chips where that may matter (due to IB flowcontrol) */ - if ((dd->ipath_flags&IPATH_HAS_PBC_CNT)) - writeq(0x80000000UL<<32, piobuf); + if ((dd->ipath_flags & IPATH_HAS_PBC_CNT)) + writeq(1UL << 63, piobuf); else writeq(0, piobuf); ipath_flush_wc(); @@ -560,7 +537,8 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, #endif case PCI_DEVICE_ID_INFINIPATH_7220: #ifndef CONFIG_PCI_MSI - ipath_dbg("CONFIG_PCI_MSI is not enabled, using IntX for unit %u\n", dd->ipath_unit); + ipath_dbg("CONFIG_PCI_MSI is not enabled, " + "using INTx for unit %u\n", dd->ipath_unit); #endif ipath_init_iba7220_funcs(dd); break; @@ -600,19 +578,19 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, #else dd->ipath_kregbase = ioremap_nocache(addr, len); #endif - if (!dd->ipath_kregbase) { ipath_dbg("Unable to map io addr %llx to kvirt, failing\n", addr); ret = -ENOMEM; - goto bail_iounmap; + goto bail_regions; } dd->ipath_kregend = (u64 __iomem *) ((void __iomem *)dd->ipath_kregbase + len); dd->ipath_physaddr = addr; /* used for io_remap, etc. 
*/ /* for user mmap */ - ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p\n", - addr, dd->ipath_kregbase); + ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p, " + "length %lu bytes\n", + addr, dd->ipath_kregbase, (unsigned long) len); if (dd->ipath_f_bus(dd, pdev)) ipath_dev_err(dd, "Failed to setup config space; " @@ -624,15 +602,15 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, * check 0 irq after we return from chip-specific bus setup, since * that can affect this due to setup */ - if (!pdev->irq) + if (!dd->ipath_irq) ipath_dev_err(dd, "irq is 0, BIOS error? Interrupts won't " "work\n"); else { - ret = request_irq(pdev->irq, ipath_intr, IRQF_SHARED, + ret = request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED, IPATH_DRV_NAME, dd); if (ret) { ipath_dev_err(dd, "Couldn't setup irq handler, " - "irq=%d: %d\n", pdev->irq, ret); + "irq=%d: %d\n", dd->ipath_irq, ret); goto bail_iounmap; } } @@ -641,13 +619,15 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, if (ret) goto bail_irqsetup; - ret = ipath_enable_wc(dd); + if (!ipath_wc_pat) { + ret = ipath_enable_wc(dd); - if (ret) { - ipath_dev_err(dd, "Write combining not enabled " - "(err %d): performance may be poor\n", - -ret); - ret = 0; + if (ret) { + ipath_dev_err(dd, "Write combining not enabled " + "(err %d): performance may be poor\n", + -ret); + ret = 0; + } } ipath_verify_pioperf(dd); @@ -658,9 +638,6 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, ipath_diag_add(dd); ipath_register_ib_device(dd); - /* Check that card status in STATUS_TIMEOUT seconds. 
*/ - schedule_delayed_work(&dd->status_work, HZ * STATUS_TIMEOUT); - goto bail; bail_irqsetup: @@ -686,6 +663,8 @@ bail: static void __devexit cleanup_device(struct ipath_devdata *dd) { int port; + struct ipath_portdata **tmp; + unsigned long flags; if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) { /* can't do anything more with chip; needs re-init */ @@ -697,12 +676,15 @@ static void __devexit cleanup_device(struct ipath_devdata *dd) * re-init */ dd->ipath_kregbase = NULL; + dd->ipath_piobase = NULL; + dd->ipath_userbase = NULL; dd->ipath_uregbase = 0; dd->ipath_sregbase = 0; dd->ipath_cregbase = 0; dd->ipath_kregsize = 0; } - ipath_disable_wc(dd); + if (!ipath_wc_pat) + ipath_disable_wc(dd); } if (dd->ipath_spectriggerhit) @@ -767,20 +749,21 @@ static void __devexit cleanup_device(struct ipath_devdata *dd) /* * free any resources still in use (usually just kernel ports) - * at unload; we do for portcnt, not cfgports, because cfgports - * could have changed while we were loaded. + * at unload; we do for portcnt, because that's what we allocate. + * We acquire lock to be really paranoid that ipath_pd isn't being + * accessed from some interrupt-related code (that should not happen, + * but best to be sure). 
*/ + spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); + tmp = dd->ipath_pd; + dd->ipath_pd = NULL; + spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); for (port = 0; port < dd->ipath_portcnt; port++) { - struct ipath_portdata *pd = dd->ipath_pd[port]; - dd->ipath_pd[port] = NULL; + struct ipath_portdata *pd = tmp[port]; + tmp[port] = NULL; /* debugging paranoia */ ipath_free_pddata(dd, pd); } - kfree(dd->ipath_pd); - /* - * debuggability, in case some cleanup path tries to use it - * after this - */ - dd->ipath_pd = NULL; + kfree(tmp); } static void __devexit ipath_remove_one(struct pci_dev *pdev) @@ -795,7 +778,6 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev) */ ipath_shutdown_device(dd); - cancel_delayed_work(&dd->status_work); flush_scheduled_work(); if (dd->verbs_dev) @@ -817,11 +799,10 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev) * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs * for all versions of the driver, if they were allocated */ - if (pdev->irq) { - ipath_cdbg(VERBOSE, - "unit %u free_irq of irq %x\n", - dd->ipath_unit, pdev->irq); - free_irq(pdev->irq, dd); + if (dd->ipath_irq) { + ipath_cdbg(VERBOSE, "unit %u free irq %d\n", + dd->ipath_unit, dd->ipath_irq); + dd->ipath_f_free_irq(dd); } else ipath_dbg("irq is 0, not doing free_irq " "for unit %u\n", dd->ipath_unit); @@ -837,6 +818,17 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev) ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase); iounmap((volatile void __iomem *) dd->ipath_kregbase); + if (dd->ipath_piobase) { + ipath_cdbg(VERBOSE, "Unmapping piobase %p\n", + dd->ipath_piobase); + iounmap((volatile void __iomem *) dd->ipath_piobase); + } + if (dd->ipath_userbase) { + ipath_cdbg(VERBOSE, "Unmapping userbase %p\n", + dd->ipath_userbase); + iounmap((volatile void __iomem *) dd->ipath_userbase); + } + pci_release_regions(pdev); ipath_cdbg(VERBOSE, "calling pci_disable_device\n"); pci_disable_device(pdev); @@ 
-864,10 +856,10 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first, unsigned cnt) { unsigned i, last = first + cnt; + unsigned long flags; ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first); for (i = first; i < last; i++) { - unsigned long flags; spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); /* * The disarm-related bits are write-only, so it @@ -1129,7 +1121,7 @@ struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, * key header. In order to keep everything dword aligned, * we'll reserve 4 bytes. */ - len = dd->ipath_ibmaxlen + 4; + len = dd->ipath_init_ibmaxlen + 4; if (dd->ipath_flags & IPATH_4BYTE_TID) { /* We need a 2KB multiple alignment, and there is no way @@ -1286,7 +1278,7 @@ reloop: */ ipath_cdbg(ERRPKT, "Error Pkt, but no eflags! egrbuf" " %x, len %x hdrq+%x rhf: %Lx\n", - etail, tlen, l, + etail, tlen, l, (unsigned long long) le64_to_cpu(*(__le64 *) rhf_addr)); if (ipath_debug & __IPATH_ERRPKTDBG) { u32 j, *d, dw = rsize-2; @@ -1455,7 +1447,6 @@ static void ipath_update_pio_bufs(struct ipath_devdata *dd) spin_unlock_irqrestore(&ipath_pioavail_lock, flags); } - /* * used to force update of pioavailshadow if we can't get a pio buffer. * Needed primarily due to exitting freeze mode after recovering @@ -1485,7 +1476,8 @@ static void ipath_reset_availshadow(struct ipath_devdata *dd) 0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */ if (oldval != dd->ipath_pioavailshadow[i]) ipath_dbg("shadow[%d] was %Lx, now %lx\n", - i, oldval, dd->ipath_pioavailshadow[i]); + i, (unsigned long long) oldval, + dd->ipath_pioavailshadow[i]); } spin_unlock_irqrestore(&ipath_pioavail_lock, flags); } @@ -1575,7 +1567,6 @@ static noinline void no_pio_bufs(struct ipath_devdata *dd) } } - /* * common code for normal driver pio buffer allocation, and reserved * allocation. 
@@ -1666,13 +1657,11 @@ rescan: return buf; } - /** * ipath_getpiobuf - find an available pio buffer * @dd: the infinipath device * @plen: the size of the PIO buffer needed in 32-bit words * @pbufnum: the buffer number is placed here - * Searches the allocated driver range. */ u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 plen, u32 *pbufnum) { @@ -1925,7 +1914,7 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl) */ if (dd->ipath_flags & IPATH_HAS_SEND_DMA) { int skip_cancel; - u64 *statp = &dd->ipath_sdma_status; + unsigned long *statp = &dd->ipath_sdma_status; spin_lock_irqsave(&dd->ipath_sdma_lock, flags); skip_cancel = @@ -1980,7 +1969,7 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl) !test_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status) && test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)) { spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - /* only wait so long for intr */ + /* only wait so long for intr */ dd->ipath_sdma_abort_intr_timeout = jiffies + HZ; dd->ipath_sdma_reset_wait = 200; if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) @@ -2014,13 +2003,6 @@ void ipath_force_pio_avail_update(struct ipath_devdata *dd) spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); } - -/* - * Formerly took parameter <which> in pre-shifted, - * pre-merged form with LinkCmd and LinkInitCmd - * together, and assuming the zero was NOP. - * This is problematic for IBA7220. 
- */ static void ipath_set_ib_lstate(struct ipath_devdata *dd, int linkcmd, int linitcmd) { @@ -2031,6 +2013,7 @@ static void ipath_set_ib_lstate(struct ipath_devdata *dd, int linkcmd, [INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED", [INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE" }; + if (linitcmd == INFINIPATH_IBCC_LINKINITCMD_DISABLE) { /* * If we are told to disable, note that so link-recovery @@ -2041,9 +2024,9 @@ static void ipath_set_ib_lstate(struct ipath_devdata *dd, int linkcmd, preempt_enable(); } else if (linitcmd) { /* - * Any other linkinitcmd will lead to LINKDOWN< and then - * to INIT (if all is well), so clear flag to let ink-recovery - * code attempt to bring us back up. + * Any other linkinitcmd will lead to LINKDOWN and then + * to INIT (if all is well), so clear flag to let + * link-recovery code attempt to bring us back up. */ preempt_disable(); dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED; @@ -2134,6 +2117,7 @@ int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate) dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK; ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, dd->ipath_ibcctrl); + ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xfeedbeef); /* turn heartbeat off, as it causes loopback to fail */ dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, @@ -2150,6 +2134,7 @@ int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate) dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK; ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, dd->ipath_ibcctrl); + ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xfeedbeef); /* don't wait */ ret = 0; goto bail; @@ -2251,6 +2236,7 @@ int ipath_set_mtu(struct ipath_devdata *dd, u16 arg) dd->ipath_ibcctrl = ibc; ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, dd->ipath_ibcctrl); + ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xfeedbeef); dd->ipath_f_tidtemplate(dd); } @@ -2458,10 +2444,6 @@ void ipath_shutdown_device(struct ipath_devdata *dd) del_timer_sync(&dd->ipath_stats_timer); dd->ipath_stats_timer_active 
= 0; } - if (dd->ipath_link_timer_active) { - del_timer_sync(&dd->ipath_link_timer); - dd->ipath_link_timer_active = 0; - } if (dd->ipath_intrchk_timer.data) { del_timer_sync(&dd->ipath_intrchk_timer); dd->ipath_intrchk_timer.data = 0; @@ -2545,9 +2527,9 @@ void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd) skbinfo); for (e = 0; e < dd->ipath_p0_rcvegrcnt; e++) if (skbinfo[e].skb) { - pci_unmap_single(dd->pcidev, - skbinfo[e].phys, dd->ipath_ibmaxlen, - PCI_DMA_FROMDEVICE); + pci_unmap_single(dd->pcidev, skbinfo[e].phys, + dd->ipath_init_ibmaxlen, + PCI_DMA_FROMDEVICE); dev_kfree_skb(skbinfo[e].skb); } vfree(skbinfo); @@ -2566,6 +2548,15 @@ static int __init infinipath_init(void) if (ipath_debug & __IPATH_DBG) printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version); + if (ipath_wc_pat) { + if (ipath_enable_wc_pat() || !ipath_wc_pat_enabled()) { + printk(KERN_ERR IPATH_DRV_NAME + ": WC PAT unavailable, fall-back to MTRR\n"); + ipath_wc_pat = 0; + } else + ipath_dbg("WC PAT mechanism is enabled\n"); + } + /* * These must be called before the driver is registered with * the PCI subsystem. 
@@ -2574,7 +2565,7 @@ static int __init infinipath_init(void) if (!idr_pre_get(&unit_table, GFP_KERNEL)) { printk(KERN_ERR IPATH_DRV_NAME ": idr_pre_get() failed\n"); ret = -ENOMEM; - goto bail; + goto bail_wc_pat; } ret = pci_register_driver(&ipath_driver); @@ -2609,6 +2600,10 @@ bail_pci: bail_unit: idr_destroy(&unit_table); +bail_wc_pat: + if (ipath_wc_pat) + ipath_disable_wc_pat(); + bail: return ret; } @@ -2623,6 +2618,11 @@ static void __exit infinipath_cleanup(void) pci_unregister_driver(&ipath_driver); idr_destroy(&unit_table); + + if (ipath_wc_pat) { + ipath_disable_wc_pat(); + ipath_dbg("WC PAT mechanism is disabled\n"); + } } /** @@ -2638,6 +2638,7 @@ int ipath_reset_device(int unit) { int ret, i; struct ipath_devdata *dd = ipath_lookup(unit); + unsigned long flags; if (!dd) { ret = -ENODEV; @@ -2663,6 +2664,7 @@ int ipath_reset_device(int unit) goto bail; } + spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); if (dd->ipath_pd) for (i = 1; i < dd->ipath_cfgports; i++) { if (dd->ipath_pd[i] && dd->ipath_pd[i]->port_cnt) { @@ -2675,6 +2677,7 @@ int ipath_reset_device(int unit) goto bail; } } + spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); if (dd->ipath_flags & IPATH_HAS_SEND_DMA) teardown_sdma(dd); @@ -2704,18 +2707,24 @@ bail: * through the normal interfaces (i.e., everything other than diags * interface). Returns number of signalled processes. 
*/ -int ipath_signal_procs(struct ipath_devdata *dd, int sig) +static int ipath_signal_procs(struct ipath_devdata *dd, int sig) { int i, sub, any = 0; pid_t pid; - + unsigned long flags; + if (!dd->ipath_pd) return 0; + + spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); for (i = 1; i < dd->ipath_cfgports; i++) { - if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt - || !dd->ipath_pd[i]->port_pid) + if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt || + !dd->ipath_pd[i]->port_pid) continue; pid = dd->ipath_pd[i]->port_pid; + if (!pid) + continue; + dev_info(&dd->pcidev->dev, "context %d in use " "(PID %u), sending signal %d\n", i, pid, sig); @@ -2732,6 +2741,7 @@ int ipath_signal_procs(struct ipath_devdata *dd, int sig) any++; } } + spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); return any; } diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c index 899f156..dc37277 100644 --- a/drivers/infiniband/hw/ipath/ipath_eeprom.c +++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c @@ -482,7 +482,6 @@ done: return (idx >= 0) ? 
i2c_chains + idx : NULL; } - static int ipath_eeprom_internal_read(struct ipath_devdata *dd, u8 eeprom_offset, void *buffer, int len) { @@ -561,7 +560,8 @@ static int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offse while (len > 0) { if (icd->eeprom_dev == IPATH_NO_DEV) { - if (i2c_startcmd(dd, (eeprom_offset << 1) | WRITE_CMD)) { + if (i2c_startcmd(dd, + (eeprom_offset << 1) | WRITE_CMD)) { ipath_dbg("Failed to start cmd offset %u\n", eeprom_offset); goto failed_write; diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index dc61e15..09b41e2 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -39,11 +39,13 @@ #include <linux/highmem.h> #include <linux/io.h> #include <linux/jiffies.h> +#include <linux/smp_lock.h> #include <asm/pgtable.h> #include "ipath_kernel.h" #include "ipath_common.h" #include "ipath_user_sdma.h" +#include "ipath_wc_pat.h" static int ipath_open(struct inode *, struct file *); static int ipath_close(struct inode *, struct file *); @@ -222,8 +224,13 @@ static int ipath_get_base_info(struct file *fp, (unsigned long long) kinfo->spi_subport_rcvhdr_base); } - kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) / - dd->ipath_palign; + /* + * All user buffers are 2KB buffers. If we ever support + * giving 4KB buffers to user processes, this will need some + * work. 
+ */ + kinfo->spi_pioindex = (kinfo->spi_piobufbase - + (dd->ipath_piobufbase & 0xffffffff)) / dd->ipath_palign; kinfo->spi_pioalign = dd->ipath_palign; kinfo->spi_qpair = IPATH_KD_QP; @@ -903,7 +910,7 @@ static int ipath_create_user_egr(struct ipath_portdata *pd) chunk = pd->port_rcvegrbuf_chunks; egrperchunk = pd->port_rcvegrbufs_perchunk; size = pd->port_rcvegrbuf_size; - pd->port_rcvegrbuf = kmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]), + pd->port_rcvegrbuf = kzalloc(chunk * sizeof(pd->port_rcvegrbuf[0]), GFP_KERNEL); if (!pd->port_rcvegrbuf) { ret = -ENOMEM; @@ -1077,6 +1084,9 @@ static int mmap_piobufs(struct vm_area_struct *vma, vma->vm_flags &= ~VM_MAYREAD; vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; + if (ipath_wc_pat) + vma->vm_page_prot = pgprot_wc(vma->vm_page_prot); + ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT, vma->vm_end - vma->vm_start, vma->vm_page_prot); @@ -1747,8 +1757,8 @@ recheck: ipath_dbg("No ports available (none initialized " "and ready)\n"); } else { - if (prefunit > 0) { - /* if started above 0, retry from 0 */ + if (prefunit != -1) { + /* if had prefunit, retry from 0 */ ipath_cdbg(PROC, "%s[%u] no ports on prefunit " "%d, clear and re-check\n", @@ -1823,6 +1833,7 @@ done: static int ipath_open(struct inode *in, struct file *fp) { /* The real work is performed later in ipath_assign_port() */ + cycle_kernel_lock(); fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL); return fp->private_data ? 0 : -ENOMEM; } @@ -1981,7 +1992,12 @@ static int ipath_do_user_init(struct file *fp, * explictly set the in-memory tail copy to 0 beforehand, so we * don't have to wait to be sure the DMA update has happened * (chip resets head/tail to 0 on transition to enable). + * The mutex ensures that the read value of dd->ipath_rcvctrl + * after the atomic set_bit is not stale, and avoids a race + * hazard with 2 processes attempting to enable (distinct) + * ports simultaneously. 
*/ + mutex_lock(&ipath_mutex); set_bit(dd->ipath_r_portenable_shift + pd->port_port, &dd->ipath_rcvctrl); if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) { @@ -1993,6 +2009,7 @@ static int ipath_do_user_init(struct file *fp, } ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); + mutex_unlock(&ipath_mutex); /* Notify any waiting slaves */ if (pd->port_subport_cnt) { clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag); @@ -2047,7 +2064,9 @@ static int ipath_close(struct inode *in, struct file *fp) struct ipath_filedata *fd; struct ipath_portdata *pd; struct ipath_devdata *dd; + unsigned long flags; unsigned port; + pid_t pid; ipath_cdbg(VERBOSE, "close on dev %lx, private data %p\n", (long)in->i_rdev, fp->private_data); @@ -2079,14 +2098,13 @@ static int ipath_close(struct inode *in, struct file *fp) mutex_unlock(&ipath_mutex); goto bail; } + /* early; no interrupt users after this */ + spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); port = pd->port_port; - - if (pd->port_hdrqfull) { - ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors " - "during run\n", pd->port_comm, pd->port_pid, - pd->port_hdrqfull); - pd->port_hdrqfull = 0; - } + dd->ipath_pd[port] = NULL; + pid = pd->port_pid; + pd->port_pid = 0; + spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); if (pd->port_rcvwait_to || pd->port_piowait_to || pd->port_rcvnowait || pd->port_pionowait) { @@ -2143,12 +2161,10 @@ static int ipath_close(struct inode *in, struct file *fp) unlock_expected_tids(pd); ipath_stats.sps_ports--; ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n", - pd->port_comm, pd->port_pid, + pd->port_comm, pid, dd->ipath_unit, port); } - pd->port_pid = 0; - dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */ mutex_unlock(&ipath_mutex); ipath_free_pddata(dd, pd); /* after releasing the mutex */ diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index fe852e3..d32ec71 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ 
b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -31,7 +31,6 @@ * SOFTWARE. */ -#include <linux/version.h> #include <linux/module.h> #include <linux/fs.h> #include <linux/mount.h> @@ -238,8 +237,7 @@ static int create_device_files(struct super_block *sb, snprintf(unit, sizeof unit, "%02d", dd->ipath_unit); ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir, - (struct file_operations *) &simple_dir_operations, - dd); + &simple_dir_operations, dd); if (ret) { printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret); goto bail; diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index 6559c92..5b91705 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -39,11 +39,11 @@ #include <linux/vmalloc.h> #include <linux/pci.h> #include <linux/delay.h> -#include <linux/swap.h> #include <rdma/ib_verbs.h> #include "ipath_kernel.h" #include "ipath_registers.h" +#include "ipath_wc_pat.h" static void ipath_setup_ht_setextled(struct ipath_devdata *, u64, u64); @@ -473,7 +473,6 @@ static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = { INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), }; - #define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) @@ -963,11 +962,28 @@ static void slave_or_pri_blk(struct ipath_devdata *dd, struct pci_dev *pdev, } dd->ipath_lbus_speed = speed; } + snprintf(dd->ipath_lbus_info, sizeof(dd->ipath_lbus_info), "HyperTransport,%uMHz,x%u\n", dd->ipath_lbus_speed, dd->ipath_lbus_width); +} + +static int ipath_ht_intconfig(struct ipath_devdata *dd) +{ + int ret; + + if (dd->ipath_intconfig) { + ipath_write_kreg(dd, dd->ipath_kregs->kr_interruptconfig, + dd->ipath_intconfig); /* interrupt address */ + ret = 0; + } else { + ipath_dev_err(dd, "No interrupts enabled, couldn't setup " + "interrupt address\n"); + ret = -EINVAL; + } + return ret; 
} static int set_int_handler(struct ipath_devdata *dd, struct pci_dev *pdev, @@ -1010,6 +1026,7 @@ static int set_int_handler(struct ipath_devdata *dd, struct pci_dev *pdev, /* can't program yet, so save for interrupt setup */ dd->ipath_intconfig = ihandler; + dd->ipath_irq = intvec; /* keep going, so we find link control stuff also */ return ihandler != 0; @@ -1482,25 +1499,6 @@ static void ipath_ht_quiet_serdes(struct ipath_devdata *dd) ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val); } -static int ipath_ht_intconfig(struct ipath_devdata *dd) -{ - int ret; - - if (!dd->ipath_intconfig) { - ipath_dev_err(dd, "No interrupts enabled, couldn't setup " - "interrupt address\n"); - ret = 1; - goto bail; - } - - ipath_write_kreg(dd, dd->ipath_kregs->kr_interruptconfig, - dd->ipath_intconfig); /* interrupt address */ - ret = 0; - -bail: - return ret; -} - /** * ipath_pe_put_tid - write a TID in chip * @dd: the infinipath device @@ -1654,8 +1652,12 @@ static int ipath_ht_early_init(struct ipath_devdata *dd) * these out on the wire. 
* Chip Errata bug 6610 */ - piobuf = (u32 __iomem *) (((char __iomem *)(dd->ipath_kregbase)) + - dd->ipath_piobufbase); + if (ipath_wc_pat) + piobuf = (u32 __iomem *) dd->ipath_piobase; + else + piobuf = (u32 __iomem *) + (((char __iomem *)(dd->ipath_kregbase)) + + dd->ipath_piobufbase); pioincr = dd->ipath_palign / sizeof(*piobuf); for (i = 0; i < dd->ipath_piobcnt2k; i++) { /* @@ -1717,6 +1719,13 @@ static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase) return 0; } +static void ipath_ht_free_irq(struct ipath_devdata *dd) +{ + free_irq(dd->ipath_irq, dd); + dd->ipath_irq = 0; + dd->ipath_intconfig = 0; +} + static struct ipath_message_header * ipath_ht_get_msgheader(struct ipath_devdata *dd, __le32 *rhf_addr) { @@ -1944,6 +1953,7 @@ void ipath_init_iba6110_funcs(struct ipath_devdata *dd) dd->ipath_f_cleanup = ipath_setup_ht_cleanup; dd->ipath_f_setextled = ipath_setup_ht_setextled; dd->ipath_f_get_base_info = ipath_ht_get_base_info; + dd->ipath_f_free_irq = ipath_ht_free_irq; dd->ipath_f_tidtemplate = ipath_ht_tidtemplate; dd->ipath_f_intr_fallback = ipath_ht_nointr_fallback; dd->ipath_f_get_msgheader = ipath_ht_get_msgheader; diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c index 790b8f7..302e412 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6120.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c @@ -36,10 +36,8 @@ */ #include <linux/interrupt.h> -#include <linux/vmalloc.h> #include <linux/pci.h> #include <linux/delay.h> -#include <linux/swap.h> #include <rdma/ib_verbs.h> #include "ipath_kernel.h" @@ -389,7 +387,6 @@ static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = { INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), }; - #define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) @@ -702,6 +699,10 @@ static void ipath_pe_init_hwerrors(struct ipath_devdata *dd) */ val &= 
~INFINIPATH_HWE_PCIEBUSPARITYRADM; } + + /* avoid some intel cpu's speculative read freeze mode issue */ + val &= ~(INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT); dd->ipath_hwerrmask = val; } @@ -724,6 +725,12 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd) INFINIPATH_HWE_SERDESPLLFAILED); } + dd->ibdeltainprog = 1; + dd->ibsymsnap = + ipath_read_creg32(dd, dd->ipath_cregs->cr_ibsymbolerrcnt); + dd->iblnkerrsnap = + ipath_read_creg32(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt); + val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0); config1 = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig1); @@ -813,6 +820,36 @@ static void ipath_pe_quiet_serdes(struct ipath_devdata *dd) { u64 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0); + if (dd->ibsymdelta || dd->iblnkerrdelta || + dd->ibdeltainprog) { + u64 diagc; + /* enable counter writes */ + diagc = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwdiagctrl); + ipath_write_kreg(dd, dd->ipath_kregs->kr_hwdiagctrl, + diagc | INFINIPATH_DC_COUNTERWREN); + + if (dd->ibsymdelta || dd->ibdeltainprog) { + val = ipath_read_creg32(dd, + dd->ipath_cregs->cr_ibsymbolerrcnt); + if (dd->ibdeltainprog) + val -= val - dd->ibsymsnap; + val -= dd->ibsymdelta; + ipath_write_creg(dd, + dd->ipath_cregs->cr_ibsymbolerrcnt, val); + } + if (dd->iblnkerrdelta || dd->ibdeltainprog) { + val = ipath_read_creg32(dd, + dd->ipath_cregs->cr_iblinkerrrecovcnt); + if (dd->ibdeltainprog) + val -= val - dd->iblnkerrsnap; + val -= dd->iblnkerrdelta; + ipath_write_creg(dd, + dd->ipath_cregs->cr_iblinkerrrecovcnt, val); + } + + /* and disable counter writes */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_hwdiagctrl, diagc); + } val |= INFINIPATH_SERDC0_TXIDLE; ipath_dbg("Setting TxIdleEn on serdes (config0 = %llx)\n", (unsigned long long) val); @@ -911,7 +948,6 @@ static void ipath_setup_pe_cleanup(struct ipath_devdata *dd) pci_disable_msi(dd->pcidev); } - static void 
ipath_6120_pcie_params(struct ipath_devdata *dd) { u16 linkstat, speed; @@ -968,7 +1004,6 @@ bail: return; } - /** * ipath_setup_pe_config - setup PCIe config related stuff * @dd: the infinipath device @@ -1000,6 +1035,7 @@ static int ipath_setup_pe_config(struct ipath_devdata *dd, ipath_dev_err(dd, "pci_enable_msi failed: %d, " "interrupts may not work\n", ret); /* continue even if it fails, we may still be OK... */ + dd->ipath_irq = pdev->irq; if ((pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI))) { u16 control; @@ -1484,13 +1520,17 @@ static int ipath_pe_early_init(struct ipath_devdata *dd) /* * For openfabrics, we need to be able to handle an IB header of - * 24 dwords. HT chip has arbitrary sized receive buffers, so we - * made them the same size as the PIO buffers. This chip does not - * handle arbitrary size buffers, so we need the header large enough - * to handle largest IB header, but still have room for a 2KB MTU - * standard IB packet. + * at least 24 dwords. This chip does not handle arbitrary size + * buffers, so we need the header large enough to handle largest + * IB header, but still have room for a 2KB MTU standard IB packet. + * Additionally, some processor/memory controller combinations + * benefit quite strongly from having the DMA'ed data be cacheline + * aligned and a cacheline multiple, so we set the size to 32 dwords + * (2 64-byte primary cachelines for pretty much all processors of + * interest). The alignment hurts nothing, other than using somewhat + * more memory. 
*/ - dd->ipath_rcvhdrentsize = 24; + dd->ipath_rcvhdrentsize = 32; dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE; dd->ipath_rhf_offset = 0; dd->ipath_egrtidbase = (u64 __iomem *) @@ -1555,6 +1595,12 @@ done: return 0; } +static void ipath_pe_free_irq(struct ipath_devdata *dd) +{ + free_irq(dd->ipath_irq, dd); + dd->ipath_irq = 0; +} + static struct ipath_message_header * ipath_pe_get_msgheader(struct ipath_devdata *dd, __le32 *rhf_addr) @@ -1747,6 +1793,31 @@ static void ipath_pe_config_jint(struct ipath_devdata *dd, u16 a, u16 b) static int ipath_pe_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs) { + if (ibup) { + if (dd->ibdeltainprog) { + dd->ibdeltainprog = 0; + dd->ibsymdelta += + ipath_read_creg32(dd, + dd->ipath_cregs->cr_ibsymbolerrcnt) - + dd->ibsymsnap; + dd->iblnkerrdelta += + ipath_read_creg32(dd, + dd->ipath_cregs->cr_iblinkerrrecovcnt) - + dd->iblnkerrsnap; + } + } else { + dd->ipath_lli_counter = 0; + if (!dd->ibdeltainprog) { + dd->ibdeltainprog = 1; + dd->ibsymsnap = + ipath_read_creg32(dd, + dd->ipath_cregs->cr_ibsymbolerrcnt); + dd->iblnkerrsnap = + ipath_read_creg32(dd, + dd->ipath_cregs->cr_iblinkerrrecovcnt); + } + } + ipath_setup_pe_setextled(dd, ipath_ib_linkstate(dd, ibcs), ipath_ib_linktrstate(dd, ibcs)); return 0; @@ -1780,6 +1851,7 @@ void ipath_init_iba6120_funcs(struct ipath_devdata *dd) dd->ipath_f_cleanup = ipath_setup_pe_cleanup; dd->ipath_f_setextled = ipath_setup_pe_setextled; dd->ipath_f_get_base_info = ipath_pe_get_base_info; + dd->ipath_f_free_irq = ipath_pe_free_irq; dd->ipath_f_tidtemplate = ipath_pe_tidtemplate; dd->ipath_f_intr_fallback = ipath_pe_nointr_fallback; dd->ipath_f_xgxs_reset = ipath_pe_xgxs_reset; @@ -1795,3 +1867,4 @@ void ipath_init_iba6120_funcs(struct ipath_devdata *dd) /* initialize chip-specific variables */ ipath_init_pe_variables(dd); } + diff --git a/drivers/infiniband/hw/ipath/ipath_iba7220.c b/drivers/infiniband/hw/ipath/ipath_iba7220.c index f16cf9c..521c51e 100644 --- 
a/drivers/infiniband/hw/ipath/ipath_iba7220.c +++ b/drivers/infiniband/hw/ipath/ipath_iba7220.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -54,6 +54,15 @@ module_param_named(compat_ddr_negotiate, ipath_compat_ddr_negotiate, uint, MODULE_PARM_DESC(compat_ddr_negotiate, "Attempt pre-IBTA 1.2 DDR speed negotiation"); +static unsigned ipath_sdma_fetch_arb = 1; +module_param_named(fetch_arb, ipath_sdma_fetch_arb, uint, S_IRUGO); +MODULE_PARM_DESC(fetch_arb, "IBA7220: change SDMA descriptor arbitration"); + +static int ipath_pcie_coalesce; +module_param_named(pcie_coalesce, ipath_pcie_coalesce, int, S_IRUGO); +MODULE_PARM_DESC(pcie_coalesce, "tune PCIe coalescing on some Intel chipsets"); + + /* * This file contains almost all the chip-specific register information and * access functions for the QLogic InfiniPath 7220 PCI-Express chip, with the @@ -407,10 +416,6 @@ static const struct ipath_cregs ipath_7220_cregs = { .cr_psxmitwaitcount = IPATH_CREG_OFFSET(PSXmitWaitCount), }; -/* kr_revision bits */ -#define INFINIPATH_R_EMULATORREV_MASK ((1ULL<<22) - 1) -#define INFINIPATH_R_EMULATORREV_SHIFT 40 - /* kr_control bits */ #define INFINIPATH_C_RESET (1U<<7) @@ -528,9 +533,7 @@ static const struct ipath_cregs ipath_7220_cregs = { static char int_type[16] = "auto"; module_param_string(interrupt_type, int_type, sizeof(int_type), 0444); -MODULE_PARM_DESC(int_type, " interrupt_type=auto|force_msi|force_intx\n"); - -static int ipath_special_trigger; +MODULE_PARM_DESC(int_type, " interrupt_type=auto|force_msi|force_intx"); /* packet rate matching delay; chip has support */ static u8 rate_to_delay[2][2] = { @@ -539,9 +542,6 @@ static u8 rate_to_delay[2][2] = { { 4, 1 } /* DDR */ }; 
-module_param_named(special_trigger, ipath_special_trigger, int, S_IRUGO); -MODULE_PARM_DESC(special_trigger, "Enable SpecialTrigger arm/launch"); - /* 7220 specific hardware errors... */ static const struct ipath_hwerror_msgs ipath_7220_hwerror_msgs[] = { INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"), @@ -857,17 +857,8 @@ static int ipath_7220_boardname(struct ipath_devdata *dd, char *name, boardrev); break; } - if (n) { - if (dd->ipath_revision & INFINIPATH_R_EMULATOR_MASK) { - unsigned rev = - (unsigned) ((dd->ipath_revision >> - INFINIPATH_R_EMULATORREV_SHIFT) & - INFINIPATH_R_EMULATORREV_MASK); - - snprintf(name, namelen, "%s(%u)", n, rev); - } else - snprintf(name, namelen, "%s", n); - } + if (n) + snprintf(name, namelen, "%s", n); if (dd->ipath_majrev != 5 || !dd->ipath_minrev || dd->ipath_minrev > 2) { @@ -965,6 +956,12 @@ static int ipath_7220_bringup_serdes(struct ipath_devdata *dd) INFINIPATH_HWE_SERDESPLLFAILED); } + dd->ibdeltainprog = 1; + dd->ibsymsnap = + ipath_read_creg32(dd, dd->ipath_cregs->cr_ibsymbolerrcnt); + dd->iblnkerrsnap = + ipath_read_creg32(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt); + if (!dd->ipath_ibcddrctrl) { /* not on re-init after reset */ dd->ipath_ibcddrctrl = @@ -1010,8 +1007,10 @@ static int ipath_7220_bringup_serdes(struct ipath_devdata *dd) ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcddrctrl, dd->ipath_ibcddrctrl); + ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xfeedbeef); ipath_write_kreg(dd, IPATH_KREG_OFFSET(IBNCModeCtrl), 0Ull); + ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xfeedbeef); /* IBA7220 has SERDES MPU reset in D0 of what _was_ IBPLLCfg */ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibserdesctrl); @@ -1046,7 +1045,7 @@ static int ipath_7220_bringup_serdes(struct ipath_devdata *dd) ipath_cdbg(VERBOSE, "done: xgxs=%llx from %llx\n", (unsigned long long) ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig), - prev_val); + (unsigned long long) prev_val); guid = 
be64_to_cpu(dd->ipath_guid); @@ -1056,8 +1055,10 @@ static int ipath_7220_bringup_serdes(struct ipath_devdata *dd) ipath_dbg("No GUID for heartbeat, faking %llx\n", (unsigned long long)guid); } else - ipath_cdbg(VERBOSE, "Wrote %llX to HRTBT_GUID\n", guid); + ipath_cdbg(VERBOSE, "Wrote %llX to HRTBT_GUID\n", + (unsigned long long) guid); ipath_write_kreg(dd, dd->ipath_kregs->kr_hrtbt_guid, guid); + ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xfeedbeef); return ret; } @@ -1097,6 +1098,37 @@ static void ipath_7220_config_jint(struct ipath_devdata *dd, static void ipath_7220_quiet_serdes(struct ipath_devdata *dd) { u64 val; + if (dd->ibsymdelta || dd->iblnkerrdelta || + dd->ibdeltainprog) { + u64 diagc; + /* enable counter writes */ + diagc = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwdiagctrl); + ipath_write_kreg(dd, dd->ipath_kregs->kr_hwdiagctrl, + diagc | INFINIPATH_DC_COUNTERWREN); + + if (dd->ibsymdelta || dd->ibdeltainprog) { + val = ipath_read_creg32(dd, + dd->ipath_cregs->cr_ibsymbolerrcnt); + if (dd->ibdeltainprog) + val -= val - dd->ibsymsnap; + val -= dd->ibsymdelta; + ipath_write_creg(dd, + dd->ipath_cregs->cr_ibsymbolerrcnt, val); + } + if (dd->iblnkerrdelta || dd->ibdeltainprog) { + val = ipath_read_creg32(dd, + dd->ipath_cregs->cr_iblinkerrrecovcnt); + if (dd->ibdeltainprog) + val -= val - dd->iblnkerrsnap; + val -= dd->iblnkerrdelta; + ipath_write_creg(dd, + dd->ipath_cregs->cr_iblinkerrrecovcnt, val); + } + + /* and disable counter writes */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_hwdiagctrl, diagc); + } + dd->ipath_flags &= ~IPATH_IB_AUTONEG_INPROG; wake_up(&dd->ipath_autoneg_wait); cancel_delayed_work(&dd->ipath_autoneg_work); @@ -1221,16 +1253,23 @@ static int ipath_msi_enabled(struct pci_dev *pdev) /* * disable msi interrupt if enabled, and clear the flag. - * flag is used primarily for the fallback to IntX, but + * flag is used primarily for the fallback to INTx, but * is also used in reinit after reset as a flag. 
*/ static void ipath_7220_nomsi(struct ipath_devdata *dd) { dd->ipath_msi_lo = 0; -#ifdef CONFIG_PCI_MSI - if (ipath_msi_enabled(dd->pcidev)) + + if (ipath_msi_enabled(dd->pcidev)) { + /* + * free, but don't zero; later kernels require + * it be freed before disable_msi, so the intx + * setup has to request it again. + */ + if (dd->ipath_irq) + free_irq(dd->ipath_irq, dd); pci_disable_msi(dd->pcidev); -#endif + } } /* @@ -1246,6 +1285,90 @@ static void ipath_setup_7220_cleanup(struct ipath_devdata *dd) { ipath_7220_nomsi(dd); } +/* + * Enable PCIe completion and data coalescing, on Intel 5x00 and 7300 + * chipsets. This is known to be unsafe for some revisions of some + * of these chipsets, with some BIOS settings, and enabling it on those + * systems may result in the system crashing, and/or data corruption. + */ +static void ipath_7220_tune_pcie_coalesce(struct ipath_devdata *dd) +{ + int r; + struct pci_dev *parent; + int ppos; + u16 devid; + u32 mask, bits, val; + + if (!ipath_pcie_coalesce) + return; + + /* Find out supported and configured values for parent (root) */ + parent = dd->pcidev->bus->self; + if (parent->bus->parent) { + dev_info(&dd->pcidev->dev, "Parent not root\n"); + return; + } + ppos = pci_find_capability(parent, PCI_CAP_ID_EXP); + if (!ppos) { + ipath_dbg("parent not PCIe root complex!?\n"); + return; + } + if (parent->vendor != 0x8086) { + ipath_dbg("VendorID 0x%x isn't Intel, skip\n", parent->vendor); + return; + } + + /* + * - bit 12: Max_rdcmp_Imt_EN: need to set to 1 + * - bit 11: COALESCE_FORCE: need to set to 0 + * - bit 10: COALESCE_EN: need to set to 1 + * (but limitations on some chipsets) + * + * On the Intel 5000, 5100, and 7300 chipsets, there is + * also: - bit 25:24: COALESCE_MODE, need to set to 0 + * OLSON: 10,11,12 may need to be gated by maxpayload + */ + devid = parent->device; + if (devid >= 0x25e2 && devid <= 0x25fa) { + /* 5000 P/V/X/Z */ + u8 rev; + pci_read_config_byte(parent, PCI_REVISION_ID, &rev); + if 
(rev <= 0xb2) { + bits = 1U << 10; + ipath_dbg("Old rev 5000* (0x%x), enable-only\n", rev); + } else + bits = 7U << 10; + mask = (3U << 24) | (7U << 10); + } else if (devid >= 0x65e2 && devid <= 0x65fa) { + /* 5100 */ + bits = 1U << 10; + mask = (3U << 24) | (7U << 10); + } else if (devid >= 0x4021 && devid <= 0x402e) { + /* 5400 */ + bits = 7U << 10; + mask = 7U << 10; + } else if (devid >= 0x3604 && devid <= 0x360a) { + /* 7300 */ + bits = 7U << 10; + mask = (3U << 24) | (7U << 10); + } else { + /* not one of the chipsets that we know about */ + ipath_dbg("DeviceID 0x%x isn't one we know, skip\n", devid); + return; + } + pci_read_config_dword(parent, 0x48, &val); + ipath_dbg("Read initial value 0x%x at 0x48, deviceid 0x%x\n", + val, devid); + val &= ~mask; + val |= bits; + r = pci_write_config_dword(parent, 0x48, val); + if (r) + ipath_dev_err(dd, "Unable to update deviceid 0x%x to val 0x%x" + " for PCIe coalescing\n", devid, val); + else + dev_info(&dd->pcidev->dev, "Updated deviceid 0x%x to val 0x%x" + " for PCIe coalescing\n", devid, val); +} static void ipath_7220_pcie_params(struct ipath_devdata *dd, u32 boardrev) { @@ -1305,6 +1428,8 @@ static void ipath_7220_pcie_params(struct ipath_devdata *dd, u32 boardrev) "PCIe linkspeed %u is incorrect; " "should be 1 (2500)!\n", speed); + ipath_7220_tune_pcie_coalesce(dd); + bail: /* fill in string, even on errors */ snprintf(dd->ipath_lbus_info, sizeof(dd->ipath_lbus_info), @@ -1342,7 +1467,8 @@ static int ipath_setup_7220_config(struct ipath_devdata *dd, u32 boardrev; dd->ipath_msi_lo = 0; /* used as a flag during reset processing */ -#ifdef CONFIG_PCI_MSI + + pos = pci_find_capability(pdev, PCI_CAP_ID_MSI); if (!strcmp(int_type, "force_msi") || !strcmp(int_type, "auto")) ret = pci_enable_msi(pdev); if (ret) { @@ -1357,7 +1483,7 @@ static int ipath_setup_7220_config(struct ipath_devdata *dd, if (!strcmp(int_type, "auto")) ipath_dev_err(dd, "pci_enable_msi failed: %d, " "falling back to INTx\n", ret); - } else if 
((pos = pci_find_capability(pdev, PCI_CAP_ID_MSI))) { + } else if (pos) { u16 control; pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO, &dd->ipath_msi_lo); @@ -1374,10 +1500,8 @@ static int ipath_setup_7220_config(struct ipath_devdata *dd, } else ipath_dev_err(dd, "Can't find MSI capability, " "can't save MSI settings for reset\n"); -#else - ipath_dbg("PCI_MSI not configured, using IntX interrupts\n"); - ipath_enable_intx(pdev); -#endif + + dd->ipath_irq = pdev->irq; /* * We save the cachelinesize also, although it doesn't @@ -1397,7 +1521,7 @@ static int ipath_setup_7220_config(struct ipath_devdata *dd, dd->ipath_flags |= IPATH_NODMA_RTAIL | IPATH_HAS_SEND_DMA | IPATH_HAS_PBC_CNT | IPATH_HAS_THRESH_UPDATE; - dd->ipath_pioupd_thresh = 4U; /* set default update threshold */ + dd->ipath_pioupd_thresh = 8U; /* set default update threshold */ return 0; } @@ -1578,7 +1702,7 @@ static void ipath_init_7220_variables(struct ipath_devdata *dd) static int ipath_reinit_msi(struct ipath_devdata *dd) { int ret = 0; -#ifdef CONFIG_PCI_MSI + int pos; u16 control; if (!dd->ipath_msi_lo) /* Using intX, or init problem */ @@ -1612,10 +1736,10 @@ static int ipath_reinit_msi(struct ipath_devdata *dd) ((control & PCI_MSI_FLAGS_64BIT) ? 
12 : 8), dd->ipath_msi_data); ret = 1; + bail: -#endif if (!ret) { - ipath_dbg("Using IntX, MSI disabled or not configured\n"); + ipath_dbg("Using INTx, MSI disabled or not configured\n"); ipath_enable_intx(dd->pcidev); ret = 1; } @@ -1727,7 +1851,7 @@ static void ipath_7220_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr, "not 2KB aligned!\n", pa); return; } - if (pa >= (1UL << IBA7220_TID_SZ_SHIFT)) { + if (chippa >= (1UL << IBA7220_TID_SZ_SHIFT)) { ipath_dev_err(dd, "BUG: Physical page address 0x%lx " "larger than supported\n", pa); @@ -1835,18 +1959,20 @@ static int ipath_7220_early_init(struct ipath_devdata *dd) dd->ipath_control |= 1<<4; dd->ipath_flags |= IPATH_4BYTE_TID; - if (ipath_special_trigger) - dd->ipath_flags |= IPATH_USE_SPCL_TRIG; /* * For openfabrics, we need to be able to handle an IB header of - * 24 dwords. HT chip has arbitrary sized receive buffers, so we - * made them the same size as the PIO buffers. This chip does not - * handle arbitrary size buffers, so we need the header large enough - * to handle largest IB header, but still have room for a 2KB MTU - * standard IB packet. + * at least 24 dwords. This chip does not handle arbitrary size + * buffers, so we need the header large enough to handle largest + * IB header, but still have room for a 2KB MTU standard IB packet. + * Additionally, some processor/memory controller combinations + * benefit quite strongly from having the DMA'ed data be cacheline + * aligned and a cacheline multiple, so we set the size to 32 dwords + * (2 64-byte primary cachelines for pretty much all processors of + * interest). The alignment hurts nothing, other than using somewhat + * more memory. 
*/ - dd->ipath_rcvhdrentsize = 24; + dd->ipath_rcvhdrentsize = 32; dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE; dd->ipath_rhf_offset = dd->ipath_rcvhdrentsize - sizeof(u64) / sizeof(u32); @@ -1918,12 +2044,15 @@ static int ipath_7220_get_base_info(struct ipath_portdata *pd, void *kbase) IPATH_RUNTIME_PCIE | IPATH_RUNTIME_NODMA_RTAIL | IPATH_RUNTIME_SDMA; - if (ipath_special_trigger) - kinfo->spi_runtime_flags |= IPATH_RUNTIME_SPECIAL_TRIGGER; - return 0; } +static void ipath_7220_free_irq(struct ipath_devdata *dd) +{ + free_irq(dd->ipath_irq, dd); + dd->ipath_irq = 0; +} + static struct ipath_message_header * ipath_7220_get_msgheader(struct ipath_devdata *dd, __le32 *rhf_addr) { @@ -1965,7 +2094,7 @@ static void ipath_7220_config_ports(struct ipath_devdata *dd, ushort cfgports) dd->ipath_rcvctrl); dd->ipath_p0_rcvegrcnt = 2048; /* always */ if (dd->ipath_flags & IPATH_HAS_SEND_DMA) - dd->ipath_pioreserved = 3; /* kpiobufs used for PIO */ + dd->ipath_pioreserved = 8; /* kpiobufs used for PIO */ } @@ -2127,6 +2256,7 @@ static int ipath_7220_set_ib_cfg(struct ipath_devdata *dd, int which, u32 val) dd->ipath_ibcddrctrl |= (((u64) val & maskr) << lsb); ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcddrctrl, dd->ipath_ibcddrctrl); + ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xfeedbeef); if (setforce) dd->ipath_flags |= IPATH_IB_FORCE_NOTIFY; bail: @@ -2143,14 +2273,25 @@ static void ipath_7220_read_counters(struct ipath_devdata *dd, counters[i] = ipath_snap_cntr(dd, i); } -/* if we are using MSI, try to fallback to IntX */ +/* if we are using MSI, try to fallback to INTx */ static int ipath_7220_intr_fallback(struct ipath_devdata *dd) { if (dd->ipath_msi_lo) { dev_info(&dd->pcidev->dev, "MSI interrupt not detected," - " trying IntX interrupts\n"); + " trying INTx interrupts\n"); ipath_7220_nomsi(dd); ipath_enable_intx(dd->pcidev); + /* + * some newer kernels require free_irq before disable_msi, + * and irq can be changed during disable and intx enable + * and we 
need to therefore use the pcidev->irq value, + * not our saved MSI value. + */ + dd->ipath_irq = dd->pcidev->irq; + if (request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED, + IPATH_DRV_NAME, dd)) + ipath_dev_err(dd, + "Could not re-request_irq for INTx\n"); return 1; } return 0; @@ -2203,12 +2344,6 @@ static void autoneg_send(struct ipath_devdata *dd, ipath_flush_wc(); __iowrite32_copy(piobuf + 2, hdr, 7); __iowrite32_copy(piobuf + 9, data, dcnt); - if (dd->ipath_flags & IPATH_USE_SPCL_TRIG) { - u32 spcl_off = (pnum > dd->ipath_piobcnt2k) ? - 2047 : 1023; - ipath_flush_wc(); - __raw_writel(0xaebecede, piobuf + spcl_off); - } ipath_flush_wc(); } @@ -2230,18 +2365,18 @@ static void ipath_autoneg_send(struct ipath_devdata *dd, int which) 0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x40000001, 0x1388, 0x15e, /* rest 0's */ }; - dcnt = sizeof(madpayload_start)/sizeof(madpayload_start[0]); - hcnt = sizeof(hdr)/sizeof(hdr[0]); + dcnt = ARRAY_SIZE(madpayload_start); + hcnt = ARRAY_SIZE(hdr); if (!swapped) { /* for maintainability, do it at runtime */ for (i = 0; i < hcnt; i++) { - dw = cpu_to_be32(hdr[i]); + dw = (__force u32) cpu_to_be32(hdr[i]); hdr[i] = dw; } for (i = 0; i < dcnt; i++) { - dw = cpu_to_be32(madpayload_start[i]); + dw = (__force u32) cpu_to_be32(madpayload_start[i]); madpayload_start[i] = dw; - dw = cpu_to_be32(madpayload_done[i]); + dw = (__force u32) cpu_to_be32(madpayload_done[i]); madpayload_done[i] = dw; } swapped = 1; @@ -2295,6 +2430,7 @@ static void set_speed_fast(struct ipath_devdata *dd, u32 speed) IBA7220_IBC_WIDTH_SHIFT; ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcddrctrl, dd->ipath_ibcddrctrl); + ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xfeedbeef); ipath_cdbg(VERBOSE, "setup for IB speed (%x) done\n", speed); } @@ -2314,6 +2450,7 @@ static void try_auto_neg(struct ipath_devdata *dd) */ ipath_write_kreg(dd, IPATH_KREG_OFFSET(IBNCModeCtrl), 0x3b9dc07); + ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xfeedbeef); dd->ipath_flags 
|= IPATH_IB_AUTONEG_INPROG; ipath_autoneg_send(dd, 0); set_speed_fast(dd, IPATH_IB_DDR); @@ -2326,7 +2463,7 @@ static void try_auto_neg(struct ipath_devdata *dd) static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs) { - int ret = 0; + int ret = 0, symadj = 0; u32 ltstate = ipath_ib_linkstate(dd, ibcs); dd->ipath_link_width_active = @@ -2369,6 +2506,13 @@ static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs) ipath_dbg("DDR negotiation try, %u/%u\n", dd->ipath_autoneg_tries, IPATH_AUTONEG_TRIES); + if (!dd->ibdeltainprog) { + dd->ibdeltainprog = 1; + dd->ibsymsnap = ipath_read_creg32(dd, + dd->ipath_cregs->cr_ibsymbolerrcnt); + dd->iblnkerrsnap = ipath_read_creg32(dd, + dd->ipath_cregs->cr_iblinkerrrecovcnt); + } try_auto_neg(dd); ret = 1; /* no other IB status change processing */ } else if ((dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) @@ -2389,6 +2533,7 @@ static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs) set_speed_fast(dd, dd->ipath_link_speed_enabled); wake_up(&dd->ipath_autoneg_wait); + symadj = 1; } else if (dd->ipath_flags & IPATH_IB_AUTONEG_FAILED) { /* * clear autoneg failure flag, and do setup @@ -2404,22 +2549,31 @@ static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs) IBA7220_IBC_IBTA_1_2_MASK; ipath_write_kreg(dd, IPATH_KREG_OFFSET(IBNCModeCtrl), 0); + ipath_write_kreg(dd, + dd->ipath_kregs->kr_scratch, + 0xfeedbeef); + symadj = 1; } } /* - * if we are in 1X, and are in autoneg width, it - * could be due to an xgxs problem, so if we haven't + * if we are in 1X on rev1 only, and are in autoneg width, + * it could be due to an xgxs problem, so if we haven't * already tried, try twice to get to 4X; if we * tried, and couldn't, report it, since it will * probably not be what is desired. 
*/ - if ((dd->ipath_link_width_enabled & (IB_WIDTH_1X | + if (dd->ipath_minrev == 1 && + (dd->ipath_link_width_enabled & (IB_WIDTH_1X | IB_WIDTH_4X)) == (IB_WIDTH_1X | IB_WIDTH_4X) && dd->ipath_link_width_active == IB_WIDTH_1X && dd->ipath_x1_fix_tries < 3) { - if (++dd->ipath_x1_fix_tries == 3) + if (++dd->ipath_x1_fix_tries == 3) { dev_info(&dd->pcidev->dev, "IB link is in 1X mode\n"); + if (!(dd->ipath_flags & + IPATH_IB_AUTONEG_INPROG)) + symadj = 1; + } else { ipath_cdbg(VERBOSE, "IB 1X in " "auto-width, try %u to be " @@ -2430,7 +2584,8 @@ static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs) dd->ipath_f_xgxs_reset(dd); ret = 1; /* skip other processing */ } - } + } else if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG)) + symadj = 1; if (!ret) { dd->delay_mult = rate_to_delay @@ -2441,6 +2596,25 @@ static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs) } } + if (symadj) { + if (dd->ibdeltainprog) { + dd->ibdeltainprog = 0; + dd->ibsymdelta += ipath_read_creg32(dd, + dd->ipath_cregs->cr_ibsymbolerrcnt) - + dd->ibsymsnap; + dd->iblnkerrdelta += ipath_read_creg32(dd, + dd->ipath_cregs->cr_iblinkerrrecovcnt) - + dd->iblnkerrsnap; + } + } else if (!ibup && !dd->ibdeltainprog + && !(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG)) { + dd->ibdeltainprog = 1; + dd->ibsymsnap = ipath_read_creg32(dd, + dd->ipath_cregs->cr_ibsymbolerrcnt); + dd->iblnkerrsnap = ipath_read_creg32(dd, + dd->ipath_cregs->cr_iblinkerrrecovcnt); + } + if (!ret) ipath_setup_7220_setextled(dd, ipath_ib_linkstate(dd, ibcs), ltstate); @@ -2507,7 +2681,7 @@ done: if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) { ipath_dbg("Did not get to DDR INIT (%x) after %Lu msecs\n", ipath_ib_state(dd, dd->ipath_lastibcstat), - jiffies_to_msecs(jiffies)-startms); + (unsigned long long) jiffies_to_msecs(jiffies)-startms); dd->ipath_flags &= ~IPATH_IB_AUTONEG_INPROG; if (dd->ipath_autoneg_tries == IPATH_AUTONEG_TRIES) { dd->ipath_flags |= IPATH_IB_AUTONEG_FAILED; @@ 
-2543,6 +2717,7 @@ void ipath_init_iba7220_funcs(struct ipath_devdata *dd) dd->ipath_f_cleanup = ipath_setup_7220_cleanup; dd->ipath_f_setextled = ipath_setup_7220_setextled; dd->ipath_f_get_base_info = ipath_7220_get_base_info; + dd->ipath_f_free_irq = ipath_7220_free_irq; dd->ipath_f_tidtemplate = ipath_7220_tidtemplate; dd->ipath_f_intr_fallback = ipath_7220_intr_fallback; dd->ipath_f_xgxs_reset = ipath_7220_xgxs_reset; diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index edd5c92..c01ff2a 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -37,6 +37,7 @@ #include "ipath_kernel.h" #include "ipath_common.h" +#include "ipath_wc_pat.h" /* * min buffers we want to have per port, after driver @@ -126,7 +127,7 @@ static int create_port0_egr(struct ipath_devdata *dd) dd->ipath_port0_skbinfo[e].phys = ipath_map_single(dd->pcidev, dd->ipath_port0_skbinfo[e].skb->data, - dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE); + dd->ipath_init_ibmaxlen, PCI_DMA_FROMDEVICE); dd->ipath_f_put_tid(dd, e + (u64 __iomem *) ((char __iomem *) dd->ipath_kregbase + dd->ipath_rcvegrbase), @@ -220,6 +221,131 @@ static struct ipath_portdata *create_portdata0(struct ipath_devdata *dd) return pd; } +static int init_chip_wc_pat(struct ipath_devdata *dd) +{ + int ret = 0; + u64 __iomem *ipath_kregbase = NULL; + void __iomem *ipath_piobase = NULL; + u64 __iomem *ipath_userbase = NULL; + u64 ipath_kreglen; + u64 ipath_pio2koffset = dd->ipath_piobufbase & 0xffffffff; + u64 ipath_pio4koffset = dd->ipath_piobufbase >> 32; + u64 ipath_pio2klen = dd->ipath_piobcnt2k * dd->ipath_palign; + u64 ipath_pio4klen = dd->ipath_piobcnt4k * dd->ipath_4kalign; + u64 ipath_physaddr = dd->ipath_physaddr; + u64 ipath_piolen; + u64 ipath_userlen = 0; + + /* Assumes chip address space looks like: + - kregs + sregs + cregs + uregs (in any order) + - piobufs (2K and 4K bufs in either order) + or: + - kregs + 
sregs + cregs (in any order) + - piobufs (2K and 4K bufs in either order) + - uregs + */ + if (dd->ipath_piobcnt4k == 0) { + ipath_kreglen = ipath_pio2koffset; + ipath_piolen = ipath_pio2klen; + } else if (ipath_pio2koffset < ipath_pio4koffset) { + ipath_kreglen = ipath_pio2koffset; + ipath_piolen = ipath_pio4koffset + ipath_pio4klen - + ipath_kreglen; + } else { + ipath_kreglen = ipath_pio4koffset; + ipath_piolen = ipath_pio2koffset + ipath_pio2klen - + ipath_kreglen; + } + if (dd->ipath_sregbase > ipath_kreglen) { + ipath_dbg("Unexpected sregbase layout\n"); + ret = -EINVAL; + goto done; + } + if (dd->ipath_cregbase > ipath_kreglen) { + ipath_dbg("Unexpected cregbase layout\n"); + ret = -EINVAL; + goto done; + } + if (dd->ipath_uregbase > ipath_kreglen) + /* Map just the configured ports (not all hw ports) */ + ipath_userlen = dd->ipath_ureg_align * + dd->ipath_cfgports; + + /* Sanity checks passed, now create the new mappings */ + ipath_kregbase = ioremap_nocache(ipath_physaddr, + ipath_kreglen); + if (!ipath_kregbase) { + ipath_dbg("Unable to remap io addr %llx to kvirt\n", + ipath_physaddr); + ret = -ENOMEM; + goto done; + } + ipath_cdbg(VERBOSE, "WC PAT remapped io addr %llx" + " to kregbase %p for %llu bytes\n", + ipath_physaddr, ipath_kregbase, ipath_kreglen); + + ipath_piobase = (void __iomem *) ioremap_wc( + ipath_physaddr + ipath_kreglen, + ipath_piolen); + if (!ipath_piobase) { + ipath_dbg("Unable to remap io addr %llx to kvirt\n", + ipath_physaddr + ipath_kreglen); + ret = -ENOMEM; + goto done_kregbase; + } + ipath_cdbg(VERBOSE, "WC PAT remapped io addr %llx" + " to piobase %p for %llu bytes\n", + ipath_physaddr + ipath_kreglen, + ipath_piobase, ipath_piolen); + + if (ipath_userlen) { + ipath_userbase = (void __iomem *) ioremap_nocache( + ipath_physaddr + + dd->ipath_uregbase, + ipath_userlen); + if (!ipath_userbase) { + ipath_dbg("Unable to remap io addr %llx " + "to kvirt\n", + ipath_physaddr + dd->ipath_uregbase); + ret = -ENOMEM; + goto 
done_piobase; + } + ipath_cdbg(VERBOSE, "WC PAT remapped io addr %llx" + " to userbase %p for %llu bytes\n", + ipath_physaddr + dd->ipath_uregbase, + ipath_userbase, ipath_userlen); + } + + /* All remapping successful, get rid of old mapping */ + iounmap((volatile void __iomem *) dd->ipath_kregbase); + + /* Finally update dd with the changes */ + dd->ipath_kregbase = ipath_kregbase; + dd->ipath_kregend = (u64 __iomem *) + ((char __iomem *) ipath_kregbase + ipath_kreglen); + dd->ipath_piobase = ipath_piobase; + dd->ipath_pio2kbase = (void __iomem *) + (((char __iomem *) dd->ipath_piobase) + + ipath_pio2koffset - ipath_kreglen); + if (dd->ipath_piobcnt4k) + dd->ipath_pio4kbase = (void __iomem *) + (((char __iomem *) dd->ipath_piobase) + + ipath_pio4koffset - ipath_kreglen); + if (ipath_userlen) + /* ureg will now be accessed relative to dd->ipath_userbase */ + dd->ipath_userbase = ipath_userbase; + goto done; + +done_piobase: + iounmap((volatile void __iomem *) ipath_piobase); + +done_kregbase: + iounmap((volatile void __iomem *) ipath_kregbase); + +done: + return ret; +} + static int init_chip_first(struct ipath_devdata *dd) { struct ipath_portdata *pd; @@ -229,6 +355,7 @@ static int init_chip_first(struct ipath_devdata *dd) spin_lock_init(&dd->ipath_kernel_tid_lock); spin_lock_init(&dd->ipath_user_tid_lock); spin_lock_init(&dd->ipath_sendctrl_lock); + spin_lock_init(&dd->ipath_uctxt_lock); spin_lock_init(&dd->ipath_sdma_lock); spin_lock_init(&dd->ipath_gpio_lock); spin_lock_init(&dd->ipath_eep_st_lock); @@ -314,6 +441,15 @@ static int init_chip_first(struct ipath_devdata *dd) */ dd->ipath_4kalign = ALIGN(dd->ipath_piosize4k, dd->ipath_palign); + } + + if (ipath_wc_pat) { + ret = init_chip_wc_pat(dd); + if (ret) + goto done; + } + + if (dd->ipath_piobcnt4k) { ipath_dbg("%u 2k(%x) piobufs @ %p, %u 4k(%x) @ %p " "(%x aligned)\n", dd->ipath_piobcnt2k, dd->ipath_piosize2k, @@ -483,8 +619,6 @@ static void enable_chip(struct ipath_devdata *dd, int reinit) /* Enable PIO 
send, and update of PIOavail regs to memory. */ dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE | INFINIPATH_S_PIOBUFAVAILUPD; - if (dd->ipath_flags & IPATH_USE_SPCL_TRIG) - dd->ipath_sendctrl |= INFINIPATH_S_SPECIALTRIGGER; /* * Set the PIO avail update threshold to host memory @@ -535,21 +669,21 @@ static void enable_chip(struct ipath_devdata *dd, int reinit) * initial values of the generation bit correct. */ for (i = 0; i < dd->ipath_pioavregs; i++) { - __le64 tmp; + __le64 pioavail; /* * Chip Errata bug 6641; even and odd qwords>3 are swapped. */ if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) - tmp = dd->ipath_pioavailregs_dma[i ^ 1]; + pioavail = dd->ipath_pioavailregs_dma[i ^ 1]; else - tmp = dd->ipath_pioavailregs_dma[i]; + pioavail = dd->ipath_pioavailregs_dma[i]; /* * don't need to worry about ipath_pioavailkernel here * because we will call ipath_chg_pioavailkernel() later * in initialization, to busy out buffers as needed */ - dd->ipath_pioavailshadow[i] = le64_to_cpu(tmp); + dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail); } /* can get counters, stats, etc. */ dd->ipath_flags |= IPATH_PRESENT; @@ -655,10 +789,7 @@ static int init_housekeeping(struct ipath_devdata *dd, int reinit) INFINIPATH_R_SOFTWARE_SHIFT) & INFINIPATH_R_SOFTWARE_MASK); - if (dd->ipath_revision & INFINIPATH_R_EMULATOR_MASK) - dev_info(&dd->pcidev->dev, "%s", dd->ipath_boardversion); - else - ipath_dbg("%s", dd->ipath_boardversion); + ipath_dbg("%s", dd->ipath_boardversion); if (ret) goto done; @@ -672,7 +803,6 @@ done: return ret; } - static void verify_interrupt(unsigned long opaque) { struct ipath_devdata *dd = (struct ipath_devdata *) opaque; @@ -681,7 +811,7 @@ static void verify_interrupt(unsigned long opaque) return; /* being torn down */ /* - * If we don't have a lid or any interrupts, let the user know and + * If we don't have any interrupts, let the user know and * don't bother checking again. 
*/ if (dd->ipath_int_counter == 0) { @@ -695,7 +825,6 @@ static void verify_interrupt(unsigned long opaque) dd->ipath_int_counter); } - /** * ipath_init_chip - do the actual initialization sequence on the chip * @dd: the infinipath device @@ -793,11 +922,17 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) "ports <= %u\n", dd->ipath_pbufsport, dd->ipath_ports_extrabuf); dd->ipath_lastpioindex = 0; - dd->ipath_lastpioindexl = dd->ipath_lastport_piobuf; + dd->ipath_lastpioindexl = dd->ipath_piobcnt2k; /* ipath_pioavailshadow initialized earlier */ ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u " "each for %u user ports\n", kpiobufs, piobufs, dd->ipath_pbufsport, uports); + if (dd->ipath_pioupd_thresh && + (dd->ipath_pioupd_thresh > dd->ipath_pbufsport - 2)) { + dd->ipath_pioupd_thresh = dd->ipath_pbufsport - 2; + ipath_cdbg(VERBOSE, "Drop pioupd_thresh to %u\n", + dd->ipath_pioupd_thresh); + } ret = dd->ipath_f_early_init(dd); if (ret) { ipath_dev_err(dd, "Early initialization failure\n"); diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index b3445e9..e73afd9 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -243,7 +243,6 @@ static char *ib_linkstate(struct ipath_devdata *dd, u64 ibcs) return ret; } - void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev) { struct ib_event event; @@ -269,7 +268,8 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd, lastlstate = ipath_ib_linkstate(dd, dd->ipath_lastibcstat); ltstate = ipath_ib_linktrstate(dd, ibcs); /* linktrainingtate */ - /* Since going into a recovery state causes the link state to go + /* + * Since going into a recovery state causes the link state to go * down and since recovery is transitory, it is better if we "miss" * ever seeing the link training state go into recovery (i.e., * ignore this transition for link state special handling purposes) @@ -328,12 
+328,12 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd, * Ignore cycling back and forth from Polling.Active to * Polling.Quiet while waiting for the other end of the link * to come up, except to try and decide if we are connected - * to a live IB device or not. We will * cycle back and + * to a live IB device or not. We will cycle back and * forth between them if no cable is plugged in, the other * device is powered off or disabled, etc. */ - if (lastlts == INFINIPATH_IBCS_LT_STATE_POLLACTIVE - || lastlts == INFINIPATH_IBCS_LT_STATE_POLLQUIET) { + if (lastlts == INFINIPATH_IBCS_LT_STATE_POLLACTIVE || + lastlts == INFINIPATH_IBCS_LT_STATE_POLLQUIET) { if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) && (++dd->ipath_ibpollcnt == 40)) { dd->ipath_flags |= IPATH_NOCABLE; @@ -350,16 +350,16 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd, dd->ipath_ibpollcnt = 0; /* not poll*, now */ ipath_stats.sps_iblink++; - if (ibstate != init && dd->ipath_lastlinkrecov && - ipath_linkrecovery) { + if (ibstate != init && dd->ipath_lastlinkrecov && ipath_linkrecovery) { u64 linkrecov; linkrecov = ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt); if (linkrecov != dd->ipath_lastlinkrecov) { ipath_dbg("IB linkrecov up %Lx (%s %s) recov %Lu\n", - ibcs, ib_linkstate(dd, ibcs), + (unsigned long long) ibcs, + ib_linkstate(dd, ibcs), ipath_ibcstatus_str[ltstate], - linkrecov); + (unsigned long long) linkrecov); /* and no more until active again */ dd->ipath_lastlinkrecov = 0; ipath_set_linkstate(dd, IPATH_IB_LINKDOWN); @@ -449,9 +449,8 @@ done: return; } - static void handle_supp_msgs(struct ipath_devdata *dd, - unsigned supp_msgs, char *msg, u32 msgsz) + unsigned supp_msgs, char *msg, u32 msgsz) { /* * Print the message unless it's ibc status change only, which @@ -461,8 +460,8 @@ static void handle_supp_msgs(struct ipath_devdata *dd, int iserr; ipath_err_t mask; iserr = ipath_decode_err(dd, msg, msgsz, - dd->ipath_lasterror & - 
~INFINIPATH_E_IBSTATUSCHANGED); + dd->ipath_lasterror & + ~INFINIPATH_E_IBSTATUSCHANGED); mask = INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS | INFINIPATH_E_SDMADISABLED; @@ -550,13 +549,20 @@ static void handle_sdma_errors(struct ipath_devdata *dd, ipath_err_t errs) ipath_cdbg(VERBOSE, "sdma tl 0x%lx hd 0x%lx status 0x%lx " "lengen 0x%lx\n", tl, hd, status, lengen); } - + expected = test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status); + ipath_dbg("%sxpected sdma error, sdma_status 0x%lx\n", + expected ? "e" : "une", dd->ipath_sdma_status); + /* + * we are in interrupt context (and only one interrupt vector), + * so we won't get another interrupt and process the sdma state + * change before the set_bit of SDMA_DISABLED. We set DISABLED + * here because there are cases where abort_task will not. + */ + if (!expected) /* must be prior to setting SDMA_DISABLED */ + ipath_cancel_sends(dd, 1); spin_lock_irqsave(&dd->ipath_sdma_lock, flags); __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status); - expected = test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status); spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - if (!expected) - ipath_cancel_sends(dd, 1); } static void handle_sdma_intr(struct ipath_devdata *dd, u64 istat) @@ -571,13 +577,19 @@ static void handle_sdma_intr(struct ipath_devdata *dd, u64 istat) if (istat & INFINIPATH_I_SDMADISABLED) { expected = test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status); - ipath_dbg("%s SDmaDisabled intr\n", - expected ? "expected" : "unexpected"); + ipath_dbg("%sxpected sdma disabled intr, sdma_status 0x%lx\n", + expected ? "e" : "une", dd->ipath_sdma_status); + /* + * we are in interrupt context (and only one interrupt vector), + * so we won't get another interrupt and process the sdma state + * change before the set_bit of SDMA_DISABLED. We set DISABLED + * here because there are cases where abort_task will not. 
+ */ + if (!expected) /* must be prior to setting SDMA_DISABLED */ + ipath_cancel_sends(dd, 1); spin_lock_irqsave(&dd->ipath_sdma_lock, flags); __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status); spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - if (!expected) - ipath_cancel_sends(dd, 1); if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) tasklet_hi_schedule(&dd->ipath_sdma_abort_task); } @@ -588,8 +600,10 @@ static int handle_hdrq_full(struct ipath_devdata *dd) int chkerrpkts = 0; u32 hd, tl; u32 i; + unsigned long flags; ipath_stats.sps_hdrqfull++; + spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); for (i = 0; i < dd->ipath_cfgports; i++) { struct ipath_portdata *pd = dd->ipath_pd[i]; @@ -625,6 +639,7 @@ static int handle_hdrq_full(struct ipath_devdata *dd) wake_up_interruptible(&pd->port_wait); } } + spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); return chkerrpkts; } @@ -939,14 +954,14 @@ static noinline void ipath_bad_intr(struct ipath_devdata *dd, u32 *unexpectp) * linuxbios development work, and it may happen in * the future again. 
*/ - if (dd->pcidev && dd->pcidev->irq) { + if (dd->pcidev && dd->ipath_irq) { ipath_dev_err(dd, "Now %u unexpected " "interrupts, unregistering " "interrupt handler\n", *unexpectp); - ipath_dbg("free_irq of irq %x\n", - dd->pcidev->irq); - free_irq(dd->pcidev->irq, dd); + ipath_dbg("free_irq of irq %d\n", + dd->ipath_irq); + dd->ipath_f_free_irq(dd); } } if (ipath_read_ireg(dd, dd->ipath_kregs->kr_intmask)) { @@ -982,7 +997,7 @@ static noinline void ipath_bad_regread(struct ipath_devdata *dd) if (allbits == 2) { ipath_dev_err(dd, "Still bad interrupt status, " "unregistering interrupt\n"); - free_irq(dd->pcidev->irq, dd); + dd->ipath_f_free_irq(dd); } else if (allbits > 2) { if ((allbits % 10000) == 0) printk("."); @@ -1011,7 +1026,6 @@ set: spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); } - /* * Handle receive interrupts for user ports; this means a user * process was waiting for a packet to arrive, and didn't want @@ -1022,6 +1036,7 @@ static void handle_urcv(struct ipath_devdata *dd, u64 istat) u64 portr; int i; int rcvdint = 0; + unsigned long flags; /* * test_and_clear_bit(IPATH_PORT_WAITING_RCV) and @@ -1037,6 +1052,7 @@ static void handle_urcv(struct ipath_devdata *dd, u64 istat) dd->ipath_i_rcvavail_mask) | ((istat >> dd->ipath_i_rcvurg_shift) & dd->ipath_i_rcvurg_mask); + spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); for (i = 1; i < dd->ipath_cfgports; i++) { struct ipath_portdata *pd = dd->ipath_pd[i]; @@ -1054,6 +1070,8 @@ static void handle_urcv(struct ipath_devdata *dd, u64 istat) } } } + spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); + if (rcvdint) { /* only want to take one interrupt, so turn off the rcv * interrupt for all the ports that we set the rcv_waiting @@ -1121,9 +1139,11 @@ irqreturn_t ipath_intr(int irq, void *data) if (unlikely(istat & ~dd->ipath_i_bitsextant)) ipath_dev_err(dd, "interrupt with unknown interrupts %Lx set\n", + (unsigned long long) istat & ~dd->ipath_i_bitsextant); else if (istat & ~INFINIPATH_I_ERROR) 
/* errors do own printing */ - ipath_cdbg(VERBOSE, "intr stat=0x%Lx\n", istat); + ipath_cdbg(VERBOSE, "intr stat=0x%Lx\n", + (unsigned long long) istat); if (istat & INFINIPATH_I_ERROR) { ipath_stats.sps_errints++; @@ -1131,7 +1151,8 @@ irqreturn_t ipath_intr(int irq, void *data) dd->ipath_kregs->kr_errorstatus); if (!estat) dev_info(&dd->pcidev->dev, "error interrupt (%Lx), " - "but no error bits set!\n", istat); + "but no error bits set!\n", + (unsigned long long) istat); else if (estat == -1LL) /* * should we try clearing all, or hope next read @@ -1238,17 +1259,14 @@ irqreturn_t ipath_intr(int irq, void *data) * waiting for receive are at the bottom. */ kportrbits = (1ULL << dd->ipath_i_rcvavail_shift) | - (1ULL << dd->ipath_i_rcvurg_shift) | - INFINIPATH_I_JINT; + (1ULL << dd->ipath_i_rcvurg_shift); if (chk0rcv || (istat & kportrbits)) { istat &= ~kportrbits; ipath_kreceive(dd->ipath_pd[0]); } - if (istat & ((dd->ipath_i_rcvavail_mask << - dd->ipath_i_rcvavail_shift) - | (dd->ipath_i_rcvurg_mask << - dd->ipath_i_rcvurg_shift))) + if (istat & ((dd->ipath_i_rcvavail_mask << dd->ipath_i_rcvavail_shift) | + (dd->ipath_i_rcvurg_mask << dd->ipath_i_rcvurg_shift))) handle_urcv(dd, istat); if (istat & (INFINIPATH_I_SDMAINT | INFINIPATH_I_SDMADISABLED)) diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 7754f5e..65f9234 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -43,7 +43,7 @@ #include <linux/dma-mapping.h> #include <linux/mutex.h> #include <linux/list.h> -#include <asm/scatterlist.h> +#include <linux/scatterlist.h> #include <asm/io.h> #include <rdma/ib_verbs.h> @@ -102,7 +102,6 @@ struct ipath_portdata { /* mmap of hdrq, must fit in 44 bits */ dma_addr_t port_rcvhdrq_phys; dma_addr_t port_rcvhdrqtailaddr_phys; - /* * number of opens (including slave subports) on this instance * (ignoring forks, dup, etc. 
for now) @@ -211,7 +210,6 @@ struct ipath_sdma_txreq { }; void (*callback)(void *, int); void *callback_cookie; - int callback_status; u16 start_idx; /* sdma private */ u16 next_descq_idx; /* sdma private */ struct list_head list; /* sdma private */ @@ -227,13 +225,17 @@ struct ipath_sdma_desc { #define IPATH_SDMA_TXREQ_F_FREEBUF 0x8 #define IPATH_SDMA_TXREQ_F_FREEDESC 0x10 #define IPATH_SDMA_TXREQ_F_VL15 0x20 -#define IPATH_SDMA_TXREQ_F_FREECNT_HACK 0x40 /*XXX*/ #define IPATH_SDMA_TXREQ_S_OK 0 #define IPATH_SDMA_TXREQ_S_SENDERROR 1 #define IPATH_SDMA_TXREQ_S_ABORTED 2 #define IPATH_SDMA_TXREQ_S_SHUTDOWN 3 +#define IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG (1ull << 63) +#define IPATH_SDMA_STATUS_ABORT_IN_PROG (1ull << 62) +#define IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE (1ull << 61) +#define IPATH_SDMA_STATUS_SCB_EMPTY (1ull << 30) + /* max dwords in small buffer packet */ #define IPATH_SMALLBUF_DWORDS (dd->ipath_piosize2k >> 2) @@ -275,6 +277,10 @@ struct ipath_devdata { void __iomem *ipath_pio2kbase; /* kvirt address of 1st 4k pio buffer */ void __iomem *ipath_pio4kbase; + /* mem-mapped pointer to base of PIO buffers (if using WC PAT) */ + void __iomem *ipath_piobase; + /* mem-mapped pointer to base of user chip regs (if using WC PAT) */ + u64 __iomem *ipath_userbase; /* * points to area where PIOavail registers will be DMA'ed. 
* Has to be on a page of it's own, because the page will be @@ -310,6 +316,8 @@ struct ipath_devdata { void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64); /* fill out chip-specific fields */ int (*ipath_f_get_base_info)(struct ipath_portdata *, void *); + /* free irq */ + void (*ipath_f_free_irq)(struct ipath_devdata *); struct ipath_message_header *(*ipath_f_get_msgheader) (struct ipath_devdata *, __le32 *); void (*ipath_f_config_ports)(struct ipath_devdata *, ushort); @@ -350,6 +358,19 @@ struct ipath_devdata { /* errors masked because they occur too fast */ ipath_err_t ipath_maskederrs; u64 ipath_lastlinkrecov; /* link recoveries at last ACTIVE */ + /* these 5 fields are used to establish deltas for IB Symbol + * errors and linkrecovery errors. They can be reported on + * some chips during link negotiation prior to INIT, and with + * DDR when faking DDR negotiations with non-IBTA switches. + * The chip counters are adjusted at driver unload if there is + * a non-zero delta. + */ + u64 ibdeltainprog; + u64 ibsymdelta; + u64 ibsymsnap; + u64 iblnkerrdelta; + u64 iblnkerrsnap; + /* time in jiffies at which to re-enable maskederrs */ unsigned long ipath_unmasktime; /* count of egrfull errors, combined for all ports */ @@ -376,7 +397,6 @@ struct ipath_devdata { u32 ipath_lastport_piobuf; /* is a stats timer active */ u32 ipath_stats_timer_active; - u32 ipath_link_timer_active; /* number of interrupts for this device -- saturates... */ u32 ipath_int_counter; /* dwords sent read from counter */ @@ -460,6 +480,8 @@ struct ipath_devdata { spinlock_t ipath_kernel_tid_lock; spinlock_t ipath_user_tid_lock; spinlock_t ipath_sendctrl_lock; + /* around ipath_pd and (user ports) port_cnt use (intr vs free) */ + spinlock_t ipath_uctxt_lock; /* * IPATH_STATUS_*, @@ -477,7 +499,6 @@ struct ipath_devdata { struct class_device *diag_class_dev; /* timer used to prevent stats overflow, error throttling, etc. 
*/ struct timer_list ipath_stats_timer; - struct timer_list ipath_link_timer; /* timer to verify interrupts work, and fallback if possible */ struct timer_list ipath_intrchk_timer; void *ipath_dummy_hdrq; /* used after port close */ @@ -485,7 +506,7 @@ struct ipath_devdata { /* SendDMA related entries */ spinlock_t ipath_sdma_lock; - u64 ipath_sdma_status; + unsigned long ipath_sdma_status; unsigned long ipath_sdma_abort_jiffies; unsigned long ipath_sdma_abort_intr_timeout; unsigned long ipath_sdma_buf_jiffies; @@ -500,9 +521,7 @@ struct ipath_devdata { u16 ipath_sdma_reset_wait; u8 ipath_sdma_generation; struct tasklet_struct ipath_sdma_abort_task; - struct tasklet_struct ipath_sdma_notify_task; struct list_head ipath_sdma_activelist; - struct list_head ipath_sdma_notifylist; atomic_t ipath_sdma_vl15_count; struct timer_list ipath_sdma_vl15_timer; @@ -719,9 +738,6 @@ struct ipath_devdata { u32 ipath_overrun_thresh_errs; u32 ipath_lli_errs; - /* status check work */ - struct delayed_work status_work; - /* * Not all devices managed by a driver instance are the same * type, so these fields must be per-device. 
@@ -815,7 +831,7 @@ struct ipath_devdata { u8 ipath_presets_needed; /* Set if presets to be restored next DOWN */ }; -/* ipath_hol_state values (stopping/starting user proc, send flushing */ +/* ipath_hol_state values (stopping/starting user proc, send flushing) */ #define IPATH_HOL_UP 0 #define IPATH_HOL_DOWN 1 /* ipath_hol_next toggle values, used when hol_state IPATH_HOL_DOWN */ @@ -827,8 +843,8 @@ struct ipath_devdata { #define IPATH_SDMA_DISARMED 1 #define IPATH_SDMA_DISABLED 2 #define IPATH_SDMA_LAYERBUF 3 -#define IPATH_SDMA_RUNNING 62 -#define IPATH_SDMA_SHUTDOWN 63 +#define IPATH_SDMA_RUNNING 30 +#define IPATH_SDMA_SHUTDOWN 31 /* bit combinations that correspond to abort states */ #define IPATH_SDMA_ABORT_NONE 0 @@ -862,7 +878,6 @@ void ipath_disable_wc(struct ipath_devdata *dd); int ipath_count_units(int *npresentp, int *nupp, int *maxportsp); void ipath_shutdown_device(struct ipath_devdata *); void ipath_clear_freeze(struct ipath_devdata *); -int ipath_signal_procs(struct ipath_devdata *, int); struct file_operations; int ipath_cdev_init(int minor, char *name, const struct file_operations *fops, @@ -884,7 +899,7 @@ extern int ipath_diag_inuse; irqreturn_t ipath_intr(int irq, void *devid); int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen, - ipath_err_t err); + ipath_err_t err); #if __IPATH_INFO || __IPATH_DBG extern const char *ipath_ibcstatus_str[]; #endif @@ -988,8 +1003,6 @@ void ipath_shutdown_relock_poll(struct ipath_devdata *); #define IPATH_HAS_PBC_CNT 0x800000 /* Suppress heartbeat, even if turning off loopback */ #define IPATH_NO_HRTBT 0x1000000 - /* 7220 SpecialTrigger arm/launch mechanism */ -#define IPATH_USE_SPCL_TRIG 0x2000000 #define IPATH_HAS_THRESH_UPDATE 0x4000000 #define IPATH_HAS_MULT_IB_SPEED 0x8000000 #define IPATH_IB_AUTONEG_INPROG 0x10000000 @@ -1025,7 +1038,6 @@ void ipath_get_eeprom_info(struct ipath_devdata *); int ipath_update_eeprom_log(struct ipath_devdata *dd); void ipath_inc_eeprom_err(struct 
ipath_devdata *dd, u32 eidx, u32 incr); u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); -void ipath_dump_lookup_output_queue(struct ipath_devdata *); void ipath_disarm_senderrbufs(struct ipath_devdata *); void ipath_force_pio_avail_update(struct ipath_devdata *); void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev); @@ -1114,10 +1126,15 @@ static inline u32 ipath_read_ureg32(const struct ipath_devdata *dd, if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) return 0; - return readl(regno + (u64 __iomem *) - (dd->ipath_uregbase + - (char __iomem *)dd->ipath_kregbase + - dd->ipath_ureg_align * port)); + if (dd->ipath_userbase) + return readl(regno + (u64 __iomem *) + ((char __iomem *)dd->ipath_userbase + + dd->ipath_ureg_align * port)); + else + return readl(regno + (u64 __iomem *) + (dd->ipath_uregbase + + (char __iomem *)dd->ipath_kregbase + + dd->ipath_ureg_align * port)); } /** @@ -1132,9 +1149,17 @@ static inline u32 ipath_read_ureg32(const struct ipath_devdata *dd, static inline void ipath_write_ureg(const struct ipath_devdata *dd, ipath_ureg regno, u64 value, int port) { - u64 __iomem *ubase = (u64 __iomem *) - (dd->ipath_uregbase + (char __iomem *) dd->ipath_kregbase + - dd->ipath_ureg_align * port); + u64 __iomem *ubase; + if (dd->ipath_userbase) + ubase = (u64 __iomem *) + ((char __iomem *) dd->ipath_userbase + + dd->ipath_ureg_align * port); + else + ubase = (u64 __iomem *) + (dd->ipath_uregbase + + (char __iomem *) dd->ipath_kregbase + + dd->ipath_ureg_align * port); + if (dd->ipath_kregbase) writeq(value, &ubase[regno]); } @@ -1308,11 +1333,8 @@ const char *ipath_get_unit_name(int unit); extern unsigned ipath_debug; /* debugging bit mask */ extern unsigned ipath_linkrecovery; extern unsigned ipath_mtu4096; -extern unsigned ipath_sdma_fetch_arb; extern struct mutex ipath_mutex; - - #define IPATH_DRV_NAME "ib_ipath" #define IPATH_MAJOR 233 #define IPATH_USER_MINOR_BASE 0 diff --git 
a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c index 8f32b17..6dfb578 100644 --- a/drivers/infiniband/hw/ipath/ipath_keys.c +++ b/drivers/infiniband/hw/ipath/ipath_keys.c @@ -93,17 +93,37 @@ bail: * @rkt: table from which to free the lkey * @lkey: lkey id to free */ -void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey) +int ipath_free_lkey(struct ipath_ibdev *dev, struct ipath_mregion *mr) { unsigned long flags; + u32 lkey = mr->lkey; u32 r; + int ret; - if (lkey == 0) - return; - r = lkey >> (32 - ib_ipath_lkey_table_size); - spin_lock_irqsave(&rkt->lock, flags); - rkt->table[r] = NULL; - spin_unlock_irqrestore(&rkt->lock, flags); + spin_lock_irqsave(&dev->lk_table.lock, flags); + if (lkey == 0) { + if (dev->dma_mr) { + ret = atomic_read(&dev->dma_mr->refcount); + if (dev->dma_mr == mr) { + if (!ret) + dev->dma_mr = NULL; + } else + ret = 0; + } else + ret = 0; + } else { + r = lkey >> (32 - ib_ipath_lkey_table_size); + ret = atomic_read(&dev->lk_table.table[r]->refcount); + if (!ret) + dev->lk_table.table[r] = NULL; + } + spin_unlock_irqrestore(&dev->lk_table.lock, flags); + + if (ret) { + ipath_dbg("MR busy (LKEY %x cnt %u)\n", lkey, ret); + ret = -EBUSY; + } + return ret; } /** @@ -125,40 +145,41 @@ int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge, struct ipath_mregion *mr; unsigned n, m; size_t off; - int ret; + int ret = 0; + unsigned long flags; /* * We use LKEY == zero for kernel virtual addresses * (see ipath_get_dma_mr and ipath_dma.c). 
*/ + spin_lock_irqsave(&rkt->lock, flags); if (sge->lkey == 0) { struct ipath_pd *pd = to_ipd(qp->ibqp.pd); + struct ipath_ibdev *dev = to_idev(pd->ibpd.device); - if (pd->user) { - ret = 0; + if (pd->user) goto bail; - } - isge->mr = NULL; + if (!dev->dma_mr) + goto bail; + atomic_inc(&dev->dma_mr->refcount); + isge->mr = dev->dma_mr; isge->vaddr = (void *) sge->addr; isge->length = sge->length; isge->sge_length = sge->length; - ret = 1; - goto bail; + isge->m = 0; + isge->n = 0; + goto ok; } mr = rkt->table[(sge->lkey >> (32 - ib_ipath_lkey_table_size))]; if (unlikely(mr == NULL || mr->lkey != sge->lkey || - qp->ibqp.pd != mr->pd)) { - ret = 0; + qp->ibqp.pd != mr->pd)) goto bail; - } off = sge->addr - mr->user_base; if (unlikely(sge->addr < mr->user_base || off + sge->length > mr->length || - (mr->access_flags & acc) != acc)) { - ret = 0; + (mr->access_flags & acc) != acc)) goto bail; - } off += mr->offset; m = 0; @@ -171,16 +192,17 @@ int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge, n = 0; } } + atomic_inc(&mr->refcount); isge->mr = mr; isge->vaddr = mr->map[m]->segs[n].vaddr + off; isge->length = mr->map[m]->segs[n].length - off; isge->sge_length = sge->length; isge->m = m; isge->n = n; - +ok: ret = 1; - bail: + spin_unlock_irqrestore(&rkt->lock, flags); return ret; } @@ -195,51 +217,49 @@ bail: * * Return 1 if successful, otherwise 0. */ -int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss, +int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc) { struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ipath_lkey_table *rkt = &dev->lk_table; - struct ipath_sge *sge = &ss->sge; struct ipath_mregion *mr; unsigned n, m; size_t off; - int ret; + int ret = 0; + unsigned long flags; /* * We use RKEY == zero for kernel virtual addresses * (see ipath_get_dma_mr and ipath_dma.c). 
*/ + spin_lock_irqsave(&rkt->lock, flags); if (rkey == 0) { struct ipath_pd *pd = to_ipd(qp->ibqp.pd); + struct ipath_ibdev *dev = to_idev(pd->ibpd.device); - if (pd->user) { - ret = 0; + if (pd->user) goto bail; - } - sge->mr = NULL; + if (!dev->dma_mr) + goto bail; + atomic_inc(&dev->dma_mr->refcount); + sge->mr = dev->dma_mr; sge->vaddr = (void *) vaddr; sge->length = len; sge->sge_length = len; - ss->sg_list = NULL; - ss->num_sge = 1; - ret = 1; - goto bail; + sge->m = 0; + sge->n = 0; + goto ok; } mr = rkt->table[(rkey >> (32 - ib_ipath_lkey_table_size))]; if (unlikely(mr == NULL || mr->lkey != rkey || - qp->ibqp.pd != mr->pd)) { - ret = 0; + qp->ibqp.pd != mr->pd)) goto bail; - } off = vaddr - mr->iova; if (unlikely(vaddr < mr->iova || off + len > mr->length || - (mr->access_flags & acc) == 0)) { - ret = 0; + (mr->access_flags & acc) == 0)) goto bail; - } off += mr->offset; m = 0; @@ -252,17 +272,16 @@ int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss, n = 0; } } + atomic_inc(&mr->refcount); sge->mr = mr; sge->vaddr = mr->map[m]->segs[n].vaddr + off; sge->length = mr->map[m]->segs[n].length - off; sge->sge_length = len; sge->m = m; sge->n = n; - ss->sg_list = NULL; - ss->num_sge = 1; - +ok: ret = 1; - bail: + spin_unlock_irqrestore(&rkt->lock, flags); return ret; } diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index 361a73f..2a4d8a2 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 
* * This software is available to you under a choice of one of two @@ -111,9 +111,9 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp, nip->revision = cpu_to_be32((majrev << 16) | minrev); nip->local_port_num = port; vendor = dd->ipath_vendorid; - nip->vendor_id[0] = 0; - nip->vendor_id[1] = vendor >> 8; - nip->vendor_id[2] = vendor; + nip->vendor_id[0] = IPATH_SRC_OUI_1; + nip->vendor_id[1] = IPATH_SRC_OUI_2; + nip->vendor_id[2] = IPATH_SRC_OUI_3; return reply(smp); } @@ -146,7 +146,6 @@ static int recv_subn_get_guidinfo(struct ib_smp *smp, return reply(smp); } - static void set_link_width_enabled(struct ipath_devdata *dd, u32 w) { (void) dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB, w); @@ -185,6 +184,7 @@ static int set_overrunthreshold(struct ipath_devdata *dd, unsigned n) (u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT; ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, dd->ipath_ibcctrl); + ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xfeedbeef); } return 0; } @@ -217,6 +217,7 @@ static int set_phyerrthreshold(struct ipath_devdata *dd, unsigned n) (u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT; ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, dd->ipath_ibcctrl); + ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xfeedbeef); } return 0; } @@ -349,6 +350,7 @@ bail: */ static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys) { + /* always a kernel port, no locking needed */ struct ipath_portdata *pd = dd->ipath_pd[0]; memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys)); @@ -402,6 +404,7 @@ static int set_linkdowndefaultstate(struct ipath_devdata *dd, int sleep) dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE; ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, dd->ipath_ibcctrl); + ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xfeedbeef); return 0; } @@ -731,6 +734,7 @@ static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys) int i; int changed = 0; + /* always a kernel port, no locking needed */ pd = dd->ipath_pd[0]; 
for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) { @@ -756,6 +760,7 @@ static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys) pd->port_pkeys[i] = key; } if (changed) { + struct ib_event event; u64 pkey; pkey = (u64) dd->ipath_pkeys[0] | @@ -766,6 +771,11 @@ static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys) (unsigned long long) pkey); ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey, pkey); + + event.event = IB_EVENT_PKEY_CHANGE; + event.device = &dd->verbs_dev->ibdev; + event.element.port_num = 1; + ib_dispatch_event(&event); } return 0; } @@ -1398,7 +1408,8 @@ static int process_subn(struct ib_device *ibdev, int mad_flags, } /* Is the mkey in the process of expiring? */ - if (dev->mkey_lease_timeout && jiffies >= dev->mkey_lease_timeout) { + if (dev->mkey_lease_timeout && + time_after_eq(jiffies, dev->mkey_lease_timeout)) { /* Clear timeout and mkey protection field. */ dev->mkey_lease_timeout = 0; dev->mkeyprot = 0; @@ -1492,6 +1503,10 @@ static int process_subn(struct ib_device *ibdev, int mad_flags, goto bail; } + case IB_MGMT_METHOD_TRAP: + case IB_MGMT_METHOD_REPORT: + case IB_MGMT_METHOD_REPORT_RESP: + case IB_MGMT_METHOD_TRAP_REPRESS: case IB_MGMT_METHOD_GET_RESP: /* * The ib_mad module will call us to process responses diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c index db4ba92..0e164e6 100644 --- a/drivers/infiniband/hw/ipath/ipath_mr.c +++ b/drivers/infiniband/hw/ipath/ipath_mr.c @@ -35,6 +35,7 @@ #include <rdma/ib_pack.h> #include <rdma/ib_smi.h> +#include "ipath_kernel.h" #include "ipath_verbs.h" /* Fast memory region */ @@ -60,8 +61,15 @@ static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr) */ struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc) { + struct ipath_ibdev *dev = to_idev(pd->device); struct ipath_mr *mr; struct ib_mr *ret; + unsigned long flags; + + if (to_ipd(pd)->user) { + ret = ERR_PTR(-EPERM); + goto bail; + } mr = kzalloc(sizeof *mr, GFP_KERNEL); if (!mr) { 
@@ -70,6 +78,13 @@ struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc) } mr->mr.access_flags = acc; + atomic_set(&mr->mr.refcount, 0); + + spin_lock_irqsave(&dev->lk_table.lock, flags); + if (!dev->dma_mr) + dev->dma_mr = &mr->mr; + spin_unlock_irqrestore(&dev->lk_table.lock, flags); + ret = &mr->ibmr; bail: @@ -104,6 +119,7 @@ static struct ipath_mr *alloc_mr(int count, goto bail; mr->ibmr.rkey = mr->ibmr.lkey = mr->mr.lkey; + atomic_set(&mr->mr.refcount, 0); goto done; bail: @@ -195,7 +211,8 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto bail; } - umem = ib_umem_get(pd->uobject->context, start, length, mr_access_flags); + umem = ib_umem_get(pd->uobject->context, start, length, + mr_access_flags, 0); if (IS_ERR(umem)) return (void *) umem; @@ -257,9 +274,14 @@ bail: int ipath_dereg_mr(struct ib_mr *ibmr) { struct ipath_mr *mr = to_imr(ibmr); + struct ipath_ibdev *dev = to_idev(ibmr->device); + int ret; int i; - ipath_free_lkey(&to_idev(ibmr->device)->lk_table, ibmr->lkey); + ret = ipath_free_lkey(dev, &mr->mr); + if (ret) + return ret; + i = mr->mr.mapsz; while (i) { i--; @@ -323,6 +345,7 @@ struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags, fmr->mr.max_segs = fmr_attr->max_pages; fmr->page_shift = fmr_attr->page_shift; + atomic_set(&fmr->mr.refcount, 0); ret = &fmr->ibfmr; goto done; @@ -356,6 +379,12 @@ int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list, u32 ps; int ret; + if (atomic_read(&fmr->mr.refcount)) { + ipath_dbg("FMR modified when busy (LKEY %x cnt %u)\n", + fmr->mr.lkey, atomic_read(&fmr->mr.refcount)); + return -EBUSY; + } + if (list_len > fmr->mr.max_segs) { ret = -EINVAL; goto bail; @@ -399,6 +428,10 @@ int ipath_unmap_fmr(struct list_head *fmr_list) list_for_each_entry(fmr, fmr_list, ibfmr.list) { rkt = &to_idev(fmr->ibfmr.device)->lk_table; spin_lock_irqsave(&rkt->lock, flags); + if (atomic_read(&fmr->mr.refcount)) + ipath_dbg("FMR busy (LKEY %x cnt %u)\n", + fmr->mr.lkey, 
atomic_read(&fmr->mr.refcount)); + fmr->mr.user_base = 0; fmr->mr.iova = 0; fmr->mr.length = 0; @@ -416,9 +449,13 @@ int ipath_unmap_fmr(struct list_head *fmr_list) int ipath_dealloc_fmr(struct ib_fmr *ibfmr) { struct ipath_fmr *fmr = to_ifmr(ibfmr); + int ret; int i; - ipath_free_lkey(&to_idev(ibfmr->device)->lk_table, ibfmr->lkey); + ret = ipath_free_lkey(to_idev(ibfmr->device), &fmr->mr); + if (ret) + return ret; + i = fmr->mr.mapsz; while (i) kfree(fmr->mr.map[--i]); diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index e8498bb..aefd621 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -330,6 +330,10 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) qp->s_wqe = NULL; qp->s_pkt_delay = 0; qp->s_draining = 0; + qp->s_next_psn = 0; + qp->s_last_psn = 0; + qp->s_sending_psn = 0; + qp->s_sending_hpsn = 0; qp->s_psn = 0; qp->r_psn = 0; qp->r_msn = 0; @@ -348,6 +352,7 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) qp->s_head = 0; qp->s_tail = 0; qp->s_cur = 0; + qp->s_acked = 0; qp->s_last = 0; qp->s_ssn = 1; qp->s_lsn = 0; @@ -359,6 +364,50 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) qp->r_rq.wq->head = 0; qp->r_rq.wq->tail = 0; } + qp->r_sge.num_sge = 0; +} + +static void clear_mr_refs(struct ipath_qp *qp, int clr_sends) +{ + unsigned n; + + while (qp->r_sge.num_sge) { + atomic_dec(&qp->r_sge.sge.mr->refcount); + if (--qp->r_sge.num_sge) + qp->r_sge.sge = *qp->r_sge.sg_list++; + } + + if (clr_sends) { + while (qp->s_last != qp->s_head) { + struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); + unsigned i; + + for (i = 0; i < wqe->wr.num_sge; i++) { + struct ipath_sge *sge = &wqe->sg_list[i]; + + atomic_dec(&sge->mr->refcount); + } + if (++qp->s_last >= qp->s_size) + qp->s_last = 0; + } + if (qp->s_rdma_mr) { + atomic_dec(&qp->s_rdma_mr->refcount); + qp->s_rdma_mr = NULL; + } + } + + if 
(qp->ibqp.qp_type != IB_QPT_RC) + return; + + for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) { + struct ipath_ack_entry *e = &qp->s_ack_queue[n]; + + if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && + e->rdma_sge.mr) { + atomic_dec(&e->rdma_sge.mr->refcount); + e->rdma_sge.mr = NULL; + } + } } /** @@ -394,6 +443,8 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) if (qp->s_last != qp->s_head) ipath_schedule_send(qp); + clear_mr_refs(qp, 0); + memset(&wc, 0, sizeof(wc)); wc.qp = &qp->ibqp; wc.opcode = IB_WC_RECV; @@ -521,8 +572,9 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, tasklet_kill(&qp->s_task); wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); spin_lock_irq(&qp->s_lock); + clear_mr_refs(qp, 1); + ipath_reset_qp(qp, ibqp->qp_type); } - ipath_reset_qp(qp, ibqp->qp_type); break; case IB_QPS_SQD: @@ -552,8 +604,8 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, qp->remote_qpn = attr->dest_qp_num; if (attr_mask & IB_QP_SQ_PSN) { - qp->s_psn = qp->s_next_psn = attr->sq_psn; - qp->s_last_psn = qp->s_next_psn - 1; + qp->s_sending_psn = qp->s_psn = qp->s_next_psn = attr->sq_psn; + qp->s_sending_hpsn = qp->s_last_psn = qp->s_next_psn - 1; } if (attr_mask & IB_QP_RQ_PSN) @@ -745,8 +797,14 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, struct ipath_swqe *swq = NULL; struct ipath_ibdev *dev; size_t sz; + size_t sg_list_sz; struct ib_qp *ret; + if (init_attr->create_flags) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + if (init_attr->cap.max_send_sge > ib_ipath_max_sges || init_attr->cap.max_send_wr > ib_ipath_max_qp_wrs) { ret = ERR_PTR(-EINVAL); @@ -784,27 +842,34 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, goto bail; } sz = sizeof(*qp); + sg_list_sz = 0; if (init_attr->srq) { struct ipath_srq *srq = to_isrq(init_attr->srq); - sz += sizeof(*qp->r_sg_list) * - srq->rq.max_sge; - } else - sz += sizeof(*qp->r_sg_list) * - init_attr->cap.max_recv_sge; - qp = kmalloc(sz, GFP_KERNEL); + if 
(srq->rq.max_sge > 1) + sg_list_sz = sizeof(*qp->r_sg_list) * + (srq->rq.max_sge - 1); + } else if (init_attr->cap.max_recv_sge > 1) + sg_list_sz = sizeof(*qp->r_sg_list) * + (init_attr->cap.max_recv_sge - 1); + qp = kzalloc(sz + sg_list_sz, GFP_KERNEL); if (!qp) { ret = ERR_PTR(-ENOMEM); goto bail_swq; } - if (init_attr->srq) { + if (sg_list_sz && (init_attr->qp_type == IB_QPT_UD || + init_attr->qp_type == IB_QPT_SMI || + init_attr->qp_type == IB_QPT_GSI)) { + qp->r_ud_sg_list = kmalloc(sg_list_sz, GFP_KERNEL); + if (!qp->r_ud_sg_list) { + ret = ERR_PTR(-ENOMEM); + goto bail_qp; + } + } else + qp->r_ud_sg_list = NULL; + if (init_attr->srq) sz = 0; - qp->r_rq.size = 0; - qp->r_rq.max_sge = 0; - qp->r_rq.wq = NULL; - init_attr->cap.max_recv_wr = 0; - init_attr->cap.max_recv_sge = 0; - } else { + else { qp->r_rq.size = init_attr->cap.max_recv_wr + 1; qp->r_rq.max_sge = init_attr->cap.max_recv_sge; sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + @@ -813,7 +878,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, qp->r_rq.size * sz); if (!qp->r_rq.wq) { ret = ERR_PTR(-ENOMEM); - goto bail_qp; + goto bail_sg_list; } memset(qp->r_rq.wq, 0, sizeof(struct ipath_rwq) + qp->r_rq.size * sz); @@ -837,18 +902,14 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, qp->s_max_sge = init_attr->cap.max_send_sge; if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) qp->s_flags = IPATH_S_SIGNAL_REQ_WR; - else - qp->s_flags = 0; dev = to_idev(ibpd->device); err = ipath_alloc_qpn(&dev->qp_table, qp, init_attr->qp_type); if (err) { ret = ERR_PTR(err); vfree(qp->r_rq.wq); - goto bail_qp; + goto bail_sg_list; } - qp->ip = NULL; - qp->s_tx = NULL; ipath_reset_qp(qp, init_attr->qp_type); break; @@ -922,6 +983,8 @@ bail_ip: vfree(qp->r_rq.wq); ipath_free_qp(&dev->qp_table, qp); free_qpn(&dev->qp_table, qp->ibqp.qp_num); +bail_sg_list: + kfree(qp->r_ud_sg_list); bail_qp: kfree(qp); bail_swq: @@ -965,6 +1028,10 @@ int ipath_destroy_qp(struct ib_qp *ibqp) ipath_free_qp(&dev->qp_table, qp); if 
(qp->s_tx) { + if (qp->s_tx->mr) { + atomic_dec(&qp->s_tx->mr->refcount); + qp->s_tx->mr = NULL; + } atomic_dec(&qp->refcount); if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) kfree(qp->s_tx->txreq.map_addr); @@ -976,6 +1043,8 @@ int ipath_destroy_qp(struct ib_qp *ibqp) wait_event(qp->wait, !atomic_read(&qp->refcount)); + clear_mr_refs(qp, 1); + /* all user's cleaned up, mark it available */ free_qpn(&dev->qp_table, qp->ibqp.qp_num); spin_lock(&dev->n_qps_lock); @@ -986,6 +1055,7 @@ int ipath_destroy_qp(struct ib_qp *ibqp) kref_put(&qp->ip->ref, ipath_release_mmap_info); else vfree(qp->r_rq.wq); + kfree(qp->r_ud_sg_list); vfree(qp->s_wq); kfree(qp); return 0; @@ -1048,12 +1118,4 @@ void ipath_get_credit(struct ipath_qp *qp, u32 aeth) if (ipath_cmp24(credit, qp->s_lsn) > 0) qp->s_lsn = credit; } - - /* Restart sending if it was blocked due to lack of credits. */ - if ((qp->s_flags & IPATH_S_WAIT_SSN_CREDIT) && - qp->s_cur != qp->s_head && - (qp->s_lsn == (u32) -1 || - ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn, - qp->s_lsn + 1) <= 0)) - ipath_schedule_send(qp); } diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 9d1c0f8..801694f 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -49,7 +49,7 @@ static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe, ss->sg_list = wqe->sg_list + 1; ss->num_sge = wqe->wr.num_sge; ss->total_len = wqe->length; - ipath_skip_sge(ss, len); + ipath_skip_sge(ss, len, 0); return wqe->length - len; } @@ -103,6 +103,12 @@ static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp, switch (qp->s_ack_state) { case OP(RDMA_READ_RESPONSE_LAST): case OP(RDMA_READ_RESPONSE_ONLY): + e = &qp->s_ack_queue[qp->s_tail_ack_queue]; + if (e->rdma_sge.mr) { + atomic_dec(&e->rdma_sge.mr->refcount); + e->rdma_sge.mr = NULL; + } + /* FALLTHROUGH */ case OP(ATOMIC_ACKNOWLEDGE): /* * We can increment the tail pointer now that 
the last @@ -124,10 +130,25 @@ static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp, e = &qp->s_ack_queue[qp->s_tail_ack_queue]; if (e->opcode == OP(RDMA_READ_REQUEST)) { + /* + * If a RDMA read response is being resent and + * we haven't seen the duplicate request yet, + * then stop sending the remaining responses the + * responder has seen until the requester resends it. + */ + if (e->rdma_sge.sge_length && !e->rdma_sge.mr) { + qp->s_tail_ack_queue = qp->r_head_ack_queue; + qp->s_ack_state = OP(ACKNOWLEDGE); + goto bail; + } /* Copy SGE state in case we need to resend */ - qp->s_ack_rdma_sge = e->rdma_sge; + qp->s_rdma_mr = e->rdma_sge.mr; + if (qp->s_rdma_mr) + atomic_inc(&qp->s_rdma_mr->refcount); + qp->s_ack_rdma_sge.sge = e->rdma_sge; + qp->s_ack_rdma_sge.num_sge = 1; qp->s_cur_sge = &qp->s_ack_rdma_sge; - len = e->rdma_sge.sge.sge_length; + len = e->rdma_sge.sge_length; if (len > pmtu) { len = pmtu; qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); @@ -160,6 +181,10 @@ static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp, qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); /* FALLTHROUGH */ case OP(RDMA_READ_RESPONSE_MIDDLE): + qp->s_cur_sge = &qp->s_ack_rdma_sge; + qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr; + if (qp->s_rdma_mr) + atomic_inc(&qp->s_rdma_mr->refcount); len = qp->s_ack_rdma_sge.sge.sge_length; if (len > pmtu) len = pmtu; @@ -167,7 +192,8 @@ static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp, ohdr->u.aeth = ipath_compute_aeth(qp); hwords++; qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); - qp->s_ack_queue[qp->s_tail_ack_queue].sent = 1; + e = &qp->s_ack_queue[qp->s_tail_ack_queue]; + e->sent = 1; } bth0 = qp->s_ack_state << 24; bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK; @@ -196,6 +222,7 @@ static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp, bth0 = OP(ACKNOWLEDGE) << 24; bth2 = qp->s_ack_psn & IPATH_PSN_MASK; } + qp->s_rdma_ack_cnt++; qp->s_hdrwords = hwords; 
qp->s_cur_size = len; ipath_make_ruc_header(dev, qp, ohdr, bth0, bth2); @@ -225,6 +252,7 @@ int ipath_make_rc_req(struct ipath_qp *qp) char newreq; unsigned long flags; int ret = 0; + int delta; ohdr = &qp->s_hdr.u.oth; if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) @@ -255,6 +283,12 @@ int ipath_make_rc_req(struct ipath_qp *qp) goto bail; } wqe = get_swqe_ptr(qp, qp->s_last); + while (qp->s_last != qp->s_acked) { + ipath_send_complete(qp, wqe, IB_WC_SUCCESS); + if (++qp->s_last >= qp->s_size) + qp->s_last = 0; + wqe = get_swqe_ptr(qp, qp->s_last); + } ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); goto done; } @@ -265,6 +299,19 @@ int ipath_make_rc_req(struct ipath_qp *qp) goto bail; } + /* + * Leave BUSY set until sdma queue drains so we don't send + * the same PSN multiple times. + */ + if (ipath_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) { + if (ipath_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) { + qp->s_flags |= IPATH_S_WAITING; + goto bail; + } + qp->s_sending_psn = qp->s_psn; + qp->s_sending_hpsn = qp->s_psn - 1; + } + /* header size in 32-bit words LRH+BTH = (8+12)/4. 
*/ hwords = 5; bth0 = 1 << 22; /* Set M bit */ @@ -329,7 +376,7 @@ int ipath_make_rc_req(struct ipath_qp *qp) else { qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE); /* Immediate data comes after the BTH */ - ohdr->u.imm_data = wqe->wr.imm_data; + ohdr->u.imm_data = wqe->wr.ex.imm_data; hwords += 1; } if (wqe->wr.send_flags & IB_SEND_SOLICITED) @@ -369,7 +416,7 @@ int ipath_make_rc_req(struct ipath_qp *qp) qp->s_state = OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE); /* Immediate data comes after RETH */ - ohdr->u.rc.imm_data = wqe->wr.imm_data; + ohdr->u.rc.imm_data = wqe->wr.ex.imm_data; hwords += 1; if (wqe->wr.send_flags & IB_SEND_SOLICITED) bth0 |= 1 << 23; @@ -514,7 +561,7 @@ int ipath_make_rc_req(struct ipath_qp *qp) else { qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE); /* Immediate data comes after the BTH */ - ohdr->u.imm_data = wqe->wr.imm_data; + ohdr->u.imm_data = wqe->wr.ex.imm_data; hwords += 1; } if (wqe->wr.send_flags & IB_SEND_SOLICITED) @@ -550,7 +597,7 @@ int ipath_make_rc_req(struct ipath_qp *qp) else { qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE); /* Immediate data comes after the BTH */ - ohdr->u.imm_data = wqe->wr.imm_data; + ohdr->u.imm_data = wqe->wr.ex.imm_data; hwords += 1; if (wqe->wr.send_flags & IB_SEND_SOLICITED) bth0 |= 1 << 23; @@ -575,9 +622,8 @@ int ipath_make_rc_req(struct ipath_qp *qp) ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len); qp->s_state = OP(RDMA_READ_REQUEST); hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); - bth2 = qp->s_psn++ & IPATH_PSN_MASK; - if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; + bth2 = qp->s_psn & IPATH_PSN_MASK; + qp->s_psn = wqe->lpsn + 1; ss = NULL; len = 0; qp->s_cur++; @@ -585,7 +631,9 @@ int ipath_make_rc_req(struct ipath_qp *qp) qp->s_cur = 0; break; } - if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0) + qp->s_sending_hpsn = bth2; + delta = (((int) bth2 - (int) wqe->psn) << 8) >> 8; + if (delta && delta % IPATH_PSN_CREDIT == 0) bth2 |= 1 << 31; /* Request 
ACK. */ qp->s_len -= len; qp->s_hdrwords = hwords; @@ -619,7 +667,6 @@ static void send_rc_ack(struct ipath_qp *qp) u16 lrh0; u32 bth0; u32 hwords; - u32 pbufn; u32 __iomem *piobuf; struct ipath_ib_header hdr; struct ipath_other_headers *ohdr; @@ -630,7 +677,8 @@ static void send_rc_ack(struct ipath_qp *qp) /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ if (qp->r_head_ack_queue != qp->s_tail_ack_queue || (qp->s_flags & IPATH_S_ACK_PENDING) || - qp->s_ack_state != OP(ACKNOWLEDGE)) + qp->s_ack_state != OP(ACKNOWLEDGE) || + qp->s_rdma_ack_cnt) goto queue_ack; spin_unlock_irqrestore(&qp->s_lock, flags); @@ -640,7 +688,7 @@ static void send_rc_ack(struct ipath_qp *qp) if (!(dd->ipath_flags & IPATH_LINKACTIVE)) goto done; - piobuf = ipath_getpiobuf(dd, 0, &pbufn); + piobuf = ipath_getpiobuf(dd, 0, NULL); if (!piobuf) { /* * We are out of PIO buffers at the moment. @@ -678,7 +726,8 @@ static void send_rc_ack(struct ipath_qp *qp) hdr.lrh[0] = cpu_to_be16(lrh0); hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC); - hdr.lrh[3] = cpu_to_be16(dd->ipath_lid); + hdr.lrh[3] = cpu_to_be16(dd->ipath_lid | + qp->remote_ah_attr.src_path_bits); ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK); @@ -695,14 +744,6 @@ static void send_rc_ack(struct ipath_qp *qp) } else __iowrite32_copy(piobuf + 2, (u32 *) &hdr, hwords); - if (dd->ipath_flags & IPATH_USE_SPCL_TRIG) { - u32 spcl_off = (pbufn >= dd->ipath_piobcnt2k) ? 
- 2047 : 1023; - - ipath_flush_wc(); - __raw_writel(0xaebecede, piobuf + spcl_off); - } - ipath_flush_wc(); dev->n_unicast_xmit++; @@ -734,7 +775,7 @@ done: */ static void reset_psn(struct ipath_qp *qp, u32 psn) { - u32 n = qp->s_last; + u32 n = qp->s_acked; struct ipath_swqe *wqe = get_swqe_ptr(qp, n); u32 opcode; @@ -815,12 +856,17 @@ done: */ void ipath_restart_rc(struct ipath_qp *qp, u32 psn) { - struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); + struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_acked); struct ipath_ibdev *dev; if (qp->s_retry == 0) { - ipath_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR); - ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); + if (qp->s_last == qp->s_acked) { + ipath_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR); + ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); + } else { + /* XXX need to handle delayed completion */ + ipath_dbg("Delayed too many retries\n"); + } goto bail; } qp->s_retry--; @@ -849,6 +895,101 @@ bail: return; } +/* + * Set qp->s_sending_psn to the next PSN after the given one. + * This would be psn+1 except when RDMA reads are present. + */ +static void reset_sending_psn(struct ipath_qp *qp, u32 psn) +{ + struct ipath_swqe *wqe; + u32 n = qp->s_last; + + /* Find the work request corresponding to the given PSN. */ + for (;;) { + wqe = get_swqe_ptr(qp, n); + if (ipath_cmp24(psn, wqe->lpsn) <= 0) { + if (wqe->wr.opcode == IB_WR_RDMA_READ) + qp->s_sending_psn = wqe->lpsn + 1; + else + qp->s_sending_psn = psn + 1; + break; + } + if (++n == qp->s_size) + n = 0; + if (n == qp->s_tail) + break; + } +} + +/* + * This should be called with the QP s_lock held and interrupts disabled. 
+ */ +void ipath_rc_send_complete(struct ipath_qp *qp, struct ipath_ib_header *hdr) +{ + struct ipath_other_headers *ohdr; + struct ipath_swqe *wqe; + struct ib_wc wc; + unsigned i; + u32 opcode; + u32 psn; + + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) + return; + + /* Find out where the BTH is */ + if ((be16_to_cpu(hdr->lrh[0]) & 3) == IPATH_LRH_BTH) + ohdr = &hdr->u.oth; + else + ohdr = &hdr->u.l.oth; + + opcode = be32_to_cpu(ohdr->bth[0]) >> 24; + if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) && + opcode <= OP(ATOMIC_ACKNOWLEDGE)) { + WARN_ON(!qp->s_rdma_ack_cnt); + qp->s_rdma_ack_cnt--; + return; + } + + psn = be32_to_cpu(ohdr->bth[2]); + reset_sending_psn(qp, psn); + + while (qp->s_last != qp->s_acked) { + wqe = get_swqe_ptr(qp, qp->s_last); + if (ipath_cmp24(wqe->lpsn, qp->s_sending_psn) >= 0 && + ipath_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) + break; + for (i = 0; i < wqe->wr.num_sge; i++) { + struct ipath_sge *sge = &wqe->sg_list[i]; + + atomic_dec(&sge->mr->refcount); + } + /* Post a send completion queue entry if requested. */ + if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || + (wqe->wr.send_flags & IB_SEND_SIGNALED)) { + memset(&wc, 0, sizeof wc); + wc.wr_id = wqe->wr.wr_id; + wc.status = IB_WC_SUCCESS; + wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; + wc.byte_len = wqe->length; + wc.qp = &qp->ibqp; + ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0); + } + if (++qp->s_last >= qp->s_size) + qp->s_last = 0; + } + /* + * If we were waiting for sends to complete before resending, + * and they are now complete, restart sending. 
+ */ + if (qp->s_cur != qp->s_head && + ipath_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0 && + ipath_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) { + qp->s_sending_psn = qp->s_psn; + qp->s_sending_hpsn = qp->s_psn - 1; + ipath_schedule_send(qp); + } +} + static inline void update_last_psn(struct ipath_qp *qp, u32 psn) { qp->s_last_psn = psn; @@ -875,6 +1016,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, int ret = 0; u32 ack_psn; int diff; + unsigned i; /* * Remove the QP from the timeout queue (or RNR timeout queue). @@ -896,7 +1038,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, ack_psn = psn; if (aeth >> 29) ack_psn--; - wqe = get_swqe_ptr(qp, qp->s_last); + wqe = get_swqe_ptr(qp, qp->s_acked); /* * The MSN might be for a later WQE than the PSN indicates so @@ -956,65 +1098,79 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, qp->s_flags & IPATH_S_RDMAR_PENDING) ipath_schedule_send(qp); } - /* Post a send completion queue entry if requested. */ - if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED)) { - memset(&wc, 0, sizeof wc); - wc.wr_id = wqe->wr.wr_id; - wc.status = IB_WC_SUCCESS; - wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - wc.byte_len = wqe->length; - wc.qp = &qp->ibqp; - wc.src_qp = qp->remote_qpn; - wc.slid = qp->remote_ah_attr.dlid; - wc.sl = qp->remote_ah_attr.sl; - ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0); - } + /* + * Don't decrement refcount and don't generate a + * completion if the WQE is being resent until the send + * is finished. + */ + if (ipath_cmp24(wqe->lpsn, qp->s_sending_psn) < 0 || + ipath_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { + for (i = 0; i < wqe->wr.num_sge; i++) { + struct ipath_sge *sge = &wqe->sg_list[i]; + + atomic_dec(&sge->mr->refcount); + } + /* Post a send completion queue entry if requested. 
*/ + if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || + (wqe->wr.send_flags & IB_SEND_SIGNALED)) { + memset(&wc, 0, sizeof wc); + wc.wr_id = wqe->wr.wr_id; + wc.status = IB_WC_SUCCESS; + wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; + wc.byte_len = wqe->length; + wc.qp = &qp->ibqp; + ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, + 0); + } + if (++qp->s_last >= qp->s_size) + qp->s_last = 0; + } else + dev->n_rc_delayed_comp++; qp->s_retry = qp->s_retry_cnt; /* * If we are completing a request which is in the process of * being resent, we can stop resending it since we know the * responder has already seen it. */ - if (qp->s_last == qp->s_cur) { + if (qp->s_acked == qp->s_cur) { if (++qp->s_cur >= qp->s_size) qp->s_cur = 0; - qp->s_last = qp->s_cur; - if (qp->s_last == qp->s_tail) + qp->s_acked = qp->s_cur; + if (qp->s_acked == qp->s_tail) break; wqe = get_swqe_ptr(qp, qp->s_cur); qp->s_state = OP(SEND_LAST); qp->s_psn = wqe->psn; } else { - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur) + if (++qp->s_acked >= qp->s_size) + qp->s_acked = 0; + if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur) qp->s_draining = 0; - if (qp->s_last == qp->s_tail) + if (qp->s_acked == qp->s_tail) break; - wqe = get_swqe_ptr(qp, qp->s_last); + wqe = get_swqe_ptr(qp, qp->s_acked); } } switch (aeth >> 29) { case 0: /* ACK */ dev->n_rc_acks++; - /* If this is a partial ACK, reset the retransmit timer. */ - if (qp->s_last != qp->s_tail) { + if (qp->s_acked != qp->s_tail) { + /* + * We got a partial ACK for a resent operation so + * reset the retransmit timer. + */ spin_lock(&dev->pending_lock); if (list_empty(&qp->timerwait)) list_add_tail(&qp->timerwait, &dev->pending[dev->pending_index]); spin_unlock(&dev->pending_lock); /* - * If we get a partial ACK for a resent operation, - * we can stop resending the earlier packets and + * We can stop resending the earlier packets and * continue with the next packet the receiver wants. 
*/ - if (ipath_cmp24(qp->s_psn, psn) <= 0) { + if (ipath_cmp24(qp->s_psn, psn) <= 0) reset_psn(qp, psn + 1); - ipath_schedule_send(qp); - } } else if (ipath_cmp24(qp->s_psn, psn) <= 0) { qp->s_state = OP(SEND_LAST); qp->s_psn = psn + 1; @@ -1023,12 +1179,16 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, qp->s_rnr_retry = qp->s_rnr_retry_cnt; qp->s_retry = qp->s_retry_cnt; update_last_psn(qp, psn); + if (qp->s_cur != qp->s_head) + ipath_schedule_send(qp); + else + qp->s_flags &= ~IPATH_S_WAITING; ret = 1; goto bail; case 1: /* RNR NAK */ dev->n_rnr_naks++; - if (qp->s_last == qp->s_tail) + if (qp->s_acked == qp->s_tail) goto bail; if (qp->s_rnr_retry == 0) { status = IB_WC_RNR_RETRY_EXC_ERR; @@ -1056,7 +1216,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, goto bail; case 3: /* NAK */ - if (qp->s_last == qp->s_tail) + if (qp->s_acked == qp->s_tail) goto bail; /* The last valid PSN is the previous PSN. */ update_last_psn(qp, psn - 1); @@ -1087,8 +1247,13 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, status = IB_WC_REM_OP_ERR; dev->n_other_naks++; class_b: - ipath_send_complete(qp, wqe, status); - ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); + if (qp->s_last == qp->s_acked) { + ipath_send_complete(qp, wqe, status); + ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); + } else { + /* XXX need to handle delayed completion */ + ipath_dbg("Delayed error %d\n", status); + } break; default: @@ -1135,13 +1300,12 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, { struct ipath_swqe *wqe; enum ib_wc_status status; - unsigned long flags; int diff; u32 pad; u32 aeth; u64 val; - spin_lock_irqsave(&qp->s_lock, flags); + spin_lock(&qp->s_lock); /* Double check we can process this now that we hold the s_lock. 
*/ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) @@ -1168,9 +1332,9 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, goto ack_done; } - if (unlikely(qp->s_last == qp->s_tail)) + if (unlikely(qp->s_acked == qp->s_tail)) goto ack_done; - wqe = get_swqe_ptr(qp, qp->s_last); + wqe = get_swqe_ptr(qp, qp->s_acked); status = IB_WC_SUCCESS; switch (opcode) { @@ -1197,7 +1361,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, opcode != OP(RDMA_READ_RESPONSE_FIRST)) goto ack_done; hdrsize += 4; - wqe = get_swqe_ptr(qp, qp->s_last); + wqe = get_swqe_ptr(qp, qp->s_acked); if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) goto ack_op_err; qp->r_flags &= ~IPATH_R_RDMAR_SEQ; @@ -1244,8 +1408,8 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, */ qp->s_rdma_read_len -= pmtu; update_last_psn(qp, psn); - spin_unlock_irqrestore(&qp->s_lock, flags); - ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu); + spin_unlock(&qp->s_lock); + ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0); goto bail; case OP(RDMA_READ_RESPONSE_ONLY): @@ -1269,7 +1433,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, * have to be careful to copy the data to the right * location. 
*/ - wqe = get_swqe_ptr(qp, qp->s_last); + wqe = get_swqe_ptr(qp, qp->s_acked); qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, wqe, psn, pmtu); goto read_last; @@ -1305,7 +1469,8 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, aeth = be32_to_cpu(((__be32 *) data)[0]); data += sizeof(__be32); } - ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen); + ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0); + WARN_ON(qp->s_rdma_read_sge.num_sge); (void) do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST), 0); goto ack_done; @@ -1318,10 +1483,15 @@ ack_op_err: ack_len_err: status = IB_WC_LOC_LEN_ERR; ack_err: - ipath_send_complete(qp, wqe, status); - ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); + if (qp->s_last == qp->s_acked) { + ipath_send_complete(qp, wqe, status); + ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); + } else { + /* XXX need to handle delayed completion */ + ipath_dbg("Delayed error %d\n", status); + } ack_done: - spin_unlock_irqrestore(&qp->s_lock, flags); + spin_unlock(&qp->s_lock); bail: return; } @@ -1355,7 +1525,6 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, struct ipath_ack_entry *e; u8 i, prev; int old_req; - unsigned long flags; if (diff > 0) { /* @@ -1390,7 +1559,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, e = NULL; old_req = 1; - spin_lock_irqsave(&qp->s_lock, flags); + spin_lock(&qp->s_lock); /* Double check we can process this now that we hold the s_lock. 
*/ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) goto unlock_done; @@ -1447,8 +1616,12 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, offset = ((psn - e->psn) & IPATH_PSN_MASK) * ib_mtu_enum_to_int(qp->path_mtu); len = be32_to_cpu(reth->length); - if (unlikely(offset + len > e->rdma_sge.sge.sge_length)) + if (unlikely(offset + len > e->rdma_sge.sge_length)) goto unlock_done; + if (e->rdma_sge.mr) { + atomic_dec(&e->rdma_sge.mr->refcount); + e->rdma_sge.mr = NULL; + } if (len != 0) { u32 rkey = be32_to_cpu(reth->rkey); u64 vaddr = be64_to_cpu(reth->vaddr); @@ -1460,12 +1633,9 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, if (unlikely(!ok)) goto unlock_done; } else { - e->rdma_sge.sg_list = NULL; - e->rdma_sge.num_sge = 0; - e->rdma_sge.sge.mr = NULL; - e->rdma_sge.sge.vaddr = NULL; - e->rdma_sge.sge.length = 0; - e->rdma_sge.sge.sge_length = 0; + e->rdma_sge.vaddr = NULL; + e->rdma_sge.length = 0; + e->rdma_sge.sge_length = 0; } e->psn = psn; qp->s_ack_state = OP(ACKNOWLEDGE); @@ -1495,7 +1665,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, * after all the previous RDMA reads and atomics. 
*/ if (i == qp->r_head_ack_queue) { - spin_unlock_irqrestore(&qp->s_lock, flags); + spin_unlock(&qp->s_lock); qp->r_nak_state = 0; qp->r_ack_psn = qp->r_psn - 1; goto send_ack; @@ -1508,7 +1678,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, if (qp->r_head_ack_queue == qp->s_tail_ack_queue && !(qp->s_flags & IPATH_S_ACK_PENDING) && qp->s_ack_state == OP(ACKNOWLEDGE)) { - spin_unlock_irqrestore(&qp->s_lock, flags); + spin_unlock(&qp->s_lock); qp->r_nak_state = 0; qp->r_ack_psn = qp->s_ack_queue[i].psn - 1; goto send_ack; @@ -1525,7 +1695,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, ipath_schedule_send(qp); unlock_done: - spin_unlock_irqrestore(&qp->s_lock, flags); + spin_unlock(&qp->s_lock); done: return 1; @@ -1559,10 +1729,8 @@ static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n) next = n + 1; if (next > IPATH_MAX_RDMA_ATOMIC) next = 0; - if (n == qp->s_tail_ack_queue) { - qp->s_tail_ack_queue = next; - qp->s_ack_state = OP(ACKNOWLEDGE); - } + qp->s_tail_ack_queue = next; + qp->s_ack_state = OP(ACKNOWLEDGE); } /** @@ -1591,7 +1759,6 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, int diff; struct ib_reth *reth; int header_in_data; - unsigned long flags; /* Validate the SLID. See Ch. 
9.6.1.5 */ if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid)) @@ -1694,7 +1861,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, qp->r_rcv_len += pmtu; if (unlikely(qp->r_rcv_len > qp->r_len)) goto nack_inv; - ipath_copy_sge(&qp->r_sge, data, pmtu); + ipath_copy_sge(&qp->r_sge, data, pmtu, 1); break; case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): @@ -1714,11 +1881,11 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, case OP(SEND_LAST_WITH_IMMEDIATE): send_last_imm: if (header_in_data) { - wc.imm_data = *(__be32 *) data; + wc.ex.imm_data = *(__be32 *) data; data += sizeof(__be32); } else { /* Immediate data comes after BTH */ - wc.imm_data = ohdr->u.imm_data; + wc.ex.imm_data = ohdr->u.imm_data; } hdrsize += 4; wc.wc_flags = IB_WC_WITH_IMM; @@ -1737,7 +1904,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, wc.byte_len = tlen + qp->r_rcv_len; if (unlikely(wc.byte_len > qp->r_len)) goto nack_inv; - ipath_copy_sge(&qp->r_sge, data, tlen); + ipath_copy_sge(&qp->r_sge, data, tlen, 1); + while (qp->r_sge.num_sge) { + atomic_dec(&qp->r_sge.sge.mr->refcount); + if (--qp->r_sge.num_sge) + qp->r_sge.sge = *qp->r_sge.sg_list++; + } qp->r_msn++; if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) break; @@ -1775,19 +1947,21 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, hdrsize += sizeof(*reth); qp->r_len = be32_to_cpu(reth->length); qp->r_rcv_len = 0; + qp->r_sge.sg_list = NULL; if (qp->r_len != 0) { u32 rkey = be32_to_cpu(reth->rkey); u64 vaddr = be64_to_cpu(reth->vaddr); int ok; /* Check rkey & NAK */ - ok = ipath_rkey_ok(qp, &qp->r_sge, + ok = ipath_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr, rkey, IB_ACCESS_REMOTE_WRITE); if (unlikely(!ok)) goto nack_acc; + qp->r_sge.num_sge = 1; } else { - qp->r_sge.sg_list = NULL; + qp->r_sge.num_sge = 0; qp->r_sge.sge.mr = NULL; qp->r_sge.sge.vaddr = NULL; qp->r_sge.sge.length = 0; @@ -1812,7 +1986,7 @@ void 
ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, next = qp->r_head_ack_queue + 1; if (next > IPATH_MAX_RDMA_ATOMIC) next = 0; - spin_lock_irqsave(&qp->s_lock, flags); + spin_lock(&qp->s_lock); /* Double check we can process this while holding the s_lock. */ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) goto unlock; @@ -1822,6 +1996,10 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ipath_update_ack_queue(qp, next); } e = &qp->s_ack_queue[qp->r_head_ack_queue]; + if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { + atomic_dec(&e->rdma_sge.mr->refcount); + e->rdma_sge.mr = NULL; + } /* RETH comes after BTH */ if (!header_in_data) reth = &ohdr->u.rc.reth; @@ -1847,12 +2025,10 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, if (len > pmtu) qp->r_psn += (len - 1) / pmtu; } else { - e->rdma_sge.sg_list = NULL; - e->rdma_sge.num_sge = 0; - e->rdma_sge.sge.mr = NULL; - e->rdma_sge.sge.vaddr = NULL; - e->rdma_sge.sge.length = 0; - e->rdma_sge.sge.sge_length = 0; + e->rdma_sge.mr = NULL; + e->rdma_sge.vaddr = NULL; + e->rdma_sge.length = 0; + e->rdma_sge.sge_length = 0; } e->opcode = opcode; e->sent = 0; @@ -1890,7 +2066,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, next = qp->r_head_ack_queue + 1; if (next > IPATH_MAX_RDMA_ATOMIC) next = 0; - spin_lock_irqsave(&qp->s_lock, flags); + spin_lock(&qp->s_lock); /* Double check we can process this while holding the s_lock. 
*/ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) goto unlock; @@ -1899,6 +2075,11 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, goto nack_inv_unlck; ipath_update_ack_queue(qp, next); } + e = &qp->s_ack_queue[qp->r_head_ack_queue]; + if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { + atomic_dec(&e->rdma_sge.mr->refcount); + e->rdma_sge.mr = NULL; + } if (!header_in_data) ateth = &ohdr->u.atomic_eth; else @@ -1909,19 +2090,20 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, goto nack_inv_unlck; rkey = be32_to_cpu(ateth->rkey); /* Check rkey & NAK */ - if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, + if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), vaddr, rkey, IB_ACCESS_REMOTE_ATOMIC))) goto nack_acc_unlck; /* Perform atomic OP and save result. */ maddr = (atomic64_t *) qp->r_sge.sge.vaddr; sdata = be64_to_cpu(ateth->swap_data); - e = &qp->s_ack_queue[qp->r_head_ack_queue]; e->atomic_data = (opcode == OP(FETCH_ADD)) ? 
(u64) atomic64_add_return(sdata, maddr) - sdata : (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, be64_to_cpu(ateth->compare_data), sdata); + atomic_dec(&qp->r_sge.sge.mr->refcount); + qp->r_sge.num_sge = 0; e->opcode = opcode; e->sent = 0; e->psn = psn & IPATH_PSN_MASK; @@ -1956,7 +2138,7 @@ rnr_nak: goto send_ack; nack_inv_unlck: - spin_unlock_irqrestore(&qp->s_lock, flags); + spin_unlock(&qp->s_lock); nack_inv: ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR); qp->r_nak_state = IB_NAK_INVALID_REQUEST; @@ -1964,7 +2146,7 @@ nack_inv: goto send_ack; nack_acc_unlck: - spin_unlock_irqrestore(&qp->s_lock, flags); + spin_unlock(&qp->s_lock); nack_acc: ipath_rc_error(qp, IB_WC_LOC_PROT_ERR); qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; @@ -1974,7 +2156,7 @@ send_ack: goto done; unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); + spin_unlock(&qp->s_lock); done: return; } diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h index a0fabaa..8f44d0c 100644 --- a/drivers/infiniband/hw/ipath/ipath_registers.h +++ b/drivers/infiniband/hw/ipath/ipath_registers.h @@ -59,7 +59,6 @@ #define INFINIPATH_R_SOFTWARE_SHIFT 24 #define INFINIPATH_R_BOARDID_MASK 0xFF #define INFINIPATH_R_BOARDID_SHIFT 32 -#define INFINIPATH_R_EMULATOR_MASK (1ULL<<62) /* kr_control bits */ #define INFINIPATH_C_FREEZEMODE 0x00000002 @@ -70,12 +69,10 @@ #define INFINIPATH_S_UPDTHRESH_SHIFT 24 #define INFINIPATH_S_UPDTHRESH_MASK 0x1f - #define IPATH_S_ABORT 0 #define IPATH_S_PIOINTBUFAVAIL 1 #define IPATH_S_PIOBUFAVAILUPD 2 #define IPATH_S_PIOENABLE 3 -#define IPATH_S_SPECIALTRIGGER 4 #define IPATH_S_SDMAINTENABLE 9 #define IPATH_S_SDMASINGLEDESCRIPTOR 10 #define IPATH_S_SDMAENABLE 11 @@ -86,7 +83,6 @@ #define INFINIPATH_S_PIOINTBUFAVAIL (1U << IPATH_S_PIOINTBUFAVAIL) #define INFINIPATH_S_PIOBUFAVAILUPD (1U << IPATH_S_PIOBUFAVAILUPD) #define INFINIPATH_S_PIOENABLE (1U << IPATH_S_PIOENABLE) -#define INFINIPATH_S_SPECIALTRIGGER (1U << IPATH_S_SPECIALTRIGGER) #define 
INFINIPATH_S_SDMAINTENABLE (1U << IPATH_S_SDMAINTENABLE) #define INFINIPATH_S_SDMASINGLEDESCRIPTOR \ (1U << IPATH_S_SDMASINGLEDESCRIPTOR) diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index 1cc5da6..44f0518 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -142,6 +142,12 @@ int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe, goto bail; bad_lkey: + while (j) { + struct ipath_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge; + + atomic_dec(&sge->mr->refcount); + } + ss->num_sge = 0; memset(&wc, 0, sizeof(wc)); wc.wr_id = wqe->wr_id; wc.status = IB_WC_LOC_PROT_ERR; @@ -157,7 +163,7 @@ bail: /** * ipath_get_rwqe - copy the next RWQE into the QP's RWQE * @qp: the QP - * @wr_id_only: update wr_id only, not SGEs + * @wr_id_only: update qp->r_wr_id only, not qp->r_sge * * Return 0 if no RWQE is available, otherwise return 1. * @@ -174,8 +180,6 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) u32 tail; int ret; - qp->r_sge.sg_list = qp->r_sg_list; - if (qp->ibqp.srq) { srq = to_isrq(qp->ibqp.srq); handler = srq->ibsrq.event_handler; @@ -197,20 +201,29 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) /* Validate tail before using it since it is user writable. */ if (tail >= rq->size) tail = 0; - do { - if (unlikely(tail == wq->head)) { + if (unlikely(tail == wq->head)) { + ret = 0; + goto unlock; + } + /* Make sure entry is read after head index is read. */ + smp_rmb(); + wqe = get_rwqe_ptr(rq, tail); + /* + * Even though we update the tail index in memory, the verbs + * consumer is not supposed to post more entries until a + * completion is generated. + */ + if (++tail >= rq->size) + tail = 0; + wq->tail = tail; + if (!wr_id_only) { + qp->r_sge.sg_list = qp->r_sg_list; + if (!ipath_init_sge(qp, wqe, &qp->r_len, &qp->r_sge)) { ret = 0; goto unlock; } - /* Make sure entry is read after head index is read. 
*/ - smp_rmb(); - wqe = get_rwqe_ptr(rq, tail); - if (++tail >= rq->size) - tail = 0; - } while (!wr_id_only && !ipath_init_sge(qp, wqe, &qp->r_len, - &qp->r_sge)); + } qp->r_wr_id = wqe->wr_id; - wq->tail = tail; ret = 1; set_bit(IPATH_R_WRID_VALID, &qp->r_aflags); @@ -268,6 +281,7 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp) u64 sdata; atomic64_t *maddr; enum ib_wc_status send_status; + int release; /* * Note that we check the responder QP state after @@ -325,6 +339,7 @@ again: memset(&wc, 0, sizeof wc); send_status = IB_WC_SUCCESS; + release = 1; sqp->s_sge.sge = wqe->sg_list[0]; sqp->s_sge.sg_list = wqe->sg_list + 1; sqp->s_sge.num_sge = wqe->wr.num_sge; @@ -332,7 +347,7 @@ again: switch (wqe->wr.opcode) { case IB_WR_SEND_WITH_IMM: wc.wc_flags = IB_WC_WITH_IMM; - wc.imm_data = wqe->wr.imm_data; + wc.ex.imm_data = wqe->wr.ex.imm_data; /* FALLTHROUGH */ case IB_WR_SEND: if (!ipath_get_rwqe(qp, 0)) @@ -343,7 +358,7 @@ again: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) goto inv_err; wc.wc_flags = IB_WC_WITH_IMM; - wc.imm_data = wqe->wr.imm_data; + wc.ex.imm_data = wqe->wr.ex.imm_data; if (!ipath_get_rwqe(qp, 1)) goto rnr_nak; /* FALLTHROUGH */ @@ -352,22 +367,27 @@ again: goto inv_err; if (wqe->length == 0) break; - if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length, + if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge.sge, wqe->length, wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.rkey, IB_ACCESS_REMOTE_WRITE))) goto acc_err; + qp->r_sge.sg_list = NULL; + qp->r_sge.num_sge = 1; qp->r_sge.total_len = wqe->length; break; case IB_WR_RDMA_READ: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) goto inv_err; - if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, + if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge.sge, wqe->length, wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.rkey, IB_ACCESS_REMOTE_READ))) goto acc_err; + release = 0; + sqp->s_sge.sg_list = NULL; + sqp->s_sge.num_sge = 1; qp->r_sge.sge = wqe->sg_list[0]; 
qp->r_sge.sg_list = wqe->sg_list + 1; qp->r_sge.num_sge = wqe->wr.num_sge; @@ -378,7 +398,7 @@ again: case IB_WR_ATOMIC_FETCH_AND_ADD: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) goto inv_err; - if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), + if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), wqe->wr.wr.atomic.remote_addr, wqe->wr.wr.atomic.rkey, IB_ACCESS_REMOTE_ATOMIC))) @@ -391,6 +411,8 @@ again: (u64) atomic64_add_return(sdata, maddr) - sdata : (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, sdata, wqe->wr.wr.atomic.swap); + atomic_dec(&qp->r_sge.sge.mr->refcount); + qp->r_sge.num_sge = 0; goto send_comp; default: @@ -407,14 +429,16 @@ again: if (len > sge->sge_length) len = sge->sge_length; BUG_ON(len == 0); - ipath_copy_sge(&qp->r_sge, sge->vaddr, len); + ipath_copy_sge(&qp->r_sge, sge->vaddr, len, release); sge->vaddr += len; sge->length -= len; sge->sge_length -= len; if (sge->sge_length == 0) { + if (!release) + atomic_dec(&sge->mr->refcount); if (--sqp->s_sge.num_sge) *sge = *sqp->s_sge.sg_list++; - } else if (sge->length == 0 && sge->mr != NULL) { + } else if (sge->length == 0 && sge->mr->lkey) { if (++sge->n >= IPATH_SEGSZ) { if (++sge->m >= sge->mr->mapsz) break; @@ -427,6 +451,12 @@ again: } sqp->s_len -= len; } + if (release) + while (qp->r_sge.num_sge) { + atomic_dec(&qp->r_sge.sge.mr->refcount); + if (--qp->r_sge.num_sge) + qp->r_sge.sge = *qp->r_sge.sg_list++; + } if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) goto send_comp; @@ -621,7 +651,8 @@ void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp, qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); - qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid); + qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid | + qp->remote_ah_attr.src_path_bits); bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index); bth0 |= extra_bytes << 20; 
ohdr->bth[0] = cpu_to_be32(bth0 | (1 << 22)); @@ -641,12 +672,14 @@ void ipath_do_send(unsigned long data) { struct ipath_qp *qp = (struct ipath_qp *)data; struct ipath_ibdev *dev = to_idev(qp->ibqp.device); + struct ipath_devdata *dd = dev->dd; int (*make_req)(struct ipath_qp *qp); unsigned long flags; if ((qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) && - qp->remote_ah_attr.dlid == dev->dd->ipath_lid) { + (qp->remote_ah_attr.dlid & ~((1 << dd->ipath_lmc) - 1)) == + dd->ipath_lid) { ipath_ruc_loopback(qp); goto bail; } @@ -701,10 +734,16 @@ void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, enum ib_wc_status status) { u32 old_last, last; + unsigned i; if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) return; + for (i = 0; i < wqe->wr.num_sge; i++) { + struct ipath_sge *sge = &wqe->sg_list[i]; + + atomic_dec(&sge->mr->refcount); + } /* See ch. 11.2.4.1 and 10.7.3.1 */ if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || (wqe->wr.send_flags & IB_SEND_SIGNALED) || @@ -726,6 +765,8 @@ void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, if (++last >= qp->s_size) last = 0; qp->s_last = last; + if (qp->s_acked == old_last) + qp->s_acked = last; if (qp->s_cur == old_last) qp->s_cur = last; if (qp->s_tail == old_last) diff --git a/drivers/infiniband/hw/ipath/ipath_sd7220.c b/drivers/infiniband/hw/ipath/ipath_sd7220.c index c0c9b44..aa47eb5 100644 --- a/drivers/infiniband/hw/ipath/ipath_sd7220.c +++ b/drivers/infiniband/hw/ipath/ipath_sd7220.c @@ -47,7 +47,7 @@ * various SerDes registers by IBC. It is not part of the normal kregs * map and is used in exactly one place, hence the #define below. 
*/ -#define KR_IBSerDesMappTable (0x94000 / (sizeof (uint64_t))) +#define KR_IBSerDesMappTable (0x94000 / (sizeof(uint64_t))) /* * Below used for sdnum parameter, selecting one of the two sections @@ -93,7 +93,7 @@ void ipath_set_relock_poll(struct ipath_devdata *dd, int ibup); /* * Below keeps track of whether the "once per power-on" initialization has * been done, because uC code Version 1.32.17 or higher allows the uC to - * be reset at will, and Automatic Equalization may requore it. So the + * be reset at will, and Automatic Equalization may require it. So the * state of the reset "pin", as reflected in was_reset parameter to * ipath_sd7220_init() is no longer valid. Instead, we check for the * actual uC code having been loaded. @@ -137,10 +137,10 @@ bail: return; } -/* After a reset or other unusual event, the epb interface may need +/* + * After a reset or other unusual event, the epb interface may need * to be re-synchronized, between the host and the uC. - * returns <0 for failure - * (which can only happen if we fail IBSD_RESYNC_TRIES times) + * returns <0 for failure to resync within IBSD_RESYNC_TRIES (not expected) */ #define IBSD_RESYNC_TRIES 3 #define IB_PGUDP(chn) (EPB_LOC((chn), 2, 1) | EPB_IB_QUAD0_CS) @@ -301,12 +301,7 @@ static void ipath_sd_trimdone_monitor(struct ipath_devdata *dd, ipath_cdbg(VERBOSE, "IBCS TRIMDONE set (%s)\n", where); else ipath_dev_err(dd, "IBCS TRIMDONE clear (%s)\n", where); - /* - * Do "dummy read/mod/wr" to get EPB in sane state after reset - * The default (and hopefully only, D6..0) value for MPREG6 is 0, and - * we want to set to 0x80. 
Since we can't trust read, or we wouldn't - * be doing this, hope for the best - */ + udelay(2); ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0x80, 0x80); @@ -370,12 +365,14 @@ int ipath_sd7220_init(struct ipath_devdata *dd, int was_reset) { int ret = 1; /* default to failure */ int first_reset; + int val_stat; if (!was_reset) { /* entered with reset not asserted, we need to do it */ ipath_ibsd_reset(dd, 1); ipath_sd_trimdone_monitor(dd, "Driver-reload"); } + /* Substitute our deduced value for was_reset */ ret = ipath_ibsd_ucode_loaded(dd); if (ret < 0) { @@ -383,88 +380,82 @@ int ipath_sd7220_init(struct ipath_devdata *dd, int was_reset) goto done; } first_reset = !ret; /* First reset if IBSD uCode not yet loaded */ - do { - /* - * Alter some regs per vendor latest doc, reset-defaults - * are not right for IB. - */ - ret = ipath_sd_early(dd); + + /* + * Alter some regs per vendor latest doc, reset-defaults + * are not right for IB. + */ + ret = ipath_sd_early(dd); + if (ret < 0) { + ipath_dev_err(dd, "Failed to set IB SERDES early defaults\n"); + ret = 1; + goto done; + } + + /* + * Set DAC manual trim IB. + * We only do this once after chip has been reset (usually + * same as once per system boot). + */ + if (first_reset) { + ret = ipath_sd_dactrim(dd); if (ret < 0) { - ipath_dev_err(dd, - "Failed to set IB SERDES early defaults\n"); + ipath_dev_err(dd, "Failed IB SERDES DAC trim\n"); ret = 1; - break; - } - /* Set DAC manual trim IB. - * We only do this once after chip has been reset (usually - * same as once per system boot). - */ - if (first_reset) { - ret = ipath_sd_dactrim(dd); - if (ret < 0) { - ipath_dev_err(dd, - "Failed IB SERDES DAC trim\n"); - ret = 1; - break; - } + goto done; } - /* - * Set various registers (DDS and RXEQ) that will be - * controlled by IBC (in 1.2 mode) to reasonable preset values - * Calling the "internal" version avoids the "check for needed" - * and "trimdone monitor" that might be counter-productive. 
- */ - ret = ipath_internal_presets(dd); + } + + /* + * Set various registers (DDS and RXEQ) that will be + * controlled by IBC (in 1.2 mode) to reasonable preset values + * Calling the "internal" version avoids the "check for needed" + * and "trimdone monitor" that might be counter-productive. + */ + ret = ipath_internal_presets(dd); + if (ret < 0) { + ipath_dev_err(dd, "Failed to set IB SERDES presets\n"); + ret = 1; + goto done; + } + ret = ipath_sd_trimself(dd, 0x80); + if (ret < 0) { + ipath_dev_err(dd, "Failed to set IB SERDES TRIMSELF\n"); + ret = 1; + goto done; + } + + /* Load image, then try to verify */ + ret = 0; /* Assume success */ + if (first_reset) { + int vfy; + int trim_done; + ipath_dbg("SerDes uC was reset, reloading PRAM\n"); + ret = ipath_sd7220_ib_load(dd); if (ret < 0) { - ipath_dev_err(dd, "Failed to set IB SERDES presets\n"); + ipath_dev_err(dd, "Failed to load IB SERDES image\n"); ret = 1; - break; + goto done; } - ret = ipath_sd_trimself(dd, 0x80); - if (ret < 0) { - ipath_dev_err(dd, "Failed to set IB SERDES TRIMSELF\n"); + + /* Loaded image, try to verify */ + vfy = ipath_sd7220_ib_vfy(dd); + if (vfy != ret) { + ipath_dev_err(dd, "SERDES PRAM VFY failed\n"); ret = 1; - break; + goto done; } + /* + * Loaded and verified. Almost good... + * hold "success" in ret + */ + ret = 0; - /* Load image, then try to verify */ - ret = 0; /* Assume success */ - if (first_reset) { - ipath_dbg("SerDes uC was reset, reloading PRAM\n"); - ret = ipath_sd7220_ib_load(dd); - if (ret < 0) { - ipath_dev_err(dd, - "Failed to load IB SERDES image\n"); - break; - } else { - /* Loaded image, try to verify */ - int vfy; - - vfy = ipath_sd7220_ib_vfy(dd); - if (vfy != ret) { - ipath_dev_err(dd, - "SERDES PRAM VFY failed\n"); - ret = 1; - } else { - /* - * Loaded and verified. Almost good... 
- * hold "success" in ret - */ - ret = 0; - } /* end if verified */ - } /* end if loaded */ - } /* end if first_reset */ - } while (0) ; /* do_while for goto-less bailing */ - - if (ret == 0 && first_reset) { /* * Prev steps all worked, continue bringup * De-assert RESET to uC, only in first reset, to allow * trimming. - */ - int trim_done; - - /* + * * Since our default setup sets START_EQ1 to * PRESET, we need to clear that for this very first run. */ @@ -478,7 +469,7 @@ int ipath_sd7220_init(struct ipath_devdata *dd, int was_reset) ipath_ibsd_reset(dd, 0); /* * If this is not the first reset, trimdone should be set - * already. We may need to check about this. + * already. */ trim_done = ipath_sd_trimdone_poll(dd); /* @@ -493,25 +484,19 @@ int ipath_sd7220_init(struct ipath_devdata *dd, int was_reset) ret = 1; goto done; } - /* - * DEBUG: check each time we reset if trimdone bits have - * gotten cleared, and re-set them. - */ + ipath_sd_trimdone_monitor(dd, "First-reset"); /* Remember so we do not re-do the load, dactrim, etc. */ dd->serdes_first_init_done = 1; } - if (ret == 0) { - /* - * setup for channel training and load values for - * RxEq and DDS in tables used by IBC in IB1.2 mode - */ - int val_stat; + /* + * Setup for channel training and load values for + * RxEq and DDS in tables used by IBC in IB1.2 mode + */ - val_stat = ipath_sd_setvals(dd); - if (val_stat < 0) - ret = 1; - } + val_stat = ipath_sd_setvals(dd); + if (val_stat < 0) + ret = 1; done: /* start relock timer regardless, but start at 1 second */ ipath_set_relock_poll(dd, -1); @@ -573,9 +558,6 @@ static int epb_access(struct ipath_devdata *dd, int sdnum, int claim) */ u64 newval = 0; ipath_write_kreg(dd, acc, newval); - /* WHAT IS APPROPRIATE DELAY? How do we handle - * failures? 
- */ /* First read after write is not trustworthy */ pollval = ipath_read_kreg32(dd, acc); udelay(5); @@ -587,7 +569,6 @@ static int epb_access(struct ipath_devdata *dd, int sdnum, int claim) u64 pollval; u64 newval = EPB_ACC_REQ | oct_sel; ipath_write_kreg(dd, acc, newval); - /* WHAT IS APPROPRIATE DELAY? How do we handle failures? */ /* First read after write is not trustworthy */ pollval = ipath_read_kreg32(dd, acc); udelay(5); @@ -772,8 +753,8 @@ static int ipath_sd7220_ram_xfer(struct ipath_devdata *dd, int sdnum, u32 loc, * In future code, we may need to distinguish several address ranges, * and select various memories based on this. For now, just trim * "loc" (location including address and memory select) to - * "addr" (address within memory). we will only support PRAM - * The memory is 8KB. + * "addr" (address within memory). we will only support PRAM + * The memory is 8KB. */ addr = loc & 0x1FFF; for (tries = EPB_TRANS_TRIES; tries; --tries) { @@ -784,7 +765,6 @@ static int ipath_sd7220_ram_xfer(struct ipath_devdata *dd, int sdnum, u32 loc, } sofar = 0; - /* If failed to see ready, what error-handling? */ if (tries <= 0) ipath_dbg("No initial RDY on EPB access request\n"); else { @@ -908,10 +888,7 @@ int ipath_sd7220_prog_vfy(struct ipath_devdata *dd, int sdnum, return errors ? -errors : sofar; } -/* - * Future driver may use IRQ to sequence SERDES and IBC bringup, but - * for now we poll. - */ +/* IRQ not set up at this point in init, so we poll. */ #define IB_SERDES_TRIM_DONE (1ULL << 11) #define TRIM_TMO (30) @@ -944,45 +921,15 @@ static int ipath_sd_trimdone_poll(struct ipath_devdata *dd) #define TX_FAST_ELT (9) /* - * Set the "register patch" values for SERDES. These are referenced, - * indirectly, by the contents of the SerDesDDSRXEQ[] array. Details - * are beyond the scope of this document, but in essence, there are - * two classes of registers that are "tweaked" during operation, - * Driver DeEmphasis (DDS) and Receiver Equalization. 
The first - * 'm' (currently 6) entries specify registers for DDS, and the next - * 'n' (currently 15) entries specify registers for RxEQ. In operation, - * the hardware traverses the list for each, forming an index into - * IBSerDesMappTable[] in one of two ways: - * DDS entries: - * (0 << 8) | (dds_val) << 4) | (index) - * RxEQ entries: - * (1 << 8) | (rxeq_val << 6) | (0 << 5) | (index) - * Where (index) is the index in the SerDesDDSRXEQ[] array, and - * dds_val (4 bits) or rxeq_val (2 bits) are based on conditions in - * the SerDes and IBC. - * - * With the data coming from one place, and the addresses coming from - * another, it can get confusing, but the _intent_ is to follow the - * initialization sequence described in Infinband_REG_Prcedure_10_17_06.pdf. - * This is somewhat complicated by the fact that although the doc - * says "write these registers in this order", in fact the order - * is not relevant (per the vendor). In an attempt to make it somewhat - * easier on human maintainers, the table below is organized as 16 - * rows, corresponding to one of the rows in the vendor's table. - * Each row has amplitude, Main_control, Post-curser, and Pre-curser values - * for "full" (DDR) and "half" (SDR) rate. These are merged into an array - * of six bytes, in the order they are actually to be stored, to the - * Registers of element 9, Rges 0, 1, 9, 0xA, 6, 7, in that order - * - * Also, because the vendor specifies that the "Enable" bits are set in - * every case, we do that in the macro. That way the values can be - * visually compared with vendor docs. + * Set the "negotiation" values for SERDES. These are used by the IB1.2 + * link negotiation. Macros below are an attempt to keep the values a + * little more human-editable. + * First, values related to Drive De-emphasis Settings. */ #define NUM_DDS_REGS 6 #define DDS_REG_MAP 0x76A910 /* LSB-first list of regs (in elt 9) to mod */ -/* Ignore values from vendor.
Use compromise values in all slots */ #define DDS_VAL(amp_d, main_d, ipst_d, ipre_d, amp_s, main_s, ipst_s, ipre_s) \ { { ((amp_d & 0x1F) << 1) | 1, ((amp_s & 0x1F) << 1) | 1, \ (main_d << 3) | 4 | (ipre_d >> 2), \ @@ -1014,22 +961,11 @@ static struct dds_init { DDS_VAL(28, 25, 6, 0, 21, 28, 3, 0), DDS_VAL(27, 26, 5, 0, 19, 29, 2, 0), DDS_VAL(25, 27, 4, 0, 17, 30, 1, 0) -/* - * Below is 17th value from vendor. IBC only handles 16 values, and uses - * first one as default. The line below just documents what we would use. - * DDS_VAL(23, 28, 3, 0, 15, 31, 0, 0 ) - */ }; /* - * Now the RXEQ section of the table. This is both simpler and - * more complicated. Simpler because each "register store" has only - * four valuess associated with it (only two bits of RxEqualization). - * So, unlike the DDS values above, we simply enumerate all four - * values across one "line", which corresponds to one register-write. - * More complicated because there are several register-writes that do - * not differ across RXEQ vals. - * Values below are from Vendor doc dated 7May2007 + * Next, values related to Receive Equalization. + * In comments, FDR (Full) is IB DDR, HDR (Half) is IB SDR */ /* Hardware packs an element number and register address thus: */ #define RXEQ_INIT_RDESC(elt, addr) (((elt) & 0xF) | ((addr) << 4)) @@ -1044,11 +980,6 @@ static struct dds_init { #define RXEQ_SDR_G1CNT_Z1CNT 0x11 #define RXEQ_SDR_ZCNT 23 -/* - * The values below (as opposed to what "was") were experimentally determined - * to reduce IB Symbol errors, but currently all four "sets" are the same. - * with more experimentation, we will derive a range. - */ static struct rxeq_init { u16 rdesc; /* in form used in SerDesDDSRXEQ */ u8 rdata[4]; @@ -1056,17 +987,17 @@ static struct rxeq_init { /* Set Rcv Eq. 
to Preset node */ RXEQ_VAL_ALL(7, 0x27, 0x10), /* Set DFELTHFDR/HDR thresholds */ - RXEQ_VAL(7, 8, 0, 0, 0, 0), /* FDR, was 0, 1, 2, 3 */ + RXEQ_VAL(7, 8, 0, 0, 0, 0), /* FDR */ RXEQ_VAL(7, 0x21, 0, 0, 0, 0), /* HDR */ /* Set TLTHFDR/HDR theshold */ - RXEQ_VAL(7, 9, 2, 2, 2, 2), /* FDR, was 0, 2, 4, 6 */ - RXEQ_VAL(7, 0x23, 2, 2, 2, 2), /* HDR, was 0, 1, 2, 3 */ + RXEQ_VAL(7, 9, 2, 2, 2, 2), /* FDR */ + RXEQ_VAL(7, 0x23, 2, 2, 2, 2), /* HDR */ /* Set Preamp setting 2 (ZFR/ZCNT) */ - RXEQ_VAL(7, 0x1B, 12, 12, 12, 12), /* FDR, was 12, 16, 20, 24 */ - RXEQ_VAL(7, 0x1C, 12, 12, 12, 12), /* HDR, was 12, 16, 20, 24 */ + RXEQ_VAL(7, 0x1B, 12, 12, 12, 12), /* FDR */ + RXEQ_VAL(7, 0x1C, 12, 12, 12, 12), /* HDR */ /* Set Preamp DC gain and Setting 1 (GFR/GHR) */ - RXEQ_VAL(7, 0x1E, 0x10, 0x10, 0x10, 0x10), /* FDR, was 0x10, 0x11, 0x12, 0x14 */ - RXEQ_VAL(7, 0x1F, 0x10, 0x10, 0x10, 0x10), /* HDR, was 0x10, 0x11, 0x12, 0x14 */ + RXEQ_VAL(7, 0x1E, 0x10, 0x10, 0x10, 0x10), /* FDR */ + RXEQ_VAL(7, 0x1F, 0x10, 0x10, 0x10, 0x10), /* HDR */ /* Toggle RELOCK (in VCDL_CTRL0) to lock to data */ RXEQ_VAL_ALL(6, 6, 0x20), /* Set D5 High */ RXEQ_VAL_ALL(6, 6, 0), /* Set D5 Low */ @@ -1171,13 +1102,13 @@ static int ibsd_sto_noisy(struct ipath_devdata *dd, int loc, int val, int mask) return ret; } -/* Repeat a "store" across all channels of the IB SerDes. +/* + * Repeat a "store" across all channels of the IB SerDes. * Although nominally it inherits the "read value" of the last * channel it modified, the only really useful return is <0 for * failure, >= 0 for success. The parameter 'loc' is assumed to * be the location for the channel-0 copy of the register to - * be modified. In future, we could use the "gang write" option - * of EPB, as long as we do not read. + * be modified. 
*/ static int ibsd_mod_allchnls(struct ipath_devdata *dd, int loc, int val, int mask) @@ -1186,7 +1117,8 @@ static int ibsd_mod_allchnls(struct ipath_devdata *dd, int loc, int val, int chnl; if (loc & EPB_GLOBAL_WR) { - /* our caller has assured us that we can set all four + /* + * Our caller has assured us that we can set all four * channels at once. Trust that. If mask is not 0xFF, * we will read the _specified_ channel for our starting * value. @@ -1285,8 +1217,6 @@ static int set_rxeq_vals(struct ipath_devdata *dd, int vsel) /* * Set the default values (row 0) for DDR Driver Demphasis. * we do this initially and whenever we turn off IB-1.2 - * Vendor recommends non-default presets, depending on - * cable length. Initial testing will assume 3 meter cables. * The "default" values for Rx equalization are also stored to * SerDes registers. Formerly (and still default), we used set 2. * For experimenting with cables and link-partners, we allow changing @@ -1295,7 +1225,8 @@ static int set_rxeq_vals(struct ipath_devdata *dd, int vsel) static unsigned ipath_rxeq_set = 2; module_param_named(rxeq_default_set, ipath_rxeq_set, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(rxeq_default_set, "Which set [0..3] of Rx Equalization values is default"); +MODULE_PARM_DESC(rxeq_default_set, + "Which set [0..3] of Rx Equalization values is default"); static int ipath_internal_presets(struct ipath_devdata *dd) { @@ -1337,44 +1268,33 @@ static int ipath_sd_early(struct ipath_devdata *dd) int ret = -1; /* Default failed */ int chnl; - do { - for (chnl = 0; chnl < 4; ++chnl) { - ret = ibsd_sto_noisy(dd, RXHSCTRL0(chnl), 0xD4, 0xFF); - if (ret < 0) - break; - } + for (chnl = 0; chnl < 4; ++chnl) { + ret = ibsd_sto_noisy(dd, RXHSCTRL0(chnl), 0xD4, 0xFF); if (ret < 0) - break; - for (chnl = 0; chnl < 4; ++chnl) { - ret = ibsd_sto_noisy(dd, VCDL_DAC2(chnl), 0x2D, 0xFF); - if (ret < 0) - break; - } + goto bail; + } + for (chnl = 0; chnl < 4; ++chnl) { + ret = ibsd_sto_noisy(dd, 
VCDL_DAC2(chnl), 0x2D, 0xFF); if (ret < 0) - break; - /* more fine-tuning of what will be default */ - for (chnl = 0; chnl < 4; ++chnl) { - ret = ibsd_sto_noisy(dd, VCDL_CTRL2(chnl), 3, 0xF); - if (ret < 0) - break; - } + goto bail; + } + /* more fine-tuning of what will be default */ + for (chnl = 0; chnl < 4; ++chnl) { + ret = ibsd_sto_noisy(dd, VCDL_CTRL2(chnl), 3, 0xF); if (ret < 0) - break; - for (chnl = 0; chnl < 4; ++chnl) { - ret = ibsd_sto_noisy(dd, START_EQ1(chnl), 0x10, 0xFF); - if (ret < 0) - break; - } + goto bail; + } + for (chnl = 0; chnl < 4; ++chnl) { + ret = ibsd_sto_noisy(dd, START_EQ1(chnl), 0x10, 0xFF); if (ret < 0) - break; - for (chnl = 0; chnl < 4; ++chnl) { - ret = ibsd_sto_noisy(dd, START_EQ2(chnl), 0x30, 0xFF); - if (ret < 0) - break; - } + goto bail; + } + for (chnl = 0; chnl < 4; ++chnl) { + ret = ibsd_sto_noisy(dd, START_EQ2(chnl), 0x30, 0xFF); if (ret < 0) - break; - } while (0); + goto bail; + } +bail: return ret; } @@ -1387,42 +1307,32 @@ static int ipath_sd_dactrim(struct ipath_devdata *dd) int ret = -1; /* Default failed */ int chnl; - do { - for (chnl = 0; chnl < 4; ++chnl) { - ret = ibsd_sto_noisy(dd, BACTRL(chnl), 0x40, 0xFF); - if (ret < 0) - break; - } + for (chnl = 0; chnl < 4; ++chnl) { + ret = ibsd_sto_noisy(dd, BACTRL(chnl), 0x40, 0xFF); if (ret < 0) - break; - for (chnl = 0; chnl < 4; ++chnl) { - ret = ibsd_sto_noisy(dd, LDOUTCTRL1(chnl), 0x04, 0xFF); - if (ret < 0) - break; - } + goto bail; + } + for (chnl = 0; chnl < 4; ++chnl) { + ret = ibsd_sto_noisy(dd, LDOUTCTRL1(chnl), 0x04, 0xFF); if (ret < 0) - break; - for (chnl = 0; chnl < 4; ++chnl) { - ret = ibsd_sto_noisy(dd, RXHSSTATUS(chnl), 0x04, 0xFF); - if (ret < 0) - break; - } + goto bail; + } + for (chnl = 0; chnl < 4; ++chnl) { + ret = ibsd_sto_noisy(dd, RXHSSTATUS(chnl), 0x04, 0xFF); if (ret < 0) - break; - /* - * delay for max possible number of steps, with slop. - * Each step is about 4usec. 
- */ - udelay(415); - for (chnl = 0; chnl < 4; ++chnl) { - ret = ibsd_sto_noisy(dd, LDOUTCTRL1(chnl), 0x00, 0xFF); - if (ret < 0) - break; - } + goto bail; + } + /* + * delay for max possible number of steps, with slop. + * Each step is about 4usec. + */ + udelay(415); + for (chnl = 0; chnl < 4; ++chnl) { + ret = ibsd_sto_noisy(dd, LDOUTCTRL1(chnl), 0x00, 0xFF); if (ret < 0) - break; - } while (0); - + goto bail; + } +bail: return ret; } @@ -1495,19 +1405,16 @@ static void ipath_run_relock(unsigned long opaque) val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); ltstate = ipath_ib_linktrstate(dd, val); - /* Below check was <= CFGDEBOUNCE, JBR requests change for test */ if (ltstate <= INFINIPATH_IBCS_LT_STATE_CFGWAITRMT && ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) { int timeoff; /* Not up yet. Try again, if allowed by module-param */ if (ipath_relock_by_timer) { - if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) { + if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) ipath_cdbg(VERBOSE, "Skip RELOCK in AUTONEG\n"); - } else if (!(dd->ipath_flags & - IPATH_IB_LINK_DISABLED)) { + else if (!(dd->ipath_flags & IPATH_IB_LINK_DISABLED)) { ipath_cdbg(VERBOSE, "RELOCK\n"); ipath_toggle_rclkrls(dd); - } } /* re-set timer for next check */ @@ -1528,7 +1435,7 @@ void ipath_set_relock_poll(struct ipath_devdata *dd, int ibup) struct ipath_relock *irp = &dd->ipath_relock_singleton; if (ibup > 0) { - /* we are now up, so squelch timer */ + /* we are now up, so relax timer to 1 second interval */ if (atomic_read(&irp->ipath_relock_timer_active)) mod_timer(&irp->ipath_relock_timer, jiffies + HZ); } else { diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c index 3697449..85fdc98 100644 --- a/drivers/infiniband/hw/ipath/ipath_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_sdma.c @@ -131,76 +131,28 @@ int ipath_sdma_make_progress(struct ipath_devdata *dd) dd->ipath_sdma_descq_head = 0; if (txp && txp->next_descq_idx == 
dd->ipath_sdma_descq_head) { - /* move to notify list */ + /* remove from active list */ + list_del_init(&txp->list); + if (txp->callback) + (*txp->callback)(txp->callback_cookie, + IPATH_SDMA_TXREQ_S_OK); if (txp->flags & IPATH_SDMA_TXREQ_F_VL15) vl15_watchdog_deq(dd); - list_move_tail(lp, &dd->ipath_sdma_notifylist); if (!list_empty(&dd->ipath_sdma_activelist)) { lp = dd->ipath_sdma_activelist.next; txp = list_entry(lp, struct ipath_sdma_txreq, list); start_idx = txp->start_idx; - } else { - lp = NULL; + } else txp = NULL; - } } progress = 1; } - if (progress) - tasklet_hi_schedule(&dd->ipath_sdma_notify_task); - done: return progress; } -static void ipath_sdma_notify(struct ipath_devdata *dd, struct list_head *list) -{ - struct ipath_sdma_txreq *txp, *txp_next; - - list_for_each_entry_safe(txp, txp_next, list, list) { - list_del_init(&txp->list); - - if (txp->callback) - (*txp->callback)(txp->callback_cookie, - txp->callback_status); - } -} - -static void sdma_notify_taskbody(struct ipath_devdata *dd) -{ - unsigned long flags; - struct list_head list; - - INIT_LIST_HEAD(&list); - - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - - list_splice_init(&dd->ipath_sdma_notifylist, &list); - - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - - ipath_sdma_notify(dd, &list); - - /* - * The IB verbs layer needs to see the callback before getting - * the call to ipath_ib_piobufavail() because the callback - * handles releasing resources the next send will need. - * Otherwise, we could do these calls in - * ipath_sdma_make_progress(). 
- */ - ipath_ib_piobufavail(dd->verbs_dev); -} - -static void sdma_notify_task(unsigned long opaque) -{ - struct ipath_devdata *dd = (struct ipath_devdata *)opaque; - - if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) - sdma_notify_taskbody(dd); -} - static void dump_sdma_state(struct ipath_devdata *dd) { unsigned long reg; @@ -258,19 +210,14 @@ static void sdma_abort_task(unsigned long opaque) if (status == IPATH_SDMA_ABORT_ABORTED) { struct ipath_sdma_txreq *txp, *txpnext; u64 hwstatus; - int notify = 0; hwstatus = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmastatus); - if (/* ScoreBoardDrainInProg */ - test_bit(63, &hwstatus) || - /* AbortInProg */ - test_bit(62, &hwstatus) || - /* InternalSDmaEnable */ - test_bit(61, &hwstatus) || - /* ScbEmpty */ - !test_bit(30, &hwstatus)) { + if ((hwstatus & (IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG | + IPATH_SDMA_STATUS_ABORT_IN_PROG | + IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE)) || + !(hwstatus & IPATH_SDMA_STATUS_SCB_EMPTY)) { if (dd->ipath_sdma_reset_wait > 0) { /* not done shutting down sdma */ --dd->ipath_sdma_reset_wait; @@ -284,14 +231,13 @@ static void sdma_abort_task(unsigned long opaque) /* dequeue all "sent" requests */ list_for_each_entry_safe(txp, txpnext, &dd->ipath_sdma_activelist, list) { - txp->callback_status = IPATH_SDMA_TXREQ_S_ABORTED; + list_del_init(&txp->list); + if (txp->callback) + (*txp->callback)(txp->callback_cookie, + IPATH_SDMA_TXREQ_S_ABORTED); if (txp->flags & IPATH_SDMA_TXREQ_F_VL15) vl15_watchdog_deq(dd); - list_move_tail(&txp->list, &dd->ipath_sdma_notifylist); - notify = 1; } - if (notify) - tasklet_hi_schedule(&dd->ipath_sdma_notify_task); /* reset our notion of head and tail */ dd->ipath_sdma_descq_tail = 0; @@ -345,7 +291,7 @@ resched: * state change */ if (jiffies > dd->ipath_sdma_abort_jiffies) { - ipath_dbg("looping with status 0x%016llx\n", + ipath_dbg("looping with status 0x%08lx\n", dd->ipath_sdma_status); dd->ipath_sdma_abort_jiffies = jiffies + 5 * HZ; } @@ 
-484,10 +430,7 @@ int setup_sdma(struct ipath_devdata *dd) senddmabufmask[2]); INIT_LIST_HEAD(&dd->ipath_sdma_activelist); - INIT_LIST_HEAD(&dd->ipath_sdma_notifylist); - tasklet_init(&dd->ipath_sdma_notify_task, sdma_notify_task, - (unsigned long) dd); tasklet_init(&dd->ipath_sdma_abort_task, sdma_abort_task, (unsigned long) dd); @@ -524,7 +467,6 @@ void teardown_sdma(struct ipath_devdata *dd) spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); tasklet_kill(&dd->ipath_sdma_abort_task); - tasklet_kill(&dd->ipath_sdma_notify_task); /* turn off sdma */ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); @@ -538,15 +480,15 @@ void teardown_sdma(struct ipath_devdata *dd) /* dequeue all "sent" requests */ list_for_each_entry_safe(txp, txpnext, &dd->ipath_sdma_activelist, list) { - txp->callback_status = IPATH_SDMA_TXREQ_S_SHUTDOWN; + list_del_init(&txp->list); + if (txp->callback) + (*txp->callback)(txp->callback_cookie, + IPATH_SDMA_TXREQ_S_SHUTDOWN); if (txp->flags & IPATH_SDMA_TXREQ_F_VL15) vl15_watchdog_deq(dd); - list_move_tail(&txp->list, &dd->ipath_sdma_notifylist); } spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - sdma_notify_taskbody(dd); - del_timer_sync(&dd->ipath_sdma_vl15_timer); spin_lock_irqsave(&dd->ipath_sdma_lock, flags); @@ -615,7 +557,7 @@ void ipath_restart_sdma(struct ipath_devdata *dd) } spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); if (!needed) { - ipath_dbg("invalid attempt to restart SDMA, status 0x%016llx\n", + ipath_dbg("invalid attempt to restart SDMA, status 0x%08lx\n", dd->ipath_sdma_status); goto bail; } @@ -702,10 +644,8 @@ retry: addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr, tx->map_len, DMA_TO_DEVICE); - if (dma_mapping_error(addr)) { - ret = -EIO; - goto unlock; - } + if (dma_mapping_error(addr)) + goto ioerr; dwoffset = tx->map_len >> 2; make_sdma_desc(dd, sdmadesc, (u64) addr, dwoffset, 0); @@ -745,6 +685,8 @@ retry: dw = (len + 3) >> 2; addr = dma_map_single(&dd->pcidev->dev, sge->vaddr, dw << 2, 
DMA_TO_DEVICE); + if (dma_mapping_error(addr)) + goto unmap; make_sdma_desc(dd, sdmadesc, (u64) addr, dw, dwoffset); /* SDmaUseLargeBuf has to be set in every descriptor */ if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF) @@ -765,7 +707,7 @@ retry: if (sge->sge_length == 0) { if (--ss->num_sge) *sge = *ss->sg_list++; - } else if (sge->length == 0 && sge->mr != NULL) { + } else if (sge->length == 0 && sge->mr->lkey) { if (++sge->n >= IPATH_SEGSZ) { if (++sge->m >= sge->mr->mapsz) break; @@ -791,18 +733,28 @@ retry: descqp[0] |= __constant_cpu_to_le64(1ULL << 15); } + tx->txreq.next_descq_idx = tail; + dd->ipath_sdma_descq_tail = tail; /* Commit writes to memory and advance the tail on the chip */ wmb(); ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail); - tx->txreq.next_descq_idx = tail; - tx->txreq.callback_status = IPATH_SDMA_TXREQ_S_OK; - dd->ipath_sdma_descq_tail = tail; dd->ipath_sdma_descq_added += tx->txreq.sg_count; list_add_tail(&tx->txreq.list, &dd->ipath_sdma_activelist); if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_VL15) vl15_watchdog_enq(dd); - + goto unlock; + +unmap: + while (tail != dd->ipath_sdma_descq_tail) { + if (!tail) + tail = dd->ipath_sdma_descq_cnt - 1; + else + tail--; + unmap_desc(dd, tail); + } +ioerr: + ret = -EIO; unlock: spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); fail: diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c index b6c809d..f63e143 100644 --- a/drivers/infiniband/hw/ipath/ipath_stats.c +++ b/drivers/infiniband/hw/ipath/ipath_stats.c @@ -112,6 +112,14 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg) dd->ipath_lastrpkts = val; } val64 = dd->ipath_rpkts; + } else if (creg == dd->ipath_cregs->cr_ibsymbolerrcnt) { + if (dd->ibdeltainprog) + val64 -= val64 - dd->ibsymsnap; + val64 -= dd->ibsymdelta; + } else if (creg == dd->ipath_cregs->cr_iblinkerrrecovcnt) { + if (dd->ibdeltainprog) + val64 -= val64 - dd->iblnkerrsnap; + val64 -= 
dd->iblnkerrdelta; } else val64 = (u64) val; @@ -308,11 +316,12 @@ void ipath_get_faststats(unsigned long opaque) * level. */ if (iserr) - ipath_dbg("Re-enabling queue full errors (%s)\n", - ebuf); + ipath_dbg( + "Re-enabling queue full errors (%s)\n", + ebuf); else ipath_cdbg(ERRPKT, "Re-enabling packet" - " problem interrupt (%s)\n", ebuf); + " problem interrupt (%s)\n", ebuf); } /* re-enable masked errors */ diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c index ca1df73..ec04b5d 100644 --- a/drivers/infiniband/hw/ipath/ipath_sysfs.c +++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c @@ -164,7 +164,6 @@ static ssize_t show_boardversion(struct device *dev, return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_boardversion); } - static ssize_t show_localbus_info(struct device *dev, struct device_attribute *attr, char *buf) @@ -1088,13 +1087,13 @@ static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL); static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv); static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override); static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL); +static DEVICE_ATTR(localbus_info, S_IRUGO, show_localbus_info, NULL); static DEVICE_ATTR(jint_max_packets, S_IWUSR | S_IRUGO, show_jint_max_packets, store_jint_max_packets); static DEVICE_ATTR(jint_idle_ticks, S_IWUSR | S_IRUGO, show_jint_idle_ticks, store_jint_idle_ticks); static DEVICE_ATTR(tempsense, S_IWUSR | S_IRUGO, show_tempsense, store_tempsense); -static DEVICE_ATTR(localbus_info, S_IRUGO, show_localbus_info, NULL); static struct attribute *dev_attributes[] = { &dev_attr_guid.attr, @@ -1194,7 +1193,6 @@ void ipath_driver_remove_group(struct device_driver *drv) int ipath_device_create_group(struct device *dev, struct ipath_devdata *dd) { int ret; - char unit[5]; ret = sysfs_create_group(&dev->kobj, &dev_attr_group); if (ret) @@ -1204,15 +1202,10 @@ int ipath_device_create_group(struct device *dev, struct ipath_devdata *dd) 
if (ret) goto bail_attrs; - snprintf(unit, sizeof(unit), "%02d", dd->ipath_unit); - ret = sysfs_create_link(&dev->driver->kobj, &dev->kobj, unit); - if (ret) - goto bail_counter; - if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) { ret = device_create_file(dev, &dev_attr_jint_idle_ticks); if (ret) - goto bail_unit; + goto bail_counter; ret = device_create_file(dev, &dev_attr_jint_max_packets); if (ret) goto bail_idle; @@ -1222,14 +1215,12 @@ int ipath_device_create_group(struct device *dev, struct ipath_devdata *dd) goto bail_max; } - goto bail; + return 0; bail_max: device_remove_file(dev, &dev_attr_jint_max_packets); bail_idle: device_remove_file(dev, &dev_attr_jint_idle_ticks); -bail_unit: - sysfs_remove_link(&dev->driver->kobj, unit); bail_counter: sysfs_remove_group(&dev->kobj, &dev_counter_attr_group); bail_attrs: @@ -1240,11 +1231,6 @@ bail: void ipath_device_remove_group(struct device *dev, struct ipath_devdata *dd) { - char unit[5]; - - snprintf(unit, sizeof(unit), "%02d", dd->ipath_unit); - sysfs_remove_link(&dev->driver->kobj, unit); - sysfs_remove_group(&dev->kobj, &dev_counter_attr_group); if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) { diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c index 717c13e..38df44f 100644 --- a/drivers/infiniband/hw/ipath/ipath_uc.c +++ b/drivers/infiniband/hw/ipath/ipath_uc.c @@ -114,7 +114,7 @@ int ipath_make_uc_req(struct ipath_qp *qp) qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE); /* Immediate data comes after the BTH */ - ohdr->u.imm_data = wqe->wr.imm_data; + ohdr->u.imm_data = wqe->wr.ex.imm_data; hwords += 1; } if (wqe->wr.send_flags & IB_SEND_SOLICITED) @@ -143,7 +143,7 @@ int ipath_make_uc_req(struct ipath_qp *qp) qp->s_state = OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE); /* Immediate data comes after the RETH */ - ohdr->u.rc.imm_data = wqe->wr.imm_data; + ohdr->u.rc.imm_data = wqe->wr.ex.imm_data; hwords += 1; if (wqe->wr.send_flags & IB_SEND_SOLICITED) bth0 |= 1 << 23; @@ -172,7 
+172,7 @@ int ipath_make_uc_req(struct ipath_qp *qp) else { qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE); /* Immediate data comes after the BTH */ - ohdr->u.imm_data = wqe->wr.imm_data; + ohdr->u.imm_data = wqe->wr.ex.imm_data; hwords += 1; } if (wqe->wr.send_flags & IB_SEND_SOLICITED) @@ -197,7 +197,7 @@ int ipath_make_uc_req(struct ipath_qp *qp) qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE); /* Immediate data comes after the BTH */ - ohdr->u.imm_data = wqe->wr.imm_data; + ohdr->u.imm_data = wqe->wr.ex.imm_data; hwords += 1; if (wqe->wr.send_flags & IB_SEND_SOLICITED) bth0 |= 1 << 23; @@ -225,6 +225,26 @@ unlock: return ret; } +static void fix_mr_refcount(struct ipath_qp *qp) +{ + unsigned i; + + if (qp->r_sge.num_sge == qp->s_rdma_read_sge.num_sge) + return; + while (qp->r_sge.num_sge) { + atomic_dec(&qp->r_sge.sge.mr->refcount); + if (--qp->r_sge.num_sge) + qp->r_sge.sge = *qp->r_sge.sg_list++; + } + for (i = 0; i < qp->s_rdma_read_sge.num_sge; i++) { + struct ipath_sge *sge = i ? + &qp->s_rdma_read_sge.sg_list[i - 1] : + &qp->s_rdma_read_sge.sge; + + atomic_inc(&sge->mr->refcount); + } +} + /** * ipath_uc_rcv - handle an incoming UC packet * @dev: the device the packet came in on @@ -293,6 +313,11 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, */ qp->r_psn = psn; inv: + while (qp->r_sge.num_sge) { + atomic_dec(&qp->r_sge.sge.mr->refcount); + if (--qp->r_sge.num_sge) + qp->r_sge.sge = *qp->r_sge.sg_list++; + } qp->r_state = OP(SEND_LAST); switch (opcode) { case OP(SEND_FIRST): @@ -348,13 +373,13 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, send_first: if (qp->r_flags & IPATH_R_REUSE_SGE) { qp->r_flags &= ~IPATH_R_REUSE_SGE; + fix_mr_refcount(qp); qp->r_sge = qp->s_rdma_read_sge; } else if (!ipath_get_rwqe(qp, 0)) { dev->n_pkt_drops++; goto done; - } - /* Save the WQE so we can reuse it in case of an error. 
*/ - qp->s_rdma_read_sge = qp->r_sge; + } else + qp->s_rdma_read_sge = qp->r_sge; qp->r_rcv_len = 0; if (opcode == OP(SEND_ONLY)) goto send_last; @@ -374,17 +399,17 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, dev->n_pkt_drops++; goto done; } - ipath_copy_sge(&qp->r_sge, data, pmtu); + ipath_copy_sge(&qp->r_sge, data, pmtu, 1); break; case OP(SEND_LAST_WITH_IMMEDIATE): send_last_imm: if (header_in_data) { - wc.imm_data = *(__be32 *) data; + wc.ex.imm_data = *(__be32 *) data; data += sizeof(__be32); } else { /* Immediate data comes after BTH */ - wc.imm_data = ohdr->u.imm_data; + wc.ex.imm_data = ohdr->u.imm_data; } hdrsize += 4; wc.wc_flags = IB_WC_WITH_IMM; @@ -410,7 +435,12 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, } wc.opcode = IB_WC_RECV; last_imm: - ipath_copy_sge(&qp->r_sge, data, tlen); + ipath_copy_sge(&qp->r_sge, data, tlen, 1); + while (qp->r_sge.num_sge) { + atomic_dec(&qp->r_sge.sge.mr->refcount); + if (--qp->r_sge.num_sge) + qp->r_sge.sge = *qp->r_sge.sg_list++; + } wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; wc.qp = &qp->ibqp; @@ -437,21 +467,23 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, hdrsize += sizeof(*reth); qp->r_len = be32_to_cpu(reth->length); qp->r_rcv_len = 0; + qp->r_sge.sg_list = NULL; if (qp->r_len != 0) { u32 rkey = be32_to_cpu(reth->rkey); u64 vaddr = be64_to_cpu(reth->vaddr); int ok; /* Check rkey */ - ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len, + ok = ipath_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr, rkey, IB_ACCESS_REMOTE_WRITE); if (unlikely(!ok)) { dev->n_pkt_drops++; goto done; } + qp->r_sge.num_sge = 1; } else { - qp->r_sge.sg_list = NULL; + qp->r_sge.num_sge = 0; qp->r_sge.sge.mr = NULL; qp->r_sge.sge.vaddr = NULL; qp->r_sge.sge.length = 0; @@ -478,17 +510,17 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, dev->n_pkt_drops++; goto done; } - ipath_copy_sge(&qp->r_sge, data, pmtu); + 
ipath_copy_sge(&qp->r_sge, data, pmtu, 1); break; case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): rdma_last_imm: if (header_in_data) { - wc.imm_data = *(__be32 *) data; + wc.ex.imm_data = *(__be32 *) data; data += sizeof(__be32); } else { /* Immediate data comes after BTH */ - wc.imm_data = ohdr->u.imm_data; + wc.ex.imm_data = ohdr->u.imm_data; } hdrsize += 4; wc.wc_flags = IB_WC_WITH_IMM; @@ -533,7 +565,12 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, dev->n_pkt_drops++; goto done; } - ipath_copy_sge(&qp->r_sge, data, tlen); + ipath_copy_sge(&qp->r_sge, data, tlen, 1); + while (qp->r_sge.num_sge) { + atomic_dec(&qp->r_sge.sge.mr->refcount); + if (--qp->r_sge.num_sge) + qp->r_sge.sge = *qp->r_sge.sg_list++; + } break; default: diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 64e0c9a..0bf7a96 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -54,6 +54,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) unsigned long flags; struct ipath_rq *rq; struct ipath_srq *srq; + struct ipath_sge_state ssge; struct ipath_sge_state rsge; struct ipath_sge *sge; struct ipath_rwq *wq; @@ -70,8 +71,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) goto done; } - rsge.sg_list = NULL; - /* * Check that the qkey matches (except for QP0, see 9.6.1.4.1). 
* Qkeys with the high order bit set mean use the @@ -96,7 +95,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) { wc.wc_flags = IB_WC_WITH_IMM; - wc.imm_data = swqe->wr.imm_data; + wc.ex.imm_data = swqe->wr.ex.imm_data; } /* @@ -115,21 +114,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) rq = &qp->r_rq; } - if (rq->max_sge > 1) { - /* - * XXX We could use GFP_KERNEL if ipath_do_send() - * was always called from the tasklet instead of - * from ipath_post_send(). - */ - rsge.sg_list = kmalloc((rq->max_sge - 1) * - sizeof(struct ipath_sge), - GFP_ATOMIC); - if (!rsge.sg_list) { - dev->n_pkt_drops++; - goto drop; - } - } - /* * Get the next work request entry to find where to put the data. * Note that it is safe to drop the lock after changing rq->tail @@ -147,14 +131,21 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) goto drop; } wqe = get_rwqe_ptr(rq, tail); - if (!ipath_init_sge(qp, wqe, &rlen, &rsge)) { + rsge.sg_list = qp->r_ud_sg_list; + if (unlikely(!ipath_init_sge(qp, wqe, &rlen, &rsge))) { spin_unlock_irqrestore(&rq->lock, flags); dev->n_pkt_drops++; goto drop; } /* Silently drop packets which are too big. */ - if (wc.byte_len > rlen) { + if (unlikely(wc.byte_len > rlen)) { + unsigned i; + spin_unlock_irqrestore(&rq->lock, flags); + for (i = 0; i < rsge.num_sge; i++) { + sge = i ? 
&rsge.sg_list[i - 1] : &rsge.sge; + atomic_dec(&sge->mr->refcount); + } dev->n_pkt_drops++; goto drop; } @@ -192,11 +183,14 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr; if (ah_attr->ah_flags & IB_AH_GRH) { - ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh)); + ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh), 1); wc.wc_flags |= IB_WC_GRH; } else - ipath_skip_sge(&rsge, sizeof(struct ib_grh)); - sge = swqe->sg_list; + ipath_skip_sge(&rsge, sizeof(struct ib_grh), 1); + ssge.sg_list = swqe->sg_list + 1; + ssge.sge = *swqe->sg_list; + ssge.num_sge = swqe->wr.num_sge; + sge = &ssge.sge; while (length) { u32 len = sge->length; @@ -205,14 +199,14 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) if (len > sge->sge_length) len = sge->sge_length; BUG_ON(len == 0); - ipath_copy_sge(&rsge, sge->vaddr, len); + ipath_copy_sge(&rsge, sge->vaddr, len, 1); sge->vaddr += len; sge->length -= len; sge->sge_length -= len; if (sge->sge_length == 0) { - if (--swqe->wr.num_sge) - sge++; - } else if (sge->length == 0 && sge->mr != NULL) { + if (--ssge.num_sge) + *sge = *ssge.sg_list++; + } else if (sge->length == 0 && sge->mr->lkey) { if (++sge->n >= IPATH_SEGSZ) { if (++sge->m >= sge->mr->mapsz) break; @@ -225,12 +219,17 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) } length -= len; } + while (rsge.num_sge) { + atomic_dec(&rsge.sge.mr->refcount); + if (--rsge.num_sge) + rsge.sge = *rsge.sg_list++; + } wc.status = IB_WC_SUCCESS; wc.opcode = IB_WC_RECV; wc.qp = &qp->ibqp; wc.src_qp = sqp->ibqp.qp_num; - /* XXX do we know which pkey matched? Only needed for GSI. */ - wc.pkey_index = 0; + wc.pkey_index = qp->ibqp.qp_type == IB_QPT_GSI ? 
+ swqe->wr.wr.ud.pkey_index : 0; wc.slid = dev->dd->ipath_lid | (ah_attr->src_path_bits & ((1 << dev->dd->ipath_lmc) - 1)); @@ -242,7 +241,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, swqe->wr.send_flags & IB_SEND_SOLICITED); drop: - kfree(rsge.sg_list); if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); done:; @@ -267,6 +265,7 @@ int ipath_make_ud_req(struct ipath_qp *qp) u16 lrh0; u16 lid; int ret = 0; + int next_cur; spin_lock_irqsave(&qp->s_lock, flags); @@ -290,8 +289,9 @@ int ipath_make_ud_req(struct ipath_qp *qp) goto bail; wqe = get_swqe_ptr(qp, qp->s_cur); - if (++qp->s_cur >= qp->s_size) - qp->s_cur = 0; + next_cur = qp->s_cur + 1; + if (next_cur >= qp->s_size) + next_cur = 0; /* Construct the header. */ ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; @@ -315,6 +315,7 @@ int ipath_make_ud_req(struct ipath_qp *qp) qp->s_flags |= IPATH_S_WAIT_DMA; goto bail; } + qp->s_cur = next_cur; spin_unlock_irqrestore(&qp->s_lock, flags); ipath_ud_loopback(qp, wqe); spin_lock_irqsave(&qp->s_lock, flags); @@ -323,6 +324,7 @@ int ipath_make_ud_req(struct ipath_qp *qp) } } + qp->s_cur = next_cur; extra_bytes = -wqe->length & 3; nwords = (wqe->length + extra_bytes) >> 2; @@ -355,7 +357,7 @@ int ipath_make_ud_req(struct ipath_qp *qp) } if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { qp->s_hdrwords++; - ohdr->u.ud.imm_data = wqe->wr.imm_data; + ohdr->u.ud.imm_data = wqe->wr.ex.imm_data; bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24; } else bth0 = IB_OPCODE_UD_SEND_ONLY << 24; @@ -377,7 +379,8 @@ int ipath_make_ud_req(struct ipath_qp *qp) bth0 |= 1 << 23; bth0 |= extra_bytes << 20; bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY : - ipath_get_pkey(dev->dd, qp->s_pkey_index); + ipath_get_pkey(dev->dd, qp->ibqp.qp_type == IB_QPT_GSI ? 
+ wqe->wr.wr.ud.pkey_index : qp->s_pkey_index); ohdr->bth[0] = cpu_to_be32(bth0); /* * Use the multicast QP if the destination LID is a multicast LID. @@ -406,6 +409,23 @@ unlock: return ret; } +static unsigned ipath_lookup_pkey(struct ipath_devdata *dd, u16 pkey) +{ + unsigned i; + + pkey &= 0x7fff; /* remove limited/full membership bit */ + + for (i = 0; i < ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys); ++i) + if ((dd->ipath_pd[0]->port_pkeys[i] & 0x7fff) == pkey) + return i; + + /* + * Should not get here, this means hardware failed to validate pkeys. + * Punt and return index 0. + */ + return 0; +} + /** * ipath_ud_rcv - receive an incoming UD packet * @dev: the device the packet came in on @@ -493,14 +513,14 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, if (qp->ibqp.qp_num > 1 && opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { if (header_in_data) { - wc.imm_data = *(__be32 *) data; + wc.ex.imm_data = *(__be32 *) data; data += sizeof(__be32); } else - wc.imm_data = ohdr->u.ud.imm_data; + wc.ex.imm_data = ohdr->u.ud.imm_data; wc.wc_flags = IB_WC_WITH_IMM; hdrsize += sizeof(u32); } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { - wc.imm_data = 0; + wc.ex.imm_data = 0; wc.wc_flags = 0; } else { dev->n_pkt_drops++; @@ -559,12 +579,17 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, } if (has_grh) { ipath_copy_sge(&qp->r_sge, &hdr->u.l.grh, - sizeof(struct ib_grh)); + sizeof(struct ib_grh), 1); wc.wc_flags |= IB_WC_GRH; } else - ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh)); + ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); ipath_copy_sge(&qp->r_sge, data, - wc.byte_len - sizeof(struct ib_grh)); + wc.byte_len - sizeof(struct ib_grh), 1); + while (qp->r_sge.num_sge) { + atomic_dec(&qp->r_sge.sge.mr->refcount); + if (--qp->r_sge.num_sge) + qp->r_sge.sge = *qp->r_sge.sg_list++; + } if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) goto bail; wc.wr_id = qp->r_wr_id; @@ -573,8 +598,8 @@ void 
ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, wc.vendor_err = 0; wc.qp = &qp->ibqp; wc.src_qp = src_qp; - /* XXX do we know which pkey matched? Only needed for GSI. */ - wc.pkey_index = 0; + wc.pkey_index = qp->ibqp.qp_type == IB_QPT_GSI ? + ipath_lookup_pkey(dev->dd, be32_to_cpu(ohdr->bth[0])) : 0; wc.slid = be16_to_cpu(hdr->lrh[3]); wc.sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF; dlid = be16_to_cpu(hdr->lrh[1]); diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.c b/drivers/infiniband/hw/ipath/ipath_user_sdma.c index fb26b03..39eb1fa 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.c @@ -484,7 +484,8 @@ static int ipath_user_sdma_queue_pkts(const struct ipath_devdata *dd, const unsigned long faddr = (unsigned long) iov[idx].iov_base; - if (slen & 3 || faddr & 3 || !slen || slen > PAGE_SIZE) { + if (slen & 3 || faddr & 3 || !slen || + slen > PAGE_SIZE) { ret = -EINVAL; goto free_pbc; } @@ -552,6 +553,12 @@ done: return ret; } +static void ipath_user_sdma_set_complete_counter(struct ipath_user_sdma_queue *pq, + u32 c) +{ + pq->sent_counter = c; +} + /* try to clean out queue -- needs pq->lock */ static int ipath_user_sdma_queue_clean(const struct ipath_devdata *dd, struct ipath_user_sdma_queue *pq) @@ -665,8 +672,8 @@ static inline __le64 ipath_sdma_make_first_desc0(__le64 descq) static inline __le64 ipath_sdma_make_last_desc0(__le64 descq) { - /* last */ /* dma head */ - return descq | __constant_cpu_to_le64(1ULL << 11 | 1ULL << 13); + /* last */ + return descq | __constant_cpu_to_le64(1ULL << 11); } static inline __le64 ipath_sdma_make_desc1(u64 addr) @@ -705,6 +712,8 @@ static int ipath_user_sdma_push_pkts(struct ipath_devdata *dd, int ret = 0; unsigned long flags; u16 tail; + u8 generation; + u64 descq_added; if (list_empty(pktlist)) return 0; @@ -714,6 +723,10 @@ static int ipath_user_sdma_push_pkts(struct ipath_devdata *dd, spin_lock_irqsave(&dd->ipath_sdma_lock, flags); + 
/* keep a copy for restoring purposes in case of problems */ + generation = dd->ipath_sdma_generation; + descq_added = dd->ipath_sdma_descq_added; + if (unlikely(dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK)) { ret = -ECOMM; goto unlock; @@ -756,7 +769,7 @@ static int ipath_user_sdma_push_pkts(struct ipath_devdata *dd, if (ofs >= IPATH_SMALLBUF_DWORDS) { for (i = 0; i < pkt->naddr; i++) { dd->ipath_sdma_descq[dtail].qw[0] |= - 1ULL<<14; + __constant_cpu_to_le64(1ULL << 14); if (++dtail == dd->ipath_sdma_descq_cnt) dtail = 0; } @@ -777,6 +790,10 @@ unlock_check_tail: } unlock: + if (unlikely(ret < 0)) { + dd->ipath_sdma_generation = generation; + dd->ipath_sdma_descq_added = descq_added; + } spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); return ret; @@ -860,26 +877,11 @@ int ipath_user_sdma_make_progress(struct ipath_devdata *dd, return ret; } -int ipath_user_sdma_pkt_sent(const struct ipath_user_sdma_queue *pq, - u32 counter) -{ - const u32 scounter = ipath_user_sdma_complete_counter(pq); - const s32 dcounter = scounter - counter; - - return dcounter >= 0; -} - u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq) { return pq->sent_counter; } -void ipath_user_sdma_set_complete_counter(struct ipath_user_sdma_queue *pq, - u32 c) -{ - pq->sent_counter = c; -} - u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq) { return pq->counter; diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.h b/drivers/infiniband/hw/ipath/ipath_user_sdma.h index ce0448f..fc76316 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_sdma.h +++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.h @@ -45,12 +45,8 @@ int ipath_user_sdma_writev(struct ipath_devdata *dd, int ipath_user_sdma_make_progress(struct ipath_devdata *dd, struct ipath_user_sdma_queue *pq); -int ipath_user_sdma_pkt_sent(const struct ipath_user_sdma_queue *pq, - u32 counter); void ipath_user_sdma_queue_drain(struct ipath_devdata *dd, struct ipath_user_sdma_queue *pq); u32 
ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq); -void ipath_user_sdma_set_complete_counter(struct ipath_user_sdma_queue *pq, - u32 c); u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 7bdcc04..3fc08ae 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -35,6 +35,7 @@ #include <rdma/ib_user_verbs.h> #include <linux/io.h> #include <linux/utsname.h> +#include <linux/rculist.h> #include "ipath_kernel.h" #include "ipath_verbs.h" @@ -117,7 +118,7 @@ MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support"); static unsigned int ib_ipath_disable_sma; module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(ib_ipath_disable_sma, "Disable the SMA"); +MODULE_PARM_DESC(disable_sma, "Disable the SMA"); /* * Note that it is OK to post send work requests in the SQE and ERR @@ -173,7 +174,8 @@ static __be64 sys_image_guid; * @data: the data to copy * @length: the length of the data */ -void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length) +void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length, + int release) { struct ipath_sge *sge = &ss->sge; @@ -193,9 +195,11 @@ void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length) sge->length -= len; sge->sge_length -= len; if (sge->sge_length == 0) { + if (release) + atomic_dec(&sge->mr->refcount); if (--ss->num_sge) *sge = *ss->sg_list++; - } else if (sge->length == 0 && sge->mr != NULL) { + } else if (sge->length == 0 && sge->mr->lkey) { if (++sge->n >= IPATH_SEGSZ) { if (++sge->m >= sge->mr->mapsz) break; @@ -216,7 +220,7 @@ void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length) * @ss: the SGE state * @length: the number of bytes to skip */ -void ipath_skip_sge(struct ipath_sge_state *ss, u32 length) +void 
ipath_skip_sge(struct ipath_sge_state *ss, u32 length, int release) { struct ipath_sge *sge = &ss->sge; @@ -232,9 +236,11 @@ void ipath_skip_sge(struct ipath_sge_state *ss, u32 length) sge->length -= len; sge->sge_length -= len; if (sge->sge_length == 0) { + if (release) + atomic_dec(&sge->mr->refcount); if (--ss->num_sge) *sge = *ss->sg_list++; - } else if (sge->length == 0 && sge->mr != NULL) { + } else if (sge->length == 0 && sge->mr->lkey) { if (++sge->n >= IPATH_SEGSZ) { if (++sge->m >= sge->mr->mapsz) break; @@ -281,7 +287,7 @@ static u32 ipath_count_sge(struct ipath_sge_state *ss, u32 length) if (sge.sge_length == 0) { if (--num_sge) sge = *sg_list++; - } else if (sge.length == 0 && sge.mr != NULL) { + } else if (sge.length == 0 && sge.mr->lkey) { if (++sge.n >= IPATH_SEGSZ) { if (++sge.m >= sge.mr->mapsz) break; @@ -320,7 +326,7 @@ static void ipath_copy_from_sge(void *data, struct ipath_sge_state *ss, if (sge->sge_length == 0) { if (--ss->num_sge) *sge = *ss->sg_list++; - } else if (sge->length == 0 && sge->mr != NULL) { + } else if (sge->length == 0 && sge->mr->lkey) { if (++sge->n >= IPATH_SEGSZ) { if (++sge->m >= sge->mr->mapsz) break; @@ -350,9 +356,16 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr) int acc; int ret; unsigned long flags; + struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd; spin_lock_irqsave(&qp->s_lock, flags); + if (qp->ibqp.qp_type != IB_QPT_SMI && + !(dd->ipath_flags & IPATH_LINKACTIVE)) { + ret = -ENETDOWN; + goto bail; + } + /* Check that state is OK to post send. */ if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) goto bail_inval; @@ -398,10 +411,11 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr) wqe = get_swqe_ptr(qp, qp->s_head); wqe->wr = *wr; wqe->length = 0; + j = 0; if (wr->num_sge) { acc = wr->opcode >= IB_WR_RDMA_READ ? 
IB_ACCESS_LOCAL_WRITE : 0; - for (i = 0, j = 0; i < wr->num_sge; i++) { + for (i = 0; i < wr->num_sge; i++) { u32 length = wr->sg_list[i].length; int ok; @@ -410,7 +424,7 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr) ok = ipath_lkey_ok(qp, &wqe->sg_list[j], &wr->sg_list[i], acc); if (!ok) - goto bail_inval; + goto bail_inval_free; wqe->length += length; j++; } @@ -419,15 +433,21 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr) if (qp->ibqp.qp_type == IB_QPT_UC || qp->ibqp.qp_type == IB_QPT_RC) { if (wqe->length > 0x80000000U) - goto bail_inval; + goto bail_inval_free; } else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu) - goto bail_inval; + goto bail_inval_free; wqe->ssn = qp->s_ssn++; qp->s_head = next; ret = 0; goto bail; +bail_inval_free: + while (j) { + struct ipath_sge *sge = &wqe->sg_list[--j]; + + atomic_dec(&sge->mr->refcount); + } bail_inval: ret = -EINVAL; bail: @@ -752,7 +772,7 @@ static void ipath_ib_timer(struct ipath_ibdev *dev) resend = qp->timer_next; spin_lock_irqsave(&qp->s_lock, flags); - if (qp->s_last != qp->s_tail && + if (qp->s_acked != qp->s_tail && ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) { dev->n_timeouts++; ipath_restart_rc(qp, qp->s_last_psn + 1); @@ -788,7 +808,7 @@ static void update_sge(struct ipath_sge_state *ss, u32 length) if (sge->sge_length == 0) { if (--ss->num_sge) *sge = *ss->sg_list++; - } else if (sge->length == 0 && sge->mr != NULL) { + } else if (sge->length == 0 && sge->mr->lkey) { if (++sge->n >= IPATH_SEGSZ) { if (++sge->m >= sge->mr->mapsz) return; @@ -989,7 +1009,7 @@ unsigned ipath_ib_rate_to_mult(enum ib_rate rate) /* * Convert delay multiplier to IB rate */ -enum ib_rate ipath_mult_to_ib_rate(unsigned mult) +static enum ib_rate ipath_mult_to_ib_rate(unsigned mult) { switch (mult) { case 8: return IB_RATE_2_5_GBPS; @@ -1031,7 +1051,7 @@ static void sdma_complete(void *cookie, int status) struct ipath_verbs_txreq *tx = cookie; 
struct ipath_qp *qp = tx->qp; struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - unsigned int flags; + unsigned long flags; enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ? IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR; @@ -1039,6 +1059,8 @@ static void sdma_complete(void *cookie, int status) spin_lock_irqsave(&qp->s_lock, flags); if (tx->wqe) ipath_send_complete(qp, tx->wqe, ibs); + else if (qp->ibqp.qp_type == IB_QPT_RC) + ipath_rc_send_complete(qp, &tx->hdr.hdr); if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND && qp->s_last != qp->s_head) || (qp->s_flags & IPATH_S_WAIT_DMA)) @@ -1049,19 +1071,29 @@ static void sdma_complete(void *cookie, int status) spin_lock_irqsave(&qp->s_lock, flags); ipath_send_complete(qp, tx->wqe, ibs); spin_unlock_irqrestore(&qp->s_lock, flags); + } else if (qp->ibqp.qp_type == IB_QPT_RC) { + spin_lock_irqsave(&qp->s_lock, flags); + ipath_rc_send_complete(qp, &tx->hdr.hdr); + spin_unlock_irqrestore(&qp->s_lock, flags); } + if (tx->mr) { + atomic_dec(&tx->mr->refcount); + tx->mr = NULL; + } if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) kfree(tx->txreq.map_addr); put_txreq(dev, tx); if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); + + ipath_ib_piobufavail(dev); } static void decrement_dma_busy(struct ipath_qp *qp) { - unsigned int flags; + unsigned long flags; if (atomic_dec_and_test(&qp->s_dma_busy)) { spin_lock_irqsave(&qp->s_lock, flags); @@ -1138,6 +1170,9 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp, tx->qp = qp; atomic_inc(&qp->refcount); tx->wqe = qp->s_wqe; + tx->mr = qp->s_rdma_mr; + if (qp->s_rdma_mr) + qp->s_rdma_mr = NULL; tx->txreq.callback = sdma_complete; tx->txreq.callback_cookie = tx; tx->txreq.flags = IPATH_SDMA_TXREQ_F_HEADTOHOST | @@ -1190,9 +1225,10 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp, tx->txreq.map_addr = piobuf; tx->txreq.flags |= IPATH_SDMA_TXREQ_F_FREEBUF; tx->txreq.sg_count = 1; + memcpy(&tx->hdr.hdr, hdr, hdrwords << 2); - *piobuf++ = cpu_to_le32(plen); - 
*piobuf++ = cpu_to_le32(control); + *piobuf++ = (__force u32) cpu_to_le32(plen); + *piobuf++ = (__force u32) cpu_to_le32(control); memcpy(piobuf, hdr, hdrwords << 2); ipath_copy_from_sge(piobuf + hdrwords, ss, len); @@ -1213,6 +1249,10 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp, goto bail; err_tx: + if (tx->mr) { + atomic_dec(&tx->mr->refcount); + tx->mr = NULL; + } if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); put_txreq(dev, tx); @@ -1231,7 +1271,7 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp, unsigned flush_wc; u32 control; int ret; - unsigned int flags; + unsigned long flags; piobuf = ipath_getpiobuf(dd, plen, NULL); if (unlikely(piobuf == NULL)) { @@ -1302,10 +1342,18 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp, } copy_io(piobuf, ss, len, flush_wc); done: + if (qp->s_rdma_mr) { + atomic_dec(&qp->s_rdma_mr->refcount); + qp->s_rdma_mr = NULL; + } if (qp->s_wqe) { spin_lock_irqsave(&qp->s_lock, flags); ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); spin_unlock_irqrestore(&qp->s_lock, flags); + } else if (qp->ibqp.qp_type == IB_QPT_RC) { + spin_lock_irqsave(&qp->s_lock, flags); + ipath_rc_send_complete(qp, ibhdr); + spin_unlock_irqrestore(&qp->s_lock, flags); } ret = 0; bail: @@ -1505,9 +1553,11 @@ static int ipath_query_device(struct ib_device *ibdev, props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | - IB_DEVICE_SYS_IMAGE_GUID; + IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | + IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE; props->page_size_cap = PAGE_SIZE; - props->vendor_id = dev->dd->ipath_vendorid; + props->vendor_id = + IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3; props->vendor_part_id = dev->dd->ipath_deviceid; props->hw_ver = dev->dd->ipath_pcirev; @@ -1853,7 +1903,7 @@ unsigned ipath_get_npkeys(struct ipath_devdata *dd) } /** - * ipath_get_pkey - return the indexed PKEY from the port 0 PKEY table + * ipath_get_pkey - 
return the indexed PKEY from the port PKEY table * @dd: the infinipath device * @index: the PKEY index */ @@ -1861,6 +1911,7 @@ unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index) { unsigned ret; + /* always a kernel port, no locking needed */ if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys)) ret = 0; else @@ -2135,7 +2186,6 @@ int ipath_register_ib_device(struct ipath_devdata *dd) dev->phys_port_cnt = 1; dev->num_comp_vectors = 1; dev->dma_device = &dd->pcidev->dev; - dev->class_dev.dev = dev->dma_device; dev->query_device = ipath_query_device; dev->modify_device = ipath_modify_device; dev->query_port = ipath_query_port; @@ -2228,6 +2278,8 @@ void ipath_unregister_ib_device(struct ipath_ibdev *dev) ipath_dev_err(dev->dd, "piowait list not empty!\n"); if (!list_empty(&dev->rnrwait)) ipath_dev_err(dev->dd, "rnrwait list not empty!\n"); + if (dev->dma_mr) + ipath_dev_err(dev->dd, "DMA MR not NULL!\n"); if (!ipath_mcast_tree_empty()) ipath_dev_err(dev->dd, "multicast table memory leak!\n"); /* @@ -2274,10 +2326,12 @@ static ssize_t show_stats(struct class_device *cdev, char *buf) container_of(cdev, struct ipath_ibdev, ibdev.class_dev); int i; int len; + struct ipath_qp_table *qpt; + unsigned long flags; len = sprintf(buf, "RC resends %d\n" - "RC no QACK %d\n" + "RC QACKs %d\n" "RC ACKs %d\n" "RC SEQ NAKs %d\n" "RC RDMA seq %d\n" @@ -2285,6 +2339,7 @@ static ssize_t show_stats(struct class_device *cdev, char *buf) "RC OTH NAKs %d\n" "RC timeouts %d\n" "RC RDMA dup %d\n" + "RC DComp %d\n" "piobuf wait %d\n" "unaligned %d\n" "PKT drops %d\n" @@ -2292,7 +2347,8 @@ static ssize_t show_stats(struct class_device *cdev, char *buf) dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks, dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks, dev->n_other_naks, dev->n_timeouts, - dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned, + dev->n_rdma_dup_busy, dev->n_rc_delayed_comp, + dev->n_piowait, dev->n_unaligned, dev->n_pkt_drops, dev->n_wqe_errs); for (i = 0; i < 
ARRAY_SIZE(dev->opstats); i++) { const struct ipath_opcode_stats *si = &dev->opstats[i]; @@ -2303,6 +2359,33 @@ static ssize_t show_stats(struct class_device *cdev, char *buf) (unsigned long long) si->n_packets, (unsigned long long) si->n_bytes); } + qpt = &dev->qp_table; + spin_lock_irqsave(&qpt->lock, flags); + for (i = 0; i < qpt->max; i++) { + struct ipath_qp *qp; + for (qp = qpt->table[i]; qp != NULL; qp = qp->next) { + if (qp->s_last == qp->s_acked && + qp->s_acked == qp->s_cur && + qp->s_cur == qp->s_tail && + qp->s_tail == qp->s_head) + continue; + if (len + 128 >= PAGE_SIZE) + break; + len += sprintf(buf + len, + "QP%u %x %u PSN %x %x %x %x %x (%u %u %u %u %u)\n", + qp->ibqp.qp_num, + qp->s_flags, + atomic_read(&qp->s_dma_busy), + qp->s_last_psn, + qp->s_psn, + qp->s_next_psn, + qp->s_sending_psn, + qp->s_sending_hpsn, + qp->s_last, qp->s_acked, qp->s_cur, + qp->s_tail, qp->s_head); + } + } + spin_unlock_irqrestore(&qpt->lock, flags); return len; } @@ -2325,7 +2408,7 @@ static int ipath_verbs_register_sysfs(struct ib_device *dev) for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) if (class_device_create_file(&dev->class_dev, - ipath_class_attributes[i])) { + ipath_class_attributes[i])) { ret = 1; goto bail; } diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 9b21282..e60ccbc 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -248,6 +248,7 @@ struct ipath_mregion { int access_flags; u32 max_segs; /* number of ipath_segs in all the arrays */ u32 mapsz; /* size of the map array */ + atomic_t refcount; struct ipath_segarray *map[0]; /* the segments */ }; @@ -330,7 +331,6 @@ struct ipath_sge_state { struct ipath_sge sge; /* progress state for the current SGE */ u32 total_len; u8 num_sge; - u8 static_rate; }; /* @@ -342,7 +342,7 @@ struct ipath_ack_entry { u8 sent; u32 psn; union { - struct ipath_sge_state rdma_sge; + struct ipath_sge rdma_sge; u64 
atomic_data; }; }; @@ -371,6 +371,7 @@ struct ipath_qp { struct ipath_mmap_info *ip; struct ipath_sge_state *s_cur_sge; struct ipath_verbs_txreq *s_tx; + struct ipath_mregion *s_rdma_mr; struct ipath_sge_state s_sge; /* current send request data */ struct ipath_ack_entry s_ack_queue[IPATH_MAX_RDMA_ATOMIC + 1]; struct ipath_sge_state s_ack_rdma_sge; @@ -385,6 +386,8 @@ struct ipath_qp { u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ u32 s_next_psn; /* PSN for next request */ u32 s_last_psn; /* last response PSN processed */ + u32 s_sending_psn; /* lowest PSN that is being sent */ + u32 s_sending_hpsn; /* highest PSN that is being sent */ u32 s_psn; /* current packet sequence number */ u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ @@ -420,6 +423,7 @@ struct ipath_qp { u8 s_dmult; u8 s_draining; u8 timeout; /* Timeout for this QP */ + u16 s_rdma_ack_cnt; enum ib_mtu path_mtu; u32 remote_qpn; u32 qkey; /* QKEY for this QP (for UD or RD) */ @@ -427,11 +431,13 @@ struct ipath_qp { u32 s_head; /* new entries added here */ u32 s_tail; /* next entry to process */ u32 s_cur; /* current work queue entry */ - u32 s_last; /* last un-ACK'ed entry */ + u32 s_acked; /* last un-ACK'ed entry */ + u32 s_last; /* last completed entry */ u32 s_ssn; /* SSN of tail entry */ u32 s_lsn; /* limit sequence number (credit) */ struct ipath_swqe *s_wq; /* send work queue */ struct ipath_swqe *s_wqe; + struct ipath_sge *r_ud_sg_list; struct ipath_rq r_rq; /* receive work queue */ struct ipath_sge r_sg_list[0]; /* verified SGEs */ }; @@ -457,7 +463,7 @@ struct ipath_qp { * IPATH_S_WAITING - waiting for RNR timeout or send buffer available. * IPATH_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE * IPATH_S_WAIT_DMA - waiting for send DMA queue to drain before generating - next send completion entry not via send DMA. + * next send completion entry not via send DMA. 
*/ #define IPATH_S_SIGNAL_REQ_WR 0x01 #define IPATH_S_FENCE_PENDING 0x02 @@ -538,6 +544,7 @@ struct ipath_ibdev { struct list_head pending_mmaps; spinlock_t mmap_offset_lock; u32 mmap_offset; + struct ipath_mregion *dma_mr; int ib_unit; /* This is the device number */ u16 sm_lid; /* in host order */ u8 sm_sl; @@ -600,6 +607,7 @@ struct ipath_ibdev { u32 n_rc_resends; u32 n_rc_acks; u32 n_rc_qacks; + u32 n_rc_delayed_comp; u32 n_seq_naks; u32 n_rdma_seq; u32 n_rnr_naks; @@ -647,6 +655,7 @@ struct ipath_verbs_txreq { struct ipath_swqe *wqe; u32 map_len; u32 len; + struct ipath_mregion *mr; struct ipath_sge_state *ss; struct ipath_pio_header hdr; struct ipath_sdma_txreq txreq; @@ -755,14 +764,13 @@ void ipath_get_credit(struct ipath_qp *qp, u32 aeth); unsigned ipath_ib_rate_to_mult(enum ib_rate rate); -enum ib_rate ipath_mult_to_ib_rate(unsigned mult); - int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr, u32 hdrwords, struct ipath_sge_state *ss, u32 len); -void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length); +void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length, + int release); -void ipath_skip_sge(struct ipath_sge_state *ss, u32 length); +void ipath_skip_sge(struct ipath_sge_state *ss, u32 length, int release); void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, int has_grh, void *data, u32 tlen, struct ipath_qp *qp); @@ -772,6 +780,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, void ipath_restart_rc(struct ipath_qp *qp, u32 psn); +void ipath_rc_send_complete(struct ipath_qp *qp, struct ipath_ib_header *hdr); + void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err); int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr); @@ -782,12 +792,12 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr); -void ipath_free_lkey(struct ipath_lkey_table *rkt, 
u32 lkey); +int ipath_free_lkey(struct ipath_ibdev *dev, struct ipath_mregion *mr); int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge, struct ib_sge *sge, int acc); -int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss, +int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c index 9e5abf9..d73e322 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c @@ -31,8 +31,7 @@ * SOFTWARE. */ -#include <linux/list.h> -#include <linux/rcupdate.h> +#include <linux/rculist.h> #include "ipath_verbs.h" diff --git a/drivers/infiniband/hw/ipath/ipath_wc_pat.c b/drivers/infiniband/hw/ipath/ipath_wc_pat.c new file mode 100644 index 0000000..31aa960 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_wc_pat.c @@ -0,0 +1,290 @@ +/* + * Copyright (c) 2008 QLogic Corporation. All rights reserved. + * Copyright (c) 2006-2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <asm/processor.h> +#include <linux/io.h> +#include <linux/pci.h> +#include "ipath_kernel.h" +#include "ipath_wc_pat.h" + +/* ipath_wc_pat parameter: + * 0 is WC via MTRR + * 1 is WC via PAT + * 2 is WC via PAT and over-ride chip-set wc errata and PAT checks + * If PAT initialization fails, code reverts back to MTRR + */ +unsigned ipath_wc_pat = 1; /* default (1) is to use PAT, not MTRR */ +module_param_named(wc_pat, ipath_wc_pat, uint, S_IRUGO); +MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism"); + +static u32 old_pat_lo[NR_CPUS] = {0}; +static u32 old_pat_hi[NR_CPUS] = {0}; +static u32 new_pat_lo[NR_CPUS] = {0}; +static unsigned int wc_enabled; + +#define IPATH_PAT_MASK (0xFFFFF8FF) /* PAT1 mask for the PAT MSR */ +#define IPATH_PAT_EXP (0x00000400) /* expected PAT1 value (WT) */ +#define IPATH_PAT_MOD (0x00000100) /* PAT1 value to select WC */ +#define IPATH_WC_MASK (~_PAGE_PCD) /* selects PAT1 for this page */ +#define IPATH_WC_FLAGS (_PAGE_PWT) /* selects PAT1 for this page */ + +#if defined(__i386__) || defined(__x86_64__) + +#define X86_MSR_PAT_OFFSET 0x277 + +/* Returns non-zero if we have a chipset write-combining problem */ +static int have_wc_errata(void) +{ + struct pci_dev *dev; + u8 rev; + + if (ipath_wc_pat 
== 2) + return 0; + + dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL); + if (dev != NULL) { + /* + * ServerWorks LE chipsets < rev 6 have problems with + * write-combining. + */ + if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS && + dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) { + pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); + if (rev <= 5) { + ipath_dbg("Serverworks LE rev < 6 detected. " + "Write-combining disabled\n"); + pci_dev_put(dev); + return -ENOSYS; + } + } + /* Intel 450NX errata # 23. Non ascending cacheline evictions + to write combining memory may result in data corruption + */ + if (dev->vendor == PCI_VENDOR_ID_INTEL && + dev->device == PCI_DEVICE_ID_INTEL_82451NX) { + ipath_dbg("Intel 450NX MMC detected. " + "Write-combining disabled.\n"); + pci_dev_put(dev); + return -ENOSYS; + } + pci_dev_put(dev); + } + return 0; +} + +static void rd_old_pat(void *err) +{ + *(int *)err |= rdmsr_safe(X86_MSR_PAT_OFFSET, + &old_pat_lo[smp_processor_id()], + &old_pat_hi[smp_processor_id()]); +} + +static void wr_new_pat(void *err) +{ + new_pat_lo[smp_processor_id()] = + (old_pat_lo[smp_processor_id()] & IPATH_PAT_MASK) | + IPATH_PAT_MOD; + + *(int *)err |= wrmsr_safe(X86_MSR_PAT_OFFSET, + new_pat_lo[smp_processor_id()], + old_pat_hi[smp_processor_id()]); +} + +static void wr_old_pat(void *err) +{ + u32 cur_pat_lo, cur_pat_hi; + + *(int *)err |= rdmsr_safe(X86_MSR_PAT_OFFSET, + &cur_pat_lo, &cur_pat_hi); + + if (*(int *) err) + goto done; + + /* only restore old PAT if it currently has the expected values */ + if (cur_pat_lo != new_pat_lo[smp_processor_id()] || + cur_pat_hi != old_pat_hi[smp_processor_id()]) + goto done; + + *(int *)err |= wrmsr_safe(X86_MSR_PAT_OFFSET, + old_pat_lo[smp_processor_id()], + old_pat_hi[smp_processor_id()]); +done: ; +} + +static int validate_old_pat(void) +{ + int ret = 0; + int cpu = smp_processor_id(); + int ncpus = num_online_cpus(); + int i; + int onetime = 1; + u32 my_pat1 = old_pat_lo[cpu] & ~IPATH_PAT_MASK; + + if
(ipath_wc_pat == 2) + goto done; + + for (i = 0; i < ncpus; i++) { + u32 this_pat1 = old_pat_lo[i] & ~IPATH_PAT_MASK; + if (this_pat1 != my_pat1) { + ipath_dbg("Inconsistent PAT1 settings across CPUs\n"); + ret = -ENOSYS; + goto done; + } else if (this_pat1 == IPATH_PAT_MOD) { + if (onetime) { + ipath_dbg("PAT1 has already been " + "modified for WC (warning)\n"); + onetime = 0; + } + } else if (this_pat1 != IPATH_PAT_EXP) { + ipath_dbg("PAT1 not in expected WT state\n"); + ret = -ENOSYS; + goto done; + } + } +done: + return ret; +} + +static int read_and_modify_pat(void) +{ + int ret = 0; + + preempt_disable(); + rd_old_pat(&ret); + if (!ret) + smp_call_function(rd_old_pat, &ret, 1, 1); + if (ret) + goto out; + + if (validate_old_pat()) + goto out; + + wr_new_pat(&ret); + if (ret) + goto out; + + smp_call_function(wr_new_pat, &ret, 1, 1); + BUG_ON(ret); /* have inconsistent PAT state */ +out: + preempt_enable(); + return ret; +} + +static int restore_pat(void) +{ + int ret = 0; + + preempt_disable(); + wr_old_pat(&ret); + if (!ret) { + smp_call_function(wr_old_pat, &ret, 1, 1); + BUG_ON(ret); /* have inconsistent PAT state */ + } + + preempt_enable(); + return ret; +} + +int ipath_enable_wc_pat(void) +{ + struct cpuinfo_x86 *c = &(cpu_data)[0]; + int ret; + + if (wc_enabled) + return 0; + + if (!cpu_has(c, X86_FEATURE_MSR) || + !cpu_has(c, X86_FEATURE_PAT)) { + ipath_dbg("WC PAT not available on this processor\n"); + return -ENOSYS; + } + + if (have_wc_errata()) + return -ENOSYS; + + ret = read_and_modify_pat(); + if (!ret) + wc_enabled = 1; + else + ipath_dbg("Failed to enable WC PAT\n"); + return ret ? -EIO : 0; +} + +void ipath_disable_wc_pat(void) +{ + if (wc_enabled) { + if (!restore_pat()) + wc_enabled = 0; + else + ipath_dbg("Failed to disable WC PAT\n"); + } +} + +pgprot_t pgprot_wc(pgprot_t _prot) +{ + return wc_enabled ? 
+ __pgprot(pgprot_val(_prot) | IPATH_WC_FLAGS) : + pgprot_noncached(_prot); +} + +void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size) +{ + return __ioremap(phys_addr, size, IPATH_WC_FLAGS); +} + +int ipath_wc_pat_enabled(void) +{ + return wc_enabled; +} + +#else /* !(defined(__i386__) || defined(__x86_64__)) */ + +int ipath_enable_wc_pat(void){ return 0; } +void ipath_disable_wc_pat(void){} + +pgprot_t pgprot_wc(pgprot_t _prot) +{ + return pgprot_noncached(_prot); +} + +void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size) +{ + return ioremap_nocache(phys_addr, size); +} + +int ipath_wc_pat_enabled(void) +{ + return 0; +} + +#endif diff --git a/drivers/infiniband/hw/ipath/ipath_wc_pat.h b/drivers/infiniband/hw/ipath/ipath_wc_pat.h new file mode 100644 index 0000000..1b17661 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_wc_pat.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2008 QLogic Corporation. All rights reserved. + * Copyright (c) 2006-2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef IPATH_WC_PAT_H +#define IPATH_WC_PAT_H + +#include <asm/pgtable.h> + +extern unsigned ipath_wc_pat; + +int ipath_enable_wc_pat(void); +void ipath_disable_wc_pat(void); +int ipath_wc_pat_enabled(void); +pgprot_t pgprot_wc(pgprot_t _prot); +void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size); + +#endif