From: Steve Best <sbest@redhat.com> Date: Tue, 31 Aug 2010 14:58:58 -0400 Subject: [scsi] ibmvfc: Fix terminate_rport_io Message-id: <20100831144710.8826.36913.sendpatchset@squad5-lp1.lab.bos.redhat.com> Patchwork-id: 27964 O-Subject: [PATCH RHEL5.6 BZ628615] SCSI ibmvfc: Fix terminate_rport_io Bugzilla: 628615 RH-Acked-by: Mike Christie <mchristi@redhat.com> RH-Acked-by: David Howells <dhowells@redhat.com> RH-Acked-by: Stefan Assmann <sassmann@redhat.com> RHBZ#: ------ https://bugzilla.redhat.com/show_bug.cgi?id=628615 Description: ------------ The ibmvfc driver was incorrectly obtaining a scsi_target pointer from an fc_rport. The way it is coded ensures that ibmvfc's terminate_rport_io handler does absolutely nothing. Fix this up to iterate through affected devices differently, sending cancel and abort task set as appropriate. Without this patch, fast_io_fail_tmo is broken for ibmvfc. Signed-off-by: Brian King <brking@linux.vnet.ibm.com> Signed-off-by: James Bottomley <James.Bottomley@suse.de> RHEL Version Found: ------------------- RHEL 5.5 Brew: ----- http://brewweb.devel.redhat.com/brew/taskinfo?taskID=2720098 Upstream: --------- http://git.kernel.org/gitweb.cgi?p=linux/kernel/git/torvalds/ linux-2.6.git;a=patch;h=d2fab5cf3979c55f802c96616daf96e9e8de1c80 back ported to RHEL 5.6 Test Status: ------------ Brian King at IBM helped me test this fix. Hardware Configuration: Power 6 or Power 7 with an NPIV capable adapter (8Gb FC adapter) VIOS which is assigned the NPIV FC adapter and has NPIV ports configured and assigned to one or more Linux LPARs. Multipath configuration to one or more SAN targets connected via NPIV using dm-multipath Software Configuration: RHEL 5.5 installed to a multipath device Set fast_io_fail_tmo on the FC rports to 5 seconds Steps to reproduce: 1. Start an I/O workload to one of the multipath devices, such as cp -ax /usr /tmp 2. Simulate a path failure. I did this by rezoning the SAN switch so the NPIV port only has access to one of the SAN targets ports. Once the path failure has been simulated, if the path that was disabled was the path that I/O was currently being sent down, all I/O will hang. After 5 seconds, the fast_io_fail_tmo will fire, but the I/O will continue to hang until the dev_loss_tmo fires much later. --------------------------------------------------------------- Steve Best IBM on-site partner Proposed Patch: --------------- Signed-off-by: Jarod Wilson <jarod@redhat.com> diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 8986f5f..7de0b5c 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -1824,95 +1824,6 @@ static int ibmvfc_reset_device(struct scsi_device *sdev, int type, char *desc) } /** - * ibmvfc_abort_task_set - Abort outstanding commands to the device - * @sdev: scsi device to abort commands - * - * This sends an Abort Task Set to the VIOS for the specified device. This does - * NOT send any cancel to the VIOS. That must be done separately. - * - * Returns: - * 0 on success / other on failure - **/ -static int ibmvfc_abort_task_set(struct scsi_device *sdev) -{ - struct ibmvfc_host *vhost = shost_priv(sdev->host); - struct fc_rport *rport = starget_to_rport(scsi_target(sdev)); - struct ibmvfc_cmd *tmf; - struct ibmvfc_event *evt, *found_evt; - union ibmvfc_iu rsp_iu; - struct ibmvfc_fcp_rsp *fc_rsp = &rsp_iu.cmd.rsp; - int rsp_rc = -EBUSY; - unsigned long flags; - int rsp_code = 0; - - spin_lock_irqsave(vhost->host->host_lock, flags); - found_evt = NULL; - list_for_each_entry(evt, &vhost->sent, queue) { - if (evt->cmnd && evt->cmnd->device == sdev) { - found_evt = evt; - break; - } - } - - if (!found_evt) { - if (vhost->log_level > IBMVFC_DEFAULT_LOG_LEVEL) - sdev_printk(KERN_INFO, sdev, "No events found to abort\n"); - spin_unlock_irqrestore(vhost->host->host_lock, flags); - return 0; - } - - if (vhost->state == IBMVFC_ACTIVE) { - evt = ibmvfc_get_event(vhost); - ibmvfc_init_event(evt, ibmvfc_sync_completion, IBMVFC_CMD_FORMAT); - - tmf = &evt->iu.cmd; - memset(tmf, 0, sizeof(*tmf)); - tmf->resp.va = (u64)evt->crq.ioba + offsetof(struct ibmvfc_cmd, rsp); - tmf->resp.len = sizeof(tmf->rsp); - tmf->frame_type = IBMVFC_SCSI_FCP_TYPE; - tmf->payload_len = sizeof(tmf->iu); - tmf->resp_len = sizeof(tmf->rsp); - tmf->cancel_key = (unsigned long)sdev->hostdata; - tmf->tgt_scsi_id = rport->port_id; - int_to_scsilun(sdev->lun, &tmf->iu.lun); - tmf->flags = (IBMVFC_NO_MEM_DESC | IBMVFC_TMF); - tmf->iu.tmf_flags = IBMVFC_ABORT_TASK_SET; - evt->sync_iu = &rsp_iu; - - init_completion(&evt->comp); - rsp_rc = ibmvfc_send_event(evt, vhost, default_timeout); - } - - spin_unlock_irqrestore(vhost->host->host_lock, flags); - - if (rsp_rc != 0) { - sdev_printk(KERN_ERR, sdev, "Failed to send abort. rc=%d\n", rsp_rc); - return -EIO; - } - - sdev_printk(KERN_INFO, sdev, "Aborting outstanding commands\n"); - wait_for_completion(&evt->comp); - - if (rsp_iu.cmd.status) { - if (fc_rsp->flags & FCP_RSP_LEN_VALID) - rsp_code = fc_rsp->data.info.rsp_code; - - sdev_printk(KERN_ERR, sdev, "Abort failed: %s (%x:%x) " - "flags: %x fcp_rsp: %x, scsi_status: %x\n", - ibmvfc_get_cmd_error(rsp_iu.cmd.status, rsp_iu.cmd.error), - rsp_iu.cmd.status, rsp_iu.cmd.error, fc_rsp->flags, rsp_code, - fc_rsp->scsi_status); - rsp_rc = -EIO; - } else - sdev_printk(KERN_INFO, sdev, "Abort successful\n"); - - spin_lock_irqsave(vhost->host->host_lock, flags); - ibmvfc_free_event(evt); - spin_unlock_irqrestore(vhost->host->host_lock, flags); - return rsp_rc; -} - -/** * ibmvfc_cancel_all - Cancel all outstanding commands to the device * @sdev: scsi device to cancel commands * @type: type of error recovery being performed @@ -1997,6 +1908,44 @@ static int ibmvfc_cancel_all(struct scsi_device *sdev, int type) } /** + * ibmvfc_match_rport - Match function for specified remote port + * @evt: ibmvfc event struct + * @device: device to match (rport) + * + * Returns: + * 1 if event matches rport / 0 if event does not match rport + **/ +static int ibmvfc_match_rport(struct ibmvfc_event *evt, void *rport) +{ + struct fc_rport *cmd_rport; + + if (evt->cmnd) { + cmd_rport = starget_to_rport(scsi_target(evt->cmnd->device)); + if (cmd_rport == rport) + return 1; + } + return 0; +} + +/** + * ibmvfc_match_key - Match function for specified cancel key + * @evt: ibmvfc event struct + * @key: cancel key to match + * + * Returns: + * 1 if event matches key / 0 if event does not match key + **/ +static int ibmvfc_match_key(struct ibmvfc_event *evt, void *key) +{ + unsigned long cancel_key = (unsigned long)key; + + if (evt->crq.format == IBMVFC_CMD_FORMAT && + evt->iu.cmd.cancel_key == cancel_key) + return 1; + return 0; +} + +/** * ibmvfc_match_target - Match function for specified target * @evt: ibmvfc event struct * @device: device to match (starget) @@ -2082,6 +2031,112 @@ static int ibmvfc_wait_for_ops(struct ibmvfc_host *vhost, void *device, } /** + * ibmvfc_abort_task_set - Abort outstanding commands to the device + * @sdev: scsi device to abort commands + * + * This sends an Abort Task Set to the VIOS for the specified device. This does + * NOT send any cancel to the VIOS. That must be done separately. + * + * Returns: + * 0 on success / other on failure + **/ +static int ibmvfc_abort_task_set(struct scsi_device *sdev) +{ + struct ibmvfc_host *vhost = shost_priv(sdev->host); + struct fc_rport *rport = starget_to_rport(scsi_target(sdev)); + struct ibmvfc_cmd *tmf; + struct ibmvfc_event *evt, *found_evt; + union ibmvfc_iu rsp_iu; + struct ibmvfc_fcp_rsp *fc_rsp = &rsp_iu.cmd.rsp; + int rc, rsp_rc = -EBUSY; + unsigned long flags, timeout = IBMVFC_ABORT_TIMEOUT; + int rsp_code = 0; + + spin_lock_irqsave(vhost->host->host_lock, flags); + found_evt = NULL; + list_for_each_entry(evt, &vhost->sent, queue) { + if (evt->cmnd && evt->cmnd->device == sdev) { + found_evt = evt; + break; + } + } + + if (!found_evt) { + if (vhost->log_level > IBMVFC_DEFAULT_LOG_LEVEL) + sdev_printk(KERN_INFO, sdev, "No events found to abort\n"); + spin_unlock_irqrestore(vhost->host->host_lock, flags); + return 0; + } + + if (vhost->state == IBMVFC_ACTIVE) { + evt = ibmvfc_get_event(vhost); + ibmvfc_init_event(evt, ibmvfc_sync_completion, IBMVFC_CMD_FORMAT); + + tmf = &evt->iu.cmd; + memset(tmf, 0, sizeof(*tmf)); + tmf->resp.va = (u64)evt->crq.ioba + offsetof(struct ibmvfc_cmd, rsp); + tmf->resp.len = sizeof(tmf->rsp); + tmf->frame_type = IBMVFC_SCSI_FCP_TYPE; + tmf->payload_len = sizeof(tmf->iu); + tmf->resp_len = sizeof(tmf->rsp); + tmf->cancel_key = (unsigned long)sdev->hostdata; + tmf->tgt_scsi_id = rport->port_id; + int_to_scsilun(sdev->lun, &tmf->iu.lun); + tmf->flags = (IBMVFC_NO_MEM_DESC | IBMVFC_TMF); + tmf->iu.tmf_flags = IBMVFC_ABORT_TASK_SET; + evt->sync_iu = &rsp_iu; + + init_completion(&evt->comp); + rsp_rc = ibmvfc_send_event(evt, vhost, default_timeout); + } + + spin_unlock_irqrestore(vhost->host->host_lock, flags); + + if (rsp_rc != 0) { + sdev_printk(KERN_ERR, sdev, "Failed to send abort. rc=%d\n", rsp_rc); + return -EIO; + } + + sdev_printk(KERN_INFO, sdev, "Aborting outstanding commands\n"); + timeout = wait_for_completion_timeout(&evt->comp, timeout); + + if (!timeout) { + rc = ibmvfc_cancel_all(sdev, IBMVFC_TMF_ABORT_TASK_SET); + if (!rc) { + rc = ibmvfc_wait_for_ops(vhost, sdev->hostdata, ibmvfc_match_key); + if (rc == SUCCESS) + rc = 0; + } + + if (rc) { + sdev_printk(KERN_INFO, sdev, "Cancel failed, resetting host\n"); + ibmvfc_reset_host(vhost); + rsp_rc = 0; + goto out; + } + } + + if (rsp_iu.cmd.status) { + if (fc_rsp->flags & FCP_RSP_LEN_VALID) + rsp_code = fc_rsp->data.info.rsp_code; + + sdev_printk(KERN_ERR, sdev, "Abort failed: %s (%x:%x) " + "flags: %x fcp_rsp: %x, scsi_status: %x\n", + ibmvfc_get_cmd_error(rsp_iu.cmd.status, rsp_iu.cmd.error), + rsp_iu.cmd.status, rsp_iu.cmd.error, fc_rsp->flags, rsp_code, + fc_rsp->scsi_status); + rsp_rc = -EIO; + } else + sdev_printk(KERN_INFO, sdev, "Abort successful\n"); + +out: + spin_lock_irqsave(vhost->host->host_lock, flags); + ibmvfc_free_event(evt); + spin_unlock_irqrestore(vhost->host->host_lock, flags); + return rsp_rc; +} + +/** * ibmvfc_eh_abort_handler - Abort a command * @cmd: scsi command to abort * @@ -2145,18 +2200,6 @@ static void ibmvfc_dev_cancel_all(struct scsi_device *sdev, void *data) } /** - * ibmvfc_dev_abort_all - Device iterated abort task set function - * @sdev: scsi device struct - * @data: return code - * - **/ -static void ibmvfc_dev_abort_all(struct scsi_device *sdev, void *data) -{ - unsigned long *rc = data; - *rc |= ibmvfc_abort_task_set(sdev); -} - -/** * ibmvfc_eh_target_reset_handler - Reset the target * @cmd: scsi command struct * @@ -2211,19 +2254,22 @@ static int ibmvfc_eh_host_reset_handler(struct scsi_cmnd *cmd) **/ static void ibmvfc_terminate_rport_io(struct fc_rport *rport) { - struct scsi_target *starget = to_scsi_target(&rport->dev); - struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); + struct Scsi_Host *shost = rport_to_shost(rport); struct ibmvfc_host *vhost = shost_priv(shost); - unsigned long cancel_rc = 0; - unsigned long abort_rc = 0; - int rc = FAILED; + struct fc_rport *dev_rport; + struct scsi_device *sdev; + unsigned long rc; ENTER(); - starget_for_each_device(starget, &cancel_rc, ibmvfc_dev_cancel_all); - starget_for_each_device(starget, &abort_rc, ibmvfc_dev_abort_all); + shost_for_each_device(sdev, shost) { + dev_rport = starget_to_rport(scsi_target(sdev)); + if (dev_rport != rport) + continue; + ibmvfc_cancel_all(sdev, IBMVFC_TMF_ABORT_TASK_SET); + ibmvfc_abort_task_set(sdev); + } - if (!cancel_rc && !abort_rc) - rc = ibmvfc_wait_for_ops(vhost, starget, ibmvfc_match_target); + rc = ibmvfc_wait_for_ops(vhost, rport, ibmvfc_match_rport); if (rc == FAILED) ibmvfc_issue_fc_host_lip(shost); diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h index 4fea405..be7f1de 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.h +++ b/drivers/scsi/ibmvscsi/ibmvfc.h @@ -38,6 +38,7 @@ #define IBMVFC_ADISC_PLUS_CANCEL_TIMEOUT \ (IBMVFC_ADISC_TIMEOUT + IBMVFC_ADISC_CANCEL_TIMEOUT) #define IBMVFC_INIT_TIMEOUT 120 +#define IBMVFC_ABORT_TIMEOUT 8 #define IBMVFC_MAX_REQUESTS_DEFAULT 100 #define IBMVFC_DEBUG 0