From: Rob Evers <revers@redhat.com> Date: Tue, 5 Oct 2010 14:24:50 -0400 Subject: [scsi] lpfc: fix a failure to roundrobin on all FCFs Message-id: <1286288695-20754-19-git-send-email-revers@redhat.com> Patchwork-id: 28610 O-Subject: [RHEL5.6 PATCH 18/23] Fixed failure to roundrobin on all available FCFs when FLOGI to in-use FCF rejected by switch (CR: 105402) Bugzilla: 619917 RH-Acked-by: Tomas Henzl <thenzl@redhat.com> Fixed failure to roundrobin on all available FCFs when FLOGI to in-use FCF rejected by switch (CR: 105402) From: Rob Evers on behalf of Emulex <revers@redhat.com> https://bugzilla.redhat.com/show_bug.cgi?id=619917 diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index d919b3d..5ccb942 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -811,9 +811,12 @@ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, (irsp->un.ulpWord[4] != IOERR_SLI_ABORTED)) { lpfc_printf_log(phba, KERN_WARNING, LOG_FIP | LOG_ELS, "2611 FLOGI failed on registered " - "FCF record fcf_index:%d, trying " - "to perform round robin failover\n", - phba->fcf.current_rec.fcf_indx); + "FCF record fcf_index(%d), status: " + "x%x/x%x, tmo:x%x, trying to perform " + "round robin failover\n", + phba->fcf.current_rec.fcf_indx, + irsp->ulpStatus, irsp->un.ulpWord[4], + irsp->ulpTimeout); fcf_index = lpfc_sli4_fcf_rr_next_index_get(phba); if (fcf_index == LPFC_FCOE_FCF_NEXT_NONE) { /* @@ -851,6 +854,12 @@ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, } } + /* FLOGI failure */ + lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, + "2858 FLOGI failure Status:x%x/x%x TMO:x%x\n", + irsp->ulpStatus, irsp->un.ulpWord[4], + irsp->ulpTimeout); + /* Check for retry */ if (lpfc_els_retry(phba, cmdiocb, rspiocb)) goto out; diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index f136882..657e89e 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -1987,8 +1987,7 @@ lpfc_mbx_cmpl_fcf_scan_read_fcf_rec(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq) __lpfc_sli4_stop_fcf_redisc_wait_timer(phba); else if (phba->fcf.fcf_flag & FCF_REDISC_FOV) /* If in fast failover, mark it's completed */ - phba->fcf.fcf_flag &= ~(FCF_REDISC_FOV | - FCF_DISCOVERY); + phba->fcf.fcf_flag &= ~FCF_REDISC_FOV; spin_unlock_irq(&phba->hbalock); lpfc_printf_log(phba, KERN_INFO, LOG_FIP, "2836 The new FCF record (x%x) " @@ -2784,7 +2783,6 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, READ_LA_VAR *la) spin_unlock_irq(&phba->hbalock); lpfc_printf_log(phba, KERN_INFO, LOG_FIP | LOG_DISCOVERY, "2778 Start FCF table scan at linkup\n"); - rc = lpfc_sli4_fcf_scan_read_fcf_rec(phba, LPFC_FCOE_FCF_GET_FIRST); if (rc) { @@ -2793,6 +2791,9 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, READ_LA_VAR *la) spin_unlock_irq(&phba->hbalock); goto out; } + /* Reset FCF roundrobin bmask for new discovery */ + memset(phba->fcf.fcf_rr_bmask, 0, + sizeof(*phba->fcf.fcf_rr_bmask)); } return; @@ -5537,6 +5538,10 @@ lpfc_unregister_fcf_rescan(struct lpfc_hba *phba) spin_lock_irq(&phba->hbalock); phba->fcf.fcf_flag |= FCF_INIT_DISC; spin_unlock_irq(&phba->hbalock); + + /* Reset FCF roundrobin bmask for new discovery */ + memset(phba->fcf.fcf_rr_bmask, 0, sizeof(*phba->fcf.fcf_rr_bmask)); + rc = lpfc_sli4_fcf_scan_read_fcf_rec(phba, LPFC_FCOE_FCF_GET_FIRST); if (rc) { diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index cc986a9..53bbeea 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -3471,21 +3471,7 @@ lpfc_sli4_async_fcoe_evt(struct lpfc_hba *phba, "evt_tag:x%x, fcf_index:x%x\n", acqe_fcoe->event_tag, acqe_fcoe->index); - /* If the FCF discovery is in progress, do nothing. */ - spin_lock_irq(&phba->hbalock); - if (phba->hba_flag & FCF_DISC_INPROGRESS) { - spin_unlock_irq(&phba->hbalock); - break; - } - /* If fast FCF failover rescan event is pending, do nothing */ - if (phba->fcf.fcf_flag & FCF_REDISC_EVT) { - spin_unlock_irq(&phba->hbalock); - break; - } - spin_unlock_irq(&phba->hbalock); - - if ((phba->fcf.fcf_flag & FCF_DISCOVERY) && - !(phba->fcf.fcf_flag & FCF_REDISC_FOV)) { + if (phba->fcf.fcf_flag & FCF_DISCOVERY) { /* * During period of FCF discovery, read the FCF * table record indexed by the event to update @@ -3499,13 +3485,26 @@ lpfc_sli4_async_fcoe_evt(struct lpfc_hba *phba, acqe_fcoe->index); rc = lpfc_sli4_read_fcf_rec(phba, acqe_fcoe->index); } - /* If the FCF has been in discovered state, do nothing. */ + + /* If the FCF discovery is in progress, do nothing. */ spin_lock_irq(&phba->hbalock); + if (phba->hba_flag & FCF_DISC_INPROGRESS) { + spin_unlock_irq(&phba->hbalock); + break; + } + /* If fast FCF failover rescan event is pending, do nothing */ + if (phba->fcf.fcf_flag & FCF_REDISC_EVT) { + spin_unlock_irq(&phba->hbalock); + break; + } + + /* If the FCF has been in discovered state, do nothing. */ if (phba->fcf.fcf_flag & FCF_SCAN_DONE) { spin_unlock_irq(&phba->hbalock); break; } spin_unlock_irq(&phba->hbalock); + /* Otherwise, scan the entire FCF table and re-discover SAN */ lpfc_printf_log(phba, KERN_INFO, LOG_FIP | LOG_DISCOVERY, "2770 Start FCF table scan due to new FCF " @@ -3531,13 +3530,9 @@ lpfc_sli4_async_fcoe_evt(struct lpfc_hba *phba, "2549 FCF disconnected from network index 0x%x" " tag 0x%x\n", acqe_fcoe->index, acqe_fcoe->event_tag); - /* If the event is not for currently used fcf do nothing */ - if (phba->fcf.current_rec.fcf_indx != acqe_fcoe->index) - break; - /* We request port to rediscover the entire FCF table for - * a fast recovery from case that the current FCF record - * is no longer valid if we are not in the middle of FCF - * failover process already. + /* + * If we are in the middle of FCF failover process, clear + * the corresponding FCF bit in the roundrobin bitmap. */ spin_lock_irq(&phba->hbalock); if (phba->fcf.fcf_flag & FCF_DISCOVERY) { @@ -3546,9 +3541,23 @@ lpfc_sli4_async_fcoe_evt(struct lpfc_hba *phba, lpfc_sli4_fcf_rr_index_clear(phba, acqe_fcoe->index); break; } + spin_unlock_irq(&phba->hbalock); + + /* If the event is not for currently used fcf do nothing */ + if (phba->fcf.current_rec.fcf_indx != acqe_fcoe->index) + break; + + /* + * Otherwise, request the port to rediscover the entire FCF + * table for a fast recovery from case that the current FCF + * is no longer valid as we are not in the middle of FCF + * failover process already. + */ + spin_lock_irq(&phba->hbalock); /* Mark the fast failover process in progress */ phba->fcf.fcf_flag |= FCF_DEAD_DISC; spin_unlock_irq(&phba->hbalock); + lpfc_printf_log(phba, KERN_INFO, LOG_FIP | LOG_DISCOVERY, "2771 Start FCF fast failover process due to " "FCF DEAD event: evt_tag:x%x, fcf_index:x%x " @@ -3568,12 +3577,16 @@ lpfc_sli4_async_fcoe_evt(struct lpfc_hba *phba, * as a link down to FCF registration. */ lpfc_sli4_fcf_dead_failthrough(phba); - } else - /* Handling fast FCF failover to a DEAD FCF event - * is considered equalivant to receiving CVL to all - * vports. + } else { + /* Reset FCF roundrobin bmask for new discovery */ + memset(phba->fcf.fcf_rr_bmask, 0, + sizeof(*phba->fcf.fcf_rr_bmask)); + /* + * Handling fast FCF failover to a DEAD FCF event is + * considered equalivant to receiving CVL to all vports. */ lpfc_sli4_perform_all_vport_cvl(phba); + } break; case LPFC_FCOE_EVENT_TYPE_CVL: lpfc_printf_log(phba, KERN_ERR, LOG_FIP | LOG_DISCOVERY, @@ -3648,7 +3661,13 @@ lpfc_sli4_async_fcoe_evt(struct lpfc_hba *phba, * the current registered FCF entry. */ lpfc_retry_pport_discovery(phba); - } + } else + /* + * Reset FCF roundrobin bmask for new + * discovery. + */ + memset(phba->fcf.fcf_rr_bmask, 0, + sizeof(*phba->fcf.fcf_rr_bmask)); } break; default: diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 96c5415..ecd73c0 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -12338,12 +12338,9 @@ lpfc_sli4_fcf_scan_read_fcf_rec(struct lpfc_hba *phba, uint16_t fcf_index) spin_lock_irq(&phba->hbalock); phba->hba_flag |= FCF_DISC_INPROGRESS; spin_unlock_irq(&phba->hbalock); - /* Reset FCF round robin index bmask for new scan */ - if (fcf_index == LPFC_FCOE_FCF_GET_FIRST) { - memset(phba->fcf.fcf_rr_bmask, 0, - sizeof(*phba->fcf.fcf_rr_bmask)); + /* Reset eligible FCF count for new scan */ + if (fcf_index == LPFC_FCOE_FCF_GET_FIRST) phba->fcf.eligible_fcf_cnt = 0; - } error = 0; } fail_fcf_scan: