Sophie

Sophie

distrib > CentOS > 5 > x86_64 > by-pkgid > ea32411352494358b8d75a78402a4713 > files > 4341

kernel-2.6.18-238.19.1.el5.centos.plus.src.rpm

From: Steve Best <sbest@redhat.com>
Date: Mon, 2 Aug 2010 20:48:12 -0400
Subject: [scsi] ibmvscsi: Fix soft lockup on resume
Message-id: <20100802203703.18096.95386.sendpatchset@squad5-lp1.lab.bos.redhat.com>
Patchwork-id: 27294
O-Subject: [PATCH RHEL5.6 BZ565570 7/9] ibmvscsi: Fix soft lockup on resume
Bugzilla: 565570
RH-Acked-by: David Howells <dhowells@redhat.com>

RHBZ#:
======
https://bugzilla.redhat.com/show_bug.cgi?id=565570

Description:
============
This fixes a softlockup seen on resume. During resume, the CRQ
must be reenabled. However, the H_ENABLE_CRQ hcall used to do
this may return H_BUSY or H_LONG_BUSY. When this happens, the
caller is expected to retry later. Normally the H_ENABLE_CRQ
succeeds relatively soon. However, we have seen cases where
this can take long enough to see softlockup warnings.
This patch changes a simple loop, which was causing the
softlockup, to a loop at task level which sleeps between
retries rather than simply spinning.

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>

RHEL Version Found:
===================
5.6

kABI Status:
============
No symbols were harmed.

Brew:
=====
Built on all platforms. All patches brewed together.
http://brewweb.devel.redhat.com/brew/taskinfo?taskID=2642470

Upstream Status:
================
http://git.kernel.org/gitweb.cgi?p=linux/kernel/git/jejb/scsi-misc-2.6.git;a=commitdiff;h=0f33ece5bc3d5a9567b65cfbc736e8f206ecfc7b

===============================================================
Steve Best
IBM on-site partner

Signed-off-by: Jarod Wilson <jarod@redhat.com>

diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index fe13366..bd7f750 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -70,6 +70,7 @@
 #include <linux/moduleparam.h>
 #include <linux/dma-mapping.h>
 #include <linux/delay.h>
+#include <linux/kthread.h>
 #include <asm/prom.h>
 #include <asm/vio.h>
 #include <scsi/scsi.h>
@@ -559,14 +560,8 @@ static void ibmvscsi_reset_host(struct ibmvscsi_host_data *hostdata)
 	atomic_set(&hostdata->request_limit, 0);
 
 	purge_requests(hostdata, DID_ERROR);
-	if ((ibmvscsi_reset_crq_queue(&hostdata->queue, hostdata)) ||
-	    (ibmvscsi_send_crq(hostdata, 0xC001000000000000LL, 0)) ||
-	    (vio_enable_interrupts(to_vio_dev(hostdata->dev)))) {
-		atomic_set(&hostdata->request_limit, -1);
-		dev_err(hostdata->dev, "error after reset\n");
-	}
-
-	scsi_unblock_requests(hostdata->host);
+	hostdata->reset_crq = 1;
+	wake_up(&hostdata->work_wait_q);
 }
 
 /**
@@ -1519,30 +1514,14 @@ void ibmvscsi_handle_crq(struct viosrp_crq *crq,
 			/* We need to re-setup the interpartition connection */
 			dev_info(hostdata->dev, "Re-enabling adapter!\n");
 			hostdata->client_migrated = 1;
+			hostdata->reenable_crq = 1;
 			purge_requests(hostdata, DID_REQUEUE);
-			if ((ibmvscsi_reenable_crq_queue(&hostdata->queue,
-							hostdata)) ||
-			    (ibmvscsi_send_crq(hostdata,
-					       0xC001000000000000LL, 0))) {
-					atomic_set(&hostdata->request_limit,
-						   -1);
-					dev_err(hostdata->dev, "error after enable\n");
-			}
+			wake_up(&hostdata->work_wait_q);
 		} else {
 			dev_err(hostdata->dev, "Virtual adapter failed rc %d!\n",
 				crq->format);
-
-			purge_requests(hostdata, DID_ERROR);
-			if ((ibmvscsi_reset_crq_queue(&hostdata->queue,
-							hostdata)) ||
-			    (ibmvscsi_send_crq(hostdata,
-					       0xC001000000000000LL, 0))) {
-					atomic_set(&hostdata->request_limit,
-						   -1);
-					dev_err(hostdata->dev, "error after reset\n");
-			}
+			ibmvscsi_reset_host(hostdata);
 		}
-		scsi_unblock_requests(hostdata->host);
 		return;
 	case 0x80:		/* real payload */
 		break;
@@ -1874,6 +1853,76 @@ static struct scsi_host_template driver_template = {
 	.shost_attrs = ibmvscsi_attrs,
 };
 
+static void ibmvscsi_do_work(struct ibmvscsi_host_data *hostdata)
+{
+	int rc;
+	char *action = "reset";
+
+	if (hostdata->reset_crq) {
+		smp_rmb();
+		hostdata->reset_crq = 0;
+
+		rc = ibmvscsi_reset_crq_queue(&hostdata->queue, hostdata);
+		if (!rc)
+			rc = ibmvscsi_send_crq(hostdata, 0xC001000000000000LL, 0);
+		if (!rc)
+			rc = vio_enable_interrupts(to_vio_dev(hostdata->dev));
+	} else if (hostdata->reenable_crq) {
+		smp_rmb();
+		action = "enable";
+		rc = ibmvscsi_reenable_crq_queue(&hostdata->queue, hostdata);
+		hostdata->reenable_crq = 0;
+		if (!rc)
+			rc = ibmvscsi_send_crq(hostdata, 0xC001000000000000LL, 0);
+	} else
+		return;
+
+	if (rc) {
+		atomic_set(&hostdata->request_limit, -1);
+		dev_err(hostdata->dev, "error after %s\n", action);
+	}
+
+	scsi_unblock_requests(hostdata->host);
+}
+
+static int ibmvscsi_work_to_do(struct ibmvscsi_host_data *hostdata)
+{
+	if (kthread_should_stop())
+		return 1;
+	else if (hostdata->reset_crq) {
+		smp_rmb();
+		return 1;
+	} else if (hostdata->reenable_crq) {
+		smp_rmb();
+		return 1;
+	}
+
+	return 0;
+}
+
+static int ibmvscsi_work(void *data)
+{
+	struct ibmvscsi_host_data *hostdata = data;
+	int rc;
+
+	set_user_nice(current, -20);
+	current->flags |= PF_NOFREEZE;
+
+	while (1) {
+		rc = wait_event_interruptible(hostdata->work_wait_q,
+					      ibmvscsi_work_to_do(hostdata));
+
+		BUG_ON(rc);
+
+		if (kthread_should_stop())
+			break;
+
+		ibmvscsi_do_work(hostdata);
+	}
+
+	return 0;
+}
+
 /**
  * Called by bus code for each adapter
  */
@@ -1896,6 +1945,7 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	hostdata = shost_priv(host);
 	memset(hostdata, 0x00, sizeof(*hostdata));
 	INIT_LIST_HEAD(&hostdata->sent);
+	init_waitqueue_head(&hostdata->work_wait_q);
 	hostdata->host = host;
 	hostdata->dev = dev;
 	atomic_set(&hostdata->request_limit, -1);
@@ -1906,10 +1956,19 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 		goto persist_bufs_failed;
 	}
 
+	hostdata->work_thread = kthread_run(ibmvscsi_work, hostdata, "%s_%d",
+					    "ibmvscsi", host->host_no);
+
+	if (IS_ERR(hostdata->work_thread)) {
+		dev_err(&vdev->dev, "couldn't initialize kthread. rc=%ld\n",
+			PTR_ERR(hostdata->work_thread));
+		goto init_crq_failed;
+	}
+
 	rc = ibmvscsi_init_crq_queue(&hostdata->queue, hostdata, max_events);
 	if (rc != 0 && rc != H_RESOURCE) {
 		dev_err(&vdev->dev, "couldn't initialize crq. rc=%d\n", rc);
-		goto init_crq_failed;
+		goto kill_kthread;
 	}
 	if (initialize_event_pool(&hostdata->pool, max_events, hostdata) != 0) {
 		dev_err(&vdev->dev, "couldn't initialize event pool\n");
@@ -1955,6 +2014,8 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	release_event_pool(&hostdata->pool, hostdata);
       init_pool_failed:
 	ibmvscsi_release_crq_queue(&hostdata->queue, hostdata, max_events);
+      kill_kthread:
+	kthread_stop(hostdata->work_thread);
       init_crq_failed:
 	unmap_persist_bufs(hostdata);
       persist_bufs_failed:
@@ -1971,6 +2032,7 @@ static int ibmvscsi_remove(struct vio_dev *vdev)
 	ibmvscsi_release_crq_queue(&hostdata->queue, hostdata,
 				   max_events);
 	
+	kthread_stop(hostdata->work_thread);
 	scsi_remove_host(hostdata->host);
 	scsi_host_put(hostdata->host);
 
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.h b/drivers/scsi/ibmvscsi/ibmvscsi.h
index 3024812..8c1d2d3 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.h
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.h
@@ -89,12 +89,16 @@ struct event_pool {
 struct ibmvscsi_host_data {
 	atomic_t request_limit;
 	int client_migrated;
+	int reset_crq;
+	int reenable_crq;
 	struct device *dev;
 	struct event_pool pool;
 	struct crq_queue queue;
 	struct tasklet_struct srp_task;
 	struct list_head sent;
 	struct Scsi_Host *host;
+	struct task_struct *work_thread;
+	wait_queue_head_t work_wait_q;
 	struct mad_adapter_info_data madapter_info;
 	struct capabilities caps;
 	dma_addr_t caps_addr;
diff --git a/drivers/scsi/ibmvscsi/rpa_vscsi.c b/drivers/scsi/ibmvscsi/rpa_vscsi.c
index ae86254..8a39c98 100644
--- a/drivers/scsi/ibmvscsi/rpa_vscsi.c
+++ b/drivers/scsi/ibmvscsi/rpa_vscsi.c
@@ -31,6 +31,7 @@
 #include <asm/prom.h>
 #include <asm/iommu.h>
 #include <asm/hvcall.h>
+#include <linux/delay.h>
 #include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
 #include "ibmvscsi.h"
@@ -73,11 +74,13 @@ void ibmvscsi_release_crq_queue(struct crq_queue *queue,
 				struct ibmvscsi_host_data *hostdata,
 				int max_requests)
 {
-	long rc;
+	long rc = 0;
 	struct vio_dev *vdev = to_vio_dev(hostdata->dev);
 	free_irq(vdev->irq, (void *)hostdata);
 	tasklet_kill(&hostdata->srp_task);
 	do {
+		if (rc)
+			msleep(100);
 		rc = plpar_hcall_norets(H_FREE_CRQ, vdev->unit_address);
 	} while ((rc == H_BUSY) || (H_IS_LONG_BUSY(rc)));
 	dma_unmap_single(hostdata->dev,
@@ -267,7 +270,10 @@ int ibmvscsi_init_crq_queue(struct crq_queue *queue,
 	return retrc;
 
       req_irq_failed:
+	rc = 0;
 	do {
+		if (rc)
+			msleep(100);
 		rc = plpar_hcall_norets(H_FREE_CRQ, vdev->unit_address);
 	} while ((rc == H_BUSY) || (H_IS_LONG_BUSY(rc)));
       reg_crq_failed:
@@ -289,11 +295,13 @@ int ibmvscsi_init_crq_queue(struct crq_queue *queue,
 int ibmvscsi_reenable_crq_queue(struct crq_queue *queue,
 				 struct ibmvscsi_host_data *hostdata)
 {
-	int rc;
+	int rc = 0;
 	struct vio_dev *vdev = to_vio_dev(hostdata->dev);
 
 	/* Re-enable the CRQ */
 	do {
+		if (rc)
+			msleep(100);
 		rc = plpar_hcall_norets(H_ENABLE_CRQ, vdev->unit_address);
 	} while ((rc == H_IN_PROGRESS) || (rc == H_BUSY) || (H_IS_LONG_BUSY(rc)));
 
@@ -311,11 +319,13 @@ int ibmvscsi_reenable_crq_queue(struct crq_queue *queue,
 int ibmvscsi_reset_crq_queue(struct crq_queue *queue,
 			      struct ibmvscsi_host_data *hostdata)
 {
-	int rc;
+	int rc = 0;
 	struct vio_dev *vdev = to_vio_dev(hostdata->dev);
 
 	/* Close the CRQ */
 	do {
+		if (rc)
+			msleep(100);
 		rc = plpar_hcall_norets(H_FREE_CRQ, vdev->unit_address);
 	} while ((rc == H_BUSY) || (H_IS_LONG_BUSY(rc)));