Sophie: kernel-2.6.18-238.19.1.el5.centos.plus src

kernel-2.6.18-238.19.1.el5.centos.plus.src.rpm

From: Doug Ledford <dledford@redhat.com>
Date: Fri, 24 Apr 2009 16:16:39 -0400
Subject: [md] fix lockup on read error
Message-id: C3D8F332-FEEC-4F72-ADE8-6FCF51FE2AF1@redhat.com
O-Subject: [Patch RHEL5.4] md: fix lockup on read error
Bugzilla: 465781
RH-Acked-by: David Milburn <dmilburn@redhat.com>
RH-Acked-by: John Feeney <jfeeney@redhat.com>

This patch resolves bugzilla 465781 and has been partner verified.
I've reviewed and brew built the patch, but testing will have to wait
until my md raid box is up and running again (CPU failure with an old
enough machine that I couldn't get a CPU for that motherboard any
more).  I expect personal test results back sometime next week.

commit f8221c8c23fdcc8fdbf4be9c23397e309e4d89ae
Author: Doug Ledford <dledford@redhat.com>
Date:   Fri Apr 24 10:40:49 2009 -0400

    [md raid1] Fix deadlock on read error

    Original patch from Bryn Reeves.  Tested by customer and fix confirmed.
    Reviewed by me and passes review.

    Bugzilla 465781

    Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index dc45b3f..ac323a1 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -605,6 +605,38 @@ static int raid1_issue_flush(request_queue_t *q, struct gendisk *disk,
 	return ret;
 }
 
+
+static int flush_pending_writes(conf_t *conf)
+{
+	/* Flush writes that were queued on conf->pending_bio_list while
+	 * awaiting bitmap updates: detach the list under device_lock, then
+	 * submit each bio with the lock released.
+	 * Returns 1 if any requests were actually submitted, else 0. */
+	int rv = 0;
+
+	spin_lock_irq(&conf->device_lock);
+
+	if (conf->pending_bio_list.head) {
+		struct bio *bio;
+		bio = bio_list_get(&conf->pending_bio_list);
+		blk_remove_plug(conf->mddev->queue);
+		spin_unlock_irq(&conf->device_lock);
+		/* device_lock is dropped here.  Flush any pending bitmap writes
+		 * to disk before proceeding w/ I/O; return value is not checked. */
+		bitmap_unplug(conf->mddev->bitmap);
+
+		while (bio) { /* submit each detached bio, unlinking it first */
+			struct bio *next = bio->bi_next;
+			bio->bi_next = NULL;
+			generic_make_request(bio);
+			bio = next;
+		}
+		rv = 1;
+	} else
+		spin_unlock_irq(&conf->device_lock); /* list was empty */
+	return rv;
+}
+
 /* Barriers....
  * Sometimes we need to suspend IO while we do something else,
  * either some resync/recovery, or reconfigure the array.
@@ -694,7 +726,8 @@ static void freeze_array(conf_t *conf)
 	wait_event_lock_irq(conf->wait_barrier,
 			    conf->barrier+conf->nr_pending == conf->nr_queued+2,
 			    conf->resync_lock,
-			    raid1_unplug(conf->mddev->queue));
+			    ({ flush_pending_writes(conf);
+			       raid1_unplug(conf->mddev->queue); }));
 	spin_unlock_irq(&conf->resync_lock);
 }
 static void unfreeze_array(conf_t *conf)
@@ -919,6 +952,9 @@ static int make_request(request_queue_t *q, struct bio * bio)
 	blk_plug_device(mddev->queue);
 	spin_unlock_irqrestore(&conf->device_lock, flags);
 
+	/* In case raid1d snuck into freeze_array */
+	wake_up(&conf->wait_barrier);
+
 #if 0
 	while ((bio = bio_list_pop(&bl)) != NULL)
 		generic_make_request(bio);
@@ -1402,29 +1438,14 @@ static void raid1d(mddev_t *mddev)
 	
 	for (;;) {
 		char b[BDEVNAME_SIZE];
-		spin_lock_irqsave(&conf->device_lock, flags);
 
-		if (conf->pending_bio_list.head) {
-			bio = bio_list_get(&conf->pending_bio_list);
-			blk_remove_plug(mddev->queue);
-			spin_unlock_irqrestore(&conf->device_lock, flags);
-			/* flush any pending bitmap writes to disk before proceeding w/ I/O */
-			if (bitmap_unplug(mddev->bitmap) != 0)
-				printk("%s: bitmap file write failed!\n", mdname(mddev));
+		unplug += flush_pending_writes(conf);
 
-			while (bio) { /* submit pending writes */
-				struct bio *next = bio->bi_next;
-				bio->bi_next = NULL;
-				generic_make_request(bio);
-				bio = next;
-			}
-			unplug = 1;
-
-			continue;
-		}
-
-		if (list_empty(head))
+		spin_lock_irqsave(&conf->device_lock, flags);
+		if (list_empty(head)) {
+			spin_unlock_irqrestore(&conf->device_lock, flags);
 			break;
+		}
 		r1_bio = list_entry(head->prev, r1bio_t, retry_list);
 		list_del(head->prev);
 		conf->nr_queued--;
@@ -1590,7 +1611,6 @@ static void raid1d(mddev_t *mddev)
 			}
 		}
 	}
-	spin_unlock_irqrestore(&conf->device_lock, flags);
 	if (unplug)
 		unplug_slaves(mddev);
 }