Sophie: kernel-2.6.18-238.19.1.el5.centos.plus src

kernel-2.6.18-238.19.1.el5.centos.plus.src.rpm

From: Laszlo Ersek <lersek@redhat.com>
Date: Wed, 24 Nov 2010 11:27:03 -0500
Subject: [virt] xen: add bounds req-process loop in blkback/blktap
Message-id: <1290598023-3901-1-git-send-email-lersek@redhat.com>
Patchwork-id: 4518
O-Subject: [kernel team] [RHEL5.6 PATCH] request-processing loop is unbounded
	in blkback/blktap (BZ#654546)
Bugzilla: 654546
CVE: CVE-2010-4247
RH-Acked-by: Andrew Jones <drjones@redhat.com>
RH-Acked-by: Rik van Riel <riel@redhat.com>

Problem:

  Quoting the BZ report:

    If the frontend pass a bad index of production request, the backend
    will enter an endless loop and then cause a excessive CPU
    consumption.

    [...]

    Version-Release number of selected component (if applicable):
    2.6.18-194.el5xen

    [...]

    Steps to Reproduce:
    1. build a guest kernel with the patch attached.
    2. run domU with the patched kernel

    Actual results:
    Dom0 got hung.

  I verified the bug's presence under the newest RHEL5 kernel build:

    host kernel:
      2.6.18-233.el5xen

    original guest kernel, which was used as the starting point to build
    a patched malicious kernel:
      2.6.18-233.el5xen

    Rebooted the guest with the malicious kernel. One core of the host
    is spun at 100% by "blktap.2.xvda", and messages like

      blk_tap: unknown operation [%d]
      blk_tap: Bad number of segments in request (%d)

    are logged in a busy loop. The guest crashes / disappears. Previous
    ssh sessions to the host persisted originally, but as soon as I
    tried to open /var/log/messages, they froze. The host could not be
    rebooted from the console, I was forced to use the power button.

Fix:

  Backport upstream changesets 391:77f831cbb91d and 392:7070d34f251c:

    http://xenbits.xensource.com/linux-2.6.18-xen.hg?rev/77f831cbb91d
    http://xenbits.xensource.com/linux-2.6.18-xen.hg?rev/7070d34f251c

  Summary:

    blktap & blkback:
    - add yield point to unbounded request processing loop
    - sleep a bit when request type is invalid (frontend probably crazy)
    - check for kthread_should_stop() in request processing loop

  Testing:

    # uname -r    # on the host
    2.6.18-233.el5.unbounded_blkback_bz_654546xen

    Starting the offending guest resulted in (1) the following messages:

        47 blk_tap: Bad number of segments in request
        28 blk_tap: unknown operation

    in total, appearing slowly (almost traceable by the naked eye), and
    (2) the malicious guest being shut off again.

Brew:

  https://brewweb.devel.redhat.com/taskinfo?taskID=2910162

Please review. Thanks.

Signed-off-by: Laszlo Ersek <lersek@redhat.com>

diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c
index 12378bd..c4144ca 100644
--- a/drivers/xen/blkback/blkback.c
+++ b/drivers/xen/blkback/blkback.c
@@ -38,6 +38,7 @@
 #include <linux/spinlock.h>
 #include <linux/kthread.h>
 #include <linux/list.h>
+#include <linux/delay.h>
 #include <xen/balloon.h>
 #include <asm/hypervisor.h>
 #include <asm/hypercall.h>
@@ -303,7 +304,7 @@ static int do_block_io_op(blkif_t *blkif)
 	rp = blk_rings->common.sring->req_prod;
 	rmb(); /* Ensure we see queued requests up to 'rp'. */
 
-	while ((rc != rp)) {
+	while (rc != rp) {
 
 		if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
 			break;
@@ -315,6 +316,11 @@ static int do_block_io_op(blkif_t *blkif)
 			break;
 		}
 
+		if (kthread_should_stop()) {
+			more_to_do = 1;
+			break;
+		}
+
 		switch (blkif->blk_protocol) {
 		case BLKIF_PROTOCOL_NATIVE:
 			memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
@@ -340,6 +346,9 @@ static int do_block_io_op(blkif_t *blkif)
 			dispatch_rw_block_io(blkif, &req, pending_req);
 			break;
 		default:
+			/* A good sign something is wrong: sleep for a while to
+			 * avoid excessive CPU consumption by a bad guest. */
+			msleep(1);
 			DPRINTK("error: unknown block io operation [%d]\n",
 				req.operation);
 			make_response(blkif, req.id, req.operation,
@@ -347,7 +356,11 @@ static int do_block_io_op(blkif_t *blkif)
 			free_req(pending_req);
 			break;
 		}
+
+		/* Yield point for this unbounded loop. */
+		cond_resched();
 	}
+
 	return more_to_do;
 }
 
@@ -484,6 +497,7 @@ static void dispatch_rw_block_io(blkif_t *blkif,
  fail_response:
 	make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
 	free_req(pending_req);
+	msleep(1); /* back off a bit */
 } 
 
 
diff --git a/drivers/xen/blktap/blktapmain.c b/drivers/xen/blktap/blktapmain.c
index c4682e8..3d66ffd 100644
--- a/drivers/xen/blktap/blktapmain.c
+++ b/drivers/xen/blktap/blktapmain.c
@@ -48,6 +48,7 @@
 #include <linux/major.h>
 #include <linux/gfp.h>
 #include <linux/poll.h>
+#include <linux/delay.h>
 #include <asm/tlbflush.h>
 
 #define MAX_TAP_DEV 100     /*the maximum number of tapdisk ring devices    */
@@ -1135,6 +1136,11 @@ static int do_block_io_op(blkif_t *blkif)
 			break;
 		}
 
+		if (kthread_should_stop()) {
+			more_to_do = 1;
+			break;
+		}
+
 		switch (blkif->blk_protocol) {
 		case BLKIF_PROTOCOL_NATIVE:
 			memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc),
@@ -1163,6 +1169,9 @@ static int do_block_io_op(blkif_t *blkif)
 			break;
 
 		default:
+			/* A good sign something is wrong: sleep for a while to
+			 * avoid excessive CPU consumption by a bad guest. */
+			msleep(1);
 			WPRINTK("unknown operation [%d]\n",
 				req.operation);
 			make_response(blkif, req.id, req.operation,
@@ -1170,6 +1179,9 @@ static int do_block_io_op(blkif_t *blkif)
 			free_req(pending_req);
 			break;
 		}
+
+		/* Yield point for this unbounded loop. */
+		cond_resched();
 	}
 		
 	blktap_kick_user(blkif->dev_num);
@@ -1399,7 +1411,8 @@ static void dispatch_rw_block_io(blkif_t *blkif,
  fail_response:
 	make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
 	free_req(pending_req);
-} 
+	msleep(1); /* back off a bit */
+}