From: Hans-Joachim Picht <hpicht@redhat.com> Date: Fri, 25 Apr 2008 15:08:26 +0200 Subject: [s390] cio: avoid machine check vs. not operational race Message-id: 20080425130826.GE22728@redhat.com O-Subject: [RHEL5 U3 PATCH 5/5] s390 - cio: Avoid machine check vs. not operational races. Bugzilla: 444082 RH-Acked-by: Pete Zaitcev <zaitcev@redhat.com> Description ============ There was the possibilty that an action like ccw_device_set_offline() triggered by a device gone machine check might trigger a not oper event. Unfortunately, this could lead to the situation that we tried to unregister a subchannel twice: Once from the slow path evaluation, and once via the not oper event. Fix this by always using the same mechanism (css_enqueue_subchannel_slow()) for triggering the unregister. This makes sure that unregistration will only be done once. As an added bonus, it also simplyfies the code. Bugzilla ========= BZ 444082 https://bugzilla.redhat.com/show_bug.cgi?id=444082 Upstream status of the patch: ============================= Patch is contained in linux-2.6 as git commit 3f4cf6e72f9f6a0b046b32881acc4f829f3aaa46 Test status: ============ The patch has been tested and fixes the problem. The fix has been verified by the IBM test department. Please ACK. With best regards, Hans diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 287aa30..ce01b63 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -726,8 +726,7 @@ out: wake_up(&ccw_device_init_wq); } -void -ccw_device_call_sch_unregister(void *data) +static void ccw_device_call_sch_unregister(void *data) { struct ccw_device *cdev = data; struct subchannel *sch; diff --git a/drivers/s390/cio/device.h b/drivers/s390/cio/device.h index 4f936b2..bd6c8f3 100644 --- a/drivers/s390/cio/device.h +++ b/drivers/s390/cio/device.h @@ -81,7 +81,6 @@ int ccw_device_cancel_halt_clear(struct ccw_device *); int ccw_device_register(struct ccw_device *); void ccw_device_do_unreg_rereg(void *); -void ccw_device_call_sch_unregister(void *); void ccw_device_schedule_verify(struct ccw_device *cdev, unsigned long delay); int ccw_device_recognition(struct ccw_device *); diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 2cfe4e3..c2c326b 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -589,46 +589,6 @@ ccw_device_recog_timeout(struct ccw_device *cdev, enum dev_event dev_event) } -static void -ccw_device_nopath_notify(void *data) -{ - unsigned long flags; - struct ccw_device *cdev; - struct subchannel *sch; - int ret; - - cdev = (struct ccw_device *)data; - sch = to_subchannel(cdev->dev.parent); - /* Extra sanity. */ - if (sch->lpm) - return; - ret = (sch->driver && sch->driver->notify) ? - sch->driver->notify(&sch->dev, CIO_NO_PATH) : 0; - if (!ret) { - if (get_device(&sch->dev)) { - /* Driver doesn't want to keep device. */ - cio_disable_subchannel(sch); - if (get_device(&cdev->dev)) { - PREPARE_WORK(&cdev->private->kick_work, - ccw_device_call_sch_unregister, - (void *)cdev); - queue_work(ccw_device_work, - &cdev->private->kick_work); - } else - put_device(&sch->dev); - } - } else { - spin_lock_irqsave(cdev->ccwlock, flags); - cio_disable_subchannel(sch); - ccw_device_set_timeout(cdev, 0); - cdev->private->flags.fake_irb = 0; - cdev->private->state = DEV_STATE_DISCONNECTED; - ccw_device_schedule_verify(cdev, 3*HZ); - spin_unlock_irqrestore(cdev->ccwlock, flags); - wake_up(&cdev->private->wait_q); - } -} - void ccw_device_verify_done(struct ccw_device *cdev, int err) { @@ -672,9 +632,7 @@ ccw_device_verify_done(struct ccw_device *cdev, int err) default: /* Reset oper notify indication after verify error. */ cdev->private->flags.donotify = 0; - PREPARE_WORK(&cdev->private->kick_work, - ccw_device_nopath_notify, (void *)cdev); - queue_work(ccw_device_notify_work, &cdev->private->kick_work); + dev_fsm_event(cdev, DEV_EVENT_NOTOPER); ccw_device_done(cdev, DEV_STATE_NOT_OPER); break; } @@ -793,53 +751,19 @@ ccw_device_recog_notoper(struct ccw_device *cdev, enum dev_event dev_event) } /* - * Handle not operational event while offline. + * Handle not operational event in non-special state. */ -static void -ccw_device_offline_notoper(struct ccw_device *cdev, enum dev_event dev_event) +static void ccw_device_generic_notoper(struct ccw_device *cdev, + enum dev_event dev_event) { struct subchannel *sch; cdev->private->state = DEV_STATE_NOT_OPER; sch = to_subchannel(cdev->dev.parent); - if (get_device(&cdev->dev)) { - PREPARE_WORK(&cdev->private->kick_work, - ccw_device_call_sch_unregister, (void *)cdev); - queue_work(ccw_device_work, &cdev->private->kick_work); + if (css_enqueue_subchannel_slow(sch->schid)) { + css_clear_subchannel_slow_list(); + need_rescan = 1; } - wake_up(&cdev->private->wait_q); -} - -/* - * Handle not operational event while online. - */ -static void -ccw_device_online_notoper(struct ccw_device *cdev, enum dev_event dev_event) -{ - struct subchannel *sch; - - sch = to_subchannel(cdev->dev.parent); - if (sch->driver->notify && - sch->driver->notify(&sch->dev, sch->lpm ? CIO_GONE : CIO_NO_PATH)) { - ccw_device_set_timeout(cdev, 0); - cdev->private->flags.fake_irb = 0; - cdev->private->state = DEV_STATE_DISCONNECTED; - ccw_device_schedule_verify(cdev, 3*HZ); - wake_up(&cdev->private->wait_q); - return; - } - cdev->private->state = DEV_STATE_NOT_OPER; - cio_disable_subchannel(sch); - if (sch->schib.scsw.actl != 0) { - // FIXME: not-oper indication to device driver ? - ccw_device_call_handler(cdev); - } - if (get_device(&cdev->dev)) { - PREPARE_WORK(&cdev->private->kick_work, - ccw_device_call_sch_unregister, (void *)cdev); - queue_work(ccw_device_work, &cdev->private->kick_work); - } - wake_up(&cdev->private->wait_q); } /* @@ -935,18 +859,9 @@ ccw_device_online_timeout(struct ccw_device *cdev, enum dev_event dev_event) cdev->private->state = DEV_STATE_TIMEOUT_KILL; return; } - if (ret == -ENODEV) { - struct subchannel *sch; - - sch = to_subchannel(cdev->dev.parent); - if (!sch->lpm) { - PREPARE_WORK(&cdev->private->kick_work, - ccw_device_nopath_notify, (void *)cdev); - queue_work(ccw_device_notify_work, - &cdev->private->kick_work); - } else - dev_fsm_event(cdev, DEV_EVENT_NOTOPER); - } else if (cdev->handler) + if (ret == -ENODEV) + dev_fsm_event(cdev, DEV_EVENT_NOTOPER); + else if (cdev->handler) cdev->handler(cdev, cdev->private->intparm, ERR_PTR(-ETIMEDOUT)); } @@ -1033,11 +948,9 @@ ccw_device_killing_irq(struct ccw_device *cdev, enum dev_event dev_event) if (cdev->handler) cdev->handler(cdev, cdev->private->intparm, ERR_PTR(-ETIMEDOUT)); - if (!sch->lpm) { - PREPARE_WORK(&cdev->private->kick_work, - ccw_device_nopath_notify, (void *)cdev); - queue_work(ccw_device_notify_work, &cdev->private->kick_work); - } else if (cdev->private->flags.doverify) + if (!sch->lpm) + dev_fsm_event(cdev, DEV_EVENT_NOTOPER); + else if (cdev->private->flags.doverify) /* Start delayed path verification. */ ccw_device_online_verify(cdev, 0); } @@ -1053,16 +966,7 @@ ccw_device_killing_timeout(struct ccw_device *cdev, enum dev_event dev_event) return; } if (ret == -ENODEV) { - struct subchannel *sch; - - sch = to_subchannel(cdev->dev.parent); - if (!sch->lpm) { - PREPARE_WORK(&cdev->private->kick_work, - ccw_device_nopath_notify, (void *)cdev); - queue_work(ccw_device_notify_work, - &cdev->private->kick_work); - } else - dev_fsm_event(cdev, DEV_EVENT_NOTOPER); + dev_fsm_event(cdev, DEV_EVENT_NOTOPER); return; } //FIXME: Can we get here? @@ -1084,22 +988,14 @@ void device_kill_io(struct subchannel *sch) return; } if (ret == -ENODEV) { - if (!sch->lpm) { - PREPARE_WORK(&cdev->private->kick_work, - ccw_device_nopath_notify, cdev); - queue_work(ccw_device_notify_work, - &cdev->private->kick_work); - } else - dev_fsm_event(cdev, DEV_EVENT_NOTOPER); + dev_fsm_event(cdev, DEV_EVENT_NOTOPER); return; } if (cdev->handler) cdev->handler(cdev, cdev->private->intparm, ERR_PTR(-EIO)); - if (!sch->lpm) { - PREPARE_WORK(&cdev->private->kick_work, - ccw_device_nopath_notify, cdev); - queue_work(ccw_device_notify_work, &cdev->private->kick_work); - } else + if (!sch->lpm) + dev_fsm_event(cdev, DEV_EVENT_NOTOPER); + else /* Start delayed path verification. */ ccw_device_online_verify(cdev, 0); } @@ -1130,11 +1026,9 @@ ccw_device_wait4io_irq(struct ccw_device *cdev, enum dev_event dev_event) ccw_device_set_timeout(cdev, 0); /* Call the handler. */ ccw_device_call_handler(cdev); - if (!sch->lpm) { - PREPARE_WORK(&cdev->private->kick_work, - ccw_device_nopath_notify, (void *)cdev); - queue_work(ccw_device_notify_work, &cdev->private->kick_work); - } else + if (!sch->lpm) + dev_fsm_event(cdev, DEV_EVENT_NOTOPER); + else ccw_device_online_verify(cdev, 0); } @@ -1153,23 +1047,15 @@ ccw_device_wait4io_timeout(struct ccw_device *cdev, enum dev_event dev_event) return; } if (ret == -ENODEV) { - if (!sch->lpm) { - PREPARE_WORK(&cdev->private->kick_work, - ccw_device_nopath_notify, (void *)cdev); - queue_work(ccw_device_notify_work, - &cdev->private->kick_work); - } else - dev_fsm_event(cdev, DEV_EVENT_NOTOPER); + dev_fsm_event(cdev, DEV_EVENT_NOTOPER); return; } if (cdev->handler) cdev->handler(cdev, cdev->private->intparm, ERR_PTR(-ETIMEDOUT)); - if (!sch->lpm) { - PREPARE_WORK(&cdev->private->kick_work, - ccw_device_nopath_notify, (void *)cdev); - queue_work(ccw_device_notify_work, &cdev->private->kick_work); - } else if (cdev->private->flags.doverify) + if (!sch->lpm) + dev_fsm_event(cdev, DEV_EVENT_NOTOPER); + else if (cdev->private->flags.doverify) /* Start delayed path verification. */ ccw_device_online_verify(cdev, 0); } @@ -1352,7 +1238,7 @@ fsm_func_t *dev_jumptable[NR_DEV_STATES][NR_DEV_EVENTS] = { [DEV_EVENT_VERIFY] = ccw_device_nop, }, [DEV_STATE_SENSE_PGID] = { - [DEV_EVENT_NOTOPER] = ccw_device_online_notoper, + [DEV_EVENT_NOTOPER] = ccw_device_generic_notoper, [DEV_EVENT_INTERRUPT] = ccw_device_sense_pgid_irq, [DEV_EVENT_TIMEOUT] = ccw_device_onoff_timeout, [DEV_EVENT_VERIFY] = ccw_device_nop, @@ -1364,56 +1250,56 @@ fsm_func_t *dev_jumptable[NR_DEV_STATES][NR_DEV_EVENTS] = { [DEV_EVENT_VERIFY] = ccw_device_nop, }, [DEV_STATE_OFFLINE] = { - [DEV_EVENT_NOTOPER] = ccw_device_offline_notoper, + [DEV_EVENT_NOTOPER] = ccw_device_generic_notoper, [DEV_EVENT_INTERRUPT] = ccw_device_offline_irq, [DEV_EVENT_TIMEOUT] = ccw_device_nop, [DEV_EVENT_VERIFY] = ccw_device_nop, }, [DEV_STATE_VERIFY] = { - [DEV_EVENT_NOTOPER] = ccw_device_online_notoper, + [DEV_EVENT_NOTOPER] = ccw_device_generic_notoper, [DEV_EVENT_INTERRUPT] = ccw_device_verify_irq, [DEV_EVENT_TIMEOUT] = ccw_device_onoff_timeout, [DEV_EVENT_VERIFY] = ccw_device_delay_verify, }, [DEV_STATE_ONLINE] = { - [DEV_EVENT_NOTOPER] = ccw_device_online_notoper, + [DEV_EVENT_NOTOPER] = ccw_device_generic_notoper, [DEV_EVENT_INTERRUPT] = ccw_device_irq, [DEV_EVENT_TIMEOUT] = ccw_device_online_timeout, [DEV_EVENT_VERIFY] = ccw_device_online_verify, }, [DEV_STATE_W4SENSE] = { - [DEV_EVENT_NOTOPER] = ccw_device_online_notoper, + [DEV_EVENT_NOTOPER] = ccw_device_generic_notoper, [DEV_EVENT_INTERRUPT] = ccw_device_w4sense, [DEV_EVENT_TIMEOUT] = ccw_device_nop, [DEV_EVENT_VERIFY] = ccw_device_online_verify, }, [DEV_STATE_DISBAND_PGID] = { - [DEV_EVENT_NOTOPER] = ccw_device_online_notoper, + [DEV_EVENT_NOTOPER] = ccw_device_generic_notoper, [DEV_EVENT_INTERRUPT] = ccw_device_disband_irq, [DEV_EVENT_TIMEOUT] = ccw_device_onoff_timeout, [DEV_EVENT_VERIFY] = ccw_device_nop, }, [DEV_STATE_BOXED] = { - [DEV_EVENT_NOTOPER] = ccw_device_offline_notoper, + [DEV_EVENT_NOTOPER] = ccw_device_generic_notoper, [DEV_EVENT_INTERRUPT] = ccw_device_stlck_done, [DEV_EVENT_TIMEOUT] = ccw_device_stlck_done, [DEV_EVENT_VERIFY] = ccw_device_nop, }, /* states to wait for i/o completion before doing something */ [DEV_STATE_CLEAR_VERIFY] = { - [DEV_EVENT_NOTOPER] = ccw_device_online_notoper, + [DEV_EVENT_NOTOPER] = ccw_device_generic_notoper, [DEV_EVENT_INTERRUPT] = ccw_device_clear_verify, [DEV_EVENT_TIMEOUT] = ccw_device_nop, [DEV_EVENT_VERIFY] = ccw_device_nop, }, [DEV_STATE_TIMEOUT_KILL] = { - [DEV_EVENT_NOTOPER] = ccw_device_online_notoper, + [DEV_EVENT_NOTOPER] = ccw_device_generic_notoper, [DEV_EVENT_INTERRUPT] = ccw_device_killing_irq, [DEV_EVENT_TIMEOUT] = ccw_device_killing_timeout, [DEV_EVENT_VERIFY] = ccw_device_nop, //FIXME }, [DEV_STATE_WAIT4IO] = { - [DEV_EVENT_NOTOPER] = ccw_device_online_notoper, + [DEV_EVENT_NOTOPER] = ccw_device_generic_notoper, [DEV_EVENT_INTERRUPT] = ccw_device_wait4io_irq, [DEV_EVENT_TIMEOUT] = ccw_device_wait4io_timeout, [DEV_EVENT_VERIFY] = ccw_device_delay_verify,