From: Mikulas Patocka <mpatocka@redhat.com> Date: Tue, 19 Aug 2008 14:09:54 -0400 Subject: [md] LVM raid-1 performance fixes Message-id: Pine.LNX.4.64.0808191354510.7851@hs20-bc2-1.build.redhat.com O-Subject: [PATCH 1/2 RHEL 5.3] bz438153 LVM raid-1 performance fixes Bugzilla: 438153 RH-Acked-by: Jonathan Brassow <jbrassow@redhat.com> RH-Acked-by: Alasdair G Kergon <agk@redhat.com> Hi. schedule() could cause looping and CPU consumption. schedule_timeout() (that used to be there before) could, on the other hand, cause serious performance degradation (it really happened with one customer). This patch solves the problem properly, by using timers. (Note: this is the first patch for bug 438153; there is also a second one, which I will send.) The patch is already upstream and in RHEL-4.7. Testing: I compiled it on my workstation and ran the test from the bugzilla. Upstream commit: a2aebe03be60ae4da03507a00d60211d5e0327c3 (2.6.26-rc1) Fix bugzilla 438153. Using schedule() or schedule_timeout() (which used to be there) would block the process and degrade performance. I reworked the code to use a timer: after adding something to the queues, the code must call either wake (process the queue immediately) or delayed_wake (process the queue after at most 1/5 s). 
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 94b0032..92625f7 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -145,6 +145,9 @@ struct mirror_set { struct workqueue_struct *kmirrord_wq; struct work_struct kmirrord_work; + struct timer_list timer; + unsigned long timer_pending; + struct mirror mirror[0]; }; @@ -161,11 +164,30 @@ static inline sector_t region_to_sector(struct region_hash *rh, region_t region) return region << rh->region_shift; } -static inline void wake(struct mirror_set *ms) +static void wake(struct mirror_set *ms) { queue_work(ms->kmirrord_wq, &ms->kmirrord_work); } +static void delayed_wake_fn(unsigned long data) +{ + struct mirror_set *ms = (struct mirror_set *) data; + + clear_bit(0, &ms->timer_pending); + wake(ms); +} + +static void delayed_wake(struct mirror_set *ms) +{ + if (test_and_set_bit(0, &ms->timer_pending)) + return; + + ms->timer.expires = jiffies + HZ / 5; + ms->timer.data = (unsigned long) ms; + ms->timer.function = delayed_wake_fn; + add_timer(&ms->timer); +} + /* FIXME move this */ static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw); @@ -1184,9 +1206,12 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) * Add bios that are delayed due to remote recovery * back on to the write queue */ - spin_lock_irq(&ms->lock); - bio_list_merge(&ms->writes, &requeue); - spin_unlock_irq(&ms->lock); + if (requeue.head) { + spin_lock_irq(&ms->lock); + bio_list_merge(&ms->writes, &requeue); + spin_unlock_irq(&ms->lock); + delayed_wake(ms); + } /* * Increment the pending counts for any regions that will @@ -1201,14 +1226,14 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) /* * Dispatch io. 
*/ - if (ms->log_failure) { + if (unlikely(ms->log_failure)) { spin_lock_irq(&ms->lock); bio_list_merge(&ms->failures, &sync); spin_unlock_irq(&ms->lock); - } else { + wake(ms); + } else while ((bio = bio_list_pop(&sync))) do_write(ms, bio); - } while ((bio = bio_list_pop(&recover))) rh_delay(&ms->rh, bio); @@ -1255,7 +1280,7 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures) spin_lock_irq(&ms->lock); bio_list_merge(&ms->failures, failures); spin_unlock_irq(&ms->lock); - wake(ms); + delayed_wake(ms); } return; } @@ -1270,8 +1295,9 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures) /*----------------------------------------------------------------- * kmirrord *---------------------------------------------------------------*/ -static int do_mirror(struct mirror_set *ms) +static void do_mirror(void *data) { + struct mirror_set *ms = data; struct bio_list reads, writes, failures; spin_lock_irq(&ms->lock); @@ -1288,14 +1314,6 @@ static int do_mirror(struct mirror_set *ms) do_reads(ms, &reads); do_writes(ms, &writes); do_failures(ms, &failures); - - return (ms->writes.head || ms->failures.head) ? 1 : 0; -} - -static void do_work(void *data) -{ - while (do_mirror(data)) - schedule(); } /*----------------------------------------------------------------- @@ -1499,7 +1517,9 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) free_context(ms, ti, ms->nr_mirrors); return -ENOMEM; } - INIT_WORK(&ms->kmirrord_work, do_work, ms); + INIT_WORK(&ms->kmirrord_work, do_mirror, ms); + init_timer(&ms->timer); + ms->timer_pending = 0; r = kcopyd_client_create(DM_IO_PAGES, &ms->kcopyd_client); if (r) { @@ -1516,6 +1536,7 @@ static void mirror_dtr(struct dm_target *ti) { struct mirror_set *ms = (struct mirror_set *) ti->private; + del_timer_sync(&ms->timer); flush_workqueue(ms->kmirrord_wq); kcopyd_client_destroy(ms->kcopyd_client); destroy_workqueue(ms->kmirrord_wq);