From: Jeff Moyer <jmoyer@redhat.com> Date: Fri, 30 Oct 2009 09:46:02 -0400 Subject: [fs] dio: don't zero out pages array inside struct dio Message-id: x49pr85ro4l.fsf@segfault.boston.devel.redhat.com O-Subject: [rhel5 patch] dio: don't zero out the pages array inside struct dio Bugzilla: 488161 RH-Acked-by: Peter Staubach <staubach@redhat.com> Hi, In bug 488161, Intel reported a 0.5% OLTP performance regression introduced by the patch that zeroes out the struct dio. This patch was originally introduced to fix a bug caused by an uninitialized map_bh passed into get_block. The bulk of the regression is due to zeroing out the pages array, which is 64 pointers long. By simply moving that array to the end of the structure and zeroing out everything but that, we get back 0.6% (verified by Intel). Zach Brown Acked the patch, and I've posted it upstream. I don't forsee any problems in getting it into mainline. Comments, as always, are encouraged. Cheers, Jeff diff --git a/fs/direct-io.c b/fs/direct-io.c index d2e8f32..833e27a 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -104,6 +104,18 @@ struct dio { unsigned cur_page_len; /* Nr of bytes at cur_page_offset */ sector_t cur_page_block; /* Where it starts */ + /* BIO completion state */ + spinlock_t bio_lock; /* protects BIO fields below */ + unsigned long refcount; /* direct_io_worker() and bios */ + struct bio *bio_list; /* singly linked via bi_private */ + struct task_struct *waiter; /* waiting task (NULL if none) */ + + /* AIO related stuff */ + struct kiocb *iocb; /* kiocb */ + int is_async; /* is IO async ? */ + int io_error; /* IO error in completion path */ + ssize_t result; /* IO result */ + /* * Page fetching state. These variables belong to dio_refill_pages(). */ @@ -115,22 +127,10 @@ struct dio { * Page queue. These variables belong to dio_refill_pages() and * dio_get_page(). */ - struct page *pages[DIO_PAGES]; /* page buffer */ unsigned head; /* next page to process */ unsigned tail; /* last valid page + 1 */ int page_errors; /* errno from get_user_pages() */ - - /* BIO completion state */ - spinlock_t bio_lock; /* protects BIO fields below */ - unsigned long refcount; /* direct_io_worker() and bios */ - struct bio *bio_list; /* singly linked via bi_private */ - struct task_struct *waiter; /* waiting task (NULL if none) */ - - /* AIO related stuff */ - struct kiocb *iocb; /* kiocb */ - int is_async; /* is IO async ? */ - int io_error; /* IO error in completion path */ - ssize_t result; /* IO result */ + struct page *pages[DIO_PAGES]; /* page buffer */ }; /* @@ -1164,10 +1164,16 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, } } - dio = kzalloc(sizeof(*dio), GFP_KERNEL); + dio = kmalloc(sizeof(*dio), GFP_KERNEL); retval = -ENOMEM; if (!dio) goto out; + /* + * Believe it or not, zeroing out the page array caused a .5% + * performance regression in a database benchmark. So, we take + * care to only zero out what's needed. + */ + memset(dio, 0, offsetof(struct dio, pages)); /* * For block device access DIO_NO_LOCKING is used,