From: Jerome Marchand <jmarchan@redhat.com> Date: Mon, 4 May 2009 15:41:08 +0200 Subject: [block] disable iostat collection in gendisk Message-id: 49FEF074.1070104@redhat.com O-Subject: [Patch RHEL5.4 2/2 v2] BZ484158: Disable iostat collection in gendisk Bugzilla: 484158 RH-Acked-by: Anton Arapov <aarapov@redhat.com> RH-Acked-by: Larry Woodman <lwoodman@redhat.com> RH-Acked-by: Jeff Moyer <jmoyer@redhat.com> Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=484158 Description: The accounting of partition I/O stats has caused performance regressions with some workloads, so add a feature which allows turning I/O accounting off through a sysfs file. This new version of the patch includes commit 42dad7647aec49b3ad20dd0cb832b232a6ae514f which simplifies the code. Upstream status: The feature is upstream. commit bc58ba9468d94d62c56ab9b47173583ec140b165 (main patch) commit fb8ec18c316d869271137c97320dbfd2def56569 (fix) commit 26308eab69aa193f7b3fb50764a64ae14544a39b (fix) commit 42dad7647aec49b3ad20dd0cb832b232a6ae514f (simplification) Brew: https://brewweb.devel.redhat.com/taskinfo?taskID=1782554 Test status: Built on all arches and successfully tested by me on i686. 
Regards, Jerome diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 5d1da32..6abdc04 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -1285,6 +1285,7 @@ static const char * const rq_flags[] = { "REQ_FAILFAST_DEV", "REQ_FAILFAST_TRANSPORT", "REQ_FAILFAST_DRIVER", + "REQ_IO_STAT", }; void blk_dump_rq_flags(struct request *rq, char *msg) @@ -2031,7 +2032,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) q->merge_requests_fn = ll_merge_requests_fn; q->prep_rq_fn = NULL; q->unplug_fn = generic_unplug_device; - q->queue_flags = (1 << QUEUE_FLAG_CLUSTER); + q->queue_flags = QUEUE_FLAG_DEFAULT; q->queue_lock = lock; blk_queue_segment_boundary(q, BLK_SEG_BOUNDARY_MASK); @@ -2077,7 +2078,7 @@ static inline void blk_free_request(request_queue_t *q, struct request *rq) } static inline struct request * -blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, +blk_alloc_request(request_queue_t *q, int flags, struct bio *bio, int priv, gfp_t gfp_mask) { struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); @@ -2089,7 +2090,7 @@ blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, * first three bits are identical in rq->flags and bio->bi_rw, * see bio.h and blkdev.h */ - rq->flags = rw; + rq->flags = flags; if (priv) { if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) { @@ -2229,6 +2230,8 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio, if (priv) rl->elvpriv++; + if (blk_queue_io_stat(q)) + rw |= REQ_IO_STAT; spin_unlock_irq(q->queue_lock); rq = blk_alloc_request(q, rw, bio, priv, gfp_mask); @@ -2685,7 +2688,7 @@ static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io) { int rw = rq_data_dir(rq); - if (!blk_fs_request(rq) || !rq->rq_disk) + if (!blk_fs_request(rq) || !blk_do_io_stat(rq)) return; if (!new_io) { @@ -2863,6 +2866,20 @@ long blk_congestion_wait(int rw, long timeout) EXPORT_SYMBOL(blk_congestion_wait); +static void blk_account_io_merge(struct 
request *req) +{ + if (blk_do_io_stat(req)) { + struct hd_struct *part + = get_part(req->rq_disk, req->sector); + disk_round_stats(req->rq_disk); + req->rq_disk->in_flight--; + if (part) { + part_round_stats(part); + get_partstats(part)->in_flight--; + } + } +} + /* * Has to be called with the request spinlock acquired */ @@ -2908,16 +2925,10 @@ static int attempt_merge(request_queue_t *q, struct request *req, elv_merge_requests(q, req, next); - if (req->rq_disk) { - struct hd_struct *part - = get_part(req->rq_disk, req->sector); - disk_round_stats(req->rq_disk); - req->rq_disk->in_flight--; - if (part) { - part_round_stats(part); - get_partstats(part)->in_flight--; - } - } + /* + * 'next' is going away, so update stats accordingly + */ + blk_account_io_merge(next); req->ioprio = ioprio_best(req->ioprio, next->ioprio); @@ -3363,6 +3374,43 @@ static void blk_recalc_rq_sectors(struct request *rq, int nsect) } } +static void blk_account_io_completion(struct request *req, unsigned int bytes) +{ + if (blk_fs_request(req) && blk_do_io_stat(req)) { + const int rw = rq_data_dir(req); + all_stat_add(req->rq_disk, sectors[rw], + bytes >> 9, req->sector); + + } +} + +static void blk_account_io_done(struct request *req) +{ + if (!blk_do_io_stat(req)) + return; + + /* + * Account IO completion. bar_rq isn't accounted as a normal + * IO on queueing nor completion. Accounting the containing + * request is enough. 
+ */ + if (blk_fs_request(req) && req != &req->q->bar_rq) { + unsigned long duration = jiffies - req->start_time; + const int rw = rq_data_dir(req); + struct gendisk *disk = req->rq_disk; + struct hd_struct *part = get_part(disk, req->sector); + + __all_stat_inc(disk, ios[rw], req->sector); + __all_stat_add(disk, ticks[rw], duration, req->sector); + disk_round_stats(disk); + disk->in_flight--; + if (part) { + part_round_stats(part); + get_partstats(part)->in_flight--; + } + } +} + static int __end_that_request_first(struct request *req, int uptodate, int nr_bytes) { @@ -3392,12 +3440,7 @@ static int __end_that_request_first(struct request *req, int uptodate, (unsigned long long)req->sector); } - if (blk_fs_request(req) && req->rq_disk) { - const int rw = rq_data_dir(req); - - all_stat_add(req->rq_disk, sectors[rw], - nr_bytes >> 9, req->sector); - } + blk_account_io_completion(req, nr_bytes); total_bytes = bio_nbytes = 0; while ((bio = req->bio) != NULL) { @@ -3602,7 +3645,6 @@ EXPORT_SYMBOL(blk_complete_request); */ void end_that_request_last(struct request *req, int uptodate) { - struct gendisk *disk = req->rq_disk; int error; /* @@ -3615,25 +3657,8 @@ void end_that_request_last(struct request *req, int uptodate) if (unlikely(laptop_mode) && blk_fs_request(req)) laptop_io_completion(); - /* - * Account IO completion. bar_rq isn't accounted as a normal - * IO on queueing nor completion. Accounting the containing - * request is enough. 
- */ - if (disk && blk_fs_request(req) && req != &req->q->bar_rq) { - unsigned long duration = jiffies - req->start_time; - const int rw = rq_data_dir(req); - struct hd_struct *part = get_part(disk, req->sector); + blk_account_io_done(req); - __all_stat_inc(disk, ios[rw], req->sector); - __all_stat_add(disk, ticks[rw], duration, req->sector); - disk_round_stats(disk); - disk->in_flight--; - if (part) { - part_round_stats(part); - get_partstats(part)->in_flight--; - } - } if (req->end_io) req->end_io(req, error); else @@ -3976,6 +4001,28 @@ static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page) return queue_var_show(max_hw_sectors_kb, (page)); } +static ssize_t queue_iostats_show(struct request_queue *q, char *page) +{ + return queue_var_show(blk_queue_io_stat(q), page); +} + +static ssize_t queue_iostats_store(struct request_queue *q, const char *page, + size_t count) +{ + unsigned long stats; + ssize_t ret = queue_var_store(&stats, page, count); + + spin_lock_irq(q->queue_lock); + if (stats) + set_bit(QUEUE_FLAG_IO_STAT, &q->queue_flags); + else + clear_bit(QUEUE_FLAG_IO_STAT, &q->queue_flags); + spin_unlock_irq(q->queue_lock); + + return ret; +} + + static struct queue_sysfs_entry queue_requests_entry = { .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, @@ -3983,6 +4030,12 @@ static struct queue_sysfs_entry queue_requests_entry = { .store = queue_requests_store, }; +static struct queue_sysfs_entry queue_iostats_entry = { + .attr = {.name = "iostats", .mode = S_IRUGO | S_IWUSR }, + .show = queue_iostats_show, + .store = queue_iostats_store, +}; + static struct queue_sysfs_entry queue_ra_entry = { .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR }, .show = queue_ra_show, @@ -4012,6 +4065,7 @@ static struct attribute *default_attrs[] = { &queue_max_hw_sectors_entry.attr, &queue_max_sectors_entry.attr, &queue_iosched_entry.attr, + &queue_iostats_entry.attr, NULL, }; diff --git a/include/linux/blkdev.h 
b/include/linux/blkdev.h index fcfa093..748ff68 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -239,6 +239,7 @@ enum rq_flag_bits { __REQ_FAILFAST_DEV, /* no driver retries of device errors */ __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */ __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */ + __REQ_IO_STAT, /* account I/O stat */ __REQ_NR_BITS, /* stops here */ }; @@ -272,6 +273,7 @@ enum rq_flag_bits { #define REQ_PM_SHUTDOWN (1 << __REQ_PM_SHUTDOWN) #define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) #define REQ_RW_SYNC (1 << __REQ_RW_SYNC) +#define REQ_IO_STAT (1 << __REQ_IO_STAT) /* * State information carried for REQ_PM_SUSPEND and REQ_PM_RESUME @@ -450,6 +452,10 @@ struct request_queue #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ +#define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ + +#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ + (1 << QUEUE_FLAG_CLUSTER)) enum { /* @@ -495,10 +501,17 @@ enum { #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_flushing(q) ((q)->ordseq) +#define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_fs_request(rq) ((rq)->flags & REQ_CMD) #define blk_pc_request(rq) ((rq)->flags & REQ_BLOCK_PC) #define blk_rq_started(rq) ((rq)->flags & REQ_STARTED) +#define blk_rq_io_stat(rq) ((rq)->flags & REQ_IO_STAT) + +static inline int blk_do_io_stat(struct request *rq) +{ + return rq->rq_disk && blk_rq_io_stat(rq); +} #define blk_noretry_request(rq) ((rq)->flags & REQ_FAILFAST) #define blk_failfast_dev(rq) ((rq)->flags & REQ_FAILFAST_DEV)