Sophie

Sophie

distrib > CentOS > 5 > x86_64 > by-pkgid > ea32411352494358b8d75a78402a4713 > files > 937

kernel-2.6.18-238.19.1.el5.centos.plus.src.rpm

From: Eric Sandeen <sandeen@redhat.com>
Date: Thu, 2 Sep 2010 17:43:15 -0400
Subject: [fs] direct-io: move aio_complete into ->end_io
Message-id: <4C7FE233.8010401@redhat.com>
Patchwork-id: 28024
O-Subject: [RHEL5.6 PATCH 1/3] direct-io: move aio_complete into ->end_io
Bugzilla: 617690
RH-Acked-by: Dave Chinner <dchinner@redhat.com>

For Bug 617690 -
ext4 and xfs wrong data returned on read after write if file size was changed with ftruncate

See also kernel.org bz https://bugzilla.kernel.org/show_bug.cgi?id=16165

Backport of this upstream commit:

commit 40e2e97316af6e62affab7a392e792494b8d9dde
Author: Christoph Hellwig <hch@infradead.org>
Date:   Sun Jul 18 21:17:09 2010 +0000

    direct-io: move aio_complete into ->end_io

    Filesystems with unwritten extent support must not complete an AIO request
    until the transaction to convert the extent has been commited.  That means
    the aio_complete calls needs to be moved into the ->end_io callback so
    that the filesystem can control when to call it exactly.

    This makes a bit of a mess out of dio_complete and the ->end_io callback
    prototype even more complicated.

    Signed-off-by: Christoph Hellwig <hch@lst.de>
    Reviewed-by: Jan Kara <jack@suse.cz>
    Signed-off-by: Alex Elder <aelder@sgi.com>

This required some KABI tricks because it changed dio_iodone_t,
which is part of the signature of __blockdev_direct_IO().

To work around this I've used yet another fs_flag so that ext4 &
xfs can define an iodone handler of dio_iodone2_t, cast that
into dio_iodone_t for calls into blockdev_direct_IO*(), and then
dio_complete() looks at the fs_flag to know whether it should cast
it back into the new dio_iodone2_t type and call it with more args
or just go the old route.

Thanks,
-Eric

Signed-off-by: Jarod Wilson <jarod@redhat.com>

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 01f0592..44c9378 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -216,7 +216,7 @@ static struct page *dio_get_page(struct dio *dio)
  * filesystems can use it to hold additional state between get_block calls and
  * dio_complete.
  */
-static int dio_complete(struct dio *dio, loff_t offset, int ret)
+static int dio_complete(struct dio *dio, loff_t offset, int ret, bool is_async)
 {
 	ssize_t transferred = 0;
 
@@ -237,13 +237,6 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
 			transferred = dio->i_size - offset;
 	}
 
-	if (dio->end_io && dio->result)
-		dio->end_io(dio->iocb, offset, transferred,
-			    dio->map_bh.b_private);
-	if (dio->lock_type == DIO_LOCKING)
-		/* lockdep: non-owner release */
-		up_read_non_owner(&dio->inode->i_alloc_sem);
-
 	if (ret == 0)
 		ret = dio->page_errors;
 	if (ret == 0)
@@ -251,6 +244,23 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
 	if (ret == 0)
 		ret = transferred;
 
+	if (dio->end_io && dio->result) {
+		if (dio->inode->i_sb->s_type->fs_flags & FS_HAS_IODONE2) {
+			dio_iodone2_t *end_io = (dio_iodone2_t *)dio->end_io;
+
+			end_io(dio->iocb, offset, transferred,
+				    dio->map_bh.b_private, ret, is_async);
+		} else
+			dio->end_io(dio->iocb, offset, transferred,
+				    dio->map_bh.b_private);
+	} else if (is_async) {
+		aio_complete(dio->iocb, ret, 0);
+	}
+
+	if (dio->lock_type == DIO_LOCKING)
+		/* lockdep: non-owner release */
+		up_read_non_owner(&dio->inode->i_alloc_sem);
+
 	return ret;
 }
 
@@ -277,8 +287,7 @@ static int dio_bio_end_aio(struct bio *bio, unsigned int bytes_done, int error)
 	spin_unlock_irqrestore(&dio->bio_lock, flags);
 
 	if (remaining == 0) {
-		int ret = dio_complete(dio, dio->iocb->ki_pos, 0);
-		aio_complete(dio->iocb, ret, 0);
+		dio_complete(dio, dio->iocb->ki_pos, 0, true);
 		kfree(dio);
 	}
 
@@ -1089,7 +1098,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 	spin_unlock_irqrestore(&dio->bio_lock, flags);
 
 	if (ret2 == 0) {
-		ret = dio_complete(dio, offset, ret);
+		ret = dio_complete(dio, offset, ret, false);
 		kfree(dio);
 	} else
 		BUG_ON(ret != -EIOCBQUEUED);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5daef8e..b8e48ea 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3732,7 +3732,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode)
 }
 
 static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
-			    ssize_t size, void *private)
+			    ssize_t size, void *private, int ret,
+			    bool is_async)
 {
         ext4_io_end_t *io_end = iocb->private;
 	struct ext4_inode_info *ei;
@@ -3741,7 +3742,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 
 	/* if not async direct IO or dio with 0 bytes write, just return */
 	if (!io_end || !size)
-		return;
+		goto out;
 
 	ei = EXT4_I(io_end->inode);
 
@@ -3754,7 +3755,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 	if (io_end->flag != DIO_AIO_UNWRITTEN){
 		ext4_free_io_end(io_end);
 		iocb->private = NULL;
-		return;
+		goto out;
 	}
 
 	io_end->offset = offset;
@@ -3770,6 +3771,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 	/* queue the work to convert unwritten extents to written */
 	queue_work(wq, &io_end->work);
 	iocb->private = NULL;
+out:
+	if (is_async)
+		aio_complete(iocb, ret, 0);
 }
 /*
  * For ext4 extent files, ext4 will do direct-io write to holes,
@@ -3841,7 +3845,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 					 inode->i_sb->s_bdev, iov,
 					 offset, nr_segs,
 					 ext4_get_block_dio_write,
-					 ext4_end_io_dio);
+					 (dio_iodone_t *)ext4_end_io_dio);
 		if (iocb->private)
 			EXT4_I(inode)->cur_aio_dio = NULL;
 		/*
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e631779..1de676d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4032,7 +4032,8 @@ static struct file_system_type ext4_fs_type = {
 	.get_sb		= ext4_get_sb,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV|FS_HAS_FALLOCATE|FS_HAS_FIEMAP
-			 |FS_HAS_FREEZE|FS_HAS_TRYTOFREE|FS_HAS_GETRESV,
+			 |FS_HAS_FREEZE|FS_HAS_TRYTOFREE|FS_HAS_GETRESV
+			 |FS_HAS_IODONE2,
 };
 
 #ifdef CONFIG_EXT4DEV_COMPAT
@@ -4052,7 +4053,8 @@ static struct file_system_type ext4dev_fs_type = {
 	.get_sb		= ext4dev_get_sb,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV|FS_HAS_FALLOCATE|FS_HAS_FIEMAP
-			  |FS_HAS_FREEZE|FS_HAS_TRYTOFREE|FS_HAS_GETRESV,
+			  |FS_HAS_FREEZE|FS_HAS_TRYTOFREE|FS_HAS_GETRESV
+			  |FS_HAS_IODONE2,
 };
 MODULE_ALIAS("ext4dev");
 #endif
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index f1d1c34..4795179 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -608,7 +608,9 @@ bail:
 static void ocfs2_dio_end_io(struct kiocb *iocb,
 			     loff_t offset,
 			     ssize_t bytes,
-			     void *private)
+			     void *private,
+			     int ret,
+			     bool is_async)
 {
 	struct inode *inode = iocb->ki_filp->f_dentry->d_inode;
 
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index b2062f3..0f334b5 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1433,7 +1433,9 @@ xfs_end_io_direct(
 	struct kiocb	*iocb,
 	loff_t		offset,
 	ssize_t		size,
-	void		*private)
+	void		*private,
+	int		ret,
+	bool		is_async)
 {
 	xfs_ioend_t	*ioend = iocb->private;
 
@@ -1479,6 +1481,9 @@ xfs_end_io_direct(
 	 * against double-freeing.
 	 */
 	iocb->private = NULL;
+
+	if (is_async)
+		aio_complete(iocb, ret, 0);
 }
 
 STATIC ssize_t
@@ -1501,13 +1506,13 @@ xfs_vm_direct_IO(
 		ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
 			bdev, iov, offset, nr_segs,
 			xfs_get_blocks_direct,
-			xfs_end_io_direct);
+			(dio_iodone_t *)xfs_end_io_direct);
 	} else {
 		iocb->private = xfs_alloc_ioend(inode, IOMAP_READ);
 		ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
 			bdev, iov, offset, nr_segs,
 			xfs_get_blocks_direct,
-			xfs_end_io_direct);
+			(dio_iodone_t *)xfs_end_io_direct);
 	}
 
 	if (unlikely(ret != -EIOCBQUEUED && iocb->private))
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 67d9810..14ee645 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -38,6 +38,8 @@ typedef struct xfs_ioend {
 	size_t			io_size;	/* size of the extent */
 	xfs_off_t		io_offset;	/* offset in the file */
 	struct work_struct	io_work;	/* xfsdatad work queue */
+	struct kiocb		*io_iocb;
+	int			io_result;
 } xfs_ioend_t;
 
 extern const struct address_space_operations_ext xfs_address_space_operations;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index ee26f44..6c0745e 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1890,7 +1890,8 @@ static struct file_system_type xfs_fs_type = {
 	.get_sb			= xfs_fs_get_sb,
 	.kill_sb		= kill_block_super,
 	.fs_flags		= FS_REQUIRES_DEV|FS_HAS_FALLOCATE|
-					FS_HAS_FIEMAP|FS_HAS_FREEZE,
+					FS_HAS_FIEMAP|FS_HAS_FREEZE|
+					FS_HAS_IODONE2,
 };
 
 STATIC int __init
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 670b760..8f43fcc 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -97,6 +97,7 @@ extern int dir_notify_enable;
 #define FS_HAS_FREEZE 16      /* Safe to check for ->freeze_fs etc */
 #define FS_HAS_TRYTOFREE 32   /* Safe to check for ->bdev_try_to_free... */
 #define FS_HAS_GETRESV 64     /* Safe to check for ->get_reserved_space */
+#define FS_HAS_IODONE2 128    /* dio->io_done is type dio_iodone2_t */
 #define FS_REVAL_DOT	16384	/* Check the paths ".", ".." for staleness */
 #define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move()
 					 * during rename() internally.
@@ -324,6 +325,9 @@ typedef int (get_block_t)(struct inode *inode, sector_t iblock,
 			struct buffer_head *bh_result, int create);
 typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 			ssize_t bytes, void *private);
+typedef void (dio_iodone2_t)(struct kiocb *iocb, loff_t offset,
+			ssize_t bytes, void *private, int ret,
+			bool is_async);
 
 /*
  * Attribute flags.  These should be or-ed together to figure out what