fs: kill i_alloc_sem
i_alloc_sem is a rather special rw_semaphore. It's the last one that may
be released by a non-owner, and it's write side is always mirrored by
real exclusion. It's intended use it to wait for all pending direct I/O
requests to finish before starting a truncate.
Replace it with a hand-grown construct:
- exclusion for truncates is already guaranteed by i_mutex, so it can
simply fall way
- the reader side is replaced by an i_dio_count member in struct inode
that counts the number of pending direct I/O requests. Truncate can't
proceed as long as it's non-zero
- when i_dio_count reaches non-zero we wake up a pending truncate using
wake_up_bit on a new bit in i_flags
- new references to i_dio_count can't appear while we are waiting for
it to read zero because the direct I/O count always needs i_mutex
(or an equivalent like XFS's i_iolock) for starting a new operation.
This scheme is much simpler, and saves the space of a spinlock_t and a
struct list_head in struct inode (typically 160 bits on a non-debug 64-bit
system).
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1393742..2fe9207 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -779,7 +779,7 @@
struct timespec i_ctime;
blkcnt_t i_blocks;
unsigned short i_bytes;
- struct rw_semaphore i_alloc_sem;
+ atomic_t i_dio_count;
const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
struct file_lock *i_flock;
struct address_space *i_mapping;
@@ -1705,6 +1705,10 @@
* set during data writeback, and cleared with a wakeup
* on the bit address once it is done.
*
+ * I_REFERENCED Marks the inode as recently references on the LRU list.
+ *
+ * I_DIO_WAKEUP Never set. Only used as a key for wait_on_bit().
+ *
* Q: What is the difference between I_WILL_FREE and I_FREEING?
*/
#define I_DIRTY_SYNC (1 << 0)
@@ -1718,6 +1722,8 @@
#define __I_SYNC 7
#define I_SYNC (1 << __I_SYNC)
#define I_REFERENCED (1 << 8)
+#define __I_DIO_WAKEUP 9
+#define I_DIO_WAKEUP (1 << I_DIO_WAKEUP)
#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
@@ -1828,7 +1834,6 @@
struct lock_class_key i_lock_key;
struct lock_class_key i_mutex_key;
struct lock_class_key i_mutex_dir_key;
- struct lock_class_key i_alloc_sem_key;
};
extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags,
@@ -2404,6 +2409,8 @@
};
void dio_end_io(struct bio *bio, int error);
+void inode_dio_wait(struct inode *inode);
+void inode_dio_done(struct inode *inode);
ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
struct block_device *bdev, const struct iovec *iov, loff_t offset,