|  | /* | 
|  | * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README | 
|  | */ | 
|  |  | 
|  | #include <linux/time.h> | 
|  | #include <linux/reiserfs_fs.h> | 
|  | #include <linux/reiserfs_acl.h> | 
|  | #include <linux/reiserfs_xattr.h> | 
|  | #include <asm/uaccess.h> | 
|  | #include <linux/pagemap.h> | 
|  | #include <linux/swap.h> | 
|  | #include <linux/writeback.h> | 
|  | #include <linux/blkdev.h> | 
|  | #include <linux/buffer_head.h> | 
|  | #include <linux/quotaops.h> | 
|  |  | 
|  | /* | 
|  | ** We pack the tails of files on file close, not at the time they are written. | 
|  | ** This implies an unnecessary copy of the tail and an unnecessary indirect item | 
|  | ** insertion/balancing, for files that are written in one write. | 
|  | ** It avoids unnecessary tail packings (balances) for files that are written in | 
|  | ** multiple writes and are small enough to have tails. | 
|  | ** | 
|  | ** file_release is called by the VFS layer when the file is closed.  If | 
|  | ** this is the last open file descriptor, and the file | 
|  | ** small enough to have a tail, and the tail is currently in an | 
|  | ** unformatted node, the tail is converted back into a direct item. | 
|  | ** | 
|  | ** We use reiserfs_truncate_file to pack the tail, since it already has | 
|  | ** all the conditions coded. | 
|  | */ | 
|  | static int reiserfs_file_release(struct inode *inode, struct file *filp) | 
|  | { | 
|  |  | 
|  | struct reiserfs_transaction_handle th; | 
|  | int err; | 
|  | int jbegin_failure = 0; | 
|  |  | 
|  | BUG_ON(!S_ISREG(inode->i_mode)); | 
|  |  | 
|  | /* fast out for when nothing needs to be done */ | 
|  | if ((atomic_read(&inode->i_count) > 1 || | 
|  | !(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) || | 
|  | !tail_has_to_be_packed(inode)) && | 
|  | REISERFS_I(inode)->i_prealloc_count <= 0) { | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | mutex_lock(&inode->i_mutex); | 
|  |  | 
|  | mutex_lock(&(REISERFS_I(inode)->i_mmap)); | 
|  | if (REISERFS_I(inode)->i_flags & i_ever_mapped) | 
|  | REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; | 
|  |  | 
|  | reiserfs_write_lock(inode->i_sb); | 
|  | /* freeing preallocation only involves relogging blocks that | 
|  | * are already in the current transaction.  preallocation gets | 
|  | * freed at the end of each transaction, so it is impossible for | 
|  | * us to log any additional blocks (including quota blocks) | 
|  | */ | 
|  | err = journal_begin(&th, inode->i_sb, 1); | 
|  | if (err) { | 
|  | /* uh oh, we can't allow the inode to go away while there | 
|  | * is still preallocation blocks pending.  Try to join the | 
|  | * aborted transaction | 
|  | */ | 
|  | jbegin_failure = err; | 
|  | err = journal_join_abort(&th, inode->i_sb, 1); | 
|  |  | 
|  | if (err) { | 
|  | /* hmpf, our choices here aren't good.  We can pin the inode | 
|  | * which will disallow unmount from every happening, we can | 
|  | * do nothing, which will corrupt random memory on unmount, | 
|  | * or we can forcibly remove the file from the preallocation | 
|  | * list, which will leak blocks on disk.  Lets pin the inode | 
|  | * and let the admin know what is going on. | 
|  | */ | 
|  | igrab(inode); | 
|  | reiserfs_warning(inode->i_sb, "clm-9001", | 
|  | "pinning inode %lu because the " | 
|  | "preallocation can't be freed", | 
|  | inode->i_ino); | 
|  | goto out; | 
|  | } | 
|  | } | 
|  | reiserfs_update_inode_transaction(inode); | 
|  |  | 
|  | #ifdef REISERFS_PREALLOCATE | 
|  | reiserfs_discard_prealloc(&th, inode); | 
|  | #endif | 
|  | err = journal_end(&th, inode->i_sb, 1); | 
|  |  | 
|  | /* copy back the error code from journal_begin */ | 
|  | if (!err) | 
|  | err = jbegin_failure; | 
|  |  | 
|  | if (!err && atomic_read(&inode->i_count) <= 1 && | 
|  | (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) && | 
|  | tail_has_to_be_packed(inode)) { | 
|  | /* if regular file is released by last holder and it has been | 
|  | appended (we append by unformatted node only) or its direct | 
|  | item(s) had to be converted, then it may have to be | 
|  | indirect2direct converted */ | 
|  | err = reiserfs_truncate_file(inode, 0); | 
|  | } | 
|  | out: | 
|  | mutex_unlock(&(REISERFS_I(inode)->i_mmap)); | 
|  | mutex_unlock(&inode->i_mutex); | 
|  | reiserfs_write_unlock(inode->i_sb); | 
|  | return err; | 
|  | } | 
|  |  | 
|  | static int reiserfs_file_mmap(struct file *file, struct vm_area_struct *vma) | 
|  | { | 
|  | struct inode *inode; | 
|  |  | 
|  | inode = file->f_path.dentry->d_inode; | 
|  | mutex_lock(&(REISERFS_I(inode)->i_mmap)); | 
|  | REISERFS_I(inode)->i_flags |= i_ever_mapped; | 
|  | mutex_unlock(&(REISERFS_I(inode)->i_mmap)); | 
|  |  | 
|  | return generic_file_mmap(file, vma); | 
|  | } | 
|  |  | 
/*
 * Truncate hook for the inode operations table: forwards to
 * reiserfs_truncate_file() with its second argument set to 1.  The hook
 * returns void, so the callee's result is deliberately dropped.
 */
static void reiserfs_vfs_truncate_file(struct inode *inode)
{
	(void)reiserfs_truncate_file(inode, 1);
}
|  |  | 
|  | /* Sync a reiserfs file. */ | 
|  |  | 
|  | /* | 
|  | * FIXME: sync_mapping_buffers() never has anything to sync.  Can | 
|  | * be removed... | 
|  | */ | 
|  |  | 
|  | static int reiserfs_sync_file(struct file *filp, | 
|  | struct dentry *dentry, int datasync) | 
|  | { | 
|  | struct inode *inode = dentry->d_inode; | 
|  | int err; | 
|  | int barrier_done; | 
|  |  | 
|  | BUG_ON(!S_ISREG(inode->i_mode)); | 
|  | err = sync_mapping_buffers(inode->i_mapping); | 
|  | reiserfs_write_lock(inode->i_sb); | 
|  | barrier_done = reiserfs_commit_for_inode(inode); | 
|  | reiserfs_write_unlock(inode->i_sb); | 
|  | if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb)) | 
|  | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | 
|  | if (barrier_done < 0) | 
|  | return barrier_done; | 
|  | return (err < 0) ? -EIO : 0; | 
|  | } | 
|  |  | 
|  | /* taken fs/buffer.c:__block_commit_write */ | 
|  | int reiserfs_commit_page(struct inode *inode, struct page *page, | 
|  | unsigned from, unsigned to) | 
|  | { | 
|  | unsigned block_start, block_end; | 
|  | int partial = 0; | 
|  | unsigned blocksize; | 
|  | struct buffer_head *bh, *head; | 
|  | unsigned long i_size_index = inode->i_size >> PAGE_CACHE_SHIFT; | 
|  | int new; | 
|  | int logit = reiserfs_file_data_log(inode); | 
|  | struct super_block *s = inode->i_sb; | 
|  | int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize; | 
|  | struct reiserfs_transaction_handle th; | 
|  | int ret = 0; | 
|  |  | 
|  | th.t_trans_id = 0; | 
|  | blocksize = 1 << inode->i_blkbits; | 
|  |  | 
|  | if (logit) { | 
|  | reiserfs_write_lock(s); | 
|  | ret = journal_begin(&th, s, bh_per_page + 1); | 
|  | if (ret) | 
|  | goto drop_write_lock; | 
|  | reiserfs_update_inode_transaction(inode); | 
|  | } | 
|  | for (bh = head = page_buffers(page), block_start = 0; | 
|  | bh != head || !block_start; | 
|  | block_start = block_end, bh = bh->b_this_page) { | 
|  |  | 
|  | new = buffer_new(bh); | 
|  | clear_buffer_new(bh); | 
|  | block_end = block_start + blocksize; | 
|  | if (block_end <= from || block_start >= to) { | 
|  | if (!buffer_uptodate(bh)) | 
|  | partial = 1; | 
|  | } else { | 
|  | set_buffer_uptodate(bh); | 
|  | if (logit) { | 
|  | reiserfs_prepare_for_journal(s, bh, 1); | 
|  | journal_mark_dirty(&th, s, bh); | 
|  | } else if (!buffer_dirty(bh)) { | 
|  | mark_buffer_dirty(bh); | 
|  | /* do data=ordered on any page past the end | 
|  | * of file and any buffer marked BH_New. | 
|  | */ | 
|  | if (reiserfs_data_ordered(inode->i_sb) && | 
|  | (new || page->index >= i_size_index)) { | 
|  | reiserfs_add_ordered_list(inode, bh); | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  | if (logit) { | 
|  | ret = journal_end(&th, s, bh_per_page + 1); | 
|  | drop_write_lock: | 
|  | reiserfs_write_unlock(s); | 
|  | } | 
|  | /* | 
|  | * If this is a partial write which happened to make all buffers | 
|  | * uptodate then we can optimize away a bogus readpage() for | 
|  | * the next read(). Here we 'discover' whether the page went | 
|  | * uptodate as a result of this (potentially partial) write. | 
|  | */ | 
|  | if (!partial) | 
|  | SetPageUptodate(page); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | /* Write @count bytes at position @ppos in a file indicated by @file | 
|  | from the buffer @buf. | 
|  |  | 
|  | generic_file_write() is only appropriate for filesystems that are not seeking to optimize performance and want | 
|  | something simple that works.  It is not for serious use by general purpose filesystems, excepting the one that it was | 
|  | written for (ext2/3).  This is for several reasons: | 
|  |  | 
|  | * It has no understanding of any filesystem specific optimizations. | 
|  |  | 
|  | * It enters the filesystem repeatedly for each page that is written. | 
|  |  | 
|  | * It depends on reiserfs_get_block() function which if implemented by reiserfs performs costly search_by_key | 
|  | * operation for each page it is supplied with. By contrast reiserfs_file_write() feeds as much as possible at a time | 
|  | * to reiserfs which allows for fewer tree traversals. | 
|  |  | 
|  | * Each indirect pointer insertion takes a lot of cpu, because it involves memory moves inside of blocks. | 
|  |  | 
|  | * Asking the block allocation code for blocks one at a time is slightly less efficient. | 
|  |  | 
|  | All of these reasons for not using only generic file write were understood back when reiserfs was first miscoded to | 
|  | use it, but we were in a hurry to make code freeze, and so it couldn't be revised then.  This new code should make | 
|  | things right finally. | 
|  |  | 
|  | Future Features: providing search_by_key with hints. | 
|  |  | 
|  | */ | 
|  | static ssize_t reiserfs_file_write(struct file *file,	/* the file we are going to write into */ | 
|  | const char __user * buf,	/*  pointer to user supplied data | 
|  | (in userspace) */ | 
|  | size_t count,	/* amount of bytes to write */ | 
|  | loff_t * ppos	/* pointer to position in file that we start writing at. Should be updated to | 
|  | * new current position before returning. */ | 
|  | ) | 
|  | { | 
|  | struct inode *inode = file->f_path.dentry->d_inode;	// Inode of the file that we are writing to. | 
|  | /* To simplify coding at this time, we store | 
|  | locked pages in array for now */ | 
|  | struct reiserfs_transaction_handle th; | 
|  | th.t_trans_id = 0; | 
|  |  | 
|  | /* If a filesystem is converted from 3.5 to 3.6, we'll have v3.5 items | 
|  | * lying around (most of the disk, in fact). Despite the filesystem | 
|  | * now being a v3.6 format, the old items still can't support large | 
|  | * file sizes. Catch this case here, as the rest of the VFS layer is | 
|  | * oblivious to the different limitations between old and new items. | 
|  | * reiserfs_setattr catches this for truncates. This chunk is lifted | 
|  | * from generic_write_checks. */ | 
|  | if (get_inode_item_key_version (inode) == KEY_FORMAT_3_5 && | 
|  | *ppos + count > MAX_NON_LFS) { | 
|  | if (*ppos >= MAX_NON_LFS) { | 
|  | return -EFBIG; | 
|  | } | 
|  | if (count > MAX_NON_LFS - (unsigned long)*ppos) | 
|  | count = MAX_NON_LFS - (unsigned long)*ppos; | 
|  | } | 
|  |  | 
|  | return do_sync_write(file, buf, count, ppos); | 
|  | } | 
|  |  | 
|  | const struct file_operations reiserfs_file_operations = { | 
|  | .read = do_sync_read, | 
|  | .write = reiserfs_file_write, | 
|  | .ioctl = reiserfs_ioctl, | 
|  | #ifdef CONFIG_COMPAT | 
|  | .compat_ioctl = reiserfs_compat_ioctl, | 
|  | #endif | 
|  | .mmap = reiserfs_file_mmap, | 
|  | .open = generic_file_open, | 
|  | .release = reiserfs_file_release, | 
|  | .fsync = reiserfs_sync_file, | 
|  | .aio_read = generic_file_aio_read, | 
|  | .aio_write = generic_file_aio_write, | 
|  | .splice_read = generic_file_splice_read, | 
|  | .splice_write = generic_file_splice_write, | 
|  | .llseek = generic_file_llseek, | 
|  | }; | 
|  |  | 
|  | const struct inode_operations reiserfs_file_inode_operations = { | 
|  | .truncate = reiserfs_vfs_truncate_file, | 
|  | .setattr = reiserfs_setattr, | 
|  | .setxattr = reiserfs_setxattr, | 
|  | .getxattr = reiserfs_getxattr, | 
|  | .listxattr = reiserfs_listxattr, | 
|  | .removexattr = reiserfs_removexattr, | 
|  | .permission = reiserfs_permission, | 
|  | }; |