| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* | 
 | 2 |  * linux/fs/nfs/write.c | 
 | 3 |  * | 
 | 4 |  * Writing file data over NFS. | 
 | 5 |  * | 
 | 6 |  * We do it like this: When a (user) process wishes to write data to an | 
 | 7 |  * NFS file, a write request is allocated that contains the RPC task data | 
 | 8 |  * plus some info on the page to be written, and added to the inode's | 
 | 9 |  * write chain. If the process writes past the end of the page, an async | 
 | 10 |  * RPC call to write the page is scheduled immediately; otherwise, the call | 
 | 11 |  * is delayed for a few seconds. | 
 | 12 |  * | 
 | 13 |  * Just like readahead, no async I/O is performed if wsize < PAGE_SIZE. | 
 | 14 |  * | 
 | 15 |  * Write requests are kept on the inode's writeback list. Each entry in | 
 | 16 |  * that list references the page (portion) to be written. When the | 
 | 17 |  * cache timeout has expired, the RPC task is woken up, and tries to | 
 | 18 |  * lock the page. As soon as it manages to do so, the request is moved | 
 | 19 |  * from the writeback list to the writelock list. | 
 | 20 |  * | 
 | 21 |  * Note: we must make sure never to confuse the inode passed in the | 
 | 22 |  * write_page request with the one in page->inode. As far as I understand | 
 | 23 |  * it, these are different when doing a swap-out. | 
 | 24 |  * | 
 | 25 |  * To understand everything that goes on here and in the NFS read code, | 
 | 26 |  * one should be aware that a page is locked in exactly one of the following | 
 | 27 |  * cases: | 
 | 28 |  * | 
 | 29 |  *  -	A write request is in progress. | 
 | 30 |  *  -	A user process is in generic_file_write/nfs_update_page | 
 | 31 |  *  -	A user process is in generic_file_read | 
 | 32 |  * | 
 | 33 |  * Also note that because of the way pages are invalidated in | 
 | 34 |  * nfs_revalidate_inode, the following assertions hold: | 
 | 35 |  * | 
 | 36 |  *  -	If a page is dirty, there will be no read requests (a page will | 
 | 37 |  *	not be re-read unless invalidated by nfs_revalidate_inode). | 
 | 38 |  *  -	If the page is not uptodate, there will be no pending write | 
 | 39 |  *	requests, and no process will be in nfs_update_page. | 
 | 40 |  * | 
 | 41 |  * FIXME: Interaction with the vmscan routines is not optimal yet. | 
 | 42 |  * Either vmscan must be made nfs-savvy, or we need a different page | 
 | 43 |  * reclaim concept that supports something like FS-independent | 
 | 44 |  * buffer_heads with a b_ops-> field. | 
 | 45 |  * | 
 | 46 |  * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de> | 
 | 47 |  */ | 
 | 48 |  | 
 | 49 | #include <linux/config.h> | 
 | 50 | #include <linux/types.h> | 
 | 51 | #include <linux/slab.h> | 
 | 52 | #include <linux/mm.h> | 
 | 53 | #include <linux/pagemap.h> | 
 | 54 | #include <linux/file.h> | 
 | 55 | #include <linux/mpage.h> | 
 | 56 | #include <linux/writeback.h> | 
 | 57 |  | 
 | 58 | #include <linux/sunrpc/clnt.h> | 
 | 59 | #include <linux/nfs_fs.h> | 
 | 60 | #include <linux/nfs_mount.h> | 
 | 61 | #include <linux/nfs_page.h> | 
 | 62 | #include <asm/uaccess.h> | 
 | 63 | #include <linux/smp_lock.h> | 
 | 64 |  | 
 | 65 | #include "delegation.h" | 
 | 66 |  | 
 | 67 | #define NFSDBG_FACILITY		NFSDBG_PAGECACHE | 
 | 68 |  | 
 | 69 | #define MIN_POOL_WRITE		(32) | 
 | 70 | #define MIN_POOL_COMMIT		(4) | 
 | 71 |  | 
 | 72 | /* | 
 | 73 |  * Local function declarations | 
 | 74 |  */ | 
 | 75 | static struct nfs_page * nfs_update_request(struct nfs_open_context*, | 
 | 76 | 					    struct inode *, | 
 | 77 | 					    struct page *, | 
 | 78 | 					    unsigned int, unsigned int); | 
 | 79 | static void nfs_writeback_done_partial(struct nfs_write_data *, int); | 
 | 80 | static void nfs_writeback_done_full(struct nfs_write_data *, int); | 
 | 81 | static int nfs_wait_on_write_congestion(struct address_space *, int); | 
 | 82 | static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int); | 
 | 83 | static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, | 
 | 84 | 			   unsigned int npages, int how); | 
 | 85 |  | 
 | 86 | static kmem_cache_t *nfs_wdata_cachep; | 
 | 87 | mempool_t *nfs_wdata_mempool; | 
 | 88 | static mempool_t *nfs_commit_mempool; | 
 | 89 |  | 
 | 90 | static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); | 
 | 91 |  | 
| Chuck Lever | 40859d7 | 2005-11-30 18:09:02 -0500 | [diff] [blame] | 92 | static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 93 | { | 
 | 94 | 	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS); | 
| Chuck Lever | 40859d7 | 2005-11-30 18:09:02 -0500 | [diff] [blame] | 95 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 96 | 	if (p) { | 
 | 97 | 		memset(p, 0, sizeof(*p)); | 
 | 98 | 		INIT_LIST_HEAD(&p->pages); | 
| Chuck Lever | 40859d7 | 2005-11-30 18:09:02 -0500 | [diff] [blame] | 99 | 		if (pagecount < NFS_PAGEVEC_SIZE) | 
 | 100 | 			p->pagevec = &p->page_array[0]; | 
 | 101 | 		else { | 
 | 102 | 			size_t size = ++pagecount * sizeof(struct page *); | 
 | 103 | 			p->pagevec = kmalloc(size, GFP_NOFS); | 
 | 104 | 			if (p->pagevec) { | 
 | 105 | 				memset(p->pagevec, 0, size); | 
 | 106 | 			} else { | 
 | 107 | 				mempool_free(p, nfs_commit_mempool); | 
 | 108 | 				p = NULL; | 
 | 109 | 			} | 
 | 110 | 		} | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 111 | 	} | 
 | 112 | 	return p; | 
 | 113 | } | 
 | 114 |  | 
 | 115 | static inline void nfs_commit_free(struct nfs_write_data *p) | 
 | 116 | { | 
| Chuck Lever | 40859d7 | 2005-11-30 18:09:02 -0500 | [diff] [blame] | 117 | 	if (p && (p->pagevec != &p->page_array[0])) | 
 | 118 | 		kfree(p->pagevec); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 119 | 	mempool_free(p, nfs_commit_mempool); | 
 | 120 | } | 
 | 121 |  | 
/*
 * Free an nfs_write_data structure.
 * NOTE(review): the void * signature presumably matches an RPC release
 * callback slot (callers not visible here) - confirm before changing.
 */
void nfs_writedata_release(void *wdata)
{
	nfs_writedata_free(wdata);
}
 | 126 |  | 
 | 127 | /* Adjust the file length if we're writing beyond the end */ | 
 | 128 | static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count) | 
 | 129 | { | 
 | 130 | 	struct inode *inode = page->mapping->host; | 
 | 131 | 	loff_t end, i_size = i_size_read(inode); | 
 | 132 | 	unsigned long end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; | 
 | 133 |  | 
 | 134 | 	if (i_size > 0 && page->index < end_index) | 
 | 135 | 		return; | 
 | 136 | 	end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count); | 
 | 137 | 	if (i_size >= end) | 
 | 138 | 		return; | 
 | 139 | 	i_size_write(inode, end); | 
 | 140 | } | 
 | 141 |  | 
 | 142 | /* We can set the PG_uptodate flag if we see that a write request | 
 | 143 |  * covers the full page. | 
 | 144 |  */ | 
 | 145 | static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count) | 
 | 146 | { | 
 | 147 | 	loff_t end_offs; | 
 | 148 |  | 
 | 149 | 	if (PageUptodate(page)) | 
 | 150 | 		return; | 
 | 151 | 	if (base != 0) | 
 | 152 | 		return; | 
 | 153 | 	if (count == PAGE_CACHE_SIZE) { | 
 | 154 | 		SetPageUptodate(page); | 
 | 155 | 		return; | 
 | 156 | 	} | 
 | 157 |  | 
 | 158 | 	end_offs = i_size_read(page->mapping->host) - 1; | 
 | 159 | 	if (end_offs < 0) | 
 | 160 | 		return; | 
 | 161 | 	/* Is this the last page? */ | 
 | 162 | 	if (page->index != (unsigned long)(end_offs >> PAGE_CACHE_SHIFT)) | 
 | 163 | 		return; | 
 | 164 | 	/* This is the last page: set PG_uptodate if we cover the entire | 
 | 165 | 	 * extent of the data, then zero the rest of the page. | 
 | 166 | 	 */ | 
 | 167 | 	if (count == (unsigned int)(end_offs & (PAGE_CACHE_SIZE - 1)) + 1) { | 
 | 168 | 		memclear_highpage_flush(page, count, PAGE_CACHE_SIZE - count); | 
 | 169 | 		SetPageUptodate(page); | 
 | 170 | 	} | 
 | 171 | } | 
 | 172 |  | 
/*
 * Write a page synchronously.
 * Offset is the data offset within the page.
 *
 * Issues stable (NFS_FILE_SYNC) WRITE calls in wsize-sized chunks
 * until the requested range has been transmitted. Returns the number
 * of bytes written, or a negative errno if nothing was written before
 * an error occurred.
 */
static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
		struct page *page, unsigned int offset, unsigned int count,
		int how)
{
	unsigned int	wsize = NFS_SERVER(inode)->wsize;
	int		result, written = 0;
	struct nfs_write_data *wdata;

	wdata = nfs_writedata_alloc(1);
	if (!wdata)
		return -ENOMEM;

	wdata->flags = how;
	wdata->cred = ctx->cred;
	wdata->inode = inode;
	wdata->args.fh = NFS_FH(inode);
	wdata->args.context = ctx;
	wdata->args.pages = &page;
	wdata->args.stable = NFS_FILE_SYNC;
	wdata->args.pgbase = offset;
	wdata->args.count = wsize;
	wdata->res.fattr = &wdata->fattr;
	wdata->res.verf = &wdata->verf;

	dprintk("NFS:      nfs_writepage_sync(%s/%Ld %d@%Ld)\n",
		inode->i_sb->s_id,
		(long long)NFS_FILEID(inode),
		count, (long long)(page_offset(page) + offset));

	set_page_writeback(page);
	nfs_begin_data_update(inode);
	do {
		/* Clamp the final chunk to the remaining byte count. */
		if (count < wsize)
			wdata->args.count = count;
		wdata->args.offset = page_offset(page) + wdata->args.pgbase;

		result = NFS_PROTO(inode)->write(wdata);

		if (result < 0) {
			/* Must mark the page invalid after I/O error */
			ClearPageUptodate(page);
			goto io_error;
		}
		if (result < wdata->args.count)
			printk(KERN_WARNING "NFS: short write, count=%u, result=%d\n",
					wdata->args.count, result);

		/* Advance past whatever the server accepted; a short
		 * write is simply continued from the new offset. */
		wdata->args.offset += result;
		wdata->args.pgbase += result;
		written += result;
		count -= result;
	} while (count);
	/* Update file length */
	nfs_grow_file(page, offset, written);
	/* Set the PG_uptodate flag? */
	nfs_mark_uptodate(page, offset, written);

	if (PageError(page))
		ClearPageError(page);

io_error:
	nfs_end_data_update(inode);
	end_page_writeback(page);
	nfs_writedata_free(wdata);
	/* Report bytes written if any succeeded, else the error code. */
	return written ? written : result;
}
 | 243 |  | 
 | 244 | static int nfs_writepage_async(struct nfs_open_context *ctx, | 
 | 245 | 		struct inode *inode, struct page *page, | 
 | 246 | 		unsigned int offset, unsigned int count) | 
 | 247 | { | 
 | 248 | 	struct nfs_page	*req; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 249 |  | 
 | 250 | 	req = nfs_update_request(ctx, inode, page, offset, count); | 
| Trond Myklebust | abd3e64 | 2006-01-03 09:55:02 +0100 | [diff] [blame] | 251 | 	if (IS_ERR(req)) | 
 | 252 | 		return PTR_ERR(req); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 253 | 	/* Update file length */ | 
 | 254 | 	nfs_grow_file(page, offset, count); | 
 | 255 | 	/* Set the PG_uptodate flag? */ | 
 | 256 | 	nfs_mark_uptodate(page, offset, count); | 
 | 257 | 	nfs_unlock_request(req); | 
| Trond Myklebust | abd3e64 | 2006-01-03 09:55:02 +0100 | [diff] [blame] | 258 | 	return 0; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 259 | } | 
 | 260 |  | 
 | 261 | static int wb_priority(struct writeback_control *wbc) | 
 | 262 | { | 
 | 263 | 	if (wbc->for_reclaim) | 
 | 264 | 		return FLUSH_HIGHPRI; | 
 | 265 | 	if (wbc->for_kupdate) | 
 | 266 | 		return FLUSH_LOWPRI; | 
 | 267 | 	return 0; | 
 | 268 | } | 
 | 269 |  | 
/*
 * Write an mmapped page to the server.
 *
 * Called with the page locked; unlocks it before returning. Writes the
 * valid portion of the page (full page, or the tail up to i_size for
 * the last page) either asynchronously or, for O_SYNC inodes or when
 * the inode could not be pinned, synchronously.
 */
int nfs_writepage(struct page *page, struct writeback_control *wbc)
{
	struct nfs_open_context *ctx;
	struct inode *inode = page->mapping->host;
	unsigned long end_index;
	unsigned offset = PAGE_CACHE_SIZE;
	loff_t i_size = i_size_read(inode);
	int inode_referenced = 0;
	int priority = wb_priority(wbc);
	int err;

	/*
	 * Note: We need to ensure that we have a reference to the inode
	 *       if we are to do asynchronous writes. If not, waiting
	 *       in nfs_wait_on_request() may deadlock with clear_inode().
	 *
	 *       If igrab() fails here, then it is in any case safe to
	 *       call nfs_wb_page(), since there will be no pending writes.
	 */
	if (igrab(inode) != 0)
		inode_referenced = 1;
	end_index = i_size >> PAGE_CACHE_SHIFT;

	/* Ensure we've flushed out any previous writes */
	nfs_wb_page_priority(inode, page, priority);

	/* easy case: page lies fully before EOF, write the whole page */
	if (page->index < end_index)
		goto do_it;
	/* things got complicated... only write the bytes up to i_size */
	offset = i_size & (PAGE_CACHE_SIZE-1);

	/* OK, are we completely out? */
	err = 0; /* potential race with truncate - ignore */
	if (page->index >= end_index+1 || !offset)
		goto out;
do_it:
	ctx = nfs_find_open_context(inode, NULL, FMODE_WRITE);
	if (ctx == NULL) {
		err = -EBADF;
		goto out;
	}
	lock_kernel();
	/* Async path requires the pinned inode (see note above). */
	if (!IS_SYNC(inode) && inode_referenced) {
		err = nfs_writepage_async(ctx, inode, page, 0, offset);
		if (!wbc->for_writepages)
			nfs_flush_inode(inode, 0, 0, wb_priority(wbc));
	} else {
		err = nfs_writepage_sync(ctx, inode, page, 0,
						offset, priority);
		if (err >= 0) {
			/* Partial write: leave the page dirty for retry. */
			if (err != offset)
				redirty_page_for_writepage(wbc, page);
			err = 0;
		}
	}
	unlock_kernel();
	put_nfs_open_context(ctx);
out:
	unlock_page(page);
	if (inode_referenced)
		iput(inode);
	return err;
}
 | 337 |  | 
/*
 * Note: causes nfs_update_request() to block on the assumption
 * 	 that the writeback is generated due to memory pressure.
 *
 * address_space ->writepages: push out all dirty pages, then flush and
 * (for WB_SYNC_ALL) wait on the resulting NFS requests, and finally
 * commit unstable writes. Marks the bdi write-congested for the
 * duration so new writers throttle in nfs_update_request().
 */
int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	struct backing_dev_info *bdi = mapping->backing_dev_info;
	struct inode *inode = mapping->host;
	int err;

	err = generic_writepages(mapping, wbc);
	if (err)
		return err;
	/* Claim the congestion bit; nonblocking callers bail out,
	 * everyone else waits for the current flusher to finish. */
	while (test_and_set_bit(BDI_write_congested, &bdi->state) != 0) {
		if (wbc->nonblocking)
			return 0;
		nfs_wait_on_write_congestion(mapping, 0);
	}
	err = nfs_flush_inode(inode, 0, 0, wb_priority(wbc));
	if (err < 0)
		goto out;
	/* positive return = number of requests flushed */
	wbc->nr_to_write -= err;
	if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) {
		err = nfs_wait_on_requests(inode, 0, 0);
		if (err < 0)
			goto out;
	}
	err = nfs_commit_inode(inode, wb_priority(wbc));
	if (err > 0) {
		/* positive return = number of requests committed */
		wbc->nr_to_write -= err;
		err = 0;
	}
out:
	/* Drop the congestion bit and wake throttled writers. */
	clear_bit(BDI_write_congested, &bdi->state);
	wake_up_all(&nfs_write_congestion);
	return err;
}
 | 375 |  | 
/*
 * Insert a write request into an inode
 *
 * The first request attached to an inode pins the inode and opens a
 * data-update window; both are undone when the last request goes away
 * in nfs_inode_remove_request(). Takes a reference on @req.
 * Returns 0, or the radix tree insertion error.
 */
static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	int error;

	error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
	BUG_ON(error == -EEXIST);
	if (error)
		return error;
	if (!nfsi->npages) {
		/* NOTE(review): igrab() result is ignored; presumably the
		 * caller holds a reference keeping the inode alive - confirm. */
		igrab(inode);
		nfs_begin_data_update(inode);
		/* Writes under a delegation bump the change attribute
		 * locally instead of asking the server. */
		if (nfs_have_delegation(inode, FMODE_WRITE))
			nfsi->change_attr++;
	}
	nfsi->npages++;
	atomic_inc(&req->wb_count);
	return 0;
}
 | 398 |  | 
/*
 * Remove a write request from an inode
 */
static void nfs_inode_remove_request(struct nfs_page *req)
{
	struct inode *inode = req->wb_context->dentry->d_inode;
	struct nfs_inode *nfsi = NFS_I(inode);

	/* Caller must own the request (writeback lock held). */
	BUG_ON (!NFS_WBACK_BUSY(req));

	spin_lock(&nfsi->req_lock);
	radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
	nfsi->npages--;
	if (!nfsi->npages) {
		spin_unlock(&nfsi->req_lock);
		/* Last request gone: close the data-update window and
		 * drop the inode reference taken when the first request
		 * was added. */
		nfs_end_data_update(inode);
		iput(inode);
	} else
		spin_unlock(&nfsi->req_lock);
	nfs_clear_request(req);
	nfs_release_request(req);
}
 | 421 |  | 
 | 422 | /* | 
 | 423 |  * Find a request | 
 | 424 |  */ | 
 | 425 | static inline struct nfs_page * | 
 | 426 | _nfs_find_request(struct inode *inode, unsigned long index) | 
 | 427 | { | 
 | 428 | 	struct nfs_inode *nfsi = NFS_I(inode); | 
 | 429 | 	struct nfs_page *req; | 
 | 430 |  | 
 | 431 | 	req = (struct nfs_page*)radix_tree_lookup(&nfsi->nfs_page_tree, index); | 
 | 432 | 	if (req) | 
 | 433 | 		atomic_inc(&req->wb_count); | 
 | 434 | 	return req; | 
 | 435 | } | 
 | 436 |  | 
 | 437 | static struct nfs_page * | 
 | 438 | nfs_find_request(struct inode *inode, unsigned long index) | 
 | 439 | { | 
 | 440 | 	struct nfs_page		*req; | 
 | 441 | 	struct nfs_inode	*nfsi = NFS_I(inode); | 
 | 442 |  | 
 | 443 | 	spin_lock(&nfsi->req_lock); | 
 | 444 | 	req = _nfs_find_request(inode, index); | 
 | 445 | 	spin_unlock(&nfsi->req_lock); | 
 | 446 | 	return req; | 
 | 447 | } | 
 | 448 |  | 
/*
 * Add a request to the inode's dirty list.
 */
static void
nfs_mark_request_dirty(struct nfs_page *req)
{
	struct inode *inode = req->wb_context->dentry->d_inode;
	struct nfs_inode *nfsi = NFS_I(inode);

	spin_lock(&nfsi->req_lock);
	/* Tag the radix tree slot so scans can find dirty requests. */
	radix_tree_tag_set(&nfsi->nfs_page_tree,
			req->wb_index, NFS_PAGE_TAG_DIRTY);
	nfs_list_add_request(req, &nfsi->dirty);
	nfsi->ndirty++;
	spin_unlock(&nfsi->req_lock);
	/* Account the dirty page and make sure the inode gets flushed. */
	inc_page_state(nr_dirty);
	mark_inode_dirty(inode);
}
 | 467 |  | 
 | 468 | /* | 
 | 469 |  * Check if a request is dirty | 
 | 470 |  */ | 
 | 471 | static inline int | 
 | 472 | nfs_dirty_request(struct nfs_page *req) | 
 | 473 | { | 
 | 474 | 	struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); | 
 | 475 | 	return !list_empty(&req->wb_list) && req->wb_list_head == &nfsi->dirty; | 
 | 476 | } | 
 | 477 |  | 
 | 478 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) | 
/*
 * Add a request to the inode's commit list.
 */
static void
nfs_mark_request_commit(struct nfs_page *req)
{
	struct inode *inode = req->wb_context->dentry->d_inode;
	struct nfs_inode *nfsi = NFS_I(inode);

	spin_lock(&nfsi->req_lock);
	nfs_list_add_request(req, &nfsi->commit);
	nfsi->ncommit++;
	spin_unlock(&nfsi->req_lock);
	/* Pages written but awaiting COMMIT are accounted "unstable". */
	inc_page_state(nr_unstable);
	mark_inode_dirty(inode);
}
 | 495 | #endif | 
 | 496 |  | 
/*
 * Wait for a request to complete.
 *
 * Interruptible by signals only if mounted with intr flag.
 *
 * Walks requests tagged NFS_PAGE_TAG_WRITEBACK in the index range
 * [idx_start, idx_start + npages) - npages == 0 means "to the end" -
 * and waits for each. Returns the number of requests waited for, or
 * a negative errno.
 */
static int
nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	struct nfs_page *req;
	unsigned long		idx_end, next;
	unsigned int		res = 0;
	int			error;

	if (npages == 0)
		idx_end = ~0;
	else
		idx_end = idx_start + npages - 1;

	spin_lock(&nfsi->req_lock);
	next = idx_start;
	while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) {
		if (req->wb_index > idx_end)
			break;

		next = req->wb_index + 1;
		BUG_ON(!NFS_WBACK_BUSY(req));

		/* Pin the request so it survives the unlocked wait. */
		atomic_inc(&req->wb_count);
		spin_unlock(&nfsi->req_lock);
		error = nfs_wait_on_request(req);
		nfs_release_request(req);
		if (error < 0)
			return error;
		spin_lock(&nfsi->req_lock);
		res++;
	}
	spin_unlock(&nfsi->req_lock);
	return res;
}
 | 537 |  | 
/*
 * nfs_scan_dirty - Scan an inode for dirty requests
 * @inode: NFS inode to scan
 * @dst: destination list
 * @idx_start: lower bound of page->index to scan.
 * @npages: idx_start + npages sets the upper bound to scan.
 *
 * Moves requests from the inode's dirty page list.
 * The requests are *not* checked to ensure that they form a contiguous set.
 *
 * Returns the number of requests moved. Caller is expected to hold
 * the req_lock (the nfsi->ndirty bookkeeping is not otherwise
 * serialized here).
 */
static int
nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	int res = 0;

	if (nfsi->ndirty != 0) {
		res = nfs_scan_lock_dirty(nfsi, dst, idx_start, npages);
		nfsi->ndirty -= res;
		/* Keep the global dirty-page accounting in step. */
		sub_page_state(nr_dirty,res);
		/* Sanity check: counter and list must agree on emptiness. */
		if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty))
			printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n");
	}
	return res;
}
 | 563 |  | 
 | 564 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) | 
/*
 * nfs_scan_commit - Scan an inode for commit requests
 * @inode: NFS inode to scan
 * @dst: destination list
 * @idx_start: lower bound of page->index to scan.
 * @npages: idx_start + npages sets the upper bound to scan.
 *
 * Moves requests from the inode's 'commit' request list.
 * The requests are *not* checked to ensure that they form a contiguous set.
 *
 * Returns the number of requests moved. Caller is expected to hold
 * the req_lock for the nfsi->ncommit bookkeeping.
 */
static int
nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	int res = 0;

	if (nfsi->ncommit != 0) {
		res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages);
		nfsi->ncommit -= res;
		/* Sanity check: counter and list must agree on emptiness. */
		if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit))
			printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
	}
	return res;
}
 | 589 | #endif | 
 | 590 |  | 
/*
 * Wait for write congestion on the backing device to clear.
 * @intr: non-zero selects an interruptible wait, performed under the
 *	RPC client's signal mask so only "fatal" signals break it.
 * Returns 0, or -ERESTARTSYS if a signal was pending in the
 * interruptible case.
 */
static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
{
	struct backing_dev_info *bdi = mapping->backing_dev_info;
	DEFINE_WAIT(wait);
	int ret = 0;

	might_sleep();

	if (!bdi_write_congested(bdi))
		return 0;
	if (intr) {
		struct rpc_clnt *clnt = NFS_CLIENT(mapping->host);
		sigset_t oldset;

		rpc_clnt_sigmask(clnt, &oldset);
		prepare_to_wait(&nfs_write_congestion, &wait, TASK_INTERRUPTIBLE);
		/* Re-check after queueing to avoid a missed wakeup. */
		if (bdi_write_congested(bdi)) {
			if (signalled())
				ret = -ERESTARTSYS;
			else
				schedule();
		}
		rpc_clnt_sigunmask(clnt, &oldset);
	} else {
		prepare_to_wait(&nfs_write_congestion, &wait, TASK_UNINTERRUPTIBLE);
		/* Re-check after queueing to avoid a missed wakeup. */
		if (bdi_write_congested(bdi))
			schedule();
	}
	finish_wait(&nfs_write_congestion, &wait);
	return ret;
}
 | 622 |  | 
 | 623 |  | 
/*
 * Try to update any existing write request, or create one if there is none.
 * In order to match, the request's credentials must match those of
 * the calling process.
 *
 * Note: Should always be called with the Page Lock held!
 *
 * Returns the locked request covering [offset, offset+bytes) on this
 * page, an ERR_PTR on failure, or ERR_PTR(-EBUSY) if an existing
 * request conflicts and the caller must flush first.
 */
static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
		struct inode *inode, struct page *page,
		unsigned int offset, unsigned int bytes)
{
	struct nfs_server *server = NFS_SERVER(inode);
	struct nfs_inode *nfsi = NFS_I(inode);
	struct nfs_page		*req, *new = NULL;
	unsigned long		rqend, end;

	end = offset + bytes;

	/* Throttle while the backing device is congested. */
	if (nfs_wait_on_write_congestion(page->mapping, server->flags & NFS_MOUNT_INTR))
		return ERR_PTR(-ERESTARTSYS);
	for (;;) {
		/* Loop over all inode entries and see if we find
		 * A request for the page we wish to update
		 */
		spin_lock(&nfsi->req_lock);
		req = _nfs_find_request(inode, page->index);
		if (req) {
			if (!nfs_lock_request_dontget(req)) {
				int error;
				/* Someone else owns it: wait unlocked,
				 * then retry the whole lookup. */
				spin_unlock(&nfsi->req_lock);
				error = nfs_wait_on_request(req);
				nfs_release_request(req);
				if (error < 0) {
					if (new)
						nfs_release_request(new);
					return ERR_PTR(error);
				}
				continue;
			}
			spin_unlock(&nfsi->req_lock);
			/* Found and locked an existing request: discard
			 * the speculatively-created one, if any. */
			if (new)
				nfs_release_request(new);
			break;
		}

		if (new) {
			int error;
			nfs_lock_request_dontget(new);
			error = nfs_inode_add_request(inode, new);
			if (error) {
				spin_unlock(&nfsi->req_lock);
				nfs_unlock_request(new);
				return ERR_PTR(error);
			}
			spin_unlock(&nfsi->req_lock);
			nfs_mark_request_dirty(new);
			return new;
		}
		spin_unlock(&nfsi->req_lock);

		/* No request yet: create one outside the lock and loop
		 * to re-check for a race. */
		new = nfs_create_request(ctx, inode, page, offset, bytes);
		if (IS_ERR(new))
			return new;
	}

	/* We have a request for our page.
	 * If the creds don't match, or the
	 * page addresses don't match,
	 * tell the caller to wait on the conflicting
	 * request.
	 */
	rqend = req->wb_offset + req->wb_bytes;
	if (req->wb_context != ctx
	    || req->wb_page != page
	    || !nfs_dirty_request(req)
	    || offset > rqend || end < req->wb_offset) {
		nfs_unlock_request(req);
		return ERR_PTR(-EBUSY);
	}

	/* Okay, the request matches. Update the region */
	if (offset < req->wb_offset) {
		req->wb_offset = offset;
		req->wb_pgbase = offset;
		req->wb_bytes = rqend - req->wb_offset;
	}

	if (end > rqend)
		req->wb_bytes = end - req->wb_offset;

	return req;
}
 | 716 |  | 
 | 717 | int nfs_flush_incompatible(struct file *file, struct page *page) | 
 | 718 | { | 
 | 719 | 	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; | 
 | 720 | 	struct inode	*inode = page->mapping->host; | 
 | 721 | 	struct nfs_page	*req; | 
 | 722 | 	int		status = 0; | 
 | 723 | 	/* | 
 | 724 | 	 * Look for a request corresponding to this page. If there | 
 | 725 | 	 * is one, and it belongs to another file, we flush it out | 
 | 726 | 	 * before we try to copy anything into the page. Do this | 
 | 727 | 	 * due to the lack of an ACCESS-type call in NFSv2. | 
 | 728 | 	 * Also do the same if we find a request from an existing | 
 | 729 | 	 * dropped page. | 
 | 730 | 	 */ | 
 | 731 | 	req = nfs_find_request(inode, page->index); | 
 | 732 | 	if (req) { | 
 | 733 | 		if (req->wb_page != page || ctx != req->wb_context) | 
 | 734 | 			status = nfs_wb_page(inode, page); | 
 | 735 | 		nfs_release_request(req); | 
 | 736 | 	} | 
 | 737 | 	return (status < 0) ? status : 0; | 
 | 738 | } | 
 | 739 |  | 
 | 740 | /* | 
 | 741 |  * Update and possibly write a cached page of an NFS file. | 
 | 742 |  * | 
 | 743 |  * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad | 
 | 744 |  * things with a page scheduled for an RPC call (e.g. invalidate it). | 
 | 745 |  */ | 
 | 746 | int nfs_updatepage(struct file *file, struct page *page, | 
 | 747 | 		unsigned int offset, unsigned int count) | 
 | 748 | { | 
 | 749 | 	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 750 | 	struct inode	*inode = page->mapping->host; | 
 | 751 | 	struct nfs_page	*req; | 
 | 752 | 	int		status = 0; | 
 | 753 |  | 
 | 754 | 	dprintk("NFS:      nfs_updatepage(%s/%s %d@%Ld)\n", | 
| Chuck Lever | 0bbacc4 | 2005-11-01 16:53:32 -0500 | [diff] [blame] | 755 | 		file->f_dentry->d_parent->d_name.name, | 
 | 756 | 		file->f_dentry->d_name.name, count, | 
 | 757 | 		(long long)(page_offset(page) +offset)); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 758 |  | 
 | 759 | 	if (IS_SYNC(inode)) { | 
 | 760 | 		status = nfs_writepage_sync(ctx, inode, page, offset, count, 0); | 
 | 761 | 		if (status > 0) { | 
 | 762 | 			if (offset == 0 && status == PAGE_CACHE_SIZE) | 
 | 763 | 				SetPageUptodate(page); | 
 | 764 | 			return 0; | 
 | 765 | 		} | 
 | 766 | 		return status; | 
 | 767 | 	} | 
 | 768 |  | 
 | 769 | 	/* If we're not using byte range locks, and we know the page | 
 | 770 | 	 * is entirely in cache, it may be more efficient to avoid | 
 | 771 | 	 * fragmenting write requests. | 
 | 772 | 	 */ | 
| Trond Myklebust | ab0a3db | 2005-06-22 17:16:30 +0000 | [diff] [blame] | 773 | 	if (PageUptodate(page) && inode->i_flock == NULL && !(file->f_mode & O_SYNC)) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 774 | 		loff_t end_offs = i_size_read(inode) - 1; | 
 | 775 | 		unsigned long end_index = end_offs >> PAGE_CACHE_SHIFT; | 
 | 776 |  | 
 | 777 | 		count += offset; | 
 | 778 | 		offset = 0; | 
 | 779 | 		if (unlikely(end_offs < 0)) { | 
 | 780 | 			/* Do nothing */ | 
 | 781 | 		} else if (page->index == end_index) { | 
 | 782 | 			unsigned int pglen; | 
 | 783 | 			pglen = (unsigned int)(end_offs & (PAGE_CACHE_SIZE-1)) + 1; | 
 | 784 | 			if (count < pglen) | 
 | 785 | 				count = pglen; | 
 | 786 | 		} else if (page->index < end_index) | 
 | 787 | 			count = PAGE_CACHE_SIZE; | 
 | 788 | 	} | 
 | 789 |  | 
 | 790 | 	/* | 
 | 791 | 	 * Try to find an NFS request corresponding to this page | 
 | 792 | 	 * and update it. | 
 | 793 | 	 * If the existing request cannot be updated, we must flush | 
 | 794 | 	 * it out now. | 
 | 795 | 	 */ | 
 | 796 | 	do { | 
 | 797 | 		req = nfs_update_request(ctx, inode, page, offset, count); | 
 | 798 | 		status = (IS_ERR(req)) ? PTR_ERR(req) : 0; | 
 | 799 | 		if (status != -EBUSY) | 
 | 800 | 			break; | 
 | 801 | 		/* Request could not be updated. Flush it out and try again */ | 
 | 802 | 		status = nfs_wb_page(inode, page); | 
 | 803 | 	} while (status >= 0); | 
 | 804 | 	if (status < 0) | 
 | 805 | 		goto done; | 
 | 806 |  | 
 | 807 | 	status = 0; | 
 | 808 |  | 
 | 809 | 	/* Update file length */ | 
 | 810 | 	nfs_grow_file(page, offset, count); | 
 | 811 | 	/* Set the PG_uptodate flag? */ | 
 | 812 | 	nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); | 
 | 813 | 	nfs_unlock_request(req); | 
 | 814 | done: | 
 | 815 |         dprintk("NFS:      nfs_updatepage returns %d (isize %Ld)\n", | 
 | 816 | 			status, (long long)i_size_read(inode)); | 
 | 817 | 	if (status < 0) | 
 | 818 | 		ClearPageUptodate(page); | 
 | 819 | 	return status; | 
 | 820 | } | 
 | 821 |  | 
/*
 * Release a write request after its WRITE RPC has completed: end page
 * writeback and decide the request's fate.  On NFSv3/v4, a request
 * whose data the server has not committed to stable storage is kept
 * around (marked for COMMIT, or redirtied for resend); otherwise it
 * is removed from the inode's request list.
 */
static void nfs_writepage_release(struct nfs_page *req)
{
	end_page_writeback(req->wb_page);

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
	/* Only requeue/commit if the write itself did not error out */
	if (!PageError(req->wb_page)) {
		if (NFS_NEED_RESCHED(req)) {
			/* Verifier mismatch (apparent server reboot):
			 * the data must be written again */
			nfs_mark_request_dirty(req);
			goto out;
		} else if (NFS_NEED_COMMIT(req)) {
			/* Unstable write: needs a COMMIT before we can
			 * drop the request */
			nfs_mark_request_commit(req);
			goto out;
		}
	}
	nfs_inode_remove_request(req);

out:
	nfs_clear_commit(req);
	nfs_clear_reschedule(req);
#else
	/* NFSv2 has no COMMIT: the write is always stable */
	nfs_inode_remove_request(req);
#endif
	nfs_clear_page_writeback(req);
}
 | 846 |  | 
 | 847 | static inline int flush_task_priority(int how) | 
 | 848 | { | 
 | 849 | 	switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) { | 
 | 850 | 		case FLUSH_HIGHPRI: | 
 | 851 | 			return RPC_PRIORITY_HIGH; | 
 | 852 | 		case FLUSH_LOWPRI: | 
 | 853 | 			return RPC_PRIORITY_LOW; | 
 | 854 | 	} | 
 | 855 | 	return RPC_PRIORITY_NORMAL; | 
 | 856 | } | 
 | 857 |  | 
 | 858 | /* | 
 | 859 |  * Set up the argument/result storage required for the RPC call. | 
 | 860 |  */ | 
/*
 * Fill in the RPC argument/result structures for a WRITE call covering
 * 'count' bytes of 'req', starting 'offset' bytes into the request.
 * The caller must have already populated data->pagevec.
 */
static void nfs_write_rpcsetup(struct nfs_page *req,
		struct nfs_write_data *data,
		unsigned int count, unsigned int offset,
		int how)
{
	struct inode		*inode;

	/* Set up the RPC argument and reply structs
	 * NB: take care not to mess about with data->commit et al. */

	data->req = req;
	data->inode = inode = req->wb_context->dentry->d_inode;
	data->cred = req->wb_context->cred;

	/* WRITE arguments: offsets are relative to the request start */
	data->args.fh     = NFS_FH(inode);
	data->args.offset = req_offset(req) + offset;
	data->args.pgbase = req->wb_pgbase + offset;
	data->args.pages  = data->pagevec;
	data->args.count  = count;
	data->args.context = req->wb_context;

	data->res.fattr   = &data->fattr;
	data->res.count   = count;
	data->res.verf    = &data->verf;
	nfs_fattr_init(&data->fattr);

	/* Version-specific (v2/v3/v4) initialisation of the RPC task */
	NFS_PROTO(inode)->write_setup(data, how);

	data->task.tk_priority = flush_task_priority(how);
	data->task.tk_cookie = (unsigned long)inode;

	dprintk("NFS: %4d initiated write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
		data->task.tk_pid,
		inode->i_sb->s_id,
		(long long)NFS_FILEID(inode),
		count,
		(unsigned long long)data->args.offset);
}
 | 899 |  | 
 | 900 | static void nfs_execute_write(struct nfs_write_data *data) | 
 | 901 | { | 
 | 902 | 	struct rpc_clnt *clnt = NFS_CLIENT(data->inode); | 
 | 903 | 	sigset_t oldset; | 
 | 904 |  | 
 | 905 | 	rpc_clnt_sigmask(clnt, &oldset); | 
 | 906 | 	lock_kernel(); | 
 | 907 | 	rpc_execute(&data->task); | 
 | 908 | 	unlock_kernel(); | 
 | 909 | 	rpc_clnt_sigunmask(clnt, &oldset); | 
 | 910 | } | 
 | 911 |  | 
 | 912 | /* | 
 | 913 |  * Generate multiple small requests to write out a single | 
 | 914 |  * contiguous dirty area on one page. | 
 | 915 |  */ | 
/*
 * Write out a single request whose dirty range exceeds the server's
 * wsize by splitting it into several wsize-or-smaller RPCs against the
 * same page.  All nfs_write_data structures are allocated up front so
 * an allocation failure leaves nothing in flight; wb_complete counts
 * the outstanding sub-writes so the request is only released once the
 * last one finishes.
 *
 * Returns 0 on success, -ENOMEM if allocation failed (the request is
 * then redirtied and unlocked).
 */
static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how)
{
	struct nfs_page *req = nfs_list_entry(head->next);
	struct page *page = req->wb_page;
	struct nfs_write_data *data;
	unsigned int wsize = NFS_SERVER(inode)->wsize;
	unsigned int nbytes, offset;
	int requests = 0;
	LIST_HEAD(list);

	nfs_list_remove_request(req);

	/* Pass 1: allocate one nfs_write_data per wsize chunk */
	nbytes = req->wb_bytes;
	for (;;) {
		data = nfs_writedata_alloc(1);
		if (!data)
			goto out_bad;
		list_add(&data->pages, &list);
		requests++;
		if (nbytes <= wsize)
			break;
		nbytes -= wsize;
	}
	/* The request is released when the last sub-write completes */
	atomic_set(&req->wb_complete, requests);

	ClearPageError(page);
	set_page_writeback(page);
	/* Pass 2: set up and fire one RPC per chunk */
	offset = 0;
	nbytes = req->wb_bytes;
	do {
		data = list_entry(list.next, struct nfs_write_data, pages);
		list_del_init(&data->pages);

		data->pagevec[0] = page;
		data->complete = nfs_writeback_done_partial;

		if (nbytes > wsize) {
			nfs_write_rpcsetup(req, data, wsize, offset, how);
			offset += wsize;
			nbytes -= wsize;
		} else {
			/* Final (possibly short) chunk */
			nfs_write_rpcsetup(req, data, nbytes, offset, how);
			nbytes = 0;
		}
		nfs_execute_write(data);
	} while (nbytes != 0);

	return 0;

out_bad:
	/* Undo pass 1: free what was allocated, requeue the request */
	while (!list_empty(&list)) {
		data = list_entry(list.next, struct nfs_write_data, pages);
		list_del(&data->pages);
		nfs_writedata_free(data);
	}
	nfs_mark_request_dirty(req);
	nfs_clear_page_writeback(req);
	return -ENOMEM;
}
 | 975 |  | 
 | 976 | /* | 
 | 977 |  * Create an RPC task for the given write request and kick it. | 
 | 978 |  * The page must have been locked by the caller. | 
 | 979 |  * | 
 | 980 |  * It may happen that the page we're passed is not marked dirty. | 
 | 981 |  * This is the case if nfs_updatepage detects a conflicting request | 
 | 982 |  * that has been written but not committed. | 
 | 983 |  */ | 
static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
{
	struct nfs_page		*req;
	struct page		**pages;
	struct nfs_write_data	*data;
	unsigned int		count;

	/* If the server cannot take a full page per RPC, fall back to
	 * splitting each request into sub-page writes */
	if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE)
		return nfs_flush_multi(head, inode, how);

	data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
	if (!data)
		goto out_bad;

	/* Move all requests onto data->pages, collecting their pages
	 * into the RPC page vector and totalling the byte count */
	pages = data->pagevec;
	count = 0;
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_list_add_request(req, &data->pages);
		ClearPageError(req->wb_page);
		set_page_writeback(req->wb_page);
		*pages++ = req->wb_page;
		count += req->wb_bytes;
	}
	req = nfs_list_entry(data->pages.next);

	data->complete = nfs_writeback_done_full;
	/* Set up the argument struct */
	nfs_write_rpcsetup(req, data, count, 0, how);

	nfs_execute_write(data);
	return 0;
 out_bad:
	/* Allocation failed: redirty and unlock every queued request */
	while (!list_empty(head)) {
		struct nfs_page *req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_mark_request_dirty(req);
		nfs_clear_page_writeback(req);
	}
	return -ENOMEM;
}
 | 1026 |  | 
/*
 * Flush a list of dirty requests: repeatedly coalesce up to 'wpages'
 * contiguous requests into a single RPC via nfs_flush_one().
 *
 * Returns the number of pages scheduled for writeback, or a negative
 * errno; on error, any requests still on 'head' are redirtied and
 * unlocked.
 */
static int
nfs_flush_list(struct list_head *head, int wpages, int how)
{
	LIST_HEAD(one_request);
	struct nfs_page		*req;
	int			error = 0;
	unsigned int		pages = 0;

	while (!list_empty(head)) {
		pages += nfs_coalesce_requests(head, &one_request, wpages);
		req = nfs_list_entry(one_request.next);
		error = nfs_flush_one(&one_request, req->wb_context->dentry->d_inode, how);
		if (error < 0)
			break;
	}
	if (error >= 0)
		return pages;

	/* Error: put the remaining requests back on the dirty list */
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_mark_request_dirty(req);
		nfs_clear_page_writeback(req);
	}
	return error;
}
 | 1053 |  | 
 | 1054 | /* | 
 | 1055 |  * Handle a write reply that flushed part of a page. | 
 | 1056 |  */ | 
static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
{
	struct nfs_page		*req = data->req;
	struct page		*page = req->wb_page;

	dprintk("NFS: write (%s/%Ld %d@%Ld)",
		req->wb_context->dentry->d_inode->i_sb->s_id,
		(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
		req->wb_bytes,
		(long long)req_offset(req));

	if (status < 0) {
		/* Record the error on both the page and the open context */
		ClearPageUptodate(page);
		SetPageError(page);
		req->wb_context->error = status;
		dprintk(", error = %d\n", status);
	} else {
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
		if (data->verf.committed < NFS_FILE_SYNC) {
			/* Unstable write: remember the verifier so a later
			 * COMMIT can be checked against it */
			if (!NFS_NEED_COMMIT(req)) {
				nfs_defer_commit(req);
				memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
				dprintk(" defer commit\n");
			} else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) {
				/* Verifier changed mid-request: the server
				 * restarted, so the data must be resent */
				nfs_defer_reschedule(req);
				dprintk(" server reboot detected\n");
			}
		} else
#endif
			dprintk(" OK\n");
	}

	/* Release the request once the last sub-write has completed */
	if (atomic_dec_and_test(&req->wb_complete))
		nfs_writepage_release(req);
}
 | 1092 |  | 
 | 1093 | /* | 
 | 1094 |  * Handle a write reply that flushes a whole page. | 
 | 1095 |  * | 
 | 1096 |  * FIXME: There is an inherent race with invalidate_inode_pages and | 
 | 1097 |  *	  writebacks since the page->count is kept > 1 for as long | 
 | 1098 |  *	  as the page has a write request pending. | 
 | 1099 |  */ | 
static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
{
	struct nfs_page		*req;
	struct page		*page;

	/* Update attributes as result of writeback. */
	while (!list_empty(&data->pages)) {
		req = nfs_list_entry(data->pages.next);
		nfs_list_remove_request(req);
		page = req->wb_page;

		dprintk("NFS: write (%s/%Ld %d@%Ld)",
			req->wb_context->dentry->d_inode->i_sb->s_id,
			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
			req->wb_bytes,
			(long long)req_offset(req));

		if (status < 0) {
			/* Error: flag the page, record the error on the
			 * open context, and drop the request */
			ClearPageUptodate(page);
			SetPageError(page);
			req->wb_context->error = status;
			end_page_writeback(page);
			nfs_inode_remove_request(req);
			dprintk(", error = %d\n", status);
			goto next;
		}
		end_page_writeback(page);

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
		/* Stable write (or server committed anyway): done */
		if (data->args.stable != NFS_UNSTABLE || data->verf.committed == NFS_FILE_SYNC) {
			nfs_inode_remove_request(req);
			dprintk(" OK\n");
			goto next;
		}
		/* Unstable write: save the verifier and queue for COMMIT */
		memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
		nfs_mark_request_commit(req);
		dprintk(" marked for commit\n");
#else
		/* NFSv2 writes are always stable */
		nfs_inode_remove_request(req);
#endif
	next:
		nfs_clear_page_writeback(req);
	}
}
 | 1144 |  | 
 | 1145 | /* | 
 | 1146 |  * This function is called when the WRITE call is complete. | 
 | 1147 |  */ | 
| Trond Myklebust | 963d8fe | 2006-01-03 09:55:04 +0100 | [diff] [blame] | 1148 | void nfs_writeback_done(struct rpc_task *task, void *calldata) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1149 | { | 
| Trond Myklebust | 963d8fe | 2006-01-03 09:55:04 +0100 | [diff] [blame] | 1150 | 	struct nfs_write_data	*data = calldata; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1151 | 	struct nfs_writeargs	*argp = &data->args; | 
 | 1152 | 	struct nfs_writeres	*resp = &data->res; | 
 | 1153 |  | 
 | 1154 | 	dprintk("NFS: %4d nfs_writeback_done (status %d)\n", | 
 | 1155 | 		task->tk_pid, task->tk_status); | 
 | 1156 |  | 
 | 1157 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) | 
 | 1158 | 	if (resp->verf->committed < argp->stable && task->tk_status >= 0) { | 
 | 1159 | 		/* We tried a write call, but the server did not | 
 | 1160 | 		 * commit data to stable storage even though we | 
 | 1161 | 		 * requested it. | 
 | 1162 | 		 * Note: There is a known bug in Tru64 < 5.0 in which | 
 | 1163 | 		 *	 the server reports NFS_DATA_SYNC, but performs | 
 | 1164 | 		 *	 NFS_FILE_SYNC. We therefore implement this checking | 
 | 1165 | 		 *	 as a dprintk() in order to avoid filling syslog. | 
 | 1166 | 		 */ | 
 | 1167 | 		static unsigned long    complain; | 
 | 1168 |  | 
 | 1169 | 		if (time_before(complain, jiffies)) { | 
 | 1170 | 			dprintk("NFS: faulty NFS server %s:" | 
 | 1171 | 				" (committed = %d) != (stable = %d)\n", | 
 | 1172 | 				NFS_SERVER(data->inode)->hostname, | 
 | 1173 | 				resp->verf->committed, argp->stable); | 
 | 1174 | 			complain = jiffies + 300 * HZ; | 
 | 1175 | 		} | 
 | 1176 | 	} | 
 | 1177 | #endif | 
 | 1178 | 	/* Is this a short write? */ | 
 | 1179 | 	if (task->tk_status >= 0 && resp->count < argp->count) { | 
 | 1180 | 		static unsigned long    complain; | 
 | 1181 |  | 
 | 1182 | 		/* Has the server at least made some progress? */ | 
 | 1183 | 		if (resp->count != 0) { | 
 | 1184 | 			/* Was this an NFSv2 write or an NFSv3 stable write? */ | 
 | 1185 | 			if (resp->verf->committed != NFS_UNSTABLE) { | 
 | 1186 | 				/* Resend from where the server left off */ | 
 | 1187 | 				argp->offset += resp->count; | 
 | 1188 | 				argp->pgbase += resp->count; | 
 | 1189 | 				argp->count -= resp->count; | 
 | 1190 | 			} else { | 
 | 1191 | 				/* Resend as a stable write in order to avoid | 
 | 1192 | 				 * headaches in the case of a server crash. | 
 | 1193 | 				 */ | 
 | 1194 | 				argp->stable = NFS_FILE_SYNC; | 
 | 1195 | 			} | 
 | 1196 | 			rpc_restart_call(task); | 
 | 1197 | 			return; | 
 | 1198 | 		} | 
 | 1199 | 		if (time_before(complain, jiffies)) { | 
 | 1200 | 			printk(KERN_WARNING | 
 | 1201 | 			       "NFS: Server wrote zero bytes, expected %u.\n", | 
 | 1202 | 					argp->count); | 
 | 1203 | 			complain = jiffies + 300 * HZ; | 
 | 1204 | 		} | 
 | 1205 | 		/* Can't do anything about it except throw an error. */ | 
 | 1206 | 		task->tk_status = -EIO; | 
 | 1207 | 	} | 
 | 1208 |  | 
 | 1209 | 	/* | 
 | 1210 | 	 * Process the nfs_page list | 
 | 1211 | 	 */ | 
 | 1212 | 	data->complete(data, task->tk_status); | 
 | 1213 | } | 
 | 1214 |  | 
 | 1215 |  | 
 | 1216 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) | 
| Trond Myklebust | 963d8fe | 2006-01-03 09:55:04 +0100 | [diff] [blame] | 1217 | void nfs_commit_release(void *wdata) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1218 | { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1219 | 	nfs_commit_free(wdata); | 
 | 1220 | } | 
 | 1221 |  | 
 | 1222 | /* | 
 | 1223 |  * Set up the argument/result storage required for the RPC call. | 
 | 1224 |  */ | 
static void nfs_commit_rpcsetup(struct list_head *head,
		struct nfs_write_data *data, int how)
{
	struct nfs_page		*first;
	struct inode		*inode;

	/* Set up the RPC argument and reply structs
	 * NB: take care not to mess about with data->commit et al. */

	/* Take ownership of every request on 'head'; the inode and
	 * credentials come from the first request */
	list_splice_init(head, &data->pages);
	first = nfs_list_entry(data->pages.next);
	inode = first->wb_context->dentry->d_inode;

	data->inode	  = inode;
	data->cred	  = first->wb_context->cred;

	data->args.fh     = NFS_FH(data->inode);
	/* Note: we always request a commit of the entire inode */
	data->args.offset = 0;
	data->args.count  = 0;
	data->res.count   = 0;
	data->res.fattr   = &data->fattr;
	data->res.verf    = &data->verf;
	nfs_fattr_init(&data->fattr);

	/* Version-specific initialisation of the COMMIT RPC task */
	NFS_PROTO(inode)->commit_setup(data, how);

	data->task.tk_priority = flush_task_priority(how);
	data->task.tk_cookie = (unsigned long)inode;

	dprintk("NFS: %4d initiated commit call\n", data->task.tk_pid);
}
 | 1257 |  | 
 | 1258 | /* | 
 | 1259 |  * Commit dirty pages | 
 | 1260 |  */ | 
/*
 * Send a COMMIT covering every request on 'head'.  Returns 0 once the
 * RPC has been dispatched, or -ENOMEM if the write data could not be
 * allocated (in which case the requests are re-queued for commit and
 * unlocked).
 */
static int
nfs_commit_list(struct inode *inode, struct list_head *head, int how)
{
	struct nfs_write_data	*data;
	struct nfs_page         *req;

	data = nfs_commit_alloc(NFS_SERVER(inode)->wpages);

	if (!data)
		goto out_bad;

	/* Set up the argument struct */
	nfs_commit_rpcsetup(head, data, how);

	nfs_execute_write(data);
	return 0;
 out_bad:
	/* Allocation failed: put requests back on the commit list */
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_mark_request_commit(req);
		nfs_clear_page_writeback(req);
	}
	return -ENOMEM;
}
 | 1286 |  | 
 | 1287 | /* | 
 | 1288 |  * COMMIT call returned | 
 | 1289 |  */ | 
| Trond Myklebust | 963d8fe | 2006-01-03 09:55:04 +0100 | [diff] [blame] | 1290 | void nfs_commit_done(struct rpc_task *task, void *calldata) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1291 | { | 
| Trond Myklebust | 963d8fe | 2006-01-03 09:55:04 +0100 | [diff] [blame] | 1292 | 	struct nfs_write_data	*data = calldata; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1293 | 	struct nfs_page		*req; | 
 | 1294 | 	int res = 0; | 
 | 1295 |  | 
 | 1296 |         dprintk("NFS: %4d nfs_commit_done (status %d)\n", | 
 | 1297 |                                 task->tk_pid, task->tk_status); | 
 | 1298 |  | 
 | 1299 | 	while (!list_empty(&data->pages)) { | 
 | 1300 | 		req = nfs_list_entry(data->pages.next); | 
 | 1301 | 		nfs_list_remove_request(req); | 
 | 1302 |  | 
 | 1303 | 		dprintk("NFS: commit (%s/%Ld %d@%Ld)", | 
 | 1304 | 			req->wb_context->dentry->d_inode->i_sb->s_id, | 
 | 1305 | 			(long long)NFS_FILEID(req->wb_context->dentry->d_inode), | 
 | 1306 | 			req->wb_bytes, | 
 | 1307 | 			(long long)req_offset(req)); | 
 | 1308 | 		if (task->tk_status < 0) { | 
 | 1309 | 			req->wb_context->error = task->tk_status; | 
 | 1310 | 			nfs_inode_remove_request(req); | 
 | 1311 | 			dprintk(", error = %d\n", task->tk_status); | 
 | 1312 | 			goto next; | 
 | 1313 | 		} | 
 | 1314 |  | 
 | 1315 | 		/* Okay, COMMIT succeeded, apparently. Check the verifier | 
 | 1316 | 		 * returned by the server against all stored verfs. */ | 
 | 1317 | 		if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) { | 
 | 1318 | 			/* We have a match */ | 
 | 1319 | 			nfs_inode_remove_request(req); | 
 | 1320 | 			dprintk(" OK\n"); | 
 | 1321 | 			goto next; | 
 | 1322 | 		} | 
 | 1323 | 		/* We have a mismatch. Write the page again */ | 
 | 1324 | 		dprintk(" mismatch\n"); | 
 | 1325 | 		nfs_mark_request_dirty(req); | 
 | 1326 | 	next: | 
| Trond Myklebust | c6a556b | 2005-06-22 17:16:30 +0000 | [diff] [blame] | 1327 | 		nfs_clear_page_writeback(req); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1328 | 		res++; | 
 | 1329 | 	} | 
 | 1330 | 	sub_page_state(nr_unstable,res); | 
 | 1331 | } | 
 | 1332 | #endif | 
 | 1333 |  | 
/*
 * Scan the inode for dirty requests in [idx_start, idx_start+npages)
 * and write them out.  Returns the number of requests flushed, or a
 * negative errno.
 */
static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
			   unsigned int npages, int how)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	LIST_HEAD(head);
	int			res,
				error = 0;

	/* req_lock protects the inode's request lists during the scan */
	spin_lock(&nfsi->req_lock);
	res = nfs_scan_dirty(inode, &head, idx_start, npages);
	spin_unlock(&nfsi->req_lock);
	if (res) {
		struct nfs_server *server = NFS_SERVER(inode);

		/* For single writes, FLUSH_STABLE is more efficient */
		if (res == nfsi->npages && nfsi->npages <= server->wpages) {
			if (res > 1 || nfs_list_entry(head.next)->wb_bytes <= server->wsize)
				how |= FLUSH_STABLE;
		}
		error = nfs_flush_list(&head, server->wpages, how);
	}
	if (error < 0)
		return error;
	return res;
}
 | 1359 |  | 
 | 1360 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) | 
| Trond Myklebust | 3da28eb | 2005-06-22 17:16:31 +0000 | [diff] [blame] | 1361 | int nfs_commit_inode(struct inode *inode, int how) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1362 | { | 
 | 1363 | 	struct nfs_inode *nfsi = NFS_I(inode); | 
 | 1364 | 	LIST_HEAD(head); | 
 | 1365 | 	int			res, | 
 | 1366 | 				error = 0; | 
 | 1367 |  | 
 | 1368 | 	spin_lock(&nfsi->req_lock); | 
| Trond Myklebust | 3da28eb | 2005-06-22 17:16:31 +0000 | [diff] [blame] | 1369 | 	res = nfs_scan_commit(inode, &head, 0, 0); | 
 | 1370 | 	spin_unlock(&nfsi->req_lock); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1371 | 	if (res) { | 
| Chuck Lever | 40859d7 | 2005-11-30 18:09:02 -0500 | [diff] [blame] | 1372 | 		error = nfs_commit_list(inode, &head, how); | 
| Trond Myklebust | 3da28eb | 2005-06-22 17:16:31 +0000 | [diff] [blame] | 1373 | 		if (error < 0) | 
 | 1374 | 			return error; | 
 | 1375 | 	} | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1376 | 	return res; | 
 | 1377 | } | 
 | 1378 | #endif | 
 | 1379 |  | 
/*
 * Write out (and optionally wait for and commit) all dirty requests
 * for the given page range of the inode.  The loop repeats for as
 * long as any stage reports it did work (positive return), so newly
 * dirtied or uncommitted requests are picked up; it exits with 0 when
 * everything is clean, or with a negative errno on failure.
 */
int nfs_sync_inode(struct inode *inode, unsigned long idx_start,
		  unsigned int npages, int how)
{
	int nocommit = how & FLUSH_NOCOMMIT;
	int wait = how & FLUSH_WAIT;
	int error;

	/* FLUSH_WAIT/FLUSH_NOCOMMIT are handled here, not passed down */
	how &= ~(FLUSH_WAIT|FLUSH_NOCOMMIT);

	do {
		if (wait) {
			error = nfs_wait_on_requests(inode, idx_start, npages);
			/* error > 0 means requests were waited on: rescan */
			if (error != 0)
				continue;
		}
		error = nfs_flush_inode(inode, idx_start, npages, how);
		if (error != 0)
			continue;
		if (!nocommit)
			error = nfs_commit_inode(inode, how);
	} while (error > 0);
	return error;
}
 | 1403 |  | 
 | 1404 | int nfs_init_writepagecache(void) | 
 | 1405 | { | 
 | 1406 | 	nfs_wdata_cachep = kmem_cache_create("nfs_write_data", | 
 | 1407 | 					     sizeof(struct nfs_write_data), | 
 | 1408 | 					     0, SLAB_HWCACHE_ALIGN, | 
 | 1409 | 					     NULL, NULL); | 
 | 1410 | 	if (nfs_wdata_cachep == NULL) | 
 | 1411 | 		return -ENOMEM; | 
 | 1412 |  | 
 | 1413 | 	nfs_wdata_mempool = mempool_create(MIN_POOL_WRITE, | 
 | 1414 | 					   mempool_alloc_slab, | 
 | 1415 | 					   mempool_free_slab, | 
 | 1416 | 					   nfs_wdata_cachep); | 
 | 1417 | 	if (nfs_wdata_mempool == NULL) | 
 | 1418 | 		return -ENOMEM; | 
 | 1419 |  | 
 | 1420 | 	nfs_commit_mempool = mempool_create(MIN_POOL_COMMIT, | 
 | 1421 | 					   mempool_alloc_slab, | 
 | 1422 | 					   mempool_free_slab, | 
 | 1423 | 					   nfs_wdata_cachep); | 
 | 1424 | 	if (nfs_commit_mempool == NULL) | 
 | 1425 | 		return -ENOMEM; | 
 | 1426 |  | 
 | 1427 | 	return 0; | 
 | 1428 | } | 
 | 1429 |  | 
/*
 * Tear down the write/commit mempools and the backing slab cache.
 * Destroy the pools before the cache, since they allocate from it.
 */
void nfs_destroy_writepagecache(void)
{
	mempool_destroy(nfs_commit_mempool);
	mempool_destroy(nfs_wdata_mempool);
	if (kmem_cache_destroy(nfs_wdata_cachep))
		printk(KERN_INFO "nfs_write_data: not all structures were freed\n");
}
 | 1437 |  |