Chris Mason | 39279cc | 2007-06-12 06:35:45 -0400 | [diff] [blame] | 1 | #include <linux/module.h> |
| 2 | #include <linux/buffer_head.h> |
| 3 | #include <linux/fs.h> |
| 4 | #include <linux/pagemap.h> |
| 5 | #include <linux/highmem.h> |
| 6 | #include <linux/time.h> |
| 7 | #include <linux/init.h> |
| 8 | #include <linux/string.h> |
| 9 | #include <linux/smp_lock.h> |
| 10 | #include <linux/backing-dev.h> |
| 11 | #include <linux/mpage.h> |
| 12 | #include <linux/swap.h> |
| 13 | #include <linux/writeback.h> |
| 14 | #include <linux/statfs.h> |
| 15 | #include <linux/compat.h> |
| 16 | #include "ctree.h" |
| 17 | #include "disk-io.h" |
| 18 | #include "transaction.h" |
| 19 | #include "btrfs_inode.h" |
| 20 | #include "ioctl.h" |
| 21 | #include "print-tree.h" |
| 22 | |
| 23 | |
| 24 | static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, |
| 25 | struct page **prepared_pages, |
| 26 | const char __user * buf) |
| 27 | { |
| 28 | long page_fault = 0; |
| 29 | int i; |
| 30 | int offset = pos & (PAGE_CACHE_SIZE - 1); |
| 31 | |
| 32 | for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) { |
| 33 | size_t count = min_t(size_t, |
| 34 | PAGE_CACHE_SIZE - offset, write_bytes); |
| 35 | struct page *page = prepared_pages[i]; |
| 36 | fault_in_pages_readable(buf, count); |
| 37 | |
| 38 | /* Copy data from userspace to the current page */ |
| 39 | kmap(page); |
| 40 | page_fault = __copy_from_user(page_address(page) + offset, |
| 41 | buf, count); |
| 42 | /* Flush processor's dcache for this page */ |
| 43 | flush_dcache_page(page); |
| 44 | kunmap(page); |
| 45 | buf += count; |
| 46 | write_bytes -= count; |
| 47 | |
| 48 | if (page_fault) |
| 49 | break; |
| 50 | } |
| 51 | return page_fault ? -EFAULT : 0; |
| 52 | } |
| 53 | |
| 54 | static void btrfs_drop_pages(struct page **pages, size_t num_pages) |
| 55 | { |
| 56 | size_t i; |
| 57 | for (i = 0; i < num_pages; i++) { |
| 58 | if (!pages[i]) |
| 59 | break; |
| 60 | unlock_page(pages[i]); |
| 61 | mark_page_accessed(pages[i]); |
| 62 | page_cache_release(pages[i]); |
| 63 | } |
| 64 | } |
| 65 | |
| 66 | static int dirty_and_release_pages(struct btrfs_trans_handle *trans, |
| 67 | struct btrfs_root *root, |
| 68 | struct file *file, |
| 69 | struct page **pages, |
| 70 | size_t num_pages, |
| 71 | loff_t pos, |
| 72 | size_t write_bytes) |
| 73 | { |
| 74 | int i; |
| 75 | int offset; |
| 76 | int err = 0; |
| 77 | int ret; |
| 78 | int this_write; |
| 79 | struct inode *inode = file->f_path.dentry->d_inode; |
| 80 | struct buffer_head *bh; |
| 81 | struct btrfs_file_extent_item *ei; |
| 82 | |
| 83 | for (i = 0; i < num_pages; i++) { |
| 84 | offset = pos & (PAGE_CACHE_SIZE -1); |
Chris Mason | 84f54cf | 2007-06-12 07:43:08 -0400 | [diff] [blame^] | 85 | this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); |
Chris Mason | 39279cc | 2007-06-12 06:35:45 -0400 | [diff] [blame] | 86 | /* FIXME, one block at a time */ |
| 87 | |
| 88 | mutex_lock(&root->fs_info->fs_mutex); |
| 89 | trans = btrfs_start_transaction(root, 1); |
| 90 | btrfs_set_trans_block_group(trans, inode); |
| 91 | |
| 92 | bh = page_buffers(pages[i]); |
| 93 | |
| 94 | if (buffer_mapped(bh) && bh->b_blocknr == 0) { |
| 95 | struct btrfs_key key; |
| 96 | struct btrfs_path *path; |
| 97 | char *ptr; |
| 98 | u32 datasize; |
| 99 | |
| 100 | /* create an inline extent, and copy the data in */ |
| 101 | path = btrfs_alloc_path(); |
| 102 | BUG_ON(!path); |
| 103 | key.objectid = inode->i_ino; |
| 104 | key.offset = pages[i]->index << PAGE_CACHE_SHIFT; |
| 105 | key.flags = 0; |
| 106 | btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); |
| 107 | BUG_ON(write_bytes >= PAGE_CACHE_SIZE); |
| 108 | datasize = offset + |
| 109 | btrfs_file_extent_calc_inline_size(write_bytes); |
| 110 | |
| 111 | ret = btrfs_insert_empty_item(trans, root, path, &key, |
| 112 | datasize); |
| 113 | BUG_ON(ret); |
| 114 | ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), |
| 115 | path->slots[0], struct btrfs_file_extent_item); |
| 116 | btrfs_set_file_extent_generation(ei, trans->transid); |
| 117 | btrfs_set_file_extent_type(ei, |
| 118 | BTRFS_FILE_EXTENT_INLINE); |
| 119 | ptr = btrfs_file_extent_inline_start(ei); |
| 120 | btrfs_memcpy(root, path->nodes[0]->b_data, |
| 121 | ptr, bh->b_data, offset + write_bytes); |
| 122 | mark_buffer_dirty(path->nodes[0]); |
| 123 | btrfs_free_path(path); |
| 124 | } else if (buffer_mapped(bh)) { |
| 125 | /* csum the file data */ |
| 126 | btrfs_csum_file_block(trans, root, inode->i_ino, |
| 127 | pages[i]->index << PAGE_CACHE_SHIFT, |
| 128 | kmap(pages[i]), PAGE_CACHE_SIZE); |
| 129 | kunmap(pages[i]); |
| 130 | } |
| 131 | SetPageChecked(pages[i]); |
| 132 | ret = btrfs_end_transaction(trans, root); |
| 133 | BUG_ON(ret); |
| 134 | mutex_unlock(&root->fs_info->fs_mutex); |
| 135 | |
| 136 | ret = btrfs_commit_write(file, pages[i], offset, |
| 137 | offset + this_write); |
| 138 | pos += this_write; |
| 139 | if (ret) { |
| 140 | err = ret; |
| 141 | goto failed; |
| 142 | } |
| 143 | WARN_ON(this_write > write_bytes); |
| 144 | write_bytes -= this_write; |
| 145 | } |
| 146 | failed: |
| 147 | return err; |
| 148 | } |
| 149 | |
| 150 | /* |
| 151 | * this is very complex, but the basic idea is to drop all extents |
| 152 | * in the range start - end. hint_block is filled in with a block number |
| 153 | * that would be a good hint to the block allocator for this file. |
| 154 | * |
| 155 | * If an extent intersects the range but is not entirely inside the range |
| 156 | * it is either truncated or split. Anything entirely inside the range |
| 157 | * is deleted from the tree. |
| 158 | */ |
| 159 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, |
| 160 | struct btrfs_root *root, struct inode *inode, |
| 161 | u64 start, u64 end, u64 *hint_block) |
| 162 | { |
| 163 | int ret; |
| 164 | struct btrfs_key key; |
| 165 | struct btrfs_leaf *leaf; |
| 166 | int slot; |
| 167 | struct btrfs_file_extent_item *extent; |
| 168 | u64 extent_end = 0; |
| 169 | int keep; |
| 170 | struct btrfs_file_extent_item old; |
| 171 | struct btrfs_path *path; |
| 172 | u64 search_start = start; |
| 173 | int bookend; |
| 174 | int found_type; |
| 175 | int found_extent; |
| 176 | int found_inline; |
| 177 | |
| 178 | path = btrfs_alloc_path(); |
| 179 | if (!path) |
| 180 | return -ENOMEM; |
| 181 | while(1) { |
| 182 | btrfs_release_path(root, path); |
| 183 | ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, |
| 184 | search_start, -1); |
| 185 | if (ret < 0) |
| 186 | goto out; |
| 187 | if (ret > 0) { |
| 188 | if (path->slots[0] == 0) { |
| 189 | ret = 0; |
| 190 | goto out; |
| 191 | } |
| 192 | path->slots[0]--; |
| 193 | } |
| 194 | keep = 0; |
| 195 | bookend = 0; |
| 196 | found_extent = 0; |
| 197 | found_inline = 0; |
| 198 | extent = NULL; |
| 199 | leaf = btrfs_buffer_leaf(path->nodes[0]); |
| 200 | slot = path->slots[0]; |
| 201 | btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); |
| 202 | if (key.offset >= end || key.objectid != inode->i_ino) { |
| 203 | ret = 0; |
| 204 | goto out; |
| 205 | } |
| 206 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) { |
| 207 | ret = 0; |
| 208 | goto out; |
| 209 | } |
| 210 | extent = btrfs_item_ptr(leaf, slot, |
| 211 | struct btrfs_file_extent_item); |
| 212 | found_type = btrfs_file_extent_type(extent); |
| 213 | if (found_type == BTRFS_FILE_EXTENT_REG) { |
| 214 | extent_end = key.offset + |
| 215 | (btrfs_file_extent_num_blocks(extent) << |
| 216 | inode->i_blkbits); |
| 217 | found_extent = 1; |
| 218 | } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { |
| 219 | found_inline = 1; |
| 220 | extent_end = key.offset + |
| 221 | btrfs_file_extent_inline_len(leaf->items + slot); |
| 222 | } |
| 223 | |
| 224 | /* we found nothing we can drop */ |
| 225 | if (!found_extent && !found_inline) { |
| 226 | ret = 0; |
| 227 | goto out; |
| 228 | } |
| 229 | |
| 230 | /* we found nothing inside the range */ |
| 231 | if (search_start >= extent_end) { |
| 232 | ret = 0; |
| 233 | goto out; |
| 234 | } |
| 235 | |
| 236 | /* FIXME, there's only one inline extent allowed right now */ |
| 237 | if (found_inline) { |
| 238 | u64 mask = root->blocksize - 1; |
| 239 | search_start = (extent_end + mask) & ~mask; |
| 240 | } else |
| 241 | search_start = extent_end; |
| 242 | |
| 243 | if (end < extent_end && end >= key.offset) { |
| 244 | if (found_extent) { |
| 245 | u64 disk_blocknr = |
| 246 | btrfs_file_extent_disk_blocknr(extent); |
| 247 | u64 disk_num_blocks = |
| 248 | btrfs_file_extent_disk_num_blocks(extent); |
| 249 | memcpy(&old, extent, sizeof(old)); |
| 250 | if (disk_blocknr != 0) { |
| 251 | ret = btrfs_inc_extent_ref(trans, root, |
| 252 | disk_blocknr, disk_num_blocks); |
| 253 | BUG_ON(ret); |
| 254 | } |
| 255 | } |
| 256 | WARN_ON(found_inline); |
| 257 | bookend = 1; |
| 258 | } |
| 259 | |
| 260 | /* truncate existing extent */ |
| 261 | if (start > key.offset) { |
| 262 | u64 new_num; |
| 263 | u64 old_num; |
| 264 | keep = 1; |
| 265 | WARN_ON(start & (root->blocksize - 1)); |
| 266 | if (found_extent) { |
| 267 | new_num = (start - key.offset) >> |
| 268 | inode->i_blkbits; |
| 269 | old_num = btrfs_file_extent_num_blocks(extent); |
| 270 | *hint_block = |
| 271 | btrfs_file_extent_disk_blocknr(extent); |
| 272 | if (btrfs_file_extent_disk_blocknr(extent)) { |
| 273 | inode->i_blocks -= |
| 274 | (old_num - new_num) << 3; |
| 275 | } |
| 276 | btrfs_set_file_extent_num_blocks(extent, |
| 277 | new_num); |
| 278 | mark_buffer_dirty(path->nodes[0]); |
| 279 | } else { |
| 280 | WARN_ON(1); |
| 281 | } |
| 282 | } |
| 283 | /* delete the entire extent */ |
| 284 | if (!keep) { |
| 285 | u64 disk_blocknr = 0; |
| 286 | u64 disk_num_blocks = 0; |
| 287 | u64 extent_num_blocks = 0; |
| 288 | if (found_extent) { |
| 289 | disk_blocknr = |
| 290 | btrfs_file_extent_disk_blocknr(extent); |
| 291 | disk_num_blocks = |
| 292 | btrfs_file_extent_disk_num_blocks(extent); |
| 293 | extent_num_blocks = |
| 294 | btrfs_file_extent_num_blocks(extent); |
| 295 | *hint_block = |
| 296 | btrfs_file_extent_disk_blocknr(extent); |
| 297 | } |
| 298 | ret = btrfs_del_item(trans, root, path); |
| 299 | BUG_ON(ret); |
| 300 | btrfs_release_path(root, path); |
| 301 | extent = NULL; |
| 302 | if (found_extent && disk_blocknr != 0) { |
| 303 | inode->i_blocks -= extent_num_blocks << 3; |
| 304 | ret = btrfs_free_extent(trans, root, |
| 305 | disk_blocknr, |
| 306 | disk_num_blocks, 0); |
| 307 | } |
| 308 | |
| 309 | BUG_ON(ret); |
| 310 | if (!bookend && search_start >= end) { |
| 311 | ret = 0; |
| 312 | goto out; |
| 313 | } |
| 314 | if (!bookend) |
| 315 | continue; |
| 316 | } |
| 317 | /* create bookend, splitting the extent in two */ |
| 318 | if (bookend && found_extent) { |
| 319 | struct btrfs_key ins; |
| 320 | ins.objectid = inode->i_ino; |
| 321 | ins.offset = end; |
| 322 | ins.flags = 0; |
| 323 | btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); |
| 324 | |
| 325 | btrfs_release_path(root, path); |
| 326 | ret = btrfs_insert_empty_item(trans, root, path, &ins, |
| 327 | sizeof(*extent)); |
| 328 | BUG_ON(ret); |
| 329 | extent = btrfs_item_ptr( |
| 330 | btrfs_buffer_leaf(path->nodes[0]), |
| 331 | path->slots[0], |
| 332 | struct btrfs_file_extent_item); |
| 333 | btrfs_set_file_extent_disk_blocknr(extent, |
| 334 | btrfs_file_extent_disk_blocknr(&old)); |
| 335 | btrfs_set_file_extent_disk_num_blocks(extent, |
| 336 | btrfs_file_extent_disk_num_blocks(&old)); |
| 337 | |
| 338 | btrfs_set_file_extent_offset(extent, |
| 339 | btrfs_file_extent_offset(&old) + |
| 340 | ((end - key.offset) >> inode->i_blkbits)); |
| 341 | WARN_ON(btrfs_file_extent_num_blocks(&old) < |
| 342 | (extent_end - end) >> inode->i_blkbits); |
| 343 | btrfs_set_file_extent_num_blocks(extent, |
| 344 | (extent_end - end) >> inode->i_blkbits); |
| 345 | |
| 346 | btrfs_set_file_extent_type(extent, |
| 347 | BTRFS_FILE_EXTENT_REG); |
| 348 | btrfs_set_file_extent_generation(extent, |
| 349 | btrfs_file_extent_generation(&old)); |
| 350 | btrfs_mark_buffer_dirty(path->nodes[0]); |
| 351 | if (btrfs_file_extent_disk_blocknr(&old) != 0) { |
| 352 | inode->i_blocks += |
| 353 | btrfs_file_extent_num_blocks(extent) << 3; |
| 354 | } |
| 355 | ret = 0; |
| 356 | goto out; |
| 357 | } |
| 358 | } |
| 359 | out: |
| 360 | btrfs_free_path(path); |
| 361 | return ret; |
| 362 | } |
| 363 | |
| 364 | /* |
| 365 | * this gets pages into the page cache and locks them down |
| 366 | */ |
| 367 | static int prepare_pages(struct btrfs_root *root, |
| 368 | struct file *file, |
| 369 | struct page **pages, |
| 370 | size_t num_pages, |
| 371 | loff_t pos, |
| 372 | unsigned long first_index, |
| 373 | unsigned long last_index, |
| 374 | size_t write_bytes, |
| 375 | u64 alloc_extent_start) |
| 376 | { |
| 377 | int i; |
| 378 | unsigned long index = pos >> PAGE_CACHE_SHIFT; |
| 379 | struct inode *inode = file->f_path.dentry->d_inode; |
| 380 | int offset; |
| 381 | int err = 0; |
| 382 | int this_write; |
| 383 | struct buffer_head *bh; |
| 384 | struct buffer_head *head; |
| 385 | loff_t isize = i_size_read(inode); |
| 386 | |
| 387 | memset(pages, 0, num_pages * sizeof(struct page *)); |
| 388 | |
| 389 | for (i = 0; i < num_pages; i++) { |
| 390 | pages[i] = grab_cache_page(inode->i_mapping, index + i); |
| 391 | if (!pages[i]) { |
| 392 | err = -ENOMEM; |
| 393 | goto failed_release; |
| 394 | } |
| 395 | cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); |
| 396 | wait_on_page_writeback(pages[i]); |
| 397 | offset = pos & (PAGE_CACHE_SIZE -1); |
Chris Mason | 84f54cf | 2007-06-12 07:43:08 -0400 | [diff] [blame^] | 398 | this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); |
Chris Mason | 39279cc | 2007-06-12 06:35:45 -0400 | [diff] [blame] | 399 | if (!page_has_buffers(pages[i])) { |
| 400 | create_empty_buffers(pages[i], |
| 401 | root->fs_info->sb->s_blocksize, |
| 402 | (1 << BH_Uptodate)); |
| 403 | } |
| 404 | head = page_buffers(pages[i]); |
| 405 | bh = head; |
| 406 | do { |
| 407 | err = btrfs_map_bh_to_logical(root, bh, |
| 408 | alloc_extent_start); |
| 409 | BUG_ON(err); |
| 410 | if (err) |
| 411 | goto failed_truncate; |
| 412 | bh = bh->b_this_page; |
| 413 | if (alloc_extent_start) |
| 414 | alloc_extent_start++; |
| 415 | } while (bh != head); |
| 416 | pos += this_write; |
| 417 | WARN_ON(this_write > write_bytes); |
| 418 | write_bytes -= this_write; |
| 419 | } |
| 420 | return 0; |
| 421 | |
| 422 | failed_release: |
| 423 | btrfs_drop_pages(pages, num_pages); |
| 424 | return err; |
| 425 | |
| 426 | failed_truncate: |
| 427 | btrfs_drop_pages(pages, num_pages); |
| 428 | if (pos > isize) |
| 429 | vmtruncate(inode, isize); |
| 430 | return err; |
| 431 | } |
| 432 | |
| 433 | static ssize_t btrfs_file_write(struct file *file, const char __user *buf, |
| 434 | size_t count, loff_t *ppos) |
| 435 | { |
| 436 | loff_t pos; |
| 437 | size_t num_written = 0; |
| 438 | int err = 0; |
| 439 | int ret = 0; |
| 440 | struct inode *inode = file->f_path.dentry->d_inode; |
| 441 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 442 | struct page *pages[8]; |
| 443 | struct page *pinned[2]; |
| 444 | unsigned long first_index; |
| 445 | unsigned long last_index; |
| 446 | u64 start_pos; |
| 447 | u64 num_blocks; |
| 448 | u64 alloc_extent_start; |
| 449 | u64 hint_block; |
| 450 | struct btrfs_trans_handle *trans; |
| 451 | struct btrfs_key ins; |
| 452 | pinned[0] = NULL; |
| 453 | pinned[1] = NULL; |
| 454 | if (file->f_flags & O_DIRECT) |
| 455 | return -EINVAL; |
| 456 | pos = *ppos; |
| 457 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); |
| 458 | current->backing_dev_info = inode->i_mapping->backing_dev_info; |
| 459 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); |
| 460 | if (err) |
| 461 | goto out; |
| 462 | if (count == 0) |
| 463 | goto out; |
| 464 | err = remove_suid(file->f_path.dentry); |
| 465 | if (err) |
| 466 | goto out; |
| 467 | file_update_time(file); |
| 468 | |
| 469 | start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); |
| 470 | num_blocks = (count + pos - start_pos + root->blocksize - 1) >> |
| 471 | inode->i_blkbits; |
| 472 | |
| 473 | mutex_lock(&inode->i_mutex); |
| 474 | first_index = pos >> PAGE_CACHE_SHIFT; |
| 475 | last_index = (pos + count) >> PAGE_CACHE_SHIFT; |
| 476 | |
| 477 | /* |
| 478 | * there are lots of better ways to do this, but this code |
| 479 | * makes sure the first and last page in the file range are |
| 480 | * up to date and ready for cow |
| 481 | */ |
| 482 | if ((pos & (PAGE_CACHE_SIZE - 1))) { |
| 483 | pinned[0] = grab_cache_page(inode->i_mapping, first_index); |
| 484 | if (!PageUptodate(pinned[0])) { |
| 485 | ret = mpage_readpage(pinned[0], btrfs_get_block); |
| 486 | BUG_ON(ret); |
| 487 | wait_on_page_locked(pinned[0]); |
| 488 | } else { |
| 489 | unlock_page(pinned[0]); |
| 490 | } |
| 491 | } |
| 492 | if ((pos + count) & (PAGE_CACHE_SIZE - 1)) { |
| 493 | pinned[1] = grab_cache_page(inode->i_mapping, last_index); |
| 494 | if (!PageUptodate(pinned[1])) { |
| 495 | ret = mpage_readpage(pinned[1], btrfs_get_block); |
| 496 | BUG_ON(ret); |
| 497 | wait_on_page_locked(pinned[1]); |
| 498 | } else { |
| 499 | unlock_page(pinned[1]); |
| 500 | } |
| 501 | } |
| 502 | |
| 503 | mutex_lock(&root->fs_info->fs_mutex); |
| 504 | trans = btrfs_start_transaction(root, 1); |
| 505 | if (!trans) { |
| 506 | err = -ENOMEM; |
| 507 | mutex_unlock(&root->fs_info->fs_mutex); |
| 508 | goto out_unlock; |
| 509 | } |
| 510 | btrfs_set_trans_block_group(trans, inode); |
| 511 | /* FIXME blocksize != 4096 */ |
| 512 | inode->i_blocks += num_blocks << 3; |
| 513 | hint_block = 0; |
| 514 | |
| 515 | /* FIXME...EIEIO, ENOSPC and more */ |
| 516 | |
| 517 | /* step one, delete the existing extents in this range */ |
| 518 | if (start_pos < inode->i_size) { |
| 519 | /* FIXME blocksize != pagesize */ |
| 520 | ret = btrfs_drop_extents(trans, root, inode, |
| 521 | start_pos, |
| 522 | (pos + count + root->blocksize -1) & |
| 523 | ~((u64)root->blocksize - 1), |
| 524 | &hint_block); |
| 525 | BUG_ON(ret); |
| 526 | } |
| 527 | |
| 528 | /* insert any holes we need to create */ |
| 529 | if (inode->i_size < start_pos) { |
| 530 | u64 last_pos_in_file; |
| 531 | u64 hole_size; |
| 532 | u64 mask = root->blocksize - 1; |
| 533 | last_pos_in_file = (inode->i_size + mask) & ~mask; |
| 534 | hole_size = (start_pos - last_pos_in_file + mask) & ~mask; |
| 535 | hole_size >>= inode->i_blkbits; |
| 536 | if (last_pos_in_file < start_pos) { |
| 537 | ret = btrfs_insert_file_extent(trans, root, |
| 538 | inode->i_ino, |
| 539 | last_pos_in_file, |
| 540 | 0, 0, hole_size); |
| 541 | } |
| 542 | BUG_ON(ret); |
| 543 | } |
| 544 | |
| 545 | /* |
| 546 | * either allocate an extent for the new bytes or setup the key |
| 547 | * to show we are doing inline data in the extent |
| 548 | */ |
| 549 | if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || |
| 550 | pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { |
| 551 | ret = btrfs_alloc_extent(trans, root, inode->i_ino, |
| 552 | num_blocks, hint_block, (u64)-1, |
| 553 | &ins, 1); |
| 554 | BUG_ON(ret); |
| 555 | ret = btrfs_insert_file_extent(trans, root, inode->i_ino, |
| 556 | start_pos, ins.objectid, ins.offset, |
| 557 | ins.offset); |
| 558 | BUG_ON(ret); |
| 559 | } else { |
| 560 | ins.offset = 0; |
| 561 | ins.objectid = 0; |
| 562 | } |
| 563 | BUG_ON(ret); |
| 564 | alloc_extent_start = ins.objectid; |
| 565 | ret = btrfs_end_transaction(trans, root); |
| 566 | mutex_unlock(&root->fs_info->fs_mutex); |
| 567 | |
| 568 | while(count > 0) { |
| 569 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); |
Chris Mason | 84f54cf | 2007-06-12 07:43:08 -0400 | [diff] [blame^] | 570 | size_t write_bytes = min(count, |
| 571 | (size_t)PAGE_CACHE_SIZE - offset); |
Chris Mason | 39279cc | 2007-06-12 06:35:45 -0400 | [diff] [blame] | 572 | size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> |
| 573 | PAGE_CACHE_SHIFT; |
| 574 | |
| 575 | memset(pages, 0, sizeof(pages)); |
| 576 | ret = prepare_pages(root, file, pages, num_pages, |
| 577 | pos, first_index, last_index, |
| 578 | write_bytes, alloc_extent_start); |
| 579 | BUG_ON(ret); |
| 580 | |
| 581 | /* FIXME blocks != pagesize */ |
| 582 | if (alloc_extent_start) |
| 583 | alloc_extent_start += num_pages; |
| 584 | ret = btrfs_copy_from_user(pos, num_pages, |
| 585 | write_bytes, pages, buf); |
| 586 | BUG_ON(ret); |
| 587 | |
| 588 | ret = dirty_and_release_pages(NULL, root, file, pages, |
| 589 | num_pages, pos, write_bytes); |
| 590 | BUG_ON(ret); |
| 591 | btrfs_drop_pages(pages, num_pages); |
| 592 | |
| 593 | buf += write_bytes; |
| 594 | count -= write_bytes; |
| 595 | pos += write_bytes; |
| 596 | num_written += write_bytes; |
| 597 | |
| 598 | balance_dirty_pages_ratelimited(inode->i_mapping); |
| 599 | btrfs_btree_balance_dirty(root); |
| 600 | cond_resched(); |
| 601 | } |
| 602 | out_unlock: |
| 603 | mutex_unlock(&inode->i_mutex); |
| 604 | out: |
| 605 | if (pinned[0]) |
| 606 | page_cache_release(pinned[0]); |
| 607 | if (pinned[1]) |
| 608 | page_cache_release(pinned[1]); |
| 609 | *ppos = pos; |
| 610 | current->backing_dev_info = NULL; |
| 611 | mark_inode_dirty(inode); |
| 612 | return num_written ? num_written : err; |
| 613 | } |
| 614 | |
| 615 | /* |
| 616 | * FIXME, do this by stuffing the csum we want in the info hanging off |
| 617 | * page->private. For now, verify file csums on read |
| 618 | */ |
| 619 | static int btrfs_read_actor(read_descriptor_t *desc, struct page *page, |
| 620 | unsigned long offset, unsigned long size) |
| 621 | { |
| 622 | char *kaddr; |
| 623 | unsigned long left, count = desc->count; |
| 624 | struct inode *inode = page->mapping->host; |
| 625 | |
| 626 | if (size > count) |
| 627 | size = count; |
| 628 | |
| 629 | if (!PageChecked(page)) { |
| 630 | /* FIXME, do it per block */ |
| 631 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 632 | int ret; |
| 633 | struct buffer_head *bh; |
| 634 | |
| 635 | if (page_has_buffers(page)) { |
| 636 | bh = page_buffers(page); |
| 637 | if (!buffer_mapped(bh)) { |
| 638 | SetPageChecked(page); |
| 639 | goto checked; |
| 640 | } |
| 641 | } |
| 642 | |
| 643 | ret = btrfs_csum_verify_file_block(root, |
| 644 | page->mapping->host->i_ino, |
| 645 | page->index << PAGE_CACHE_SHIFT, |
| 646 | kmap(page), PAGE_CACHE_SIZE); |
| 647 | if (ret) { |
| 648 | if (ret != -ENOENT) { |
| 649 | printk("failed to verify ino %lu page %lu ret %d\n", |
| 650 | page->mapping->host->i_ino, |
| 651 | page->index, ret); |
| 652 | memset(page_address(page), 1, PAGE_CACHE_SIZE); |
| 653 | flush_dcache_page(page); |
| 654 | } |
| 655 | } |
| 656 | SetPageChecked(page); |
| 657 | kunmap(page); |
| 658 | } |
| 659 | checked: |
| 660 | /* |
| 661 | * Faults on the destination of a read are common, so do it before |
| 662 | * taking the kmap. |
| 663 | */ |
| 664 | if (!fault_in_pages_writeable(desc->arg.buf, size)) { |
| 665 | kaddr = kmap_atomic(page, KM_USER0); |
| 666 | left = __copy_to_user_inatomic(desc->arg.buf, |
| 667 | kaddr + offset, size); |
| 668 | kunmap_atomic(kaddr, KM_USER0); |
| 669 | if (left == 0) |
| 670 | goto success; |
| 671 | } |
| 672 | |
| 673 | /* Do it the slow way */ |
| 674 | kaddr = kmap(page); |
| 675 | left = __copy_to_user(desc->arg.buf, kaddr + offset, size); |
| 676 | kunmap(page); |
| 677 | |
| 678 | if (left) { |
| 679 | size -= left; |
| 680 | desc->error = -EFAULT; |
| 681 | } |
| 682 | success: |
| 683 | desc->count = count - size; |
| 684 | desc->written += size; |
| 685 | desc->arg.buf += size; |
| 686 | return size; |
| 687 | } |
| 688 | |
| 689 | /** |
| 690 | * btrfs_file_aio_read - filesystem read routine, with a mod to csum verify |
| 691 | * @iocb: kernel I/O control block |
| 692 | * @iov: io vector request |
| 693 | * @nr_segs: number of segments in the iovec |
| 694 | * @pos: current file position |
| 695 | */ |
| 696 | static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov, |
| 697 | unsigned long nr_segs, loff_t pos) |
| 698 | { |
| 699 | struct file *filp = iocb->ki_filp; |
| 700 | ssize_t retval; |
| 701 | unsigned long seg; |
| 702 | size_t count; |
| 703 | loff_t *ppos = &iocb->ki_pos; |
| 704 | |
| 705 | count = 0; |
| 706 | for (seg = 0; seg < nr_segs; seg++) { |
| 707 | const struct iovec *iv = &iov[seg]; |
| 708 | |
| 709 | /* |
| 710 | * If any segment has a negative length, or the cumulative |
| 711 | * length ever wraps negative then return -EINVAL. |
| 712 | */ |
| 713 | count += iv->iov_len; |
| 714 | if (unlikely((ssize_t)(count|iv->iov_len) < 0)) |
| 715 | return -EINVAL; |
| 716 | if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len)) |
| 717 | continue; |
| 718 | if (seg == 0) |
| 719 | return -EFAULT; |
| 720 | nr_segs = seg; |
| 721 | count -= iv->iov_len; /* This segment is no good */ |
| 722 | break; |
| 723 | } |
| 724 | retval = 0; |
| 725 | if (count) { |
| 726 | for (seg = 0; seg < nr_segs; seg++) { |
| 727 | read_descriptor_t desc; |
| 728 | |
| 729 | desc.written = 0; |
| 730 | desc.arg.buf = iov[seg].iov_base; |
| 731 | desc.count = iov[seg].iov_len; |
| 732 | if (desc.count == 0) |
| 733 | continue; |
| 734 | desc.error = 0; |
| 735 | do_generic_file_read(filp, ppos, &desc, |
| 736 | btrfs_read_actor); |
| 737 | retval += desc.written; |
| 738 | if (desc.error) { |
| 739 | retval = retval ?: desc.error; |
| 740 | break; |
| 741 | } |
| 742 | } |
| 743 | } |
| 744 | return retval; |
| 745 | } |
| 746 | |
| 747 | static int btrfs_sync_file(struct file *file, |
| 748 | struct dentry *dentry, int datasync) |
| 749 | { |
| 750 | struct inode *inode = dentry->d_inode; |
| 751 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 752 | int ret; |
| 753 | struct btrfs_trans_handle *trans; |
| 754 | |
| 755 | /* |
| 756 | * FIXME, use inode generation number to check if we can skip the |
| 757 | * commit |
| 758 | */ |
| 759 | mutex_lock(&root->fs_info->fs_mutex); |
| 760 | trans = btrfs_start_transaction(root, 1); |
| 761 | if (!trans) { |
| 762 | ret = -ENOMEM; |
| 763 | goto out; |
| 764 | } |
| 765 | ret = btrfs_commit_transaction(trans, root); |
| 766 | mutex_unlock(&root->fs_info->fs_mutex); |
| 767 | out: |
| 768 | return ret > 0 ? EIO : ret; |
| 769 | } |
| 770 | |
| 771 | struct file_operations btrfs_file_operations = { |
| 772 | .llseek = generic_file_llseek, |
| 773 | .read = do_sync_read, |
| 774 | .aio_read = btrfs_file_aio_read, |
| 775 | .write = btrfs_file_write, |
| 776 | .mmap = generic_file_mmap, |
| 777 | .open = generic_file_open, |
| 778 | .ioctl = btrfs_ioctl, |
| 779 | .fsync = btrfs_sync_file, |
| 780 | #ifdef CONFIG_COMPAT |
| 781 | .compat_ioctl = btrfs_compat_ioctl, |
| 782 | #endif |
| 783 | }; |
| 784 | |