/*
 * Copyright (C) 2008 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
#include <linux/sched.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <asm/bug.h>
#include "ctree.h"
#include "extent_io.h"
#include "locking.h"

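/*
 * Take the per-extent_buffer spinlock.  Callers may already hold the
 * locks of other levels in the tree, hence the "nested" name.
 */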
static inline void spin_nested(struct extent_buffer *eb)
{
	spin_lock(&eb->lock);
}

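/*
 * Rough lifecycle of a tree lock (a sketch, not an exhaustive list of
 * callers): take the spinning lock with btrfs_tree_lock() or one of the
 * trylock variants, switch to blocking mode with btrfs_set_lock_blocking()
 * before doing anything that might sleep, switch back with
 * btrfs_clear_lock_blocking(), and finally drop the lock with
 * btrfs_tree_unlock().
 */
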
/*
 * Setting a lock to blocking sets the flag that forces other procs who
 * want the lock to wait, and then drops the spinlock.  After this you
 * can safely schedule with the lock held.
 */
void btrfs_set_lock_blocking(struct extent_buffer *eb)
{
	if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
		set_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags);
		spin_unlock(&eb->lock);
	}
	/* exit with the spin lock released and the bit set */
}

/*
 * Clearing the blocking flag takes the spinlock again.  After this it
 * is no longer safe to schedule.
 */
void btrfs_clear_lock_blocking(struct extent_buffer *eb)
{
	if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
		spin_nested(eb);
		clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags);
		smp_mb__after_clear_bit();
	}
	/* exit with the spin lock held */
}

/*
 * Unfortunately, many of the places that currently set a lock to blocking
 * don't end up blocking for very long, and often they don't block
 * at all.  For a dbench 50 run, if we don't spin on the blocking bit
 * at all, the context switch rate can jump up to 400,000/sec or more.
 *
 * So, we're still stuck with this crummy spin on the blocking bit,
 * at least until the most common causes of the short blocks
 * can be dealt with.
 *
 * Returns 1 if the blocking bit cleared while we were spinning, and 0
 * if we gave up (the spin budget ran out or we need to reschedule).
 */
static int btrfs_spin_on_block(struct extent_buffer *eb)
{
	int i;

	for (i = 0; i < 512; i++) {
		if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
			return 1;
		if (need_resched())
			break;
		cpu_relax();
	}
	return 0;
}

/*
 * This is somewhat different from trylock.  It will spin briefly and
 * take the spinlock, but if it finds the lock is set to blocking it
 * will return without the lock held.
 *
 * returns 1 if it was able to take the lock and zero otherwise
 *
 * After this call, scheduling is not safe without first calling
 * btrfs_set_lock_blocking()
 */
int btrfs_try_spin_lock(struct extent_buffer *eb)
{
	int i;

	if (btrfs_spin_on_block(eb)) {
		spin_nested(eb);
		if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
			return 1;
		spin_unlock(&eb->lock);
	}
	/* spin for a bit on the BLOCKING flag */
	for (i = 0; i < 2; i++) {
		cpu_relax();
		if (!btrfs_spin_on_block(eb))
			break;

		spin_nested(eb);
		if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
			return 1;
		spin_unlock(&eb->lock);
	}
	return 0;
}

/*
 * The autoremove wake function will return 0 if it tried to wake up
 * a process that was already awake, which means that process won't
 * count as an exclusive wakeup.  The waitq code will continue waking
 * procs until it finds one that was actually sleeping.
 *
 * For btrfs, this isn't quite what we want.  We want a single proc
 * to be notified that the lock is ready for taking.  If that proc
 * already happens to be awake, great, it will loop around and try for
 * the lock.
 *
 * So, btrfs_wake_function always returns 1, even when the proc that we
 * tried to wake up was already awake.
 */
static int btrfs_wake_function(wait_queue_t *wait, unsigned mode,
			       int sync, void *key)
{
	autoremove_wake_function(wait, mode, sync, key);
	return 1;
}

/*
 * returns with the extent buffer spinlocked.
 *
 * This will spin and/or wait as required to take the lock, and then
 * return with the spinlock held.
 *
 * After this call, scheduling is not safe without first calling
 * btrfs_set_lock_blocking()
 */
int btrfs_tree_lock(struct extent_buffer *eb)
{
	DEFINE_WAIT(wait);
	/* route wakeups through btrfs_wake_function (see above) */
	wait.func = btrfs_wake_function;

	if (!btrfs_spin_on_block(eb))
		goto sleep;

	while (1) {
		spin_nested(eb);

		/* nobody is blocking, exit with the spinlock held */
		if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
			return 0;

		/*
		 * we have the spinlock, but the real owner is blocking.
		 * wait for them
		 */
		spin_unlock(&eb->lock);

		/*
		 * spin for a bit, and if the blocking flag goes away,
		 * loop around
		 */
		cpu_relax();
		if (btrfs_spin_on_block(eb))
			continue;
sleep:
		prepare_to_wait_exclusive(&eb->lock_wq, &wait,
					  TASK_UNINTERRUPTIBLE);

		if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
			schedule();

		finish_wait(&eb->lock_wq, &wait);
	}
	return 0;
}

/*
 * Very quick trylock, this does not spin or schedule.  It returns
 * 1 with the spinlock held if it was able to take the lock, or it
 * returns zero if it was unable to take the lock.
 *
 * After this call, scheduling is not safe without first calling
 * btrfs_set_lock_blocking()
 */
int btrfs_try_tree_lock(struct extent_buffer *eb)
{
	if (spin_trylock(&eb->lock)) {
		if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
			/*
			 * we've got the spinlock, but the real owner is
			 * blocking.  Drop the spinlock and return failure
			 */
			spin_unlock(&eb->lock);
			return 0;
		}
		return 1;
	}
	/* someone else has the spinlock; give up */
	return 0;
}

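/*
 * Drop the tree lock.  This handles both cases: a blocking owner no
 * longer holds the spinlock, so only the bit is cleared, while a
 * spinning owner just unlocks.  Either way a single waiter is woken so
 * it can retry the lock.
 */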
int btrfs_tree_unlock(struct extent_buffer *eb)
{
	/*
	 * if we were a blocking owner, we don't have the spinlock held;
	 * just clear the bit and look for waiters
	 */
	if (test_and_clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
		smp_mb__after_clear_bit();
	else
		spin_unlock(&eb->lock);

	if (waitqueue_active(&eb->lock_wq))
		wake_up(&eb->lock_wq);
	return 0;
}

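/*
 * Sanity check that the extent buffer is locked: if the BLOCKING bit is
 * not set, the spinlock must be held.
 */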
void btrfs_assert_tree_locked(struct extent_buffer *eb)
{
	if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
		assert_spin_locked(&eb->lock);
}