| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1 | /* -*- mode: c; c-basic-offset: 8; -*- | 
 | 2 |  * vim: noexpandtab sw=8 ts=8 sts=0: | 
 | 3 |  * | 
 | 4 |  * dlmglue.c | 
 | 5 |  * | 
 | 6 |  * Code which implements an OCFS2 specific interface to our DLM. | 
 | 7 |  * | 
 | 8 |  * Copyright (C) 2003, 2004 Oracle.  All rights reserved. | 
 | 9 |  * | 
 | 10 |  * This program is free software; you can redistribute it and/or | 
 | 11 |  * modify it under the terms of the GNU General Public | 
 | 12 |  * License as published by the Free Software Foundation; either | 
 | 13 |  * version 2 of the License, or (at your option) any later version. | 
 | 14 |  * | 
 | 15 |  * This program is distributed in the hope that it will be useful, | 
 | 16 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
 | 17 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
 | 18 |  * General Public License for more details. | 
 | 19 |  * | 
 | 20 |  * You should have received a copy of the GNU General Public | 
 | 21 |  * License along with this program; if not, write to the | 
 | 22 |  * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | 
 | 23 |  * Boston, MA 02111-1307, USA. | 
 | 24 |  */ | 
 | 25 |  | 
 | 26 | #include <linux/types.h> | 
 | 27 | #include <linux/slab.h> | 
 | 28 | #include <linux/highmem.h> | 
 | 29 | #include <linux/mm.h> | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 30 | #include <linux/kthread.h> | 
 | 31 | #include <linux/pagemap.h> | 
 | 32 | #include <linux/debugfs.h> | 
 | 33 | #include <linux/seq_file.h> | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 34 | #include <linux/time.h> | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 35 | #include <linux/quotaops.h> | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 36 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 37 | #define MLOG_MASK_PREFIX ML_DLM_GLUE | 
 | 38 | #include <cluster/masklog.h> | 
 | 39 |  | 
 | 40 | #include "ocfs2.h" | 
| Joel Becker | d24fbcd | 2008-01-25 17:02:21 -0800 | [diff] [blame] | 41 | #include "ocfs2_lockingver.h" | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 42 |  | 
 | 43 | #include "alloc.h" | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 44 | #include "dcache.h" | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 45 | #include "dlmglue.h" | 
 | 46 | #include "extent_map.h" | 
| Tiger Yang | 7f1a37e | 2006-11-15 15:48:42 +0800 | [diff] [blame] | 47 | #include "file.h" | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 48 | #include "heartbeat.h" | 
 | 49 | #include "inode.h" | 
 | 50 | #include "journal.h" | 
| Joel Becker | 24ef181 | 2008-01-29 17:37:32 -0800 | [diff] [blame] | 51 | #include "stackglue.h" | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 52 | #include "slot_map.h" | 
 | 53 | #include "super.h" | 
 | 54 | #include "uptodate.h" | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 55 | #include "quota.h" | 
| Tao Ma | 8dec98e | 2009-08-18 11:19:58 +0800 | [diff] [blame] | 56 | #include "refcounttree.h" | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 57 |  | 
 | 58 | #include "buffer_head_io.h" | 
 | 59 |  | 
 | 60 | struct ocfs2_mask_waiter { | 
 | 61 | 	struct list_head	mw_item; | 
 | 62 | 	int			mw_status; | 
 | 63 | 	struct completion	mw_complete; | 
 | 64 | 	unsigned long		mw_mask; | 
 | 65 | 	unsigned long		mw_goal; | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 66 | #ifdef CONFIG_OCFS2_FS_STATS | 
| Sunil Mushran | 5bc970e | 2010-12-28 23:26:03 -0800 | [diff] [blame] | 67 | 	ktime_t			mw_lock_start; | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 68 | #endif | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 69 | }; | 
 | 70 |  | 
/*
 * ->get_osb implementations referenced by the lock ops tables in this
 * file; each one maps a lockres back to its struct ocfs2_super.
 */
static struct ocfs2_super *
ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *
ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *
ocfs2_get_file_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *
ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres);
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 75 |  | 
/*
 * Values a ->downconvert_worker function may return.
 *
 * They steer the precise actions taken by ocfs2_unblock_lock() and
 * ocfs2_process_blocked_lock().
 */
enum ocfs2_unblock_action {
	/* Continue downconvert */
	UNBLOCK_CONTINUE	= 0,

	/* Continue downconvert, fire ->post_unlock callback */
	UNBLOCK_CONTINUE_POST	= 1,

	/* Do not downconvert, fire ->post_unlock() callback. */
	UNBLOCK_STOP_POST	= 2,
};
 | 90 |  | 
 | 91 | struct ocfs2_unblock_ctl { | 
 | 92 | 	int requeue; | 
 | 93 | 	enum ocfs2_unblock_action unblock_action; | 
 | 94 | }; | 
 | 95 |  | 
| Jan Kara | cb25797 | 2009-06-04 15:26:50 +0200 | [diff] [blame] | 96 | /* Lockdep class keys */ | 
 | 97 | struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES]; | 
 | 98 |  | 
/*
 * Per-lock-type callbacks, declared up front because the lock ops
 * tables further down take their addresses.
 */

/* Inode (meta) lock callbacks. */
static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
					int new_level);
static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);
static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
				     int blocking);

/* Dentry lock callbacks. */
static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
				       int blocking);
static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres);

/* Quota info lock callback. */
static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres);

/* Refcount tree lock callbacks. */
static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
					    int new_level);
static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
					 int blocking);
 | 118 |  | 
| Adrian Bunk | 6cb129f | 2007-04-26 00:29:35 -0700 | [diff] [blame] | 119 | #define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres) | 
 | 120 |  | 
 | 121 | /* This aids in debugging situations where a bad LVB might be involved. */ | 
 | 122 | static void ocfs2_dump_meta_lvb_info(u64 level, | 
 | 123 | 				     const char *function, | 
 | 124 | 				     unsigned int line, | 
 | 125 | 				     struct ocfs2_lock_res *lockres) | 
 | 126 | { | 
| Mark Fasheh | a641dc2 | 2008-12-24 16:03:48 -0800 | [diff] [blame] | 127 | 	struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | 
| Adrian Bunk | 6cb129f | 2007-04-26 00:29:35 -0700 | [diff] [blame] | 128 |  | 
 | 129 | 	mlog(level, "LVB information for %s (called from %s:%u):\n", | 
 | 130 | 	     lockres->l_name, function, line); | 
 | 131 | 	mlog(level, "version: %u, clusters: %u, generation: 0x%x\n", | 
 | 132 | 	     lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters), | 
 | 133 | 	     be32_to_cpu(lvb->lvb_igeneration)); | 
 | 134 | 	mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n", | 
 | 135 | 	     (unsigned long long)be64_to_cpu(lvb->lvb_isize), | 
 | 136 | 	     be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), | 
 | 137 | 	     be16_to_cpu(lvb->lvb_imode)); | 
 | 138 | 	mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, " | 
 | 139 | 	     "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink), | 
 | 140 | 	     (long long)be64_to_cpu(lvb->lvb_iatime_packed), | 
 | 141 | 	     (long long)be64_to_cpu(lvb->lvb_ictime_packed), | 
 | 142 | 	     (long long)be64_to_cpu(lvb->lvb_imtime_packed), | 
 | 143 | 	     be32_to_cpu(lvb->lvb_iattr)); | 
 | 144 | } | 
 | 145 |  | 
 | 146 |  | 
/*
 * OCFS2 Lock Resource Operations
 *
 * A per-lock-type table of hooks that fine tunes how the generic
 * dlmglue locking infrastructure behaves.
 *
 * The simplest lock types point ->l_priv at their struct ocfs2_super
 * and leave every hook unset, letting the default actions manage
 * things.
 *
 * For now each lock type must also provide an init function and
 * trivial lock/unlock wrappers.  Call ocfs2_simple_drop_lockres() once
 * the lock is no longer needed (i.e., when the object is destroyed).
 */
struct ocfs2_lock_res_ops {
	/*
	 * Map an ocfs2_lock_res * to its ocfs2_super *.  Only needed
	 * when ->l_priv is something other than an ocfs2_super pointer.
	 */
	struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *);

	/*
	 * Optional.  Runs in the downconvert thread after a successful
	 * downconvert.  The lockres is not referenced again once this
	 * has been called, so freeing memory etc. is safe here.
	 *
	 * Exactly when it fires is decided by ->downconvert_worker().
	 */
	void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);

	/*
	 * Lets a lock type veto a downconvert with extra safety checks.
	 * Return 0 to have the downconvert re-queued for later, nonzero
	 * to let it proceed.
	 *
	 * Most locks get by with the default check that no incompatible
	 * holders exist.
	 *
	 * Called with the lockres spinlock held.
	 */
	int (*check_downconvert)(struct ocfs2_lock_res *, int);

	/*
	 * Lets a lock type fill in the lock value block.  Invoked on
	 * downconvert and when the lock is dropped.
	 *
	 * Lock types relying on this should set LOCK_TYPE_USES_LVB in
	 * the flags field.
	 *
	 * Called with the lockres spinlock held.
	 */
	void (*set_lvb)(struct ocfs2_lock_res *);

	/*
	 * Invoked from the downconvert thread once it has been decided
	 * that the lock will be downconverted.  No locks are held at
	 * that point, so the callback may do work that schedules
	 * (syncing out data, etc).
	 *
	 * Must return one of the ocfs2_unblock_action values to tell
	 * the thread what to do next.
	 */
	int (*downconvert_worker)(struct ocfs2_lock_res *, int);

	/*
	 * LOCK_TYPE_* flags describing the specific requirements of a
	 * lock type.  Each flag is documented at its definition.
	 */
	int flags;
};
 | 218 |  | 
/*
 * The lock type wants to "refresh" possibly stale data the first time
 * a meaningful (PRMODE or EXMODE) level is obtained.  With this flag
 * set, the ast function sets OCFS2_LOCK_NEEDS_REFRESH in the l_flags
 * member of the individual lockres; the locking wrapper is expected to
 * clear OCFS2_LOCK_NEEDS_REFRESH again once it is done.
 */
#define LOCK_TYPE_REQUIRES_REFRESH	0x1

/*
 * The lock type makes use of the lock value block.  The ->set_lvb lock
 * type callback must be defined.
 */
#define LOCK_TYPE_USES_LVB		0x2
 | 234 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 235 | static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { | 
| Mark Fasheh | 54a7e75 | 2006-09-12 21:49:13 -0700 | [diff] [blame] | 236 | 	.get_osb	= ocfs2_get_inode_osb, | 
| Mark Fasheh | f625c97 | 2006-09-12 21:24:53 -0700 | [diff] [blame] | 237 | 	.flags		= 0, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 238 | }; | 
 | 239 |  | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 240 | static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { | 
| Mark Fasheh | 54a7e75 | 2006-09-12 21:49:13 -0700 | [diff] [blame] | 241 | 	.get_osb	= ocfs2_get_inode_osb, | 
| Mark Fasheh | 810d5ae | 2006-09-13 21:39:52 -0700 | [diff] [blame] | 242 | 	.check_downconvert = ocfs2_check_meta_downconvert, | 
 | 243 | 	.set_lvb	= ocfs2_set_meta_lvb, | 
| Mark Fasheh | f1f5406 | 2007-10-18 15:13:59 -0700 | [diff] [blame] | 244 | 	.downconvert_worker = ocfs2_data_convert_worker, | 
| Mark Fasheh | b80fc01 | 2006-09-12 22:08:14 -0700 | [diff] [blame] | 245 | 	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 246 | }; | 
 | 247 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 248 | static struct ocfs2_lock_res_ops ocfs2_super_lops = { | 
| Mark Fasheh | f625c97 | 2006-09-12 21:24:53 -0700 | [diff] [blame] | 249 | 	.flags		= LOCK_TYPE_REQUIRES_REFRESH, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 250 | }; | 
 | 251 |  | 
 | 252 | static struct ocfs2_lock_res_ops ocfs2_rename_lops = { | 
| Mark Fasheh | f625c97 | 2006-09-12 21:24:53 -0700 | [diff] [blame] | 253 | 	.flags		= 0, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 254 | }; | 
 | 255 |  | 
| wengang wang | 6ca497a | 2009-03-06 21:29:10 +0800 | [diff] [blame] | 256 | static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = { | 
 | 257 | 	.flags		= 0, | 
 | 258 | }; | 
 | 259 |  | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 260 | static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = { | 
 | 261 | 	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, | 
 | 262 | }; | 
 | 263 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 264 | static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { | 
| Mark Fasheh | 54a7e75 | 2006-09-12 21:49:13 -0700 | [diff] [blame] | 265 | 	.get_osb	= ocfs2_get_dentry_osb, | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 266 | 	.post_unlock	= ocfs2_dentry_post_unlock, | 
| Mark Fasheh | cc567d8 | 2006-09-13 21:52:21 -0700 | [diff] [blame] | 267 | 	.downconvert_worker = ocfs2_dentry_convert_worker, | 
| Mark Fasheh | f625c97 | 2006-09-12 21:24:53 -0700 | [diff] [blame] | 268 | 	.flags		= 0, | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 269 | }; | 
 | 270 |  | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 271 | static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { | 
 | 272 | 	.get_osb	= ocfs2_get_inode_osb, | 
 | 273 | 	.flags		= 0, | 
 | 274 | }; | 
 | 275 |  | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 276 | static struct ocfs2_lock_res_ops ocfs2_flock_lops = { | 
 | 277 | 	.get_osb	= ocfs2_get_file_osb, | 
 | 278 | 	.flags		= 0, | 
 | 279 | }; | 
 | 280 |  | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 281 | static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = { | 
 | 282 | 	.set_lvb	= ocfs2_set_qinfo_lvb, | 
 | 283 | 	.get_osb	= ocfs2_get_qinfo_osb, | 
 | 284 | 	.flags		= LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB, | 
 | 285 | }; | 
 | 286 |  | 
| Tao Ma | 8dec98e | 2009-08-18 11:19:58 +0800 | [diff] [blame] | 287 | static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = { | 
 | 288 | 	.check_downconvert = ocfs2_check_refcount_downconvert, | 
 | 289 | 	.downconvert_worker = ocfs2_refcount_convert_worker, | 
 | 290 | 	.flags		= 0, | 
 | 291 | }; | 
 | 292 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 293 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) | 
 | 294 | { | 
 | 295 | 	return lockres->l_type == OCFS2_LOCK_TYPE_META || | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 296 | 		lockres->l_type == OCFS2_LOCK_TYPE_RW || | 
 | 297 | 		lockres->l_type == OCFS2_LOCK_TYPE_OPEN; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 298 | } | 
 | 299 |  | 
| Joel Becker | c0e4133 | 2010-01-29 14:46:44 -0800 | [diff] [blame] | 300 | static inline struct ocfs2_lock_res *ocfs2_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb) | 
| Joel Becker | a796d28 | 2010-01-28 19:22:39 -0800 | [diff] [blame] | 301 | { | 
 | 302 | 	return container_of(lksb, struct ocfs2_lock_res, l_lksb); | 
 | 303 | } | 
 | 304 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 305 | static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) | 
 | 306 | { | 
 | 307 | 	BUG_ON(!ocfs2_is_inode_lock(lockres)); | 
 | 308 |  | 
 | 309 | 	return (struct inode *) lockres->l_priv; | 
 | 310 | } | 
 | 311 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 312 | static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres) | 
 | 313 | { | 
 | 314 | 	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY); | 
 | 315 |  | 
 | 316 | 	return (struct ocfs2_dentry_lock *)lockres->l_priv; | 
 | 317 | } | 
 | 318 |  | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 319 | static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres) | 
 | 320 | { | 
 | 321 | 	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO); | 
 | 322 |  | 
 | 323 | 	return (struct ocfs2_mem_dqinfo *)lockres->l_priv; | 
 | 324 | } | 
 | 325 |  | 
| Tao Ma | 8dec98e | 2009-08-18 11:19:58 +0800 | [diff] [blame] | 326 | static inline struct ocfs2_refcount_tree * | 
 | 327 | ocfs2_lock_res_refcount_tree(struct ocfs2_lock_res *res) | 
 | 328 | { | 
 | 329 | 	return container_of(res, struct ocfs2_refcount_tree, rf_lockres); | 
 | 330 | } | 
 | 331 |  | 
| Mark Fasheh | 54a7e75 | 2006-09-12 21:49:13 -0700 | [diff] [blame] | 332 | static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres) | 
 | 333 | { | 
 | 334 | 	if (lockres->l_ops->get_osb) | 
 | 335 | 		return lockres->l_ops->get_osb(lockres); | 
 | 336 |  | 
 | 337 | 	return (struct ocfs2_super *)lockres->l_priv; | 
 | 338 | } | 
 | 339 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 340 | static int ocfs2_lock_create(struct ocfs2_super *osb, | 
 | 341 | 			     struct ocfs2_lock_res *lockres, | 
 | 342 | 			     int level, | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 343 | 			     u32 dlm_flags); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 344 | static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, | 
 | 345 | 						     int wanted); | 
| Jan Kara | cb25797 | 2009-06-04 15:26:50 +0200 | [diff] [blame] | 346 | static void __ocfs2_cluster_unlock(struct ocfs2_super *osb, | 
 | 347 | 				   struct ocfs2_lock_res *lockres, | 
 | 348 | 				   int level, unsigned long caller_ip); | 
 | 349 | static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb, | 
 | 350 | 					struct ocfs2_lock_res *lockres, | 
 | 351 | 					int level) | 
 | 352 | { | 
 | 353 | 	__ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_); | 
 | 354 | } | 
 | 355 |  | 
/* Forward declarations: generic ast/bast handling and error recovery. */
static inline void
ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres);
static inline void
ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres);
static inline void
ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres);
static int
ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level);
static void
ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
			    struct ocfs2_lock_res *lockres);
static inline void
ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, int convert);
/*
 * Log a DLM failure at ML_ERROR.
 *
 * _func:    name of the DLM call that failed (printed with %s)
 * _err:     error value it returned (printed with %d)
 * _lockres: lock resource the call was made on
 *
 * Dentry locks are printed differently from all other types: the first
 * OCFS2_DENTRY_LOCK_INO_START - 1 characters of l_name are followed by
 * the value of ocfs2_get_dentry_lock_ino() in hex.
 *
 * _lockres is parenthesized at every use so that any pointer-valued
 * expression can be passed safely.
 */
#define ocfs2_log_dlm_error(_func, _err, _lockres) do {					\
	if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY)				\
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n",	\
		     _err, _func, (_lockres)->l_name);					\
	else										\
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %.*s%08x\n",	\
		     _err, _func, OCFS2_DENTRY_LOCK_INO_START - 1, (_lockres)->l_name,	\
		     (unsigned int)ocfs2_get_dentry_lock_ino(_lockres));		\
} while (0)
/* Forward declarations: downconvert thread and convert/cancel helpers. */
static int
ocfs2_downconvert_thread(void *arg);
static void
ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
			    struct ocfs2_lock_res *lockres);
static int
ocfs2_inode_lock_update(struct inode *inode, struct buffer_head **bh);
static void
ocfs2_drop_osb_locks(struct ocfs2_super *osb);
static inline int
ocfs2_highest_compat_lock_level(int level);
static unsigned int
ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, int new_level);
static int
ocfs2_downconvert_lock(struct ocfs2_super *osb,
		       struct ocfs2_lock_res *lockres,
		       int new_level, int lvb, unsigned int generation);
static int
ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
			     struct ocfs2_lock_res *lockres);
static int
ocfs2_cancel_convert(struct ocfs2_super *osb,
		     struct ocfs2_lock_res *lockres);
 | 391 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 392 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 393 | static void ocfs2_build_lock_name(enum ocfs2_lock_type type, | 
 | 394 | 				  u64 blkno, | 
 | 395 | 				  u32 generation, | 
 | 396 | 				  char *name) | 
 | 397 | { | 
 | 398 | 	int len; | 
 | 399 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 400 | 	BUG_ON(type >= OCFS2_NUM_LOCK_TYPES); | 
 | 401 |  | 
| Mark Fasheh | b0697053 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 402 | 	len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x", | 
 | 403 | 		       ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD, | 
 | 404 | 		       (long long)blkno, generation); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 405 |  | 
 | 406 | 	BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1)); | 
 | 407 |  | 
 | 408 | 	mlog(0, "built lock resource with name: %s\n", name); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 409 | } | 
 | 410 |  | 
| Ingo Molnar | 34af946 | 2006-06-27 02:53:55 -0700 | [diff] [blame] | 411 | static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 412 |  | 
 | 413 | static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res, | 
 | 414 | 				       struct ocfs2_dlm_debug *dlm_debug) | 
 | 415 | { | 
 | 416 | 	mlog(0, "Add tracking for lockres %s\n", res->l_name); | 
 | 417 |  | 
 | 418 | 	spin_lock(&ocfs2_dlm_tracking_lock); | 
 | 419 | 	list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking); | 
 | 420 | 	spin_unlock(&ocfs2_dlm_tracking_lock); | 
 | 421 | } | 
 | 422 |  | 
 | 423 | static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res) | 
 | 424 | { | 
 | 425 | 	spin_lock(&ocfs2_dlm_tracking_lock); | 
 | 426 | 	if (!list_empty(&res->l_debug_list)) | 
 | 427 | 		list_del_init(&res->l_debug_list); | 
 | 428 | 	spin_unlock(&ocfs2_dlm_tracking_lock); | 
 | 429 | } | 
 | 430 |  | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 431 | #ifdef CONFIG_OCFS2_FS_STATS | 
 | 432 | static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) | 
 | 433 | { | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 434 | 	res->l_lock_refresh = 0; | 
| Sunil Mushran | 5bc970e | 2010-12-28 23:26:03 -0800 | [diff] [blame] | 435 | 	memset(&res->l_lock_prmode, 0, sizeof(struct ocfs2_lock_stats)); | 
 | 436 | 	memset(&res->l_lock_exmode, 0, sizeof(struct ocfs2_lock_stats)); | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 437 | } | 
 | 438 |  | 
 | 439 | static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level, | 
 | 440 | 				    struct ocfs2_mask_waiter *mw, int ret) | 
 | 441 | { | 
| Sunil Mushran | 5bc970e | 2010-12-28 23:26:03 -0800 | [diff] [blame] | 442 | 	u32 usec; | 
 | 443 | 	ktime_t kt; | 
 | 444 | 	struct ocfs2_lock_stats *stats; | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 445 |  | 
| Sunil Mushran | 5bc970e | 2010-12-28 23:26:03 -0800 | [diff] [blame] | 446 | 	if (level == LKM_PRMODE) | 
 | 447 | 		stats = &res->l_lock_prmode; | 
 | 448 | 	else if (level == LKM_EXMODE) | 
 | 449 | 		stats = &res->l_lock_exmode; | 
 | 450 | 	else | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 451 | 		return; | 
 | 452 |  | 
| Sunil Mushran | 5bc970e | 2010-12-28 23:26:03 -0800 | [diff] [blame] | 453 | 	kt = ktime_sub(ktime_get(), mw->mw_lock_start); | 
 | 454 | 	usec = ktime_to_us(kt); | 
 | 455 |  | 
 | 456 | 	stats->ls_gets++; | 
 | 457 | 	stats->ls_total += ktime_to_ns(kt); | 
 | 458 | 	/* overflow */ | 
 | 459 | 	if (unlikely(stats->ls_gets) == 0) { | 
 | 460 | 		stats->ls_gets++; | 
 | 461 | 		stats->ls_total = ktime_to_ns(kt); | 
 | 462 | 	} | 
 | 463 |  | 
 | 464 | 	if (stats->ls_max < usec) | 
 | 465 | 		stats->ls_max = usec; | 
 | 466 |  | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 467 | 	if (ret) | 
| Sunil Mushran | 5bc970e | 2010-12-28 23:26:03 -0800 | [diff] [blame] | 468 | 		stats->ls_fail++; | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 469 | } | 
 | 470 |  | 
 | 471 | static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) | 
 | 472 | { | 
 | 473 | 	lockres->l_lock_refresh++; | 
 | 474 | } | 
 | 475 |  | 
 | 476 | static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) | 
 | 477 | { | 
| Sunil Mushran | 5bc970e | 2010-12-28 23:26:03 -0800 | [diff] [blame] | 478 | 	mw->mw_lock_start = ktime_get(); | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 479 | } | 
 | 480 | #else | 
 | 481 | static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) | 
 | 482 | { | 
 | 483 | } | 
 | 484 | static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, | 
 | 485 | 			   int level, struct ocfs2_mask_waiter *mw, int ret) | 
 | 486 | { | 
 | 487 | } | 
 | 488 | static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) | 
 | 489 | { | 
 | 490 | } | 
 | 491 | static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) | 
 | 492 | { | 
 | 493 | } | 
 | 494 | #endif | 
 | 495 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 496 | static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, | 
 | 497 | 				       struct ocfs2_lock_res *res, | 
 | 498 | 				       enum ocfs2_lock_type type, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 499 | 				       struct ocfs2_lock_res_ops *ops, | 
 | 500 | 				       void *priv) | 
 | 501 | { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 502 | 	res->l_type          = type; | 
 | 503 | 	res->l_ops           = ops; | 
 | 504 | 	res->l_priv          = priv; | 
 | 505 |  | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 506 | 	res->l_level         = DLM_LOCK_IV; | 
 | 507 | 	res->l_requested     = DLM_LOCK_IV; | 
 | 508 | 	res->l_blocking      = DLM_LOCK_IV; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 509 | 	res->l_action        = OCFS2_AST_INVALID; | 
 | 510 | 	res->l_unlock_action = OCFS2_UNLOCK_INVALID; | 
 | 511 |  | 
 | 512 | 	res->l_flags         = OCFS2_LOCK_INITIALIZED; | 
 | 513 |  | 
 | 514 | 	ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug); | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 515 |  | 
 | 516 | 	ocfs2_init_lock_stats(res); | 
| Jan Kara | cb25797 | 2009-06-04 15:26:50 +0200 | [diff] [blame] | 517 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 
 | 518 | 	if (type != OCFS2_LOCK_TYPE_OPEN) | 
 | 519 | 		lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type], | 
 | 520 | 				 &lockdep_keys[type], 0); | 
 | 521 | 	else | 
 | 522 | 		res->l_lockdep_map.key = NULL; | 
 | 523 | #endif | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 524 | } | 
 | 525 |  | 
 | 526 | void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) | 
 | 527 | { | 
 | 528 | 	/* This also clears out the lock status block */ | 
 | 529 | 	memset(res, 0, sizeof(struct ocfs2_lock_res)); | 
 | 530 | 	spin_lock_init(&res->l_lock); | 
 | 531 | 	init_waitqueue_head(&res->l_event); | 
 | 532 | 	INIT_LIST_HEAD(&res->l_blocked_list); | 
 | 533 | 	INIT_LIST_HEAD(&res->l_mask_waiters); | 
 | 534 | } | 
 | 535 |  | 
 | 536 | void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, | 
 | 537 | 			       enum ocfs2_lock_type type, | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 538 | 			       unsigned int generation, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 539 | 			       struct inode *inode) | 
 | 540 | { | 
 | 541 | 	struct ocfs2_lock_res_ops *ops; | 
 | 542 |  | 
 | 543 | 	switch(type) { | 
 | 544 | 		case OCFS2_LOCK_TYPE_RW: | 
 | 545 | 			ops = &ocfs2_inode_rw_lops; | 
 | 546 | 			break; | 
 | 547 | 		case OCFS2_LOCK_TYPE_META: | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 548 | 			ops = &ocfs2_inode_inode_lops; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 549 | 			break; | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 550 | 		case OCFS2_LOCK_TYPE_OPEN: | 
 | 551 | 			ops = &ocfs2_inode_open_lops; | 
 | 552 | 			break; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 553 | 		default: | 
 | 554 | 			mlog_bug_on_msg(1, "type: %d\n", type); | 
 | 555 | 			ops = NULL; /* thanks, gcc */ | 
 | 556 | 			break; | 
 | 557 | 	}; | 
 | 558 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 559 | 	ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno, | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 560 | 			      generation, res->l_name); | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 561 | 	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode); | 
 | 562 | } | 
 | 563 |  | 
| Mark Fasheh | 54a7e75 | 2006-09-12 21:49:13 -0700 | [diff] [blame] | 564 | static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) | 
 | 565 | { | 
 | 566 | 	struct inode *inode = ocfs2_lock_res_inode(lockres); | 
 | 567 |  | 
 | 568 | 	return OCFS2_SB(inode->i_sb); | 
 | 569 | } | 
 | 570 |  | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 571 | static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres) | 
 | 572 | { | 
 | 573 | 	struct ocfs2_mem_dqinfo *info = lockres->l_priv; | 
 | 574 |  | 
 | 575 | 	return OCFS2_SB(info->dqi_gi.dqi_sb); | 
 | 576 | } | 
 | 577 |  | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 578 | static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres) | 
 | 579 | { | 
 | 580 | 	struct ocfs2_file_private *fp = lockres->l_priv; | 
 | 581 |  | 
 | 582 | 	return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb); | 
 | 583 | } | 
 | 584 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 585 | static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) | 
 | 586 | { | 
 | 587 | 	__be64 inode_blkno_be; | 
 | 588 |  | 
 | 589 | 	memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], | 
 | 590 | 	       sizeof(__be64)); | 
 | 591 |  | 
 | 592 | 	return be64_to_cpu(inode_blkno_be); | 
 | 593 | } | 
 | 594 |  | 
| Mark Fasheh | 54a7e75 | 2006-09-12 21:49:13 -0700 | [diff] [blame] | 595 | static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres) | 
 | 596 | { | 
 | 597 | 	struct ocfs2_dentry_lock *dl = lockres->l_priv; | 
 | 598 |  | 
 | 599 | 	return OCFS2_SB(dl->dl_inode->i_sb); | 
 | 600 | } | 
 | 601 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 602 | void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, | 
 | 603 | 				u64 parent, struct inode *inode) | 
 | 604 | { | 
 | 605 | 	int len; | 
 | 606 | 	u64 inode_blkno = OCFS2_I(inode)->ip_blkno; | 
 | 607 | 	__be64 inode_blkno_be = cpu_to_be64(inode_blkno); | 
 | 608 | 	struct ocfs2_lock_res *lockres = &dl->dl_lockres; | 
 | 609 |  | 
 | 610 | 	ocfs2_lock_res_init_once(lockres); | 
 | 611 |  | 
 | 612 | 	/* | 
 | 613 | 	 * Unfortunately, the standard lock naming scheme won't work | 
 | 614 | 	 * here because we have two 16 byte values to use. Instead, | 
 | 615 | 	 * we'll stuff the inode number as a binary value. We still | 
 | 616 | 	 * want error prints to show something without garbling the | 
 | 617 | 	 * display, so drop a null byte in there before the inode | 
 | 618 | 	 * number. A future version of OCFS2 will likely use all | 
 | 619 | 	 * binary lock names. The stringified names have been a | 
 | 620 | 	 * tremendous aid in debugging, but now that the debugfs | 
 | 621 | 	 * interface exists, we can mangle things there if need be. | 
 | 622 | 	 * | 
 | 623 | 	 * NOTE: We also drop the standard "pad" value (the total lock | 
 | 624 | 	 * name size stays the same though - the last part is all | 
 | 625 | 	 * zeros due to the memset in ocfs2_lock_res_init_once() | 
 | 626 | 	 */ | 
 | 627 | 	len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START, | 
 | 628 | 		       "%c%016llx", | 
 | 629 | 		       ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY), | 
 | 630 | 		       (long long)parent); | 
 | 631 |  | 
 | 632 | 	BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1)); | 
 | 633 |  | 
 | 634 | 	memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be, | 
 | 635 | 	       sizeof(__be64)); | 
 | 636 |  | 
 | 637 | 	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, | 
 | 638 | 				   OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops, | 
 | 639 | 				   dl); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 640 | } | 
 | 641 |  | 
 | 642 | static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, | 
 | 643 | 				      struct ocfs2_super *osb) | 
 | 644 | { | 
 | 645 | 	/* Superblock lockres doesn't come from a slab so we call init | 
 | 646 | 	 * once on it manually.  */ | 
 | 647 | 	ocfs2_lock_res_init_once(res); | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 648 | 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO, | 
 | 649 | 			      0, res->l_name); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 650 | 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 651 | 				   &ocfs2_super_lops, osb); | 
 | 652 | } | 
 | 653 |  | 
 | 654 | static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, | 
 | 655 | 				       struct ocfs2_super *osb) | 
 | 656 | { | 
 | 657 | 	/* Rename lockres doesn't come from a slab so we call init | 
 | 658 | 	 * once on it manually.  */ | 
 | 659 | 	ocfs2_lock_res_init_once(res); | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 660 | 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name); | 
 | 661 | 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 662 | 				   &ocfs2_rename_lops, osb); | 
 | 663 | } | 
 | 664 |  | 
| wengang wang | 6ca497a | 2009-03-06 21:29:10 +0800 | [diff] [blame] | 665 | static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res, | 
 | 666 | 					 struct ocfs2_super *osb) | 
 | 667 | { | 
 | 668 | 	/* nfs_sync lockres doesn't come from a slab so we call init | 
 | 669 | 	 * once on it manually.  */ | 
 | 670 | 	ocfs2_lock_res_init_once(res); | 
 | 671 | 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name); | 
 | 672 | 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC, | 
 | 673 | 				   &ocfs2_nfs_sync_lops, osb); | 
 | 674 | } | 
 | 675 |  | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 676 | static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res, | 
 | 677 | 					    struct ocfs2_super *osb) | 
 | 678 | { | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 679 | 	ocfs2_lock_res_init_once(res); | 
 | 680 | 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name); | 
 | 681 | 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN, | 
 | 682 | 				   &ocfs2_orphan_scan_lops, osb); | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 683 | } | 
 | 684 |  | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 685 | void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, | 
 | 686 | 			      struct ocfs2_file_private *fp) | 
 | 687 | { | 
 | 688 | 	struct inode *inode = fp->fp_file->f_mapping->host; | 
 | 689 | 	struct ocfs2_inode_info *oi = OCFS2_I(inode); | 
 | 690 |  | 
 | 691 | 	ocfs2_lock_res_init_once(lockres); | 
 | 692 | 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno, | 
 | 693 | 			      inode->i_generation, lockres->l_name); | 
 | 694 | 	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, | 
 | 695 | 				   OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops, | 
 | 696 | 				   fp); | 
 | 697 | 	lockres->l_flags |= OCFS2_LOCK_NOCACHE; | 
 | 698 | } | 
 | 699 |  | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 700 | void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres, | 
 | 701 | 			       struct ocfs2_mem_dqinfo *info) | 
 | 702 | { | 
 | 703 | 	ocfs2_lock_res_init_once(lockres); | 
 | 704 | 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type, | 
 | 705 | 			      0, lockres->l_name); | 
 | 706 | 	ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres, | 
 | 707 | 				   OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops, | 
 | 708 | 				   info); | 
 | 709 | } | 
 | 710 |  | 
| Tao Ma | 8dec98e | 2009-08-18 11:19:58 +0800 | [diff] [blame] | 711 | void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres, | 
 | 712 | 				  struct ocfs2_super *osb, u64 ref_blkno, | 
 | 713 | 				  unsigned int generation) | 
 | 714 | { | 
 | 715 | 	ocfs2_lock_res_init_once(lockres); | 
 | 716 | 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_REFCOUNT, ref_blkno, | 
 | 717 | 			      generation, lockres->l_name); | 
 | 718 | 	ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_REFCOUNT, | 
 | 719 | 				   &ocfs2_refcount_block_lops, osb); | 
 | 720 | } | 
 | 721 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 722 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res) | 
 | 723 | { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 724 | 	if (!(res->l_flags & OCFS2_LOCK_INITIALIZED)) | 
 | 725 | 		return; | 
 | 726 |  | 
 | 727 | 	ocfs2_remove_lockres_tracking(res); | 
 | 728 |  | 
 | 729 | 	mlog_bug_on_msg(!list_empty(&res->l_blocked_list), | 
 | 730 | 			"Lockres %s is on the blocked list\n", | 
 | 731 | 			res->l_name); | 
 | 732 | 	mlog_bug_on_msg(!list_empty(&res->l_mask_waiters), | 
 | 733 | 			"Lockres %s has mask waiters pending\n", | 
 | 734 | 			res->l_name); | 
 | 735 | 	mlog_bug_on_msg(spin_is_locked(&res->l_lock), | 
 | 736 | 			"Lockres %s is locked\n", | 
 | 737 | 			res->l_name); | 
 | 738 | 	mlog_bug_on_msg(res->l_ro_holders, | 
 | 739 | 			"Lockres %s has %u ro holders\n", | 
 | 740 | 			res->l_name, res->l_ro_holders); | 
 | 741 | 	mlog_bug_on_msg(res->l_ex_holders, | 
 | 742 | 			"Lockres %s has %u ex holders\n", | 
 | 743 | 			res->l_name, res->l_ex_holders); | 
 | 744 |  | 
 | 745 | 	/* Need to clear out the lock status block for the dlm */ | 
 | 746 | 	memset(&res->l_lksb, 0, sizeof(res->l_lksb)); | 
 | 747 |  | 
 | 748 | 	res->l_flags = 0UL; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 749 | } | 
 | 750 |  | 
 | 751 | static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, | 
 | 752 | 				     int level) | 
 | 753 | { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 754 | 	BUG_ON(!lockres); | 
 | 755 |  | 
 | 756 | 	switch(level) { | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 757 | 	case DLM_LOCK_EX: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 758 | 		lockres->l_ex_holders++; | 
 | 759 | 		break; | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 760 | 	case DLM_LOCK_PR: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 761 | 		lockres->l_ro_holders++; | 
 | 762 | 		break; | 
 | 763 | 	default: | 
 | 764 | 		BUG(); | 
 | 765 | 	} | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 766 | } | 
 | 767 |  | 
 | 768 | static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres, | 
 | 769 | 				     int level) | 
 | 770 | { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 771 | 	BUG_ON(!lockres); | 
 | 772 |  | 
 | 773 | 	switch(level) { | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 774 | 	case DLM_LOCK_EX: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 775 | 		BUG_ON(!lockres->l_ex_holders); | 
 | 776 | 		lockres->l_ex_holders--; | 
 | 777 | 		break; | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 778 | 	case DLM_LOCK_PR: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 779 | 		BUG_ON(!lockres->l_ro_holders); | 
 | 780 | 		lockres->l_ro_holders--; | 
 | 781 | 		break; | 
 | 782 | 	default: | 
 | 783 | 		BUG(); | 
 | 784 | 	} | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 785 | } | 
 | 786 |  | 
 | 787 | /* WARNING: This function lives in a world where the only three lock | 
 | 788 |  * levels are EX, PR, and NL. It *will* have to be adjusted when more | 
 | 789 |  * lock types are added. */ | 
 | 790 | static inline int ocfs2_highest_compat_lock_level(int level) | 
 | 791 | { | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 792 | 	int new_level = DLM_LOCK_EX; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 793 |  | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 794 | 	if (level == DLM_LOCK_EX) | 
 | 795 | 		new_level = DLM_LOCK_NL; | 
 | 796 | 	else if (level == DLM_LOCK_PR) | 
 | 797 | 		new_level = DLM_LOCK_PR; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 798 | 	return new_level; | 
 | 799 | } | 
 | 800 |  | 
 | 801 | static void lockres_set_flags(struct ocfs2_lock_res *lockres, | 
 | 802 | 			      unsigned long newflags) | 
 | 803 | { | 
| Christoph Hellwig | 800deef | 2007-05-17 16:03:13 +0200 | [diff] [blame] | 804 | 	struct ocfs2_mask_waiter *mw, *tmp; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 805 |  | 
 | 806 |  	assert_spin_locked(&lockres->l_lock); | 
 | 807 |  | 
 | 808 | 	lockres->l_flags = newflags; | 
 | 809 |  | 
| Christoph Hellwig | 800deef | 2007-05-17 16:03:13 +0200 | [diff] [blame] | 810 | 	list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 811 | 		if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) | 
 | 812 | 			continue; | 
 | 813 |  | 
 | 814 | 		list_del_init(&mw->mw_item); | 
 | 815 | 		mw->mw_status = 0; | 
 | 816 | 		complete(&mw->mw_complete); | 
 | 817 | 	} | 
 | 818 | } | 
 | 819 | static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or) | 
 | 820 | { | 
 | 821 | 	lockres_set_flags(lockres, lockres->l_flags | or); | 
 | 822 | } | 
 | 823 | static void lockres_clear_flags(struct ocfs2_lock_res *lockres, | 
 | 824 | 				unsigned long clear) | 
 | 825 | { | 
 | 826 | 	lockres_set_flags(lockres, lockres->l_flags & ~clear); | 
 | 827 | } | 
 | 828 |  | 
 | 829 | static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres) | 
 | 830 | { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 831 | 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); | 
 | 832 | 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); | 
 | 833 | 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 834 | 	BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 835 |  | 
 | 836 | 	lockres->l_level = lockres->l_requested; | 
 | 837 | 	if (lockres->l_level <= | 
 | 838 | 	    ocfs2_highest_compat_lock_level(lockres->l_blocking)) { | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 839 | 		lockres->l_blocking = DLM_LOCK_NL; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 840 | 		lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); | 
 | 841 | 	} | 
 | 842 | 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 843 | } | 
 | 844 |  | 
 | 845 | static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres) | 
 | 846 | { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 847 | 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); | 
 | 848 | 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); | 
 | 849 |  | 
 | 850 | 	/* Convert from RO to EX doesn't really need anything as our | 
 | 851 | 	 * information is already up to data. Convert from NL to | 
 | 852 | 	 * *anything* however should mark ourselves as needing an | 
 | 853 | 	 * update */ | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 854 | 	if (lockres->l_level == DLM_LOCK_NL && | 
| Mark Fasheh | f625c97 | 2006-09-12 21:24:53 -0700 | [diff] [blame] | 855 | 	    lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 856 | 		lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); | 
 | 857 |  | 
 | 858 | 	lockres->l_level = lockres->l_requested; | 
| Sunil Mushran | a191282 | 2010-01-21 10:50:03 -0800 | [diff] [blame] | 859 |  | 
 | 860 | 	/* | 
 | 861 | 	 * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing | 
 | 862 | 	 * the OCFS2_LOCK_BUSY flag to prevent the dc thread from | 
 | 863 | 	 * downconverting the lock before the upconvert has fully completed. | 
 | 864 | 	 */ | 
 | 865 | 	lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); | 
 | 866 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 867 | 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 868 | } | 
 | 869 |  | 
 | 870 | static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres) | 
 | 871 | { | 
| Roel Kluin | 3cf0c50 | 2007-10-27 00:20:36 +0200 | [diff] [blame] | 872 | 	BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY))); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 873 | 	BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); | 
 | 874 |  | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 875 | 	if (lockres->l_requested > DLM_LOCK_NL && | 
| Mark Fasheh | f625c97 | 2006-09-12 21:24:53 -0700 | [diff] [blame] | 876 | 	    !(lockres->l_flags & OCFS2_LOCK_LOCAL) && | 
 | 877 | 	    lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 878 | 		lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); | 
 | 879 |  | 
 | 880 | 	lockres->l_level = lockres->l_requested; | 
 | 881 | 	lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED); | 
 | 882 | 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 883 | } | 
 | 884 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 885 | static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, | 
 | 886 | 				     int level) | 
 | 887 | { | 
 | 888 | 	int needs_downconvert = 0; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 889 |  | 
 | 890 | 	assert_spin_locked(&lockres->l_lock); | 
 | 891 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 892 | 	if (level > lockres->l_blocking) { | 
 | 893 | 		/* only schedule a downconvert if we haven't already scheduled | 
 | 894 | 		 * one that goes low enough to satisfy the level we're | 
 | 895 | 		 * blocking.  this also catches the case where we get | 
 | 896 | 		 * duplicate BASTs */ | 
 | 897 | 		if (ocfs2_highest_compat_lock_level(level) < | 
 | 898 | 		    ocfs2_highest_compat_lock_level(lockres->l_blocking)) | 
 | 899 | 			needs_downconvert = 1; | 
 | 900 |  | 
 | 901 | 		lockres->l_blocking = level; | 
 | 902 | 	} | 
 | 903 |  | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 904 | 	mlog(ML_BASTS, "lockres %s, block %d, level %d, l_block %d, dwn %d\n", | 
 | 905 | 	     lockres->l_name, level, lockres->l_level, lockres->l_blocking, | 
 | 906 | 	     needs_downconvert); | 
 | 907 |  | 
| Wengang Wang | 0b94a90 | 2010-01-21 10:50:02 -0800 | [diff] [blame] | 908 | 	if (needs_downconvert) | 
 | 909 | 		lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); | 
| Tao Ma | c1e8d35 | 2011-03-07 16:43:21 +0800 | [diff] [blame] | 910 | 	mlog(0, "needs_downconvert = %d\n", needs_downconvert); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 911 | 	return needs_downconvert; | 
 | 912 | } | 
 | 913 |  | 
| Joel Becker | de55124 | 2008-02-01 14:45:08 -0800 | [diff] [blame] | 914 | /* | 
 | 915 |  * OCFS2_LOCK_PENDING and l_pending_gen. | 
 | 916 |  * | 
 | 917 |  * Why does OCFS2_LOCK_PENDING exist?  To close a race between setting | 
 | 918 |  * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock().  See ocfs2_unblock_lock() | 
 | 919 |  * for more details on the race. | 
 | 920 |  * | 
 | 921 |  * OCFS2_LOCK_PENDING closes the race quite nicely.  However, it introduces | 
 | 922 |  * a race on itself.  In o2dlm, we can get the ast before ocfs2_dlm_lock() | 
 | 923 |  * returns.  The ast clears OCFS2_LOCK_BUSY, and must therefore clear | 
 | 924 |  * OCFS2_LOCK_PENDING at the same time.  When ocfs2_dlm_lock() returns, | 
 | 925 |  * the caller is going to try to clear PENDING again.  If nothing else is | 
 | 926 |  * happening, __lockres_clear_pending() sees PENDING is unset and does | 
 | 927 |  * nothing. | 
 | 928 |  * | 
 | 929 |  * But what if another path (eg downconvert thread) has just started a | 
 | 930 |  * new locking action?  The other path has re-set PENDING.  Our path | 
 | 931 |  * cannot clear PENDING, because that will re-open the original race | 
 | 932 |  * window. | 
 | 933 |  * | 
 | 934 |  * [Example] | 
 | 935 |  * | 
 | 936 |  * ocfs2_meta_lock() | 
 | 937 |  *  ocfs2_cluster_lock() | 
 | 938 |  *   set BUSY | 
 | 939 |  *   set PENDING | 
 | 940 |  *   drop l_lock | 
 | 941 |  *   ocfs2_dlm_lock() | 
 | 942 |  *    ocfs2_locking_ast()		ocfs2_downconvert_thread() | 
 | 943 |  *     clear PENDING			 ocfs2_unblock_lock() | 
 *					  take l_lock
 | 945 |  *					  !BUSY | 
 | 946 |  *					  ocfs2_prepare_downconvert() | 
 | 947 |  *					   set BUSY | 
 | 948 |  *					   set PENDING | 
 | 949 |  *					  drop l_lock | 
 | 950 |  *   take l_lock | 
 | 951 |  *   clear PENDING | 
 | 952 |  *   drop l_lock | 
 | 953 |  *			<window> | 
 | 954 |  *					  ocfs2_dlm_lock() | 
 | 955 |  * | 
 | 956 |  * So as you can see, we now have a window where l_lock is not held, | 
 | 957 |  * PENDING is not set, and ocfs2_dlm_lock() has not been called. | 
 | 958 |  * | 
 | 959 |  * The core problem is that ocfs2_cluster_lock() has cleared the PENDING | 
 | 960 |  * set by ocfs2_prepare_downconvert().  That wasn't nice. | 
 | 961 |  * | 
 | 962 |  * To solve this we introduce l_pending_gen.  A call to | 
 | 963 |  * lockres_clear_pending() will only do so when it is passed a generation | 
 | 964 |  * number that matches the lockres.  lockres_set_pending() will return the | 
 | 965 |  * current generation number.  When ocfs2_cluster_lock() goes to clear | 
 | 966 |  * PENDING, it passes the generation it got from set_pending().  In our | 
 | 967 |  * example above, the generation numbers will *not* match.  Thus, | 
 | 968 |  * ocfs2_cluster_lock() will not clear the PENDING set by | 
 | 969 |  * ocfs2_prepare_downconvert(). | 
 | 970 |  */ | 
 | 971 |  | 
 | 972 | /* Unlocked version for ocfs2_locking_ast() */ | 
 | 973 | static void __lockres_clear_pending(struct ocfs2_lock_res *lockres, | 
 | 974 | 				    unsigned int generation, | 
 | 975 | 				    struct ocfs2_super *osb) | 
 | 976 | { | 
 | 977 | 	assert_spin_locked(&lockres->l_lock); | 
 | 978 |  | 
 | 979 | 	/* | 
 | 980 | 	 * The ast and locking functions can race us here.  The winner | 
 | 981 | 	 * will clear pending, the loser will not. | 
 | 982 | 	 */ | 
 | 983 | 	if (!(lockres->l_flags & OCFS2_LOCK_PENDING) || | 
 | 984 | 	    (lockres->l_pending_gen != generation)) | 
 | 985 | 		return; | 
 | 986 |  | 
 | 987 | 	lockres_clear_flags(lockres, OCFS2_LOCK_PENDING); | 
 | 988 | 	lockres->l_pending_gen++; | 
 | 989 |  | 
 | 990 | 	/* | 
 | 991 | 	 * The downconvert thread may have skipped us because we | 
 | 992 | 	 * were PENDING.  Wake it up. | 
 | 993 | 	 */ | 
 | 994 | 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED) | 
 | 995 | 		ocfs2_wake_downconvert_thread(osb); | 
 | 996 | } | 
 | 997 |  | 
 | 998 | /* Locked version for callers of ocfs2_dlm_lock() */ | 
 | 999 | static void lockres_clear_pending(struct ocfs2_lock_res *lockres, | 
 | 1000 | 				  unsigned int generation, | 
 | 1001 | 				  struct ocfs2_super *osb) | 
 | 1002 | { | 
 | 1003 | 	unsigned long flags; | 
 | 1004 |  | 
 | 1005 | 	spin_lock_irqsave(&lockres->l_lock, flags); | 
 | 1006 | 	__lockres_clear_pending(lockres, generation, osb); | 
 | 1007 | 	spin_unlock_irqrestore(&lockres->l_lock, flags); | 
 | 1008 | } | 
 | 1009 |  | 
 | 1010 | static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres) | 
 | 1011 | { | 
 | 1012 | 	assert_spin_locked(&lockres->l_lock); | 
 | 1013 | 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); | 
 | 1014 |  | 
 | 1015 | 	lockres_or_flags(lockres, OCFS2_LOCK_PENDING); | 
 | 1016 |  | 
 | 1017 | 	return lockres->l_pending_gen; | 
 | 1018 | } | 
 | 1019 |  | 
| Joel Becker | c0e4133 | 2010-01-29 14:46:44 -0800 | [diff] [blame] | 1020 | static void ocfs2_blocking_ast(struct ocfs2_dlm_lksb *lksb, int level) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1021 | { | 
| Joel Becker | a796d28 | 2010-01-28 19:22:39 -0800 | [diff] [blame] | 1022 | 	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); | 
| Mark Fasheh | aa2623a | 2006-09-12 21:58:23 -0700 | [diff] [blame] | 1023 | 	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1024 | 	int needs_downconvert; | 
 | 1025 | 	unsigned long flags; | 
 | 1026 |  | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1027 | 	BUG_ON(level <= DLM_LOCK_NL); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1028 |  | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 1029 | 	mlog(ML_BASTS, "BAST fired for lockres %s, blocking %d, level %d, " | 
 | 1030 | 	     "type %s\n", lockres->l_name, level, lockres->l_level, | 
| Mark Fasheh | aa2623a | 2006-09-12 21:58:23 -0700 | [diff] [blame] | 1031 | 	     ocfs2_lock_type_string(lockres->l_type)); | 
 | 1032 |  | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1033 | 	/* | 
 | 1034 | 	 * We can skip the bast for locks which don't enable caching - | 
 | 1035 | 	 * they'll be dropped at the earliest possible time anyway. | 
 | 1036 | 	 */ | 
 | 1037 | 	if (lockres->l_flags & OCFS2_LOCK_NOCACHE) | 
 | 1038 | 		return; | 
 | 1039 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1040 | 	spin_lock_irqsave(&lockres->l_lock, flags); | 
 | 1041 | 	needs_downconvert = ocfs2_generic_handle_bast(lockres, level); | 
 | 1042 | 	if (needs_downconvert) | 
 | 1043 | 		ocfs2_schedule_blocked_lock(osb, lockres); | 
 | 1044 | 	spin_unlock_irqrestore(&lockres->l_lock, flags); | 
 | 1045 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 1046 | 	wake_up(&lockres->l_event); | 
 | 1047 |  | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 1048 | 	ocfs2_wake_downconvert_thread(osb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1049 | } | 
 | 1050 |  | 
/*
 * AST handler for lock/convert requests.
 *
 * @lksb: lock status block; mapped back to its lock resource
 *
 * Runs entirely under l_lock.  Depending on the lksb status:
 *  - -EAGAIN: BUSY is cleared and the per-action handling is skipped,
 *    but the common tail below (reset l_action, clear pending, wake
 *    waiters) still runs.
 *  - any other non-zero status: an error is logged and the function
 *    returns right away; flags are left as they were and nobody is
 *    woken.
 *  - zero: the handler matching l_action (attach, convert, downconvert)
 *    runs; any other l_action value is a BUG.
 */
static void ocfs2_locking_ast(struct ocfs2_dlm_lksb *lksb)
{
	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
	unsigned long flags;
	int status;

	spin_lock_irqsave(&lockres->l_lock, flags);

	status = ocfs2_dlm_lock_status(&lockres->l_lksb);

	if (status == -EAGAIN) {
		/* Request was not granted: just drop BUSY and finish up. */
		lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
		goto out;
	}

	if (status) {
		mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n",
		     lockres->l_name, status);
		/* Early exit: the common tail after "out" is skipped. */
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		return;
	}

	mlog(ML_BASTS, "AST fired for lockres %s, action %d, unlock %d, "
	     "level %d => %d\n", lockres->l_name, lockres->l_action,
	     lockres->l_unlock_action, lockres->l_level, lockres->l_requested);

	switch(lockres->l_action) {
	case OCFS2_AST_ATTACH:
		ocfs2_generic_handle_attach_action(lockres);
		/* The attach handler reads LOCAL, so clear it only afterwards. */
		lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
		break;
	case OCFS2_AST_CONVERT:
		ocfs2_generic_handle_convert_action(lockres);
		break;
	case OCFS2_AST_DOWNCONVERT:
		ocfs2_generic_handle_downconvert_action(lockres);
		break;
	default:
		mlog(ML_ERROR, "lockres %s: AST fired with invalid action: %u, "
		     "flags 0x%lx, unlock: %u\n",
		     lockres->l_name, lockres->l_action, lockres->l_flags,
		     lockres->l_unlock_action);
		BUG();
	}
out:
	/* set it to something invalid so if we get called again we
	 * can catch it. */
	lockres->l_action = OCFS2_AST_INVALID;

	/* Did we try to cancel this lock?  Clear that state */
	if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT)
		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;

	/*
	 * We may have beaten the locking functions here.  We certainly
	 * know that dlm_lock() has been called :-)
	 * Because we can't have two lock calls in flight at once, we
	 * can use lockres->l_pending_gen.
	 */
	__lockres_clear_pending(lockres, lockres->l_pending_gen,  osb);

	/* Wake waiters on l_event before releasing l_lock. */
	wake_up(&lockres->l_event);
	spin_unlock_irqrestore(&lockres->l_lock, flags);
}
 | 1116 |  | 
| Joel Becker | 553b5eb | 2010-01-29 17:19:06 -0800 | [diff] [blame] | 1117 | static void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error) | 
 | 1118 | { | 
 | 1119 | 	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); | 
 | 1120 | 	unsigned long flags; | 
 | 1121 |  | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 1122 | 	mlog(ML_BASTS, "UNLOCK AST fired for lockres %s, action = %d\n", | 
 | 1123 | 	     lockres->l_name, lockres->l_unlock_action); | 
| Joel Becker | 553b5eb | 2010-01-29 17:19:06 -0800 | [diff] [blame] | 1124 |  | 
 | 1125 | 	spin_lock_irqsave(&lockres->l_lock, flags); | 
 | 1126 | 	if (error) { | 
 | 1127 | 		mlog(ML_ERROR, "Dlm passes error %d for lock %s, " | 
 | 1128 | 		     "unlock_action %d\n", error, lockres->l_name, | 
 | 1129 | 		     lockres->l_unlock_action); | 
 | 1130 | 		spin_unlock_irqrestore(&lockres->l_lock, flags); | 
| Joel Becker | 553b5eb | 2010-01-29 17:19:06 -0800 | [diff] [blame] | 1131 | 		return; | 
 | 1132 | 	} | 
 | 1133 |  | 
 | 1134 | 	switch(lockres->l_unlock_action) { | 
 | 1135 | 	case OCFS2_UNLOCK_CANCEL_CONVERT: | 
 | 1136 | 		mlog(0, "Cancel convert success for %s\n", lockres->l_name); | 
 | 1137 | 		lockres->l_action = OCFS2_AST_INVALID; | 
 | 1138 | 		/* Downconvert thread may have requeued this lock, we | 
 | 1139 | 		 * need to wake it. */ | 
 | 1140 | 		if (lockres->l_flags & OCFS2_LOCK_BLOCKED) | 
 | 1141 | 			ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres)); | 
 | 1142 | 		break; | 
 | 1143 | 	case OCFS2_UNLOCK_DROP_LOCK: | 
 | 1144 | 		lockres->l_level = DLM_LOCK_IV; | 
 | 1145 | 		break; | 
 | 1146 | 	default: | 
 | 1147 | 		BUG(); | 
 | 1148 | 	} | 
 | 1149 |  | 
 | 1150 | 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 
 | 1151 | 	lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; | 
 | 1152 | 	wake_up(&lockres->l_event); | 
 | 1153 | 	spin_unlock_irqrestore(&lockres->l_lock, flags); | 
| Joel Becker | 553b5eb | 2010-01-29 17:19:06 -0800 | [diff] [blame] | 1154 | } | 
 | 1155 |  | 
 | 1156 | /* | 
 | 1157 |  * This is the filesystem locking protocol.  It provides the lock handling | 
 | 1158 |  * hooks for the underlying DLM.  It has a maximum version number. | 
 | 1159 |  * The version number allows interoperability with systems running at | 
 | 1160 |  * the same major number and an equal or smaller minor number. | 
 | 1161 |  * | 
 | 1162 |  * Whenever the filesystem does new things with locks (adds or removes a | 
 | 1163 |  * lock, orders them differently, does different things underneath a lock), | 
 | 1164 |  * the version must be changed.  The protocol is negotiated when joining | 
 | 1165 |  * the dlm domain.  A node may join the domain if its major version is | 
 | 1166 |  * identical to all other nodes and its minor version is greater than | 
 | 1167 |  * or equal to all other nodes.  When its minor version is greater than | 
 | 1168 |  * the other nodes, it will run at the minor version specified by the | 
 | 1169 |  * other nodes. | 
 | 1170 |  * | 
 | 1171 |  * If a locking change is made that will not be compatible with older | 
 | 1172 |  * versions, the major number must be increased and the minor version set | 
 | 1173 |  * to zero.  If a change merely adds a behavior that can be disabled when | 
 | 1174 |  * speaking to older versions, the minor version must be increased.  If a | 
 | 1175 |  * change adds a fully backwards compatible change (eg, LVB changes that | 
 | 1176 |  * are just ignored by older versions), the version does not need to be | 
 | 1177 |  * updated. | 
 | 1178 |  */ | 
static struct ocfs2_locking_protocol lproto = {
	/* Highest major/minor protocol version this code can speak. */
	.lp_max_version = {
		.pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR,
		.pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR,
	},
	/* Callbacks for the lock, blocking and unlock ASTs. */
	.lp_lock_ast		= ocfs2_locking_ast,
	.lp_blocking_ast	= ocfs2_blocking_ast,
	.lp_unlock_ast		= ocfs2_unlock_ast,
};
 | 1188 |  | 
/*
 * Hand the maximum locking protocol version of this file
 * (lproto.lp_max_version) to the stack glue layer.
 */
void ocfs2_set_locking_protocol(void)
{
	ocfs2_stack_glue_set_max_proto_version(&lproto.lp_max_version);
}
 | 1193 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1194 | static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, | 
 | 1195 | 						int convert) | 
 | 1196 | { | 
 | 1197 | 	unsigned long flags; | 
 | 1198 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1199 | 	spin_lock_irqsave(&lockres->l_lock, flags); | 
 | 1200 | 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 
| Sunil Mushran | a191282 | 2010-01-21 10:50:03 -0800 | [diff] [blame] | 1201 | 	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1202 | 	if (convert) | 
 | 1203 | 		lockres->l_action = OCFS2_AST_INVALID; | 
 | 1204 | 	else | 
 | 1205 | 		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; | 
 | 1206 | 	spin_unlock_irqrestore(&lockres->l_lock, flags); | 
 | 1207 |  | 
 | 1208 | 	wake_up(&lockres->l_event); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1209 | } | 
 | 1210 |  | 
 | 1211 | /* Note: If we detect another process working on the lock (i.e., | 
 | 1212 |  * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller | 
 | 1213 |  * to do the right thing in that case. | 
 | 1214 |  */ | 
 | 1215 | static int ocfs2_lock_create(struct ocfs2_super *osb, | 
 | 1216 | 			     struct ocfs2_lock_res *lockres, | 
 | 1217 | 			     int level, | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1218 | 			     u32 dlm_flags) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1219 | { | 
 | 1220 | 	int ret = 0; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1221 | 	unsigned long flags; | 
| Joel Becker | de55124 | 2008-02-01 14:45:08 -0800 | [diff] [blame] | 1222 | 	unsigned int gen; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1223 |  | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1224 | 	mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1225 | 	     dlm_flags); | 
 | 1226 |  | 
 | 1227 | 	spin_lock_irqsave(&lockres->l_lock, flags); | 
 | 1228 | 	if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) || | 
 | 1229 | 	    (lockres->l_flags & OCFS2_LOCK_BUSY)) { | 
 | 1230 | 		spin_unlock_irqrestore(&lockres->l_lock, flags); | 
 | 1231 | 		goto bail; | 
 | 1232 | 	} | 
 | 1233 |  | 
 | 1234 | 	lockres->l_action = OCFS2_AST_ATTACH; | 
 | 1235 | 	lockres->l_requested = level; | 
 | 1236 | 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 
| Joel Becker | de55124 | 2008-02-01 14:45:08 -0800 | [diff] [blame] | 1237 | 	gen = lockres_set_pending(lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1238 | 	spin_unlock_irqrestore(&lockres->l_lock, flags); | 
 | 1239 |  | 
| Joel Becker | 4670c46 | 2008-02-01 14:39:35 -0800 | [diff] [blame] | 1240 | 	ret = ocfs2_dlm_lock(osb->cconn, | 
| Joel Becker | 7431cd7 | 2008-02-01 12:15:37 -0800 | [diff] [blame] | 1241 | 			     level, | 
 | 1242 | 			     &lockres->l_lksb, | 
 | 1243 | 			     dlm_flags, | 
 | 1244 | 			     lockres->l_name, | 
| Joel Becker | a796d28 | 2010-01-28 19:22:39 -0800 | [diff] [blame] | 1245 | 			     OCFS2_LOCK_ID_MAX_LEN - 1); | 
| Joel Becker | de55124 | 2008-02-01 14:45:08 -0800 | [diff] [blame] | 1246 | 	lockres_clear_pending(lockres, gen, osb); | 
| Joel Becker | 7431cd7 | 2008-02-01 12:15:37 -0800 | [diff] [blame] | 1247 | 	if (ret) { | 
 | 1248 | 		ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1249 | 		ocfs2_recover_from_dlm_error(lockres, 1); | 
 | 1250 | 	} | 
 | 1251 |  | 
| Joel Becker | 7431cd7 | 2008-02-01 12:15:37 -0800 | [diff] [blame] | 1252 | 	mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1253 |  | 
 | 1254 | bail: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1255 | 	return ret; | 
 | 1256 | } | 
 | 1257 |  | 
 | 1258 | static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres, | 
 | 1259 | 					int flag) | 
 | 1260 | { | 
 | 1261 | 	unsigned long flags; | 
 | 1262 | 	int ret; | 
 | 1263 |  | 
 | 1264 | 	spin_lock_irqsave(&lockres->l_lock, flags); | 
 | 1265 | 	ret = lockres->l_flags & flag; | 
 | 1266 | 	spin_unlock_irqrestore(&lockres->l_lock, flags); | 
 | 1267 |  | 
 | 1268 | 	return ret; | 
 | 1269 | } | 
 | 1270 |  | 
/* Sleep on l_event until OCFS2_LOCK_BUSY is no longer set on @lockres. */
static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres)

{
	wait_event(lockres->l_event,
		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY));
}
 | 1277 |  | 
/* Sleep on l_event until OCFS2_LOCK_REFRESHING is no longer set on @lockres. */
static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres)

{
	wait_event(lockres->l_event,
		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING));
}
 | 1284 |  | 
 | 1285 | /* predict what lock level we'll be dropping down to on behalf | 
 | 1286 |  * of another node, and return true if the currently wanted | 
 | 1287 |  * level will be compatible with it. */ | 
 | 1288 | static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, | 
 | 1289 | 						     int wanted) | 
 | 1290 | { | 
 | 1291 | 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); | 
 | 1292 |  | 
 | 1293 | 	return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking); | 
 | 1294 | } | 
 | 1295 |  | 
/*
 * Prepare @mw for use: empty list linkage, a fresh completion, and
 * whatever bookkeeping ocfs2_init_start_time() sets up (presumably the
 * timestamp later consumed by the lock statistics -- confirm there).
 */
static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw)
{
	INIT_LIST_HEAD(&mw->mw_item);
	init_completion(&mw->mw_complete);
	ocfs2_init_start_time(mw);
}
 | 1302 |  | 
/*
 * Block until @mw's completion is signalled, re-arm the completion and
 * return the status stored in mw->mw_status.
 */
static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw)
{
	wait_for_completion(&mw->mw_complete);
	/* Re-arm the completion in case we want to wait on it again */
	INIT_COMPLETION(mw->mw_complete);
	return mw->mw_status;
}
 | 1310 |  | 
 | 1311 | static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres, | 
 | 1312 | 				    struct ocfs2_mask_waiter *mw, | 
 | 1313 | 				    unsigned long mask, | 
 | 1314 | 				    unsigned long goal) | 
 | 1315 | { | 
 | 1316 | 	BUG_ON(!list_empty(&mw->mw_item)); | 
 | 1317 |  | 
 | 1318 | 	assert_spin_locked(&lockres->l_lock); | 
 | 1319 |  | 
 | 1320 | 	list_add_tail(&mw->mw_item, &lockres->l_mask_waiters); | 
 | 1321 | 	mw->mw_mask = mask; | 
 | 1322 | 	mw->mw_goal = goal; | 
 | 1323 | } | 
 | 1324 |  | 
 | 1325 | /* returns 0 if the mw that was removed was already satisfied, -EBUSY | 
 | 1326 |  * if the mask still hadn't reached its goal */ | 
 | 1327 | static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, | 
 | 1328 | 				      struct ocfs2_mask_waiter *mw) | 
 | 1329 | { | 
 | 1330 | 	unsigned long flags; | 
 | 1331 | 	int ret = 0; | 
 | 1332 |  | 
 | 1333 | 	spin_lock_irqsave(&lockres->l_lock, flags); | 
 | 1334 | 	if (!list_empty(&mw->mw_item)) { | 
 | 1335 | 		if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) | 
 | 1336 | 			ret = -EBUSY; | 
 | 1337 |  | 
 | 1338 | 		list_del_init(&mw->mw_item); | 
 | 1339 | 		init_completion(&mw->mw_complete); | 
 | 1340 | 	} | 
 | 1341 | 	spin_unlock_irqrestore(&lockres->l_lock, flags); | 
 | 1342 |  | 
 | 1343 | 	return ret; | 
 | 1344 |  | 
 | 1345 | } | 
 | 1346 |  | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1347 | static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, | 
 | 1348 | 					     struct ocfs2_lock_res *lockres) | 
 | 1349 | { | 
 | 1350 | 	int ret; | 
 | 1351 |  | 
 | 1352 | 	ret = wait_for_completion_interruptible(&mw->mw_complete); | 
 | 1353 | 	if (ret) | 
 | 1354 | 		lockres_remove_mask_waiter(lockres, mw); | 
 | 1355 | 	else | 
 | 1356 | 		ret = mw->mw_status; | 
 | 1357 | 	/* Re-arm the completion in case we want to wait on it again */ | 
 | 1358 | 	INIT_COMPLETION(mw->mw_complete); | 
 | 1359 | 	return ret; | 
 | 1360 | } | 
 | 1361 |  | 
/*
 * Acquire the cluster lock @lockres at @level on behalf of the caller.
 *
 * @osb:        superblock info; supplies the cluster connection and the
 *              OCFS2_MOUNT_NOINTR mount option.
 * @lockres:    lock resource to take.
 * @level:      wanted dlm level; must not be IV or NL when an upconvert
 *              is needed.
 * @lkm_flags:  DLM_LKF_* flags for ocfs2_dlm_lock().  DLM_LKF_VALBLK is
 *              added when the lock type uses an LVB, and DLM_LKF_CONVERT
 *              is set or cleared here depending on whether the lock is
 *              already attached.
 * @arg_flags:  OCFS2_* behaviour flags (OCFS2_LOCK_NONBLOCK and
 *              OCFS2_META_LOCK_NOQUEUE are examined here).
 * @l_subclass: lockdep subclass, used only with CONFIG_DEBUG_LOCK_ALLOC.
 * @caller_ip:  caller address for lockdep, same condition.
 *
 * The function loops (label "again") until the currently granted level
 * satisfies @level, sleeping on a mask waiter while the lockres is BUSY
 * or BLOCKED, and issuing a dlm upconvert when the granted level is too
 * low.
 *
 * Returns 0 with the holder count for @level incremented, or a negative
 * value: -ERESTARTSYS when a signal is pending and signals are being
 * caught, -EAGAIN when a NOQUEUE request was already tried once or a
 * NONBLOCK caller would have to wait, otherwise the error from
 * ocfs2_dlm_lock() or from the mask waiter's status.
 */
static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
				struct ocfs2_lock_res *lockres,
				int level,
				u32 lkm_flags,
				int arg_flags,
				int l_subclass,
				unsigned long caller_ip)
{
	struct ocfs2_mask_waiter mw;
	int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR);
	int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */
	unsigned long flags;
	unsigned int gen;
	int noqueue_attempted = 0;

	ocfs2_init_mask_waiter(&mw);

	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
		lkm_flags |= DLM_LKF_VALBLK;

again:
	wait = 0;

	spin_lock_irqsave(&lockres->l_lock, flags);

	if (catch_signals && signal_pending(current)) {
		ret = -ERESTARTSYS;
		goto unlock;
	}

	mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING,
			"Cluster lock called on freeing lockres %s! flags "
			"0x%lx\n", lockres->l_name, lockres->l_flags);

	/* We only compare against the currently granted level
	 * here. If the lock is blocked waiting on a downconvert,
	 * we'll get caught below. */
	if (lockres->l_flags & OCFS2_LOCK_BUSY &&
	    level > lockres->l_level) {
		/* is someone sitting in dlm_lock? If so, wait on
		 * them. */
		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
		wait = 1;
		goto unlock;
	}

	if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) {
		/*
		 * We've upconverted. If the lock now has a level we can
		 * work with, we take it. If, however, the lock is not at the
		 * required level, we go thru the full cycle. One way this could
		 * happen is if a process requesting an upconvert to PR is
		 * closely followed by another requesting upconvert to an EX.
		 * If the process requesting EX lands here, we want it to
		 * continue attempting to upconvert and let the process
		 * requesting PR take the lock.
		 * If multiple processes request upconvert to PR, the first one
		 * here will take the lock. The others will have to go thru the
		 * OCFS2_LOCK_BLOCKED check to ensure that there is no pending
		 * downconvert request.
		 */
		if (level <= lockres->l_level)
			goto update_holders;
	}

	if (lockres->l_flags & OCFS2_LOCK_BLOCKED &&
	    !ocfs2_may_continue_on_blocked_lock(lockres, level)) {
		/* is the lock currently blocked on behalf of
		 * another node */
		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0);
		wait = 1;
		goto unlock;
	}

	if (level > lockres->l_level) {
		/* A NOQUEUE request is only sent to the dlm once; if we
		 * come around again and still lack the level, give up. */
		if (noqueue_attempted > 0) {
			ret = -EAGAIN;
			goto unlock;
		}
		if (lkm_flags & DLM_LKF_NOQUEUE)
			noqueue_attempted = 1;

		if (lockres->l_action != OCFS2_AST_INVALID)
			mlog(ML_ERROR, "lockres %s has action %u pending\n",
			     lockres->l_name, lockres->l_action);

		/* First request on this lockres attaches it; later ones
		 * convert the existing lock. */
		if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
			lockres->l_action = OCFS2_AST_ATTACH;
			lkm_flags &= ~DLM_LKF_CONVERT;
		} else {
			lockres->l_action = OCFS2_AST_CONVERT;
			lkm_flags |= DLM_LKF_CONVERT;
		}

		lockres->l_requested = level;
		lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
		gen = lockres_set_pending(lockres);
		spin_unlock_irqrestore(&lockres->l_lock, flags);

		BUG_ON(level == DLM_LOCK_IV);
		BUG_ON(level == DLM_LOCK_NL);

		mlog(ML_BASTS, "lockres %s, convert from %d to %d\n",
		     lockres->l_name, lockres->l_level, level);

		/* call dlm_lock to upgrade lock now */
		ret = ocfs2_dlm_lock(osb->cconn,
				     level,
				     &lockres->l_lksb,
				     lkm_flags,
				     lockres->l_name,
				     OCFS2_LOCK_ID_MAX_LEN - 1);
		lockres_clear_pending(lockres, gen, osb);
		if (ret) {
			if (!(lkm_flags & DLM_LKF_NOQUEUE) ||
			    (ret != -EAGAIN)) {
				ocfs2_log_dlm_error("ocfs2_dlm_lock",
						    ret, lockres);
			}
			ocfs2_recover_from_dlm_error(lockres, 1);
			/* l_lock was already dropped above, so skip the
			 * "unlock" label. */
			goto out;
		}

		mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n",
		     lockres->l_name);

		/* At this point we've gone inside the dlm and need to
		 * complete our work regardless. */
		catch_signals = 0;

		/* wait for busy to clear and carry on */
		goto again;
	}

update_holders:
	/* Ok, if we get here then we're good to go. */
	ocfs2_inc_holders(lockres, level);

	ret = 0;
unlock:
	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);

	spin_unlock_irqrestore(&lockres->l_lock, flags);
out:
	/*
	 * This is helping work around a lock inversion between the page lock
	 * and dlm locks.  One path holds the page lock while calling aops
	 * which block acquiring dlm locks.  The voting thread holds dlm
	 * locks while acquiring page locks while down converting data locks.
	 * This block is helping an aop path notice the inversion and back
	 * off to unlock its page lock before trying the dlm lock again.
	 */
	if (wait && arg_flags & OCFS2_LOCK_NONBLOCK &&
	    mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) {
		wait = 0;
		/* Non-zero means the mask had not reached its goal yet:
		 * report -EAGAIN instead of sleeping. */
		if (lockres_remove_mask_waiter(lockres, &mw))
			ret = -EAGAIN;
		else
			goto again;
	}
	if (wait) {
		ret = ocfs2_wait_for_mask(&mw);
		if (ret == 0)
			goto again;
		mlog_errno(ret);
	}
	ocfs2_update_lock_stats(lockres, level, &mw, ret);

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/* Tell lockdep about the acquisition: PR is modelled as a read
	 * acquire, every other level as a write acquire. */
	if (!ret && lockres->l_lockdep_map.key != NULL) {
		if (level == DLM_LOCK_PR)
			rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass,
				!!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
				caller_ip);
		else
			rwsem_acquire(&lockres->l_lockdep_map, l_subclass,
				!!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
				caller_ip);
	}
#endif
	return ret;
}
 | 1544 |  | 
/*
 * Convenience wrapper around __ocfs2_cluster_lock() that uses lockdep
 * subclass 0 and records _RET_IP_ as the caller address.
 */
static inline int ocfs2_cluster_lock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres,
				     int level,
				     u32 lkm_flags,
				     int arg_flags)
{
	return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags,
				    0, _RET_IP_);
}
 | 1554 |  | 
 | 1555 |  | 
/*
 * Release one holder of @lockres at @level.
 *
 * Under l_lock the holder count is decremented and
 * ocfs2_downconvert_on_unlock() is given the chance to react.  With
 * CONFIG_DEBUG_LOCK_ALLOC and a lockdep key set, the lockdep map is
 * released as well, using @caller_ip as the release site.
 */
static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
				   struct ocfs2_lock_res *lockres,
				   int level,
				   unsigned long caller_ip)
{
	unsigned long flags;

	spin_lock_irqsave(&lockres->l_lock, flags);
	ocfs2_dec_holders(lockres, level);
	ocfs2_downconvert_on_unlock(osb, lockres);
	spin_unlock_irqrestore(&lockres->l_lock, flags);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	if (lockres->l_lockdep_map.key != NULL)
		rwsem_release(&lockres->l_lockdep_map, 1, caller_ip);
#endif
}
 | 1572 |  | 
| Adrian Bunk | da66116e | 2006-11-20 03:24:28 +0100 | [diff] [blame] | 1573 | static int ocfs2_create_new_lock(struct ocfs2_super *osb, | 
 | 1574 | 				 struct ocfs2_lock_res *lockres, | 
 | 1575 | 				 int ex, | 
 | 1576 | 				 int local) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1577 | { | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1578 | 	int level =  ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1579 | 	unsigned long flags; | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1580 | 	u32 lkm_flags = local ? DLM_LKF_LOCAL : 0; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1581 |  | 
 | 1582 | 	spin_lock_irqsave(&lockres->l_lock, flags); | 
 | 1583 | 	BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); | 
 | 1584 | 	lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); | 
 | 1585 | 	spin_unlock_irqrestore(&lockres->l_lock, flags); | 
 | 1586 |  | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 1587 | 	return ocfs2_lock_create(osb, lockres, level, lkm_flags); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1588 | } | 
 | 1589 |  | 
 | 1590 | /* Grants us an EX lock on the data and metadata resources, skipping | 
 | 1591 |  * the normal cluster directory lookup. Use this ONLY on newly created | 
 | 1592 |  * inodes which other nodes can't possibly see, and which haven't been | 
 | 1593 |  * hashed in the inode hash yet. This can give us a good performance | 
 | 1594 |  * increase as it'll skip the network broadcast normally associated | 
 | 1595 |  * with creating a new lock resource. */ | 
 | 1596 | int ocfs2_create_new_inode_locks(struct inode *inode) | 
 | 1597 | { | 
 | 1598 | 	int ret; | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 1599 | 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1600 |  | 
 | 1601 | 	BUG_ON(!inode); | 
 | 1602 | 	BUG_ON(!ocfs2_inode_is_new(inode)); | 
 | 1603 |  | 
| Mark Fasheh | b0697053 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 1604 | 	mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1605 |  | 
 | 1606 | 	/* NOTE: That we don't increment any of the holder counts, nor | 
 | 1607 | 	 * do we add anything to a journal handle. Since this is | 
 | 1608 | 	 * supposed to be a new inode which the cluster doesn't know | 
 | 1609 | 	 * about yet, there is no need to.  As far as the LVB handling | 
 | 1610 | 	 * is concerned, this is basically like acquiring an EX lock | 
 | 1611 | 	 * on a resource which has an invalid one -- we'll set it | 
 | 1612 | 	 * valid when we release the EX. */ | 
 | 1613 |  | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 1614 | 	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1615 | 	if (ret) { | 
 | 1616 | 		mlog_errno(ret); | 
 | 1617 | 		goto bail; | 
 | 1618 | 	} | 
 | 1619 |  | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 1620 | 	/* | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1621 | 	 * We don't want to use DLM_LKF_LOCAL on a meta data lock as they | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 1622 | 	 * don't use a generation in their lock names. | 
 | 1623 | 	 */ | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 1624 | 	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1625 | 	if (ret) { | 
 | 1626 | 		mlog_errno(ret); | 
 | 1627 | 		goto bail; | 
 | 1628 | 	} | 
 | 1629 |  | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1630 | 	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0); | 
 | 1631 | 	if (ret) { | 
 | 1632 | 		mlog_errno(ret); | 
 | 1633 | 		goto bail; | 
 | 1634 | 	} | 
 | 1635 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1636 | bail: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1637 | 	return ret; | 
 | 1638 | } | 
 | 1639 |  | 
 | 1640 | int ocfs2_rw_lock(struct inode *inode, int write) | 
 | 1641 | { | 
 | 1642 | 	int status, level; | 
 | 1643 | 	struct ocfs2_lock_res *lockres; | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 1644 | 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1645 |  | 
 | 1646 | 	BUG_ON(!inode); | 
 | 1647 |  | 
| Mark Fasheh | b0697053 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 1648 | 	mlog(0, "inode %llu take %s RW lock\n", | 
 | 1649 | 	     (unsigned long long)OCFS2_I(inode)->ip_blkno, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1650 | 	     write ? "EXMODE" : "PRMODE"); | 
 | 1651 |  | 
| Tao Ma | c1e8d35 | 2011-03-07 16:43:21 +0800 | [diff] [blame] | 1652 | 	if (ocfs2_mount_local(osb)) | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 1653 | 		return 0; | 
 | 1654 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1655 | 	lockres = &OCFS2_I(inode)->ip_rw_lockres; | 
 | 1656 |  | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1657 | 	level = write ? DLM_LOCK_EX : DLM_LOCK_PR; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1658 |  | 
 | 1659 | 	status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, | 
 | 1660 | 				    0); | 
 | 1661 | 	if (status < 0) | 
 | 1662 | 		mlog_errno(status); | 
 | 1663 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1664 | 	return status; | 
 | 1665 | } | 
 | 1666 |  | 
 | 1667 | void ocfs2_rw_unlock(struct inode *inode, int write) | 
 | 1668 | { | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1669 | 	int level = write ? DLM_LOCK_EX : DLM_LOCK_PR; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1670 | 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 1671 | 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1672 |  | 
| Mark Fasheh | b0697053 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 1673 | 	mlog(0, "inode %llu drop %s RW lock\n", | 
 | 1674 | 	     (unsigned long long)OCFS2_I(inode)->ip_blkno, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1675 | 	     write ? "EXMODE" : "PRMODE"); | 
 | 1676 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 1677 | 	if (!ocfs2_mount_local(osb)) | 
 | 1678 | 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1679 | } | 
 | 1680 |  | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1681 | /* | 
 | 1682 |  * ocfs2_open_lock always get PR mode lock. | 
 | 1683 |  */ | 
 | 1684 | int ocfs2_open_lock(struct inode *inode) | 
 | 1685 | { | 
 | 1686 | 	int status = 0; | 
 | 1687 | 	struct ocfs2_lock_res *lockres; | 
 | 1688 | 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
 | 1689 |  | 
 | 1690 | 	BUG_ON(!inode); | 
 | 1691 |  | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1692 | 	mlog(0, "inode %llu take PRMODE open lock\n", | 
 | 1693 | 	     (unsigned long long)OCFS2_I(inode)->ip_blkno); | 
 | 1694 |  | 
| Tiger Yang | 03efed8 | 2011-05-28 00:34:19 +0800 | [diff] [blame] | 1695 | 	if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb)) | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1696 | 		goto out; | 
 | 1697 |  | 
 | 1698 | 	lockres = &OCFS2_I(inode)->ip_open_lockres; | 
 | 1699 |  | 
 | 1700 | 	status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1701 | 				    DLM_LOCK_PR, 0, 0); | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1702 | 	if (status < 0) | 
 | 1703 | 		mlog_errno(status); | 
 | 1704 |  | 
 | 1705 | out: | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1706 | 	return status; | 
 | 1707 | } | 
 | 1708 |  | 
 | 1709 | int ocfs2_try_open_lock(struct inode *inode, int write) | 
 | 1710 | { | 
 | 1711 | 	int status = 0, level; | 
 | 1712 | 	struct ocfs2_lock_res *lockres; | 
 | 1713 | 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
 | 1714 |  | 
 | 1715 | 	BUG_ON(!inode); | 
 | 1716 |  | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1717 | 	mlog(0, "inode %llu try to take %s open lock\n", | 
 | 1718 | 	     (unsigned long long)OCFS2_I(inode)->ip_blkno, | 
 | 1719 | 	     write ? "EXMODE" : "PRMODE"); | 
 | 1720 |  | 
| Tiger Yang | 03efed8 | 2011-05-28 00:34:19 +0800 | [diff] [blame] | 1721 | 	if (ocfs2_is_hard_readonly(osb)) { | 
 | 1722 | 		if (write) | 
 | 1723 | 			status = -EROFS; | 
 | 1724 | 		goto out; | 
 | 1725 | 	} | 
 | 1726 |  | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1727 | 	if (ocfs2_mount_local(osb)) | 
 | 1728 | 		goto out; | 
 | 1729 |  | 
 | 1730 | 	lockres = &OCFS2_I(inode)->ip_open_lockres; | 
 | 1731 |  | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1732 | 	level = write ? DLM_LOCK_EX : DLM_LOCK_PR; | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1733 |  | 
 | 1734 | 	/* | 
 | 1735 | 	 * The file system may already holding a PRMODE/EXMODE open lock. | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1736 | 	 * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1737 | 	 * other nodes and the -EAGAIN will indicate to the caller that | 
 | 1738 | 	 * this inode is still in use. | 
 | 1739 | 	 */ | 
 | 1740 | 	status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1741 | 				    level, DLM_LKF_NOQUEUE, 0); | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1742 |  | 
 | 1743 | out: | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1744 | 	return status; | 
 | 1745 | } | 
 | 1746 |  | 
 | 1747 | /* | 
 | 1748 |  * ocfs2_open_unlock unlock PR and EX mode open locks. | 
 | 1749 |  */ | 
 | 1750 | void ocfs2_open_unlock(struct inode *inode) | 
 | 1751 | { | 
 | 1752 | 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres; | 
 | 1753 | 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
 | 1754 |  | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1755 | 	mlog(0, "inode %llu drop open lock\n", | 
 | 1756 | 	     (unsigned long long)OCFS2_I(inode)->ip_blkno); | 
 | 1757 |  | 
 | 1758 | 	if (ocfs2_mount_local(osb)) | 
 | 1759 | 		goto out; | 
 | 1760 |  | 
 | 1761 | 	if(lockres->l_ro_holders) | 
 | 1762 | 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1763 | 				     DLM_LOCK_PR); | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1764 | 	if(lockres->l_ex_holders) | 
 | 1765 | 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1766 | 				     DLM_LOCK_EX); | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1767 |  | 
 | 1768 | out: | 
| Tao Ma | c1e8d35 | 2011-03-07 16:43:21 +0800 | [diff] [blame] | 1769 | 	return; | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1770 | } | 
 | 1771 |  | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1772 | static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres, | 
 | 1773 | 				     int level) | 
 | 1774 | { | 
 | 1775 | 	int ret; | 
 | 1776 | 	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); | 
 | 1777 | 	unsigned long flags; | 
 | 1778 | 	struct ocfs2_mask_waiter mw; | 
 | 1779 |  | 
 | 1780 | 	ocfs2_init_mask_waiter(&mw); | 
 | 1781 |  | 
 | 1782 | retry_cancel: | 
 | 1783 | 	spin_lock_irqsave(&lockres->l_lock, flags); | 
 | 1784 | 	if (lockres->l_flags & OCFS2_LOCK_BUSY) { | 
 | 1785 | 		ret = ocfs2_prepare_cancel_convert(osb, lockres); | 
 | 1786 | 		if (ret) { | 
 | 1787 | 			spin_unlock_irqrestore(&lockres->l_lock, flags); | 
 | 1788 | 			ret = ocfs2_cancel_convert(osb, lockres); | 
 | 1789 | 			if (ret < 0) { | 
 | 1790 | 				mlog_errno(ret); | 
 | 1791 | 				goto out; | 
 | 1792 | 			} | 
 | 1793 | 			goto retry_cancel; | 
 | 1794 | 		} | 
 | 1795 | 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | 
 | 1796 | 		spin_unlock_irqrestore(&lockres->l_lock, flags); | 
 | 1797 |  | 
 | 1798 | 		ocfs2_wait_for_mask(&mw); | 
 | 1799 | 		goto retry_cancel; | 
 | 1800 | 	} | 
 | 1801 |  | 
 | 1802 | 	ret = -ERESTARTSYS; | 
 | 1803 | 	/* | 
 | 1804 | 	 * We may still have gotten the lock, in which case there's no | 
 | 1805 | 	 * point to restarting the syscall. | 
 | 1806 | 	 */ | 
 | 1807 | 	if (lockres->l_level == level) | 
 | 1808 | 		ret = 0; | 
 | 1809 |  | 
 | 1810 | 	mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret, | 
 | 1811 | 	     lockres->l_flags, lockres->l_level, lockres->l_action); | 
 | 1812 |  | 
 | 1813 | 	spin_unlock_irqrestore(&lockres->l_lock, flags); | 
 | 1814 |  | 
 | 1815 | out: | 
 | 1816 | 	return ret; | 
 | 1817 | } | 
 | 1818 |  | 
 | 1819 | /* | 
 | 1820 |  * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of | 
 | 1821 |  * flock() calls. The locking approach this requires is sufficiently | 
 | 1822 |  * different from all other cluster lock types that we implement a | 
| Daniel Mack | 3ad2f3f | 2010-02-03 08:01:28 +0800 | [diff] [blame] | 1823 |  * separate path to the "low-level" dlm calls. In particular: | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1824 |  * | 
 | 1825 |  * - No optimization of lock levels is done - we take at exactly | 
 | 1826 |  *   what's been requested. | 
 | 1827 |  * | 
 | 1828 |  * - No lock caching is employed. We immediately downconvert to | 
 | 1829 |  *   no-lock at unlock time. This also means flock locks never go on | 
 | 1830 |  *   the blocking list). | 
 | 1831 |  * | 
 | 1832 |  * - Since userspace can trivially deadlock itself with flock, we make | 
 | 1833 |  *   sure to allow cancellation of a misbehaving applications flock() | 
 | 1834 |  *   request. | 
 | 1835 |  * | 
 | 1836 |  * - Access to any flock lockres doesn't require concurrency, so we | 
 | 1837 |  *   can simplify the code by requiring the caller to guarantee | 
 | 1838 |  *   serialization of dlmglue flock calls. | 
 | 1839 |  */ | 
 | 1840 | int ocfs2_file_lock(struct file *file, int ex, int trylock) | 
 | 1841 | { | 
| Mark Fasheh | e988cf1 | 2008-07-10 09:25:39 -0700 | [diff] [blame] | 1842 | 	int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 
 | 1843 | 	unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0; | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1844 | 	unsigned long flags; | 
 | 1845 | 	struct ocfs2_file_private *fp = file->private_data; | 
 | 1846 | 	struct ocfs2_lock_res *lockres = &fp->fp_flock; | 
 | 1847 | 	struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); | 
 | 1848 | 	struct ocfs2_mask_waiter mw; | 
 | 1849 |  | 
 | 1850 | 	ocfs2_init_mask_waiter(&mw); | 
 | 1851 |  | 
 | 1852 | 	if ((lockres->l_flags & OCFS2_LOCK_BUSY) || | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1853 | 	    (lockres->l_level > DLM_LOCK_NL)) { | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1854 | 		mlog(ML_ERROR, | 
 | 1855 | 		     "File lock \"%s\" has busy or locked state: flags: 0x%lx, " | 
 | 1856 | 		     "level: %u\n", lockres->l_name, lockres->l_flags, | 
 | 1857 | 		     lockres->l_level); | 
 | 1858 | 		return -EINVAL; | 
 | 1859 | 	} | 
 | 1860 |  | 
 | 1861 | 	spin_lock_irqsave(&lockres->l_lock, flags); | 
 | 1862 | 	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { | 
 | 1863 | 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | 
 | 1864 | 		spin_unlock_irqrestore(&lockres->l_lock, flags); | 
 | 1865 |  | 
 | 1866 | 		/* | 
 | 1867 | 		 * Get the lock at NLMODE to start - that way we | 
 | 1868 | 		 * can cancel the upconvert request if need be. | 
 | 1869 | 		 */ | 
| Mark Fasheh | e988cf1 | 2008-07-10 09:25:39 -0700 | [diff] [blame] | 1870 | 		ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0); | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1871 | 		if (ret < 0) { | 
 | 1872 | 			mlog_errno(ret); | 
 | 1873 | 			goto out; | 
 | 1874 | 		} | 
 | 1875 |  | 
 | 1876 | 		ret = ocfs2_wait_for_mask(&mw); | 
 | 1877 | 		if (ret) { | 
 | 1878 | 			mlog_errno(ret); | 
 | 1879 | 			goto out; | 
 | 1880 | 		} | 
 | 1881 | 		spin_lock_irqsave(&lockres->l_lock, flags); | 
 | 1882 | 	} | 
 | 1883 |  | 
 | 1884 | 	lockres->l_action = OCFS2_AST_CONVERT; | 
| Mark Fasheh | e988cf1 | 2008-07-10 09:25:39 -0700 | [diff] [blame] | 1885 | 	lkm_flags |= DLM_LKF_CONVERT; | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1886 | 	lockres->l_requested = level; | 
 | 1887 | 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 
 | 1888 |  | 
 | 1889 | 	lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | 
 | 1890 | 	spin_unlock_irqrestore(&lockres->l_lock, flags); | 
 | 1891 |  | 
| Joel Becker | 4670c46 | 2008-02-01 14:39:35 -0800 | [diff] [blame] | 1892 | 	ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags, | 
| Joel Becker | a796d28 | 2010-01-28 19:22:39 -0800 | [diff] [blame] | 1893 | 			     lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1); | 
| Joel Becker | 7431cd7 | 2008-02-01 12:15:37 -0800 | [diff] [blame] | 1894 | 	if (ret) { | 
 | 1895 | 		if (!trylock || (ret != -EAGAIN)) { | 
| Joel Becker | 24ef181 | 2008-01-29 17:37:32 -0800 | [diff] [blame] | 1896 | 			ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1897 | 			ret = -EINVAL; | 
 | 1898 | 		} | 
 | 1899 |  | 
 | 1900 | 		ocfs2_recover_from_dlm_error(lockres, 1); | 
 | 1901 | 		lockres_remove_mask_waiter(lockres, &mw); | 
 | 1902 | 		goto out; | 
 | 1903 | 	} | 
 | 1904 |  | 
 | 1905 | 	ret = ocfs2_wait_for_mask_interruptible(&mw, lockres); | 
 | 1906 | 	if (ret == -ERESTARTSYS) { | 
 | 1907 | 		/* | 
 | 1908 | 		 * Userspace can cause deadlock itself with | 
 | 1909 | 		 * flock(). Current behavior locally is to allow the | 
 | 1910 | 		 * deadlock, but abort the system call if a signal is | 
 | 1911 | 		 * received. We follow this example, otherwise a | 
 | 1912 | 		 * poorly written program could sit in kernel until | 
 | 1913 | 		 * reboot. | 
 | 1914 | 		 * | 
 | 1915 | 		 * Handling this is a bit more complicated for Ocfs2 | 
 | 1916 | 		 * though. We can't exit this function with an | 
 | 1917 | 		 * outstanding lock request, so a cancel convert is | 
 | 1918 | 		 * required. We intentionally overwrite 'ret' - if the | 
 | 1919 | 		 * cancel fails and the lock was granted, it's easier | 
| André Goddard Rosa | af901ca | 2009-11-14 13:09:05 -0200 | [diff] [blame] | 1920 | 		 * to just bubble success back up to the user. | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1921 | 		 */ | 
 | 1922 | 		ret = ocfs2_flock_handle_signal(lockres, level); | 
| David Teigland | 1693a5c | 2008-01-30 16:52:53 -0800 | [diff] [blame] | 1923 | 	} else if (!ret && (level > lockres->l_level)) { | 
 | 1924 | 		/* Trylock failed asynchronously */ | 
 | 1925 | 		BUG_ON(!trylock); | 
 | 1926 | 		ret = -EAGAIN; | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1927 | 	} | 
 | 1928 |  | 
 | 1929 | out: | 
 | 1930 |  | 
 | 1931 | 	mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n", | 
 | 1932 | 	     lockres->l_name, ex, trylock, ret); | 
 | 1933 | 	return ret; | 
 | 1934 | } | 
 | 1935 |  | 
 | 1936 | void ocfs2_file_unlock(struct file *file) | 
 | 1937 | { | 
 | 1938 | 	int ret; | 
| Joel Becker | de55124 | 2008-02-01 14:45:08 -0800 | [diff] [blame] | 1939 | 	unsigned int gen; | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1940 | 	unsigned long flags; | 
 | 1941 | 	struct ocfs2_file_private *fp = file->private_data; | 
 | 1942 | 	struct ocfs2_lock_res *lockres = &fp->fp_flock; | 
 | 1943 | 	struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); | 
 | 1944 | 	struct ocfs2_mask_waiter mw; | 
 | 1945 |  | 
 | 1946 | 	ocfs2_init_mask_waiter(&mw); | 
 | 1947 |  | 
 | 1948 | 	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) | 
 | 1949 | 		return; | 
 | 1950 |  | 
| Mark Fasheh | e988cf1 | 2008-07-10 09:25:39 -0700 | [diff] [blame] | 1951 | 	if (lockres->l_level == DLM_LOCK_NL) | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1952 | 		return; | 
 | 1953 |  | 
 | 1954 | 	mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", | 
 | 1955 | 	     lockres->l_name, lockres->l_flags, lockres->l_level, | 
 | 1956 | 	     lockres->l_action); | 
 | 1957 |  | 
 | 1958 | 	spin_lock_irqsave(&lockres->l_lock, flags); | 
 | 1959 | 	/* | 
 | 1960 | 	 * Fake a blocking ast for the downconvert code. | 
 | 1961 | 	 */ | 
 | 1962 | 	lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1963 | 	lockres->l_blocking = DLM_LOCK_EX; | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1964 |  | 
| Mark Fasheh | e988cf1 | 2008-07-10 09:25:39 -0700 | [diff] [blame] | 1965 | 	gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL); | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1966 | 	lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | 
 | 1967 | 	spin_unlock_irqrestore(&lockres->l_lock, flags); | 
 | 1968 |  | 
| Mark Fasheh | e988cf1 | 2008-07-10 09:25:39 -0700 | [diff] [blame] | 1969 | 	ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen); | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1970 | 	if (ret) { | 
 | 1971 | 		mlog_errno(ret); | 
 | 1972 | 		return; | 
 | 1973 | 	} | 
 | 1974 |  | 
 | 1975 | 	ret = ocfs2_wait_for_mask(&mw); | 
 | 1976 | 	if (ret) | 
 | 1977 | 		mlog_errno(ret); | 
 | 1978 | } | 
 | 1979 |  | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 1980 | static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, | 
 | 1981 | 					struct ocfs2_lock_res *lockres) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1982 | { | 
 | 1983 | 	int kick = 0; | 
 | 1984 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1985 | 	/* If we know that another node is waiting on our lock, kick | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 1986 | 	 * the downconvert thread * pre-emptively when we reach a release | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1987 | 	 * condition. */ | 
 | 1988 | 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { | 
 | 1989 | 		switch(lockres->l_blocking) { | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1990 | 		case DLM_LOCK_EX: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1991 | 			if (!lockres->l_ex_holders && !lockres->l_ro_holders) | 
 | 1992 | 				kick = 1; | 
 | 1993 | 			break; | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1994 | 		case DLM_LOCK_PR: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1995 | 			if (!lockres->l_ex_holders) | 
 | 1996 | 				kick = 1; | 
 | 1997 | 			break; | 
 | 1998 | 		default: | 
 | 1999 | 			BUG(); | 
 | 2000 | 		} | 
 | 2001 | 	} | 
 | 2002 |  | 
 | 2003 | 	if (kick) | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 2004 | 		ocfs2_wake_downconvert_thread(osb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2005 | } | 
 | 2006 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2007 | #define OCFS2_SEC_BITS   34 | 
 | 2008 | #define OCFS2_SEC_SHIFT  (64 - 34) | 
 | 2009 | #define OCFS2_NSEC_MASK  ((1ULL << OCFS2_SEC_SHIFT) - 1) | 
 | 2010 |  | 
 | 2011 | /* LVB only has room for 64 bits of time here so we pack it for | 
 | 2012 |  * now. */ | 
 | 2013 | static u64 ocfs2_pack_timespec(struct timespec *spec) | 
 | 2014 | { | 
 | 2015 | 	u64 res; | 
 | 2016 | 	u64 sec = spec->tv_sec; | 
 | 2017 | 	u32 nsec = spec->tv_nsec; | 
 | 2018 |  | 
 | 2019 | 	res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK); | 
 | 2020 |  | 
 | 2021 | 	return res; | 
 | 2022 | } | 
 | 2023 |  | 
 | 2024 | /* Call this with the lockres locked. I am reasonably sure we don't | 
 | 2025 |  * need ip_lock in this function as anyone who would be changing those | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2026 |  * values is supposed to be blocked in ocfs2_inode_lock right now. */ | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2027 | static void __ocfs2_stuff_meta_lvb(struct inode *inode) | 
 | 2028 | { | 
 | 2029 | 	struct ocfs2_inode_info *oi = OCFS2_I(inode); | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2030 | 	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2031 | 	struct ocfs2_meta_lvb *lvb; | 
 | 2032 |  | 
| Mark Fasheh | a641dc2 | 2008-12-24 16:03:48 -0800 | [diff] [blame] | 2033 | 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2034 |  | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 2035 | 	/* | 
 | 2036 | 	 * Invalidate the LVB of a deleted inode - this way other | 
 | 2037 | 	 * nodes are forced to go to disk and discover the new inode | 
 | 2038 | 	 * status. | 
 | 2039 | 	 */ | 
 | 2040 | 	if (oi->ip_flags & OCFS2_INODE_DELETED) { | 
 | 2041 | 		lvb->lvb_version = 0; | 
 | 2042 | 		goto out; | 
 | 2043 | 	} | 
 | 2044 |  | 
| Mark Fasheh | 4d3b83f | 2006-09-12 15:22:18 -0700 | [diff] [blame] | 2045 | 	lvb->lvb_version   = OCFS2_LVB_VERSION; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2046 | 	lvb->lvb_isize	   = cpu_to_be64(i_size_read(inode)); | 
 | 2047 | 	lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); | 
 | 2048 | 	lvb->lvb_iuid      = cpu_to_be32(inode->i_uid); | 
 | 2049 | 	lvb->lvb_igid      = cpu_to_be32(inode->i_gid); | 
 | 2050 | 	lvb->lvb_imode     = cpu_to_be16(inode->i_mode); | 
 | 2051 | 	lvb->lvb_inlink    = cpu_to_be16(inode->i_nlink); | 
 | 2052 | 	lvb->lvb_iatime_packed  = | 
 | 2053 | 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime)); | 
 | 2054 | 	lvb->lvb_ictime_packed = | 
 | 2055 | 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime)); | 
 | 2056 | 	lvb->lvb_imtime_packed = | 
 | 2057 | 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); | 
| Herbert Poetzl | ca4d147 | 2006-07-03 17:27:12 -0700 | [diff] [blame] | 2058 | 	lvb->lvb_iattr    = cpu_to_be32(oi->ip_attr); | 
| Mark Fasheh | 15b1e36 | 2007-09-07 13:58:15 -0700 | [diff] [blame] | 2059 | 	lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features); | 
| Mark Fasheh | f9e2d82 | 2006-09-12 15:35:49 -0700 | [diff] [blame] | 2060 | 	lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2061 |  | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 2062 | out: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2063 | 	mlog_meta_lvb(0, lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2064 | } | 
 | 2065 |  | 
 | 2066 | static void ocfs2_unpack_timespec(struct timespec *spec, | 
 | 2067 | 				  u64 packed_time) | 
 | 2068 | { | 
 | 2069 | 	spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT; | 
 | 2070 | 	spec->tv_nsec = packed_time & OCFS2_NSEC_MASK; | 
 | 2071 | } | 
 | 2072 |  | 
 | 2073 | static void ocfs2_refresh_inode_from_lvb(struct inode *inode) | 
 | 2074 | { | 
 | 2075 | 	struct ocfs2_inode_info *oi = OCFS2_I(inode); | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2076 | 	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2077 | 	struct ocfs2_meta_lvb *lvb; | 
 | 2078 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2079 | 	mlog_meta_lvb(0, lockres); | 
 | 2080 |  | 
| Mark Fasheh | a641dc2 | 2008-12-24 16:03:48 -0800 | [diff] [blame] | 2081 | 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2082 |  | 
 | 2083 | 	/* We're safe here without the lockres lock... */ | 
 | 2084 | 	spin_lock(&oi->ip_lock); | 
 | 2085 | 	oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters); | 
 | 2086 | 	i_size_write(inode, be64_to_cpu(lvb->lvb_isize)); | 
 | 2087 |  | 
| Herbert Poetzl | ca4d147 | 2006-07-03 17:27:12 -0700 | [diff] [blame] | 2088 | 	oi->ip_attr = be32_to_cpu(lvb->lvb_iattr); | 
| Mark Fasheh | 15b1e36 | 2007-09-07 13:58:15 -0700 | [diff] [blame] | 2089 | 	oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures); | 
| Herbert Poetzl | ca4d147 | 2006-07-03 17:27:12 -0700 | [diff] [blame] | 2090 | 	ocfs2_set_inode_flags(inode); | 
 | 2091 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2092 | 	/* fast-symlinks are a special case */ | 
 | 2093 | 	if (S_ISLNK(inode->i_mode) && !oi->ip_clusters) | 
 | 2094 | 		inode->i_blocks = 0; | 
 | 2095 | 	else | 
| Mark Fasheh | 8110b07 | 2007-03-22 16:53:23 -0700 | [diff] [blame] | 2096 | 		inode->i_blocks = ocfs2_inode_sector_count(inode); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2097 |  | 
 | 2098 | 	inode->i_uid     = be32_to_cpu(lvb->lvb_iuid); | 
 | 2099 | 	inode->i_gid     = be32_to_cpu(lvb->lvb_igid); | 
 | 2100 | 	inode->i_mode    = be16_to_cpu(lvb->lvb_imode); | 
| Miklos Szeredi | bfe8684 | 2011-10-28 14:13:29 +0200 | [diff] [blame] | 2101 | 	set_nlink(inode, be16_to_cpu(lvb->lvb_inlink)); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2102 | 	ocfs2_unpack_timespec(&inode->i_atime, | 
 | 2103 | 			      be64_to_cpu(lvb->lvb_iatime_packed)); | 
 | 2104 | 	ocfs2_unpack_timespec(&inode->i_mtime, | 
 | 2105 | 			      be64_to_cpu(lvb->lvb_imtime_packed)); | 
 | 2106 | 	ocfs2_unpack_timespec(&inode->i_ctime, | 
 | 2107 | 			      be64_to_cpu(lvb->lvb_ictime_packed)); | 
 | 2108 | 	spin_unlock(&oi->ip_lock); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2109 | } | 
 | 2110 |  | 
| Mark Fasheh | f9e2d82 | 2006-09-12 15:35:49 -0700 | [diff] [blame] | 2111 | static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, | 
 | 2112 | 					      struct ocfs2_lock_res *lockres) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2113 | { | 
| Mark Fasheh | a641dc2 | 2008-12-24 16:03:48 -0800 | [diff] [blame] | 2114 | 	struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2115 |  | 
| Joel Becker | 1c520df | 2009-06-19 15:14:13 -0700 | [diff] [blame] | 2116 | 	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) | 
 | 2117 | 	    && lvb->lvb_version == OCFS2_LVB_VERSION | 
| Mark Fasheh | f9e2d82 | 2006-09-12 15:35:49 -0700 | [diff] [blame] | 2118 | 	    && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2119 | 		return 1; | 
 | 2120 | 	return 0; | 
 | 2121 | } | 
 | 2122 |  | 
 | 2123 | /* Determine whether a lock resource needs to be refreshed, and | 
 | 2124 |  * arbitrate who gets to refresh it. | 
 | 2125 |  * | 
 | 2126 |  *   0 means no refresh needed. | 
 | 2127 |  * | 
 | 2128 |  *   > 0 means you need to refresh this and you MUST call | 
 | 2129 |  *   ocfs2_complete_lock_res_refresh afterwards. */ | 
 | 2130 | static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres) | 
 | 2131 | { | 
 | 2132 | 	unsigned long flags; | 
 | 2133 | 	int status = 0; | 
 | 2134 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2135 | refresh_check: | 
 | 2136 | 	spin_lock_irqsave(&lockres->l_lock, flags); | 
 | 2137 | 	if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) { | 
 | 2138 | 		spin_unlock_irqrestore(&lockres->l_lock, flags); | 
 | 2139 | 		goto bail; | 
 | 2140 | 	} | 
 | 2141 |  | 
 | 2142 | 	if (lockres->l_flags & OCFS2_LOCK_REFRESHING) { | 
 | 2143 | 		spin_unlock_irqrestore(&lockres->l_lock, flags); | 
 | 2144 |  | 
 | 2145 | 		ocfs2_wait_on_refreshing_lock(lockres); | 
 | 2146 | 		goto refresh_check; | 
 | 2147 | 	} | 
 | 2148 |  | 
 | 2149 | 	/* Ok, I'll be the one to refresh this lock. */ | 
 | 2150 | 	lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING); | 
 | 2151 | 	spin_unlock_irqrestore(&lockres->l_lock, flags); | 
 | 2152 |  | 
 | 2153 | 	status = 1; | 
 | 2154 | bail: | 
| Tao Ma | c1e8d35 | 2011-03-07 16:43:21 +0800 | [diff] [blame] | 2155 | 	mlog(0, "status %d\n", status); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2156 | 	return status; | 
 | 2157 | } | 
 | 2158 |  | 
 | 2159 | /* If status is non zero, I'll mark it as not being in refresh | 
 | 2160 |  * anymroe, but i won't clear the needs refresh flag. */ | 
 | 2161 | static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres, | 
 | 2162 | 						   int status) | 
 | 2163 | { | 
 | 2164 | 	unsigned long flags; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2165 |  | 
 | 2166 | 	spin_lock_irqsave(&lockres->l_lock, flags); | 
 | 2167 | 	lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING); | 
 | 2168 | 	if (!status) | 
 | 2169 | 		lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); | 
 | 2170 | 	spin_unlock_irqrestore(&lockres->l_lock, flags); | 
 | 2171 |  | 
 | 2172 | 	wake_up(&lockres->l_event); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2173 | } | 
 | 2174 |  | 
 | 2175 | /* may or may not return a bh if it went to disk. */ | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2176 | static int ocfs2_inode_lock_update(struct inode *inode, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2177 | 				  struct buffer_head **bh) | 
 | 2178 | { | 
 | 2179 | 	int status = 0; | 
 | 2180 | 	struct ocfs2_inode_info *oi = OCFS2_I(inode); | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2181 | 	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2182 | 	struct ocfs2_dinode *fe; | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2183 | 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2184 |  | 
| Mark Fasheh | be9e986 | 2007-04-18 15:22:08 -0700 | [diff] [blame] | 2185 | 	if (ocfs2_mount_local(osb)) | 
 | 2186 | 		goto bail; | 
 | 2187 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2188 | 	spin_lock(&oi->ip_lock); | 
 | 2189 | 	if (oi->ip_flags & OCFS2_INODE_DELETED) { | 
| Mark Fasheh | b0697053 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 2190 | 		mlog(0, "Orphaned inode %llu was deleted while we " | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2191 | 		     "were waiting on a lock. ip_flags = 0x%x\n", | 
| Mark Fasheh | b0697053 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 2192 | 		     (unsigned long long)oi->ip_blkno, oi->ip_flags); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2193 | 		spin_unlock(&oi->ip_lock); | 
 | 2194 | 		status = -ENOENT; | 
 | 2195 | 		goto bail; | 
 | 2196 | 	} | 
 | 2197 | 	spin_unlock(&oi->ip_lock); | 
 | 2198 |  | 
| Mark Fasheh | be9e986 | 2007-04-18 15:22:08 -0700 | [diff] [blame] | 2199 | 	if (!ocfs2_should_refresh_lock_res(lockres)) | 
 | 2200 | 		goto bail; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2201 |  | 
 | 2202 | 	/* This will discard any caching information we might have had | 
 | 2203 | 	 * for the inode metadata. */ | 
| Joel Becker | 8cb471e | 2009-02-10 20:00:41 -0800 | [diff] [blame] | 2204 | 	ocfs2_metadata_cache_purge(INODE_CACHE(inode)); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2205 |  | 
| Mark Fasheh | 8341897 | 2007-04-23 18:53:12 -0700 | [diff] [blame] | 2206 | 	ocfs2_extent_map_trunc(inode, 0); | 
 | 2207 |  | 
| Mark Fasheh | be9e986 | 2007-04-18 15:22:08 -0700 | [diff] [blame] | 2208 | 	if (ocfs2_meta_lvb_is_trustable(inode, lockres)) { | 
| Mark Fasheh | b0697053 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 2209 | 		mlog(0, "Trusting LVB on inode %llu\n", | 
 | 2210 | 		     (unsigned long long)oi->ip_blkno); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2211 | 		ocfs2_refresh_inode_from_lvb(inode); | 
 | 2212 | 	} else { | 
 | 2213 | 		/* Boo, we have to go to disk. */ | 
 | 2214 | 		/* read bh, cast, ocfs2_refresh_inode */ | 
| Joel Becker | b657c95 | 2008-11-13 14:49:11 -0800 | [diff] [blame] | 2215 | 		status = ocfs2_read_inode_block(inode, bh); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2216 | 		if (status < 0) { | 
 | 2217 | 			mlog_errno(status); | 
 | 2218 | 			goto bail_refresh; | 
 | 2219 | 		} | 
 | 2220 | 		fe = (struct ocfs2_dinode *) (*bh)->b_data; | 
 | 2221 |  | 
 | 2222 | 		/* This is a good chance to make sure we're not | 
| Joel Becker | b657c95 | 2008-11-13 14:49:11 -0800 | [diff] [blame] | 2223 | 		 * locking an invalid object.  ocfs2_read_inode_block() | 
 | 2224 | 		 * already checked that the inode block is sane. | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2225 | 		 * | 
 | 2226 | 		 * We bug on a stale inode here because we checked | 
 | 2227 | 		 * above whether it was wiped from disk. The wiping | 
 | 2228 | 		 * node provides a guarantee that we receive that | 
 | 2229 | 		 * message and can mark the inode before dropping any | 
 | 2230 | 		 * locks associated with it. */ | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2231 | 		mlog_bug_on_msg(inode->i_generation != | 
 | 2232 | 				le32_to_cpu(fe->i_generation), | 
| Mark Fasheh | b0697053 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 2233 | 				"Invalid dinode %llu disk generation: %u " | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2234 | 				"inode->i_generation: %u\n", | 
| Mark Fasheh | b0697053 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 2235 | 				(unsigned long long)oi->ip_blkno, | 
 | 2236 | 				le32_to_cpu(fe->i_generation), | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2237 | 				inode->i_generation); | 
 | 2238 | 		mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) || | 
 | 2239 | 				!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)), | 
| Mark Fasheh | b0697053 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 2240 | 				"Stale dinode %llu dtime: %llu flags: 0x%x\n", | 
 | 2241 | 				(unsigned long long)oi->ip_blkno, | 
 | 2242 | 				(unsigned long long)le64_to_cpu(fe->i_dtime), | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2243 | 				le32_to_cpu(fe->i_flags)); | 
 | 2244 |  | 
 | 2245 | 		ocfs2_refresh_inode(inode, fe); | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 2246 | 		ocfs2_track_lock_refresh(lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2247 | 	} | 
 | 2248 |  | 
 | 2249 | 	status = 0; | 
 | 2250 | bail_refresh: | 
| Mark Fasheh | be9e986 | 2007-04-18 15:22:08 -0700 | [diff] [blame] | 2251 | 	ocfs2_complete_lock_res_refresh(lockres, status); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2252 | bail: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2253 | 	return status; | 
 | 2254 | } | 
 | 2255 |  | 
 | 2256 | static int ocfs2_assign_bh(struct inode *inode, | 
 | 2257 | 			   struct buffer_head **ret_bh, | 
 | 2258 | 			   struct buffer_head *passed_bh) | 
 | 2259 | { | 
 | 2260 | 	int status; | 
 | 2261 |  | 
 | 2262 | 	if (passed_bh) { | 
 | 2263 | 		/* Ok, the update went to disk for us, use the | 
 | 2264 | 		 * returned bh. */ | 
 | 2265 | 		*ret_bh = passed_bh; | 
 | 2266 | 		get_bh(*ret_bh); | 
 | 2267 |  | 
 | 2268 | 		return 0; | 
 | 2269 | 	} | 
 | 2270 |  | 
| Joel Becker | b657c95 | 2008-11-13 14:49:11 -0800 | [diff] [blame] | 2271 | 	status = ocfs2_read_inode_block(inode, ret_bh); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2272 | 	if (status < 0) | 
 | 2273 | 		mlog_errno(status); | 
 | 2274 |  | 
 | 2275 | 	return status; | 
 | 2276 | } | 
 | 2277 |  | 
 | 2278 | /* | 
 | 2279 |  * returns < 0 error if the callback will never be called, otherwise | 
 | 2280 |  * the result of the lock will be communicated via the callback. | 
 | 2281 |  */ | 
| Jan Kara | cb25797 | 2009-06-04 15:26:50 +0200 | [diff] [blame] | 2282 | int ocfs2_inode_lock_full_nested(struct inode *inode, | 
 | 2283 | 				 struct buffer_head **ret_bh, | 
 | 2284 | 				 int ex, | 
 | 2285 | 				 int arg_flags, | 
 | 2286 | 				 int subclass) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2287 | { | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 2288 | 	int status, level, acquired; | 
 | 2289 | 	u32 dlm_flags; | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2290 | 	struct ocfs2_lock_res *lockres = NULL; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2291 | 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
 | 2292 | 	struct buffer_head *local_bh = NULL; | 
 | 2293 |  | 
 | 2294 | 	BUG_ON(!inode); | 
 | 2295 |  | 
| Mark Fasheh | b0697053 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 2296 | 	mlog(0, "inode %llu, take %s META lock\n", | 
 | 2297 | 	     (unsigned long long)OCFS2_I(inode)->ip_blkno, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2298 | 	     ex ? "EXMODE" : "PRMODE"); | 
 | 2299 |  | 
 | 2300 | 	status = 0; | 
 | 2301 | 	acquired = 0; | 
 | 2302 | 	/* We'll allow faking a readonly metadata lock for | 
 | 2303 | 	 * rodevices. */ | 
 | 2304 | 	if (ocfs2_is_hard_readonly(osb)) { | 
 | 2305 | 		if (ex) | 
 | 2306 | 			status = -EROFS; | 
| Tiger Yang | 03efed8 | 2011-05-28 00:34:19 +0800 | [diff] [blame] | 2307 | 		goto getbh; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2308 | 	} | 
 | 2309 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2310 | 	if (ocfs2_mount_local(osb)) | 
 | 2311 | 		goto local; | 
 | 2312 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2313 | 	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) | 
| Joel Becker | 553abd0 | 2008-02-01 12:03:57 -0800 | [diff] [blame] | 2314 | 		ocfs2_wait_for_recovery(osb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2315 |  | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2316 | 	lockres = &OCFS2_I(inode)->ip_inode_lockres; | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 2317 | 	level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2318 | 	dlm_flags = 0; | 
 | 2319 | 	if (arg_flags & OCFS2_META_LOCK_NOQUEUE) | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 2320 | 		dlm_flags |= DLM_LKF_NOQUEUE; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2321 |  | 
| Jan Kara | cb25797 | 2009-06-04 15:26:50 +0200 | [diff] [blame] | 2322 | 	status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags, | 
 | 2323 | 				      arg_flags, subclass, _RET_IP_); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2324 | 	if (status < 0) { | 
 | 2325 | 		if (status != -EAGAIN && status != -EIOCBRETRY) | 
 | 2326 | 			mlog_errno(status); | 
 | 2327 | 		goto bail; | 
 | 2328 | 	} | 
 | 2329 |  | 
 | 2330 | 	/* Notify the error cleanup path to drop the cluster lock. */ | 
 | 2331 | 	acquired = 1; | 
 | 2332 |  | 
 | 2333 | 	/* We wait twice because a node may have died while we were in | 
 | 2334 | 	 * the lower dlm layers. The second time though, we've | 
 | 2335 | 	 * committed to owning this lock so we don't allow signals to | 
 | 2336 | 	 * abort the operation. */ | 
 | 2337 | 	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) | 
| Joel Becker | 553abd0 | 2008-02-01 12:03:57 -0800 | [diff] [blame] | 2338 | 		ocfs2_wait_for_recovery(osb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2339 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2340 | local: | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 2341 | 	/* | 
 | 2342 | 	 * We only see this flag if we're being called from | 
 | 2343 | 	 * ocfs2_read_locked_inode(). It means we're locking an inode | 
 | 2344 | 	 * which hasn't been populated yet, so clear the refresh flag | 
 | 2345 | 	 * and let the caller handle it. | 
 | 2346 | 	 */ | 
 | 2347 | 	if (inode->i_state & I_NEW) { | 
 | 2348 | 		status = 0; | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2349 | 		if (lockres) | 
 | 2350 | 			ocfs2_complete_lock_res_refresh(lockres, 0); | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 2351 | 		goto bail; | 
 | 2352 | 	} | 
 | 2353 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2354 | 	/* This is fun. The caller may want a bh back, or it may | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2355 | 	 * not. ocfs2_inode_lock_update definitely wants one in, but | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2356 | 	 * may or may not read one, depending on what's in the | 
 | 2357 | 	 * LVB. The result of all of this is that we've *only* gone to | 
 | 2358 | 	 * disk if we have to, so the complexity is worthwhile. */ | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2359 | 	status = ocfs2_inode_lock_update(inode, &local_bh); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2360 | 	if (status < 0) { | 
 | 2361 | 		if (status != -ENOENT) | 
 | 2362 | 			mlog_errno(status); | 
 | 2363 | 		goto bail; | 
 | 2364 | 	} | 
| Tiger Yang | 03efed8 | 2011-05-28 00:34:19 +0800 | [diff] [blame] | 2365 | getbh: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2366 | 	if (ret_bh) { | 
 | 2367 | 		status = ocfs2_assign_bh(inode, ret_bh, local_bh); | 
 | 2368 | 		if (status < 0) { | 
 | 2369 | 			mlog_errno(status); | 
 | 2370 | 			goto bail; | 
 | 2371 | 		} | 
 | 2372 | 	} | 
 | 2373 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2374 | bail: | 
 | 2375 | 	if (status < 0) { | 
 | 2376 | 		if (ret_bh && (*ret_bh)) { | 
 | 2377 | 			brelse(*ret_bh); | 
 | 2378 | 			*ret_bh = NULL; | 
 | 2379 | 		} | 
 | 2380 | 		if (acquired) | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2381 | 			ocfs2_inode_unlock(inode, ex); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2382 | 	} | 
 | 2383 |  | 
 | 2384 | 	if (local_bh) | 
 | 2385 | 		brelse(local_bh); | 
 | 2386 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2387 | 	return status; | 
 | 2388 | } | 
 | 2389 |  | 
 | 2390 | /* | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 2391 |  * This is working around a lock inversion between tasks acquiring DLM | 
 | 2392 |  * locks while holding a page lock and the downconvert thread which | 
 | 2393 |  * blocks dlm lock acquiry while acquiring page locks. | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2394 |  * | 
 | 2395 |  * ** These _with_page variantes are only intended to be called from aop | 
 | 2396 |  * methods that hold page locks and return a very specific *positive* error | 
 | 2397 |  * code that aop methods pass up to the VFS -- test for errors with != 0. ** | 
 | 2398 |  * | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 2399 |  * The DLM is called such that it returns -EAGAIN if it would have | 
 | 2400 |  * blocked waiting for the downconvert thread.  In that case we unlock | 
 | 2401 |  * our page so the downconvert thread can make progress.  Once we've | 
 | 2402 |  * done this we have to return AOP_TRUNCATED_PAGE so the aop method | 
 | 2403 |  * that called us can bubble that back up into the VFS who will then | 
 | 2404 |  * immediately retry the aop call. | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2405 |  * | 
 | 2406 |  * We do a blocking lock and immediate unlock before returning, though, so that | 
 | 2407 |  * the lock has a great chance of being cached on this node by the time the VFS | 
 | 2408 |  * calls back to retry the aop.    This has a potential to livelock as nodes | 
 | 2409 |  * ping locks back and forth, but that's a risk we're willing to take to avoid | 
 | 2410 |  * the lock inversion simply. | 
 | 2411 |  */ | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2412 | int ocfs2_inode_lock_with_page(struct inode *inode, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2413 | 			      struct buffer_head **ret_bh, | 
 | 2414 | 			      int ex, | 
 | 2415 | 			      struct page *page) | 
 | 2416 | { | 
 | 2417 | 	int ret; | 
 | 2418 |  | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2419 | 	ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2420 | 	if (ret == -EAGAIN) { | 
 | 2421 | 		unlock_page(page); | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2422 | 		if (ocfs2_inode_lock(inode, ret_bh, ex) == 0) | 
 | 2423 | 			ocfs2_inode_unlock(inode, ex); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2424 | 		ret = AOP_TRUNCATED_PAGE; | 
 | 2425 | 	} | 
 | 2426 |  | 
 | 2427 | 	return ret; | 
 | 2428 | } | 
 | 2429 |  | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2430 | int ocfs2_inode_lock_atime(struct inode *inode, | 
| Tiger Yang | 7f1a37e | 2006-11-15 15:48:42 +0800 | [diff] [blame] | 2431 | 			  struct vfsmount *vfsmnt, | 
 | 2432 | 			  int *level) | 
 | 2433 | { | 
 | 2434 | 	int ret; | 
 | 2435 |  | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2436 | 	ret = ocfs2_inode_lock(inode, NULL, 0); | 
| Tiger Yang | 7f1a37e | 2006-11-15 15:48:42 +0800 | [diff] [blame] | 2437 | 	if (ret < 0) { | 
 | 2438 | 		mlog_errno(ret); | 
 | 2439 | 		return ret; | 
 | 2440 | 	} | 
 | 2441 |  | 
 | 2442 | 	/* | 
 | 2443 | 	 * If we should update atime, we will get EX lock, | 
 | 2444 | 	 * otherwise we just get PR lock. | 
 | 2445 | 	 */ | 
 | 2446 | 	if (ocfs2_should_update_atime(inode, vfsmnt)) { | 
 | 2447 | 		struct buffer_head *bh = NULL; | 
 | 2448 |  | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2449 | 		ocfs2_inode_unlock(inode, 0); | 
 | 2450 | 		ret = ocfs2_inode_lock(inode, &bh, 1); | 
| Tiger Yang | 7f1a37e | 2006-11-15 15:48:42 +0800 | [diff] [blame] | 2451 | 		if (ret < 0) { | 
 | 2452 | 			mlog_errno(ret); | 
 | 2453 | 			return ret; | 
 | 2454 | 		} | 
 | 2455 | 		*level = 1; | 
 | 2456 | 		if (ocfs2_should_update_atime(inode, vfsmnt)) | 
 | 2457 | 			ocfs2_update_inode_atime(inode, bh); | 
 | 2458 | 		if (bh) | 
 | 2459 | 			brelse(bh); | 
 | 2460 | 	} else | 
 | 2461 | 		*level = 0; | 
 | 2462 |  | 
| Tiger Yang | 7f1a37e | 2006-11-15 15:48:42 +0800 | [diff] [blame] | 2463 | 	return ret; | 
 | 2464 | } | 
 | 2465 |  | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2466 | void ocfs2_inode_unlock(struct inode *inode, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2467 | 		       int ex) | 
 | 2468 | { | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 2469 | 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2470 | 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2471 | 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2472 |  | 
| Mark Fasheh | b0697053 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 2473 | 	mlog(0, "inode %llu drop %s META lock\n", | 
 | 2474 | 	     (unsigned long long)OCFS2_I(inode)->ip_blkno, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2475 | 	     ex ? "EXMODE" : "PRMODE"); | 
 | 2476 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2477 | 	if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && | 
 | 2478 | 	    !ocfs2_mount_local(osb)) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2479 | 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2480 | } | 
 | 2481 |  | 
| Sunil Mushran | df152c2 | 2009-06-22 11:40:07 -0700 | [diff] [blame] | 2482 | int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno) | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 2483 | { | 
 | 2484 | 	struct ocfs2_lock_res *lockres; | 
 | 2485 | 	struct ocfs2_orphan_scan_lvb *lvb; | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 2486 | 	int status = 0; | 
 | 2487 |  | 
| Sunil Mushran | df152c2 | 2009-06-22 11:40:07 -0700 | [diff] [blame] | 2488 | 	if (ocfs2_is_hard_readonly(osb)) | 
 | 2489 | 		return -EROFS; | 
 | 2490 |  | 
 | 2491 | 	if (ocfs2_mount_local(osb)) | 
 | 2492 | 		return 0; | 
 | 2493 |  | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 2494 | 	lockres = &osb->osb_orphan_scan.os_lockres; | 
| Sunil Mushran | df152c2 | 2009-06-22 11:40:07 -0700 | [diff] [blame] | 2495 | 	status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 2496 | 	if (status < 0) | 
 | 2497 | 		return status; | 
 | 2498 |  | 
 | 2499 | 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | 
| Joel Becker | 1c520df | 2009-06-19 15:14:13 -0700 | [diff] [blame] | 2500 | 	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && | 
 | 2501 | 	    lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION) | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 2502 | 		*seqno = be32_to_cpu(lvb->lvb_os_seqno); | 
| Sunil Mushran | 3211949 | 2009-06-19 16:53:18 -0700 | [diff] [blame] | 2503 | 	else | 
 | 2504 | 		*seqno = osb->osb_orphan_scan.os_seqno + 1; | 
 | 2505 |  | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 2506 | 	return status; | 
 | 2507 | } | 
 | 2508 |  | 
| Sunil Mushran | df152c2 | 2009-06-22 11:40:07 -0700 | [diff] [blame] | 2509 | void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno) | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 2510 | { | 
 | 2511 | 	struct ocfs2_lock_res *lockres; | 
 | 2512 | 	struct ocfs2_orphan_scan_lvb *lvb; | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 2513 |  | 
| Sunil Mushran | df152c2 | 2009-06-22 11:40:07 -0700 | [diff] [blame] | 2514 | 	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) { | 
 | 2515 | 		lockres = &osb->osb_orphan_scan.os_lockres; | 
 | 2516 | 		lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | 
 | 2517 | 		lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; | 
 | 2518 | 		lvb->lvb_os_seqno = cpu_to_be32(seqno); | 
 | 2519 | 		ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); | 
 | 2520 | 	} | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 2521 | } | 
 | 2522 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2523 | int ocfs2_super_lock(struct ocfs2_super *osb, | 
 | 2524 | 		     int ex) | 
 | 2525 | { | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2526 | 	int status = 0; | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 2527 | 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2528 | 	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2529 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2530 | 	if (ocfs2_is_hard_readonly(osb)) | 
 | 2531 | 		return -EROFS; | 
 | 2532 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2533 | 	if (ocfs2_mount_local(osb)) | 
 | 2534 | 		goto bail; | 
 | 2535 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2536 | 	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); | 
 | 2537 | 	if (status < 0) { | 
 | 2538 | 		mlog_errno(status); | 
 | 2539 | 		goto bail; | 
 | 2540 | 	} | 
 | 2541 |  | 
 | 2542 | 	/* The super block lock path is really in the best position to | 
 | 2543 | 	 * know when resources covered by the lock need to be | 
 | 2544 | 	 * refreshed, so we do it here. Of course, making sense of | 
 | 2545 | 	 * everything is up to the caller :) */ | 
 | 2546 | 	status = ocfs2_should_refresh_lock_res(lockres); | 
 | 2547 | 	if (status < 0) { | 
 | 2548 | 		mlog_errno(status); | 
 | 2549 | 		goto bail; | 
 | 2550 | 	} | 
 | 2551 | 	if (status) { | 
| Mark Fasheh | 8e8a460 | 2008-02-01 11:59:09 -0800 | [diff] [blame] | 2552 | 		status = ocfs2_refresh_slot_info(osb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2553 |  | 
 | 2554 | 		ocfs2_complete_lock_res_refresh(lockres, status); | 
 | 2555 |  | 
 | 2556 | 		if (status < 0) | 
 | 2557 | 			mlog_errno(status); | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 2558 | 		ocfs2_track_lock_refresh(lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2559 | 	} | 
 | 2560 | bail: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2561 | 	return status; | 
 | 2562 | } | 
 | 2563 |  | 
 | 2564 | void ocfs2_super_unlock(struct ocfs2_super *osb, | 
 | 2565 | 			int ex) | 
 | 2566 | { | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 2567 | 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2568 | 	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; | 
 | 2569 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2570 | 	if (!ocfs2_mount_local(osb)) | 
 | 2571 | 		ocfs2_cluster_unlock(osb, lockres, level); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2572 | } | 
 | 2573 |  | 
 | 2574 | int ocfs2_rename_lock(struct ocfs2_super *osb) | 
 | 2575 | { | 
 | 2576 | 	int status; | 
 | 2577 | 	struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; | 
 | 2578 |  | 
 | 2579 | 	if (ocfs2_is_hard_readonly(osb)) | 
 | 2580 | 		return -EROFS; | 
 | 2581 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2582 | 	if (ocfs2_mount_local(osb)) | 
 | 2583 | 		return 0; | 
 | 2584 |  | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 2585 | 	status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2586 | 	if (status < 0) | 
 | 2587 | 		mlog_errno(status); | 
 | 2588 |  | 
 | 2589 | 	return status; | 
 | 2590 | } | 
 | 2591 |  | 
 | 2592 | void ocfs2_rename_unlock(struct ocfs2_super *osb) | 
 | 2593 | { | 
 | 2594 | 	struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; | 
 | 2595 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2596 | 	if (!ocfs2_mount_local(osb)) | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 2597 | 		ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2598 | } | 
 | 2599 |  | 
| wengang wang | 6ca497a | 2009-03-06 21:29:10 +0800 | [diff] [blame] | 2600 | int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex) | 
 | 2601 | { | 
 | 2602 | 	int status; | 
 | 2603 | 	struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres; | 
 | 2604 |  | 
 | 2605 | 	if (ocfs2_is_hard_readonly(osb)) | 
 | 2606 | 		return -EROFS; | 
 | 2607 |  | 
 | 2608 | 	if (ocfs2_mount_local(osb)) | 
 | 2609 | 		return 0; | 
 | 2610 |  | 
 | 2611 | 	status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE, | 
 | 2612 | 				    0, 0); | 
 | 2613 | 	if (status < 0) | 
 | 2614 | 		mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status); | 
 | 2615 |  | 
 | 2616 | 	return status; | 
 | 2617 | } | 
 | 2618 |  | 
 | 2619 | void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex) | 
 | 2620 | { | 
 | 2621 | 	struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres; | 
 | 2622 |  | 
 | 2623 | 	if (!ocfs2_mount_local(osb)) | 
 | 2624 | 		ocfs2_cluster_unlock(osb, lockres, | 
 | 2625 | 				     ex ? LKM_EXMODE : LKM_PRMODE); | 
 | 2626 | } | 
 | 2627 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 2628 | int ocfs2_dentry_lock(struct dentry *dentry, int ex) | 
 | 2629 | { | 
 | 2630 | 	int ret; | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 2631 | 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 2632 | 	struct ocfs2_dentry_lock *dl = dentry->d_fsdata; | 
 | 2633 | 	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); | 
 | 2634 |  | 
 | 2635 | 	BUG_ON(!dl); | 
 | 2636 |  | 
| Tiger Yang | 03efed8 | 2011-05-28 00:34:19 +0800 | [diff] [blame] | 2637 | 	if (ocfs2_is_hard_readonly(osb)) { | 
 | 2638 | 		if (ex) | 
 | 2639 | 			return -EROFS; | 
 | 2640 | 		return 0; | 
 | 2641 | 	} | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 2642 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2643 | 	if (ocfs2_mount_local(osb)) | 
 | 2644 | 		return 0; | 
 | 2645 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 2646 | 	ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0); | 
 | 2647 | 	if (ret < 0) | 
 | 2648 | 		mlog_errno(ret); | 
 | 2649 |  | 
 | 2650 | 	return ret; | 
 | 2651 | } | 
 | 2652 |  | 
 | 2653 | void ocfs2_dentry_unlock(struct dentry *dentry, int ex) | 
 | 2654 | { | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 2655 | 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 2656 | 	struct ocfs2_dentry_lock *dl = dentry->d_fsdata; | 
 | 2657 | 	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); | 
 | 2658 |  | 
| Tiger Yang | 03efed8 | 2011-05-28 00:34:19 +0800 | [diff] [blame] | 2659 | 	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2660 | 		ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 2661 | } | 
 | 2662 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2663 | /* Reference counting of the dlm debug structure. We want this because | 
 | 2664 |  * open references on the debug inodes can live on after a mount, so | 
 | 2665 |  * we can't rely on the ocfs2_super to always exist. */ | 
 | 2666 | static void ocfs2_dlm_debug_free(struct kref *kref) | 
 | 2667 | { | 
 | 2668 | 	struct ocfs2_dlm_debug *dlm_debug; | 
 | 2669 |  | 
 | 2670 | 	dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt); | 
 | 2671 |  | 
 | 2672 | 	kfree(dlm_debug); | 
 | 2673 | } | 
 | 2674 |  | 
 | 2675 | void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug) | 
 | 2676 | { | 
 | 2677 | 	if (dlm_debug) | 
 | 2678 | 		kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free); | 
 | 2679 | } | 
 | 2680 |  | 
 | 2681 | static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug) | 
 | 2682 | { | 
 | 2683 | 	kref_get(&debug->d_refcnt); | 
 | 2684 | } | 
 | 2685 |  | 
 | 2686 | struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void) | 
 | 2687 | { | 
 | 2688 | 	struct ocfs2_dlm_debug *dlm_debug; | 
 | 2689 |  | 
 | 2690 | 	dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL); | 
 | 2691 | 	if (!dlm_debug) { | 
 | 2692 | 		mlog_errno(-ENOMEM); | 
 | 2693 | 		goto out; | 
 | 2694 | 	} | 
 | 2695 |  | 
 | 2696 | 	kref_init(&dlm_debug->d_refcnt); | 
 | 2697 | 	INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking); | 
 | 2698 | 	dlm_debug->d_locking_state = NULL; | 
 | 2699 | out: | 
 | 2700 | 	return dlm_debug; | 
 | 2701 | } | 
 | 2702 |  | 
/* Access to this is arbitrated for us via seq_file->sem. */
struct ocfs2_dlm_seq_priv {
	/* Debug structure whose d_lockres_tracking list is walked. */
	struct ocfs2_dlm_debug *p_dlm_debug;
	/* Dummy lockres linked into the tracking list to remember the
	 * current iteration position. */
	struct ocfs2_lock_res p_iter_res;
	/* Private copy of the lockres currently being shown, taken
	 * under ocfs2_dlm_tracking_lock. */
	struct ocfs2_lock_res p_tmp_res;
};
 | 2709 |  | 
 | 2710 | static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start, | 
 | 2711 | 						 struct ocfs2_dlm_seq_priv *priv) | 
 | 2712 | { | 
 | 2713 | 	struct ocfs2_lock_res *iter, *ret = NULL; | 
 | 2714 | 	struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug; | 
 | 2715 |  | 
 | 2716 | 	assert_spin_locked(&ocfs2_dlm_tracking_lock); | 
 | 2717 |  | 
 | 2718 | 	list_for_each_entry(iter, &start->l_debug_list, l_debug_list) { | 
 | 2719 | 		/* discover the head of the list */ | 
 | 2720 | 		if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) { | 
 | 2721 | 			mlog(0, "End of list found, %p\n", ret); | 
 | 2722 | 			break; | 
 | 2723 | 		} | 
 | 2724 |  | 
 | 2725 | 		/* We track our "dummy" iteration lockres' by a NULL | 
 | 2726 | 		 * l_ops field. */ | 
 | 2727 | 		if (iter->l_ops != NULL) { | 
 | 2728 | 			ret = iter; | 
 | 2729 | 			break; | 
 | 2730 | 		} | 
 | 2731 | 	} | 
 | 2732 |  | 
 | 2733 | 	return ret; | 
 | 2734 | } | 
 | 2735 |  | 
 | 2736 | static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos) | 
 | 2737 | { | 
 | 2738 | 	struct ocfs2_dlm_seq_priv *priv = m->private; | 
 | 2739 | 	struct ocfs2_lock_res *iter; | 
 | 2740 |  | 
 | 2741 | 	spin_lock(&ocfs2_dlm_tracking_lock); | 
 | 2742 | 	iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv); | 
 | 2743 | 	if (iter) { | 
 | 2744 | 		/* Since lockres' have the lifetime of their container | 
 | 2745 | 		 * (which can be inodes, ocfs2_supers, etc) we want to | 
 | 2746 | 		 * copy this out to a temporary lockres while still | 
 | 2747 | 		 * under the spinlock. Obviously after this we can't | 
 | 2748 | 		 * trust any pointers on the copy returned, but that's | 
 | 2749 | 		 * ok as the information we want isn't typically held | 
 | 2750 | 		 * in them. */ | 
 | 2751 | 		priv->p_tmp_res = *iter; | 
 | 2752 | 		iter = &priv->p_tmp_res; | 
 | 2753 | 	} | 
 | 2754 | 	spin_unlock(&ocfs2_dlm_tracking_lock); | 
 | 2755 |  | 
 | 2756 | 	return iter; | 
 | 2757 | } | 
 | 2758 |  | 
 | 2759 | static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v) | 
 | 2760 | { | 
 | 2761 | } | 
 | 2762 |  | 
 | 2763 | static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos) | 
 | 2764 | { | 
 | 2765 | 	struct ocfs2_dlm_seq_priv *priv = m->private; | 
 | 2766 | 	struct ocfs2_lock_res *iter = v; | 
 | 2767 | 	struct ocfs2_lock_res *dummy = &priv->p_iter_res; | 
 | 2768 |  | 
 | 2769 | 	spin_lock(&ocfs2_dlm_tracking_lock); | 
 | 2770 | 	iter = ocfs2_dlm_next_res(iter, priv); | 
 | 2771 | 	list_del_init(&dummy->l_debug_list); | 
 | 2772 | 	if (iter) { | 
 | 2773 | 		list_add(&dummy->l_debug_list, &iter->l_debug_list); | 
 | 2774 | 		priv->p_tmp_res = *iter; | 
 | 2775 | 		iter = &priv->p_tmp_res; | 
 | 2776 | 	} | 
 | 2777 | 	spin_unlock(&ocfs2_dlm_tracking_lock); | 
 | 2778 |  | 
 | 2779 | 	return iter; | 
 | 2780 | } | 
 | 2781 |  | 
| Sunil Mushran | 5bc970e | 2010-12-28 23:26:03 -0800 | [diff] [blame] | 2782 | /* | 
 | 2783 |  * Version is used by debugfs.ocfs2 to determine the format being used | 
 | 2784 |  * | 
 | 2785 |  * New in version 2 | 
 | 2786 |  *	- Lock stats printed | 
 | 2787 |  * New in version 3 | 
 | 2788 |  *	- Max time in lock stats is in usecs (instead of nsecs) | 
 | 2789 |  */ | 
 | 2790 | #define OCFS2_DLM_DEBUG_STR_VERSION 3 | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2791 | static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) | 
 | 2792 | { | 
 | 2793 | 	int i; | 
 | 2794 | 	char *lvb; | 
 | 2795 | 	struct ocfs2_lock_res *lockres = v; | 
 | 2796 |  | 
 | 2797 | 	if (!lockres) | 
 | 2798 | 		return -EINVAL; | 
 | 2799 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 2800 | 	seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION); | 
 | 2801 |  | 
 | 2802 | 	if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY) | 
 | 2803 | 		seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1, | 
 | 2804 | 			   lockres->l_name, | 
 | 2805 | 			   (unsigned int)ocfs2_get_dentry_lock_ino(lockres)); | 
 | 2806 | 	else | 
 | 2807 | 		seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name); | 
 | 2808 |  | 
 | 2809 | 	seq_printf(m, "%d\t" | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2810 | 		   "0x%lx\t" | 
 | 2811 | 		   "0x%x\t" | 
 | 2812 | 		   "0x%x\t" | 
 | 2813 | 		   "%u\t" | 
 | 2814 | 		   "%u\t" | 
 | 2815 | 		   "%d\t" | 
 | 2816 | 		   "%d\t", | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2817 | 		   lockres->l_level, | 
 | 2818 | 		   lockres->l_flags, | 
 | 2819 | 		   lockres->l_action, | 
 | 2820 | 		   lockres->l_unlock_action, | 
 | 2821 | 		   lockres->l_ro_holders, | 
 | 2822 | 		   lockres->l_ex_holders, | 
 | 2823 | 		   lockres->l_requested, | 
 | 2824 | 		   lockres->l_blocking); | 
 | 2825 |  | 
 | 2826 | 	/* Dump the raw LVB */ | 
| Joel Becker | 8f2c9c1 | 2008-02-01 12:16:57 -0800 | [diff] [blame] | 2827 | 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2828 | 	for(i = 0; i < DLM_LVB_LEN; i++) | 
 | 2829 | 		seq_printf(m, "0x%x\t", lvb[i]); | 
 | 2830 |  | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 2831 | #ifdef CONFIG_OCFS2_FS_STATS | 
| Sunil Mushran | 5bc970e | 2010-12-28 23:26:03 -0800 | [diff] [blame] | 2832 | # define lock_num_prmode(_l)		((_l)->l_lock_prmode.ls_gets) | 
 | 2833 | # define lock_num_exmode(_l)		((_l)->l_lock_exmode.ls_gets) | 
 | 2834 | # define lock_num_prmode_failed(_l)	((_l)->l_lock_prmode.ls_fail) | 
 | 2835 | # define lock_num_exmode_failed(_l)	((_l)->l_lock_exmode.ls_fail) | 
 | 2836 | # define lock_total_prmode(_l)		((_l)->l_lock_prmode.ls_total) | 
 | 2837 | # define lock_total_exmode(_l)		((_l)->l_lock_exmode.ls_total) | 
 | 2838 | # define lock_max_prmode(_l)		((_l)->l_lock_prmode.ls_max) | 
 | 2839 | # define lock_max_exmode(_l)		((_l)->l_lock_exmode.ls_max) | 
 | 2840 | # define lock_refresh(_l)		((_l)->l_lock_refresh) | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 2841 | #else | 
| Sunil Mushran | 5bc970e | 2010-12-28 23:26:03 -0800 | [diff] [blame] | 2842 | # define lock_num_prmode(_l)		(0) | 
 | 2843 | # define lock_num_exmode(_l)		(0) | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 2844 | # define lock_num_prmode_failed(_l)	(0) | 
 | 2845 | # define lock_num_exmode_failed(_l)	(0) | 
| Randy Dunlap | dd25e55 | 2008-05-28 14:41:00 -0700 | [diff] [blame] | 2846 | # define lock_total_prmode(_l)		(0ULL) | 
 | 2847 | # define lock_total_exmode(_l)		(0ULL) | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 2848 | # define lock_max_prmode(_l)		(0) | 
 | 2849 | # define lock_max_exmode(_l)		(0) | 
 | 2850 | # define lock_refresh(_l)		(0) | 
 | 2851 | #endif | 
 | 2852 | 	/* The following seq_print was added in version 2 of this output */ | 
| Sunil Mushran | 5bc970e | 2010-12-28 23:26:03 -0800 | [diff] [blame] | 2853 | 	seq_printf(m, "%u\t" | 
 | 2854 | 		   "%u\t" | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 2855 | 		   "%u\t" | 
 | 2856 | 		   "%u\t" | 
 | 2857 | 		   "%llu\t" | 
 | 2858 | 		   "%llu\t" | 
 | 2859 | 		   "%u\t" | 
 | 2860 | 		   "%u\t" | 
 | 2861 | 		   "%u\t", | 
 | 2862 | 		   lock_num_prmode(lockres), | 
 | 2863 | 		   lock_num_exmode(lockres), | 
 | 2864 | 		   lock_num_prmode_failed(lockres), | 
 | 2865 | 		   lock_num_exmode_failed(lockres), | 
 | 2866 | 		   lock_total_prmode(lockres), | 
 | 2867 | 		   lock_total_exmode(lockres), | 
 | 2868 | 		   lock_max_prmode(lockres), | 
 | 2869 | 		   lock_max_exmode(lockres), | 
 | 2870 | 		   lock_refresh(lockres)); | 
 | 2871 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2872 | 	/* End the line */ | 
 | 2873 | 	seq_printf(m, "\n"); | 
 | 2874 | 	return 0; | 
 | 2875 | } | 
 | 2876 |  | 
| Jan Engelhardt | 90d9977 | 2008-01-22 20:52:20 +0100 | [diff] [blame] | 2877 | static const struct seq_operations ocfs2_dlm_seq_ops = { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2878 | 	.start =	ocfs2_dlm_seq_start, | 
 | 2879 | 	.stop =		ocfs2_dlm_seq_stop, | 
 | 2880 | 	.next =		ocfs2_dlm_seq_next, | 
 | 2881 | 	.show =		ocfs2_dlm_seq_show, | 
 | 2882 | }; | 
 | 2883 |  | 
 | 2884 | static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file) | 
 | 2885 | { | 
| Joe Perches | 33fa1d9 | 2010-07-12 13:50:19 -0700 | [diff] [blame] | 2886 | 	struct seq_file *seq = file->private_data; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2887 | 	struct ocfs2_dlm_seq_priv *priv = seq->private; | 
 | 2888 | 	struct ocfs2_lock_res *res = &priv->p_iter_res; | 
 | 2889 |  | 
 | 2890 | 	ocfs2_remove_lockres_tracking(res); | 
 | 2891 | 	ocfs2_put_dlm_debug(priv->p_dlm_debug); | 
 | 2892 | 	return seq_release_private(inode, file); | 
 | 2893 | } | 
 | 2894 |  | 
 | 2895 | static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) | 
 | 2896 | { | 
 | 2897 | 	int ret; | 
 | 2898 | 	struct ocfs2_dlm_seq_priv *priv; | 
 | 2899 | 	struct seq_file *seq; | 
 | 2900 | 	struct ocfs2_super *osb; | 
 | 2901 |  | 
 | 2902 | 	priv = kzalloc(sizeof(struct ocfs2_dlm_seq_priv), GFP_KERNEL); | 
 | 2903 | 	if (!priv) { | 
 | 2904 | 		ret = -ENOMEM; | 
 | 2905 | 		mlog_errno(ret); | 
 | 2906 | 		goto out; | 
 | 2907 | 	} | 
| Theodore Ts'o | 8e18e29 | 2006-09-27 01:50:46 -0700 | [diff] [blame] | 2908 | 	osb = inode->i_private; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2909 | 	ocfs2_get_dlm_debug(osb->osb_dlm_debug); | 
 | 2910 | 	priv->p_dlm_debug = osb->osb_dlm_debug; | 
 | 2911 | 	INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list); | 
 | 2912 |  | 
 | 2913 | 	ret = seq_open(file, &ocfs2_dlm_seq_ops); | 
 | 2914 | 	if (ret) { | 
 | 2915 | 		kfree(priv); | 
 | 2916 | 		mlog_errno(ret); | 
 | 2917 | 		goto out; | 
 | 2918 | 	} | 
 | 2919 |  | 
| Joe Perches | 33fa1d9 | 2010-07-12 13:50:19 -0700 | [diff] [blame] | 2920 | 	seq = file->private_data; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2921 | 	seq->private = priv; | 
 | 2922 |  | 
 | 2923 | 	ocfs2_add_lockres_tracking(&priv->p_iter_res, | 
 | 2924 | 				   priv->p_dlm_debug); | 
 | 2925 |  | 
 | 2926 | out: | 
 | 2927 | 	return ret; | 
 | 2928 | } | 
 | 2929 |  | 
| Arjan van de Ven | 4b6f5d2 | 2006-03-28 01:56:42 -0800 | [diff] [blame] | 2930 | static const struct file_operations ocfs2_dlm_debug_fops = { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2931 | 	.open =		ocfs2_dlm_debug_open, | 
 | 2932 | 	.release =	ocfs2_dlm_debug_release, | 
 | 2933 | 	.read =		seq_read, | 
 | 2934 | 	.llseek =	seq_lseek, | 
 | 2935 | }; | 
 | 2936 |  | 
 | 2937 | static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) | 
 | 2938 | { | 
 | 2939 | 	int ret = 0; | 
 | 2940 | 	struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; | 
 | 2941 |  | 
 | 2942 | 	dlm_debug->d_locking_state = debugfs_create_file("locking_state", | 
 | 2943 | 							 S_IFREG|S_IRUSR, | 
 | 2944 | 							 osb->osb_debug_root, | 
 | 2945 | 							 osb, | 
 | 2946 | 							 &ocfs2_dlm_debug_fops); | 
 | 2947 | 	if (!dlm_debug->d_locking_state) { | 
 | 2948 | 		ret = -EINVAL; | 
 | 2949 | 		mlog(ML_ERROR, | 
 | 2950 | 		     "Unable to create locking state debugfs file.\n"); | 
 | 2951 | 		goto out; | 
 | 2952 | 	} | 
 | 2953 |  | 
 | 2954 | 	ocfs2_get_dlm_debug(dlm_debug); | 
 | 2955 | out: | 
 | 2956 | 	return ret; | 
 | 2957 | } | 
 | 2958 |  | 
 | 2959 | static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) | 
 | 2960 | { | 
 | 2961 | 	struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; | 
 | 2962 |  | 
 | 2963 | 	if (dlm_debug) { | 
 | 2964 | 		debugfs_remove(dlm_debug->d_locking_state); | 
 | 2965 | 		ocfs2_put_dlm_debug(dlm_debug); | 
 | 2966 | 	} | 
 | 2967 | } | 
 | 2968 |  | 
 | 2969 | int ocfs2_dlm_init(struct ocfs2_super *osb) | 
 | 2970 | { | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2971 | 	int status = 0; | 
| Joel Becker | 4670c46 | 2008-02-01 14:39:35 -0800 | [diff] [blame] | 2972 | 	struct ocfs2_cluster_connection *conn = NULL; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2973 |  | 
| Mark Fasheh | 0abd6d1 | 2008-01-29 16:59:56 -0800 | [diff] [blame] | 2974 | 	if (ocfs2_mount_local(osb)) { | 
 | 2975 | 		osb->node_num = 0; | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2976 | 		goto local; | 
| Mark Fasheh | 0abd6d1 | 2008-01-29 16:59:56 -0800 | [diff] [blame] | 2977 | 	} | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2978 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2979 | 	status = ocfs2_dlm_init_debug(osb); | 
 | 2980 | 	if (status < 0) { | 
 | 2981 | 		mlog_errno(status); | 
 | 2982 | 		goto bail; | 
 | 2983 | 	} | 
 | 2984 |  | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 2985 | 	/* launch downconvert thread */ | 
 | 2986 | 	osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc"); | 
 | 2987 | 	if (IS_ERR(osb->dc_task)) { | 
 | 2988 | 		status = PTR_ERR(osb->dc_task); | 
 | 2989 | 		osb->dc_task = NULL; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2990 | 		mlog_errno(status); | 
 | 2991 | 		goto bail; | 
 | 2992 | 	} | 
 | 2993 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2994 | 	/* for now, uuid == domain */ | 
| Joel Becker | 9c6c877 | 2008-02-01 15:17:30 -0800 | [diff] [blame] | 2995 | 	status = ocfs2_cluster_connect(osb->osb_cluster_stack, | 
 | 2996 | 				       osb->uuid_str, | 
| Joel Becker | 4670c46 | 2008-02-01 14:39:35 -0800 | [diff] [blame] | 2997 | 				       strlen(osb->uuid_str), | 
| Joel Becker | 553b5eb | 2010-01-29 17:19:06 -0800 | [diff] [blame] | 2998 | 				       &lproto, ocfs2_do_node_down, osb, | 
| Joel Becker | 4670c46 | 2008-02-01 14:39:35 -0800 | [diff] [blame] | 2999 | 				       &conn); | 
 | 3000 | 	if (status) { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3001 | 		mlog_errno(status); | 
 | 3002 | 		goto bail; | 
 | 3003 | 	} | 
 | 3004 |  | 
| Mark Fasheh | 0abd6d1 | 2008-01-29 16:59:56 -0800 | [diff] [blame] | 3005 | 	status = ocfs2_cluster_this_node(&osb->node_num); | 
 | 3006 | 	if (status < 0) { | 
 | 3007 | 		mlog_errno(status); | 
 | 3008 | 		mlog(ML_ERROR, | 
 | 3009 | 		     "could not find this host's node number\n"); | 
| Joel Becker | 286eaa9 | 2008-02-01 15:03:57 -0800 | [diff] [blame] | 3010 | 		ocfs2_cluster_disconnect(conn, 0); | 
| Mark Fasheh | 0abd6d1 | 2008-01-29 16:59:56 -0800 | [diff] [blame] | 3011 | 		goto bail; | 
 | 3012 | 	} | 
 | 3013 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 3014 | local: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3015 | 	ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); | 
 | 3016 | 	ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); | 
| wengang wang | 6ca497a | 2009-03-06 21:29:10 +0800 | [diff] [blame] | 3017 | 	ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb); | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 3018 | 	ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3019 |  | 
| Joel Becker | 4670c46 | 2008-02-01 14:39:35 -0800 | [diff] [blame] | 3020 | 	osb->cconn = conn; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3021 |  | 
 | 3022 | 	status = 0; | 
 | 3023 | bail: | 
 | 3024 | 	if (status < 0) { | 
 | 3025 | 		ocfs2_dlm_shutdown_debug(osb); | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 3026 | 		if (osb->dc_task) | 
 | 3027 | 			kthread_stop(osb->dc_task); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3028 | 	} | 
 | 3029 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3030 | 	return status; | 
 | 3031 | } | 
 | 3032 |  | 
| Joel Becker | 286eaa9 | 2008-02-01 15:03:57 -0800 | [diff] [blame] | 3033 | void ocfs2_dlm_shutdown(struct ocfs2_super *osb, | 
 | 3034 | 			int hangup_pending) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3035 | { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3036 | 	ocfs2_drop_osb_locks(osb); | 
 | 3037 |  | 
| Joel Becker | 4670c46 | 2008-02-01 14:39:35 -0800 | [diff] [blame] | 3038 | 	/* | 
 | 3039 | 	 * Now that we have dropped all locks and ocfs2_dismount_volume() | 
 | 3040 | 	 * has disabled recovery, the DLM won't be talking to us.  It's | 
 | 3041 | 	 * safe to tear things down before disconnecting the cluster. | 
 | 3042 | 	 */ | 
 | 3043 |  | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 3044 | 	if (osb->dc_task) { | 
 | 3045 | 		kthread_stop(osb->dc_task); | 
 | 3046 | 		osb->dc_task = NULL; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3047 | 	} | 
 | 3048 |  | 
 | 3049 | 	ocfs2_lock_res_free(&osb->osb_super_lockres); | 
 | 3050 | 	ocfs2_lock_res_free(&osb->osb_rename_lockres); | 
| wengang wang | 6ca497a | 2009-03-06 21:29:10 +0800 | [diff] [blame] | 3051 | 	ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres); | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 3052 | 	ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3053 |  | 
| Joel Becker | 286eaa9 | 2008-02-01 15:03:57 -0800 | [diff] [blame] | 3054 | 	ocfs2_cluster_disconnect(osb->cconn, hangup_pending); | 
| Joel Becker | 4670c46 | 2008-02-01 14:39:35 -0800 | [diff] [blame] | 3055 | 	osb->cconn = NULL; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3056 |  | 
 | 3057 | 	ocfs2_dlm_shutdown_debug(osb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3058 | } | 
 | 3059 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3060 | static int ocfs2_drop_lock(struct ocfs2_super *osb, | 
| Mark Fasheh | 0d5dc6c | 2006-09-14 14:44:51 -0700 | [diff] [blame] | 3061 | 			   struct ocfs2_lock_res *lockres) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3062 | { | 
| Joel Becker | 7431cd7 | 2008-02-01 12:15:37 -0800 | [diff] [blame] | 3063 | 	int ret; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3064 | 	unsigned long flags; | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 3065 | 	u32 lkm_flags = 0; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3066 |  | 
 | 3067 | 	/* We didn't get anywhere near actually using this lockres. */ | 
 | 3068 | 	if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) | 
 | 3069 | 		goto out; | 
 | 3070 |  | 
| Mark Fasheh | b80fc01 | 2006-09-12 22:08:14 -0700 | [diff] [blame] | 3071 | 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 3072 | 		lkm_flags |= DLM_LKF_VALBLK; | 
| Mark Fasheh | b80fc01 | 2006-09-12 22:08:14 -0700 | [diff] [blame] | 3073 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3074 | 	spin_lock_irqsave(&lockres->l_lock, flags); | 
 | 3075 |  | 
 | 3076 | 	mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING), | 
 | 3077 | 			"lockres %s, flags 0x%lx\n", | 
 | 3078 | 			lockres->l_name, lockres->l_flags); | 
 | 3079 |  | 
 | 3080 | 	while (lockres->l_flags & OCFS2_LOCK_BUSY) { | 
 | 3081 | 		mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = " | 
 | 3082 | 		     "%u, unlock_action = %u\n", | 
 | 3083 | 		     lockres->l_name, lockres->l_flags, lockres->l_action, | 
 | 3084 | 		     lockres->l_unlock_action); | 
 | 3085 |  | 
 | 3086 | 		spin_unlock_irqrestore(&lockres->l_lock, flags); | 
 | 3087 |  | 
 | 3088 | 		/* XXX: Today we just wait on any busy | 
 | 3089 | 		 * locks... Perhaps we need to cancel converts in the | 
 | 3090 | 		 * future? */ | 
 | 3091 | 		ocfs2_wait_on_busy_lock(lockres); | 
 | 3092 |  | 
 | 3093 | 		spin_lock_irqsave(&lockres->l_lock, flags); | 
 | 3094 | 	} | 
 | 3095 |  | 
| Mark Fasheh | 0d5dc6c | 2006-09-14 14:44:51 -0700 | [diff] [blame] | 3096 | 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { | 
 | 3097 | 		if (lockres->l_flags & OCFS2_LOCK_ATTACHED && | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 3098 | 		    lockres->l_level == DLM_LOCK_EX && | 
| Mark Fasheh | 0d5dc6c | 2006-09-14 14:44:51 -0700 | [diff] [blame] | 3099 | 		    !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) | 
 | 3100 | 			lockres->l_ops->set_lvb(lockres); | 
 | 3101 | 	} | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3102 |  | 
 | 3103 | 	if (lockres->l_flags & OCFS2_LOCK_BUSY) | 
 | 3104 | 		mlog(ML_ERROR, "destroying busy lock: \"%s\"\n", | 
 | 3105 | 		     lockres->l_name); | 
 | 3106 | 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED) | 
 | 3107 | 		mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name); | 
 | 3108 |  | 
 | 3109 | 	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { | 
 | 3110 | 		spin_unlock_irqrestore(&lockres->l_lock, flags); | 
 | 3111 | 		goto out; | 
 | 3112 | 	} | 
 | 3113 |  | 
 | 3114 | 	lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED); | 
 | 3115 |  | 
 | 3116 | 	/* make sure we never get here while waiting for an ast to | 
 | 3117 | 	 * fire. */ | 
 | 3118 | 	BUG_ON(lockres->l_action != OCFS2_AST_INVALID); | 
 | 3119 |  | 
 | 3120 | 	/* is this necessary? */ | 
 | 3121 | 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 
 | 3122 | 	lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK; | 
 | 3123 | 	spin_unlock_irqrestore(&lockres->l_lock, flags); | 
 | 3124 |  | 
 | 3125 | 	mlog(0, "lock %s\n", lockres->l_name); | 
 | 3126 |  | 
| Joel Becker | a796d28 | 2010-01-28 19:22:39 -0800 | [diff] [blame] | 3127 | 	ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags); | 
| Joel Becker | 7431cd7 | 2008-02-01 12:15:37 -0800 | [diff] [blame] | 3128 | 	if (ret) { | 
 | 3129 | 		ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3130 | 		mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); | 
| Joel Becker | cf0acdc | 2008-01-29 16:59:55 -0800 | [diff] [blame] | 3131 | 		ocfs2_dlm_dump_lksb(&lockres->l_lksb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3132 | 		BUG(); | 
 | 3133 | 	} | 
| Coly Li | 73ac36e | 2009-01-07 18:09:16 -0800 | [diff] [blame] | 3134 | 	mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n", | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3135 | 	     lockres->l_name); | 
 | 3136 |  | 
 | 3137 | 	ocfs2_wait_on_busy_lock(lockres); | 
 | 3138 | out: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3139 | 	return 0; | 
 | 3140 | } | 
 | 3141 |  | 
 | 3142 | /* Mark the lockres as being dropped. It will no longer be | 
 | 3143 |  * queued if blocking, but we still may have to wait on it | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 3144 |  * being dequeued from the downconvert thread before we can consider | 
| Sunil Mushran | 2bd6321 | 2010-01-25 16:57:38 -0800 | [diff] [blame] | 3145 |  * it safe to drop. | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3146 |  * | 
 | 3147 |  * You can *not* attempt to call cluster_lock on this lockres anymore. */ | 
 | 3148 | void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres) | 
 | 3149 | { | 
 | 3150 | 	int status; | 
 | 3151 | 	struct ocfs2_mask_waiter mw; | 
 | 3152 | 	unsigned long flags; | 
 | 3153 |  | 
 | 3154 | 	ocfs2_init_mask_waiter(&mw); | 
 | 3155 |  | 
 | 3156 | 	spin_lock_irqsave(&lockres->l_lock, flags); | 
 | 3157 | 	lockres->l_flags |= OCFS2_LOCK_FREEING; | 
 | 3158 | 	while (lockres->l_flags & OCFS2_LOCK_QUEUED) { | 
 | 3159 | 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0); | 
 | 3160 | 		spin_unlock_irqrestore(&lockres->l_lock, flags); | 
 | 3161 |  | 
 | 3162 | 		mlog(0, "Waiting on lockres %s\n", lockres->l_name); | 
 | 3163 |  | 
 | 3164 | 		status = ocfs2_wait_for_mask(&mw); | 
 | 3165 | 		if (status) | 
 | 3166 | 			mlog_errno(status); | 
 | 3167 |  | 
 | 3168 | 		spin_lock_irqsave(&lockres->l_lock, flags); | 
 | 3169 | 	} | 
 | 3170 | 	spin_unlock_irqrestore(&lockres->l_lock, flags); | 
 | 3171 | } | 
 | 3172 |  | 
/*
 * Mark @lockres as freeing and then drop its DLM lock.  A non-zero
 * result from ocfs2_drop_lock() is only logged.
 */
void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
			       struct ocfs2_lock_res *lockres)
{
	int err;

	ocfs2_mark_lockres_freeing(lockres);

	err = ocfs2_drop_lock(osb, lockres);
	if (err != 0)
		mlog_errno(err);
}
 | 3183 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3184 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) | 
 | 3185 | { | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 3186 | 	ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres); | 
 | 3187 | 	ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres); | 
| wengang wang | 6ca497a | 2009-03-06 21:29:10 +0800 | [diff] [blame] | 3188 | 	ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres); | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 3189 | 	ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3190 | } | 
 | 3191 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3192 | int ocfs2_drop_inode_locks(struct inode *inode) | 
 | 3193 | { | 
 | 3194 | 	int status, err; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3195 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3196 | 	/* No need to call ocfs2_mark_lockres_freeing here - | 
 | 3197 | 	 * ocfs2_clear_inode has done it for us. */ | 
 | 3198 |  | 
 | 3199 | 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 3200 | 			      &OCFS2_I(inode)->ip_open_lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3201 | 	if (err < 0) | 
 | 3202 | 		mlog_errno(err); | 
 | 3203 |  | 
 | 3204 | 	status = err; | 
 | 3205 |  | 
 | 3206 | 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 3207 | 			      &OCFS2_I(inode)->ip_inode_lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3208 | 	if (err < 0) | 
 | 3209 | 		mlog_errno(err); | 
 | 3210 | 	if (err < 0 && !status) | 
 | 3211 | 		status = err; | 
 | 3212 |  | 
 | 3213 | 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | 
| Mark Fasheh | 0d5dc6c | 2006-09-14 14:44:51 -0700 | [diff] [blame] | 3214 | 			      &OCFS2_I(inode)->ip_rw_lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3215 | 	if (err < 0) | 
 | 3216 | 		mlog_errno(err); | 
 | 3217 | 	if (err < 0 && !status) | 
 | 3218 | 		status = err; | 
 | 3219 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3220 | 	return status; | 
 | 3221 | } | 
 | 3222 |  | 
| Joel Becker | de55124 | 2008-02-01 14:45:08 -0800 | [diff] [blame] | 3223 | static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, | 
 | 3224 | 					      int new_level) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3225 | { | 
 | 3226 | 	assert_spin_locked(&lockres->l_lock); | 
 | 3227 |  | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 3228 | 	BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3229 |  | 
 | 3230 | 	if (lockres->l_level <= new_level) { | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3231 | 		mlog(ML_ERROR, "lockres %s, lvl %d <= %d, blcklst %d, mask %d, " | 
 | 3232 | 		     "type %d, flags 0x%lx, hold %d %d, act %d %d, req %d, " | 
 | 3233 | 		     "block %d, pgen %d\n", lockres->l_name, lockres->l_level, | 
 | 3234 | 		     new_level, list_empty(&lockres->l_blocked_list), | 
 | 3235 | 		     list_empty(&lockres->l_mask_waiters), lockres->l_type, | 
 | 3236 | 		     lockres->l_flags, lockres->l_ro_holders, | 
 | 3237 | 		     lockres->l_ex_holders, lockres->l_action, | 
 | 3238 | 		     lockres->l_unlock_action, lockres->l_requested, | 
 | 3239 | 		     lockres->l_blocking, lockres->l_pending_gen); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3240 | 		BUG(); | 
 | 3241 | 	} | 
 | 3242 |  | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3243 | 	mlog(ML_BASTS, "lockres %s, level %d => %d, blocking %d\n", | 
 | 3244 | 	     lockres->l_name, lockres->l_level, new_level, lockres->l_blocking); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3245 |  | 
 | 3246 | 	lockres->l_action = OCFS2_AST_DOWNCONVERT; | 
 | 3247 | 	lockres->l_requested = new_level; | 
 | 3248 | 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 
| Joel Becker | de55124 | 2008-02-01 14:45:08 -0800 | [diff] [blame] | 3249 | 	return lockres_set_pending(lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3250 | } | 
 | 3251 |  | 
 | 3252 | static int ocfs2_downconvert_lock(struct ocfs2_super *osb, | 
 | 3253 | 				  struct ocfs2_lock_res *lockres, | 
 | 3254 | 				  int new_level, | 
| Joel Becker | de55124 | 2008-02-01 14:45:08 -0800 | [diff] [blame] | 3255 | 				  int lvb, | 
 | 3256 | 				  unsigned int generation) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3257 | { | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 3258 | 	int ret; | 
 | 3259 | 	u32 dlm_flags = DLM_LKF_CONVERT; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3260 |  | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3261 | 	mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name, | 
 | 3262 | 	     lockres->l_level, new_level); | 
 | 3263 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3264 | 	if (lvb) | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 3265 | 		dlm_flags |= DLM_LKF_VALBLK; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3266 |  | 
| Joel Becker | 4670c46 | 2008-02-01 14:39:35 -0800 | [diff] [blame] | 3267 | 	ret = ocfs2_dlm_lock(osb->cconn, | 
| Joel Becker | 7431cd7 | 2008-02-01 12:15:37 -0800 | [diff] [blame] | 3268 | 			     new_level, | 
 | 3269 | 			     &lockres->l_lksb, | 
 | 3270 | 			     dlm_flags, | 
 | 3271 | 			     lockres->l_name, | 
| Joel Becker | a796d28 | 2010-01-28 19:22:39 -0800 | [diff] [blame] | 3272 | 			     OCFS2_LOCK_ID_MAX_LEN - 1); | 
| Joel Becker | de55124 | 2008-02-01 14:45:08 -0800 | [diff] [blame] | 3273 | 	lockres_clear_pending(lockres, generation, osb); | 
| Joel Becker | 7431cd7 | 2008-02-01 12:15:37 -0800 | [diff] [blame] | 3274 | 	if (ret) { | 
 | 3275 | 		ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3276 | 		ocfs2_recover_from_dlm_error(lockres, 1); | 
 | 3277 | 		goto bail; | 
 | 3278 | 	} | 
 | 3279 |  | 
 | 3280 | 	ret = 0; | 
 | 3281 | bail: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3282 | 	return ret; | 
 | 3283 | } | 
 | 3284 |  | 
| Joel Becker | 24ef181 | 2008-01-29 17:37:32 -0800 | [diff] [blame] | 3285 | /* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */ | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3286 | static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, | 
 | 3287 | 				        struct ocfs2_lock_res *lockres) | 
 | 3288 | { | 
 | 3289 | 	assert_spin_locked(&lockres->l_lock); | 
 | 3290 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3291 | 	if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) { | 
 | 3292 | 		/* If we're already trying to cancel a lock conversion | 
 | 3293 | 		 * then just drop the spinlock and allow the caller to | 
 | 3294 | 		 * requeue this lock. */ | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3295 | 		mlog(ML_BASTS, "lockres %s, skip convert\n", lockres->l_name); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3296 | 		return 0; | 
 | 3297 | 	} | 
 | 3298 |  | 
 | 3299 | 	/* were we in a convert when we got the bast fire? */ | 
 | 3300 | 	BUG_ON(lockres->l_action != OCFS2_AST_CONVERT && | 
 | 3301 | 	       lockres->l_action != OCFS2_AST_DOWNCONVERT); | 
 | 3302 | 	/* set things up for the unlockast to know to just | 
 | 3303 | 	 * clear out the ast_action and unset busy, etc. */ | 
 | 3304 | 	lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT; | 
 | 3305 |  | 
 | 3306 | 	mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY), | 
 | 3307 | 			"lock %s, invalid flags: 0x%lx\n", | 
 | 3308 | 			lockres->l_name, lockres->l_flags); | 
 | 3309 |  | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3310 | 	mlog(ML_BASTS, "lockres %s\n", lockres->l_name); | 
 | 3311 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3312 | 	return 1; | 
 | 3313 | } | 
 | 3314 |  | 
 | 3315 | static int ocfs2_cancel_convert(struct ocfs2_super *osb, | 
 | 3316 | 				struct ocfs2_lock_res *lockres) | 
 | 3317 | { | 
 | 3318 | 	int ret; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3319 |  | 
| Joel Becker | 4670c46 | 2008-02-01 14:39:35 -0800 | [diff] [blame] | 3320 | 	ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, | 
| Joel Becker | a796d28 | 2010-01-28 19:22:39 -0800 | [diff] [blame] | 3321 | 			       DLM_LKF_CANCEL); | 
| Joel Becker | 7431cd7 | 2008-02-01 12:15:37 -0800 | [diff] [blame] | 3322 | 	if (ret) { | 
 | 3323 | 		ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3324 | 		ocfs2_recover_from_dlm_error(lockres, 0); | 
 | 3325 | 	} | 
 | 3326 |  | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3327 | 	mlog(ML_BASTS, "lockres %s\n", lockres->l_name); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3328 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3329 | 	return ret; | 
 | 3330 | } | 
 | 3331 |  | 
/*
 * Try to stop @lockres from blocking another requester, normally by
 * downconverting it to the highest level compatible with l_blocking.
 *
 * Takes lockres->l_lock itself and drops it again on every exit path.
 *
 * Outputs via @ctl:
 *   ctl->requeue        set to 1 when the lockres has to be looked at
 *                       again later (in-flight request being cancelled,
 *                       holders still present, refresh or checkpoint in
 *                       progress, ...); set to 0 once a downconvert is
 *                       issued.  Left untouched when the lock turns out
 *                       not to be blocked, when the downconvert is
 *                       aborted at NL, and on UNBLOCK_STOP_POST.
 *   ctl->unblock_action set to the return value of the lockres type's
 *                       downconvert_worker, when one exists.
 *
 * Returns 0 when nothing had to be done or the work was requeued;
 * otherwise the result of ocfs2_cancel_convert() or
 * ocfs2_downconvert_lock().
 */
static int ocfs2_unblock_lock(struct ocfs2_super *osb,
			      struct ocfs2_lock_res *lockres,
			      struct ocfs2_unblock_ctl *ctl)
{
	unsigned long flags;
	int blocking;
	int new_level;
	int level;
	int ret = 0;
	int set_lvb = 0;
	unsigned int gen;

	spin_lock_irqsave(&lockres->l_lock, flags);

	/* Re-entered with l_lock held whenever the state changed while the
	 * downconvert worker was running without the lock. */
recheck:
	/*
	 * Is it still blocking? If not, we have no more work to do.
	 */
	if (!(lockres->l_flags & OCFS2_LOCK_BLOCKED)) {
		BUG_ON(lockres->l_blocking != DLM_LOCK_NL);
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		ret = 0;
		goto leave;
	}

	if (lockres->l_flags & OCFS2_LOCK_BUSY) {
		/* XXX
		 * This is a *big* race.  The OCFS2_LOCK_PENDING flag
		 * exists entirely for one reason - another thread has set
		 * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock().
		 *
		 * If we do ocfs2_cancel_convert() before the other thread
		 * calls dlm_lock(), our cancel will do nothing.  We will
		 * get no ast, and we will have no way of knowing the
		 * cancel failed.  Meanwhile, the other thread will call
		 * into dlm_lock() and wait...forever.
		 *
		 * Why forever?  Because another node has asked for the
		 * lock first; that's why we're here in unblock_lock().
		 *
		 * The solution is OCFS2_LOCK_PENDING.  When PENDING is
		 * set, we just requeue the unblock.  Only when the other
		 * thread has called dlm_lock() and cleared PENDING will
		 * we then cancel their request.
		 *
		 * All callers of dlm_lock() must set OCFS2_LOCK_PENDING
		 * at the same time they set OCFS2_LOCK_BUSY.  They must
		 * clear OCFS2_LOCK_PENDING after dlm_lock() returns.
		 */
		if (lockres->l_flags & OCFS2_LOCK_PENDING) {
			mlog(ML_BASTS, "lockres %s, ReQ: Pending\n",
			     lockres->l_name);
			goto leave_requeue;
		}

		/* A request is in flight: ask for a requeue, and cancel
		 * the request if ocfs2_prepare_cancel_convert() returns
		 * non-zero.  The cancel itself is issued after l_lock has
		 * been dropped. */
		ctl->requeue = 1;
		ret = ocfs2_prepare_cancel_convert(osb, lockres);
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		if (ret) {
			ret = ocfs2_cancel_convert(osb, lockres);
			if (ret < 0)
				mlog_errno(ret);
		}
		goto leave;
	}

	/*
	 * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is
	 * set when the ast is received for an upconvert just before the
	 * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast
	 * on the heels of the ast, we want to delay the downconvert just
	 * enough to allow the up requestor to do its task. Because this
	 * lock is in the blocked queue, the lock will be downconverted
	 * as soon as the requestor is done with the lock.
	 */
	if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING)
		goto leave_requeue;

	/*
	 * How can we block and yet be at NL?  We were trying to upconvert
	 * from NL and got canceled.  The code comes back here, and now
	 * we notice and clear OCFS2_LOCK_BLOCKED (and reset l_blocking).
	 */
	if (lockres->l_level == DLM_LOCK_NL) {
		BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders);
		mlog(ML_BASTS, "lockres %s, Aborting dc\n", lockres->l_name);
		lockres->l_blocking = DLM_LOCK_NL;
		lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		goto leave;
	}

	/* if we're blocking an exclusive and we have *any* holders,
	 * then requeue. */
	if ((lockres->l_blocking == DLM_LOCK_EX)
	    && (lockres->l_ex_holders || lockres->l_ro_holders)) {
		mlog(ML_BASTS, "lockres %s, ReQ: EX/PR Holders %u,%u\n",
		     lockres->l_name, lockres->l_ex_holders,
		     lockres->l_ro_holders);
		goto leave_requeue;
	}

	/* If it's a PR we're blocking, then only
	 * requeue if we've got any EX holders */
	if (lockres->l_blocking == DLM_LOCK_PR &&
	    lockres->l_ex_holders) {
		mlog(ML_BASTS, "lockres %s, ReQ: EX Holders %u\n",
		     lockres->l_name, lockres->l_ex_holders);
		goto leave_requeue;
	}

	/*
	 * Can we get a lock in this state if the holder counts are
	 * zero? The meta data unblock code used to check this.
	 */
	if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
	    && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) {
		mlog(ML_BASTS, "lockres %s, ReQ: Lock Refreshing\n",
		     lockres->l_name);
		goto leave_requeue;
	}

	new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);

	/* Optional per-type veto: a zero return from check_downconvert
	 * means the lock cannot be dropped to new_level yet, so requeue. */
	if (lockres->l_ops->check_downconvert
	    && !lockres->l_ops->check_downconvert(lockres, new_level)) {
		mlog(ML_BASTS, "lockres %s, ReQ: Checkpointing\n",
		     lockres->l_name);
		goto leave_requeue;
	}

	/* If we get here, then we know that there are no more
	 * incompatible holders (and anyone asking for an incompatible
	 * lock is blocked). We can now downconvert the lock */
	if (!lockres->l_ops->downconvert_worker)
		goto downconvert;

	/* Some lockres types want to do a bit of work before
	 * downconverting a lock. Allow that here. The worker function
	 * may sleep, so we save off a copy of what we're blocking as
	 * it may change while we're not holding the spin lock. */
	blocking = lockres->l_blocking;
	level = lockres->l_level;
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking);

	/* l_lock is not held here, so this exit goes straight to leave. */
	if (ctl->unblock_action == UNBLOCK_STOP_POST) {
		mlog(ML_BASTS, "lockres %s, UNBLOCK_STOP_POST\n",
		     lockres->l_name);
		goto leave;
	}

	spin_lock_irqsave(&lockres->l_lock, flags);
	if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) {
		/* If this changed underneath us, then we can't drop
		 * it just yet. */
		mlog(ML_BASTS, "lockres %s, block=%d:%d, level=%d:%d, "
		     "Recheck\n", lockres->l_name, blocking,
		     lockres->l_blocking, level, lockres->l_level);
		goto recheck;
	}

	/* Reached with l_lock held, either directly (no worker) or after
	 * the worker ran and the state was found unchanged. */
downconvert:
	ctl->requeue = 0;

	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
		if (lockres->l_level == DLM_LOCK_EX)
			set_lvb = 1;

		/*
		 * We only set the lvb if the lock has been fully
		 * refreshed - otherwise we risk setting stale
		 * data. Otherwise, there's no need to actually clear
		 * out the lvb here as its value is still valid.
		 */
		if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
			lockres->l_ops->set_lvb(lockres);
	}

	/* Prepare under l_lock, then issue the downconvert after dropping
	 * it, passing along the value returned by the prepare step. */
	gen = ocfs2_prepare_downconvert(lockres, new_level);
	spin_unlock_irqrestore(&lockres->l_lock, flags);
	ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb,
				     gen);

	/* Common exit; l_lock has already been released by every path
	 * that jumps or falls through to here. */
leave:
	if (ret)
		mlog_errno(ret);
	return ret;

	/* Exit for paths that still hold l_lock and need another pass. */
leave_requeue:
	spin_unlock_irqrestore(&lockres->l_lock, flags);
	ctl->requeue = 1;

	return 0;
}
 | 3528 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 3529 | static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | 
 | 3530 | 				     int blocking) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3531 | { | 
 | 3532 | 	struct inode *inode; | 
 | 3533 | 	struct address_space *mapping; | 
| Goldwyn Rodrigues | 5e98d49 | 2010-06-28 10:04:32 -0500 | [diff] [blame] | 3534 | 	struct ocfs2_inode_info *oi; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3535 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3536 |        	inode = ocfs2_lock_res_inode(lockres); | 
 | 3537 | 	mapping = inode->i_mapping; | 
 | 3538 |  | 
| Goldwyn Rodrigues | 5e98d49 | 2010-06-28 10:04:32 -0500 | [diff] [blame] | 3539 | 	if (S_ISDIR(inode->i_mode)) { | 
 | 3540 | 		oi = OCFS2_I(inode); | 
 | 3541 | 		oi->ip_dir_lock_gen++; | 
 | 3542 | 		mlog(0, "generation: %u\n", oi->ip_dir_lock_gen); | 
 | 3543 | 		goto out; | 
 | 3544 | 	} | 
 | 3545 |  | 
| Mark Fasheh | 1044e40 | 2008-02-28 17:16:03 -0800 | [diff] [blame] | 3546 | 	if (!S_ISREG(inode->i_mode)) | 
| Mark Fasheh | f1f5406 | 2007-10-18 15:13:59 -0700 | [diff] [blame] | 3547 | 		goto out; | 
 | 3548 |  | 
| Mark Fasheh | 7f4a2a9 | 2006-12-11 11:06:36 -0800 | [diff] [blame] | 3549 | 	/* | 
 | 3550 | 	 * We need this before the filemap_fdatawrite() so that it can | 
 | 3551 | 	 * transfer the dirty bit from the PTE to the | 
 | 3552 | 	 * page. Unfortunately this means that even for EX->PR | 
 | 3553 | 	 * downconverts, we'll lose our mappings and have to build | 
 | 3554 | 	 * them up again. | 
 | 3555 | 	 */ | 
 | 3556 | 	unmap_mapping_range(mapping, 0, 0, 0); | 
 | 3557 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3558 | 	if (filemap_fdatawrite(mapping)) { | 
| Mark Fasheh | b0697053 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 3559 | 		mlog(ML_ERROR, "Could not sync inode %llu for downconvert!", | 
 | 3560 | 		     (unsigned long long)OCFS2_I(inode)->ip_blkno); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3561 | 	} | 
 | 3562 | 	sync_mapping_buffers(mapping); | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 3563 | 	if (blocking == DLM_LOCK_EX) { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3564 | 		truncate_inode_pages(mapping, 0); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3565 | 	} else { | 
 | 3566 | 		/* We only need to wait on the I/O if we're not also | 
 | 3567 | 		 * truncating pages because truncate_inode_pages waits | 
 | 3568 | 		 * for us above. We don't truncate pages if we're | 
 | 3569 | 		 * blocking anything < EXMODE because we want to keep | 
 | 3570 | 		 * them around in that case. */ | 
 | 3571 | 		filemap_fdatawait(mapping); | 
 | 3572 | 	} | 
 | 3573 |  | 
| Mark Fasheh | f1f5406 | 2007-10-18 15:13:59 -0700 | [diff] [blame] | 3574 | out: | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 3575 | 	return UNBLOCK_CONTINUE; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3576 | } | 
 | 3577 |  | 
| Tao Ma | a433848 | 2009-08-18 11:19:29 +0800 | [diff] [blame] | 3578 | static int ocfs2_ci_checkpointed(struct ocfs2_caching_info *ci, | 
 | 3579 | 				 struct ocfs2_lock_res *lockres, | 
 | 3580 | 				 int new_level) | 
| Mark Fasheh | 810d5ae | 2006-09-13 21:39:52 -0700 | [diff] [blame] | 3581 | { | 
| Tao Ma | a433848 | 2009-08-18 11:19:29 +0800 | [diff] [blame] | 3582 | 	int checkpointed = ocfs2_ci_fully_checkpointed(ci); | 
| Mark Fasheh | 810d5ae | 2006-09-13 21:39:52 -0700 | [diff] [blame] | 3583 |  | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 3584 | 	BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR); | 
 | 3585 | 	BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed); | 
| Mark Fasheh | 810d5ae | 2006-09-13 21:39:52 -0700 | [diff] [blame] | 3586 |  | 
 | 3587 | 	if (checkpointed) | 
 | 3588 | 		return 1; | 
 | 3589 |  | 
| Tao Ma | a433848 | 2009-08-18 11:19:29 +0800 | [diff] [blame] | 3590 | 	ocfs2_start_checkpoint(OCFS2_SB(ocfs2_metadata_cache_get_super(ci))); | 
| Mark Fasheh | 810d5ae | 2006-09-13 21:39:52 -0700 | [diff] [blame] | 3591 | 	return 0; | 
 | 3592 | } | 
 | 3593 |  | 
/*
 * check_downconvert callback: defers to ocfs2_ci_checkpointed() using the
 * metadata cache of the inode behind @lockres.
 */
static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
					int new_level)
{
	return ocfs2_ci_checkpointed(INODE_CACHE(ocfs2_lock_res_inode(lockres)),
				     lockres, new_level);
}
 | 3601 |  | 
/*
 * set_lvb callback: hands the inode behind @lockres to
 * __ocfs2_stuff_meta_lvb().
 */
static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
{
	__ocfs2_stuff_meta_lvb(ocfs2_lock_res_inode(lockres));
}
 | 3608 |  | 
/*
 * Drops the final reference on our dentry lock. For now this runs in the
 * downconvert thread; the dlmglue API could be simplified in the future
 * by pushing these drops off to the ocfs2_wq instead.
 */
static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres)
{
	ocfs2_dentry_lock_put(osb, ocfs2_lock_res_dl(lockres));
}
 | 3620 |  | 
 | 3621 | /* | 
 | 3622 |  * d_delete() matching dentries before the lock downconvert. | 
 | 3623 |  * | 
 | 3624 |  * At this point, any process waiting to destroy the | 
 | 3625 |  * dentry_lock due to last ref count is stopped by the | 
 | 3626 |  * OCFS2_LOCK_QUEUED flag. | 
 | 3627 |  * | 
 | 3628 |  * We have two potential problems | 
 | 3629 |  * | 
 | 3630 |  * 1) If we do the last reference drop on our dentry_lock (via dput) | 
 | 3631 |  *    we'll wind up in ocfs2_release_dentry_lock(), waiting on | 
 | 3632 |  *    the downconvert to finish. Instead we take an elevated | 
 | 3633 |  *    reference and push the drop until after we've completed our | 
 | 3634 |  *    unblock processing. | 
 | 3635 |  * | 
 | 3636 |  * 2) There might be another process with a final reference, | 
 | 3637 |  *    waiting on us to finish processing. If this is the case, we | 
 | 3638 |  *    detect it and exit out - there's no more dentries anyway. | 
 | 3639 |  */ | 
 | 3640 | static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, | 
 | 3641 | 				       int blocking) | 
 | 3642 | { | 
 | 3643 | 	struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); | 
 | 3644 | 	struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode); | 
 | 3645 | 	struct dentry *dentry; | 
 | 3646 | 	unsigned long flags; | 
 | 3647 | 	int extra_ref = 0; | 
 | 3648 |  | 
 | 3649 | 	/* | 
 | 3650 | 	 * This node is blocking another node from getting a read | 
 | 3651 | 	 * lock. This happens when we've renamed within a | 
 | 3652 | 	 * directory. We've forced the other nodes to d_delete(), but | 
 | 3653 | 	 * we never actually dropped our lock because it's still | 
 | 3654 | 	 * valid. The downconvert code will retain a PR for this node, | 
 | 3655 | 	 * so there's no further work to do. | 
 | 3656 | 	 */ | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 3657 | 	if (blocking == DLM_LOCK_PR) | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 3658 | 		return UNBLOCK_CONTINUE; | 
 | 3659 |  | 
 | 3660 | 	/* | 
 | 3661 | 	 * Mark this inode as potentially orphaned. The code in | 
 | 3662 | 	 * ocfs2_delete_inode() will figure out whether it actually | 
 | 3663 | 	 * needs to be freed or not. | 
 | 3664 | 	 */ | 
 | 3665 | 	spin_lock(&oi->ip_lock); | 
 | 3666 | 	oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; | 
 | 3667 | 	spin_unlock(&oi->ip_lock); | 
 | 3668 |  | 
 | 3669 | 	/* | 
 | 3670 | 	 * Yuck. We need to make sure however that the check of | 
 | 3671 | 	 * OCFS2_LOCK_FREEING and the extra reference are atomic with | 
 | 3672 | 	 * respect to a reference decrement or the setting of that | 
 | 3673 | 	 * flag. | 
 | 3674 | 	 */ | 
 | 3675 | 	spin_lock_irqsave(&lockres->l_lock, flags); | 
 | 3676 | 	spin_lock(&dentry_attach_lock); | 
 | 3677 | 	if (!(lockres->l_flags & OCFS2_LOCK_FREEING) | 
 | 3678 | 	    && dl->dl_count) { | 
 | 3679 | 		dl->dl_count++; | 
 | 3680 | 		extra_ref = 1; | 
 | 3681 | 	} | 
 | 3682 | 	spin_unlock(&dentry_attach_lock); | 
 | 3683 | 	spin_unlock_irqrestore(&lockres->l_lock, flags); | 
 | 3684 |  | 
 | 3685 | 	mlog(0, "extra_ref = %d\n", extra_ref); | 
 | 3686 |  | 
 | 3687 | 	/* | 
 | 3688 | 	 * We have a process waiting on us in ocfs2_dentry_iput(), | 
 | 3689 | 	 * which means we can't have any more outstanding | 
 | 3690 | 	 * aliases. There's no need to do any more work. | 
 | 3691 | 	 */ | 
 | 3692 | 	if (!extra_ref) | 
 | 3693 | 		return UNBLOCK_CONTINUE; | 
 | 3694 |  | 
 | 3695 | 	spin_lock(&dentry_attach_lock); | 
 | 3696 | 	while (1) { | 
 | 3697 | 		dentry = ocfs2_find_local_alias(dl->dl_inode, | 
 | 3698 | 						dl->dl_parent_blkno, 1); | 
 | 3699 | 		if (!dentry) | 
 | 3700 | 			break; | 
 | 3701 | 		spin_unlock(&dentry_attach_lock); | 
 | 3702 |  | 
 | 3703 | 		mlog(0, "d_delete(%.*s);\n", dentry->d_name.len, | 
 | 3704 | 		     dentry->d_name.name); | 
 | 3705 |  | 
 | 3706 | 		/* | 
 | 3707 | 		 * The following dcache calls may do an | 
 | 3708 | 		 * iput(). Normally we don't want that from the | 
 | 3709 | 		 * downconverting thread, but in this case it's ok | 
 | 3710 | 		 * because the requesting node already has an | 
 | 3711 | 		 * exclusive lock on the inode, so it can't be queued | 
 | 3712 | 		 * for a downconvert. | 
 | 3713 | 		 */ | 
 | 3714 | 		d_delete(dentry); | 
 | 3715 | 		dput(dentry); | 
 | 3716 |  | 
 | 3717 | 		spin_lock(&dentry_attach_lock); | 
 | 3718 | 	} | 
 | 3719 | 	spin_unlock(&dentry_attach_lock); | 
 | 3720 |  | 
 | 3721 | 	/* | 
 | 3722 | 	 * If we are the last holder of this dentry lock, there is no | 
 | 3723 | 	 * reason to downconvert so skip straight to the unlock. | 
 | 3724 | 	 */ | 
 | 3725 | 	if (dl->dl_count == 1) | 
 | 3726 | 		return UNBLOCK_STOP_POST; | 
 | 3727 |  | 
 | 3728 | 	return UNBLOCK_CONTINUE_POST; | 
 | 3729 | } | 
 | 3730 |  | 
| Tao Ma | 8dec98e | 2009-08-18 11:19:58 +0800 | [diff] [blame] | 3731 | static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres, | 
 | 3732 | 					    int new_level) | 
 | 3733 | { | 
 | 3734 | 	struct ocfs2_refcount_tree *tree = | 
 | 3735 | 				ocfs2_lock_res_refcount_tree(lockres); | 
 | 3736 |  | 
 | 3737 | 	return ocfs2_ci_checkpointed(&tree->rf_ci, lockres, new_level); | 
 | 3738 | } | 
 | 3739 |  | 
 | 3740 | static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres, | 
 | 3741 | 					 int blocking) | 
 | 3742 | { | 
 | 3743 | 	struct ocfs2_refcount_tree *tree = | 
 | 3744 | 				ocfs2_lock_res_refcount_tree(lockres); | 
 | 3745 |  | 
 | 3746 | 	ocfs2_metadata_cache_purge(&tree->rf_ci); | 
 | 3747 |  | 
 | 3748 | 	return UNBLOCK_CONTINUE; | 
 | 3749 | } | 
 | 3750 |  | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 3751 | static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres) | 
 | 3752 | { | 
 | 3753 | 	struct ocfs2_qinfo_lvb *lvb; | 
 | 3754 | 	struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres); | 
 | 3755 | 	struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, | 
 | 3756 | 					    oinfo->dqi_gi.dqi_type); | 
 | 3757 |  | 
| Mark Fasheh | a641dc2 | 2008-12-24 16:03:48 -0800 | [diff] [blame] | 3758 | 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 3759 | 	lvb->lvb_version = OCFS2_QINFO_LVB_VERSION; | 
 | 3760 | 	lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace); | 
 | 3761 | 	lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace); | 
 | 3762 | 	lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms); | 
 | 3763 | 	lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks); | 
 | 3764 | 	lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk); | 
 | 3765 | 	lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry); | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 3766 | } | 
 | 3767 |  | 
 | 3768 | void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex) | 
 | 3769 | { | 
 | 3770 | 	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; | 
 | 3771 | 	struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); | 
 | 3772 | 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 
 | 3773 |  | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 3774 | 	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) | 
 | 3775 | 		ocfs2_cluster_unlock(osb, lockres, level); | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 3776 | } | 
 | 3777 |  | 
 | 3778 | static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo) | 
 | 3779 | { | 
 | 3780 | 	struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, | 
 | 3781 | 					    oinfo->dqi_gi.dqi_type); | 
 | 3782 | 	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; | 
 | 3783 | 	struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | 
| Joel Becker | 85eb8b7 | 2008-11-25 15:31:27 +0100 | [diff] [blame] | 3784 | 	struct buffer_head *bh = NULL; | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 3785 | 	struct ocfs2_global_disk_dqinfo *gdinfo; | 
 | 3786 | 	int status = 0; | 
 | 3787 |  | 
| Joel Becker | 1c520df | 2009-06-19 15:14:13 -0700 | [diff] [blame] | 3788 | 	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && | 
 | 3789 | 	    lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) { | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 3790 | 		info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace); | 
 | 3791 | 		info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace); | 
 | 3792 | 		oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms); | 
 | 3793 | 		oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks); | 
 | 3794 | 		oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk); | 
 | 3795 | 		oinfo->dqi_gi.dqi_free_entry = | 
 | 3796 | 					be32_to_cpu(lvb->lvb_free_entry); | 
 | 3797 | 	} else { | 
| Jan Kara | ae4f6ef | 2010-04-28 19:04:29 +0200 | [diff] [blame] | 3798 | 		status = ocfs2_read_quota_phys_block(oinfo->dqi_gqinode, | 
 | 3799 | 						     oinfo->dqi_giblk, &bh); | 
| Joel Becker | 85eb8b7 | 2008-11-25 15:31:27 +0100 | [diff] [blame] | 3800 | 		if (status) { | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 3801 | 			mlog_errno(status); | 
 | 3802 | 			goto bail; | 
 | 3803 | 		} | 
 | 3804 | 		gdinfo = (struct ocfs2_global_disk_dqinfo *) | 
 | 3805 | 					(bh->b_data + OCFS2_GLOBAL_INFO_OFF); | 
 | 3806 | 		info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace); | 
 | 3807 | 		info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace); | 
 | 3808 | 		oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms); | 
 | 3809 | 		oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks); | 
 | 3810 | 		oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk); | 
 | 3811 | 		oinfo->dqi_gi.dqi_free_entry = | 
 | 3812 | 					le32_to_cpu(gdinfo->dqi_free_entry); | 
 | 3813 | 		brelse(bh); | 
 | 3814 | 		ocfs2_track_lock_refresh(lockres); | 
 | 3815 | 	} | 
 | 3816 |  | 
 | 3817 | bail: | 
 | 3818 | 	return status; | 
 | 3819 | } | 
 | 3820 |  | 
 | 3821 | /* Lock quota info, this function expects at least shared lock on the quota file | 
 | 3822 |  * so that we can safely refresh quota info from disk. */ | 
 | 3823 | int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex) | 
 | 3824 | { | 
 | 3825 | 	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; | 
 | 3826 | 	struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); | 
 | 3827 | 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 
 | 3828 | 	int status = 0; | 
 | 3829 |  | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 3830 | 	/* On RO devices, locking really isn't needed... */ | 
 | 3831 | 	if (ocfs2_is_hard_readonly(osb)) { | 
 | 3832 | 		if (ex) | 
 | 3833 | 			status = -EROFS; | 
 | 3834 | 		goto bail; | 
 | 3835 | 	} | 
 | 3836 | 	if (ocfs2_mount_local(osb)) | 
 | 3837 | 		goto bail; | 
 | 3838 |  | 
 | 3839 | 	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); | 
 | 3840 | 	if (status < 0) { | 
 | 3841 | 		mlog_errno(status); | 
 | 3842 | 		goto bail; | 
 | 3843 | 	} | 
 | 3844 | 	if (!ocfs2_should_refresh_lock_res(lockres)) | 
 | 3845 | 		goto bail; | 
 | 3846 | 	/* OK, we have the lock but we need to refresh the quota info */ | 
 | 3847 | 	status = ocfs2_refresh_qinfo(oinfo); | 
 | 3848 | 	if (status) | 
 | 3849 | 		ocfs2_qinfo_unlock(oinfo, ex); | 
 | 3850 | 	ocfs2_complete_lock_res_refresh(lockres, status); | 
 | 3851 | bail: | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 3852 | 	return status; | 
 | 3853 | } | 
 | 3854 |  | 
| Tao Ma | 8dec98e | 2009-08-18 11:19:58 +0800 | [diff] [blame] | 3855 | int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex) | 
 | 3856 | { | 
 | 3857 | 	int status; | 
 | 3858 | 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 
 | 3859 | 	struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres; | 
 | 3860 | 	struct ocfs2_super *osb = lockres->l_priv; | 
 | 3861 |  | 
 | 3862 |  | 
 | 3863 | 	if (ocfs2_is_hard_readonly(osb)) | 
 | 3864 | 		return -EROFS; | 
 | 3865 |  | 
 | 3866 | 	if (ocfs2_mount_local(osb)) | 
 | 3867 | 		return 0; | 
 | 3868 |  | 
 | 3869 | 	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); | 
 | 3870 | 	if (status < 0) | 
 | 3871 | 		mlog_errno(status); | 
 | 3872 |  | 
 | 3873 | 	return status; | 
 | 3874 | } | 
 | 3875 |  | 
 | 3876 | void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex) | 
 | 3877 | { | 
 | 3878 | 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 
 | 3879 | 	struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres; | 
 | 3880 | 	struct ocfs2_super *osb = lockres->l_priv; | 
 | 3881 |  | 
 | 3882 | 	if (!ocfs2_mount_local(osb)) | 
 | 3883 | 		ocfs2_cluster_unlock(osb, lockres, level); | 
 | 3884 | } | 
 | 3885 |  | 
| Adrian Bunk | 0060005 | 2008-01-29 00:11:41 +0200 | [diff] [blame] | 3886 | static void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | 
 | 3887 | 				       struct ocfs2_lock_res *lockres) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3888 | { | 
 | 3889 | 	int status; | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 3890 | 	struct ocfs2_unblock_ctl ctl = {0, 0,}; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3891 | 	unsigned long flags; | 
 | 3892 |  | 
 | 3893 | 	/* Our reference to the lockres in this function can be | 
 | 3894 | 	 * considered valid until we remove the OCFS2_LOCK_QUEUED | 
 | 3895 | 	 * flag. */ | 
 | 3896 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3897 | 	BUG_ON(!lockres); | 
 | 3898 | 	BUG_ON(!lockres->l_ops); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3899 |  | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3900 | 	mlog(ML_BASTS, "lockres %s blocked\n", lockres->l_name); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3901 |  | 
 | 3902 | 	/* Detect whether a lock has been marked as going away while | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 3903 | 	 * the downconvert thread was processing other things. A lock can | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3904 | 	 * still be marked with OCFS2_LOCK_FREEING after this check, | 
 | 3905 | 	 * but short circuiting here will still save us some | 
 | 3906 | 	 * performance. */ | 
 | 3907 | 	spin_lock_irqsave(&lockres->l_lock, flags); | 
 | 3908 | 	if (lockres->l_flags & OCFS2_LOCK_FREEING) | 
 | 3909 | 		goto unqueue; | 
 | 3910 | 	spin_unlock_irqrestore(&lockres->l_lock, flags); | 
 | 3911 |  | 
| Mark Fasheh | b5e500e | 2006-09-13 22:01:16 -0700 | [diff] [blame] | 3912 | 	status = ocfs2_unblock_lock(osb, lockres, &ctl); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3913 | 	if (status < 0) | 
 | 3914 | 		mlog_errno(status); | 
 | 3915 |  | 
 | 3916 | 	spin_lock_irqsave(&lockres->l_lock, flags); | 
 | 3917 | unqueue: | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 3918 | 	if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3919 | 		lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED); | 
 | 3920 | 	} else | 
 | 3921 | 		ocfs2_schedule_blocked_lock(osb, lockres); | 
 | 3922 |  | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3923 | 	mlog(ML_BASTS, "lockres %s, requeue = %s.\n", lockres->l_name, | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 3924 | 	     ctl.requeue ? "yes" : "no"); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3925 | 	spin_unlock_irqrestore(&lockres->l_lock, flags); | 
 | 3926 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 3927 | 	if (ctl.unblock_action != UNBLOCK_CONTINUE | 
 | 3928 | 	    && lockres->l_ops->post_unlock) | 
 | 3929 | 		lockres->l_ops->post_unlock(osb, lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3930 | } | 
 | 3931 |  | 
 | 3932 | static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, | 
 | 3933 | 					struct ocfs2_lock_res *lockres) | 
 | 3934 | { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3935 | 	assert_spin_locked(&lockres->l_lock); | 
 | 3936 |  | 
 | 3937 | 	if (lockres->l_flags & OCFS2_LOCK_FREEING) { | 
 | 3938 | 		/* Do not schedule a lock for downconvert when it's on | 
 | 3939 | 		 * the way to destruction - any nodes wanting access | 
 | 3940 | 		 * to the resource will get it soon. */ | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3941 | 		mlog(ML_BASTS, "lockres %s won't be scheduled: flags 0x%lx\n", | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3942 | 		     lockres->l_name, lockres->l_flags); | 
 | 3943 | 		return; | 
 | 3944 | 	} | 
 | 3945 |  | 
 | 3946 | 	lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); | 
 | 3947 |  | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 3948 | 	spin_lock(&osb->dc_task_lock); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3949 | 	if (list_empty(&lockres->l_blocked_list)) { | 
 | 3950 | 		list_add_tail(&lockres->l_blocked_list, | 
 | 3951 | 			      &osb->blocked_lock_list); | 
 | 3952 | 		osb->blocked_lock_count++; | 
 | 3953 | 	} | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 3954 | 	spin_unlock(&osb->dc_task_lock); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3955 | } | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 3956 |  | 
 | 3957 | static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) | 
 | 3958 | { | 
 | 3959 | 	unsigned long processed; | 
 | 3960 | 	struct ocfs2_lock_res *lockres; | 
 | 3961 |  | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 3962 | 	spin_lock(&osb->dc_task_lock); | 
 | 3963 | 	/* grab this early so we know to try again if a state change and | 
 | 3964 | 	 * wake happens part-way through our work  */ | 
 | 3965 | 	osb->dc_work_sequence = osb->dc_wake_sequence; | 
 | 3966 |  | 
 | 3967 | 	processed = osb->blocked_lock_count; | 
 | 3968 | 	while (processed) { | 
 | 3969 | 		BUG_ON(list_empty(&osb->blocked_lock_list)); | 
 | 3970 |  | 
 | 3971 | 		lockres = list_entry(osb->blocked_lock_list.next, | 
 | 3972 | 				     struct ocfs2_lock_res, l_blocked_list); | 
 | 3973 | 		list_del_init(&lockres->l_blocked_list); | 
 | 3974 | 		osb->blocked_lock_count--; | 
 | 3975 | 		spin_unlock(&osb->dc_task_lock); | 
 | 3976 |  | 
 | 3977 | 		BUG_ON(!processed); | 
 | 3978 | 		processed--; | 
 | 3979 |  | 
 | 3980 | 		ocfs2_process_blocked_lock(osb, lockres); | 
 | 3981 |  | 
 | 3982 | 		spin_lock(&osb->dc_task_lock); | 
 | 3983 | 	} | 
 | 3984 | 	spin_unlock(&osb->dc_task_lock); | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 3985 | } | 
 | 3986 |  | 
 | 3987 | static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) | 
 | 3988 | { | 
 | 3989 | 	int empty = 0; | 
 | 3990 |  | 
 | 3991 | 	spin_lock(&osb->dc_task_lock); | 
 | 3992 | 	if (list_empty(&osb->blocked_lock_list)) | 
 | 3993 | 		empty = 1; | 
 | 3994 |  | 
 | 3995 | 	spin_unlock(&osb->dc_task_lock); | 
 | 3996 | 	return empty; | 
 | 3997 | } | 
 | 3998 |  | 
 | 3999 | static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) | 
 | 4000 | { | 
 | 4001 | 	int should_wake = 0; | 
 | 4002 |  | 
 | 4003 | 	spin_lock(&osb->dc_task_lock); | 
 | 4004 | 	if (osb->dc_work_sequence != osb->dc_wake_sequence) | 
 | 4005 | 		should_wake = 1; | 
 | 4006 | 	spin_unlock(&osb->dc_task_lock); | 
 | 4007 |  | 
 | 4008 | 	return should_wake; | 
 | 4009 | } | 
 | 4010 |  | 
| Adrian Bunk | 200bfae | 2008-02-17 10:20:38 +0200 | [diff] [blame] | 4011 | static int ocfs2_downconvert_thread(void *arg) | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 4012 | { | 
 | 4013 | 	int status = 0; | 
 | 4014 | 	struct ocfs2_super *osb = arg; | 
 | 4015 |  | 
 | 4016 | 	/* only quit once we've been asked to stop and there is no more | 
 | 4017 | 	 * work available */ | 
 | 4018 | 	while (!(kthread_should_stop() && | 
 | 4019 | 		ocfs2_downconvert_thread_lists_empty(osb))) { | 
 | 4020 |  | 
 | 4021 | 		wait_event_interruptible(osb->dc_event, | 
 | 4022 | 					 ocfs2_downconvert_thread_should_wake(osb) || | 
 | 4023 | 					 kthread_should_stop()); | 
 | 4024 |  | 
 | 4025 | 		mlog(0, "downconvert_thread: awoken\n"); | 
 | 4026 |  | 
 | 4027 | 		ocfs2_downconvert_thread_do_work(osb); | 
 | 4028 | 	} | 
 | 4029 |  | 
 | 4030 | 	osb->dc_task = NULL; | 
 | 4031 | 	return status; | 
 | 4032 | } | 
 | 4033 |  | 
 | 4034 | void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb) | 
 | 4035 | { | 
 | 4036 | 	spin_lock(&osb->dc_task_lock); | 
 | 4037 | 	/* make sure the voting thread gets a swipe at whatever changes | 
 | 4038 | 	 * the caller may have made to the voting state */ | 
 | 4039 | 	osb->dc_wake_sequence++; | 
 | 4040 | 	spin_unlock(&osb->dc_task_lock); | 
 | 4041 | 	wake_up(&osb->dc_event); | 
 | 4042 | } |