| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1 | /* -*- mode: c; c-basic-offset: 8; -*- | 
|  | 2 | * vim: noexpandtab sw=8 ts=8 sts=0: | 
|  | 3 | * | 
|  | 4 | * dlmglue.c | 
|  | 5 | * | 
|  | 6 | * Code which implements an OCFS2 specific interface to our DLM. | 
|  | 7 | * | 
|  | 8 | * Copyright (C) 2003, 2004 Oracle.  All rights reserved. | 
|  | 9 | * | 
|  | 10 | * This program is free software; you can redistribute it and/or | 
|  | 11 | * modify it under the terms of the GNU General Public | 
|  | 12 | * License as published by the Free Software Foundation; either | 
|  | 13 | * version 2 of the License, or (at your option) any later version. | 
|  | 14 | * | 
|  | 15 | * This program is distributed in the hope that it will be useful, | 
|  | 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
|  | 18 | * General Public License for more details. | 
|  | 19 | * | 
|  | 20 | * You should have received a copy of the GNU General Public | 
|  | 21 | * License along with this program; if not, write to the | 
|  | 22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | 
|  | 23 | * Boston, MA 02111-1307, USA. | 
|  | 24 | */ | 
|  | 25 |  | 
|  | 26 | #include <linux/types.h> | 
|  | 27 | #include <linux/slab.h> | 
|  | 28 | #include <linux/highmem.h> | 
|  | 29 | #include <linux/mm.h> | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 30 | #include <linux/crc32.h> | 
|  | 31 | #include <linux/kthread.h> | 
|  | 32 | #include <linux/pagemap.h> | 
|  | 33 | #include <linux/debugfs.h> | 
|  | 34 | #include <linux/seq_file.h> | 
|  | 35 |  | 
|  | 36 | #include <cluster/heartbeat.h> | 
|  | 37 | #include <cluster/nodemanager.h> | 
|  | 38 | #include <cluster/tcp.h> | 
|  | 39 |  | 
|  | 40 | #include <dlm/dlmapi.h> | 
|  | 41 |  | 
|  | 42 | #define MLOG_MASK_PREFIX ML_DLM_GLUE | 
|  | 43 | #include <cluster/masklog.h> | 
|  | 44 |  | 
|  | 45 | #include "ocfs2.h" | 
|  | 46 |  | 
|  | 47 | #include "alloc.h" | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 48 | #include "dcache.h" | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 49 | #include "dlmglue.h" | 
|  | 50 | #include "extent_map.h" | 
| Tiger Yang | 7f1a37e | 2006-11-15 15:48:42 +0800 | [diff] [blame] | 51 | #include "file.h" | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 52 | #include "heartbeat.h" | 
|  | 53 | #include "inode.h" | 
|  | 54 | #include "journal.h" | 
|  | 55 | #include "slot_map.h" | 
|  | 56 | #include "super.h" | 
|  | 57 | #include "uptodate.h" | 
|  | 58 | #include "vote.h" | 
|  | 59 |  | 
|  | 60 | #include "buffer_head_io.h" | 
|  | 61 |  | 
|  | 62 | struct ocfs2_mask_waiter { | 
|  | 63 | struct list_head	mw_item; | 
|  | 64 | int			mw_status; | 
|  | 65 | struct completion	mw_complete; | 
|  | 66 | unsigned long		mw_mask; | 
|  | 67 | unsigned long		mw_goal; | 
|  | 68 | }; | 
|  | 69 |  | 
/*
 * ->get_osb implementations; declared early because the lock ops tables
 * refer to them ahead of their definitions.
 */
static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 72 |  | 
/*
 * Values a ->downconvert_worker function may return.
 *
 * They tell ocfs2_unblock_lock() and ocfs2_process_blocked_lock()
 * exactly what to do next.
 */
enum ocfs2_unblock_action {
	/* Go ahead with the downconvert. */
	UNBLOCK_CONTINUE	= 0,
	/* Go ahead with the downconvert and fire the ->post_unlock callback. */
	UNBLOCK_CONTINUE_POST	= 1,
	/* Skip the downconvert, but still fire the ->post_unlock() callback. */
	UNBLOCK_STOP_POST	= 2,
};
|  | 87 |  | 
|  | 88 | struct ocfs2_unblock_ctl { | 
|  | 89 | int requeue; | 
|  | 90 | enum ocfs2_unblock_action unblock_action; | 
|  | 91 | }; | 
|  | 92 |  | 
/* Callbacks wired into the per-type lock ops tables further down. */

/* Meta lock: ->check_downconvert and ->set_lvb. */
static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
					int new_level);
static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);

/* Data lock: ->downconvert_worker. */
static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
				     int blocking);

/* Dentry lock: ->downconvert_worker and ->post_unlock. */
static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
				       int blocking);
static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres);
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 105 |  | 
| Adrian Bunk | 6cb129f | 2007-04-26 00:29:35 -0700 | [diff] [blame] | 106 |  | 
/* Dump the meta LVB of __lockres, tagging the output with the call site. */
#define mlog_meta_lvb(__level, __lockres)				\
	ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__,		\
				 __LINE__, __lockres)
|  | 108 |  | 
|  | 109 | /* This aids in debugging situations where a bad LVB might be involved. */ | 
|  | 110 | static void ocfs2_dump_meta_lvb_info(u64 level, | 
|  | 111 | const char *function, | 
|  | 112 | unsigned int line, | 
|  | 113 | struct ocfs2_lock_res *lockres) | 
|  | 114 | { | 
|  | 115 | struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; | 
|  | 116 |  | 
|  | 117 | mlog(level, "LVB information for %s (called from %s:%u):\n", | 
|  | 118 | lockres->l_name, function, line); | 
|  | 119 | mlog(level, "version: %u, clusters: %u, generation: 0x%x\n", | 
|  | 120 | lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters), | 
|  | 121 | be32_to_cpu(lvb->lvb_igeneration)); | 
|  | 122 | mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n", | 
|  | 123 | (unsigned long long)be64_to_cpu(lvb->lvb_isize), | 
|  | 124 | be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), | 
|  | 125 | be16_to_cpu(lvb->lvb_imode)); | 
|  | 126 | mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, " | 
|  | 127 | "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink), | 
|  | 128 | (long long)be64_to_cpu(lvb->lvb_iatime_packed), | 
|  | 129 | (long long)be64_to_cpu(lvb->lvb_ictime_packed), | 
|  | 130 | (long long)be64_to_cpu(lvb->lvb_imtime_packed), | 
|  | 131 | be32_to_cpu(lvb->lvb_iattr)); | 
|  | 132 | } | 
|  | 133 |  | 
|  | 134 |  | 
/*
 * OCFS2 Lock Resource Operations
 *
 * Per-lock-type hooks that tune what the generic dlmglue locking code
 * does.
 *
 * The simplest lock types can store their struct ocfs2_super in ->l_priv
 * and rely on the default actions.
 *
 * For now every lock type must still supply an init function and trivial
 * lock/unlock wrappers.  Call ocfs2_simple_drop_lockres() when the lock
 * is no longer needed (i.e., when the owning object is destroyed).
 */
struct ocfs2_lock_res_ops {
	/*
	 * Map an ocfs2_lock_res * to its ocfs2_super *.  Only needed
	 * when ->l_priv does not point at an ocfs2_super.
	 */
	struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *);

	/*
	 * Optional; run from the downconvert (or "vote") thread after
	 * a successful downconvert.  The lockres is not referenced
	 * again once this has been called, so freeing memory etc. is
	 * safe here.
	 *
	 * ->downconvert_worker() controls exactly when this fires.
	 */
	void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);

	/*
	 * Extra, lock-type specific check of whether a downconvert is
	 * safe.  Return 0 to have the downconvert re-queued for
	 * later, nonzero to let it proceed.
	 *
	 * Most locks do not need this: the default check for
	 * incompatible holders is enough.
	 *
	 * Invoked with the lockres spinlock held.
	 */
	int (*check_downconvert)(struct ocfs2_lock_res *, int);

	/*
	 * Fill in the lock value block.  Invoked on downconvert and
	 * when a lock is dropped.
	 *
	 * A lock type using this should set LOCK_TYPE_USES_LVB in
	 * its flags field.
	 *
	 * Invoked with the lockres spinlock held.
	 */
	void (*set_lvb)(struct ocfs2_lock_res *);

	/*
	 * Invoked from the downconvert thread once it is known that
	 * the lock will be downconverted.  No locks are held at this
	 * point, so the callback may do work that schedules (syncing
	 * out data, etc).
	 *
	 * Must return one of the ocfs2_unblock_action values to tell
	 * the thread how to proceed.
	 */
	int (*downconvert_worker)(struct ocfs2_lock_res *, int);

	/*
	 * LOCK_TYPE_* flags describing the specific requirements of
	 * this lock type.  Each flag is described where it is defined.
	 */
	int flags;
};
|  | 206 |  | 
/*
 * LOCK_TYPE_REQUIRES_REFRESH
 *
 * For lock types that need to "refresh" possibly stale data the first
 * time a meaningful (PRMODE or EXMODE) level is obtained.  When set, the
 * ast function sets OCFS2_LOCK_NEEDS_REFRESH in the lockres l_flags; the
 * locking wrapper is expected to clear OCFS2_LOCK_NEEDS_REFRESH once it
 * has finished.
 */
#define LOCK_TYPE_REQUIRES_REFRESH 0x1

/*
 * LOCK_TYPE_USES_LVB
 *
 * The lock type makes use of the lock value block; its ->set_lvb
 * callback must be defined.
 */
#define LOCK_TYPE_USES_LVB		0x2
|  | 222 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 223 | static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { | 
| Mark Fasheh | 54a7e75 | 2006-09-12 21:49:13 -0700 | [diff] [blame] | 224 | .get_osb	= ocfs2_get_inode_osb, | 
| Mark Fasheh | f625c97 | 2006-09-12 21:24:53 -0700 | [diff] [blame] | 225 | .flags		= 0, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 226 | }; | 
|  | 227 |  | 
|  | 228 | static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = { | 
| Mark Fasheh | 54a7e75 | 2006-09-12 21:49:13 -0700 | [diff] [blame] | 229 | .get_osb	= ocfs2_get_inode_osb, | 
| Mark Fasheh | 810d5ae | 2006-09-13 21:39:52 -0700 | [diff] [blame] | 230 | .check_downconvert = ocfs2_check_meta_downconvert, | 
|  | 231 | .set_lvb	= ocfs2_set_meta_lvb, | 
| Mark Fasheh | b80fc01 | 2006-09-12 22:08:14 -0700 | [diff] [blame] | 232 | .flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 233 | }; | 
|  | 234 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 235 | static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = { | 
| Mark Fasheh | 54a7e75 | 2006-09-12 21:49:13 -0700 | [diff] [blame] | 236 | .get_osb	= ocfs2_get_inode_osb, | 
| Mark Fasheh | cc567d8 | 2006-09-13 21:52:21 -0700 | [diff] [blame] | 237 | .downconvert_worker = ocfs2_data_convert_worker, | 
| Mark Fasheh | f625c97 | 2006-09-12 21:24:53 -0700 | [diff] [blame] | 238 | .flags		= 0, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 239 | }; | 
|  | 240 |  | 
|  | 241 | static struct ocfs2_lock_res_ops ocfs2_super_lops = { | 
| Mark Fasheh | f625c97 | 2006-09-12 21:24:53 -0700 | [diff] [blame] | 242 | .flags		= LOCK_TYPE_REQUIRES_REFRESH, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 243 | }; | 
|  | 244 |  | 
|  | 245 | static struct ocfs2_lock_res_ops ocfs2_rename_lops = { | 
| Mark Fasheh | f625c97 | 2006-09-12 21:24:53 -0700 | [diff] [blame] | 246 | .flags		= 0, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 247 | }; | 
|  | 248 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 249 | static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { | 
| Mark Fasheh | 54a7e75 | 2006-09-12 21:49:13 -0700 | [diff] [blame] | 250 | .get_osb	= ocfs2_get_dentry_osb, | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 251 | .post_unlock	= ocfs2_dentry_post_unlock, | 
| Mark Fasheh | cc567d8 | 2006-09-13 21:52:21 -0700 | [diff] [blame] | 252 | .downconvert_worker = ocfs2_dentry_convert_worker, | 
| Mark Fasheh | f625c97 | 2006-09-12 21:24:53 -0700 | [diff] [blame] | 253 | .flags		= 0, | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 254 | }; | 
|  | 255 |  | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 256 | static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { | 
|  | 257 | .get_osb	= ocfs2_get_inode_osb, | 
|  | 258 | .flags		= 0, | 
|  | 259 | }; | 
|  | 260 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 261 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) | 
|  | 262 | { | 
|  | 263 | return lockres->l_type == OCFS2_LOCK_TYPE_META || | 
|  | 264 | lockres->l_type == OCFS2_LOCK_TYPE_DATA || | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 265 | lockres->l_type == OCFS2_LOCK_TYPE_RW || | 
|  | 266 | lockres->l_type == OCFS2_LOCK_TYPE_OPEN; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 267 | } | 
|  | 268 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 269 | static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) | 
|  | 270 | { | 
|  | 271 | BUG_ON(!ocfs2_is_inode_lock(lockres)); | 
|  | 272 |  | 
|  | 273 | return (struct inode *) lockres->l_priv; | 
|  | 274 | } | 
|  | 275 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 276 | static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres) | 
|  | 277 | { | 
|  | 278 | BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY); | 
|  | 279 |  | 
|  | 280 | return (struct ocfs2_dentry_lock *)lockres->l_priv; | 
|  | 281 | } | 
|  | 282 |  | 
| Mark Fasheh | 54a7e75 | 2006-09-12 21:49:13 -0700 | [diff] [blame] | 283 | static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres) | 
|  | 284 | { | 
|  | 285 | if (lockres->l_ops->get_osb) | 
|  | 286 | return lockres->l_ops->get_osb(lockres); | 
|  | 287 |  | 
|  | 288 | return (struct ocfs2_super *)lockres->l_priv; | 
|  | 289 | } | 
|  | 290 |  | 
/* Forward declarations for helpers defined later in this file. */
static int ocfs2_lock_create(struct ocfs2_super *osb,
			     struct ocfs2_lock_res *lockres,
			     int level,
			     int dlm_flags);
static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
						     int wanted);
static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
				 struct ocfs2_lock_res *lockres,
				 int level);

/* Generic ast/bast action handlers. */
static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres);
static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres);
static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres);
static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level);

static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres);
static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
						int convert);
/*
 * Log a DLM failure at ML_ERROR.
 *
 * _func:    string naming the DLM call that failed
 * _stat:    DLM status code, rendered via dlm_errname()/dlm_errmsg()
 * _lockres: pointer to the lock resource involved; it is parenthesized
 *           in the expansion so that expressions such as &res bind
 *           correctly to ->l_name
 */
#define ocfs2_log_dlm_error(_func, _stat, _lockres) do {	\
	mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on "	\
		"resource %s: %s\n", dlm_errname(_stat), _func,	\
		(_lockres)->l_name, dlm_errmsg(_stat));		\
} while (0)
/* More forward declarations for helpers defined later in this file. */
static void ocfs2_vote_on_unlock(struct ocfs2_super *osb,
				 struct ocfs2_lock_res *lockres);
static int ocfs2_meta_lock_update(struct inode *inode,
				  struct buffer_head **bh);
static void ocfs2_drop_osb_locks(struct ocfs2_super *osb);
static inline int ocfs2_highest_compat_lock_level(int level);
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 319 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 320 | static void ocfs2_build_lock_name(enum ocfs2_lock_type type, | 
|  | 321 | u64 blkno, | 
|  | 322 | u32 generation, | 
|  | 323 | char *name) | 
|  | 324 | { | 
|  | 325 | int len; | 
|  | 326 |  | 
|  | 327 | mlog_entry_void(); | 
|  | 328 |  | 
|  | 329 | BUG_ON(type >= OCFS2_NUM_LOCK_TYPES); | 
|  | 330 |  | 
| Mark Fasheh | b069705 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 331 | len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x", | 
|  | 332 | ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD, | 
|  | 333 | (long long)blkno, generation); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 334 |  | 
|  | 335 | BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1)); | 
|  | 336 |  | 
|  | 337 | mlog(0, "built lock resource with name: %s\n", name); | 
|  | 338 |  | 
|  | 339 | mlog_exit_void(); | 
|  | 340 | } | 
|  | 341 |  | 
/* Guards insertion into and removal from the lockres debug tracking list. */
static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock);
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 343 |  | 
|  | 344 | static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res, | 
|  | 345 | struct ocfs2_dlm_debug *dlm_debug) | 
|  | 346 | { | 
|  | 347 | mlog(0, "Add tracking for lockres %s\n", res->l_name); | 
|  | 348 |  | 
|  | 349 | spin_lock(&ocfs2_dlm_tracking_lock); | 
|  | 350 | list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking); | 
|  | 351 | spin_unlock(&ocfs2_dlm_tracking_lock); | 
|  | 352 | } | 
|  | 353 |  | 
|  | 354 | static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res) | 
|  | 355 | { | 
|  | 356 | spin_lock(&ocfs2_dlm_tracking_lock); | 
|  | 357 | if (!list_empty(&res->l_debug_list)) | 
|  | 358 | list_del_init(&res->l_debug_list); | 
|  | 359 | spin_unlock(&ocfs2_dlm_tracking_lock); | 
|  | 360 | } | 
|  | 361 |  | 
|  | 362 | static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, | 
|  | 363 | struct ocfs2_lock_res *res, | 
|  | 364 | enum ocfs2_lock_type type, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 365 | struct ocfs2_lock_res_ops *ops, | 
|  | 366 | void *priv) | 
|  | 367 | { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 368 | res->l_type          = type; | 
|  | 369 | res->l_ops           = ops; | 
|  | 370 | res->l_priv          = priv; | 
|  | 371 |  | 
|  | 372 | res->l_level         = LKM_IVMODE; | 
|  | 373 | res->l_requested     = LKM_IVMODE; | 
|  | 374 | res->l_blocking      = LKM_IVMODE; | 
|  | 375 | res->l_action        = OCFS2_AST_INVALID; | 
|  | 376 | res->l_unlock_action = OCFS2_UNLOCK_INVALID; | 
|  | 377 |  | 
|  | 378 | res->l_flags         = OCFS2_LOCK_INITIALIZED; | 
|  | 379 |  | 
|  | 380 | ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug); | 
|  | 381 | } | 
|  | 382 |  | 
|  | 383 | void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) | 
|  | 384 | { | 
|  | 385 | /* This also clears out the lock status block */ | 
|  | 386 | memset(res, 0, sizeof(struct ocfs2_lock_res)); | 
|  | 387 | spin_lock_init(&res->l_lock); | 
|  | 388 | init_waitqueue_head(&res->l_event); | 
|  | 389 | INIT_LIST_HEAD(&res->l_blocked_list); | 
|  | 390 | INIT_LIST_HEAD(&res->l_mask_waiters); | 
|  | 391 | } | 
|  | 392 |  | 
|  | 393 | void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, | 
|  | 394 | enum ocfs2_lock_type type, | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 395 | unsigned int generation, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 396 | struct inode *inode) | 
|  | 397 | { | 
|  | 398 | struct ocfs2_lock_res_ops *ops; | 
|  | 399 |  | 
|  | 400 | switch(type) { | 
|  | 401 | case OCFS2_LOCK_TYPE_RW: | 
|  | 402 | ops = &ocfs2_inode_rw_lops; | 
|  | 403 | break; | 
|  | 404 | case OCFS2_LOCK_TYPE_META: | 
|  | 405 | ops = &ocfs2_inode_meta_lops; | 
|  | 406 | break; | 
|  | 407 | case OCFS2_LOCK_TYPE_DATA: | 
|  | 408 | ops = &ocfs2_inode_data_lops; | 
|  | 409 | break; | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 410 | case OCFS2_LOCK_TYPE_OPEN: | 
|  | 411 | ops = &ocfs2_inode_open_lops; | 
|  | 412 | break; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 413 | default: | 
|  | 414 | mlog_bug_on_msg(1, "type: %d\n", type); | 
|  | 415 | ops = NULL; /* thanks, gcc */ | 
|  | 416 | break; | 
|  | 417 | }; | 
|  | 418 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 419 | ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno, | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 420 | generation, res->l_name); | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 421 | ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode); | 
|  | 422 | } | 
|  | 423 |  | 
| Mark Fasheh | 54a7e75 | 2006-09-12 21:49:13 -0700 | [diff] [blame] | 424 | static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) | 
|  | 425 | { | 
|  | 426 | struct inode *inode = ocfs2_lock_res_inode(lockres); | 
|  | 427 |  | 
|  | 428 | return OCFS2_SB(inode->i_sb); | 
|  | 429 | } | 
|  | 430 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 431 | static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) | 
|  | 432 | { | 
|  | 433 | __be64 inode_blkno_be; | 
|  | 434 |  | 
|  | 435 | memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], | 
|  | 436 | sizeof(__be64)); | 
|  | 437 |  | 
|  | 438 | return be64_to_cpu(inode_blkno_be); | 
|  | 439 | } | 
|  | 440 |  | 
| Mark Fasheh | 54a7e75 | 2006-09-12 21:49:13 -0700 | [diff] [blame] | 441 | static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres) | 
|  | 442 | { | 
|  | 443 | struct ocfs2_dentry_lock *dl = lockres->l_priv; | 
|  | 444 |  | 
|  | 445 | return OCFS2_SB(dl->dl_inode->i_sb); | 
|  | 446 | } | 
|  | 447 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 448 | void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, | 
|  | 449 | u64 parent, struct inode *inode) | 
|  | 450 | { | 
|  | 451 | int len; | 
|  | 452 | u64 inode_blkno = OCFS2_I(inode)->ip_blkno; | 
|  | 453 | __be64 inode_blkno_be = cpu_to_be64(inode_blkno); | 
|  | 454 | struct ocfs2_lock_res *lockres = &dl->dl_lockres; | 
|  | 455 |  | 
|  | 456 | ocfs2_lock_res_init_once(lockres); | 
|  | 457 |  | 
|  | 458 | /* | 
|  | 459 | * Unfortunately, the standard lock naming scheme won't work | 
|  | 460 | * here because we have two 16 byte values to use. Instead, | 
|  | 461 | * we'll stuff the inode number as a binary value. We still | 
|  | 462 | * want error prints to show something without garbling the | 
|  | 463 | * display, so drop a null byte in there before the inode | 
|  | 464 | * number. A future version of OCFS2 will likely use all | 
|  | 465 | * binary lock names. The stringified names have been a | 
|  | 466 | * tremendous aid in debugging, but now that the debugfs | 
|  | 467 | * interface exists, we can mangle things there if need be. | 
|  | 468 | * | 
|  | 469 | * NOTE: We also drop the standard "pad" value (the total lock | 
|  | 470 | * name size stays the same though - the last part is all | 
|  | 471 | * zeros due to the memset in ocfs2_lock_res_init_once() | 
|  | 472 | */ | 
|  | 473 | len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START, | 
|  | 474 | "%c%016llx", | 
|  | 475 | ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY), | 
|  | 476 | (long long)parent); | 
|  | 477 |  | 
|  | 478 | BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1)); | 
|  | 479 |  | 
|  | 480 | memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be, | 
|  | 481 | sizeof(__be64)); | 
|  | 482 |  | 
|  | 483 | ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, | 
|  | 484 | OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops, | 
|  | 485 | dl); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 486 | } | 
|  | 487 |  | 
|  | 488 | static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, | 
|  | 489 | struct ocfs2_super *osb) | 
|  | 490 | { | 
|  | 491 | /* Superblock lockres doesn't come from a slab so we call init | 
|  | 492 | * once on it manually.  */ | 
|  | 493 | ocfs2_lock_res_init_once(res); | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 494 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO, | 
|  | 495 | 0, res->l_name); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 496 | ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 497 | &ocfs2_super_lops, osb); | 
|  | 498 | } | 
|  | 499 |  | 
|  | 500 | static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, | 
|  | 501 | struct ocfs2_super *osb) | 
|  | 502 | { | 
|  | 503 | /* Rename lockres doesn't come from a slab so we call init | 
|  | 504 | * once on it manually.  */ | 
|  | 505 | ocfs2_lock_res_init_once(res); | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 506 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name); | 
|  | 507 | ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 508 | &ocfs2_rename_lops, osb); | 
|  | 509 | } | 
|  | 510 |  | 
|  | 511 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res) | 
|  | 512 | { | 
|  | 513 | mlog_entry_void(); | 
|  | 514 |  | 
|  | 515 | if (!(res->l_flags & OCFS2_LOCK_INITIALIZED)) | 
|  | 516 | return; | 
|  | 517 |  | 
|  | 518 | ocfs2_remove_lockres_tracking(res); | 
|  | 519 |  | 
|  | 520 | mlog_bug_on_msg(!list_empty(&res->l_blocked_list), | 
|  | 521 | "Lockres %s is on the blocked list\n", | 
|  | 522 | res->l_name); | 
|  | 523 | mlog_bug_on_msg(!list_empty(&res->l_mask_waiters), | 
|  | 524 | "Lockres %s has mask waiters pending\n", | 
|  | 525 | res->l_name); | 
|  | 526 | mlog_bug_on_msg(spin_is_locked(&res->l_lock), | 
|  | 527 | "Lockres %s is locked\n", | 
|  | 528 | res->l_name); | 
|  | 529 | mlog_bug_on_msg(res->l_ro_holders, | 
|  | 530 | "Lockres %s has %u ro holders\n", | 
|  | 531 | res->l_name, res->l_ro_holders); | 
|  | 532 | mlog_bug_on_msg(res->l_ex_holders, | 
|  | 533 | "Lockres %s has %u ex holders\n", | 
|  | 534 | res->l_name, res->l_ex_holders); | 
|  | 535 |  | 
|  | 536 | /* Need to clear out the lock status block for the dlm */ | 
|  | 537 | memset(&res->l_lksb, 0, sizeof(res->l_lksb)); | 
|  | 538 |  | 
|  | 539 | res->l_flags = 0UL; | 
|  | 540 | mlog_exit_void(); | 
|  | 541 | } | 
|  | 542 |  | 
|  | 543 | static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, | 
|  | 544 | int level) | 
|  | 545 | { | 
|  | 546 | mlog_entry_void(); | 
|  | 547 |  | 
|  | 548 | BUG_ON(!lockres); | 
|  | 549 |  | 
|  | 550 | switch(level) { | 
|  | 551 | case LKM_EXMODE: | 
|  | 552 | lockres->l_ex_holders++; | 
|  | 553 | break; | 
|  | 554 | case LKM_PRMODE: | 
|  | 555 | lockres->l_ro_holders++; | 
|  | 556 | break; | 
|  | 557 | default: | 
|  | 558 | BUG(); | 
|  | 559 | } | 
|  | 560 |  | 
|  | 561 | mlog_exit_void(); | 
|  | 562 | } | 
|  | 563 |  | 
|  | 564 | static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres, | 
|  | 565 | int level) | 
|  | 566 | { | 
|  | 567 | mlog_entry_void(); | 
|  | 568 |  | 
|  | 569 | BUG_ON(!lockres); | 
|  | 570 |  | 
|  | 571 | switch(level) { | 
|  | 572 | case LKM_EXMODE: | 
|  | 573 | BUG_ON(!lockres->l_ex_holders); | 
|  | 574 | lockres->l_ex_holders--; | 
|  | 575 | break; | 
|  | 576 | case LKM_PRMODE: | 
|  | 577 | BUG_ON(!lockres->l_ro_holders); | 
|  | 578 | lockres->l_ro_holders--; | 
|  | 579 | break; | 
|  | 580 | default: | 
|  | 581 | BUG(); | 
|  | 582 | } | 
|  | 583 | mlog_exit_void(); | 
|  | 584 | } | 
|  | 585 |  | 
|  | 586 | /* WARNING: This function lives in a world where the only three lock | 
|  | 587 | * levels are EX, PR, and NL. It *will* have to be adjusted when more | 
|  | 588 | * lock types are added. */ | 
|  | 589 | static inline int ocfs2_highest_compat_lock_level(int level) | 
|  | 590 | { | 
|  | 591 | int new_level = LKM_EXMODE; | 
|  | 592 |  | 
|  | 593 | if (level == LKM_EXMODE) | 
|  | 594 | new_level = LKM_NLMODE; | 
|  | 595 | else if (level == LKM_PRMODE) | 
|  | 596 | new_level = LKM_PRMODE; | 
|  | 597 | return new_level; | 
|  | 598 | } | 
|  | 599 |  | 
|  | 600 | static void lockres_set_flags(struct ocfs2_lock_res *lockres, | 
|  | 601 | unsigned long newflags) | 
|  | 602 | { | 
| Christoph Hellwig | 800deef | 2007-05-17 16:03:13 +0200 | [diff] [blame] | 603 | struct ocfs2_mask_waiter *mw, *tmp; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 604 |  | 
|  | 605 | assert_spin_locked(&lockres->l_lock); | 
|  | 606 |  | 
|  | 607 | lockres->l_flags = newflags; | 
|  | 608 |  | 
| Christoph Hellwig | 800deef | 2007-05-17 16:03:13 +0200 | [diff] [blame] | 609 | list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 610 | if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) | 
|  | 611 | continue; | 
|  | 612 |  | 
|  | 613 | list_del_init(&mw->mw_item); | 
|  | 614 | mw->mw_status = 0; | 
|  | 615 | complete(&mw->mw_complete); | 
|  | 616 | } | 
|  | 617 | } | 
|  | 618 | static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or) | 
|  | 619 | { | 
|  | 620 | lockres_set_flags(lockres, lockres->l_flags | or); | 
|  | 621 | } | 
|  | 622 | static void lockres_clear_flags(struct ocfs2_lock_res *lockres, | 
|  | 623 | unsigned long clear) | 
|  | 624 | { | 
|  | 625 | lockres_set_flags(lockres, lockres->l_flags & ~clear); | 
|  | 626 | } | 
|  | 627 |  | 
|  | 628 | static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres) | 
|  | 629 | { | 
|  | 630 | mlog_entry_void(); | 
|  | 631 |  | 
|  | 632 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); | 
|  | 633 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); | 
|  | 634 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); | 
|  | 635 | BUG_ON(lockres->l_blocking <= LKM_NLMODE); | 
|  | 636 |  | 
|  | 637 | lockres->l_level = lockres->l_requested; | 
|  | 638 | if (lockres->l_level <= | 
|  | 639 | ocfs2_highest_compat_lock_level(lockres->l_blocking)) { | 
|  | 640 | lockres->l_blocking = LKM_NLMODE; | 
|  | 641 | lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); | 
|  | 642 | } | 
|  | 643 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 
|  | 644 |  | 
|  | 645 | mlog_exit_void(); | 
|  | 646 | } | 
|  | 647 |  | 
|  | 648 | static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres) | 
|  | 649 | { | 
|  | 650 | mlog_entry_void(); | 
|  | 651 |  | 
|  | 652 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); | 
|  | 653 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); | 
|  | 654 |  | 
|  | 655 | /* Convert from RO to EX doesn't really need anything as our | 
|  | 656 | * information is already up to data. Convert from NL to | 
|  | 657 | * *anything* however should mark ourselves as needing an | 
|  | 658 | * update */ | 
| Mark Fasheh | f625c97 | 2006-09-12 21:24:53 -0700 | [diff] [blame] | 659 | if (lockres->l_level == LKM_NLMODE && | 
|  | 660 | lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 661 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); | 
|  | 662 |  | 
|  | 663 | lockres->l_level = lockres->l_requested; | 
|  | 664 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 
|  | 665 |  | 
|  | 666 | mlog_exit_void(); | 
|  | 667 | } | 
|  | 668 |  | 
|  | 669 | static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres) | 
|  | 670 | { | 
|  | 671 | mlog_entry_void(); | 
|  | 672 |  | 
|  | 673 | BUG_ON((!lockres->l_flags & OCFS2_LOCK_BUSY)); | 
|  | 674 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); | 
|  | 675 |  | 
|  | 676 | if (lockres->l_requested > LKM_NLMODE && | 
| Mark Fasheh | f625c97 | 2006-09-12 21:24:53 -0700 | [diff] [blame] | 677 | !(lockres->l_flags & OCFS2_LOCK_LOCAL) && | 
|  | 678 | lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 679 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); | 
|  | 680 |  | 
|  | 681 | lockres->l_level = lockres->l_requested; | 
|  | 682 | lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED); | 
|  | 683 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 
|  | 684 |  | 
|  | 685 | mlog_exit_void(); | 
|  | 686 | } | 
|  | 687 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 688 | static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, | 
|  | 689 | int level) | 
|  | 690 | { | 
|  | 691 | int needs_downconvert = 0; | 
|  | 692 | mlog_entry_void(); | 
|  | 693 |  | 
|  | 694 | assert_spin_locked(&lockres->l_lock); | 
|  | 695 |  | 
|  | 696 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); | 
|  | 697 |  | 
|  | 698 | if (level > lockres->l_blocking) { | 
|  | 699 | /* only schedule a downconvert if we haven't already scheduled | 
|  | 700 | * one that goes low enough to satisfy the level we're | 
|  | 701 | * blocking.  this also catches the case where we get | 
|  | 702 | * duplicate BASTs */ | 
|  | 703 | if (ocfs2_highest_compat_lock_level(level) < | 
|  | 704 | ocfs2_highest_compat_lock_level(lockres->l_blocking)) | 
|  | 705 | needs_downconvert = 1; | 
|  | 706 |  | 
|  | 707 | lockres->l_blocking = level; | 
|  | 708 | } | 
|  | 709 |  | 
|  | 710 | mlog_exit(needs_downconvert); | 
|  | 711 | return needs_downconvert; | 
|  | 712 | } | 
|  | 713 |  | 
| Mark Fasheh | aa2623a | 2006-09-12 21:58:23 -0700 | [diff] [blame] | 714 | static void ocfs2_blocking_ast(void *opaque, int level) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 715 | { | 
| Mark Fasheh | aa2623a | 2006-09-12 21:58:23 -0700 | [diff] [blame] | 716 | struct ocfs2_lock_res *lockres = opaque; | 
|  | 717 | struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 718 | int needs_downconvert; | 
|  | 719 | unsigned long flags; | 
|  | 720 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 721 | BUG_ON(level <= LKM_NLMODE); | 
|  | 722 |  | 
| Mark Fasheh | aa2623a | 2006-09-12 21:58:23 -0700 | [diff] [blame] | 723 | mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n", | 
|  | 724 | lockres->l_name, level, lockres->l_level, | 
|  | 725 | ocfs2_lock_type_string(lockres->l_type)); | 
|  | 726 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 727 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 728 | needs_downconvert = ocfs2_generic_handle_bast(lockres, level); | 
|  | 729 | if (needs_downconvert) | 
|  | 730 | ocfs2_schedule_blocked_lock(osb, lockres); | 
|  | 731 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 732 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 733 | wake_up(&lockres->l_event); | 
|  | 734 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 735 | ocfs2_kick_vote_thread(osb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 736 | } | 
|  | 737 |  | 
| Mark Fasheh | e92d57d | 2006-09-12 21:34:35 -0700 | [diff] [blame] | 738 | static void ocfs2_locking_ast(void *opaque) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 739 | { | 
| Mark Fasheh | e92d57d | 2006-09-12 21:34:35 -0700 | [diff] [blame] | 740 | struct ocfs2_lock_res *lockres = opaque; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 741 | struct dlm_lockstatus *lksb = &lockres->l_lksb; | 
|  | 742 | unsigned long flags; | 
|  | 743 |  | 
|  | 744 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 745 |  | 
|  | 746 | if (lksb->status != DLM_NORMAL) { | 
|  | 747 | mlog(ML_ERROR, "lockres %s: lksb status value of %u!\n", | 
|  | 748 | lockres->l_name, lksb->status); | 
|  | 749 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 750 | return; | 
|  | 751 | } | 
|  | 752 |  | 
|  | 753 | switch(lockres->l_action) { | 
|  | 754 | case OCFS2_AST_ATTACH: | 
|  | 755 | ocfs2_generic_handle_attach_action(lockres); | 
| Mark Fasheh | e92d57d | 2006-09-12 21:34:35 -0700 | [diff] [blame] | 756 | lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 757 | break; | 
|  | 758 | case OCFS2_AST_CONVERT: | 
|  | 759 | ocfs2_generic_handle_convert_action(lockres); | 
|  | 760 | break; | 
|  | 761 | case OCFS2_AST_DOWNCONVERT: | 
|  | 762 | ocfs2_generic_handle_downconvert_action(lockres); | 
|  | 763 | break; | 
|  | 764 | default: | 
| Mark Fasheh | e92d57d | 2006-09-12 21:34:35 -0700 | [diff] [blame] | 765 | mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u " | 
|  | 766 | "lockres flags = 0x%lx, unlock action: %u\n", | 
|  | 767 | lockres->l_name, lockres->l_action, lockres->l_flags, | 
|  | 768 | lockres->l_unlock_action); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 769 | BUG(); | 
|  | 770 | } | 
|  | 771 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 772 | /* set it to something invalid so if we get called again we | 
|  | 773 | * can catch it. */ | 
|  | 774 | lockres->l_action = OCFS2_AST_INVALID; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 775 |  | 
|  | 776 | wake_up(&lockres->l_event); | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 777 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 778 | } | 
|  | 779 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 780 | static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, | 
|  | 781 | int convert) | 
|  | 782 | { | 
|  | 783 | unsigned long flags; | 
|  | 784 |  | 
|  | 785 | mlog_entry_void(); | 
|  | 786 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 787 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 
|  | 788 | if (convert) | 
|  | 789 | lockres->l_action = OCFS2_AST_INVALID; | 
|  | 790 | else | 
|  | 791 | lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; | 
|  | 792 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 793 |  | 
|  | 794 | wake_up(&lockres->l_event); | 
|  | 795 | mlog_exit_void(); | 
|  | 796 | } | 
|  | 797 |  | 
|  | 798 | /* Note: If we detect another process working on the lock (i.e., | 
|  | 799 | * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller | 
|  | 800 | * to do the right thing in that case. | 
|  | 801 | */ | 
|  | 802 | static int ocfs2_lock_create(struct ocfs2_super *osb, | 
|  | 803 | struct ocfs2_lock_res *lockres, | 
|  | 804 | int level, | 
|  | 805 | int dlm_flags) | 
|  | 806 | { | 
|  | 807 | int ret = 0; | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 808 | enum dlm_status status = DLM_NORMAL; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 809 | unsigned long flags; | 
|  | 810 |  | 
|  | 811 | mlog_entry_void(); | 
|  | 812 |  | 
|  | 813 | mlog(0, "lock %s, level = %d, flags = %d\n", lockres->l_name, level, | 
|  | 814 | dlm_flags); | 
|  | 815 |  | 
|  | 816 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 817 | if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) || | 
|  | 818 | (lockres->l_flags & OCFS2_LOCK_BUSY)) { | 
|  | 819 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 820 | goto bail; | 
|  | 821 | } | 
|  | 822 |  | 
|  | 823 | lockres->l_action = OCFS2_AST_ATTACH; | 
|  | 824 | lockres->l_requested = level; | 
|  | 825 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 
|  | 826 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 827 |  | 
|  | 828 | status = dlmlock(osb->dlm, | 
|  | 829 | level, | 
|  | 830 | &lockres->l_lksb, | 
|  | 831 | dlm_flags, | 
|  | 832 | lockres->l_name, | 
| Mark Fasheh | f068106 | 2006-09-08 11:40:10 -0700 | [diff] [blame] | 833 | OCFS2_LOCK_ID_MAX_LEN - 1, | 
| Mark Fasheh | e92d57d | 2006-09-12 21:34:35 -0700 | [diff] [blame] | 834 | ocfs2_locking_ast, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 835 | lockres, | 
| Mark Fasheh | aa2623a | 2006-09-12 21:58:23 -0700 | [diff] [blame] | 836 | ocfs2_blocking_ast); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 837 | if (status != DLM_NORMAL) { | 
|  | 838 | ocfs2_log_dlm_error("dlmlock", status, lockres); | 
|  | 839 | ret = -EINVAL; | 
|  | 840 | ocfs2_recover_from_dlm_error(lockres, 1); | 
|  | 841 | } | 
|  | 842 |  | 
|  | 843 | mlog(0, "lock %s, successfull return from dlmlock\n", lockres->l_name); | 
|  | 844 |  | 
|  | 845 | bail: | 
|  | 846 | mlog_exit(ret); | 
|  | 847 | return ret; | 
|  | 848 | } | 
|  | 849 |  | 
|  | 850 | static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres, | 
|  | 851 | int flag) | 
|  | 852 | { | 
|  | 853 | unsigned long flags; | 
|  | 854 | int ret; | 
|  | 855 |  | 
|  | 856 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 857 | ret = lockres->l_flags & flag; | 
|  | 858 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 859 |  | 
|  | 860 | return ret; | 
|  | 861 | } | 
|  | 862 |  | 
|  | 863 | static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres) | 
|  | 864 |  | 
|  | 865 | { | 
|  | 866 | wait_event(lockres->l_event, | 
|  | 867 | !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY)); | 
|  | 868 | } | 
|  | 869 |  | 
|  | 870 | static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres) | 
|  | 871 |  | 
|  | 872 | { | 
|  | 873 | wait_event(lockres->l_event, | 
|  | 874 | !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING)); | 
|  | 875 | } | 
|  | 876 |  | 
|  | 877 | /* predict what lock level we'll be dropping down to on behalf | 
|  | 878 | * of another node, and return true if the currently wanted | 
|  | 879 | * level will be compatible with it. */ | 
|  | 880 | static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, | 
|  | 881 | int wanted) | 
|  | 882 | { | 
|  | 883 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); | 
|  | 884 |  | 
|  | 885 | return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking); | 
|  | 886 | } | 
|  | 887 |  | 
|  | 888 | static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw) | 
|  | 889 | { | 
|  | 890 | INIT_LIST_HEAD(&mw->mw_item); | 
|  | 891 | init_completion(&mw->mw_complete); | 
|  | 892 | } | 
|  | 893 |  | 
|  | 894 | static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw) | 
|  | 895 | { | 
|  | 896 | wait_for_completion(&mw->mw_complete); | 
|  | 897 | /* Re-arm the completion in case we want to wait on it again */ | 
|  | 898 | INIT_COMPLETION(mw->mw_complete); | 
|  | 899 | return mw->mw_status; | 
|  | 900 | } | 
|  | 901 |  | 
|  | 902 | static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres, | 
|  | 903 | struct ocfs2_mask_waiter *mw, | 
|  | 904 | unsigned long mask, | 
|  | 905 | unsigned long goal) | 
|  | 906 | { | 
|  | 907 | BUG_ON(!list_empty(&mw->mw_item)); | 
|  | 908 |  | 
|  | 909 | assert_spin_locked(&lockres->l_lock); | 
|  | 910 |  | 
|  | 911 | list_add_tail(&mw->mw_item, &lockres->l_mask_waiters); | 
|  | 912 | mw->mw_mask = mask; | 
|  | 913 | mw->mw_goal = goal; | 
|  | 914 | } | 
|  | 915 |  | 
|  | 916 | /* returns 0 if the mw that was removed was already satisfied, -EBUSY | 
|  | 917 | * if the mask still hadn't reached its goal */ | 
|  | 918 | static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, | 
|  | 919 | struct ocfs2_mask_waiter *mw) | 
|  | 920 | { | 
|  | 921 | unsigned long flags; | 
|  | 922 | int ret = 0; | 
|  | 923 |  | 
|  | 924 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 925 | if (!list_empty(&mw->mw_item)) { | 
|  | 926 | if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) | 
|  | 927 | ret = -EBUSY; | 
|  | 928 |  | 
|  | 929 | list_del_init(&mw->mw_item); | 
|  | 930 | init_completion(&mw->mw_complete); | 
|  | 931 | } | 
|  | 932 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 933 |  | 
|  | 934 | return ret; | 
|  | 935 |  | 
|  | 936 | } | 
|  | 937 |  | 
|  | 938 | static int ocfs2_cluster_lock(struct ocfs2_super *osb, | 
|  | 939 | struct ocfs2_lock_res *lockres, | 
|  | 940 | int level, | 
|  | 941 | int lkm_flags, | 
|  | 942 | int arg_flags) | 
|  | 943 | { | 
|  | 944 | struct ocfs2_mask_waiter mw; | 
|  | 945 | enum dlm_status status; | 
|  | 946 | int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR); | 
|  | 947 | int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */ | 
|  | 948 | unsigned long flags; | 
|  | 949 |  | 
|  | 950 | mlog_entry_void(); | 
|  | 951 |  | 
|  | 952 | ocfs2_init_mask_waiter(&mw); | 
|  | 953 |  | 
| Mark Fasheh | b80fc01 | 2006-09-12 22:08:14 -0700 | [diff] [blame] | 954 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) | 
|  | 955 | lkm_flags |= LKM_VALBLK; | 
|  | 956 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 957 | again: | 
|  | 958 | wait = 0; | 
|  | 959 |  | 
|  | 960 | if (catch_signals && signal_pending(current)) { | 
|  | 961 | ret = -ERESTARTSYS; | 
|  | 962 | goto out; | 
|  | 963 | } | 
|  | 964 |  | 
|  | 965 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 966 |  | 
|  | 967 | mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING, | 
|  | 968 | "Cluster lock called on freeing lockres %s! flags " | 
|  | 969 | "0x%lx\n", lockres->l_name, lockres->l_flags); | 
|  | 970 |  | 
|  | 971 | /* We only compare against the currently granted level | 
|  | 972 | * here. If the lock is blocked waiting on a downconvert, | 
|  | 973 | * we'll get caught below. */ | 
|  | 974 | if (lockres->l_flags & OCFS2_LOCK_BUSY && | 
|  | 975 | level > lockres->l_level) { | 
|  | 976 | /* is someone sitting in dlm_lock? If so, wait on | 
|  | 977 | * them. */ | 
|  | 978 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | 
|  | 979 | wait = 1; | 
|  | 980 | goto unlock; | 
|  | 981 | } | 
|  | 982 |  | 
|  | 983 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { | 
|  | 984 | /* lock has not been created yet. */ | 
|  | 985 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 986 |  | 
|  | 987 | ret = ocfs2_lock_create(osb, lockres, LKM_NLMODE, 0); | 
|  | 988 | if (ret < 0) { | 
|  | 989 | mlog_errno(ret); | 
|  | 990 | goto out; | 
|  | 991 | } | 
|  | 992 | goto again; | 
|  | 993 | } | 
|  | 994 |  | 
|  | 995 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED && | 
|  | 996 | !ocfs2_may_continue_on_blocked_lock(lockres, level)) { | 
|  | 997 | /* is the lock is currently blocked on behalf of | 
|  | 998 | * another node */ | 
|  | 999 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0); | 
|  | 1000 | wait = 1; | 
|  | 1001 | goto unlock; | 
|  | 1002 | } | 
|  | 1003 |  | 
|  | 1004 | if (level > lockres->l_level) { | 
|  | 1005 | if (lockres->l_action != OCFS2_AST_INVALID) | 
|  | 1006 | mlog(ML_ERROR, "lockres %s has action %u pending\n", | 
|  | 1007 | lockres->l_name, lockres->l_action); | 
|  | 1008 |  | 
|  | 1009 | lockres->l_action = OCFS2_AST_CONVERT; | 
|  | 1010 | lockres->l_requested = level; | 
|  | 1011 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 
|  | 1012 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 1013 |  | 
|  | 1014 | BUG_ON(level == LKM_IVMODE); | 
|  | 1015 | BUG_ON(level == LKM_NLMODE); | 
|  | 1016 |  | 
|  | 1017 | mlog(0, "lock %s, convert from %d to level = %d\n", | 
|  | 1018 | lockres->l_name, lockres->l_level, level); | 
|  | 1019 |  | 
|  | 1020 | /* call dlm_lock to upgrade lock now */ | 
|  | 1021 | status = dlmlock(osb->dlm, | 
|  | 1022 | level, | 
|  | 1023 | &lockres->l_lksb, | 
| Mark Fasheh | b80fc01 | 2006-09-12 22:08:14 -0700 | [diff] [blame] | 1024 | lkm_flags|LKM_CONVERT, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1025 | lockres->l_name, | 
| Mark Fasheh | f068106 | 2006-09-08 11:40:10 -0700 | [diff] [blame] | 1026 | OCFS2_LOCK_ID_MAX_LEN - 1, | 
| Mark Fasheh | e92d57d | 2006-09-12 21:34:35 -0700 | [diff] [blame] | 1027 | ocfs2_locking_ast, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1028 | lockres, | 
| Mark Fasheh | aa2623a | 2006-09-12 21:58:23 -0700 | [diff] [blame] | 1029 | ocfs2_blocking_ast); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1030 | if (status != DLM_NORMAL) { | 
|  | 1031 | if ((lkm_flags & LKM_NOQUEUE) && | 
|  | 1032 | (status == DLM_NOTQUEUED)) | 
|  | 1033 | ret = -EAGAIN; | 
|  | 1034 | else { | 
|  | 1035 | ocfs2_log_dlm_error("dlmlock", status, | 
|  | 1036 | lockres); | 
|  | 1037 | ret = -EINVAL; | 
|  | 1038 | } | 
|  | 1039 | ocfs2_recover_from_dlm_error(lockres, 1); | 
|  | 1040 | goto out; | 
|  | 1041 | } | 
|  | 1042 |  | 
|  | 1043 | mlog(0, "lock %s, successfull return from dlmlock\n", | 
|  | 1044 | lockres->l_name); | 
|  | 1045 |  | 
|  | 1046 | /* At this point we've gone inside the dlm and need to | 
|  | 1047 | * complete our work regardless. */ | 
|  | 1048 | catch_signals = 0; | 
|  | 1049 |  | 
|  | 1050 | /* wait for busy to clear and carry on */ | 
|  | 1051 | goto again; | 
|  | 1052 | } | 
|  | 1053 |  | 
|  | 1054 | /* Ok, if we get here then we're good to go. */ | 
|  | 1055 | ocfs2_inc_holders(lockres, level); | 
|  | 1056 |  | 
|  | 1057 | ret = 0; | 
|  | 1058 | unlock: | 
|  | 1059 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 1060 | out: | 
|  | 1061 | /* | 
|  | 1062 | * This is helping work around a lock inversion between the page lock | 
|  | 1063 | * and dlm locks.  One path holds the page lock while calling aops | 
|  | 1064 | * which block acquiring dlm locks.  The voting thread holds dlm | 
|  | 1065 | * locks while acquiring page locks while down converting data locks. | 
|  | 1066 | * This block is helping an aop path notice the inversion and back | 
|  | 1067 | * off to unlock its page lock before trying the dlm lock again. | 
|  | 1068 | */ | 
|  | 1069 | if (wait && arg_flags & OCFS2_LOCK_NONBLOCK && | 
|  | 1070 | mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) { | 
|  | 1071 | wait = 0; | 
|  | 1072 | if (lockres_remove_mask_waiter(lockres, &mw)) | 
|  | 1073 | ret = -EAGAIN; | 
|  | 1074 | else | 
|  | 1075 | goto again; | 
|  | 1076 | } | 
|  | 1077 | if (wait) { | 
|  | 1078 | ret = ocfs2_wait_for_mask(&mw); | 
|  | 1079 | if (ret == 0) | 
|  | 1080 | goto again; | 
|  | 1081 | mlog_errno(ret); | 
|  | 1082 | } | 
|  | 1083 |  | 
|  | 1084 | mlog_exit(ret); | 
|  | 1085 | return ret; | 
|  | 1086 | } | 
|  | 1087 |  | 
|  | 1088 | static void ocfs2_cluster_unlock(struct ocfs2_super *osb, | 
|  | 1089 | struct ocfs2_lock_res *lockres, | 
|  | 1090 | int level) | 
|  | 1091 | { | 
|  | 1092 | unsigned long flags; | 
|  | 1093 |  | 
|  | 1094 | mlog_entry_void(); | 
|  | 1095 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 1096 | ocfs2_dec_holders(lockres, level); | 
|  | 1097 | ocfs2_vote_on_unlock(osb, lockres); | 
|  | 1098 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 1099 | mlog_exit_void(); | 
|  | 1100 | } | 
|  | 1101 |  | 
| Adrian Bunk | da66116 | 2006-11-20 03:24:28 +0100 | [diff] [blame] | 1102 | static int ocfs2_create_new_lock(struct ocfs2_super *osb, | 
|  | 1103 | struct ocfs2_lock_res *lockres, | 
|  | 1104 | int ex, | 
|  | 1105 | int local) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1106 | { | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 1107 | int level =  ex ? LKM_EXMODE : LKM_PRMODE; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1108 | unsigned long flags; | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 1109 | int lkm_flags = local ? LKM_LOCAL : 0; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1110 |  | 
|  | 1111 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 1112 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); | 
|  | 1113 | lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); | 
|  | 1114 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 1115 |  | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 1116 | return ocfs2_lock_create(osb, lockres, level, lkm_flags); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1117 | } | 
|  | 1118 |  | 
|  | 1119 | /* Grants us an EX lock on the data and metadata resources, skipping | 
|  | 1120 | * the normal cluster directory lookup. Use this ONLY on newly created | 
|  | 1121 | * inodes which other nodes can't possibly see, and which haven't been | 
|  | 1122 | * hashed in the inode hash yet. This can give us a good performance | 
|  | 1123 | * increase as it'll skip the network broadcast normally associated | 
|  | 1124 | * with creating a new lock resource. */ | 
|  | 1125 | int ocfs2_create_new_inode_locks(struct inode *inode) | 
|  | 1126 | { | 
|  | 1127 | int ret; | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 1128 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1129 |  | 
|  | 1130 | BUG_ON(!inode); | 
|  | 1131 | BUG_ON(!ocfs2_inode_is_new(inode)); | 
|  | 1132 |  | 
|  | 1133 | mlog_entry_void(); | 
|  | 1134 |  | 
| Mark Fasheh | b069705 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 1135 | mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1136 |  | 
|  | 1137 | /* NOTE: That we don't increment any of the holder counts, nor | 
|  | 1138 | * do we add anything to a journal handle. Since this is | 
|  | 1139 | * supposed to be a new inode which the cluster doesn't know | 
|  | 1140 | * about yet, there is no need to.  As far as the LVB handling | 
|  | 1141 | * is concerned, this is basically like acquiring an EX lock | 
|  | 1142 | * on a resource which has an invalid one -- we'll set it | 
|  | 1143 | * valid when we release the EX. */ | 
|  | 1144 |  | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 1145 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1146 | if (ret) { | 
|  | 1147 | mlog_errno(ret); | 
|  | 1148 | goto bail; | 
|  | 1149 | } | 
|  | 1150 |  | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 1151 | /* | 
|  | 1152 | * We don't want to use LKM_LOCAL on a meta data lock as they | 
|  | 1153 | * don't use a generation in their lock names. | 
|  | 1154 | */ | 
|  | 1155 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_meta_lockres, 1, 0); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1156 | if (ret) { | 
|  | 1157 | mlog_errno(ret); | 
|  | 1158 | goto bail; | 
|  | 1159 | } | 
|  | 1160 |  | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 1161 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1, 1); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1162 | if (ret) { | 
|  | 1163 | mlog_errno(ret); | 
|  | 1164 | goto bail; | 
|  | 1165 | } | 
|  | 1166 |  | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1167 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0); | 
|  | 1168 | if (ret) { | 
|  | 1169 | mlog_errno(ret); | 
|  | 1170 | goto bail; | 
|  | 1171 | } | 
|  | 1172 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1173 | bail: | 
|  | 1174 | mlog_exit(ret); | 
|  | 1175 | return ret; | 
|  | 1176 | } | 
|  | 1177 |  | 
|  | 1178 | int ocfs2_rw_lock(struct inode *inode, int write) | 
|  | 1179 | { | 
|  | 1180 | int status, level; | 
|  | 1181 | struct ocfs2_lock_res *lockres; | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 1182 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1183 |  | 
|  | 1184 | BUG_ON(!inode); | 
|  | 1185 |  | 
|  | 1186 | mlog_entry_void(); | 
|  | 1187 |  | 
| Mark Fasheh | b069705 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 1188 | mlog(0, "inode %llu take %s RW lock\n", | 
|  | 1189 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1190 | write ? "EXMODE" : "PRMODE"); | 
|  | 1191 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 1192 | if (ocfs2_mount_local(osb)) | 
|  | 1193 | return 0; | 
|  | 1194 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1195 | lockres = &OCFS2_I(inode)->ip_rw_lockres; | 
|  | 1196 |  | 
|  | 1197 | level = write ? LKM_EXMODE : LKM_PRMODE; | 
|  | 1198 |  | 
|  | 1199 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, | 
|  | 1200 | 0); | 
|  | 1201 | if (status < 0) | 
|  | 1202 | mlog_errno(status); | 
|  | 1203 |  | 
|  | 1204 | mlog_exit(status); | 
|  | 1205 | return status; | 
|  | 1206 | } | 
|  | 1207 |  | 
|  | 1208 | void ocfs2_rw_unlock(struct inode *inode, int write) | 
|  | 1209 | { | 
|  | 1210 | int level = write ? LKM_EXMODE : LKM_PRMODE; | 
|  | 1211 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 1212 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1213 |  | 
|  | 1214 | mlog_entry_void(); | 
|  | 1215 |  | 
| Mark Fasheh | b069705 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 1216 | mlog(0, "inode %llu drop %s RW lock\n", | 
|  | 1217 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1218 | write ? "EXMODE" : "PRMODE"); | 
|  | 1219 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 1220 | if (!ocfs2_mount_local(osb)) | 
|  | 1221 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1222 |  | 
|  | 1223 | mlog_exit_void(); | 
|  | 1224 | } | 
|  | 1225 |  | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1226 | /* | 
|  | 1227 | * ocfs2_open_lock always get PR mode lock. | 
|  | 1228 | */ | 
|  | 1229 | int ocfs2_open_lock(struct inode *inode) | 
|  | 1230 | { | 
|  | 1231 | int status = 0; | 
|  | 1232 | struct ocfs2_lock_res *lockres; | 
|  | 1233 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
|  | 1234 |  | 
|  | 1235 | BUG_ON(!inode); | 
|  | 1236 |  | 
|  | 1237 | mlog_entry_void(); | 
|  | 1238 |  | 
|  | 1239 | mlog(0, "inode %llu take PRMODE open lock\n", | 
|  | 1240 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 
|  | 1241 |  | 
|  | 1242 | if (ocfs2_mount_local(osb)) | 
|  | 1243 | goto out; | 
|  | 1244 |  | 
|  | 1245 | lockres = &OCFS2_I(inode)->ip_open_lockres; | 
|  | 1246 |  | 
|  | 1247 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, | 
|  | 1248 | LKM_PRMODE, 0, 0); | 
|  | 1249 | if (status < 0) | 
|  | 1250 | mlog_errno(status); | 
|  | 1251 |  | 
|  | 1252 | out: | 
|  | 1253 | mlog_exit(status); | 
|  | 1254 | return status; | 
|  | 1255 | } | 
|  | 1256 |  | 
|  | 1257 | int ocfs2_try_open_lock(struct inode *inode, int write) | 
|  | 1258 | { | 
|  | 1259 | int status = 0, level; | 
|  | 1260 | struct ocfs2_lock_res *lockres; | 
|  | 1261 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
|  | 1262 |  | 
|  | 1263 | BUG_ON(!inode); | 
|  | 1264 |  | 
|  | 1265 | mlog_entry_void(); | 
|  | 1266 |  | 
|  | 1267 | mlog(0, "inode %llu try to take %s open lock\n", | 
|  | 1268 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 
|  | 1269 | write ? "EXMODE" : "PRMODE"); | 
|  | 1270 |  | 
|  | 1271 | if (ocfs2_mount_local(osb)) | 
|  | 1272 | goto out; | 
|  | 1273 |  | 
|  | 1274 | lockres = &OCFS2_I(inode)->ip_open_lockres; | 
|  | 1275 |  | 
|  | 1276 | level = write ? LKM_EXMODE : LKM_PRMODE; | 
|  | 1277 |  | 
|  | 1278 | /* | 
|  | 1279 | * The file system may already holding a PRMODE/EXMODE open lock. | 
|  | 1280 | * Since we pass LKM_NOQUEUE, the request won't block waiting on | 
|  | 1281 | * other nodes and the -EAGAIN will indicate to the caller that | 
|  | 1282 | * this inode is still in use. | 
|  | 1283 | */ | 
|  | 1284 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, | 
|  | 1285 | level, LKM_NOQUEUE, 0); | 
|  | 1286 |  | 
|  | 1287 | out: | 
|  | 1288 | mlog_exit(status); | 
|  | 1289 | return status; | 
|  | 1290 | } | 
|  | 1291 |  | 
|  | 1292 | /* | 
|  | 1293 | * ocfs2_open_unlock unlock PR and EX mode open locks. | 
|  | 1294 | */ | 
|  | 1295 | void ocfs2_open_unlock(struct inode *inode) | 
|  | 1296 | { | 
|  | 1297 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres; | 
|  | 1298 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
|  | 1299 |  | 
|  | 1300 | mlog_entry_void(); | 
|  | 1301 |  | 
|  | 1302 | mlog(0, "inode %llu drop open lock\n", | 
|  | 1303 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 
|  | 1304 |  | 
|  | 1305 | if (ocfs2_mount_local(osb)) | 
|  | 1306 | goto out; | 
|  | 1307 |  | 
|  | 1308 | if(lockres->l_ro_holders) | 
|  | 1309 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, | 
|  | 1310 | LKM_PRMODE); | 
|  | 1311 | if(lockres->l_ex_holders) | 
|  | 1312 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, | 
|  | 1313 | LKM_EXMODE); | 
|  | 1314 |  | 
|  | 1315 | out: | 
|  | 1316 | mlog_exit_void(); | 
|  | 1317 | } | 
|  | 1318 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1319 | int ocfs2_data_lock_full(struct inode *inode, | 
|  | 1320 | int write, | 
|  | 1321 | int arg_flags) | 
|  | 1322 | { | 
|  | 1323 | int status = 0, level; | 
|  | 1324 | struct ocfs2_lock_res *lockres; | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 1325 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1326 |  | 
|  | 1327 | BUG_ON(!inode); | 
|  | 1328 |  | 
|  | 1329 | mlog_entry_void(); | 
|  | 1330 |  | 
| Mark Fasheh | b069705 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 1331 | mlog(0, "inode %llu take %s DATA lock\n", | 
|  | 1332 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1333 | write ? "EXMODE" : "PRMODE"); | 
|  | 1334 |  | 
|  | 1335 | /* We'll allow faking a readonly data lock for | 
|  | 1336 | * rodevices. */ | 
|  | 1337 | if (ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb))) { | 
|  | 1338 | if (write) { | 
|  | 1339 | status = -EROFS; | 
|  | 1340 | mlog_errno(status); | 
|  | 1341 | } | 
|  | 1342 | goto out; | 
|  | 1343 | } | 
|  | 1344 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 1345 | if (ocfs2_mount_local(osb)) | 
|  | 1346 | goto out; | 
|  | 1347 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1348 | lockres = &OCFS2_I(inode)->ip_data_lockres; | 
|  | 1349 |  | 
|  | 1350 | level = write ? LKM_EXMODE : LKM_PRMODE; | 
|  | 1351 |  | 
|  | 1352 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, | 
|  | 1353 | 0, arg_flags); | 
|  | 1354 | if (status < 0 && status != -EAGAIN) | 
|  | 1355 | mlog_errno(status); | 
|  | 1356 |  | 
|  | 1357 | out: | 
|  | 1358 | mlog_exit(status); | 
|  | 1359 | return status; | 
|  | 1360 | } | 
|  | 1361 |  | 
|  | 1362 | /* see ocfs2_meta_lock_with_page() */ | 
|  | 1363 | int ocfs2_data_lock_with_page(struct inode *inode, | 
|  | 1364 | int write, | 
|  | 1365 | struct page *page) | 
|  | 1366 | { | 
|  | 1367 | int ret; | 
|  | 1368 |  | 
|  | 1369 | ret = ocfs2_data_lock_full(inode, write, OCFS2_LOCK_NONBLOCK); | 
|  | 1370 | if (ret == -EAGAIN) { | 
|  | 1371 | unlock_page(page); | 
|  | 1372 | if (ocfs2_data_lock(inode, write) == 0) | 
|  | 1373 | ocfs2_data_unlock(inode, write); | 
|  | 1374 | ret = AOP_TRUNCATED_PAGE; | 
|  | 1375 | } | 
|  | 1376 |  | 
|  | 1377 | return ret; | 
|  | 1378 | } | 
|  | 1379 |  | 
|  | 1380 | static void ocfs2_vote_on_unlock(struct ocfs2_super *osb, | 
|  | 1381 | struct ocfs2_lock_res *lockres) | 
|  | 1382 | { | 
|  | 1383 | int kick = 0; | 
|  | 1384 |  | 
|  | 1385 | mlog_entry_void(); | 
|  | 1386 |  | 
|  | 1387 | /* If we know that another node is waiting on our lock, kick | 
|  | 1388 | * the vote thread * pre-emptively when we reach a release | 
|  | 1389 | * condition. */ | 
|  | 1390 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { | 
|  | 1391 | switch(lockres->l_blocking) { | 
|  | 1392 | case LKM_EXMODE: | 
|  | 1393 | if (!lockres->l_ex_holders && !lockres->l_ro_holders) | 
|  | 1394 | kick = 1; | 
|  | 1395 | break; | 
|  | 1396 | case LKM_PRMODE: | 
|  | 1397 | if (!lockres->l_ex_holders) | 
|  | 1398 | kick = 1; | 
|  | 1399 | break; | 
|  | 1400 | default: | 
|  | 1401 | BUG(); | 
|  | 1402 | } | 
|  | 1403 | } | 
|  | 1404 |  | 
|  | 1405 | if (kick) | 
|  | 1406 | ocfs2_kick_vote_thread(osb); | 
|  | 1407 |  | 
|  | 1408 | mlog_exit_void(); | 
|  | 1409 | } | 
|  | 1410 |  | 
|  | 1411 | void ocfs2_data_unlock(struct inode *inode, | 
|  | 1412 | int write) | 
|  | 1413 | { | 
|  | 1414 | int level = write ? LKM_EXMODE : LKM_PRMODE; | 
|  | 1415 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_data_lockres; | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 1416 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1417 |  | 
|  | 1418 | mlog_entry_void(); | 
|  | 1419 |  | 
| Mark Fasheh | b069705 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 1420 | mlog(0, "inode %llu drop %s DATA lock\n", | 
|  | 1421 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1422 | write ? "EXMODE" : "PRMODE"); | 
|  | 1423 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 1424 | if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && | 
|  | 1425 | !ocfs2_mount_local(osb)) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1426 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); | 
|  | 1427 |  | 
|  | 1428 | mlog_exit_void(); | 
|  | 1429 | } | 
|  | 1430 |  | 
|  | 1431 | #define OCFS2_SEC_BITS   34 | 
|  | 1432 | #define OCFS2_SEC_SHIFT  (64 - 34) | 
|  | 1433 | #define OCFS2_NSEC_MASK  ((1ULL << OCFS2_SEC_SHIFT) - 1) | 
|  | 1434 |  | 
|  | 1435 | /* LVB only has room for 64 bits of time here so we pack it for | 
|  | 1436 | * now. */ | 
|  | 1437 | static u64 ocfs2_pack_timespec(struct timespec *spec) | 
|  | 1438 | { | 
|  | 1439 | u64 res; | 
|  | 1440 | u64 sec = spec->tv_sec; | 
|  | 1441 | u32 nsec = spec->tv_nsec; | 
|  | 1442 |  | 
|  | 1443 | res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK); | 
|  | 1444 |  | 
|  | 1445 | return res; | 
|  | 1446 | } | 
|  | 1447 |  | 
|  | 1448 | /* Call this with the lockres locked. I am reasonably sure we don't | 
|  | 1449 | * need ip_lock in this function as anyone who would be changing those | 
|  | 1450 | * values is supposed to be blocked in ocfs2_meta_lock right now. */ | 
|  | 1451 | static void __ocfs2_stuff_meta_lvb(struct inode *inode) | 
|  | 1452 | { | 
|  | 1453 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 
|  | 1454 | struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres; | 
|  | 1455 | struct ocfs2_meta_lvb *lvb; | 
|  | 1456 |  | 
|  | 1457 | mlog_entry_void(); | 
|  | 1458 |  | 
|  | 1459 | lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; | 
|  | 1460 |  | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 1461 | /* | 
|  | 1462 | * Invalidate the LVB of a deleted inode - this way other | 
|  | 1463 | * nodes are forced to go to disk and discover the new inode | 
|  | 1464 | * status. | 
|  | 1465 | */ | 
|  | 1466 | if (oi->ip_flags & OCFS2_INODE_DELETED) { | 
|  | 1467 | lvb->lvb_version = 0; | 
|  | 1468 | goto out; | 
|  | 1469 | } | 
|  | 1470 |  | 
| Mark Fasheh | 4d3b83f | 2006-09-12 15:22:18 -0700 | [diff] [blame] | 1471 | lvb->lvb_version   = OCFS2_LVB_VERSION; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1472 | lvb->lvb_isize	   = cpu_to_be64(i_size_read(inode)); | 
|  | 1473 | lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); | 
|  | 1474 | lvb->lvb_iuid      = cpu_to_be32(inode->i_uid); | 
|  | 1475 | lvb->lvb_igid      = cpu_to_be32(inode->i_gid); | 
|  | 1476 | lvb->lvb_imode     = cpu_to_be16(inode->i_mode); | 
|  | 1477 | lvb->lvb_inlink    = cpu_to_be16(inode->i_nlink); | 
|  | 1478 | lvb->lvb_iatime_packed  = | 
|  | 1479 | cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime)); | 
|  | 1480 | lvb->lvb_ictime_packed = | 
|  | 1481 | cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime)); | 
|  | 1482 | lvb->lvb_imtime_packed = | 
|  | 1483 | cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); | 
| Herbert Poetzl | ca4d147 | 2006-07-03 17:27:12 -0700 | [diff] [blame] | 1484 | lvb->lvb_iattr    = cpu_to_be32(oi->ip_attr); | 
| Mark Fasheh | f9e2d82 | 2006-09-12 15:35:49 -0700 | [diff] [blame] | 1485 | lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1486 |  | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 1487 | out: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1488 | mlog_meta_lvb(0, lockres); | 
|  | 1489 |  | 
|  | 1490 | mlog_exit_void(); | 
|  | 1491 | } | 
|  | 1492 |  | 
|  | 1493 | static void ocfs2_unpack_timespec(struct timespec *spec, | 
|  | 1494 | u64 packed_time) | 
|  | 1495 | { | 
|  | 1496 | spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT; | 
|  | 1497 | spec->tv_nsec = packed_time & OCFS2_NSEC_MASK; | 
|  | 1498 | } | 
|  | 1499 |  | 
|  | 1500 | static void ocfs2_refresh_inode_from_lvb(struct inode *inode) | 
|  | 1501 | { | 
|  | 1502 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 
|  | 1503 | struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres; | 
|  | 1504 | struct ocfs2_meta_lvb *lvb; | 
|  | 1505 |  | 
|  | 1506 | mlog_entry_void(); | 
|  | 1507 |  | 
|  | 1508 | mlog_meta_lvb(0, lockres); | 
|  | 1509 |  | 
|  | 1510 | lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; | 
|  | 1511 |  | 
|  | 1512 | /* We're safe here without the lockres lock... */ | 
|  | 1513 | spin_lock(&oi->ip_lock); | 
|  | 1514 | oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters); | 
|  | 1515 | i_size_write(inode, be64_to_cpu(lvb->lvb_isize)); | 
|  | 1516 |  | 
| Herbert Poetzl | ca4d147 | 2006-07-03 17:27:12 -0700 | [diff] [blame] | 1517 | oi->ip_attr = be32_to_cpu(lvb->lvb_iattr); | 
|  | 1518 | ocfs2_set_inode_flags(inode); | 
|  | 1519 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1520 | /* fast-symlinks are a special case */ | 
|  | 1521 | if (S_ISLNK(inode->i_mode) && !oi->ip_clusters) | 
|  | 1522 | inode->i_blocks = 0; | 
|  | 1523 | else | 
| Mark Fasheh | 8110b07 | 2007-03-22 16:53:23 -0700 | [diff] [blame] | 1524 | inode->i_blocks = ocfs2_inode_sector_count(inode); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1525 |  | 
|  | 1526 | inode->i_uid     = be32_to_cpu(lvb->lvb_iuid); | 
|  | 1527 | inode->i_gid     = be32_to_cpu(lvb->lvb_igid); | 
|  | 1528 | inode->i_mode    = be16_to_cpu(lvb->lvb_imode); | 
|  | 1529 | inode->i_nlink   = be16_to_cpu(lvb->lvb_inlink); | 
|  | 1530 | ocfs2_unpack_timespec(&inode->i_atime, | 
|  | 1531 | be64_to_cpu(lvb->lvb_iatime_packed)); | 
|  | 1532 | ocfs2_unpack_timespec(&inode->i_mtime, | 
|  | 1533 | be64_to_cpu(lvb->lvb_imtime_packed)); | 
|  | 1534 | ocfs2_unpack_timespec(&inode->i_ctime, | 
|  | 1535 | be64_to_cpu(lvb->lvb_ictime_packed)); | 
|  | 1536 | spin_unlock(&oi->ip_lock); | 
|  | 1537 |  | 
|  | 1538 | mlog_exit_void(); | 
|  | 1539 | } | 
|  | 1540 |  | 
| Mark Fasheh | f9e2d82 | 2006-09-12 15:35:49 -0700 | [diff] [blame] | 1541 | static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, | 
|  | 1542 | struct ocfs2_lock_res *lockres) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1543 | { | 
|  | 1544 | struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; | 
|  | 1545 |  | 
| Mark Fasheh | f9e2d82 | 2006-09-12 15:35:49 -0700 | [diff] [blame] | 1546 | if (lvb->lvb_version == OCFS2_LVB_VERSION | 
|  | 1547 | && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1548 | return 1; | 
|  | 1549 | return 0; | 
|  | 1550 | } | 
|  | 1551 |  | 
|  | 1552 | /* Determine whether a lock resource needs to be refreshed, and | 
|  | 1553 | * arbitrate who gets to refresh it. | 
|  | 1554 | * | 
|  | 1555 | *   0 means no refresh needed. | 
|  | 1556 | * | 
|  | 1557 | *   > 0 means you need to refresh this and you MUST call | 
|  | 1558 | *   ocfs2_complete_lock_res_refresh afterwards. */ | 
|  | 1559 | static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres) | 
|  | 1560 | { | 
|  | 1561 | unsigned long flags; | 
|  | 1562 | int status = 0; | 
|  | 1563 |  | 
|  | 1564 | mlog_entry_void(); | 
|  | 1565 |  | 
|  | 1566 | refresh_check: | 
|  | 1567 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 1568 | if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) { | 
|  | 1569 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 1570 | goto bail; | 
|  | 1571 | } | 
|  | 1572 |  | 
|  | 1573 | if (lockres->l_flags & OCFS2_LOCK_REFRESHING) { | 
|  | 1574 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 1575 |  | 
|  | 1576 | ocfs2_wait_on_refreshing_lock(lockres); | 
|  | 1577 | goto refresh_check; | 
|  | 1578 | } | 
|  | 1579 |  | 
|  | 1580 | /* Ok, I'll be the one to refresh this lock. */ | 
|  | 1581 | lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING); | 
|  | 1582 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 1583 |  | 
|  | 1584 | status = 1; | 
|  | 1585 | bail: | 
|  | 1586 | mlog_exit(status); | 
|  | 1587 | return status; | 
|  | 1588 | } | 
|  | 1589 |  | 
|  | 1590 | /* If status is non zero, I'll mark it as not being in refresh | 
|  | 1591 | * anymroe, but i won't clear the needs refresh flag. */ | 
|  | 1592 | static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres, | 
|  | 1593 | int status) | 
|  | 1594 | { | 
|  | 1595 | unsigned long flags; | 
|  | 1596 | mlog_entry_void(); | 
|  | 1597 |  | 
|  | 1598 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 1599 | lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING); | 
|  | 1600 | if (!status) | 
|  | 1601 | lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); | 
|  | 1602 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 1603 |  | 
|  | 1604 | wake_up(&lockres->l_event); | 
|  | 1605 |  | 
|  | 1606 | mlog_exit_void(); | 
|  | 1607 | } | 
|  | 1608 |  | 
|  | 1609 | /* may or may not return a bh if it went to disk. */ | 
|  | 1610 | static int ocfs2_meta_lock_update(struct inode *inode, | 
|  | 1611 | struct buffer_head **bh) | 
|  | 1612 | { | 
|  | 1613 | int status = 0; | 
|  | 1614 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 
| Mark Fasheh | be9e986 | 2007-04-18 15:22:08 -0700 | [diff] [blame] | 1615 | struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1616 | struct ocfs2_dinode *fe; | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 1617 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1618 |  | 
|  | 1619 | mlog_entry_void(); | 
|  | 1620 |  | 
| Mark Fasheh | be9e986 | 2007-04-18 15:22:08 -0700 | [diff] [blame] | 1621 | if (ocfs2_mount_local(osb)) | 
|  | 1622 | goto bail; | 
|  | 1623 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1624 | spin_lock(&oi->ip_lock); | 
|  | 1625 | if (oi->ip_flags & OCFS2_INODE_DELETED) { | 
| Mark Fasheh | b069705 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 1626 | mlog(0, "Orphaned inode %llu was deleted while we " | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1627 | "were waiting on a lock. ip_flags = 0x%x\n", | 
| Mark Fasheh | b069705 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 1628 | (unsigned long long)oi->ip_blkno, oi->ip_flags); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1629 | spin_unlock(&oi->ip_lock); | 
|  | 1630 | status = -ENOENT; | 
|  | 1631 | goto bail; | 
|  | 1632 | } | 
|  | 1633 | spin_unlock(&oi->ip_lock); | 
|  | 1634 |  | 
| Mark Fasheh | be9e986 | 2007-04-18 15:22:08 -0700 | [diff] [blame] | 1635 | if (!ocfs2_should_refresh_lock_res(lockres)) | 
|  | 1636 | goto bail; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1637 |  | 
|  | 1638 | /* This will discard any caching information we might have had | 
|  | 1639 | * for the inode metadata. */ | 
|  | 1640 | ocfs2_metadata_cache_purge(inode); | 
|  | 1641 |  | 
| Mark Fasheh | 8341897 | 2007-04-23 18:53:12 -0700 | [diff] [blame] | 1642 | ocfs2_extent_map_trunc(inode, 0); | 
|  | 1643 |  | 
| Mark Fasheh | be9e986 | 2007-04-18 15:22:08 -0700 | [diff] [blame] | 1644 | if (ocfs2_meta_lvb_is_trustable(inode, lockres)) { | 
| Mark Fasheh | b069705 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 1645 | mlog(0, "Trusting LVB on inode %llu\n", | 
|  | 1646 | (unsigned long long)oi->ip_blkno); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1647 | ocfs2_refresh_inode_from_lvb(inode); | 
|  | 1648 | } else { | 
|  | 1649 | /* Boo, we have to go to disk. */ | 
|  | 1650 | /* read bh, cast, ocfs2_refresh_inode */ | 
|  | 1651 | status = ocfs2_read_block(OCFS2_SB(inode->i_sb), oi->ip_blkno, | 
|  | 1652 | bh, OCFS2_BH_CACHED, inode); | 
|  | 1653 | if (status < 0) { | 
|  | 1654 | mlog_errno(status); | 
|  | 1655 | goto bail_refresh; | 
|  | 1656 | } | 
|  | 1657 | fe = (struct ocfs2_dinode *) (*bh)->b_data; | 
|  | 1658 |  | 
|  | 1659 | /* This is a good chance to make sure we're not | 
|  | 1660 | * locking an invalid object. | 
|  | 1661 | * | 
|  | 1662 | * We bug on a stale inode here because we checked | 
|  | 1663 | * above whether it was wiped from disk. The wiping | 
|  | 1664 | * node provides a guarantee that we receive that | 
|  | 1665 | * message and can mark the inode before dropping any | 
|  | 1666 | * locks associated with it. */ | 
|  | 1667 | if (!OCFS2_IS_VALID_DINODE(fe)) { | 
|  | 1668 | OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); | 
|  | 1669 | status = -EIO; | 
|  | 1670 | goto bail_refresh; | 
|  | 1671 | } | 
|  | 1672 | mlog_bug_on_msg(inode->i_generation != | 
|  | 1673 | le32_to_cpu(fe->i_generation), | 
| Mark Fasheh | b069705 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 1674 | "Invalid dinode %llu disk generation: %u " | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1675 | "inode->i_generation: %u\n", | 
| Mark Fasheh | b069705 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 1676 | (unsigned long long)oi->ip_blkno, | 
|  | 1677 | le32_to_cpu(fe->i_generation), | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1678 | inode->i_generation); | 
|  | 1679 | mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) || | 
|  | 1680 | !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)), | 
| Mark Fasheh | b069705 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 1681 | "Stale dinode %llu dtime: %llu flags: 0x%x\n", | 
|  | 1682 | (unsigned long long)oi->ip_blkno, | 
|  | 1683 | (unsigned long long)le64_to_cpu(fe->i_dtime), | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1684 | le32_to_cpu(fe->i_flags)); | 
|  | 1685 |  | 
|  | 1686 | ocfs2_refresh_inode(inode, fe); | 
|  | 1687 | } | 
|  | 1688 |  | 
|  | 1689 | status = 0; | 
|  | 1690 | bail_refresh: | 
| Mark Fasheh | be9e986 | 2007-04-18 15:22:08 -0700 | [diff] [blame] | 1691 | ocfs2_complete_lock_res_refresh(lockres, status); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1692 | bail: | 
|  | 1693 | mlog_exit(status); | 
|  | 1694 | return status; | 
|  | 1695 | } | 
|  | 1696 |  | 
|  | 1697 | static int ocfs2_assign_bh(struct inode *inode, | 
|  | 1698 | struct buffer_head **ret_bh, | 
|  | 1699 | struct buffer_head *passed_bh) | 
|  | 1700 | { | 
|  | 1701 | int status; | 
|  | 1702 |  | 
|  | 1703 | if (passed_bh) { | 
|  | 1704 | /* Ok, the update went to disk for us, use the | 
|  | 1705 | * returned bh. */ | 
|  | 1706 | *ret_bh = passed_bh; | 
|  | 1707 | get_bh(*ret_bh); | 
|  | 1708 |  | 
|  | 1709 | return 0; | 
|  | 1710 | } | 
|  | 1711 |  | 
|  | 1712 | status = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 
|  | 1713 | OCFS2_I(inode)->ip_blkno, | 
|  | 1714 | ret_bh, | 
|  | 1715 | OCFS2_BH_CACHED, | 
|  | 1716 | inode); | 
|  | 1717 | if (status < 0) | 
|  | 1718 | mlog_errno(status); | 
|  | 1719 |  | 
|  | 1720 | return status; | 
|  | 1721 | } | 
|  | 1722 |  | 
|  | 1723 | /* | 
|  | 1724 | * returns < 0 error if the callback will never be called, otherwise | 
|  | 1725 | * the result of the lock will be communicated via the callback. | 
|  | 1726 | */ | 
|  | 1727 | int ocfs2_meta_lock_full(struct inode *inode, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1728 | struct buffer_head **ret_bh, | 
|  | 1729 | int ex, | 
|  | 1730 | int arg_flags) | 
|  | 1731 | { | 
|  | 1732 | int status, level, dlm_flags, acquired; | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 1733 | struct ocfs2_lock_res *lockres = NULL; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1734 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
|  | 1735 | struct buffer_head *local_bh = NULL; | 
|  | 1736 |  | 
|  | 1737 | BUG_ON(!inode); | 
|  | 1738 |  | 
|  | 1739 | mlog_entry_void(); | 
|  | 1740 |  | 
| Mark Fasheh | b069705 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 1741 | mlog(0, "inode %llu, take %s META lock\n", | 
|  | 1742 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1743 | ex ? "EXMODE" : "PRMODE"); | 
|  | 1744 |  | 
|  | 1745 | status = 0; | 
|  | 1746 | acquired = 0; | 
|  | 1747 | /* We'll allow faking a readonly metadata lock for | 
|  | 1748 | * rodevices. */ | 
|  | 1749 | if (ocfs2_is_hard_readonly(osb)) { | 
|  | 1750 | if (ex) | 
|  | 1751 | status = -EROFS; | 
|  | 1752 | goto bail; | 
|  | 1753 | } | 
|  | 1754 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 1755 | if (ocfs2_mount_local(osb)) | 
|  | 1756 | goto local; | 
|  | 1757 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1758 | if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) | 
|  | 1759 | wait_event(osb->recovery_event, | 
|  | 1760 | ocfs2_node_map_is_empty(osb, &osb->recovery_map)); | 
|  | 1761 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1762 | lockres = &OCFS2_I(inode)->ip_meta_lockres; | 
|  | 1763 | level = ex ? LKM_EXMODE : LKM_PRMODE; | 
|  | 1764 | dlm_flags = 0; | 
|  | 1765 | if (arg_flags & OCFS2_META_LOCK_NOQUEUE) | 
|  | 1766 | dlm_flags |= LKM_NOQUEUE; | 
|  | 1767 |  | 
|  | 1768 | status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags); | 
|  | 1769 | if (status < 0) { | 
|  | 1770 | if (status != -EAGAIN && status != -EIOCBRETRY) | 
|  | 1771 | mlog_errno(status); | 
|  | 1772 | goto bail; | 
|  | 1773 | } | 
|  | 1774 |  | 
|  | 1775 | /* Notify the error cleanup path to drop the cluster lock. */ | 
|  | 1776 | acquired = 1; | 
|  | 1777 |  | 
|  | 1778 | /* We wait twice because a node may have died while we were in | 
|  | 1779 | * the lower dlm layers. The second time though, we've | 
|  | 1780 | * committed to owning this lock so we don't allow signals to | 
|  | 1781 | * abort the operation. */ | 
|  | 1782 | if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) | 
|  | 1783 | wait_event(osb->recovery_event, | 
|  | 1784 | ocfs2_node_map_is_empty(osb, &osb->recovery_map)); | 
|  | 1785 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 1786 | local: | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 1787 | /* | 
|  | 1788 | * We only see this flag if we're being called from | 
|  | 1789 | * ocfs2_read_locked_inode(). It means we're locking an inode | 
|  | 1790 | * which hasn't been populated yet, so clear the refresh flag | 
|  | 1791 | * and let the caller handle it. | 
|  | 1792 | */ | 
|  | 1793 | if (inode->i_state & I_NEW) { | 
|  | 1794 | status = 0; | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 1795 | if (lockres) | 
|  | 1796 | ocfs2_complete_lock_res_refresh(lockres, 0); | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 1797 | goto bail; | 
|  | 1798 | } | 
|  | 1799 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1800 | /* This is fun. The caller may want a bh back, or it may | 
|  | 1801 | * not. ocfs2_meta_lock_update definitely wants one in, but | 
|  | 1802 | * may or may not read one, depending on what's in the | 
|  | 1803 | * LVB. The result of all of this is that we've *only* gone to | 
|  | 1804 | * disk if we have to, so the complexity is worthwhile. */ | 
|  | 1805 | status = ocfs2_meta_lock_update(inode, &local_bh); | 
|  | 1806 | if (status < 0) { | 
|  | 1807 | if (status != -ENOENT) | 
|  | 1808 | mlog_errno(status); | 
|  | 1809 | goto bail; | 
|  | 1810 | } | 
|  | 1811 |  | 
|  | 1812 | if (ret_bh) { | 
|  | 1813 | status = ocfs2_assign_bh(inode, ret_bh, local_bh); | 
|  | 1814 | if (status < 0) { | 
|  | 1815 | mlog_errno(status); | 
|  | 1816 | goto bail; | 
|  | 1817 | } | 
|  | 1818 | } | 
|  | 1819 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1820 | bail: | 
|  | 1821 | if (status < 0) { | 
|  | 1822 | if (ret_bh && (*ret_bh)) { | 
|  | 1823 | brelse(*ret_bh); | 
|  | 1824 | *ret_bh = NULL; | 
|  | 1825 | } | 
|  | 1826 | if (acquired) | 
|  | 1827 | ocfs2_meta_unlock(inode, ex); | 
|  | 1828 | } | 
|  | 1829 |  | 
|  | 1830 | if (local_bh) | 
|  | 1831 | brelse(local_bh); | 
|  | 1832 |  | 
|  | 1833 | mlog_exit(status); | 
|  | 1834 | return status; | 
|  | 1835 | } | 
|  | 1836 |  | 
|  | 1837 | /* | 
|  | 1838 | * This is working around a lock inversion between tasks acquiring DLM locks | 
|  | 1839 | * while holding a page lock and the vote thread which blocks dlm lock acquiry | 
|  | 1840 | * while acquiring page locks. | 
|  | 1841 | * | 
|  | 1842 | * ** These _with_page variantes are only intended to be called from aop | 
|  | 1843 | * methods that hold page locks and return a very specific *positive* error | 
|  | 1844 | * code that aop methods pass up to the VFS -- test for errors with != 0. ** | 
|  | 1845 | * | 
|  | 1846 | * The DLM is called such that it returns -EAGAIN if it would have blocked | 
|  | 1847 | * waiting for the vote thread.  In that case we unlock our page so the vote | 
|  | 1848 | * thread can make progress.  Once we've done this we have to return | 
|  | 1849 | * AOP_TRUNCATED_PAGE so the aop method that called us can bubble that back up | 
|  | 1850 | * into the VFS who will then immediately retry the aop call. | 
|  | 1851 | * | 
|  | 1852 | * We do a blocking lock and immediate unlock before returning, though, so that | 
|  | 1853 | * the lock has a great chance of being cached on this node by the time the VFS | 
|  | 1854 | * calls back to retry the aop.    This has a potential to livelock as nodes | 
|  | 1855 | * ping locks back and forth, but that's a risk we're willing to take to avoid | 
|  | 1856 | * the lock inversion simply. | 
|  | 1857 | */ | 
|  | 1858 | int ocfs2_meta_lock_with_page(struct inode *inode, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1859 | struct buffer_head **ret_bh, | 
|  | 1860 | int ex, | 
|  | 1861 | struct page *page) | 
|  | 1862 | { | 
|  | 1863 | int ret; | 
|  | 1864 |  | 
| Mark Fasheh | 4bcec18 | 2006-10-09 16:02:40 -0700 | [diff] [blame] | 1865 | ret = ocfs2_meta_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1866 | if (ret == -EAGAIN) { | 
|  | 1867 | unlock_page(page); | 
| Mark Fasheh | 4bcec18 | 2006-10-09 16:02:40 -0700 | [diff] [blame] | 1868 | if (ocfs2_meta_lock(inode, ret_bh, ex) == 0) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1869 | ocfs2_meta_unlock(inode, ex); | 
|  | 1870 | ret = AOP_TRUNCATED_PAGE; | 
|  | 1871 | } | 
|  | 1872 |  | 
|  | 1873 | return ret; | 
|  | 1874 | } | 
|  | 1875 |  | 
| Tiger Yang | 7f1a37e | 2006-11-15 15:48:42 +0800 | [diff] [blame] | 1876 | int ocfs2_meta_lock_atime(struct inode *inode, | 
|  | 1877 | struct vfsmount *vfsmnt, | 
|  | 1878 | int *level) | 
|  | 1879 | { | 
|  | 1880 | int ret; | 
|  | 1881 |  | 
|  | 1882 | mlog_entry_void(); | 
|  | 1883 | ret = ocfs2_meta_lock(inode, NULL, 0); | 
|  | 1884 | if (ret < 0) { | 
|  | 1885 | mlog_errno(ret); | 
|  | 1886 | return ret; | 
|  | 1887 | } | 
|  | 1888 |  | 
|  | 1889 | /* | 
|  | 1890 | * If we should update atime, we will get EX lock, | 
|  | 1891 | * otherwise we just get PR lock. | 
|  | 1892 | */ | 
|  | 1893 | if (ocfs2_should_update_atime(inode, vfsmnt)) { | 
|  | 1894 | struct buffer_head *bh = NULL; | 
|  | 1895 |  | 
|  | 1896 | ocfs2_meta_unlock(inode, 0); | 
|  | 1897 | ret = ocfs2_meta_lock(inode, &bh, 1); | 
|  | 1898 | if (ret < 0) { | 
|  | 1899 | mlog_errno(ret); | 
|  | 1900 | return ret; | 
|  | 1901 | } | 
|  | 1902 | *level = 1; | 
|  | 1903 | if (ocfs2_should_update_atime(inode, vfsmnt)) | 
|  | 1904 | ocfs2_update_inode_atime(inode, bh); | 
|  | 1905 | if (bh) | 
|  | 1906 | brelse(bh); | 
|  | 1907 | } else | 
|  | 1908 | *level = 0; | 
|  | 1909 |  | 
|  | 1910 | mlog_exit(ret); | 
|  | 1911 | return ret; | 
|  | 1912 | } | 
|  | 1913 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1914 | void ocfs2_meta_unlock(struct inode *inode, | 
|  | 1915 | int ex) | 
|  | 1916 | { | 
|  | 1917 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 
|  | 1918 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres; | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 1919 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1920 |  | 
|  | 1921 | mlog_entry_void(); | 
|  | 1922 |  | 
| Mark Fasheh | b069705 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 1923 | mlog(0, "inode %llu drop %s META lock\n", | 
|  | 1924 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1925 | ex ? "EXMODE" : "PRMODE"); | 
|  | 1926 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 1927 | if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && | 
|  | 1928 | !ocfs2_mount_local(osb)) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1929 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); | 
|  | 1930 |  | 
|  | 1931 | mlog_exit_void(); | 
|  | 1932 | } | 
|  | 1933 |  | 
|  | 1934 | int ocfs2_super_lock(struct ocfs2_super *osb, | 
|  | 1935 | int ex) | 
|  | 1936 | { | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 1937 | int status = 0; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1938 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 
|  | 1939 | struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; | 
|  | 1940 | struct buffer_head *bh; | 
|  | 1941 | struct ocfs2_slot_info *si = osb->slot_info; | 
|  | 1942 |  | 
|  | 1943 | mlog_entry_void(); | 
|  | 1944 |  | 
|  | 1945 | if (ocfs2_is_hard_readonly(osb)) | 
|  | 1946 | return -EROFS; | 
|  | 1947 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 1948 | if (ocfs2_mount_local(osb)) | 
|  | 1949 | goto bail; | 
|  | 1950 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1951 | status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); | 
|  | 1952 | if (status < 0) { | 
|  | 1953 | mlog_errno(status); | 
|  | 1954 | goto bail; | 
|  | 1955 | } | 
|  | 1956 |  | 
|  | 1957 | /* The super block lock path is really in the best position to | 
|  | 1958 | * know when resources covered by the lock need to be | 
|  | 1959 | * refreshed, so we do it here. Of course, making sense of | 
|  | 1960 | * everything is up to the caller :) */ | 
|  | 1961 | status = ocfs2_should_refresh_lock_res(lockres); | 
|  | 1962 | if (status < 0) { | 
|  | 1963 | mlog_errno(status); | 
|  | 1964 | goto bail; | 
|  | 1965 | } | 
|  | 1966 | if (status) { | 
|  | 1967 | bh = si->si_bh; | 
|  | 1968 | status = ocfs2_read_block(osb, bh->b_blocknr, &bh, 0, | 
|  | 1969 | si->si_inode); | 
|  | 1970 | if (status == 0) | 
|  | 1971 | ocfs2_update_slot_info(si); | 
|  | 1972 |  | 
|  | 1973 | ocfs2_complete_lock_res_refresh(lockres, status); | 
|  | 1974 |  | 
|  | 1975 | if (status < 0) | 
|  | 1976 | mlog_errno(status); | 
|  | 1977 | } | 
|  | 1978 | bail: | 
|  | 1979 | mlog_exit(status); | 
|  | 1980 | return status; | 
|  | 1981 | } | 
|  | 1982 |  | 
|  | 1983 | void ocfs2_super_unlock(struct ocfs2_super *osb, | 
|  | 1984 | int ex) | 
|  | 1985 | { | 
|  | 1986 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 
|  | 1987 | struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; | 
|  | 1988 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 1989 | if (!ocfs2_mount_local(osb)) | 
|  | 1990 | ocfs2_cluster_unlock(osb, lockres, level); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1991 | } | 
|  | 1992 |  | 
|  | 1993 | int ocfs2_rename_lock(struct ocfs2_super *osb) | 
|  | 1994 | { | 
|  | 1995 | int status; | 
|  | 1996 | struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; | 
|  | 1997 |  | 
|  | 1998 | if (ocfs2_is_hard_readonly(osb)) | 
|  | 1999 | return -EROFS; | 
|  | 2000 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2001 | if (ocfs2_mount_local(osb)) | 
|  | 2002 | return 0; | 
|  | 2003 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2004 | status = ocfs2_cluster_lock(osb, lockres, LKM_EXMODE, 0, 0); | 
|  | 2005 | if (status < 0) | 
|  | 2006 | mlog_errno(status); | 
|  | 2007 |  | 
|  | 2008 | return status; | 
|  | 2009 | } | 
|  | 2010 |  | 
|  | 2011 | void ocfs2_rename_unlock(struct ocfs2_super *osb) | 
|  | 2012 | { | 
|  | 2013 | struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; | 
|  | 2014 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2015 | if (!ocfs2_mount_local(osb)) | 
|  | 2016 | ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2017 | } | 
|  | 2018 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 2019 | int ocfs2_dentry_lock(struct dentry *dentry, int ex) | 
|  | 2020 | { | 
|  | 2021 | int ret; | 
|  | 2022 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 
|  | 2023 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; | 
|  | 2024 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); | 
|  | 2025 |  | 
|  | 2026 | BUG_ON(!dl); | 
|  | 2027 |  | 
|  | 2028 | if (ocfs2_is_hard_readonly(osb)) | 
|  | 2029 | return -EROFS; | 
|  | 2030 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2031 | if (ocfs2_mount_local(osb)) | 
|  | 2032 | return 0; | 
|  | 2033 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 2034 | ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0); | 
|  | 2035 | if (ret < 0) | 
|  | 2036 | mlog_errno(ret); | 
|  | 2037 |  | 
|  | 2038 | return ret; | 
|  | 2039 | } | 
|  | 2040 |  | 
|  | 2041 | void ocfs2_dentry_unlock(struct dentry *dentry, int ex) | 
|  | 2042 | { | 
|  | 2043 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 
|  | 2044 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; | 
|  | 2045 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); | 
|  | 2046 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2047 | if (!ocfs2_mount_local(osb)) | 
|  | 2048 | ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 2049 | } | 
|  | 2050 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2051 | /* Reference counting of the dlm debug structure. We want this because | 
|  | 2052 | * open references on the debug inodes can live on after a mount, so | 
|  | 2053 | * we can't rely on the ocfs2_super to always exist. */ | 
|  | 2054 | static void ocfs2_dlm_debug_free(struct kref *kref) | 
|  | 2055 | { | 
|  | 2056 | struct ocfs2_dlm_debug *dlm_debug; | 
|  | 2057 |  | 
|  | 2058 | dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt); | 
|  | 2059 |  | 
|  | 2060 | kfree(dlm_debug); | 
|  | 2061 | } | 
|  | 2062 |  | 
|  | 2063 | void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug) | 
|  | 2064 | { | 
|  | 2065 | if (dlm_debug) | 
|  | 2066 | kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free); | 
|  | 2067 | } | 
|  | 2068 |  | 
|  | 2069 | static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug) | 
|  | 2070 | { | 
|  | 2071 | kref_get(&debug->d_refcnt); | 
|  | 2072 | } | 
|  | 2073 |  | 
|  | 2074 | struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void) | 
|  | 2075 | { | 
|  | 2076 | struct ocfs2_dlm_debug *dlm_debug; | 
|  | 2077 |  | 
|  | 2078 | dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL); | 
|  | 2079 | if (!dlm_debug) { | 
|  | 2080 | mlog_errno(-ENOMEM); | 
|  | 2081 | goto out; | 
|  | 2082 | } | 
|  | 2083 |  | 
|  | 2084 | kref_init(&dlm_debug->d_refcnt); | 
|  | 2085 | INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking); | 
|  | 2086 | dlm_debug->d_locking_state = NULL; | 
|  | 2087 | out: | 
|  | 2088 | return dlm_debug; | 
|  | 2089 | } | 
|  | 2090 |  | 
|  | 2091 | /* Access to this is arbitrated for us via seq_file->sem. */ | 
|  | 2092 | struct ocfs2_dlm_seq_priv { | 
|  | 2093 | struct ocfs2_dlm_debug *p_dlm_debug; | 
|  | 2094 | struct ocfs2_lock_res p_iter_res; | 
|  | 2095 | struct ocfs2_lock_res p_tmp_res; | 
|  | 2096 | }; | 
|  | 2097 |  | 
|  | 2098 | static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start, | 
|  | 2099 | struct ocfs2_dlm_seq_priv *priv) | 
|  | 2100 | { | 
|  | 2101 | struct ocfs2_lock_res *iter, *ret = NULL; | 
|  | 2102 | struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug; | 
|  | 2103 |  | 
|  | 2104 | assert_spin_locked(&ocfs2_dlm_tracking_lock); | 
|  | 2105 |  | 
|  | 2106 | list_for_each_entry(iter, &start->l_debug_list, l_debug_list) { | 
|  | 2107 | /* discover the head of the list */ | 
|  | 2108 | if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) { | 
|  | 2109 | mlog(0, "End of list found, %p\n", ret); | 
|  | 2110 | break; | 
|  | 2111 | } | 
|  | 2112 |  | 
|  | 2113 | /* We track our "dummy" iteration lockres' by a NULL | 
|  | 2114 | * l_ops field. */ | 
|  | 2115 | if (iter->l_ops != NULL) { | 
|  | 2116 | ret = iter; | 
|  | 2117 | break; | 
|  | 2118 | } | 
|  | 2119 | } | 
|  | 2120 |  | 
|  | 2121 | return ret; | 
|  | 2122 | } | 
|  | 2123 |  | 
|  | 2124 | static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos) | 
|  | 2125 | { | 
|  | 2126 | struct ocfs2_dlm_seq_priv *priv = m->private; | 
|  | 2127 | struct ocfs2_lock_res *iter; | 
|  | 2128 |  | 
|  | 2129 | spin_lock(&ocfs2_dlm_tracking_lock); | 
|  | 2130 | iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv); | 
|  | 2131 | if (iter) { | 
|  | 2132 | /* Since lockres' have the lifetime of their container | 
|  | 2133 | * (which can be inodes, ocfs2_supers, etc) we want to | 
|  | 2134 | * copy this out to a temporary lockres while still | 
|  | 2135 | * under the spinlock. Obviously after this we can't | 
|  | 2136 | * trust any pointers on the copy returned, but that's | 
|  | 2137 | * ok as the information we want isn't typically held | 
|  | 2138 | * in them. */ | 
|  | 2139 | priv->p_tmp_res = *iter; | 
|  | 2140 | iter = &priv->p_tmp_res; | 
|  | 2141 | } | 
|  | 2142 | spin_unlock(&ocfs2_dlm_tracking_lock); | 
|  | 2143 |  | 
|  | 2144 | return iter; | 
|  | 2145 | } | 
|  | 2146 |  | 
/* seq_file ->stop: nothing to undo, ->start and ->next drop the
 * tracking lock before returning. */
static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v)
{
}
|  | 2150 |  | 
|  | 2151 | static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos) | 
|  | 2152 | { | 
|  | 2153 | struct ocfs2_dlm_seq_priv *priv = m->private; | 
|  | 2154 | struct ocfs2_lock_res *iter = v; | 
|  | 2155 | struct ocfs2_lock_res *dummy = &priv->p_iter_res; | 
|  | 2156 |  | 
|  | 2157 | spin_lock(&ocfs2_dlm_tracking_lock); | 
|  | 2158 | iter = ocfs2_dlm_next_res(iter, priv); | 
|  | 2159 | list_del_init(&dummy->l_debug_list); | 
|  | 2160 | if (iter) { | 
|  | 2161 | list_add(&dummy->l_debug_list, &iter->l_debug_list); | 
|  | 2162 | priv->p_tmp_res = *iter; | 
|  | 2163 | iter = &priv->p_tmp_res; | 
|  | 2164 | } | 
|  | 2165 | spin_unlock(&ocfs2_dlm_tracking_lock); | 
|  | 2166 |  | 
|  | 2167 | return iter; | 
|  | 2168 | } | 
|  | 2169 |  | 
|  | 2170 | /* So that debugfs.ocfs2 can determine which format is being used */ | 
|  | 2171 | #define OCFS2_DLM_DEBUG_STR_VERSION 1 | 
|  | 2172 | static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) | 
|  | 2173 | { | 
|  | 2174 | int i; | 
|  | 2175 | char *lvb; | 
|  | 2176 | struct ocfs2_lock_res *lockres = v; | 
|  | 2177 |  | 
|  | 2178 | if (!lockres) | 
|  | 2179 | return -EINVAL; | 
|  | 2180 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 2181 | seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION); | 
|  | 2182 |  | 
|  | 2183 | if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY) | 
|  | 2184 | seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1, | 
|  | 2185 | lockres->l_name, | 
|  | 2186 | (unsigned int)ocfs2_get_dentry_lock_ino(lockres)); | 
|  | 2187 | else | 
|  | 2188 | seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name); | 
|  | 2189 |  | 
|  | 2190 | seq_printf(m, "%d\t" | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2191 | "0x%lx\t" | 
|  | 2192 | "0x%x\t" | 
|  | 2193 | "0x%x\t" | 
|  | 2194 | "%u\t" | 
|  | 2195 | "%u\t" | 
|  | 2196 | "%d\t" | 
|  | 2197 | "%d\t", | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2198 | lockres->l_level, | 
|  | 2199 | lockres->l_flags, | 
|  | 2200 | lockres->l_action, | 
|  | 2201 | lockres->l_unlock_action, | 
|  | 2202 | lockres->l_ro_holders, | 
|  | 2203 | lockres->l_ex_holders, | 
|  | 2204 | lockres->l_requested, | 
|  | 2205 | lockres->l_blocking); | 
|  | 2206 |  | 
|  | 2207 | /* Dump the raw LVB */ | 
|  | 2208 | lvb = lockres->l_lksb.lvb; | 
|  | 2209 | for(i = 0; i < DLM_LVB_LEN; i++) | 
|  | 2210 | seq_printf(m, "0x%x\t", lvb[i]); | 
|  | 2211 |  | 
|  | 2212 | /* End the line */ | 
|  | 2213 | seq_printf(m, "\n"); | 
|  | 2214 | return 0; | 
|  | 2215 | } | 
|  | 2216 |  | 
|  | 2217 | static struct seq_operations ocfs2_dlm_seq_ops = { | 
|  | 2218 | .start =	ocfs2_dlm_seq_start, | 
|  | 2219 | .stop =		ocfs2_dlm_seq_stop, | 
|  | 2220 | .next =		ocfs2_dlm_seq_next, | 
|  | 2221 | .show =		ocfs2_dlm_seq_show, | 
|  | 2222 | }; | 
|  | 2223 |  | 
|  | 2224 | static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file) | 
|  | 2225 | { | 
|  | 2226 | struct seq_file *seq = (struct seq_file *) file->private_data; | 
|  | 2227 | struct ocfs2_dlm_seq_priv *priv = seq->private; | 
|  | 2228 | struct ocfs2_lock_res *res = &priv->p_iter_res; | 
|  | 2229 |  | 
|  | 2230 | ocfs2_remove_lockres_tracking(res); | 
|  | 2231 | ocfs2_put_dlm_debug(priv->p_dlm_debug); | 
|  | 2232 | return seq_release_private(inode, file); | 
|  | 2233 | } | 
|  | 2234 |  | 
|  | 2235 | static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) | 
|  | 2236 | { | 
|  | 2237 | int ret; | 
|  | 2238 | struct ocfs2_dlm_seq_priv *priv; | 
|  | 2239 | struct seq_file *seq; | 
|  | 2240 | struct ocfs2_super *osb; | 
|  | 2241 |  | 
|  | 2242 | priv = kzalloc(sizeof(struct ocfs2_dlm_seq_priv), GFP_KERNEL); | 
|  | 2243 | if (!priv) { | 
|  | 2244 | ret = -ENOMEM; | 
|  | 2245 | mlog_errno(ret); | 
|  | 2246 | goto out; | 
|  | 2247 | } | 
| Theodore Ts'o | 8e18e29 | 2006-09-27 01:50:46 -0700 | [diff] [blame] | 2248 | osb = inode->i_private; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2249 | ocfs2_get_dlm_debug(osb->osb_dlm_debug); | 
|  | 2250 | priv->p_dlm_debug = osb->osb_dlm_debug; | 
|  | 2251 | INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list); | 
|  | 2252 |  | 
|  | 2253 | ret = seq_open(file, &ocfs2_dlm_seq_ops); | 
|  | 2254 | if (ret) { | 
|  | 2255 | kfree(priv); | 
|  | 2256 | mlog_errno(ret); | 
|  | 2257 | goto out; | 
|  | 2258 | } | 
|  | 2259 |  | 
|  | 2260 | seq = (struct seq_file *) file->private_data; | 
|  | 2261 | seq->private = priv; | 
|  | 2262 |  | 
|  | 2263 | ocfs2_add_lockres_tracking(&priv->p_iter_res, | 
|  | 2264 | priv->p_dlm_debug); | 
|  | 2265 |  | 
|  | 2266 | out: | 
|  | 2267 | return ret; | 
|  | 2268 | } | 
|  | 2269 |  | 
| Arjan van de Ven | 4b6f5d2 | 2006-03-28 01:56:42 -0800 | [diff] [blame] | 2270 | static const struct file_operations ocfs2_dlm_debug_fops = { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2271 | .open =		ocfs2_dlm_debug_open, | 
|  | 2272 | .release =	ocfs2_dlm_debug_release, | 
|  | 2273 | .read =		seq_read, | 
|  | 2274 | .llseek =	seq_lseek, | 
|  | 2275 | }; | 
|  | 2276 |  | 
|  | 2277 | static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) | 
|  | 2278 | { | 
|  | 2279 | int ret = 0; | 
|  | 2280 | struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; | 
|  | 2281 |  | 
|  | 2282 | dlm_debug->d_locking_state = debugfs_create_file("locking_state", | 
|  | 2283 | S_IFREG|S_IRUSR, | 
|  | 2284 | osb->osb_debug_root, | 
|  | 2285 | osb, | 
|  | 2286 | &ocfs2_dlm_debug_fops); | 
|  | 2287 | if (!dlm_debug->d_locking_state) { | 
|  | 2288 | ret = -EINVAL; | 
|  | 2289 | mlog(ML_ERROR, | 
|  | 2290 | "Unable to create locking state debugfs file.\n"); | 
|  | 2291 | goto out; | 
|  | 2292 | } | 
|  | 2293 |  | 
|  | 2294 | ocfs2_get_dlm_debug(dlm_debug); | 
|  | 2295 | out: | 
|  | 2296 | return ret; | 
|  | 2297 | } | 
|  | 2298 |  | 
|  | 2299 | static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) | 
|  | 2300 | { | 
|  | 2301 | struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; | 
|  | 2302 |  | 
|  | 2303 | if (dlm_debug) { | 
|  | 2304 | debugfs_remove(dlm_debug->d_locking_state); | 
|  | 2305 | ocfs2_put_dlm_debug(dlm_debug); | 
|  | 2306 | } | 
|  | 2307 | } | 
|  | 2308 |  | 
|  | 2309 | int ocfs2_dlm_init(struct ocfs2_super *osb) | 
|  | 2310 | { | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2311 | int status = 0; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2312 | u32 dlm_key; | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2313 | struct dlm_ctxt *dlm = NULL; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2314 |  | 
|  | 2315 | mlog_entry_void(); | 
|  | 2316 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2317 | if (ocfs2_mount_local(osb)) | 
|  | 2318 | goto local; | 
|  | 2319 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2320 | status = ocfs2_dlm_init_debug(osb); | 
|  | 2321 | if (status < 0) { | 
|  | 2322 | mlog_errno(status); | 
|  | 2323 | goto bail; | 
|  | 2324 | } | 
|  | 2325 |  | 
|  | 2326 | /* launch vote thread */ | 
| Mark Fasheh | 7842704 | 2006-05-04 12:03:26 -0700 | [diff] [blame] | 2327 | osb->vote_task = kthread_run(ocfs2_vote_thread, osb, "ocfs2vote"); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2328 | if (IS_ERR(osb->vote_task)) { | 
|  | 2329 | status = PTR_ERR(osb->vote_task); | 
|  | 2330 | osb->vote_task = NULL; | 
|  | 2331 | mlog_errno(status); | 
|  | 2332 | goto bail; | 
|  | 2333 | } | 
|  | 2334 |  | 
|  | 2335 | /* used by the dlm code to make message headers unique, each | 
|  | 2336 | * node in this domain must agree on this. */ | 
|  | 2337 | dlm_key = crc32_le(0, osb->uuid_str, strlen(osb->uuid_str)); | 
|  | 2338 |  | 
|  | 2339 | /* for now, uuid == domain */ | 
|  | 2340 | dlm = dlm_register_domain(osb->uuid_str, dlm_key); | 
|  | 2341 | if (IS_ERR(dlm)) { | 
|  | 2342 | status = PTR_ERR(dlm); | 
|  | 2343 | mlog_errno(status); | 
|  | 2344 | goto bail; | 
|  | 2345 | } | 
|  | 2346 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2347 | dlm_register_eviction_cb(dlm, &osb->osb_eviction_cb); | 
|  | 2348 |  | 
|  | 2349 | local: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2350 | ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); | 
|  | 2351 | ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); | 
|  | 2352 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2353 | osb->dlm = dlm; | 
|  | 2354 |  | 
|  | 2355 | status = 0; | 
|  | 2356 | bail: | 
|  | 2357 | if (status < 0) { | 
|  | 2358 | ocfs2_dlm_shutdown_debug(osb); | 
|  | 2359 | if (osb->vote_task) | 
|  | 2360 | kthread_stop(osb->vote_task); | 
|  | 2361 | } | 
|  | 2362 |  | 
|  | 2363 | mlog_exit(status); | 
|  | 2364 | return status; | 
|  | 2365 | } | 
|  | 2366 |  | 
|  | 2367 | void ocfs2_dlm_shutdown(struct ocfs2_super *osb) | 
|  | 2368 | { | 
|  | 2369 | mlog_entry_void(); | 
|  | 2370 |  | 
|  | 2371 | dlm_unregister_eviction_cb(&osb->osb_eviction_cb); | 
|  | 2372 |  | 
|  | 2373 | ocfs2_drop_osb_locks(osb); | 
|  | 2374 |  | 
|  | 2375 | if (osb->vote_task) { | 
|  | 2376 | kthread_stop(osb->vote_task); | 
|  | 2377 | osb->vote_task = NULL; | 
|  | 2378 | } | 
|  | 2379 |  | 
|  | 2380 | ocfs2_lock_res_free(&osb->osb_super_lockres); | 
|  | 2381 | ocfs2_lock_res_free(&osb->osb_rename_lockres); | 
|  | 2382 |  | 
|  | 2383 | dlm_unregister_domain(osb->dlm); | 
|  | 2384 | osb->dlm = NULL; | 
|  | 2385 |  | 
|  | 2386 | ocfs2_dlm_shutdown_debug(osb); | 
|  | 2387 |  | 
|  | 2388 | mlog_exit_void(); | 
|  | 2389 | } | 
|  | 2390 |  | 
| Mark Fasheh | 2a45f2d | 2006-09-12 21:36:58 -0700 | [diff] [blame] | 2391 | static void ocfs2_unlock_ast(void *opaque, enum dlm_status status) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2392 | { | 
|  | 2393 | struct ocfs2_lock_res *lockres = opaque; | 
|  | 2394 | unsigned long flags; | 
|  | 2395 |  | 
|  | 2396 | mlog_entry_void(); | 
|  | 2397 |  | 
|  | 2398 | mlog(0, "UNLOCK AST called on lock %s, action = %d\n", lockres->l_name, | 
|  | 2399 | lockres->l_unlock_action); | 
|  | 2400 |  | 
|  | 2401 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 2402 | /* We tried to cancel a convert request, but it was already | 
|  | 2403 | * granted. All we want to do here is clear our unlock | 
|  | 2404 | * state. The wake_up call done at the bottom is redundant | 
|  | 2405 | * (ocfs2_prepare_cancel_convert doesn't sleep on this) but doesn't | 
|  | 2406 | * hurt anything anyway */ | 
|  | 2407 | if (status == DLM_CANCELGRANT && | 
|  | 2408 | lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) { | 
|  | 2409 | mlog(0, "Got cancelgrant for %s\n", lockres->l_name); | 
|  | 2410 |  | 
|  | 2411 | /* We don't clear the busy flag in this case as it | 
|  | 2412 | * should have been cleared by the ast which the dlm | 
|  | 2413 | * has called. */ | 
|  | 2414 | goto complete_unlock; | 
|  | 2415 | } | 
|  | 2416 |  | 
|  | 2417 | if (status != DLM_NORMAL) { | 
|  | 2418 | mlog(ML_ERROR, "Dlm passes status %d for lock %s, " | 
|  | 2419 | "unlock_action %d\n", status, lockres->l_name, | 
|  | 2420 | lockres->l_unlock_action); | 
|  | 2421 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 2422 | return; | 
|  | 2423 | } | 
|  | 2424 |  | 
|  | 2425 | switch(lockres->l_unlock_action) { | 
|  | 2426 | case OCFS2_UNLOCK_CANCEL_CONVERT: | 
|  | 2427 | mlog(0, "Cancel convert success for %s\n", lockres->l_name); | 
|  | 2428 | lockres->l_action = OCFS2_AST_INVALID; | 
|  | 2429 | break; | 
|  | 2430 | case OCFS2_UNLOCK_DROP_LOCK: | 
|  | 2431 | lockres->l_level = LKM_IVMODE; | 
|  | 2432 | break; | 
|  | 2433 | default: | 
|  | 2434 | BUG(); | 
|  | 2435 | } | 
|  | 2436 |  | 
|  | 2437 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 
|  | 2438 | complete_unlock: | 
|  | 2439 | lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; | 
|  | 2440 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 2441 |  | 
|  | 2442 | wake_up(&lockres->l_event); | 
|  | 2443 |  | 
|  | 2444 | mlog_exit_void(); | 
|  | 2445 | } | 
|  | 2446 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2447 | static int ocfs2_drop_lock(struct ocfs2_super *osb, | 
| Mark Fasheh | 0d5dc6c | 2006-09-14 14:44:51 -0700 | [diff] [blame] | 2448 | struct ocfs2_lock_res *lockres) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2449 | { | 
|  | 2450 | enum dlm_status status; | 
|  | 2451 | unsigned long flags; | 
| Mark Fasheh | b80fc01 | 2006-09-12 22:08:14 -0700 | [diff] [blame] | 2452 | int lkm_flags = 0; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2453 |  | 
|  | 2454 | /* We didn't get anywhere near actually using this lockres. */ | 
|  | 2455 | if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) | 
|  | 2456 | goto out; | 
|  | 2457 |  | 
| Mark Fasheh | b80fc01 | 2006-09-12 22:08:14 -0700 | [diff] [blame] | 2458 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) | 
|  | 2459 | lkm_flags |= LKM_VALBLK; | 
|  | 2460 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2461 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 2462 |  | 
|  | 2463 | mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING), | 
|  | 2464 | "lockres %s, flags 0x%lx\n", | 
|  | 2465 | lockres->l_name, lockres->l_flags); | 
|  | 2466 |  | 
|  | 2467 | while (lockres->l_flags & OCFS2_LOCK_BUSY) { | 
|  | 2468 | mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = " | 
|  | 2469 | "%u, unlock_action = %u\n", | 
|  | 2470 | lockres->l_name, lockres->l_flags, lockres->l_action, | 
|  | 2471 | lockres->l_unlock_action); | 
|  | 2472 |  | 
|  | 2473 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 2474 |  | 
|  | 2475 | /* XXX: Today we just wait on any busy | 
|  | 2476 | * locks... Perhaps we need to cancel converts in the | 
|  | 2477 | * future? */ | 
|  | 2478 | ocfs2_wait_on_busy_lock(lockres); | 
|  | 2479 |  | 
|  | 2480 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 2481 | } | 
|  | 2482 |  | 
| Mark Fasheh | 0d5dc6c | 2006-09-14 14:44:51 -0700 | [diff] [blame] | 2483 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { | 
|  | 2484 | if (lockres->l_flags & OCFS2_LOCK_ATTACHED && | 
|  | 2485 | lockres->l_level == LKM_EXMODE && | 
|  | 2486 | !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) | 
|  | 2487 | lockres->l_ops->set_lvb(lockres); | 
|  | 2488 | } | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2489 |  | 
|  | 2490 | if (lockres->l_flags & OCFS2_LOCK_BUSY) | 
|  | 2491 | mlog(ML_ERROR, "destroying busy lock: \"%s\"\n", | 
|  | 2492 | lockres->l_name); | 
|  | 2493 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) | 
|  | 2494 | mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name); | 
|  | 2495 |  | 
|  | 2496 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { | 
|  | 2497 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 2498 | goto out; | 
|  | 2499 | } | 
|  | 2500 |  | 
|  | 2501 | lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED); | 
|  | 2502 |  | 
|  | 2503 | /* make sure we never get here while waiting for an ast to | 
|  | 2504 | * fire. */ | 
|  | 2505 | BUG_ON(lockres->l_action != OCFS2_AST_INVALID); | 
|  | 2506 |  | 
|  | 2507 | /* is this necessary? */ | 
|  | 2508 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 
|  | 2509 | lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK; | 
|  | 2510 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 2511 |  | 
|  | 2512 | mlog(0, "lock %s\n", lockres->l_name); | 
|  | 2513 |  | 
| Mark Fasheh | b80fc01 | 2006-09-12 22:08:14 -0700 | [diff] [blame] | 2514 | status = dlmunlock(osb->dlm, &lockres->l_lksb, lkm_flags, | 
| Mark Fasheh | 2a45f2d | 2006-09-12 21:36:58 -0700 | [diff] [blame] | 2515 | ocfs2_unlock_ast, lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2516 | if (status != DLM_NORMAL) { | 
|  | 2517 | ocfs2_log_dlm_error("dlmunlock", status, lockres); | 
|  | 2518 | mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); | 
|  | 2519 | dlm_print_one_lock(lockres->l_lksb.lockid); | 
|  | 2520 | BUG(); | 
|  | 2521 | } | 
|  | 2522 | mlog(0, "lock %s, successfull return from dlmunlock\n", | 
|  | 2523 | lockres->l_name); | 
|  | 2524 |  | 
|  | 2525 | ocfs2_wait_on_busy_lock(lockres); | 
|  | 2526 | out: | 
|  | 2527 | mlog_exit(0); | 
|  | 2528 | return 0; | 
|  | 2529 | } | 
|  | 2530 |  | 
|  | 2531 | /* Mark the lockres as being dropped. It will no longer be | 
|  | 2532 | * queued if blocking, but we still may have to wait on it | 
|  | 2533 | * being dequeued from the vote thread before we can consider | 
|  | 2534 | * it safe to drop. | 
|  | 2535 | * | 
|  | 2536 | * You can *not* attempt to call cluster_lock on this lockres anymore. */ | 
|  | 2537 | void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres) | 
|  | 2538 | { | 
|  | 2539 | int status; | 
|  | 2540 | struct ocfs2_mask_waiter mw; | 
|  | 2541 | unsigned long flags; | 
|  | 2542 |  | 
|  | 2543 | ocfs2_init_mask_waiter(&mw); | 
|  | 2544 |  | 
|  | 2545 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 2546 | lockres->l_flags |= OCFS2_LOCK_FREEING; | 
|  | 2547 | while (lockres->l_flags & OCFS2_LOCK_QUEUED) { | 
|  | 2548 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0); | 
|  | 2549 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 2550 |  | 
|  | 2551 | mlog(0, "Waiting on lockres %s\n", lockres->l_name); | 
|  | 2552 |  | 
|  | 2553 | status = ocfs2_wait_for_mask(&mw); | 
|  | 2554 | if (status) | 
|  | 2555 | mlog_errno(status); | 
|  | 2556 |  | 
|  | 2557 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 2558 | } | 
|  | 2559 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 2560 | } | 
|  | 2561 |  | 
/* Mark @lockres as freeing and then drop it; any error from the drop
 * is only logged. */
void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
			       struct ocfs2_lock_res *lockres)
{
	int err;

	ocfs2_mark_lockres_freeing(lockres);

	err = ocfs2_drop_lock(osb, lockres);
	if (err)
		mlog_errno(err);
}
|  | 2572 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2573 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) | 
|  | 2574 | { | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 2575 | ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres); | 
|  | 2576 | ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2577 | } | 
|  | 2578 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2579 | int ocfs2_drop_inode_locks(struct inode *inode) | 
|  | 2580 | { | 
|  | 2581 | int status, err; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2582 |  | 
|  | 2583 | mlog_entry_void(); | 
|  | 2584 |  | 
|  | 2585 | /* No need to call ocfs2_mark_lockres_freeing here - | 
|  | 2586 | * ocfs2_clear_inode has done it for us. */ | 
|  | 2587 |  | 
|  | 2588 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 2589 | &OCFS2_I(inode)->ip_open_lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2590 | if (err < 0) | 
|  | 2591 | mlog_errno(err); | 
|  | 2592 |  | 
|  | 2593 | status = err; | 
|  | 2594 |  | 
|  | 2595 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 2596 | &OCFS2_I(inode)->ip_data_lockres); | 
|  | 2597 | if (err < 0) | 
|  | 2598 | mlog_errno(err); | 
|  | 2599 | if (err < 0 && !status) | 
|  | 2600 | status = err; | 
|  | 2601 |  | 
|  | 2602 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | 
| Mark Fasheh | 0d5dc6c | 2006-09-14 14:44:51 -0700 | [diff] [blame] | 2603 | &OCFS2_I(inode)->ip_meta_lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2604 | if (err < 0) | 
|  | 2605 | mlog_errno(err); | 
|  | 2606 | if (err < 0 && !status) | 
|  | 2607 | status = err; | 
|  | 2608 |  | 
|  | 2609 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | 
| Mark Fasheh | 0d5dc6c | 2006-09-14 14:44:51 -0700 | [diff] [blame] | 2610 | &OCFS2_I(inode)->ip_rw_lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2611 | if (err < 0) | 
|  | 2612 | mlog_errno(err); | 
|  | 2613 | if (err < 0 && !status) | 
|  | 2614 | status = err; | 
|  | 2615 |  | 
|  | 2616 | mlog_exit(status); | 
|  | 2617 | return status; | 
|  | 2618 | } | 
|  | 2619 |  | 
|  | 2620 | static void ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, | 
|  | 2621 | int new_level) | 
|  | 2622 | { | 
|  | 2623 | assert_spin_locked(&lockres->l_lock); | 
|  | 2624 |  | 
|  | 2625 | BUG_ON(lockres->l_blocking <= LKM_NLMODE); | 
|  | 2626 |  | 
|  | 2627 | if (lockres->l_level <= new_level) { | 
|  | 2628 | mlog(ML_ERROR, "lockres->l_level (%u) <= new_level (%u)\n", | 
|  | 2629 | lockres->l_level, new_level); | 
|  | 2630 | BUG(); | 
|  | 2631 | } | 
|  | 2632 |  | 
|  | 2633 | mlog(0, "lock %s, new_level = %d, l_blocking = %d\n", | 
|  | 2634 | lockres->l_name, new_level, lockres->l_blocking); | 
|  | 2635 |  | 
|  | 2636 | lockres->l_action = OCFS2_AST_DOWNCONVERT; | 
|  | 2637 | lockres->l_requested = new_level; | 
|  | 2638 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 
|  | 2639 | } | 
|  | 2640 |  | 
|  | 2641 | static int ocfs2_downconvert_lock(struct ocfs2_super *osb, | 
|  | 2642 | struct ocfs2_lock_res *lockres, | 
|  | 2643 | int new_level, | 
|  | 2644 | int lvb) | 
|  | 2645 | { | 
|  | 2646 | int ret, dlm_flags = LKM_CONVERT; | 
|  | 2647 | enum dlm_status status; | 
|  | 2648 |  | 
|  | 2649 | mlog_entry_void(); | 
|  | 2650 |  | 
|  | 2651 | if (lvb) | 
|  | 2652 | dlm_flags |= LKM_VALBLK; | 
|  | 2653 |  | 
|  | 2654 | status = dlmlock(osb->dlm, | 
|  | 2655 | new_level, | 
|  | 2656 | &lockres->l_lksb, | 
|  | 2657 | dlm_flags, | 
|  | 2658 | lockres->l_name, | 
| Mark Fasheh | f068106 | 2006-09-08 11:40:10 -0700 | [diff] [blame] | 2659 | OCFS2_LOCK_ID_MAX_LEN - 1, | 
| Mark Fasheh | e92d57d | 2006-09-12 21:34:35 -0700 | [diff] [blame] | 2660 | ocfs2_locking_ast, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2661 | lockres, | 
| Mark Fasheh | aa2623a | 2006-09-12 21:58:23 -0700 | [diff] [blame] | 2662 | ocfs2_blocking_ast); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2663 | if (status != DLM_NORMAL) { | 
|  | 2664 | ocfs2_log_dlm_error("dlmlock", status, lockres); | 
|  | 2665 | ret = -EINVAL; | 
|  | 2666 | ocfs2_recover_from_dlm_error(lockres, 1); | 
|  | 2667 | goto bail; | 
|  | 2668 | } | 
|  | 2669 |  | 
|  | 2670 | ret = 0; | 
|  | 2671 | bail: | 
|  | 2672 | mlog_exit(ret); | 
|  | 2673 | return ret; | 
|  | 2674 | } | 
|  | 2675 |  | 
|  | 2676 | /* returns 1 when the caller should unlock and call dlmunlock */ | 
|  | 2677 | static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, | 
|  | 2678 | struct ocfs2_lock_res *lockres) | 
|  | 2679 | { | 
|  | 2680 | assert_spin_locked(&lockres->l_lock); | 
|  | 2681 |  | 
|  | 2682 | mlog_entry_void(); | 
|  | 2683 | mlog(0, "lock %s\n", lockres->l_name); | 
|  | 2684 |  | 
|  | 2685 | if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) { | 
|  | 2686 | /* If we're already trying to cancel a lock conversion | 
|  | 2687 | * then just drop the spinlock and allow the caller to | 
|  | 2688 | * requeue this lock. */ | 
|  | 2689 |  | 
|  | 2690 | mlog(0, "Lockres %s, skip convert\n", lockres->l_name); | 
|  | 2691 | return 0; | 
|  | 2692 | } | 
|  | 2693 |  | 
|  | 2694 | /* were we in a convert when we got the bast fire? */ | 
|  | 2695 | BUG_ON(lockres->l_action != OCFS2_AST_CONVERT && | 
|  | 2696 | lockres->l_action != OCFS2_AST_DOWNCONVERT); | 
|  | 2697 | /* set things up for the unlockast to know to just | 
|  | 2698 | * clear out the ast_action and unset busy, etc. */ | 
|  | 2699 | lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT; | 
|  | 2700 |  | 
|  | 2701 | mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY), | 
|  | 2702 | "lock %s, invalid flags: 0x%lx\n", | 
|  | 2703 | lockres->l_name, lockres->l_flags); | 
|  | 2704 |  | 
|  | 2705 | return 1; | 
|  | 2706 | } | 
|  | 2707 |  | 
|  | 2708 | static int ocfs2_cancel_convert(struct ocfs2_super *osb, | 
|  | 2709 | struct ocfs2_lock_res *lockres) | 
|  | 2710 | { | 
|  | 2711 | int ret; | 
|  | 2712 | enum dlm_status status; | 
|  | 2713 |  | 
|  | 2714 | mlog_entry_void(); | 
|  | 2715 | mlog(0, "lock %s\n", lockres->l_name); | 
|  | 2716 |  | 
|  | 2717 | ret = 0; | 
|  | 2718 | status = dlmunlock(osb->dlm, | 
|  | 2719 | &lockres->l_lksb, | 
|  | 2720 | LKM_CANCEL, | 
| Mark Fasheh | 2a45f2d | 2006-09-12 21:36:58 -0700 | [diff] [blame] | 2721 | ocfs2_unlock_ast, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2722 | lockres); | 
|  | 2723 | if (status != DLM_NORMAL) { | 
|  | 2724 | ocfs2_log_dlm_error("dlmunlock", status, lockres); | 
|  | 2725 | ret = -EINVAL; | 
|  | 2726 | ocfs2_recover_from_dlm_error(lockres, 0); | 
|  | 2727 | } | 
|  | 2728 |  | 
|  | 2729 | mlog(0, "lock %s return from dlmunlock\n", lockres->l_name); | 
|  | 2730 |  | 
|  | 2731 | mlog_exit(ret); | 
|  | 2732 | return ret; | 
|  | 2733 | } | 
|  | 2734 |  | 
| Mark Fasheh | b5e500e | 2006-09-13 22:01:16 -0700 | [diff] [blame] | 2735 | static int ocfs2_unblock_lock(struct ocfs2_super *osb, | 
|  | 2736 | struct ocfs2_lock_res *lockres, | 
|  | 2737 | struct ocfs2_unblock_ctl *ctl) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2738 | { | 
|  | 2739 | unsigned long flags; | 
|  | 2740 | int blocking; | 
|  | 2741 | int new_level; | 
|  | 2742 | int ret = 0; | 
| Mark Fasheh | 5ef0d4e | 2006-09-13 21:21:52 -0700 | [diff] [blame] | 2743 | int set_lvb = 0; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2744 |  | 
|  | 2745 | mlog_entry_void(); | 
|  | 2746 |  | 
|  | 2747 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 2748 |  | 
|  | 2749 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); | 
|  | 2750 |  | 
|  | 2751 | recheck: | 
|  | 2752 | if (lockres->l_flags & OCFS2_LOCK_BUSY) { | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 2753 | ctl->requeue = 1; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2754 | ret = ocfs2_prepare_cancel_convert(osb, lockres); | 
|  | 2755 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 2756 | if (ret) { | 
|  | 2757 | ret = ocfs2_cancel_convert(osb, lockres); | 
|  | 2758 | if (ret < 0) | 
|  | 2759 | mlog_errno(ret); | 
|  | 2760 | } | 
|  | 2761 | goto leave; | 
|  | 2762 | } | 
|  | 2763 |  | 
|  | 2764 | /* if we're blocking an exclusive and we have *any* holders, | 
|  | 2765 | * then requeue. */ | 
|  | 2766 | if ((lockres->l_blocking == LKM_EXMODE) | 
| Mark Fasheh | f7fbfdd | 2006-09-13 21:02:29 -0700 | [diff] [blame] | 2767 | && (lockres->l_ex_holders || lockres->l_ro_holders)) | 
|  | 2768 | goto leave_requeue; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2769 |  | 
|  | 2770 | /* If it's a PR we're blocking, then only | 
|  | 2771 | * requeue if we've got any EX holders */ | 
|  | 2772 | if (lockres->l_blocking == LKM_PRMODE && | 
| Mark Fasheh | f7fbfdd | 2006-09-13 21:02:29 -0700 | [diff] [blame] | 2773 | lockres->l_ex_holders) | 
|  | 2774 | goto leave_requeue; | 
|  | 2775 |  | 
|  | 2776 | /* | 
|  | 2777 | * Can we get a lock in this state if the holder counts are | 
|  | 2778 | * zero? The meta data unblock code used to check this. | 
|  | 2779 | */ | 
|  | 2780 | if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) | 
|  | 2781 | && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) | 
|  | 2782 | goto leave_requeue; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2783 |  | 
| Mark Fasheh | 16d5b95 | 2006-09-13 21:10:12 -0700 | [diff] [blame] | 2784 | new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); | 
|  | 2785 |  | 
|  | 2786 | if (lockres->l_ops->check_downconvert | 
|  | 2787 | && !lockres->l_ops->check_downconvert(lockres, new_level)) | 
|  | 2788 | goto leave_requeue; | 
|  | 2789 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2790 | /* If we get here, then we know that there are no more | 
|  | 2791 | * incompatible holders (and anyone asking for an incompatible | 
|  | 2792 | * lock is blocked). We can now downconvert the lock */ | 
| Mark Fasheh | cc567d8 | 2006-09-13 21:52:21 -0700 | [diff] [blame] | 2793 | if (!lockres->l_ops->downconvert_worker) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2794 | goto downconvert; | 
|  | 2795 |  | 
|  | 2796 | /* Some lockres types want to do a bit of work before | 
|  | 2797 | * downconverting a lock. Allow that here. The worker function | 
|  | 2798 | * may sleep, so we save off a copy of what we're blocking as | 
|  | 2799 | * it may change while we're not holding the spin lock. */ | 
|  | 2800 | blocking = lockres->l_blocking; | 
|  | 2801 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 2802 |  | 
| Mark Fasheh | cc567d8 | 2006-09-13 21:52:21 -0700 | [diff] [blame] | 2803 | ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking); | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 2804 |  | 
|  | 2805 | if (ctl->unblock_action == UNBLOCK_STOP_POST) | 
|  | 2806 | goto leave; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2807 |  | 
|  | 2808 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 2809 | if (blocking != lockres->l_blocking) { | 
|  | 2810 | /* If this changed underneath us, then we can't drop | 
|  | 2811 | * it just yet. */ | 
|  | 2812 | goto recheck; | 
|  | 2813 | } | 
|  | 2814 |  | 
|  | 2815 | downconvert: | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 2816 | ctl->requeue = 0; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2817 |  | 
| Mark Fasheh | 5ef0d4e | 2006-09-13 21:21:52 -0700 | [diff] [blame] | 2818 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { | 
|  | 2819 | if (lockres->l_level == LKM_EXMODE) | 
|  | 2820 | set_lvb = 1; | 
|  | 2821 |  | 
|  | 2822 | /* | 
|  | 2823 | * We only set the lvb if the lock has been fully | 
|  | 2824 | * refreshed - otherwise we risk setting stale | 
|  | 2825 | * data. Otherwise, there's no need to actually clear | 
|  | 2826 | * out the lvb here as it's value is still valid. | 
|  | 2827 | */ | 
|  | 2828 | if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) | 
|  | 2829 | lockres->l_ops->set_lvb(lockres); | 
|  | 2830 | } | 
|  | 2831 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2832 | ocfs2_prepare_downconvert(lockres, new_level); | 
|  | 2833 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
| Mark Fasheh | 5ef0d4e | 2006-09-13 21:21:52 -0700 | [diff] [blame] | 2834 | ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2835 | leave: | 
|  | 2836 | mlog_exit(ret); | 
|  | 2837 | return ret; | 
| Mark Fasheh | f7fbfdd | 2006-09-13 21:02:29 -0700 | [diff] [blame] | 2838 |  | 
|  | 2839 | leave_requeue: | 
|  | 2840 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 2841 | ctl->requeue = 1; | 
|  | 2842 |  | 
|  | 2843 | mlog_exit(0); | 
|  | 2844 | return 0; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2845 | } | 
|  | 2846 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 2847 | static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | 
|  | 2848 | int blocking) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2849 | { | 
|  | 2850 | struct inode *inode; | 
|  | 2851 | struct address_space *mapping; | 
|  | 2852 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2853 | inode = ocfs2_lock_res_inode(lockres); | 
|  | 2854 | mapping = inode->i_mapping; | 
|  | 2855 |  | 
| Mark Fasheh | 7f4a2a9 | 2006-12-11 11:06:36 -0800 | [diff] [blame] | 2856 | /* | 
|  | 2857 | * We need this before the filemap_fdatawrite() so that it can | 
|  | 2858 | * transfer the dirty bit from the PTE to the | 
|  | 2859 | * page. Unfortunately this means that even for EX->PR | 
|  | 2860 | * downconverts, we'll lose our mappings and have to build | 
|  | 2861 | * them up again. | 
|  | 2862 | */ | 
|  | 2863 | unmap_mapping_range(mapping, 0, 0, 0); | 
|  | 2864 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2865 | if (filemap_fdatawrite(mapping)) { | 
| Mark Fasheh | b069705 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 2866 | mlog(ML_ERROR, "Could not sync inode %llu for downconvert!", | 
|  | 2867 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2868 | } | 
|  | 2869 | sync_mapping_buffers(mapping); | 
|  | 2870 | if (blocking == LKM_EXMODE) { | 
|  | 2871 | truncate_inode_pages(mapping, 0); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2872 | } else { | 
|  | 2873 | /* We only need to wait on the I/O if we're not also | 
|  | 2874 | * truncating pages because truncate_inode_pages waits | 
|  | 2875 | * for us above. We don't truncate pages if we're | 
|  | 2876 | * blocking anything < EXMODE because we want to keep | 
|  | 2877 | * them around in that case. */ | 
|  | 2878 | filemap_fdatawait(mapping); | 
|  | 2879 | } | 
|  | 2880 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 2881 | return UNBLOCK_CONTINUE; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2882 | } | 
|  | 2883 |  | 
| Mark Fasheh | 810d5ae | 2006-09-13 21:39:52 -0700 | [diff] [blame] | 2884 | static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, | 
|  | 2885 | int new_level) | 
|  | 2886 | { | 
|  | 2887 | struct inode *inode = ocfs2_lock_res_inode(lockres); | 
|  | 2888 | int checkpointed = ocfs2_inode_fully_checkpointed(inode); | 
|  | 2889 |  | 
|  | 2890 | BUG_ON(new_level != LKM_NLMODE && new_level != LKM_PRMODE); | 
|  | 2891 | BUG_ON(lockres->l_level != LKM_EXMODE && !checkpointed); | 
|  | 2892 |  | 
|  | 2893 | if (checkpointed) | 
|  | 2894 | return 1; | 
|  | 2895 |  | 
|  | 2896 | ocfs2_start_checkpoint(OCFS2_SB(inode->i_sb)); | 
|  | 2897 | return 0; | 
|  | 2898 | } | 
|  | 2899 |  | 
/*
 * set_lvb hook for meta data locks: hands the inode behind lockres to
 * __ocfs2_stuff_meta_lvb().
 */
static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
{
	__ocfs2_stuff_meta_lvb(ocfs2_lock_res_inode(lockres));
}
|  | 2906 |  | 
/*
 * post_unlock hook for dentry locks: drops a reference on the
 * ocfs2_dentry_lock that lockres belongs to.
 *
 * Right now this happens in the vote thread, but we could choose to
 * simplify the dlmglue API and push these off to the ocfs2_wq in the
 * future.
 */
static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres)
{
	ocfs2_dentry_lock_put(osb, ocfs2_lock_res_dl(lockres));
}
|  | 2918 |  | 
|  | 2919 | /* | 
|  | 2920 | * d_delete() matching dentries before the lock downconvert. | 
|  | 2921 | * | 
|  | 2922 | * At this point, any process waiting to destroy the | 
|  | 2923 | * dentry_lock due to last ref count is stopped by the | 
|  | 2924 | * OCFS2_LOCK_QUEUED flag. | 
|  | 2925 | * | 
|  | 2926 | * We have two potential problems | 
|  | 2927 | * | 
|  | 2928 | * 1) If we do the last reference drop on our dentry_lock (via dput) | 
|  | 2929 | *    we'll wind up in ocfs2_release_dentry_lock(), waiting on | 
|  | 2930 | *    the downconvert to finish. Instead we take an elevated | 
|  | 2931 | *    reference and push the drop until after we've completed our | 
|  | 2932 | *    unblock processing. | 
|  | 2933 | * | 
|  | 2934 | * 2) There might be another process with a final reference, | 
|  | 2935 | *    waiting on us to finish processing. If this is the case, we | 
|  | 2936 | *    detect it and exit out - there's no more dentries anyway. | 
|  | 2937 | */ | 
|  | 2938 | static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, | 
|  | 2939 | int blocking) | 
|  | 2940 | { | 
|  | 2941 | struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); | 
|  | 2942 | struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode); | 
|  | 2943 | struct dentry *dentry; | 
|  | 2944 | unsigned long flags; | 
|  | 2945 | int extra_ref = 0; | 
|  | 2946 |  | 
|  | 2947 | /* | 
|  | 2948 | * This node is blocking another node from getting a read | 
|  | 2949 | * lock. This happens when we've renamed within a | 
|  | 2950 | * directory. We've forced the other nodes to d_delete(), but | 
|  | 2951 | * we never actually dropped our lock because it's still | 
|  | 2952 | * valid. The downconvert code will retain a PR for this node, | 
|  | 2953 | * so there's no further work to do. | 
|  | 2954 | */ | 
|  | 2955 | if (blocking == LKM_PRMODE) | 
|  | 2956 | return UNBLOCK_CONTINUE; | 
|  | 2957 |  | 
|  | 2958 | /* | 
|  | 2959 | * Mark this inode as potentially orphaned. The code in | 
|  | 2960 | * ocfs2_delete_inode() will figure out whether it actually | 
|  | 2961 | * needs to be freed or not. | 
|  | 2962 | */ | 
|  | 2963 | spin_lock(&oi->ip_lock); | 
|  | 2964 | oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; | 
|  | 2965 | spin_unlock(&oi->ip_lock); | 
|  | 2966 |  | 
|  | 2967 | /* | 
|  | 2968 | * Yuck. We need to make sure however that the check of | 
|  | 2969 | * OCFS2_LOCK_FREEING and the extra reference are atomic with | 
|  | 2970 | * respect to a reference decrement or the setting of that | 
|  | 2971 | * flag. | 
|  | 2972 | */ | 
|  | 2973 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 2974 | spin_lock(&dentry_attach_lock); | 
|  | 2975 | if (!(lockres->l_flags & OCFS2_LOCK_FREEING) | 
|  | 2976 | && dl->dl_count) { | 
|  | 2977 | dl->dl_count++; | 
|  | 2978 | extra_ref = 1; | 
|  | 2979 | } | 
|  | 2980 | spin_unlock(&dentry_attach_lock); | 
|  | 2981 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 2982 |  | 
|  | 2983 | mlog(0, "extra_ref = %d\n", extra_ref); | 
|  | 2984 |  | 
|  | 2985 | /* | 
|  | 2986 | * We have a process waiting on us in ocfs2_dentry_iput(), | 
|  | 2987 | * which means we can't have any more outstanding | 
|  | 2988 | * aliases. There's no need to do any more work. | 
|  | 2989 | */ | 
|  | 2990 | if (!extra_ref) | 
|  | 2991 | return UNBLOCK_CONTINUE; | 
|  | 2992 |  | 
|  | 2993 | spin_lock(&dentry_attach_lock); | 
|  | 2994 | while (1) { | 
|  | 2995 | dentry = ocfs2_find_local_alias(dl->dl_inode, | 
|  | 2996 | dl->dl_parent_blkno, 1); | 
|  | 2997 | if (!dentry) | 
|  | 2998 | break; | 
|  | 2999 | spin_unlock(&dentry_attach_lock); | 
|  | 3000 |  | 
|  | 3001 | mlog(0, "d_delete(%.*s);\n", dentry->d_name.len, | 
|  | 3002 | dentry->d_name.name); | 
|  | 3003 |  | 
|  | 3004 | /* | 
|  | 3005 | * The following dcache calls may do an | 
|  | 3006 | * iput(). Normally we don't want that from the | 
|  | 3007 | * downconverting thread, but in this case it's ok | 
|  | 3008 | * because the requesting node already has an | 
|  | 3009 | * exclusive lock on the inode, so it can't be queued | 
|  | 3010 | * for a downconvert. | 
|  | 3011 | */ | 
|  | 3012 | d_delete(dentry); | 
|  | 3013 | dput(dentry); | 
|  | 3014 |  | 
|  | 3015 | spin_lock(&dentry_attach_lock); | 
|  | 3016 | } | 
|  | 3017 | spin_unlock(&dentry_attach_lock); | 
|  | 3018 |  | 
|  | 3019 | /* | 
|  | 3020 | * If we are the last holder of this dentry lock, there is no | 
|  | 3021 | * reason to downconvert so skip straight to the unlock. | 
|  | 3022 | */ | 
|  | 3023 | if (dl->dl_count == 1) | 
|  | 3024 | return UNBLOCK_STOP_POST; | 
|  | 3025 |  | 
|  | 3026 | return UNBLOCK_CONTINUE_POST; | 
|  | 3027 | } | 
|  | 3028 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3029 | void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | 
|  | 3030 | struct ocfs2_lock_res *lockres) | 
|  | 3031 | { | 
|  | 3032 | int status; | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 3033 | struct ocfs2_unblock_ctl ctl = {0, 0,}; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3034 | unsigned long flags; | 
|  | 3035 |  | 
|  | 3036 | /* Our reference to the lockres in this function can be | 
|  | 3037 | * considered valid until we remove the OCFS2_LOCK_QUEUED | 
|  | 3038 | * flag. */ | 
|  | 3039 |  | 
|  | 3040 | mlog_entry_void(); | 
|  | 3041 |  | 
|  | 3042 | BUG_ON(!lockres); | 
|  | 3043 | BUG_ON(!lockres->l_ops); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3044 |  | 
|  | 3045 | mlog(0, "lockres %s blocked.\n", lockres->l_name); | 
|  | 3046 |  | 
|  | 3047 | /* Detect whether a lock has been marked as going away while | 
|  | 3048 | * the vote thread was processing other things. A lock can | 
|  | 3049 | * still be marked with OCFS2_LOCK_FREEING after this check, | 
|  | 3050 | * but short circuiting here will still save us some | 
|  | 3051 | * performance. */ | 
|  | 3052 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 3053 | if (lockres->l_flags & OCFS2_LOCK_FREEING) | 
|  | 3054 | goto unqueue; | 
|  | 3055 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 3056 |  | 
| Mark Fasheh | b5e500e | 2006-09-13 22:01:16 -0700 | [diff] [blame] | 3057 | status = ocfs2_unblock_lock(osb, lockres, &ctl); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3058 | if (status < 0) | 
|  | 3059 | mlog_errno(status); | 
|  | 3060 |  | 
|  | 3061 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 3062 | unqueue: | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 3063 | if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3064 | lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED); | 
|  | 3065 | } else | 
|  | 3066 | ocfs2_schedule_blocked_lock(osb, lockres); | 
|  | 3067 |  | 
|  | 3068 | mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name, | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 3069 | ctl.requeue ? "yes" : "no"); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3070 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 3071 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 3072 | if (ctl.unblock_action != UNBLOCK_CONTINUE | 
|  | 3073 | && lockres->l_ops->post_unlock) | 
|  | 3074 | lockres->l_ops->post_unlock(osb, lockres); | 
|  | 3075 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3076 | mlog_exit_void(); | 
|  | 3077 | } | 
|  | 3078 |  | 
|  | 3079 | static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, | 
|  | 3080 | struct ocfs2_lock_res *lockres) | 
|  | 3081 | { | 
|  | 3082 | mlog_entry_void(); | 
|  | 3083 |  | 
|  | 3084 | assert_spin_locked(&lockres->l_lock); | 
|  | 3085 |  | 
|  | 3086 | if (lockres->l_flags & OCFS2_LOCK_FREEING) { | 
|  | 3087 | /* Do not schedule a lock for downconvert when it's on | 
|  | 3088 | * the way to destruction - any nodes wanting access | 
|  | 3089 | * to the resource will get it soon. */ | 
|  | 3090 | mlog(0, "Lockres %s won't be scheduled: flags 0x%lx\n", | 
|  | 3091 | lockres->l_name, lockres->l_flags); | 
|  | 3092 | return; | 
|  | 3093 | } | 
|  | 3094 |  | 
|  | 3095 | lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); | 
|  | 3096 |  | 
|  | 3097 | spin_lock(&osb->vote_task_lock); | 
|  | 3098 | if (list_empty(&lockres->l_blocked_list)) { | 
|  | 3099 | list_add_tail(&lockres->l_blocked_list, | 
|  | 3100 | &osb->blocked_lock_list); | 
|  | 3101 | osb->blocked_lock_count++; | 
|  | 3102 | } | 
|  | 3103 | spin_unlock(&osb->vote_task_lock); | 
|  | 3104 |  | 
|  | 3105 | mlog_exit_void(); | 
|  | 3106 | } |