| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1 | /* -*- mode: c; c-basic-offset: 8; -*- | 
|  | 2 | * vim: noexpandtab sw=8 ts=8 sts=0: | 
|  | 3 | * | 
|  | 4 | * dlmglue.c | 
|  | 5 | * | 
|  | 6 | * Code which implements an OCFS2 specific interface to our DLM. | 
|  | 7 | * | 
|  | 8 | * Copyright (C) 2003, 2004 Oracle.  All rights reserved. | 
|  | 9 | * | 
|  | 10 | * This program is free software; you can redistribute it and/or | 
|  | 11 | * modify it under the terms of the GNU General Public | 
|  | 12 | * License as published by the Free Software Foundation; either | 
|  | 13 | * version 2 of the License, or (at your option) any later version. | 
|  | 14 | * | 
|  | 15 | * This program is distributed in the hope that it will be useful, | 
|  | 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
|  | 18 | * General Public License for more details. | 
|  | 19 | * | 
|  | 20 | * You should have received a copy of the GNU General Public | 
|  | 21 | * License along with this program; if not, write to the | 
|  | 22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | 
|  | 23 | * Boston, MA 02111-1307, USA. |
|  | 24 | */ | 
|  | 25 |  | 
|  | 26 | #include <linux/types.h> | 
|  | 27 | #include <linux/slab.h> | 
|  | 28 | #include <linux/highmem.h> | 
|  | 29 | #include <linux/mm.h> | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 30 | #include <linux/kthread.h> | 
|  | 31 | #include <linux/pagemap.h> | 
|  | 32 | #include <linux/debugfs.h> | 
|  | 33 | #include <linux/seq_file.h> | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 34 | #include <linux/time.h> | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 35 | #include <linux/quotaops.h> | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 36 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 37 | #define MLOG_MASK_PREFIX ML_DLM_GLUE | 
|  | 38 | #include <cluster/masklog.h> | 
|  | 39 |  | 
|  | 40 | #include "ocfs2.h" | 
| Joel Becker | d24fbcd | 2008-01-25 17:02:21 -0800 | [diff] [blame] | 41 | #include "ocfs2_lockingver.h" | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 42 |  | 
|  | 43 | #include "alloc.h" | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 44 | #include "dcache.h" | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 45 | #include "dlmglue.h" | 
|  | 46 | #include "extent_map.h" | 
| Tiger Yang | 7f1a37e | 2006-11-15 15:48:42 +0800 | [diff] [blame] | 47 | #include "file.h" | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 48 | #include "heartbeat.h" | 
|  | 49 | #include "inode.h" | 
|  | 50 | #include "journal.h" | 
| Joel Becker | 24ef181 | 2008-01-29 17:37:32 -0800 | [diff] [blame] | 51 | #include "stackglue.h" | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 52 | #include "slot_map.h" | 
|  | 53 | #include "super.h" | 
|  | 54 | #include "uptodate.h" | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 55 | #include "quota.h" | 
| Tao Ma | 8dec98e | 2009-08-18 11:19:58 +0800 | [diff] [blame] | 56 | #include "refcounttree.h" | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 57 |  | 
|  | 58 | #include "buffer_head_io.h" | 
|  | 59 |  | 
/*
 * Bookkeeping for one waiter on a lock resource.
 *
 * NOTE(review): the code that queues and wakes these is not in this
 * part of the file; the field notes marked "presumably" should be
 * confirmed against it.
 */
struct ocfs2_mask_waiter {
	/* List linkage; presumably chained on the lockres l_mask_waiters list. */
	struct list_head	mw_item;
	/* Result handed back to the waiter. */
	int			mw_status;
	/* Completion the waiter sleeps on. */
	struct completion	mw_complete;
	/* Presumably: the waiter is satisfied when (flags & mw_mask) == mw_goal. */
	unsigned long		mw_mask;
	unsigned long		mw_goal;
#ifdef CONFIG_OCFS2_FS_STATS
	/* Stamped by ocfs2_init_start_time(), read by ocfs2_update_lock_stats(). */
	ktime_t			mw_lock_start;
#endif
};
|  | 70 |  | 
/*
 * ->get_osb implementations for lock types whose ->l_priv is not an
 * ocfs2_super pointer. Declared here so the ocfs2_lock_res_ops tables
 * below can reference them.
 */
static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres);
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 75 |  | 
/*
 * Return value from ->downconvert_worker functions.
 *
 * These control the precise actions of ocfs2_unblock_lock()
 * and ocfs2_process_blocked_lock().
 */
enum ocfs2_unblock_action {
	UNBLOCK_CONTINUE	= 0, /* Continue downconvert */
	UNBLOCK_CONTINUE_POST	= 1, /* Continue downconvert, fire
				      * ->post_unlock callback */
	UNBLOCK_STOP_POST	= 2, /* Do not downconvert, fire
				      * ->post_unlock() callback. */
};
|  | 90 |  | 
/*
 * Pairs a requeue flag with an ocfs2_unblock_action.
 *
 * NOTE(review): no user of this struct is visible in this part of the
 * file; presumably it carries the outcome of an unblock attempt --
 * confirm against the downconvert code.
 */
struct ocfs2_unblock_ctl {
	int requeue;
	enum ocfs2_unblock_action unblock_action;
};
|  | 95 |  | 
/*
 * Lockdep class keys, one per lock type. Indexed by enum ocfs2_lock_type
 * and handed to lockdep_init_map() in ocfs2_lock_res_init_common().
 *
 * NOTE(review): this has external linkage and a generic name; if nothing
 * outside this file references it, consider making it static.
 */
struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES];
|  | 98 |  | 
/*
 * Forward declarations of the per-lock-type callbacks that are wired
 * into the ocfs2_lock_res_ops tables below.
 */

/* Inode (meta) lock callbacks. */
static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
					int new_level);
static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);

static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
				     int blocking);

/* Dentry lock callbacks. */
static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
				       int blocking);

static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres);

/* Quota info lock callback. */
static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres);

/* Refcount tree lock callbacks. */
static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
					    int new_level);
static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
					 int blocking);
|  | 118 |  | 
| Adrian Bunk | 6cb129f | 2007-04-26 00:29:35 -0700 | [diff] [blame] | 119 | #define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres) | 
|  | 120 |  | 
|  | 121 | /* This aids in debugging situations where a bad LVB might be involved. */ | 
|  | 122 | static void ocfs2_dump_meta_lvb_info(u64 level, | 
|  | 123 | const char *function, | 
|  | 124 | unsigned int line, | 
|  | 125 | struct ocfs2_lock_res *lockres) | 
|  | 126 | { | 
| Mark Fasheh | a641dc2 | 2008-12-24 16:03:48 -0800 | [diff] [blame] | 127 | struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | 
| Adrian Bunk | 6cb129f | 2007-04-26 00:29:35 -0700 | [diff] [blame] | 128 |  | 
|  | 129 | mlog(level, "LVB information for %s (called from %s:%u):\n", | 
|  | 130 | lockres->l_name, function, line); | 
|  | 131 | mlog(level, "version: %u, clusters: %u, generation: 0x%x\n", | 
|  | 132 | lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters), | 
|  | 133 | be32_to_cpu(lvb->lvb_igeneration)); | 
|  | 134 | mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n", | 
|  | 135 | (unsigned long long)be64_to_cpu(lvb->lvb_isize), | 
|  | 136 | be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), | 
|  | 137 | be16_to_cpu(lvb->lvb_imode)); | 
|  | 138 | mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, " | 
|  | 139 | "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink), | 
|  | 140 | (long long)be64_to_cpu(lvb->lvb_iatime_packed), | 
|  | 141 | (long long)be64_to_cpu(lvb->lvb_ictime_packed), | 
|  | 142 | (long long)be64_to_cpu(lvb->lvb_imtime_packed), | 
|  | 143 | be32_to_cpu(lvb->lvb_iattr)); | 
|  | 144 | } | 
|  | 145 |  | 
|  | 146 |  | 
/*
 * OCFS2 Lock Resource Operations
 *
 * These fine tune the behavior of the generic dlmglue locking infrastructure.
 *
 * The most basic of lock types can point ->l_priv to their respective
 * struct ocfs2_super and allow the default actions to manage things.
 *
 * Right now, each lock type also needs to implement an init function,
 * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres()
 * should be called when the lock is no longer needed (i.e., object
 * destruction time).
 *
 * Every callback is optional; a table may leave any of them NULL.
 */
struct ocfs2_lock_res_ops {
	/*
	 * Translate an ocfs2_lock_res * into an ocfs2_super *. Define
	 * this callback if ->l_priv is not an ocfs2_super pointer;
	 * when it is NULL, ocfs2_get_lockres_osb() returns ->l_priv
	 * itself.
	 */
	struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *);

	/*
	 * Optionally called in the downconvert thread after a
	 * successful downconvert. The lockres will not be referenced
	 * after this callback is called, so it is safe to free
	 * memory, etc.
	 *
	 * The exact semantics of when this is called are controlled
	 * by ->downconvert_worker()
	 */
	void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);

	/*
	 * Allow a lock type to add checks to determine whether it is
	 * safe to downconvert a lock. Return 0 to re-queue the
	 * downconvert at a later time, nonzero to continue.
	 *
	 * For most locks, the default checks that there are no
	 * incompatible holders are sufficient.
	 *
	 * Called with the lockres spinlock held.
	 */
	int (*check_downconvert)(struct ocfs2_lock_res *, int);

	/*
	 * Allows a lock type to populate the lock value block. This
	 * is called on downconvert, and when we drop a lock.
	 *
	 * Locks that want to use this should set LOCK_TYPE_USES_LVB
	 * in the flags field.
	 *
	 * Called with the lockres spinlock held.
	 */
	void (*set_lvb)(struct ocfs2_lock_res *);

	/*
	 * Called from the downconvert thread when it is determined
	 * that a lock will be downconverted. This is called without
	 * any locks held so the function can do work that might
	 * schedule (syncing out data, etc).
	 *
	 * This should return any one of the ocfs2_unblock_action
	 * values, depending on what it wants the thread to do.
	 */
	int (*downconvert_worker)(struct ocfs2_lock_res *, int);

	/*
	 * LOCK_TYPE_* flags which describe the specific requirements
	 * of a lock type. Descriptions of each individual flag follow.
	 */
	int flags;
};
|  | 218 |  | 
/*
 * Some locks want to "refresh" potentially stale data when a
 * meaningful (PRMODE or EXMODE) lock level is first obtained. If this
 * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the
 * individual lockres l_flags member from the ast function. It is
 * expected that the locking wrapper will clear the
 * OCFS2_LOCK_NEEDS_REFRESH flag when done.
 */
#define LOCK_TYPE_REQUIRES_REFRESH 0x1

/*
 * Indicate that a lock type makes use of the lock value block. The
 * ->set_lvb lock type callback must be defined.
 */
#define LOCK_TYPE_USES_LVB		0x2
|  | 234 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 235 | static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { | 
| Mark Fasheh | 54a7e75 | 2006-09-12 21:49:13 -0700 | [diff] [blame] | 236 | .get_osb	= ocfs2_get_inode_osb, | 
| Mark Fasheh | f625c97 | 2006-09-12 21:24:53 -0700 | [diff] [blame] | 237 | .flags		= 0, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 238 | }; | 
|  | 239 |  | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 240 | static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { | 
| Mark Fasheh | 54a7e75 | 2006-09-12 21:49:13 -0700 | [diff] [blame] | 241 | .get_osb	= ocfs2_get_inode_osb, | 
| Mark Fasheh | 810d5ae | 2006-09-13 21:39:52 -0700 | [diff] [blame] | 242 | .check_downconvert = ocfs2_check_meta_downconvert, | 
|  | 243 | .set_lvb	= ocfs2_set_meta_lvb, | 
| Mark Fasheh | f1f5406 | 2007-10-18 15:13:59 -0700 | [diff] [blame] | 244 | .downconvert_worker = ocfs2_data_convert_worker, | 
| Mark Fasheh | b80fc01 | 2006-09-12 22:08:14 -0700 | [diff] [blame] | 245 | .flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 246 | }; | 
|  | 247 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 248 | static struct ocfs2_lock_res_ops ocfs2_super_lops = { | 
| Mark Fasheh | f625c97 | 2006-09-12 21:24:53 -0700 | [diff] [blame] | 249 | .flags		= LOCK_TYPE_REQUIRES_REFRESH, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 250 | }; | 
|  | 251 |  | 
|  | 252 | static struct ocfs2_lock_res_ops ocfs2_rename_lops = { | 
| Mark Fasheh | f625c97 | 2006-09-12 21:24:53 -0700 | [diff] [blame] | 253 | .flags		= 0, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 254 | }; | 
|  | 255 |  | 
| wengang wang | 6ca497a | 2009-03-06 21:29:10 +0800 | [diff] [blame] | 256 | static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = { | 
|  | 257 | .flags		= 0, | 
|  | 258 | }; | 
|  | 259 |  | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 260 | static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = { | 
|  | 261 | .flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, | 
|  | 262 | }; | 
|  | 263 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 264 | static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { | 
| Mark Fasheh | 54a7e75 | 2006-09-12 21:49:13 -0700 | [diff] [blame] | 265 | .get_osb	= ocfs2_get_dentry_osb, | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 266 | .post_unlock	= ocfs2_dentry_post_unlock, | 
| Mark Fasheh | cc567d8 | 2006-09-13 21:52:21 -0700 | [diff] [blame] | 267 | .downconvert_worker = ocfs2_dentry_convert_worker, | 
| Mark Fasheh | f625c97 | 2006-09-12 21:24:53 -0700 | [diff] [blame] | 268 | .flags		= 0, | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 269 | }; | 
|  | 270 |  | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 271 | static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { | 
|  | 272 | .get_osb	= ocfs2_get_inode_osb, | 
|  | 273 | .flags		= 0, | 
|  | 274 | }; | 
|  | 275 |  | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 276 | static struct ocfs2_lock_res_ops ocfs2_flock_lops = { | 
|  | 277 | .get_osb	= ocfs2_get_file_osb, | 
|  | 278 | .flags		= 0, | 
|  | 279 | }; | 
|  | 280 |  | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 281 | static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = { | 
|  | 282 | .set_lvb	= ocfs2_set_qinfo_lvb, | 
|  | 283 | .get_osb	= ocfs2_get_qinfo_osb, | 
|  | 284 | .flags		= LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB, | 
|  | 285 | }; | 
|  | 286 |  | 
| Tao Ma | 8dec98e | 2009-08-18 11:19:58 +0800 | [diff] [blame] | 287 | static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = { | 
|  | 288 | .check_downconvert = ocfs2_check_refcount_downconvert, | 
|  | 289 | .downconvert_worker = ocfs2_refcount_convert_worker, | 
|  | 290 | .flags		= 0, | 
|  | 291 | }; | 
|  | 292 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 293 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) | 
|  | 294 | { | 
|  | 295 | return lockres->l_type == OCFS2_LOCK_TYPE_META || | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 296 | lockres->l_type == OCFS2_LOCK_TYPE_RW || | 
|  | 297 | lockres->l_type == OCFS2_LOCK_TYPE_OPEN; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 298 | } | 
|  | 299 |  | 
| Joel Becker | c0e4133 | 2010-01-29 14:46:44 -0800 | [diff] [blame] | 300 | static inline struct ocfs2_lock_res *ocfs2_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb) | 
| Joel Becker | a796d28 | 2010-01-28 19:22:39 -0800 | [diff] [blame] | 301 | { | 
|  | 302 | return container_of(lksb, struct ocfs2_lock_res, l_lksb); | 
|  | 303 | } | 
|  | 304 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 305 | static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) | 
|  | 306 | { | 
|  | 307 | BUG_ON(!ocfs2_is_inode_lock(lockres)); | 
|  | 308 |  | 
|  | 309 | return (struct inode *) lockres->l_priv; | 
|  | 310 | } | 
|  | 311 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 312 | static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres) | 
|  | 313 | { | 
|  | 314 | BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY); | 
|  | 315 |  | 
|  | 316 | return (struct ocfs2_dentry_lock *)lockres->l_priv; | 
|  | 317 | } | 
|  | 318 |  | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 319 | static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres) | 
|  | 320 | { | 
|  | 321 | BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO); | 
|  | 322 |  | 
|  | 323 | return (struct ocfs2_mem_dqinfo *)lockres->l_priv; | 
|  | 324 | } | 
|  | 325 |  | 
| Tao Ma | 8dec98e | 2009-08-18 11:19:58 +0800 | [diff] [blame] | 326 | static inline struct ocfs2_refcount_tree * | 
|  | 327 | ocfs2_lock_res_refcount_tree(struct ocfs2_lock_res *res) | 
|  | 328 | { | 
|  | 329 | return container_of(res, struct ocfs2_refcount_tree, rf_lockres); | 
|  | 330 | } | 
|  | 331 |  | 
| Mark Fasheh | 54a7e75 | 2006-09-12 21:49:13 -0700 | [diff] [blame] | 332 | static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres) | 
|  | 333 | { | 
|  | 334 | if (lockres->l_ops->get_osb) | 
|  | 335 | return lockres->l_ops->get_osb(lockres); | 
|  | 336 |  | 
|  | 337 | return (struct ocfs2_super *)lockres->l_priv; | 
|  | 338 | } | 
|  | 339 |  | 
/* Forward declarations; the definitions are not in this part of the file. */
static int ocfs2_lock_create(struct ocfs2_super *osb,
			     struct ocfs2_lock_res *lockres,
			     int level,
			     u32 dlm_flags);
static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
						     int wanted);
static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
				   struct ocfs2_lock_res *lockres,
				   int level, unsigned long caller_ip);

/*
 * Convenience wrapper around __ocfs2_cluster_unlock() that supplies
 * _RET_IP_ as the caller_ip argument.
 */
static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres,
					int level)
{
	__ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_);
}

static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres);
static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres);
static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres);
static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level);
static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres);
static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
						int convert);
/*
 * Log a DLM failure at ML_ERROR.
 *
 * @_func:	name of the DLM call that failed (string)
 * @_err:	error value it returned
 * @_lockres:	lock resource involved; evaluated more than once, so it
 *		must not have side effects
 *
 * For dentry locks the lock name is printed as its first
 * OCFS2_DENTRY_LOCK_INO_START - 1 characters followed by the inode
 * number from ocfs2_get_dentry_lock_ino() in hex; every other lock
 * type prints ->l_name as a plain string.
 *
 * Every macro argument is parenthesised so that an expression such as
 * &res can be passed safely.
 */
#define ocfs2_log_dlm_error(_func, _err, _lockres) do {					\
	if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY)				\
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n",	\
		     (_err), (_func), (_lockres)->l_name);				\
	else										\
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %.*s%08x\n",	\
		     (_err), (_func), OCFS2_DENTRY_LOCK_INO_START - 1,			\
		     (_lockres)->l_name,						\
		     (unsigned int)ocfs2_get_dentry_lock_ino(_lockres));		\
} while (0)
/* Forward declarations; the definitions are not in this part of the file. */
static int ocfs2_downconvert_thread(void *arg);
static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres);
static int ocfs2_inode_lock_update(struct inode *inode,
				  struct buffer_head **bh);
static void ocfs2_drop_osb_locks(struct ocfs2_super *osb);
static inline int ocfs2_highest_compat_lock_level(int level);
static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
					      int new_level);
static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
				  struct ocfs2_lock_res *lockres,
				  int new_level,
				  int lvb,
				  unsigned int generation);
static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
				        struct ocfs2_lock_res *lockres);
static int ocfs2_cancel_convert(struct ocfs2_super *osb,
				struct ocfs2_lock_res *lockres);
|  | 391 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 392 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 393 | static void ocfs2_build_lock_name(enum ocfs2_lock_type type, | 
|  | 394 | u64 blkno, | 
|  | 395 | u32 generation, | 
|  | 396 | char *name) | 
|  | 397 | { | 
|  | 398 | int len; | 
|  | 399 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 400 | BUG_ON(type >= OCFS2_NUM_LOCK_TYPES); | 
|  | 401 |  | 
| Mark Fasheh | b0697053 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 402 | len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x", | 
|  | 403 | ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD, | 
|  | 404 | (long long)blkno, generation); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 405 |  | 
|  | 406 | BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1)); | 
|  | 407 |  | 
|  | 408 | mlog(0, "built lock resource with name: %s\n", name); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 409 | } | 
|  | 410 |  | 
/*
 * Serializes additions to and removals from the debug lockres tracking
 * list (->l_debug_list entries).
 */
static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock);
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 412 |  | 
|  | 413 | static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res, | 
|  | 414 | struct ocfs2_dlm_debug *dlm_debug) | 
|  | 415 | { | 
|  | 416 | mlog(0, "Add tracking for lockres %s\n", res->l_name); | 
|  | 417 |  | 
|  | 418 | spin_lock(&ocfs2_dlm_tracking_lock); | 
|  | 419 | list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking); | 
|  | 420 | spin_unlock(&ocfs2_dlm_tracking_lock); | 
|  | 421 | } | 
|  | 422 |  | 
|  | 423 | static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res) | 
|  | 424 | { | 
|  | 425 | spin_lock(&ocfs2_dlm_tracking_lock); | 
|  | 426 | if (!list_empty(&res->l_debug_list)) | 
|  | 427 | list_del_init(&res->l_debug_list); | 
|  | 428 | spin_unlock(&ocfs2_dlm_tracking_lock); | 
|  | 429 | } | 
|  | 430 |  | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 431 | #ifdef CONFIG_OCFS2_FS_STATS | 
|  | 432 | static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) | 
|  | 433 | { | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 434 | res->l_lock_refresh = 0; | 
| Sunil Mushran | 5bc970e | 2010-12-28 23:26:03 -0800 | [diff] [blame] | 435 | memset(&res->l_lock_prmode, 0, sizeof(struct ocfs2_lock_stats)); | 
|  | 436 | memset(&res->l_lock_exmode, 0, sizeof(struct ocfs2_lock_stats)); | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 437 | } | 
|  | 438 |  | 
|  | 439 | static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level, | 
|  | 440 | struct ocfs2_mask_waiter *mw, int ret) | 
|  | 441 | { | 
| Sunil Mushran | 5bc970e | 2010-12-28 23:26:03 -0800 | [diff] [blame] | 442 | u32 usec; | 
|  | 443 | ktime_t kt; | 
|  | 444 | struct ocfs2_lock_stats *stats; | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 445 |  | 
| Sunil Mushran | 5bc970e | 2010-12-28 23:26:03 -0800 | [diff] [blame] | 446 | if (level == LKM_PRMODE) | 
|  | 447 | stats = &res->l_lock_prmode; | 
|  | 448 | else if (level == LKM_EXMODE) | 
|  | 449 | stats = &res->l_lock_exmode; | 
|  | 450 | else | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 451 | return; | 
|  | 452 |  | 
| Sunil Mushran | 5bc970e | 2010-12-28 23:26:03 -0800 | [diff] [blame] | 453 | kt = ktime_sub(ktime_get(), mw->mw_lock_start); | 
|  | 454 | usec = ktime_to_us(kt); | 
|  | 455 |  | 
|  | 456 | stats->ls_gets++; | 
|  | 457 | stats->ls_total += ktime_to_ns(kt); | 
|  | 458 | /* overflow */ | 
|  | 459 | if (unlikely(stats->ls_gets) == 0) { | 
|  | 460 | stats->ls_gets++; | 
|  | 461 | stats->ls_total = ktime_to_ns(kt); | 
|  | 462 | } | 
|  | 463 |  | 
|  | 464 | if (stats->ls_max < usec) | 
|  | 465 | stats->ls_max = usec; | 
|  | 466 |  | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 467 | if (ret) | 
| Sunil Mushran | 5bc970e | 2010-12-28 23:26:03 -0800 | [diff] [blame] | 468 | stats->ls_fail++; | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 469 | } | 
|  | 470 |  | 
|  | 471 | static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) | 
|  | 472 | { | 
|  | 473 | lockres->l_lock_refresh++; | 
|  | 474 | } | 
|  | 475 |  | 
|  | 476 | static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) | 
|  | 477 | { | 
| Sunil Mushran | 5bc970e | 2010-12-28 23:26:03 -0800 | [diff] [blame] | 478 | mw->mw_lock_start = ktime_get(); | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 479 | } | 
|  | 480 | #else | 
/*
 * No-op stand-ins used when CONFIG_OCFS2_FS_STATS is not set, so that
 * callers need no #ifdefs of their own.
 */
static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
{
}
static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res,
			   int level, struct ocfs2_mask_waiter *mw, int ret)
{
}
static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
{
}
static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
{
}
|  | 494 | #endif | 
|  | 495 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 496 | static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, | 
|  | 497 | struct ocfs2_lock_res *res, | 
|  | 498 | enum ocfs2_lock_type type, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 499 | struct ocfs2_lock_res_ops *ops, | 
|  | 500 | void *priv) | 
|  | 501 | { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 502 | res->l_type          = type; | 
|  | 503 | res->l_ops           = ops; | 
|  | 504 | res->l_priv          = priv; | 
|  | 505 |  | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 506 | res->l_level         = DLM_LOCK_IV; | 
|  | 507 | res->l_requested     = DLM_LOCK_IV; | 
|  | 508 | res->l_blocking      = DLM_LOCK_IV; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 509 | res->l_action        = OCFS2_AST_INVALID; | 
|  | 510 | res->l_unlock_action = OCFS2_UNLOCK_INVALID; | 
|  | 511 |  | 
|  | 512 | res->l_flags         = OCFS2_LOCK_INITIALIZED; | 
|  | 513 |  | 
|  | 514 | ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug); | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 515 |  | 
|  | 516 | ocfs2_init_lock_stats(res); | 
| Jan Kara | cb25797 | 2009-06-04 15:26:50 +0200 | [diff] [blame] | 517 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 
|  | 518 | if (type != OCFS2_LOCK_TYPE_OPEN) | 
|  | 519 | lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type], | 
|  | 520 | &lockdep_keys[type], 0); | 
|  | 521 | else | 
|  | 522 | res->l_lockdep_map.key = NULL; | 
|  | 523 | #endif | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 524 | } | 
|  | 525 |  | 
|  | 526 | void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) | 
|  | 527 | { | 
|  | 528 | /* This also clears out the lock status block */ | 
|  | 529 | memset(res, 0, sizeof(struct ocfs2_lock_res)); | 
|  | 530 | spin_lock_init(&res->l_lock); | 
|  | 531 | init_waitqueue_head(&res->l_event); | 
|  | 532 | INIT_LIST_HEAD(&res->l_blocked_list); | 
|  | 533 | INIT_LIST_HEAD(&res->l_mask_waiters); | 
|  | 534 | } | 
|  | 535 |  | 
|  | 536 | void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, | 
|  | 537 | enum ocfs2_lock_type type, | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 538 | unsigned int generation, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 539 | struct inode *inode) | 
|  | 540 | { | 
|  | 541 | struct ocfs2_lock_res_ops *ops; | 
|  | 542 |  | 
|  | 543 | switch(type) { | 
|  | 544 | case OCFS2_LOCK_TYPE_RW: | 
|  | 545 | ops = &ocfs2_inode_rw_lops; | 
|  | 546 | break; | 
|  | 547 | case OCFS2_LOCK_TYPE_META: | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 548 | ops = &ocfs2_inode_inode_lops; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 549 | break; | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 550 | case OCFS2_LOCK_TYPE_OPEN: | 
|  | 551 | ops = &ocfs2_inode_open_lops; | 
|  | 552 | break; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 553 | default: | 
|  | 554 | mlog_bug_on_msg(1, "type: %d\n", type); | 
|  | 555 | ops = NULL; /* thanks, gcc */ | 
|  | 556 | break; | 
|  | 557 | }; | 
|  | 558 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 559 | ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno, | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 560 | generation, res->l_name); | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 561 | ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode); | 
|  | 562 | } | 
|  | 563 |  | 
| Mark Fasheh | 54a7e75 | 2006-09-12 21:49:13 -0700 | [diff] [blame] | 564 | static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) | 
|  | 565 | { | 
|  | 566 | struct inode *inode = ocfs2_lock_res_inode(lockres); | 
|  | 567 |  | 
|  | 568 | return OCFS2_SB(inode->i_sb); | 
|  | 569 | } | 
|  | 570 |  | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 571 | static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres) | 
|  | 572 | { | 
|  | 573 | struct ocfs2_mem_dqinfo *info = lockres->l_priv; | 
|  | 574 |  | 
|  | 575 | return OCFS2_SB(info->dqi_gi.dqi_sb); | 
|  | 576 | } | 
|  | 577 |  | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 578 | static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres) | 
|  | 579 | { | 
|  | 580 | struct ocfs2_file_private *fp = lockres->l_priv; | 
|  | 581 |  | 
|  | 582 | return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb); | 
|  | 583 | } | 
|  | 584 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 585 | static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) | 
|  | 586 | { | 
|  | 587 | __be64 inode_blkno_be; | 
|  | 588 |  | 
|  | 589 | memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], | 
|  | 590 | sizeof(__be64)); | 
|  | 591 |  | 
|  | 592 | return be64_to_cpu(inode_blkno_be); | 
|  | 593 | } | 
|  | 594 |  | 
| Mark Fasheh | 54a7e75 | 2006-09-12 21:49:13 -0700 | [diff] [blame] | 595 | static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres) | 
|  | 596 | { | 
|  | 597 | struct ocfs2_dentry_lock *dl = lockres->l_priv; | 
|  | 598 |  | 
|  | 599 | return OCFS2_SB(dl->dl_inode->i_sb); | 
|  | 600 | } | 
|  | 601 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 602 | void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, | 
|  | 603 | u64 parent, struct inode *inode) | 
|  | 604 | { | 
|  | 605 | int len; | 
|  | 606 | u64 inode_blkno = OCFS2_I(inode)->ip_blkno; | 
|  | 607 | __be64 inode_blkno_be = cpu_to_be64(inode_blkno); | 
|  | 608 | struct ocfs2_lock_res *lockres = &dl->dl_lockres; | 
|  | 609 |  | 
|  | 610 | ocfs2_lock_res_init_once(lockres); | 
|  | 611 |  | 
|  | 612 | /* | 
|  | 613 | * Unfortunately, the standard lock naming scheme won't work | 
|  | 614 | * here because we have two 16 byte values to use. Instead, | 
|  | 615 | * we'll stuff the inode number as a binary value. We still | 
|  | 616 | * want error prints to show something without garbling the | 
|  | 617 | * display, so drop a null byte in there before the inode | 
|  | 618 | * number. A future version of OCFS2 will likely use all | 
|  | 619 | * binary lock names. The stringified names have been a | 
|  | 620 | * tremendous aid in debugging, but now that the debugfs | 
|  | 621 | * interface exists, we can mangle things there if need be. | 
|  | 622 | * | 
|  | 623 | * NOTE: We also drop the standard "pad" value (the total lock | 
|  | 624 | * name size stays the same though - the last part is all | 
|  | 625 | * zeros due to the memset in ocfs2_lock_res_init_once() | 
|  | 626 | */ | 
|  | 627 | len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START, | 
|  | 628 | "%c%016llx", | 
|  | 629 | ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY), | 
|  | 630 | (long long)parent); | 
|  | 631 |  | 
|  | 632 | BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1)); | 
|  | 633 |  | 
|  | 634 | memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be, | 
|  | 635 | sizeof(__be64)); | 
|  | 636 |  | 
|  | 637 | ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, | 
|  | 638 | OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops, | 
|  | 639 | dl); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 640 | } | 
|  | 641 |  | 
|  | 642 | static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, | 
|  | 643 | struct ocfs2_super *osb) | 
|  | 644 | { | 
|  | 645 | /* Superblock lockres doesn't come from a slab so we call init | 
|  | 646 | * once on it manually.  */ | 
|  | 647 | ocfs2_lock_res_init_once(res); | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 648 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO, | 
|  | 649 | 0, res->l_name); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 650 | ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 651 | &ocfs2_super_lops, osb); | 
|  | 652 | } | 
|  | 653 |  | 
|  | 654 | static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, | 
|  | 655 | struct ocfs2_super *osb) | 
|  | 656 | { | 
|  | 657 | /* Rename lockres doesn't come from a slab so we call init | 
|  | 658 | * once on it manually.  */ | 
|  | 659 | ocfs2_lock_res_init_once(res); | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 660 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name); | 
|  | 661 | ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 662 | &ocfs2_rename_lops, osb); | 
|  | 663 | } | 
|  | 664 |  | 
| wengang wang | 6ca497a | 2009-03-06 21:29:10 +0800 | [diff] [blame] | 665 | static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res, | 
|  | 666 | struct ocfs2_super *osb) | 
|  | 667 | { | 
|  | 668 | /* nfs_sync lockres doesn't come from a slab so we call init | 
|  | 669 | * once on it manually.  */ | 
|  | 670 | ocfs2_lock_res_init_once(res); | 
|  | 671 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name); | 
|  | 672 | ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC, | 
|  | 673 | &ocfs2_nfs_sync_lops, osb); | 
|  | 674 | } | 
|  | 675 |  | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 676 | static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res, | 
|  | 677 | struct ocfs2_super *osb) | 
|  | 678 | { | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 679 | ocfs2_lock_res_init_once(res); | 
|  | 680 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name); | 
|  | 681 | ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN, | 
|  | 682 | &ocfs2_orphan_scan_lops, osb); | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 683 | } | 
|  | 684 |  | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 685 | void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, | 
|  | 686 | struct ocfs2_file_private *fp) | 
|  | 687 | { | 
|  | 688 | struct inode *inode = fp->fp_file->f_mapping->host; | 
|  | 689 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 
|  | 690 |  | 
|  | 691 | ocfs2_lock_res_init_once(lockres); | 
|  | 692 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno, | 
|  | 693 | inode->i_generation, lockres->l_name); | 
|  | 694 | ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, | 
|  | 695 | OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops, | 
|  | 696 | fp); | 
|  | 697 | lockres->l_flags |= OCFS2_LOCK_NOCACHE; | 
|  | 698 | } | 
|  | 699 |  | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 700 | void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres, | 
|  | 701 | struct ocfs2_mem_dqinfo *info) | 
|  | 702 | { | 
|  | 703 | ocfs2_lock_res_init_once(lockres); | 
|  | 704 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type, | 
|  | 705 | 0, lockres->l_name); | 
|  | 706 | ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres, | 
|  | 707 | OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops, | 
|  | 708 | info); | 
|  | 709 | } | 
|  | 710 |  | 
| Tao Ma | 8dec98e | 2009-08-18 11:19:58 +0800 | [diff] [blame] | 711 | void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres, | 
|  | 712 | struct ocfs2_super *osb, u64 ref_blkno, | 
|  | 713 | unsigned int generation) | 
|  | 714 | { | 
|  | 715 | ocfs2_lock_res_init_once(lockres); | 
|  | 716 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_REFCOUNT, ref_blkno, | 
|  | 717 | generation, lockres->l_name); | 
|  | 718 | ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_REFCOUNT, | 
|  | 719 | &ocfs2_refcount_block_lops, osb); | 
|  | 720 | } | 
|  | 721 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 722 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res) | 
|  | 723 | { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 724 | if (!(res->l_flags & OCFS2_LOCK_INITIALIZED)) | 
|  | 725 | return; | 
|  | 726 |  | 
|  | 727 | ocfs2_remove_lockres_tracking(res); | 
|  | 728 |  | 
|  | 729 | mlog_bug_on_msg(!list_empty(&res->l_blocked_list), | 
|  | 730 | "Lockres %s is on the blocked list\n", | 
|  | 731 | res->l_name); | 
|  | 732 | mlog_bug_on_msg(!list_empty(&res->l_mask_waiters), | 
|  | 733 | "Lockres %s has mask waiters pending\n", | 
|  | 734 | res->l_name); | 
|  | 735 | mlog_bug_on_msg(spin_is_locked(&res->l_lock), | 
|  | 736 | "Lockres %s is locked\n", | 
|  | 737 | res->l_name); | 
|  | 738 | mlog_bug_on_msg(res->l_ro_holders, | 
|  | 739 | "Lockres %s has %u ro holders\n", | 
|  | 740 | res->l_name, res->l_ro_holders); | 
|  | 741 | mlog_bug_on_msg(res->l_ex_holders, | 
|  | 742 | "Lockres %s has %u ex holders\n", | 
|  | 743 | res->l_name, res->l_ex_holders); | 
|  | 744 |  | 
|  | 745 | /* Need to clear out the lock status block for the dlm */ | 
|  | 746 | memset(&res->l_lksb, 0, sizeof(res->l_lksb)); | 
|  | 747 |  | 
|  | 748 | res->l_flags = 0UL; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 749 | } | 
|  | 750 |  | 
|  | 751 | static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, | 
|  | 752 | int level) | 
|  | 753 | { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 754 | BUG_ON(!lockres); | 
|  | 755 |  | 
|  | 756 | switch(level) { | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 757 | case DLM_LOCK_EX: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 758 | lockres->l_ex_holders++; | 
|  | 759 | break; | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 760 | case DLM_LOCK_PR: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 761 | lockres->l_ro_holders++; | 
|  | 762 | break; | 
|  | 763 | default: | 
|  | 764 | BUG(); | 
|  | 765 | } | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 766 | } | 
|  | 767 |  | 
|  | 768 | static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres, | 
|  | 769 | int level) | 
|  | 770 | { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 771 | BUG_ON(!lockres); | 
|  | 772 |  | 
|  | 773 | switch(level) { | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 774 | case DLM_LOCK_EX: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 775 | BUG_ON(!lockres->l_ex_holders); | 
|  | 776 | lockres->l_ex_holders--; | 
|  | 777 | break; | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 778 | case DLM_LOCK_PR: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 779 | BUG_ON(!lockres->l_ro_holders); | 
|  | 780 | lockres->l_ro_holders--; | 
|  | 781 | break; | 
|  | 782 | default: | 
|  | 783 | BUG(); | 
|  | 784 | } | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 785 | } | 
|  | 786 |  | 
|  | 787 | /* WARNING: This function lives in a world where the only three lock | 
|  | 788 | * levels are EX, PR, and NL. It *will* have to be adjusted when more | 
|  | 789 | * lock types are added. */ | 
|  | 790 | static inline int ocfs2_highest_compat_lock_level(int level) | 
|  | 791 | { | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 792 | int new_level = DLM_LOCK_EX; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 793 |  | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 794 | if (level == DLM_LOCK_EX) | 
|  | 795 | new_level = DLM_LOCK_NL; | 
|  | 796 | else if (level == DLM_LOCK_PR) | 
|  | 797 | new_level = DLM_LOCK_PR; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 798 | return new_level; | 
|  | 799 | } | 
|  | 800 |  | 
|  | 801 | static void lockres_set_flags(struct ocfs2_lock_res *lockres, | 
|  | 802 | unsigned long newflags) | 
|  | 803 | { | 
| Christoph Hellwig | 800deef | 2007-05-17 16:03:13 +0200 | [diff] [blame] | 804 | struct ocfs2_mask_waiter *mw, *tmp; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 805 |  | 
|  | 806 | assert_spin_locked(&lockres->l_lock); | 
|  | 807 |  | 
|  | 808 | lockres->l_flags = newflags; | 
|  | 809 |  | 
| Christoph Hellwig | 800deef | 2007-05-17 16:03:13 +0200 | [diff] [blame] | 810 | list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 811 | if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) | 
|  | 812 | continue; | 
|  | 813 |  | 
|  | 814 | list_del_init(&mw->mw_item); | 
|  | 815 | mw->mw_status = 0; | 
|  | 816 | complete(&mw->mw_complete); | 
|  | 817 | } | 
|  | 818 | } | 
|  | 819 | static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or) | 
|  | 820 | { | 
|  | 821 | lockres_set_flags(lockres, lockres->l_flags | or); | 
|  | 822 | } | 
|  | 823 | static void lockres_clear_flags(struct ocfs2_lock_res *lockres, | 
|  | 824 | unsigned long clear) | 
|  | 825 | { | 
|  | 826 | lockres_set_flags(lockres, lockres->l_flags & ~clear); | 
|  | 827 | } | 
|  | 828 |  | 
|  | 829 | static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres) | 
|  | 830 | { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 831 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); | 
|  | 832 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); | 
|  | 833 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 834 | BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 835 |  | 
|  | 836 | lockres->l_level = lockres->l_requested; | 
|  | 837 | if (lockres->l_level <= | 
|  | 838 | ocfs2_highest_compat_lock_level(lockres->l_blocking)) { | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 839 | lockres->l_blocking = DLM_LOCK_NL; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 840 | lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); | 
|  | 841 | } | 
|  | 842 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 843 | } | 
|  | 844 |  | 
|  | 845 | static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres) | 
|  | 846 | { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 847 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); | 
|  | 848 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); | 
|  | 849 |  | 
|  | 850 | /* Convert from RO to EX doesn't really need anything as our | 
|  | 851 | * information is already up to data. Convert from NL to | 
|  | 852 | * *anything* however should mark ourselves as needing an | 
|  | 853 | * update */ | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 854 | if (lockres->l_level == DLM_LOCK_NL && | 
| Mark Fasheh | f625c97 | 2006-09-12 21:24:53 -0700 | [diff] [blame] | 855 | lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 856 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); | 
|  | 857 |  | 
|  | 858 | lockres->l_level = lockres->l_requested; | 
| Sunil Mushran | a191282 | 2010-01-21 10:50:03 -0800 | [diff] [blame] | 859 |  | 
|  | 860 | /* | 
|  | 861 | * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing | 
|  | 862 | * the OCFS2_LOCK_BUSY flag to prevent the dc thread from | 
|  | 863 | * downconverting the lock before the upconvert has fully completed. | 
|  | 864 | */ | 
|  | 865 | lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); | 
|  | 866 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 867 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 868 | } | 
|  | 869 |  | 
|  | 870 | static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres) | 
|  | 871 | { | 
| Roel Kluin | 3cf0c50 | 2007-10-27 00:20:36 +0200 | [diff] [blame] | 872 | BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY))); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 873 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); | 
|  | 874 |  | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 875 | if (lockres->l_requested > DLM_LOCK_NL && | 
| Mark Fasheh | f625c97 | 2006-09-12 21:24:53 -0700 | [diff] [blame] | 876 | !(lockres->l_flags & OCFS2_LOCK_LOCAL) && | 
|  | 877 | lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 878 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); | 
|  | 879 |  | 
|  | 880 | lockres->l_level = lockres->l_requested; | 
|  | 881 | lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED); | 
|  | 882 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 883 | } | 
|  | 884 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 885 | static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, | 
|  | 886 | int level) | 
|  | 887 | { | 
|  | 888 | int needs_downconvert = 0; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 889 |  | 
|  | 890 | assert_spin_locked(&lockres->l_lock); | 
|  | 891 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 892 | if (level > lockres->l_blocking) { | 
|  | 893 | /* only schedule a downconvert if we haven't already scheduled | 
|  | 894 | * one that goes low enough to satisfy the level we're | 
|  | 895 | * blocking.  this also catches the case where we get | 
|  | 896 | * duplicate BASTs */ | 
|  | 897 | if (ocfs2_highest_compat_lock_level(level) < | 
|  | 898 | ocfs2_highest_compat_lock_level(lockres->l_blocking)) | 
|  | 899 | needs_downconvert = 1; | 
|  | 900 |  | 
|  | 901 | lockres->l_blocking = level; | 
|  | 902 | } | 
|  | 903 |  | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 904 | mlog(ML_BASTS, "lockres %s, block %d, level %d, l_block %d, dwn %d\n", | 
|  | 905 | lockres->l_name, level, lockres->l_level, lockres->l_blocking, | 
|  | 906 | needs_downconvert); | 
|  | 907 |  | 
| Wengang Wang | 0b94a90 | 2010-01-21 10:50:02 -0800 | [diff] [blame] | 908 | if (needs_downconvert) | 
|  | 909 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); | 
| Tao Ma | c1e8d35 | 2011-03-07 16:43:21 +0800 | [diff] [blame] | 910 | mlog(0, "needs_downconvert = %d\n", needs_downconvert); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 911 | return needs_downconvert; | 
|  | 912 | } | 
|  | 913 |  | 
| Joel Becker | de55124 | 2008-02-01 14:45:08 -0800 | [diff] [blame] | 914 | /* | 
|  | 915 | * OCFS2_LOCK_PENDING and l_pending_gen. | 
|  | 916 | * | 
|  | 917 | * Why does OCFS2_LOCK_PENDING exist?  To close a race between setting | 
|  | 918 | * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock().  See ocfs2_unblock_lock() | 
|  | 919 | * for more details on the race. | 
|  | 920 | * | 
|  | 921 | * OCFS2_LOCK_PENDING closes the race quite nicely.  However, it introduces | 
|  | 922 | * a race on itself.  In o2dlm, we can get the ast before ocfs2_dlm_lock() | 
|  | 923 | * returns.  The ast clears OCFS2_LOCK_BUSY, and must therefore clear | 
|  | 924 | * OCFS2_LOCK_PENDING at the same time.  When ocfs2_dlm_lock() returns, | 
|  | 925 | * the caller is going to try to clear PENDING again.  If nothing else is | 
|  | 926 | * happening, __lockres_clear_pending() sees PENDING is unset and does | 
|  | 927 | * nothing. | 
|  | 928 | * | 
|  | 929 | * But what if another path (eg downconvert thread) has just started a | 
|  | 930 | * new locking action?  The other path has re-set PENDING.  Our path | 
|  | 931 | * cannot clear PENDING, because that will re-open the original race | 
|  | 932 | * window. | 
|  | 933 | * | 
|  | 934 | * [Example] | 
|  | 935 | * | 
|  | 936 | * ocfs2_meta_lock() | 
|  | 937 | *  ocfs2_cluster_lock() | 
|  | 938 | *   set BUSY | 
|  | 939 | *   set PENDING | 
|  | 940 | *   drop l_lock | 
|  | 941 | *   ocfs2_dlm_lock() | 
|  | 942 | *    ocfs2_locking_ast()		ocfs2_downconvert_thread() | 
|  | 943 | *     clear PENDING			 ocfs2_unblock_lock() | 
|  | 944 | *					  take_l_lock | 
|  | 945 | *					  !BUSY | 
|  | 946 | *					  ocfs2_prepare_downconvert() | 
|  | 947 | *					   set BUSY | 
|  | 948 | *					   set PENDING | 
|  | 949 | *					  drop l_lock | 
|  | 950 | *   take l_lock | 
|  | 951 | *   clear PENDING | 
|  | 952 | *   drop l_lock | 
|  | 953 | *			<window> | 
|  | 954 | *					  ocfs2_dlm_lock() | 
|  | 955 | * | 
|  | 956 | * So as you can see, we now have a window where l_lock is not held, | 
|  | 957 | * PENDING is not set, and ocfs2_dlm_lock() has not been called. | 
|  | 958 | * | 
|  | 959 | * The core problem is that ocfs2_cluster_lock() has cleared the PENDING | 
|  | 960 | * set by ocfs2_prepare_downconvert().  That wasn't nice. | 
|  | 961 | * | 
|  | 962 | * To solve this we introduce l_pending_gen.  A call to | 
|  | 963 | * lockres_clear_pending() will only do so when it is passed a generation | 
|  | 964 | * number that matches the lockres.  lockres_set_pending() will return the | 
|  | 965 | * current generation number.  When ocfs2_cluster_lock() goes to clear | 
|  | 966 | * PENDING, it passes the generation it got from set_pending().  In our | 
|  | 967 | * example above, the generation numbers will *not* match.  Thus, | 
|  | 968 | * ocfs2_cluster_lock() will not clear the PENDING set by | 
|  | 969 | * ocfs2_prepare_downconvert(). | 
|  | 970 | */ | 
|  | 971 |  | 
|  | 972 | /* Unlocked version for ocfs2_locking_ast() */ | 
|  | 973 | static void __lockres_clear_pending(struct ocfs2_lock_res *lockres, | 
|  | 974 | unsigned int generation, | 
|  | 975 | struct ocfs2_super *osb) | 
|  | 976 | { | 
|  | 977 | assert_spin_locked(&lockres->l_lock); | 
|  | 978 |  | 
|  | 979 | /* | 
|  | 980 | * The ast and locking functions can race us here.  The winner | 
|  | 981 | * will clear pending, the loser will not. | 
|  | 982 | */ | 
|  | 983 | if (!(lockres->l_flags & OCFS2_LOCK_PENDING) || | 
|  | 984 | (lockres->l_pending_gen != generation)) | 
|  | 985 | return; | 
|  | 986 |  | 
|  | 987 | lockres_clear_flags(lockres, OCFS2_LOCK_PENDING); | 
|  | 988 | lockres->l_pending_gen++; | 
|  | 989 |  | 
|  | 990 | /* | 
|  | 991 | * The downconvert thread may have skipped us because we | 
|  | 992 | * were PENDING.  Wake it up. | 
|  | 993 | */ | 
|  | 994 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) | 
|  | 995 | ocfs2_wake_downconvert_thread(osb); | 
|  | 996 | } | 
|  | 997 |  | 
|  | 998 | /* Locked version for callers of ocfs2_dlm_lock() */ | 
|  | 999 | static void lockres_clear_pending(struct ocfs2_lock_res *lockres, | 
|  | 1000 | unsigned int generation, | 
|  | 1001 | struct ocfs2_super *osb) | 
|  | 1002 | { | 
|  | 1003 | unsigned long flags; | 
|  | 1004 |  | 
|  | 1005 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 1006 | __lockres_clear_pending(lockres, generation, osb); | 
|  | 1007 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 1008 | } | 
|  | 1009 |  | 
|  | 1010 | static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres) | 
|  | 1011 | { | 
|  | 1012 | assert_spin_locked(&lockres->l_lock); | 
|  | 1013 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); | 
|  | 1014 |  | 
|  | 1015 | lockres_or_flags(lockres, OCFS2_LOCK_PENDING); | 
|  | 1016 |  | 
|  | 1017 | return lockres->l_pending_gen; | 
|  | 1018 | } | 
|  | 1019 |  | 
| Joel Becker | c0e4133 | 2010-01-29 14:46:44 -0800 | [diff] [blame] | 1020 | static void ocfs2_blocking_ast(struct ocfs2_dlm_lksb *lksb, int level) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1021 | { | 
| Joel Becker | a796d28 | 2010-01-28 19:22:39 -0800 | [diff] [blame] | 1022 | struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); | 
| Mark Fasheh | aa2623a | 2006-09-12 21:58:23 -0700 | [diff] [blame] | 1023 | struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1024 | int needs_downconvert; | 
|  | 1025 | unsigned long flags; | 
|  | 1026 |  | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1027 | BUG_ON(level <= DLM_LOCK_NL); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1028 |  | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 1029 | mlog(ML_BASTS, "BAST fired for lockres %s, blocking %d, level %d, " | 
|  | 1030 | "type %s\n", lockres->l_name, level, lockres->l_level, | 
| Mark Fasheh | aa2623a | 2006-09-12 21:58:23 -0700 | [diff] [blame] | 1031 | ocfs2_lock_type_string(lockres->l_type)); | 
|  | 1032 |  | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1033 | /* | 
|  | 1034 | * We can skip the bast for locks which don't enable caching - | 
|  | 1035 | * they'll be dropped at the earliest possible time anyway. | 
|  | 1036 | */ | 
|  | 1037 | if (lockres->l_flags & OCFS2_LOCK_NOCACHE) | 
|  | 1038 | return; | 
|  | 1039 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1040 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 1041 | needs_downconvert = ocfs2_generic_handle_bast(lockres, level); | 
|  | 1042 | if (needs_downconvert) | 
|  | 1043 | ocfs2_schedule_blocked_lock(osb, lockres); | 
|  | 1044 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 1045 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 1046 | wake_up(&lockres->l_event); | 
|  | 1047 |  | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 1048 | ocfs2_wake_downconvert_thread(osb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1049 | } | 
|  | 1050 |  | 
| Joel Becker | c0e4133 | 2010-01-29 14:46:44 -0800 | [diff] [blame] | 1051 | static void ocfs2_locking_ast(struct ocfs2_dlm_lksb *lksb) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1052 | { | 
| Joel Becker | a796d28 | 2010-01-28 19:22:39 -0800 | [diff] [blame] | 1053 | struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); | 
| Joel Becker | de55124 | 2008-02-01 14:45:08 -0800 | [diff] [blame] | 1054 | struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1055 | unsigned long flags; | 
| David Teigland | 1693a5c | 2008-01-30 16:52:53 -0800 | [diff] [blame] | 1056 | int status; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1057 |  | 
|  | 1058 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 1059 |  | 
| David Teigland | 1693a5c | 2008-01-30 16:52:53 -0800 | [diff] [blame] | 1060 | status = ocfs2_dlm_lock_status(&lockres->l_lksb); | 
|  | 1061 |  | 
|  | 1062 | if (status == -EAGAIN) { | 
|  | 1063 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 
|  | 1064 | goto out; | 
|  | 1065 | } | 
|  | 1066 |  | 
|  | 1067 | if (status) { | 
| Joel Becker | 8f2c9c1 | 2008-02-01 12:16:57 -0800 | [diff] [blame] | 1068 | mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n", | 
| David Teigland | 1693a5c | 2008-01-30 16:52:53 -0800 | [diff] [blame] | 1069 | lockres->l_name, status); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1070 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 1071 | return; | 
|  | 1072 | } | 
|  | 1073 |  | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 1074 | mlog(ML_BASTS, "AST fired for lockres %s, action %d, unlock %d, " | 
|  | 1075 | "level %d => %d\n", lockres->l_name, lockres->l_action, | 
|  | 1076 | lockres->l_unlock_action, lockres->l_level, lockres->l_requested); | 
|  | 1077 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1078 | switch(lockres->l_action) { | 
|  | 1079 | case OCFS2_AST_ATTACH: | 
|  | 1080 | ocfs2_generic_handle_attach_action(lockres); | 
| Mark Fasheh | e92d57d | 2006-09-12 21:34:35 -0700 | [diff] [blame] | 1081 | lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1082 | break; | 
|  | 1083 | case OCFS2_AST_CONVERT: | 
|  | 1084 | ocfs2_generic_handle_convert_action(lockres); | 
|  | 1085 | break; | 
|  | 1086 | case OCFS2_AST_DOWNCONVERT: | 
|  | 1087 | ocfs2_generic_handle_downconvert_action(lockres); | 
|  | 1088 | break; | 
|  | 1089 | default: | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 1090 | mlog(ML_ERROR, "lockres %s: AST fired with invalid action: %u, " | 
|  | 1091 | "flags 0x%lx, unlock: %u\n", | 
| Mark Fasheh | e92d57d | 2006-09-12 21:34:35 -0700 | [diff] [blame] | 1092 | lockres->l_name, lockres->l_action, lockres->l_flags, | 
|  | 1093 | lockres->l_unlock_action); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1094 | BUG(); | 
|  | 1095 | } | 
| David Teigland | 1693a5c | 2008-01-30 16:52:53 -0800 | [diff] [blame] | 1096 | out: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1097 | /* set it to something invalid so if we get called again we | 
|  | 1098 | * can catch it. */ | 
|  | 1099 | lockres->l_action = OCFS2_AST_INVALID; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1100 |  | 
| Joel Becker | de55124 | 2008-02-01 14:45:08 -0800 | [diff] [blame] | 1101 | /* Did we try to cancel this lock?  Clear that state */ | 
|  | 1102 | if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) | 
|  | 1103 | lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; | 
|  | 1104 |  | 
|  | 1105 | /* | 
|  | 1106 | * We may have beaten the locking functions here.  We certainly | 
|  | 1107 | * know that dlm_lock() has been called :-) | 
|  | 1108 | * Because we can't have two lock calls in flight at once, we | 
|  | 1109 | * can use lockres->l_pending_gen. | 
|  | 1110 | */ | 
|  | 1111 | __lockres_clear_pending(lockres, lockres->l_pending_gen,  osb); | 
|  | 1112 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1113 | wake_up(&lockres->l_event); | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 1114 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1115 | } | 
|  | 1116 |  | 
| Joel Becker | 553b5eb | 2010-01-29 17:19:06 -0800 | [diff] [blame] | 1117 | static void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error) | 
|  | 1118 | { | 
|  | 1119 | struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); | 
|  | 1120 | unsigned long flags; | 
|  | 1121 |  | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 1122 | mlog(ML_BASTS, "UNLOCK AST fired for lockres %s, action = %d\n", | 
|  | 1123 | lockres->l_name, lockres->l_unlock_action); | 
| Joel Becker | 553b5eb | 2010-01-29 17:19:06 -0800 | [diff] [blame] | 1124 |  | 
|  | 1125 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 1126 | if (error) { | 
|  | 1127 | mlog(ML_ERROR, "Dlm passes error %d for lock %s, " | 
|  | 1128 | "unlock_action %d\n", error, lockres->l_name, | 
|  | 1129 | lockres->l_unlock_action); | 
|  | 1130 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
| Joel Becker | 553b5eb | 2010-01-29 17:19:06 -0800 | [diff] [blame] | 1131 | return; | 
|  | 1132 | } | 
|  | 1133 |  | 
|  | 1134 | switch(lockres->l_unlock_action) { | 
|  | 1135 | case OCFS2_UNLOCK_CANCEL_CONVERT: | 
|  | 1136 | mlog(0, "Cancel convert success for %s\n", lockres->l_name); | 
|  | 1137 | lockres->l_action = OCFS2_AST_INVALID; | 
|  | 1138 | /* Downconvert thread may have requeued this lock, we | 
|  | 1139 | * need to wake it. */ | 
|  | 1140 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) | 
|  | 1141 | ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres)); | 
|  | 1142 | break; | 
|  | 1143 | case OCFS2_UNLOCK_DROP_LOCK: | 
|  | 1144 | lockres->l_level = DLM_LOCK_IV; | 
|  | 1145 | break; | 
|  | 1146 | default: | 
|  | 1147 | BUG(); | 
|  | 1148 | } | 
|  | 1149 |  | 
|  | 1150 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 
|  | 1151 | lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; | 
|  | 1152 | wake_up(&lockres->l_event); | 
|  | 1153 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
| Joel Becker | 553b5eb | 2010-01-29 17:19:06 -0800 | [diff] [blame] | 1154 | } | 
|  | 1155 |  | 
|  | 1156 | /* | 
|  | 1157 | * This is the filesystem locking protocol.  It provides the lock handling | 
|  | 1158 | * hooks for the underlying DLM.  It has a maximum version number. | 
|  | 1159 | * The version number allows interoperability with systems running at | 
|  | 1160 | * the same major number and an equal or smaller minor number. | 
|  | 1161 | * | 
|  | 1162 | * Whenever the filesystem does new things with locks (adds or removes a | 
|  | 1163 | * lock, orders them differently, does different things underneath a lock), | 
|  | 1164 | * the version must be changed.  The protocol is negotiated when joining | 
|  | 1165 | * the dlm domain.  A node may join the domain if its major version is | 
|  | 1166 | * identical to all other nodes and its minor version is greater than | 
|  | 1167 | * or equal to all other nodes.  When its minor version is greater than | 
|  | 1168 | * the other nodes, it will run at the minor version specified by the | 
|  | 1169 | * other nodes. | 
|  | 1170 | * | 
|  | 1171 | * If a locking change is made that will not be compatible with older | 
|  | 1172 | * versions, the major number must be increased and the minor version set | 
|  | 1173 | * to zero.  If a change merely adds a behavior that can be disabled when | 
|  | 1174 | * speaking to older versions, the minor version must be increased.  If a | 
|  | 1175 | * change adds a fully backwards compatible change (eg, LVB changes that | 
|  | 1176 | * are just ignored by older versions), the version does not need to be | 
|  | 1177 | * updated. | 
|  | 1178 | */ | 
static struct ocfs2_locking_protocol lproto = {
	/* Highest locking protocol version this code advertises. */
	.lp_max_version = {
		.pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR,
		.pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR,
	},
	/* Lock, blocking and unlock AST handlers. */
	.lp_lock_ast		= ocfs2_locking_ast,
	.lp_blocking_ast	= ocfs2_blocking_ast,
	.lp_unlock_ast		= ocfs2_unlock_ast,
};
|  | 1188 |  | 
/* Pass the maximum locking protocol version (lproto.lp_max_version) to
 * ocfs2_stack_glue_set_max_proto_version(). */
void ocfs2_set_locking_protocol(void)
{
	ocfs2_stack_glue_set_max_proto_version(&lproto.lp_max_version);
}
|  | 1193 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1194 | static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, | 
|  | 1195 | int convert) | 
|  | 1196 | { | 
|  | 1197 | unsigned long flags; | 
|  | 1198 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1199 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 1200 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 
| Sunil Mushran | a191282 | 2010-01-21 10:50:03 -0800 | [diff] [blame] | 1201 | lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1202 | if (convert) | 
|  | 1203 | lockres->l_action = OCFS2_AST_INVALID; | 
|  | 1204 | else | 
|  | 1205 | lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; | 
|  | 1206 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 1207 |  | 
|  | 1208 | wake_up(&lockres->l_event); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1209 | } | 
|  | 1210 |  | 
|  | 1211 | /* Note: If we detect another process working on the lock (i.e., | 
|  | 1212 | * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller | 
|  | 1213 | * to do the right thing in that case. | 
|  | 1214 | */ | 
|  | 1215 | static int ocfs2_lock_create(struct ocfs2_super *osb, | 
|  | 1216 | struct ocfs2_lock_res *lockres, | 
|  | 1217 | int level, | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1218 | u32 dlm_flags) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1219 | { | 
|  | 1220 | int ret = 0; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1221 | unsigned long flags; | 
| Joel Becker | de55124 | 2008-02-01 14:45:08 -0800 | [diff] [blame] | 1222 | unsigned int gen; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1223 |  | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1224 | mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1225 | dlm_flags); | 
|  | 1226 |  | 
|  | 1227 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 1228 | if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) || | 
|  | 1229 | (lockres->l_flags & OCFS2_LOCK_BUSY)) { | 
|  | 1230 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 1231 | goto bail; | 
|  | 1232 | } | 
|  | 1233 |  | 
|  | 1234 | lockres->l_action = OCFS2_AST_ATTACH; | 
|  | 1235 | lockres->l_requested = level; | 
|  | 1236 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 
| Joel Becker | de55124 | 2008-02-01 14:45:08 -0800 | [diff] [blame] | 1237 | gen = lockres_set_pending(lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1238 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 1239 |  | 
| Joel Becker | 4670c46 | 2008-02-01 14:39:35 -0800 | [diff] [blame] | 1240 | ret = ocfs2_dlm_lock(osb->cconn, | 
| Joel Becker | 7431cd7 | 2008-02-01 12:15:37 -0800 | [diff] [blame] | 1241 | level, | 
|  | 1242 | &lockres->l_lksb, | 
|  | 1243 | dlm_flags, | 
|  | 1244 | lockres->l_name, | 
| Joel Becker | a796d28 | 2010-01-28 19:22:39 -0800 | [diff] [blame] | 1245 | OCFS2_LOCK_ID_MAX_LEN - 1); | 
| Joel Becker | de55124 | 2008-02-01 14:45:08 -0800 | [diff] [blame] | 1246 | lockres_clear_pending(lockres, gen, osb); | 
| Joel Becker | 7431cd7 | 2008-02-01 12:15:37 -0800 | [diff] [blame] | 1247 | if (ret) { | 
|  | 1248 | ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1249 | ocfs2_recover_from_dlm_error(lockres, 1); | 
|  | 1250 | } | 
|  | 1251 |  | 
| Joel Becker | 7431cd7 | 2008-02-01 12:15:37 -0800 | [diff] [blame] | 1252 | mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1253 |  | 
|  | 1254 | bail: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1255 | return ret; | 
|  | 1256 | } | 
|  | 1257 |  | 
|  | 1258 | static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres, | 
|  | 1259 | int flag) | 
|  | 1260 | { | 
|  | 1261 | unsigned long flags; | 
|  | 1262 | int ret; | 
|  | 1263 |  | 
|  | 1264 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 1265 | ret = lockres->l_flags & flag; | 
|  | 1266 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 1267 |  | 
|  | 1268 | return ret; | 
|  | 1269 | } | 
|  | 1270 |  | 
/* Sleep on lockres->l_event until OCFS2_LOCK_BUSY is no longer set in
 * the lockres flags. */
static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres)

{
	wait_event(lockres->l_event,
		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY));
}
|  | 1277 |  | 
/* Sleep on lockres->l_event until OCFS2_LOCK_REFRESHING is no longer set
 * in the lockres flags. */
static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres)

{
	wait_event(lockres->l_event,
		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING));
}
|  | 1284 |  | 
|  | 1285 | /* predict what lock level we'll be dropping down to on behalf | 
|  | 1286 | * of another node, and return true if the currently wanted | 
|  | 1287 | * level will be compatible with it. */ | 
|  | 1288 | static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, | 
|  | 1289 | int wanted) | 
|  | 1290 | { | 
|  | 1291 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); | 
|  | 1292 |  | 
|  | 1293 | return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking); | 
|  | 1294 | } | 
|  | 1295 |  | 
/* Prepare a mask waiter for use: empty list linkage, a fresh completion,
 * and its start time recorded via ocfs2_init_start_time(). */
static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw)
{
	INIT_LIST_HEAD(&mw->mw_item);
	init_completion(&mw->mw_complete);
	ocfs2_init_start_time(mw);
}
|  | 1302 |  | 
/* Block until @mw is completed, re-arm its completion, and return the
 * status stored in mw->mw_status. */
static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw)
{
	wait_for_completion(&mw->mw_complete);
	/* Re-arm the completion in case we want to wait on it again */
	INIT_COMPLETION(mw->mw_complete);
	return mw->mw_status;
}
|  | 1310 |  | 
|  | 1311 | static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres, | 
|  | 1312 | struct ocfs2_mask_waiter *mw, | 
|  | 1313 | unsigned long mask, | 
|  | 1314 | unsigned long goal) | 
|  | 1315 | { | 
|  | 1316 | BUG_ON(!list_empty(&mw->mw_item)); | 
|  | 1317 |  | 
|  | 1318 | assert_spin_locked(&lockres->l_lock); | 
|  | 1319 |  | 
|  | 1320 | list_add_tail(&mw->mw_item, &lockres->l_mask_waiters); | 
|  | 1321 | mw->mw_mask = mask; | 
|  | 1322 | mw->mw_goal = goal; | 
|  | 1323 | } | 
|  | 1324 |  | 
|  | 1325 | /* returns 0 if the mw that was removed was already satisfied, -EBUSY | 
|  | 1326 | * if the mask still hadn't reached its goal */ | 
|  | 1327 | static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, | 
|  | 1328 | struct ocfs2_mask_waiter *mw) | 
|  | 1329 | { | 
|  | 1330 | unsigned long flags; | 
|  | 1331 | int ret = 0; | 
|  | 1332 |  | 
|  | 1333 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 1334 | if (!list_empty(&mw->mw_item)) { | 
|  | 1335 | if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) | 
|  | 1336 | ret = -EBUSY; | 
|  | 1337 |  | 
|  | 1338 | list_del_init(&mw->mw_item); | 
|  | 1339 | init_completion(&mw->mw_complete); | 
|  | 1340 | } | 
|  | 1341 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 1342 |  | 
|  | 1343 | return ret; | 
|  | 1344 |  | 
|  | 1345 | } | 
|  | 1346 |  | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1347 | static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, | 
|  | 1348 | struct ocfs2_lock_res *lockres) | 
|  | 1349 | { | 
|  | 1350 | int ret; | 
|  | 1351 |  | 
|  | 1352 | ret = wait_for_completion_interruptible(&mw->mw_complete); | 
|  | 1353 | if (ret) | 
|  | 1354 | lockres_remove_mask_waiter(lockres, mw); | 
|  | 1355 | else | 
|  | 1356 | ret = mw->mw_status; | 
|  | 1357 | /* Re-arm the completion in case we want to wait on it again */ | 
|  | 1358 | INIT_COMPLETION(mw->mw_complete); | 
|  | 1359 | return ret; | 
|  | 1360 | } | 
|  | 1361 |  | 
/*
 * Take a holder reference on @lockres at @level, asking the DLM to attach
 * or upconvert the lock first when the currently granted level is too low.
 *
 * @osb:        super block info; supplies the cluster connection and the
 *              mount options (OCFS2_MOUNT_NOINTR disables signal checks).
 * @lockres:    lock resource to acquire.
 * @level:      requested DLM level.  Must not be DLM_LOCK_IV or
 *              DLM_LOCK_NL when a DLM request has to be issued.
 * @lkm_flags:  flags passed to ocfs2_dlm_lock().  DLM_LKF_VALBLK is added
 *              for lock types using an LVB, and DLM_LKF_CONVERT is set or
 *              cleared depending on whether the lockres is attached.  With
 *              DLM_LKF_NOQUEUE only one DLM request is attempted; if the
 *              level still is not granted afterwards, -EAGAIN is returned.
 * @arg_flags:  OCFS2_LOCK_NONBLOCK makes the function return -EAGAIN
 *              instead of sleeping on a busy/blocked lockres.
 *              OCFS2_META_LOCK_NOQUEUE is only consulted for the lockdep
 *              annotation.
 * @l_subclass: lockdep subclass (used under CONFIG_DEBUG_LOCK_ALLOC only).
 * @caller_ip:  caller address for lockdep (same config option only).
 *
 * Returns 0 once the holder count has been incremented.  Otherwise
 * returns -ERESTARTSYS (signal pending while signals are still being
 * honoured), -EAGAIN (see above), the error from ocfs2_dlm_lock(), or a
 * non-zero status from ocfs2_wait_for_mask().
 */
static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
				struct ocfs2_lock_res *lockres,
				int level,
				u32 lkm_flags,
				int arg_flags,
				int l_subclass,
				unsigned long caller_ip)
{
	struct ocfs2_mask_waiter mw;
	int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR);
	int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */
	unsigned long flags;
	unsigned int gen;
	int noqueue_attempted = 0;

	ocfs2_init_mask_waiter(&mw);

	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
		lkm_flags |= DLM_LKF_VALBLK;

	/* Every pass re-evaluates the lockres state from scratch under
	 * l_lock; we come back here after each wait or DLM request. */
again:
	wait = 0;

	spin_lock_irqsave(&lockres->l_lock, flags);

	if (catch_signals && signal_pending(current)) {
		ret = -ERESTARTSYS;
		goto unlock;
	}

	mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING,
			"Cluster lock called on freeing lockres %s! flags "
			"0x%lx\n", lockres->l_name, lockres->l_flags);

	/* We only compare against the currently granted level
	 * here. If the lock is blocked waiting on a downconvert,
	 * we'll get caught below. */
	if (lockres->l_flags & OCFS2_LOCK_BUSY &&
	    level > lockres->l_level) {
		/* is someone sitting in dlm_lock? If so, wait on
		 * them. */
		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
		wait = 1;
		goto unlock;
	}

	if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) {
		/*
		 * We've upconverted. If the lock now has a level we can
		 * work with, we take it. If, however, the lock is not at the
		 * required level, we go thru the full cycle. One way this could
		 * happen is if a process requesting an upconvert to PR is
		 * closely followed by another requesting upconvert to an EX.
		 * If the process requesting EX lands here, we want it to
		 * continue attempting to upconvert and let the process
		 * requesting PR take the lock.
		 * If multiple processes request upconvert to PR, the first one
		 * here will take the lock. The others will have to go thru the
		 * OCFS2_LOCK_BLOCKED check to ensure that there is no pending
		 * downconvert request.
		 */
		if (level <= lockres->l_level)
			goto update_holders;
	}

	if (lockres->l_flags & OCFS2_LOCK_BLOCKED &&
	    !ocfs2_may_continue_on_blocked_lock(lockres, level)) {
		/* the lock is currently blocked on behalf of
		 * another node */
		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0);
		wait = 1;
		goto unlock;
	}

	if (level > lockres->l_level) {
		/* A NOQUEUE request gets exactly one trip into the DLM;
		 * landing here again means it was not granted. */
		if (noqueue_attempted > 0) {
			ret = -EAGAIN;
			goto unlock;
		}
		if (lkm_flags & DLM_LKF_NOQUEUE)
			noqueue_attempted = 1;

		if (lockres->l_action != OCFS2_AST_INVALID)
			mlog(ML_ERROR, "lockres %s has action %u pending\n",
			     lockres->l_name, lockres->l_action);

		/* Not yet attached: create the lock.  Otherwise convert
		 * the existing one. */
		if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
			lockres->l_action = OCFS2_AST_ATTACH;
			lkm_flags &= ~DLM_LKF_CONVERT;
		} else {
			lockres->l_action = OCFS2_AST_CONVERT;
			lkm_flags |= DLM_LKF_CONVERT;
		}

		lockres->l_requested = level;
		lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
		gen = lockres_set_pending(lockres);
		spin_unlock_irqrestore(&lockres->l_lock, flags);

		BUG_ON(level == DLM_LOCK_IV);
		BUG_ON(level == DLM_LOCK_NL);

		mlog(ML_BASTS, "lockres %s, convert from %d to %d\n",
		     lockres->l_name, lockres->l_level, level);

		/* call dlm_lock to upgrade lock now */
		ret = ocfs2_dlm_lock(osb->cconn,
				     level,
				     &lockres->l_lksb,
				     lkm_flags,
				     lockres->l_name,
				     OCFS2_LOCK_ID_MAX_LEN - 1);
		lockres_clear_pending(lockres, gen, osb);
		if (ret) {
			/* -EAGAIN on a NOQUEUE request is an expected
			 * outcome, so it is not logged as an error. */
			if (!(lkm_flags & DLM_LKF_NOQUEUE) ||
			    (ret != -EAGAIN)) {
				ocfs2_log_dlm_error("ocfs2_dlm_lock",
						    ret, lockres);
			}
			ocfs2_recover_from_dlm_error(lockres, 1);
			/* l_lock is not held here, so skip the unlock
			 * label. */
			goto out;
		}

		mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n",
		     lockres->l_name);

		/* At this point we've gone inside the dlm and need to
		 * complete our work regardless. */
		catch_signals = 0;

		/* wait for busy to clear and carry on */
		goto again;
	}

update_holders:
	/* Ok, if we get here then we're good to go. */
	ocfs2_inc_holders(lockres, level);

	ret = 0;
unlock:
	/* Reached with l_lock held, both on success and on the
	 * wait/error paths above. */
	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);

	spin_unlock_irqrestore(&lockres->l_lock, flags);
out:
	/*
	 * This is helping work around a lock inversion between the page lock
	 * and dlm locks.  One path holds the page lock while calling aops
	 * which block acquiring dlm locks.  The voting thread holds dlm
	 * locks while acquiring page locks while down converting data locks.
	 * This block is helping an aop path notice the inversion and back
	 * off to unlock its page lock before trying the dlm lock again.
	 */
	if (wait && arg_flags & OCFS2_LOCK_NONBLOCK &&
	    mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) {
		wait = 0;
		/* Non-zero means the awaited flags have not reached their
		 * goal yet: give up.  Zero means the condition is already
		 * satisfied, so simply retry. */
		if (lockres_remove_mask_waiter(lockres, &mw))
			ret = -EAGAIN;
		else
			goto again;
	}
	if (wait) {
		ret = ocfs2_wait_for_mask(&mw);
		if (ret == 0)
			goto again;
		mlog_errno(ret);
	}
	ocfs2_update_lock_stats(lockres, level, &mw, ret);

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/* Tell lockdep about the acquisition: PR is annotated as a read
	 * acquire, anything else as a regular acquire. */
	if (!ret && lockres->l_lockdep_map.key != NULL) {
		if (level == DLM_LOCK_PR)
			rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass,
				!!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
				caller_ip);
		else
			rwsem_acquire(&lockres->l_lockdep_map, l_subclass,
				!!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
				caller_ip);
	}
#endif
	return ret;
}
|  | 1544 |  | 
/* Convenience wrapper around __ocfs2_cluster_lock() that uses lockdep
 * subclass 0 and passes _RET_IP_ as the caller address. */
static inline int ocfs2_cluster_lock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres,
				     int level,
				     u32 lkm_flags,
				     int arg_flags)
{
	return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags,
				    0, _RET_IP_);
}
|  | 1554 |  | 
|  | 1555 |  | 
|  | 1556 | static void __ocfs2_cluster_unlock(struct ocfs2_super *osb, | 
|  | 1557 | struct ocfs2_lock_res *lockres, | 
|  | 1558 | int level, | 
|  | 1559 | unsigned long caller_ip) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1560 | { | 
|  | 1561 | unsigned long flags; | 
|  | 1562 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1563 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 1564 | ocfs2_dec_holders(lockres, level); | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 1565 | ocfs2_downconvert_on_unlock(osb, lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1566 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
| Jan Kara | cb25797 | 2009-06-04 15:26:50 +0200 | [diff] [blame] | 1567 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 
|  | 1568 | if (lockres->l_lockdep_map.key != NULL) | 
|  | 1569 | rwsem_release(&lockres->l_lockdep_map, 1, caller_ip); | 
|  | 1570 | #endif | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1571 | } | 
|  | 1572 |  | 
| Adrian Bunk | da66116e | 2006-11-20 03:24:28 +0100 | [diff] [blame] | 1573 | static int ocfs2_create_new_lock(struct ocfs2_super *osb, | 
|  | 1574 | struct ocfs2_lock_res *lockres, | 
|  | 1575 | int ex, | 
|  | 1576 | int local) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1577 | { | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1578 | int level =  ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1579 | unsigned long flags; | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1580 | u32 lkm_flags = local ? DLM_LKF_LOCAL : 0; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1581 |  | 
|  | 1582 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 1583 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); | 
|  | 1584 | lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); | 
|  | 1585 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 1586 |  | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 1587 | return ocfs2_lock_create(osb, lockres, level, lkm_flags); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1588 | } | 
|  | 1589 |  | 
|  | 1590 | /* Grants us an EX lock on the data and metadata resources, skipping | 
|  | 1591 | * the normal cluster directory lookup. Use this ONLY on newly created | 
|  | 1592 | * inodes which other nodes can't possibly see, and which haven't been | 
|  | 1593 | * hashed in the inode hash yet. This can give us a good performance | 
|  | 1594 | * increase as it'll skip the network broadcast normally associated | 
|  | 1595 | * with creating a new lock resource. */ | 
|  | 1596 | int ocfs2_create_new_inode_locks(struct inode *inode) | 
|  | 1597 | { | 
|  | 1598 | int ret; | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 1599 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1600 |  | 
|  | 1601 | BUG_ON(!inode); | 
|  | 1602 | BUG_ON(!ocfs2_inode_is_new(inode)); | 
|  | 1603 |  | 
| Mark Fasheh | b0697053 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 1604 | mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1605 |  | 
|  | 1606 | /* NOTE: That we don't increment any of the holder counts, nor | 
|  | 1607 | * do we add anything to a journal handle. Since this is | 
|  | 1608 | * supposed to be a new inode which the cluster doesn't know | 
|  | 1609 | * about yet, there is no need to.  As far as the LVB handling | 
|  | 1610 | * is concerned, this is basically like acquiring an EX lock | 
|  | 1611 | * on a resource which has an invalid one -- we'll set it | 
|  | 1612 | * valid when we release the EX. */ | 
|  | 1613 |  | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 1614 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1615 | if (ret) { | 
|  | 1616 | mlog_errno(ret); | 
|  | 1617 | goto bail; | 
|  | 1618 | } | 
|  | 1619 |  | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 1620 | /* | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1621 | * We don't want to use DLM_LKF_LOCAL on a meta data lock as they | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 1622 | * don't use a generation in their lock names. | 
|  | 1623 | */ | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 1624 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1625 | if (ret) { | 
|  | 1626 | mlog_errno(ret); | 
|  | 1627 | goto bail; | 
|  | 1628 | } | 
|  | 1629 |  | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1630 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0); | 
|  | 1631 | if (ret) { | 
|  | 1632 | mlog_errno(ret); | 
|  | 1633 | goto bail; | 
|  | 1634 | } | 
|  | 1635 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1636 | bail: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1637 | return ret; | 
|  | 1638 | } | 
|  | 1639 |  | 
|  | 1640 | int ocfs2_rw_lock(struct inode *inode, int write) | 
|  | 1641 | { | 
|  | 1642 | int status, level; | 
|  | 1643 | struct ocfs2_lock_res *lockres; | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 1644 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1645 |  | 
|  | 1646 | BUG_ON(!inode); | 
|  | 1647 |  | 
| Mark Fasheh | b0697053 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 1648 | mlog(0, "inode %llu take %s RW lock\n", | 
|  | 1649 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1650 | write ? "EXMODE" : "PRMODE"); | 
|  | 1651 |  | 
| Tao Ma | c1e8d35 | 2011-03-07 16:43:21 +0800 | [diff] [blame] | 1652 | if (ocfs2_mount_local(osb)) | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 1653 | return 0; | 
|  | 1654 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1655 | lockres = &OCFS2_I(inode)->ip_rw_lockres; | 
|  | 1656 |  | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1657 | level = write ? DLM_LOCK_EX : DLM_LOCK_PR; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1658 |  | 
|  | 1659 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, | 
|  | 1660 | 0); | 
|  | 1661 | if (status < 0) | 
|  | 1662 | mlog_errno(status); | 
|  | 1663 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1664 | return status; | 
|  | 1665 | } | 
|  | 1666 |  | 
|  | 1667 | void ocfs2_rw_unlock(struct inode *inode, int write) | 
|  | 1668 | { | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1669 | int level = write ? DLM_LOCK_EX : DLM_LOCK_PR; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1670 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 1671 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1672 |  | 
| Mark Fasheh | b0697053 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 1673 | mlog(0, "inode %llu drop %s RW lock\n", | 
|  | 1674 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1675 | write ? "EXMODE" : "PRMODE"); | 
|  | 1676 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 1677 | if (!ocfs2_mount_local(osb)) | 
|  | 1678 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1679 | } | 
|  | 1680 |  | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1681 | /* | 
|  | 1682 | * ocfs2_open_lock always get PR mode lock. | 
|  | 1683 | */ | 
|  | 1684 | int ocfs2_open_lock(struct inode *inode) | 
|  | 1685 | { | 
|  | 1686 | int status = 0; | 
|  | 1687 | struct ocfs2_lock_res *lockres; | 
|  | 1688 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
|  | 1689 |  | 
|  | 1690 | BUG_ON(!inode); | 
|  | 1691 |  | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1692 | mlog(0, "inode %llu take PRMODE open lock\n", | 
|  | 1693 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 
|  | 1694 |  | 
|  | 1695 | if (ocfs2_mount_local(osb)) | 
|  | 1696 | goto out; | 
|  | 1697 |  | 
|  | 1698 | lockres = &OCFS2_I(inode)->ip_open_lockres; | 
|  | 1699 |  | 
|  | 1700 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1701 | DLM_LOCK_PR, 0, 0); | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1702 | if (status < 0) | 
|  | 1703 | mlog_errno(status); | 
|  | 1704 |  | 
|  | 1705 | out: | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1706 | return status; | 
|  | 1707 | } | 
|  | 1708 |  | 
|  | 1709 | int ocfs2_try_open_lock(struct inode *inode, int write) | 
|  | 1710 | { | 
|  | 1711 | int status = 0, level; | 
|  | 1712 | struct ocfs2_lock_res *lockres; | 
|  | 1713 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
|  | 1714 |  | 
|  | 1715 | BUG_ON(!inode); | 
|  | 1716 |  | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1717 | mlog(0, "inode %llu try to take %s open lock\n", | 
|  | 1718 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 
|  | 1719 | write ? "EXMODE" : "PRMODE"); | 
|  | 1720 |  | 
|  | 1721 | if (ocfs2_mount_local(osb)) | 
|  | 1722 | goto out; | 
|  | 1723 |  | 
|  | 1724 | lockres = &OCFS2_I(inode)->ip_open_lockres; | 
|  | 1725 |  | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1726 | level = write ? DLM_LOCK_EX : DLM_LOCK_PR; | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1727 |  | 
|  | 1728 | /* | 
|  | 1729 | * The file system may already holding a PRMODE/EXMODE open lock. | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1730 | * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1731 | * other nodes and the -EAGAIN will indicate to the caller that | 
|  | 1732 | * this inode is still in use. | 
|  | 1733 | */ | 
|  | 1734 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1735 | level, DLM_LKF_NOQUEUE, 0); | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1736 |  | 
|  | 1737 | out: | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1738 | return status; | 
|  | 1739 | } | 
|  | 1740 |  | 
|  | 1741 | /* | 
|  | 1742 | * ocfs2_open_unlock unlock PR and EX mode open locks. | 
|  | 1743 | */ | 
|  | 1744 | void ocfs2_open_unlock(struct inode *inode) | 
|  | 1745 | { | 
|  | 1746 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres; | 
|  | 1747 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
|  | 1748 |  | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1749 | mlog(0, "inode %llu drop open lock\n", | 
|  | 1750 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 
|  | 1751 |  | 
|  | 1752 | if (ocfs2_mount_local(osb)) | 
|  | 1753 | goto out; | 
|  | 1754 |  | 
|  | 1755 | if(lockres->l_ro_holders) | 
|  | 1756 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1757 | DLM_LOCK_PR); | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1758 | if(lockres->l_ex_holders) | 
|  | 1759 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1760 | DLM_LOCK_EX); | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1761 |  | 
|  | 1762 | out: | 
| Tao Ma | c1e8d35 | 2011-03-07 16:43:21 +0800 | [diff] [blame] | 1763 | return; | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 1764 | } | 
|  | 1765 |  | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1766 | static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres, | 
|  | 1767 | int level) | 
|  | 1768 | { | 
|  | 1769 | int ret; | 
|  | 1770 | struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); | 
|  | 1771 | unsigned long flags; | 
|  | 1772 | struct ocfs2_mask_waiter mw; | 
|  | 1773 |  | 
|  | 1774 | ocfs2_init_mask_waiter(&mw); | 
|  | 1775 |  | 
|  | 1776 | retry_cancel: | 
|  | 1777 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 1778 | if (lockres->l_flags & OCFS2_LOCK_BUSY) { | 
|  | 1779 | ret = ocfs2_prepare_cancel_convert(osb, lockres); | 
|  | 1780 | if (ret) { | 
|  | 1781 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 1782 | ret = ocfs2_cancel_convert(osb, lockres); | 
|  | 1783 | if (ret < 0) { | 
|  | 1784 | mlog_errno(ret); | 
|  | 1785 | goto out; | 
|  | 1786 | } | 
|  | 1787 | goto retry_cancel; | 
|  | 1788 | } | 
|  | 1789 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | 
|  | 1790 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 1791 |  | 
|  | 1792 | ocfs2_wait_for_mask(&mw); | 
|  | 1793 | goto retry_cancel; | 
|  | 1794 | } | 
|  | 1795 |  | 
|  | 1796 | ret = -ERESTARTSYS; | 
|  | 1797 | /* | 
|  | 1798 | * We may still have gotten the lock, in which case there's no | 
|  | 1799 | * point to restarting the syscall. | 
|  | 1800 | */ | 
|  | 1801 | if (lockres->l_level == level) | 
|  | 1802 | ret = 0; | 
|  | 1803 |  | 
|  | 1804 | mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret, | 
|  | 1805 | lockres->l_flags, lockres->l_level, lockres->l_action); | 
|  | 1806 |  | 
|  | 1807 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 1808 |  | 
|  | 1809 | out: | 
|  | 1810 | return ret; | 
|  | 1811 | } | 
|  | 1812 |  | 
|  | 1813 | /* | 
|  | 1814 | * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of | 
|  | 1815 | * flock() calls. The locking approach this requires is sufficiently | 
|  | 1816 | * different from all other cluster lock types that we implement a | 
| Daniel Mack | 3ad2f3f | 2010-02-03 08:01:28 +0800 | [diff] [blame] | 1817 | * separate path to the "low-level" dlm calls. In particular: | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1818 | * | 
|  | 1819 | * - No optimization of lock levels is done - we take at exactly | 
|  | 1820 | *   what's been requested. | 
|  | 1821 | * | 
|  | 1822 | * - No lock caching is employed. We immediately downconvert to | 
|  | 1823 | *   no-lock at unlock time. This also means flock locks never go on | 
|  | 1824 | *   the blocking list). | 
|  | 1825 | * | 
|  | 1826 | * - Since userspace can trivially deadlock itself with flock, we make | 
|  | 1827 | *   sure to allow cancellation of a misbehaving applications flock() | 
|  | 1828 | *   request. | 
|  | 1829 | * | 
|  | 1830 | * - Access to any flock lockres doesn't require concurrency, so we | 
|  | 1831 | *   can simplify the code by requiring the caller to guarantee | 
|  | 1832 | *   serialization of dlmglue flock calls. | 
|  | 1833 | */ | 
|  | 1834 | int ocfs2_file_lock(struct file *file, int ex, int trylock) | 
|  | 1835 | { | 
| Mark Fasheh | e988cf1 | 2008-07-10 09:25:39 -0700 | [diff] [blame] | 1836 | int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 
|  | 1837 | unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0; | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1838 | unsigned long flags; | 
|  | 1839 | struct ocfs2_file_private *fp = file->private_data; | 
|  | 1840 | struct ocfs2_lock_res *lockres = &fp->fp_flock; | 
|  | 1841 | struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); | 
|  | 1842 | struct ocfs2_mask_waiter mw; | 
|  | 1843 |  | 
|  | 1844 | ocfs2_init_mask_waiter(&mw); | 
|  | 1845 |  | 
|  | 1846 | if ((lockres->l_flags & OCFS2_LOCK_BUSY) || | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1847 | (lockres->l_level > DLM_LOCK_NL)) { | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1848 | mlog(ML_ERROR, | 
|  | 1849 | "File lock \"%s\" has busy or locked state: flags: 0x%lx, " | 
|  | 1850 | "level: %u\n", lockres->l_name, lockres->l_flags, | 
|  | 1851 | lockres->l_level); | 
|  | 1852 | return -EINVAL; | 
|  | 1853 | } | 
|  | 1854 |  | 
|  | 1855 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 1856 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { | 
|  | 1857 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | 
|  | 1858 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 1859 |  | 
|  | 1860 | /* | 
|  | 1861 | * Get the lock at NLMODE to start - that way we | 
|  | 1862 | * can cancel the upconvert request if need be. | 
|  | 1863 | */ | 
| Mark Fasheh | e988cf1 | 2008-07-10 09:25:39 -0700 | [diff] [blame] | 1864 | ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0); | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1865 | if (ret < 0) { | 
|  | 1866 | mlog_errno(ret); | 
|  | 1867 | goto out; | 
|  | 1868 | } | 
|  | 1869 |  | 
|  | 1870 | ret = ocfs2_wait_for_mask(&mw); | 
|  | 1871 | if (ret) { | 
|  | 1872 | mlog_errno(ret); | 
|  | 1873 | goto out; | 
|  | 1874 | } | 
|  | 1875 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 1876 | } | 
|  | 1877 |  | 
|  | 1878 | lockres->l_action = OCFS2_AST_CONVERT; | 
| Mark Fasheh | e988cf1 | 2008-07-10 09:25:39 -0700 | [diff] [blame] | 1879 | lkm_flags |= DLM_LKF_CONVERT; | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1880 | lockres->l_requested = level; | 
|  | 1881 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 
|  | 1882 |  | 
|  | 1883 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | 
|  | 1884 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 1885 |  | 
| Joel Becker | 4670c46 | 2008-02-01 14:39:35 -0800 | [diff] [blame] | 1886 | ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags, | 
| Joel Becker | a796d28 | 2010-01-28 19:22:39 -0800 | [diff] [blame] | 1887 | lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1); | 
| Joel Becker | 7431cd7 | 2008-02-01 12:15:37 -0800 | [diff] [blame] | 1888 | if (ret) { | 
|  | 1889 | if (!trylock || (ret != -EAGAIN)) { | 
| Joel Becker | 24ef181 | 2008-01-29 17:37:32 -0800 | [diff] [blame] | 1890 | ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1891 | ret = -EINVAL; | 
|  | 1892 | } | 
|  | 1893 |  | 
|  | 1894 | ocfs2_recover_from_dlm_error(lockres, 1); | 
|  | 1895 | lockres_remove_mask_waiter(lockres, &mw); | 
|  | 1896 | goto out; | 
|  | 1897 | } | 
|  | 1898 |  | 
|  | 1899 | ret = ocfs2_wait_for_mask_interruptible(&mw, lockres); | 
|  | 1900 | if (ret == -ERESTARTSYS) { | 
|  | 1901 | /* | 
|  | 1902 | * Userspace can cause deadlock itself with | 
|  | 1903 | * flock(). Current behavior locally is to allow the | 
|  | 1904 | * deadlock, but abort the system call if a signal is | 
|  | 1905 | * received. We follow this example, otherwise a | 
|  | 1906 | * poorly written program could sit in kernel until | 
|  | 1907 | * reboot. | 
|  | 1908 | * | 
|  | 1909 | * Handling this is a bit more complicated for Ocfs2 | 
|  | 1910 | * though. We can't exit this function with an | 
|  | 1911 | * outstanding lock request, so a cancel convert is | 
|  | 1912 | * required. We intentionally overwrite 'ret' - if the | 
|  | 1913 | * cancel fails and the lock was granted, it's easier | 
| André Goddard Rosa | af901ca | 2009-11-14 13:09:05 -0200 | [diff] [blame] | 1914 | * to just bubble success back up to the user. | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1915 | */ | 
|  | 1916 | ret = ocfs2_flock_handle_signal(lockres, level); | 
| David Teigland | 1693a5c | 2008-01-30 16:52:53 -0800 | [diff] [blame] | 1917 | } else if (!ret && (level > lockres->l_level)) { | 
|  | 1918 | /* Trylock failed asynchronously */ | 
|  | 1919 | BUG_ON(!trylock); | 
|  | 1920 | ret = -EAGAIN; | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1921 | } | 
|  | 1922 |  | 
|  | 1923 | out: | 
|  | 1924 |  | 
|  | 1925 | mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n", | 
|  | 1926 | lockres->l_name, ex, trylock, ret); | 
|  | 1927 | return ret; | 
|  | 1928 | } | 
|  | 1929 |  | 
|  | 1930 | void ocfs2_file_unlock(struct file *file) | 
|  | 1931 | { | 
|  | 1932 | int ret; | 
| Joel Becker | de55124 | 2008-02-01 14:45:08 -0800 | [diff] [blame] | 1933 | unsigned int gen; | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1934 | unsigned long flags; | 
|  | 1935 | struct ocfs2_file_private *fp = file->private_data; | 
|  | 1936 | struct ocfs2_lock_res *lockres = &fp->fp_flock; | 
|  | 1937 | struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); | 
|  | 1938 | struct ocfs2_mask_waiter mw; | 
|  | 1939 |  | 
|  | 1940 | ocfs2_init_mask_waiter(&mw); | 
|  | 1941 |  | 
|  | 1942 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) | 
|  | 1943 | return; | 
|  | 1944 |  | 
| Mark Fasheh | e988cf1 | 2008-07-10 09:25:39 -0700 | [diff] [blame] | 1945 | if (lockres->l_level == DLM_LOCK_NL) | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1946 | return; | 
|  | 1947 |  | 
|  | 1948 | mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", | 
|  | 1949 | lockres->l_name, lockres->l_flags, lockres->l_level, | 
|  | 1950 | lockres->l_action); | 
|  | 1951 |  | 
|  | 1952 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 1953 | /* | 
|  | 1954 | * Fake a blocking ast for the downconvert code. | 
|  | 1955 | */ | 
|  | 1956 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1957 | lockres->l_blocking = DLM_LOCK_EX; | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1958 |  | 
| Mark Fasheh | e988cf1 | 2008-07-10 09:25:39 -0700 | [diff] [blame] | 1959 | gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL); | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1960 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | 
|  | 1961 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 1962 |  | 
| Mark Fasheh | e988cf1 | 2008-07-10 09:25:39 -0700 | [diff] [blame] | 1963 | ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen); | 
| Mark Fasheh | cf8e06f | 2007-12-20 16:43:10 -0800 | [diff] [blame] | 1964 | if (ret) { | 
|  | 1965 | mlog_errno(ret); | 
|  | 1966 | return; | 
|  | 1967 | } | 
|  | 1968 |  | 
|  | 1969 | ret = ocfs2_wait_for_mask(&mw); | 
|  | 1970 | if (ret) | 
|  | 1971 | mlog_errno(ret); | 
|  | 1972 | } | 
|  | 1973 |  | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 1974 | static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, | 
|  | 1975 | struct ocfs2_lock_res *lockres) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1976 | { | 
|  | 1977 | int kick = 0; | 
|  | 1978 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1979 | /* If we know that another node is waiting on our lock, kick | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 1980 | * the downconvert thread * pre-emptively when we reach a release | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1981 | * condition. */ | 
|  | 1982 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { | 
|  | 1983 | switch(lockres->l_blocking) { | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1984 | case DLM_LOCK_EX: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1985 | if (!lockres->l_ex_holders && !lockres->l_ro_holders) | 
|  | 1986 | kick = 1; | 
|  | 1987 | break; | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 1988 | case DLM_LOCK_PR: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1989 | if (!lockres->l_ex_holders) | 
|  | 1990 | kick = 1; | 
|  | 1991 | break; | 
|  | 1992 | default: | 
|  | 1993 | BUG(); | 
|  | 1994 | } | 
|  | 1995 | } | 
|  | 1996 |  | 
|  | 1997 | if (kick) | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 1998 | ocfs2_wake_downconvert_thread(osb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 1999 | } | 
|  | 2000 |  | 
/*
 * Layout of a timestamp packed into 64 bits: the top OCFS2_SEC_BITS
 * bits carry seconds, the remaining low bits carry nanoseconds.
 */
#define OCFS2_SEC_BITS   34
#define OCFS2_SEC_SHIFT  (64 - OCFS2_SEC_BITS)
#define OCFS2_NSEC_MASK  ((1ULL << OCFS2_SEC_SHIFT) - 1)
|  | 2004 |  | 
|  | 2005 | /* LVB only has room for 64 bits of time here so we pack it for | 
|  | 2006 | * now. */ | 
|  | 2007 | static u64 ocfs2_pack_timespec(struct timespec *spec) | 
|  | 2008 | { | 
|  | 2009 | u64 res; | 
|  | 2010 | u64 sec = spec->tv_sec; | 
|  | 2011 | u32 nsec = spec->tv_nsec; | 
|  | 2012 |  | 
|  | 2013 | res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK); | 
|  | 2014 |  | 
|  | 2015 | return res; | 
|  | 2016 | } | 
|  | 2017 |  | 
|  | 2018 | /* Call this with the lockres locked. I am reasonably sure we don't | 
|  | 2019 | * need ip_lock in this function as anyone who would be changing those | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2020 | * values is supposed to be blocked in ocfs2_inode_lock right now. */ | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2021 | static void __ocfs2_stuff_meta_lvb(struct inode *inode) | 
|  | 2022 | { | 
|  | 2023 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2024 | struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2025 | struct ocfs2_meta_lvb *lvb; | 
|  | 2026 |  | 
| Mark Fasheh | a641dc2 | 2008-12-24 16:03:48 -0800 | [diff] [blame] | 2027 | lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2028 |  | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 2029 | /* | 
|  | 2030 | * Invalidate the LVB of a deleted inode - this way other | 
|  | 2031 | * nodes are forced to go to disk and discover the new inode | 
|  | 2032 | * status. | 
|  | 2033 | */ | 
|  | 2034 | if (oi->ip_flags & OCFS2_INODE_DELETED) { | 
|  | 2035 | lvb->lvb_version = 0; | 
|  | 2036 | goto out; | 
|  | 2037 | } | 
|  | 2038 |  | 
| Mark Fasheh | 4d3b83f | 2006-09-12 15:22:18 -0700 | [diff] [blame] | 2039 | lvb->lvb_version   = OCFS2_LVB_VERSION; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2040 | lvb->lvb_isize	   = cpu_to_be64(i_size_read(inode)); | 
|  | 2041 | lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); | 
|  | 2042 | lvb->lvb_iuid      = cpu_to_be32(inode->i_uid); | 
|  | 2043 | lvb->lvb_igid      = cpu_to_be32(inode->i_gid); | 
|  | 2044 | lvb->lvb_imode     = cpu_to_be16(inode->i_mode); | 
|  | 2045 | lvb->lvb_inlink    = cpu_to_be16(inode->i_nlink); | 
|  | 2046 | lvb->lvb_iatime_packed  = | 
|  | 2047 | cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime)); | 
|  | 2048 | lvb->lvb_ictime_packed = | 
|  | 2049 | cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime)); | 
|  | 2050 | lvb->lvb_imtime_packed = | 
|  | 2051 | cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); | 
| Herbert Poetzl | ca4d147 | 2006-07-03 17:27:12 -0700 | [diff] [blame] | 2052 | lvb->lvb_iattr    = cpu_to_be32(oi->ip_attr); | 
| Mark Fasheh | 15b1e36 | 2007-09-07 13:58:15 -0700 | [diff] [blame] | 2053 | lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features); | 
| Mark Fasheh | f9e2d82 | 2006-09-12 15:35:49 -0700 | [diff] [blame] | 2054 | lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2055 |  | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 2056 | out: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2057 | mlog_meta_lvb(0, lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2058 | } | 
|  | 2059 |  | 
|  | 2060 | static void ocfs2_unpack_timespec(struct timespec *spec, | 
|  | 2061 | u64 packed_time) | 
|  | 2062 | { | 
|  | 2063 | spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT; | 
|  | 2064 | spec->tv_nsec = packed_time & OCFS2_NSEC_MASK; | 
|  | 2065 | } | 
|  | 2066 |  | 
|  | 2067 | static void ocfs2_refresh_inode_from_lvb(struct inode *inode) | 
|  | 2068 | { | 
|  | 2069 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2070 | struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2071 | struct ocfs2_meta_lvb *lvb; | 
|  | 2072 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2073 | mlog_meta_lvb(0, lockres); | 
|  | 2074 |  | 
| Mark Fasheh | a641dc2 | 2008-12-24 16:03:48 -0800 | [diff] [blame] | 2075 | lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2076 |  | 
|  | 2077 | /* We're safe here without the lockres lock... */ | 
|  | 2078 | spin_lock(&oi->ip_lock); | 
|  | 2079 | oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters); | 
|  | 2080 | i_size_write(inode, be64_to_cpu(lvb->lvb_isize)); | 
|  | 2081 |  | 
| Herbert Poetzl | ca4d147 | 2006-07-03 17:27:12 -0700 | [diff] [blame] | 2082 | oi->ip_attr = be32_to_cpu(lvb->lvb_iattr); | 
| Mark Fasheh | 15b1e36 | 2007-09-07 13:58:15 -0700 | [diff] [blame] | 2083 | oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures); | 
| Herbert Poetzl | ca4d147 | 2006-07-03 17:27:12 -0700 | [diff] [blame] | 2084 | ocfs2_set_inode_flags(inode); | 
|  | 2085 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2086 | /* fast-symlinks are a special case */ | 
|  | 2087 | if (S_ISLNK(inode->i_mode) && !oi->ip_clusters) | 
|  | 2088 | inode->i_blocks = 0; | 
|  | 2089 | else | 
| Mark Fasheh | 8110b07 | 2007-03-22 16:53:23 -0700 | [diff] [blame] | 2090 | inode->i_blocks = ocfs2_inode_sector_count(inode); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2091 |  | 
|  | 2092 | inode->i_uid     = be32_to_cpu(lvb->lvb_iuid); | 
|  | 2093 | inode->i_gid     = be32_to_cpu(lvb->lvb_igid); | 
|  | 2094 | inode->i_mode    = be16_to_cpu(lvb->lvb_imode); | 
|  | 2095 | inode->i_nlink   = be16_to_cpu(lvb->lvb_inlink); | 
|  | 2096 | ocfs2_unpack_timespec(&inode->i_atime, | 
|  | 2097 | be64_to_cpu(lvb->lvb_iatime_packed)); | 
|  | 2098 | ocfs2_unpack_timespec(&inode->i_mtime, | 
|  | 2099 | be64_to_cpu(lvb->lvb_imtime_packed)); | 
|  | 2100 | ocfs2_unpack_timespec(&inode->i_ctime, | 
|  | 2101 | be64_to_cpu(lvb->lvb_ictime_packed)); | 
|  | 2102 | spin_unlock(&oi->ip_lock); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2103 | } | 
|  | 2104 |  | 
| Mark Fasheh | f9e2d82 | 2006-09-12 15:35:49 -0700 | [diff] [blame] | 2105 | static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, | 
|  | 2106 | struct ocfs2_lock_res *lockres) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2107 | { | 
| Mark Fasheh | a641dc2 | 2008-12-24 16:03:48 -0800 | [diff] [blame] | 2108 | struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2109 |  | 
| Joel Becker | 1c520df | 2009-06-19 15:14:13 -0700 | [diff] [blame] | 2110 | if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) | 
|  | 2111 | && lvb->lvb_version == OCFS2_LVB_VERSION | 
| Mark Fasheh | f9e2d82 | 2006-09-12 15:35:49 -0700 | [diff] [blame] | 2112 | && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2113 | return 1; | 
|  | 2114 | return 0; | 
|  | 2115 | } | 
|  | 2116 |  | 
|  | 2117 | /* Determine whether a lock resource needs to be refreshed, and | 
|  | 2118 | * arbitrate who gets to refresh it. | 
|  | 2119 | * | 
|  | 2120 | *   0 means no refresh needed. | 
|  | 2121 | * | 
|  | 2122 | *   > 0 means you need to refresh this and you MUST call | 
|  | 2123 | *   ocfs2_complete_lock_res_refresh afterwards. */ | 
|  | 2124 | static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres) | 
|  | 2125 | { | 
|  | 2126 | unsigned long flags; | 
|  | 2127 | int status = 0; | 
|  | 2128 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2129 | refresh_check: | 
|  | 2130 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 2131 | if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) { | 
|  | 2132 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 2133 | goto bail; | 
|  | 2134 | } | 
|  | 2135 |  | 
|  | 2136 | if (lockres->l_flags & OCFS2_LOCK_REFRESHING) { | 
|  | 2137 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 2138 |  | 
|  | 2139 | ocfs2_wait_on_refreshing_lock(lockres); | 
|  | 2140 | goto refresh_check; | 
|  | 2141 | } | 
|  | 2142 |  | 
|  | 2143 | /* Ok, I'll be the one to refresh this lock. */ | 
|  | 2144 | lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING); | 
|  | 2145 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 2146 |  | 
|  | 2147 | status = 1; | 
|  | 2148 | bail: | 
| Tao Ma | c1e8d35 | 2011-03-07 16:43:21 +0800 | [diff] [blame] | 2149 | mlog(0, "status %d\n", status); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2150 | return status; | 
|  | 2151 | } | 
|  | 2152 |  | 
|  | 2153 | /* If status is non zero, I'll mark it as not being in refresh | 
|  | 2154 | * anymroe, but i won't clear the needs refresh flag. */ | 
|  | 2155 | static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres, | 
|  | 2156 | int status) | 
|  | 2157 | { | 
|  | 2158 | unsigned long flags; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2159 |  | 
|  | 2160 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 2161 | lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING); | 
|  | 2162 | if (!status) | 
|  | 2163 | lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); | 
|  | 2164 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 2165 |  | 
|  | 2166 | wake_up(&lockres->l_event); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2167 | } | 
|  | 2168 |  | 
|  | 2169 | /* may or may not return a bh if it went to disk. */ | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2170 | static int ocfs2_inode_lock_update(struct inode *inode, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2171 | struct buffer_head **bh) | 
|  | 2172 | { | 
|  | 2173 | int status = 0; | 
|  | 2174 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2175 | struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2176 | struct ocfs2_dinode *fe; | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2177 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2178 |  | 
| Mark Fasheh | be9e986 | 2007-04-18 15:22:08 -0700 | [diff] [blame] | 2179 | if (ocfs2_mount_local(osb)) | 
|  | 2180 | goto bail; | 
|  | 2181 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2182 | spin_lock(&oi->ip_lock); | 
|  | 2183 | if (oi->ip_flags & OCFS2_INODE_DELETED) { | 
| Mark Fasheh | b0697053 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 2184 | mlog(0, "Orphaned inode %llu was deleted while we " | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2185 | "were waiting on a lock. ip_flags = 0x%x\n", | 
| Mark Fasheh | b0697053 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 2186 | (unsigned long long)oi->ip_blkno, oi->ip_flags); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2187 | spin_unlock(&oi->ip_lock); | 
|  | 2188 | status = -ENOENT; | 
|  | 2189 | goto bail; | 
|  | 2190 | } | 
|  | 2191 | spin_unlock(&oi->ip_lock); | 
|  | 2192 |  | 
| Mark Fasheh | be9e986 | 2007-04-18 15:22:08 -0700 | [diff] [blame] | 2193 | if (!ocfs2_should_refresh_lock_res(lockres)) | 
|  | 2194 | goto bail; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2195 |  | 
|  | 2196 | /* This will discard any caching information we might have had | 
|  | 2197 | * for the inode metadata. */ | 
| Joel Becker | 8cb471e | 2009-02-10 20:00:41 -0800 | [diff] [blame] | 2198 | ocfs2_metadata_cache_purge(INODE_CACHE(inode)); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2199 |  | 
| Mark Fasheh | 8341897 | 2007-04-23 18:53:12 -0700 | [diff] [blame] | 2200 | ocfs2_extent_map_trunc(inode, 0); | 
|  | 2201 |  | 
| Mark Fasheh | be9e986 | 2007-04-18 15:22:08 -0700 | [diff] [blame] | 2202 | if (ocfs2_meta_lvb_is_trustable(inode, lockres)) { | 
| Mark Fasheh | b0697053 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 2203 | mlog(0, "Trusting LVB on inode %llu\n", | 
|  | 2204 | (unsigned long long)oi->ip_blkno); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2205 | ocfs2_refresh_inode_from_lvb(inode); | 
|  | 2206 | } else { | 
|  | 2207 | /* Boo, we have to go to disk. */ | 
|  | 2208 | /* read bh, cast, ocfs2_refresh_inode */ | 
| Joel Becker | b657c95 | 2008-11-13 14:49:11 -0800 | [diff] [blame] | 2209 | status = ocfs2_read_inode_block(inode, bh); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2210 | if (status < 0) { | 
|  | 2211 | mlog_errno(status); | 
|  | 2212 | goto bail_refresh; | 
|  | 2213 | } | 
|  | 2214 | fe = (struct ocfs2_dinode *) (*bh)->b_data; | 
|  | 2215 |  | 
|  | 2216 | /* This is a good chance to make sure we're not | 
| Joel Becker | b657c95 | 2008-11-13 14:49:11 -0800 | [diff] [blame] | 2217 | * locking an invalid object.  ocfs2_read_inode_block() | 
|  | 2218 | * already checked that the inode block is sane. | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2219 | * | 
|  | 2220 | * We bug on a stale inode here because we checked | 
|  | 2221 | * above whether it was wiped from disk. The wiping | 
|  | 2222 | * node provides a guarantee that we receive that | 
|  | 2223 | * message and can mark the inode before dropping any | 
|  | 2224 | * locks associated with it. */ | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2225 | mlog_bug_on_msg(inode->i_generation != | 
|  | 2226 | le32_to_cpu(fe->i_generation), | 
| Mark Fasheh | b0697053 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 2227 | "Invalid dinode %llu disk generation: %u " | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2228 | "inode->i_generation: %u\n", | 
| Mark Fasheh | b0697053 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 2229 | (unsigned long long)oi->ip_blkno, | 
|  | 2230 | le32_to_cpu(fe->i_generation), | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2231 | inode->i_generation); | 
|  | 2232 | mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) || | 
|  | 2233 | !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)), | 
| Mark Fasheh | b0697053 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 2234 | "Stale dinode %llu dtime: %llu flags: 0x%x\n", | 
|  | 2235 | (unsigned long long)oi->ip_blkno, | 
|  | 2236 | (unsigned long long)le64_to_cpu(fe->i_dtime), | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2237 | le32_to_cpu(fe->i_flags)); | 
|  | 2238 |  | 
|  | 2239 | ocfs2_refresh_inode(inode, fe); | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 2240 | ocfs2_track_lock_refresh(lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2241 | } | 
|  | 2242 |  | 
|  | 2243 | status = 0; | 
|  | 2244 | bail_refresh: | 
| Mark Fasheh | be9e986 | 2007-04-18 15:22:08 -0700 | [diff] [blame] | 2245 | ocfs2_complete_lock_res_refresh(lockres, status); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2246 | bail: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2247 | return status; | 
|  | 2248 | } | 
|  | 2249 |  | 
/*
 * Hand the caller a referenced buffer_head for the inode block.
 *
 * If passed_bh is non-NULL it is stored in *ret_bh and an extra
 * reference is taken on it.  Otherwise the inode block is read with
 * ocfs2_read_inode_block() straight into *ret_bh.
 *
 * Returns 0 on success, or the negative status of the read.
 */
static int ocfs2_assign_bh(struct inode *inode,
			   struct buffer_head **ret_bh,
			   struct buffer_head *passed_bh)
{
	int err;

	if (!passed_bh) {
		/* Nobody read the block on our behalf; do it now. */
		err = ocfs2_read_inode_block(inode, ret_bh);
		if (err < 0)
			mlog_errno(err);

		return err;
	}

	/* The update already went to disk for us; share that bh. */
	get_bh(passed_bh);
	*ret_bh = passed_bh;

	return 0;
}
|  | 2271 |  | 
|  | 2272 | /* | 
|  | 2273 | * returns < 0 error if the callback will never be called, otherwise | 
|  | 2274 | * the result of the lock will be communicated via the callback. | 
|  | 2275 | */ | 
| Jan Kara | cb25797 | 2009-06-04 15:26:50 +0200 | [diff] [blame] | 2276 | int ocfs2_inode_lock_full_nested(struct inode *inode, | 
|  | 2277 | struct buffer_head **ret_bh, | 
|  | 2278 | int ex, | 
|  | 2279 | int arg_flags, | 
|  | 2280 | int subclass) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2281 | { | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 2282 | int status, level, acquired; | 
|  | 2283 | u32 dlm_flags; | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2284 | struct ocfs2_lock_res *lockres = NULL; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2285 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
|  | 2286 | struct buffer_head *local_bh = NULL; | 
|  | 2287 |  | 
|  | 2288 | BUG_ON(!inode); | 
|  | 2289 |  | 
| Mark Fasheh | b0697053 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 2290 | mlog(0, "inode %llu, take %s META lock\n", | 
|  | 2291 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2292 | ex ? "EXMODE" : "PRMODE"); | 
|  | 2293 |  | 
|  | 2294 | status = 0; | 
|  | 2295 | acquired = 0; | 
|  | 2296 | /* We'll allow faking a readonly metadata lock for | 
|  | 2297 | * rodevices. */ | 
|  | 2298 | if (ocfs2_is_hard_readonly(osb)) { | 
|  | 2299 | if (ex) | 
|  | 2300 | status = -EROFS; | 
|  | 2301 | goto bail; | 
|  | 2302 | } | 
|  | 2303 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2304 | if (ocfs2_mount_local(osb)) | 
|  | 2305 | goto local; | 
|  | 2306 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2307 | if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) | 
| Joel Becker | 553abd0 | 2008-02-01 12:03:57 -0800 | [diff] [blame] | 2308 | ocfs2_wait_for_recovery(osb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2309 |  | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2310 | lockres = &OCFS2_I(inode)->ip_inode_lockres; | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 2311 | level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2312 | dlm_flags = 0; | 
|  | 2313 | if (arg_flags & OCFS2_META_LOCK_NOQUEUE) | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 2314 | dlm_flags |= DLM_LKF_NOQUEUE; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2315 |  | 
| Jan Kara | cb25797 | 2009-06-04 15:26:50 +0200 | [diff] [blame] | 2316 | status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags, | 
|  | 2317 | arg_flags, subclass, _RET_IP_); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2318 | if (status < 0) { | 
|  | 2319 | if (status != -EAGAIN && status != -EIOCBRETRY) | 
|  | 2320 | mlog_errno(status); | 
|  | 2321 | goto bail; | 
|  | 2322 | } | 
|  | 2323 |  | 
|  | 2324 | /* Notify the error cleanup path to drop the cluster lock. */ | 
|  | 2325 | acquired = 1; | 
|  | 2326 |  | 
|  | 2327 | /* We wait twice because a node may have died while we were in | 
|  | 2328 | * the lower dlm layers. The second time though, we've | 
|  | 2329 | * committed to owning this lock so we don't allow signals to | 
|  | 2330 | * abort the operation. */ | 
|  | 2331 | if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) | 
| Joel Becker | 553abd0 | 2008-02-01 12:03:57 -0800 | [diff] [blame] | 2332 | ocfs2_wait_for_recovery(osb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2333 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2334 | local: | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 2335 | /* | 
|  | 2336 | * We only see this flag if we're being called from | 
|  | 2337 | * ocfs2_read_locked_inode(). It means we're locking an inode | 
|  | 2338 | * which hasn't been populated yet, so clear the refresh flag | 
|  | 2339 | * and let the caller handle it. | 
|  | 2340 | */ | 
|  | 2341 | if (inode->i_state & I_NEW) { | 
|  | 2342 | status = 0; | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2343 | if (lockres) | 
|  | 2344 | ocfs2_complete_lock_res_refresh(lockres, 0); | 
| Mark Fasheh | 24c19ef | 2006-09-22 17:28:19 -0700 | [diff] [blame] | 2345 | goto bail; | 
|  | 2346 | } | 
|  | 2347 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2348 | /* This is fun. The caller may want a bh back, or it may | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2349 | * not. ocfs2_inode_lock_update definitely wants one in, but | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2350 | * may or may not read one, depending on what's in the | 
|  | 2351 | * LVB. The result of all of this is that we've *only* gone to | 
|  | 2352 | * disk if we have to, so the complexity is worthwhile. */ | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2353 | status = ocfs2_inode_lock_update(inode, &local_bh); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2354 | if (status < 0) { | 
|  | 2355 | if (status != -ENOENT) | 
|  | 2356 | mlog_errno(status); | 
|  | 2357 | goto bail; | 
|  | 2358 | } | 
|  | 2359 |  | 
|  | 2360 | if (ret_bh) { | 
|  | 2361 | status = ocfs2_assign_bh(inode, ret_bh, local_bh); | 
|  | 2362 | if (status < 0) { | 
|  | 2363 | mlog_errno(status); | 
|  | 2364 | goto bail; | 
|  | 2365 | } | 
|  | 2366 | } | 
|  | 2367 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2368 | bail: | 
|  | 2369 | if (status < 0) { | 
|  | 2370 | if (ret_bh && (*ret_bh)) { | 
|  | 2371 | brelse(*ret_bh); | 
|  | 2372 | *ret_bh = NULL; | 
|  | 2373 | } | 
|  | 2374 | if (acquired) | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2375 | ocfs2_inode_unlock(inode, ex); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2376 | } | 
|  | 2377 |  | 
|  | 2378 | if (local_bh) | 
|  | 2379 | brelse(local_bh); | 
|  | 2380 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2381 | return status; | 
|  | 2382 | } | 
|  | 2383 |  | 
|  | 2384 | /* | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 2385 | * This is working around a lock inversion between tasks acquiring DLM | 
|  | 2386 | * locks while holding a page lock and the downconvert thread which | 
|  | 2387 | * blocks dlm lock acquiry while acquiring page locks. | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2388 | * | 
|  | 2389 | * ** These _with_page variantes are only intended to be called from aop | 
|  | 2390 | * methods that hold page locks and return a very specific *positive* error | 
|  | 2391 | * code that aop methods pass up to the VFS -- test for errors with != 0. ** | 
|  | 2392 | * | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 2393 | * The DLM is called such that it returns -EAGAIN if it would have | 
|  | 2394 | * blocked waiting for the downconvert thread.  In that case we unlock | 
|  | 2395 | * our page so the downconvert thread can make progress.  Once we've | 
|  | 2396 | * done this we have to return AOP_TRUNCATED_PAGE so the aop method | 
|  | 2397 | * that called us can bubble that back up into the VFS who will then | 
|  | 2398 | * immediately retry the aop call. | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2399 | * | 
|  | 2400 | * We do a blocking lock and immediate unlock before returning, though, so that | 
|  | 2401 | * the lock has a great chance of being cached on this node by the time the VFS | 
|  | 2402 | * calls back to retry the aop.    This has a potential to livelock as nodes | 
|  | 2403 | * ping locks back and forth, but that's a risk we're willing to take to avoid | 
|  | 2404 | * the lock inversion simply. | 
|  | 2405 | */ | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2406 | int ocfs2_inode_lock_with_page(struct inode *inode, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2407 | struct buffer_head **ret_bh, | 
|  | 2408 | int ex, | 
|  | 2409 | struct page *page) | 
|  | 2410 | { | 
|  | 2411 | int ret; | 
|  | 2412 |  | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2413 | ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2414 | if (ret == -EAGAIN) { | 
|  | 2415 | unlock_page(page); | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2416 | if (ocfs2_inode_lock(inode, ret_bh, ex) == 0) | 
|  | 2417 | ocfs2_inode_unlock(inode, ex); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2418 | ret = AOP_TRUNCATED_PAGE; | 
|  | 2419 | } | 
|  | 2420 |  | 
|  | 2421 | return ret; | 
|  | 2422 | } | 
|  | 2423 |  | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2424 | int ocfs2_inode_lock_atime(struct inode *inode, | 
| Tiger Yang | 7f1a37e | 2006-11-15 15:48:42 +0800 | [diff] [blame] | 2425 | struct vfsmount *vfsmnt, | 
|  | 2426 | int *level) | 
|  | 2427 | { | 
|  | 2428 | int ret; | 
|  | 2429 |  | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2430 | ret = ocfs2_inode_lock(inode, NULL, 0); | 
| Tiger Yang | 7f1a37e | 2006-11-15 15:48:42 +0800 | [diff] [blame] | 2431 | if (ret < 0) { | 
|  | 2432 | mlog_errno(ret); | 
|  | 2433 | return ret; | 
|  | 2434 | } | 
|  | 2435 |  | 
|  | 2436 | /* | 
|  | 2437 | * If we should update atime, we will get EX lock, | 
|  | 2438 | * otherwise we just get PR lock. | 
|  | 2439 | */ | 
|  | 2440 | if (ocfs2_should_update_atime(inode, vfsmnt)) { | 
|  | 2441 | struct buffer_head *bh = NULL; | 
|  | 2442 |  | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2443 | ocfs2_inode_unlock(inode, 0); | 
|  | 2444 | ret = ocfs2_inode_lock(inode, &bh, 1); | 
| Tiger Yang | 7f1a37e | 2006-11-15 15:48:42 +0800 | [diff] [blame] | 2445 | if (ret < 0) { | 
|  | 2446 | mlog_errno(ret); | 
|  | 2447 | return ret; | 
|  | 2448 | } | 
|  | 2449 | *level = 1; | 
|  | 2450 | if (ocfs2_should_update_atime(inode, vfsmnt)) | 
|  | 2451 | ocfs2_update_inode_atime(inode, bh); | 
|  | 2452 | if (bh) | 
|  | 2453 | brelse(bh); | 
|  | 2454 | } else | 
|  | 2455 | *level = 0; | 
|  | 2456 |  | 
| Tiger Yang | 7f1a37e | 2006-11-15 15:48:42 +0800 | [diff] [blame] | 2457 | return ret; | 
|  | 2458 | } | 
|  | 2459 |  | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2460 | void ocfs2_inode_unlock(struct inode *inode, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2461 | int ex) | 
|  | 2462 | { | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 2463 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 2464 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2465 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2466 |  | 
| Mark Fasheh | b0697053 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 2467 | mlog(0, "inode %llu drop %s META lock\n", | 
|  | 2468 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2469 | ex ? "EXMODE" : "PRMODE"); | 
|  | 2470 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2471 | if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && | 
|  | 2472 | !ocfs2_mount_local(osb)) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2473 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2474 | } | 
|  | 2475 |  | 
| Sunil Mushran | df152c2 | 2009-06-22 11:40:07 -0700 | [diff] [blame] | 2476 | int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno) | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 2477 | { | 
|  | 2478 | struct ocfs2_lock_res *lockres; | 
|  | 2479 | struct ocfs2_orphan_scan_lvb *lvb; | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 2480 | int status = 0; | 
|  | 2481 |  | 
| Sunil Mushran | df152c2 | 2009-06-22 11:40:07 -0700 | [diff] [blame] | 2482 | if (ocfs2_is_hard_readonly(osb)) | 
|  | 2483 | return -EROFS; | 
|  | 2484 |  | 
|  | 2485 | if (ocfs2_mount_local(osb)) | 
|  | 2486 | return 0; | 
|  | 2487 |  | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 2488 | lockres = &osb->osb_orphan_scan.os_lockres; | 
| Sunil Mushran | df152c2 | 2009-06-22 11:40:07 -0700 | [diff] [blame] | 2489 | status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 2490 | if (status < 0) | 
|  | 2491 | return status; | 
|  | 2492 |  | 
|  | 2493 | lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | 
| Joel Becker | 1c520df | 2009-06-19 15:14:13 -0700 | [diff] [blame] | 2494 | if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && | 
|  | 2495 | lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION) | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 2496 | *seqno = be32_to_cpu(lvb->lvb_os_seqno); | 
| Sunil Mushran | 3211949 | 2009-06-19 16:53:18 -0700 | [diff] [blame] | 2497 | else | 
|  | 2498 | *seqno = osb->osb_orphan_scan.os_seqno + 1; | 
|  | 2499 |  | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 2500 | return status; | 
|  | 2501 | } | 
|  | 2502 |  | 
| Sunil Mushran | df152c2 | 2009-06-22 11:40:07 -0700 | [diff] [blame] | 2503 | void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno) | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 2504 | { | 
|  | 2505 | struct ocfs2_lock_res *lockres; | 
|  | 2506 | struct ocfs2_orphan_scan_lvb *lvb; | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 2507 |  | 
| Sunil Mushran | df152c2 | 2009-06-22 11:40:07 -0700 | [diff] [blame] | 2508 | if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) { | 
|  | 2509 | lockres = &osb->osb_orphan_scan.os_lockres; | 
|  | 2510 | lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | 
|  | 2511 | lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; | 
|  | 2512 | lvb->lvb_os_seqno = cpu_to_be32(seqno); | 
|  | 2513 | ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); | 
|  | 2514 | } | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 2515 | } | 
|  | 2516 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2517 | int ocfs2_super_lock(struct ocfs2_super *osb, | 
|  | 2518 | int ex) | 
|  | 2519 | { | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2520 | int status = 0; | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 2521 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2522 | struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2523 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2524 | if (ocfs2_is_hard_readonly(osb)) | 
|  | 2525 | return -EROFS; | 
|  | 2526 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2527 | if (ocfs2_mount_local(osb)) | 
|  | 2528 | goto bail; | 
|  | 2529 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2530 | status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); | 
|  | 2531 | if (status < 0) { | 
|  | 2532 | mlog_errno(status); | 
|  | 2533 | goto bail; | 
|  | 2534 | } | 
|  | 2535 |  | 
|  | 2536 | /* The super block lock path is really in the best position to | 
|  | 2537 | * know when resources covered by the lock need to be | 
|  | 2538 | * refreshed, so we do it here. Of course, making sense of | 
|  | 2539 | * everything is up to the caller :) */ | 
|  | 2540 | status = ocfs2_should_refresh_lock_res(lockres); | 
|  | 2541 | if (status < 0) { | 
|  | 2542 | mlog_errno(status); | 
|  | 2543 | goto bail; | 
|  | 2544 | } | 
|  | 2545 | if (status) { | 
| Mark Fasheh | 8e8a460 | 2008-02-01 11:59:09 -0800 | [diff] [blame] | 2546 | status = ocfs2_refresh_slot_info(osb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2547 |  | 
|  | 2548 | ocfs2_complete_lock_res_refresh(lockres, status); | 
|  | 2549 |  | 
|  | 2550 | if (status < 0) | 
|  | 2551 | mlog_errno(status); | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 2552 | ocfs2_track_lock_refresh(lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2553 | } | 
|  | 2554 | bail: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2555 | return status; | 
|  | 2556 | } | 
|  | 2557 |  | 
|  | 2558 | void ocfs2_super_unlock(struct ocfs2_super *osb, | 
|  | 2559 | int ex) | 
|  | 2560 | { | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 2561 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2562 | struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; | 
|  | 2563 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2564 | if (!ocfs2_mount_local(osb)) | 
|  | 2565 | ocfs2_cluster_unlock(osb, lockres, level); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2566 | } | 
|  | 2567 |  | 
|  | 2568 | int ocfs2_rename_lock(struct ocfs2_super *osb) | 
|  | 2569 | { | 
|  | 2570 | int status; | 
|  | 2571 | struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; | 
|  | 2572 |  | 
|  | 2573 | if (ocfs2_is_hard_readonly(osb)) | 
|  | 2574 | return -EROFS; | 
|  | 2575 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2576 | if (ocfs2_mount_local(osb)) | 
|  | 2577 | return 0; | 
|  | 2578 |  | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 2579 | status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2580 | if (status < 0) | 
|  | 2581 | mlog_errno(status); | 
|  | 2582 |  | 
|  | 2583 | return status; | 
|  | 2584 | } | 
|  | 2585 |  | 
|  | 2586 | void ocfs2_rename_unlock(struct ocfs2_super *osb) | 
|  | 2587 | { | 
|  | 2588 | struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; | 
|  | 2589 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2590 | if (!ocfs2_mount_local(osb)) | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 2591 | ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2592 | } | 
|  | 2593 |  | 
| wengang wang | 6ca497a | 2009-03-06 21:29:10 +0800 | [diff] [blame] | 2594 | int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex) | 
|  | 2595 | { | 
|  | 2596 | int status; | 
|  | 2597 | struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres; | 
|  | 2598 |  | 
|  | 2599 | if (ocfs2_is_hard_readonly(osb)) | 
|  | 2600 | return -EROFS; | 
|  | 2601 |  | 
|  | 2602 | if (ocfs2_mount_local(osb)) | 
|  | 2603 | return 0; | 
|  | 2604 |  | 
|  | 2605 | status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE, | 
|  | 2606 | 0, 0); | 
|  | 2607 | if (status < 0) | 
|  | 2608 | mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status); | 
|  | 2609 |  | 
|  | 2610 | return status; | 
|  | 2611 | } | 
|  | 2612 |  | 
|  | 2613 | void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex) | 
|  | 2614 | { | 
|  | 2615 | struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres; | 
|  | 2616 |  | 
|  | 2617 | if (!ocfs2_mount_local(osb)) | 
|  | 2618 | ocfs2_cluster_unlock(osb, lockres, | 
|  | 2619 | ex ? LKM_EXMODE : LKM_PRMODE); | 
|  | 2620 | } | 
|  | 2621 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 2622 | int ocfs2_dentry_lock(struct dentry *dentry, int ex) | 
|  | 2623 | { | 
|  | 2624 | int ret; | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 2625 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 2626 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; | 
|  | 2627 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); | 
|  | 2628 |  | 
|  | 2629 | BUG_ON(!dl); | 
|  | 2630 |  | 
|  | 2631 | if (ocfs2_is_hard_readonly(osb)) | 
|  | 2632 | return -EROFS; | 
|  | 2633 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2634 | if (ocfs2_mount_local(osb)) | 
|  | 2635 | return 0; | 
|  | 2636 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 2637 | ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0); | 
|  | 2638 | if (ret < 0) | 
|  | 2639 | mlog_errno(ret); | 
|  | 2640 |  | 
|  | 2641 | return ret; | 
|  | 2642 | } | 
|  | 2643 |  | 
|  | 2644 | void ocfs2_dentry_unlock(struct dentry *dentry, int ex) | 
|  | 2645 | { | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 2646 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 2647 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; | 
|  | 2648 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); | 
|  | 2649 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2650 | if (!ocfs2_mount_local(osb)) | 
|  | 2651 | ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 2652 | } | 
|  | 2653 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2654 | /* Reference counting of the dlm debug structure. We want this because | 
|  | 2655 | * open references on the debug inodes can live on after a mount, so | 
|  | 2656 | * we can't rely on the ocfs2_super to always exist. */ | 
|  | 2657 | static void ocfs2_dlm_debug_free(struct kref *kref) | 
|  | 2658 | { | 
|  | 2659 | struct ocfs2_dlm_debug *dlm_debug; | 
|  | 2660 |  | 
|  | 2661 | dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt); | 
|  | 2662 |  | 
|  | 2663 | kfree(dlm_debug); | 
|  | 2664 | } | 
|  | 2665 |  | 
|  | 2666 | void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug) | 
|  | 2667 | { | 
|  | 2668 | if (dlm_debug) | 
|  | 2669 | kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free); | 
|  | 2670 | } | 
|  | 2671 |  | 
|  | 2672 | static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug) | 
|  | 2673 | { | 
|  | 2674 | kref_get(&debug->d_refcnt); | 
|  | 2675 | } | 
|  | 2676 |  | 
|  | 2677 | struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void) | 
|  | 2678 | { | 
|  | 2679 | struct ocfs2_dlm_debug *dlm_debug; | 
|  | 2680 |  | 
|  | 2681 | dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL); | 
|  | 2682 | if (!dlm_debug) { | 
|  | 2683 | mlog_errno(-ENOMEM); | 
|  | 2684 | goto out; | 
|  | 2685 | } | 
|  | 2686 |  | 
|  | 2687 | kref_init(&dlm_debug->d_refcnt); | 
|  | 2688 | INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking); | 
|  | 2689 | dlm_debug->d_locking_state = NULL; | 
|  | 2690 | out: | 
|  | 2691 | return dlm_debug; | 
|  | 2692 | } | 
|  | 2693 |  | 
|  | 2694 | /* Access to this is arbitrated for us via seq_file->sem. */ | 
|  | 2695 | struct ocfs2_dlm_seq_priv { | 
|  | 2696 | struct ocfs2_dlm_debug *p_dlm_debug; | 
|  | 2697 | struct ocfs2_lock_res p_iter_res; | 
|  | 2698 | struct ocfs2_lock_res p_tmp_res; | 
|  | 2699 | }; | 
|  | 2700 |  | 
|  | 2701 | static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start, | 
|  | 2702 | struct ocfs2_dlm_seq_priv *priv) | 
|  | 2703 | { | 
|  | 2704 | struct ocfs2_lock_res *iter, *ret = NULL; | 
|  | 2705 | struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug; | 
|  | 2706 |  | 
|  | 2707 | assert_spin_locked(&ocfs2_dlm_tracking_lock); | 
|  | 2708 |  | 
|  | 2709 | list_for_each_entry(iter, &start->l_debug_list, l_debug_list) { | 
|  | 2710 | /* discover the head of the list */ | 
|  | 2711 | if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) { | 
|  | 2712 | mlog(0, "End of list found, %p\n", ret); | 
|  | 2713 | break; | 
|  | 2714 | } | 
|  | 2715 |  | 
|  | 2716 | /* We track our "dummy" iteration lockres' by a NULL | 
|  | 2717 | * l_ops field. */ | 
|  | 2718 | if (iter->l_ops != NULL) { | 
|  | 2719 | ret = iter; | 
|  | 2720 | break; | 
|  | 2721 | } | 
|  | 2722 | } | 
|  | 2723 |  | 
|  | 2724 | return ret; | 
|  | 2725 | } | 
|  | 2726 |  | 
|  | 2727 | static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos) | 
|  | 2728 | { | 
|  | 2729 | struct ocfs2_dlm_seq_priv *priv = m->private; | 
|  | 2730 | struct ocfs2_lock_res *iter; | 
|  | 2731 |  | 
|  | 2732 | spin_lock(&ocfs2_dlm_tracking_lock); | 
|  | 2733 | iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv); | 
|  | 2734 | if (iter) { | 
|  | 2735 | /* Since lockres' have the lifetime of their container | 
|  | 2736 | * (which can be inodes, ocfs2_supers, etc) we want to | 
|  | 2737 | * copy this out to a temporary lockres while still | 
|  | 2738 | * under the spinlock. Obviously after this we can't | 
|  | 2739 | * trust any pointers on the copy returned, but that's | 
|  | 2740 | * ok as the information we want isn't typically held | 
|  | 2741 | * in them. */ | 
|  | 2742 | priv->p_tmp_res = *iter; | 
|  | 2743 | iter = &priv->p_tmp_res; | 
|  | 2744 | } | 
|  | 2745 | spin_unlock(&ocfs2_dlm_tracking_lock); | 
|  | 2746 |  | 
|  | 2747 | return iter; | 
|  | 2748 | } | 
|  | 2749 |  | 
/* seq_file ->stop: intentionally empty, there is nothing to undo. */
static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v)
{
	return;
}
|  | 2753 |  | 
|  | 2754 | static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos) | 
|  | 2755 | { | 
|  | 2756 | struct ocfs2_dlm_seq_priv *priv = m->private; | 
|  | 2757 | struct ocfs2_lock_res *iter = v; | 
|  | 2758 | struct ocfs2_lock_res *dummy = &priv->p_iter_res; | 
|  | 2759 |  | 
|  | 2760 | spin_lock(&ocfs2_dlm_tracking_lock); | 
|  | 2761 | iter = ocfs2_dlm_next_res(iter, priv); | 
|  | 2762 | list_del_init(&dummy->l_debug_list); | 
|  | 2763 | if (iter) { | 
|  | 2764 | list_add(&dummy->l_debug_list, &iter->l_debug_list); | 
|  | 2765 | priv->p_tmp_res = *iter; | 
|  | 2766 | iter = &priv->p_tmp_res; | 
|  | 2767 | } | 
|  | 2768 | spin_unlock(&ocfs2_dlm_tracking_lock); | 
|  | 2769 |  | 
|  | 2770 | return iter; | 
|  | 2771 | } | 
|  | 2772 |  | 
/*
 * Format version of the lock dump; debugfs.ocfs2 uses it to determine
 * the format being used.
 *
 * New in version 2
 *	- Lock stats printed
 * New in version 3
 *	- Max time in lock stats is in usecs (instead of nsecs)
 */
#define OCFS2_DLM_DEBUG_STR_VERSION 3
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2782 | static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) | 
|  | 2783 | { | 
|  | 2784 | int i; | 
|  | 2785 | char *lvb; | 
|  | 2786 | struct ocfs2_lock_res *lockres = v; | 
|  | 2787 |  | 
|  | 2788 | if (!lockres) | 
|  | 2789 | return -EINVAL; | 
|  | 2790 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 2791 | seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION); | 
|  | 2792 |  | 
|  | 2793 | if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY) | 
|  | 2794 | seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1, | 
|  | 2795 | lockres->l_name, | 
|  | 2796 | (unsigned int)ocfs2_get_dentry_lock_ino(lockres)); | 
|  | 2797 | else | 
|  | 2798 | seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name); | 
|  | 2799 |  | 
|  | 2800 | seq_printf(m, "%d\t" | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2801 | "0x%lx\t" | 
|  | 2802 | "0x%x\t" | 
|  | 2803 | "0x%x\t" | 
|  | 2804 | "%u\t" | 
|  | 2805 | "%u\t" | 
|  | 2806 | "%d\t" | 
|  | 2807 | "%d\t", | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2808 | lockres->l_level, | 
|  | 2809 | lockres->l_flags, | 
|  | 2810 | lockres->l_action, | 
|  | 2811 | lockres->l_unlock_action, | 
|  | 2812 | lockres->l_ro_holders, | 
|  | 2813 | lockres->l_ex_holders, | 
|  | 2814 | lockres->l_requested, | 
|  | 2815 | lockres->l_blocking); | 
|  | 2816 |  | 
|  | 2817 | /* Dump the raw LVB */ | 
| Joel Becker | 8f2c9c1 | 2008-02-01 12:16:57 -0800 | [diff] [blame] | 2818 | lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2819 | for(i = 0; i < DLM_LVB_LEN; i++) | 
|  | 2820 | seq_printf(m, "0x%x\t", lvb[i]); | 
|  | 2821 |  | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 2822 | #ifdef CONFIG_OCFS2_FS_STATS | 
| Sunil Mushran | 5bc970e | 2010-12-28 23:26:03 -0800 | [diff] [blame] | 2823 | # define lock_num_prmode(_l)		((_l)->l_lock_prmode.ls_gets) | 
|  | 2824 | # define lock_num_exmode(_l)		((_l)->l_lock_exmode.ls_gets) | 
|  | 2825 | # define lock_num_prmode_failed(_l)	((_l)->l_lock_prmode.ls_fail) | 
|  | 2826 | # define lock_num_exmode_failed(_l)	((_l)->l_lock_exmode.ls_fail) | 
|  | 2827 | # define lock_total_prmode(_l)		((_l)->l_lock_prmode.ls_total) | 
|  | 2828 | # define lock_total_exmode(_l)		((_l)->l_lock_exmode.ls_total) | 
|  | 2829 | # define lock_max_prmode(_l)		((_l)->l_lock_prmode.ls_max) | 
|  | 2830 | # define lock_max_exmode(_l)		((_l)->l_lock_exmode.ls_max) | 
|  | 2831 | # define lock_refresh(_l)		((_l)->l_lock_refresh) | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 2832 | #else | 
| Sunil Mushran | 5bc970e | 2010-12-28 23:26:03 -0800 | [diff] [blame] | 2833 | # define lock_num_prmode(_l)		(0) | 
|  | 2834 | # define lock_num_exmode(_l)		(0) | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 2835 | # define lock_num_prmode_failed(_l)	(0) | 
|  | 2836 | # define lock_num_exmode_failed(_l)	(0) | 
| Randy Dunlap | dd25e55 | 2008-05-28 14:41:00 -0700 | [diff] [blame] | 2837 | # define lock_total_prmode(_l)		(0ULL) | 
|  | 2838 | # define lock_total_exmode(_l)		(0ULL) | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 2839 | # define lock_max_prmode(_l)		(0) | 
|  | 2840 | # define lock_max_exmode(_l)		(0) | 
|  | 2841 | # define lock_refresh(_l)		(0) | 
|  | 2842 | #endif | 
|  | 2843 | /* The following seq_print was added in version 2 of this output */ | 
| Sunil Mushran | 5bc970e | 2010-12-28 23:26:03 -0800 | [diff] [blame] | 2844 | seq_printf(m, "%u\t" | 
|  | 2845 | "%u\t" | 
| Sunil Mushran | 8ddb7b0 | 2008-05-13 13:45:15 -0700 | [diff] [blame] | 2846 | "%u\t" | 
|  | 2847 | "%u\t" | 
|  | 2848 | "%llu\t" | 
|  | 2849 | "%llu\t" | 
|  | 2850 | "%u\t" | 
|  | 2851 | "%u\t" | 
|  | 2852 | "%u\t", | 
|  | 2853 | lock_num_prmode(lockres), | 
|  | 2854 | lock_num_exmode(lockres), | 
|  | 2855 | lock_num_prmode_failed(lockres), | 
|  | 2856 | lock_num_exmode_failed(lockres), | 
|  | 2857 | lock_total_prmode(lockres), | 
|  | 2858 | lock_total_exmode(lockres), | 
|  | 2859 | lock_max_prmode(lockres), | 
|  | 2860 | lock_max_exmode(lockres), | 
|  | 2861 | lock_refresh(lockres)); | 
|  | 2862 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2863 | /* End the line */ | 
|  | 2864 | seq_printf(m, "\n"); | 
|  | 2865 | return 0; | 
|  | 2866 | } | 
|  | 2867 |  | 
| Jan Engelhardt | 90d9977 | 2008-01-22 20:52:20 +0100 | [diff] [blame] | 2868 | static const struct seq_operations ocfs2_dlm_seq_ops = { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2869 | .start =	ocfs2_dlm_seq_start, | 
|  | 2870 | .stop =		ocfs2_dlm_seq_stop, | 
|  | 2871 | .next =		ocfs2_dlm_seq_next, | 
|  | 2872 | .show =		ocfs2_dlm_seq_show, | 
|  | 2873 | }; | 
|  | 2874 |  | 
|  | 2875 | static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file) | 
|  | 2876 | { | 
| Joe Perches | 33fa1d9 | 2010-07-12 13:50:19 -0700 | [diff] [blame] | 2877 | struct seq_file *seq = file->private_data; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2878 | struct ocfs2_dlm_seq_priv *priv = seq->private; | 
|  | 2879 | struct ocfs2_lock_res *res = &priv->p_iter_res; | 
|  | 2880 |  | 
|  | 2881 | ocfs2_remove_lockres_tracking(res); | 
|  | 2882 | ocfs2_put_dlm_debug(priv->p_dlm_debug); | 
|  | 2883 | return seq_release_private(inode, file); | 
|  | 2884 | } | 
|  | 2885 |  | 
|  | 2886 | static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) | 
|  | 2887 | { | 
|  | 2888 | int ret; | 
|  | 2889 | struct ocfs2_dlm_seq_priv *priv; | 
|  | 2890 | struct seq_file *seq; | 
|  | 2891 | struct ocfs2_super *osb; | 
|  | 2892 |  | 
|  | 2893 | priv = kzalloc(sizeof(struct ocfs2_dlm_seq_priv), GFP_KERNEL); | 
|  | 2894 | if (!priv) { | 
|  | 2895 | ret = -ENOMEM; | 
|  | 2896 | mlog_errno(ret); | 
|  | 2897 | goto out; | 
|  | 2898 | } | 
| Theodore Ts'o | 8e18e29 | 2006-09-27 01:50:46 -0700 | [diff] [blame] | 2899 | osb = inode->i_private; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2900 | ocfs2_get_dlm_debug(osb->osb_dlm_debug); | 
|  | 2901 | priv->p_dlm_debug = osb->osb_dlm_debug; | 
|  | 2902 | INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list); | 
|  | 2903 |  | 
|  | 2904 | ret = seq_open(file, &ocfs2_dlm_seq_ops); | 
|  | 2905 | if (ret) { | 
|  | 2906 | kfree(priv); | 
|  | 2907 | mlog_errno(ret); | 
|  | 2908 | goto out; | 
|  | 2909 | } | 
|  | 2910 |  | 
| Joe Perches | 33fa1d9 | 2010-07-12 13:50:19 -0700 | [diff] [blame] | 2911 | seq = file->private_data; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2912 | seq->private = priv; | 
|  | 2913 |  | 
|  | 2914 | ocfs2_add_lockres_tracking(&priv->p_iter_res, | 
|  | 2915 | priv->p_dlm_debug); | 
|  | 2916 |  | 
|  | 2917 | out: | 
|  | 2918 | return ret; | 
|  | 2919 | } | 
|  | 2920 |  | 
| Arjan van de Ven | 4b6f5d2 | 2006-03-28 01:56:42 -0800 | [diff] [blame] | 2921 | static const struct file_operations ocfs2_dlm_debug_fops = { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2922 | .open =		ocfs2_dlm_debug_open, | 
|  | 2923 | .release =	ocfs2_dlm_debug_release, | 
|  | 2924 | .read =		seq_read, | 
|  | 2925 | .llseek =	seq_lseek, | 
|  | 2926 | }; | 
|  | 2927 |  | 
|  | 2928 | static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) | 
|  | 2929 | { | 
|  | 2930 | int ret = 0; | 
|  | 2931 | struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; | 
|  | 2932 |  | 
|  | 2933 | dlm_debug->d_locking_state = debugfs_create_file("locking_state", | 
|  | 2934 | S_IFREG|S_IRUSR, | 
|  | 2935 | osb->osb_debug_root, | 
|  | 2936 | osb, | 
|  | 2937 | &ocfs2_dlm_debug_fops); | 
|  | 2938 | if (!dlm_debug->d_locking_state) { | 
|  | 2939 | ret = -EINVAL; | 
|  | 2940 | mlog(ML_ERROR, | 
|  | 2941 | "Unable to create locking state debugfs file.\n"); | 
|  | 2942 | goto out; | 
|  | 2943 | } | 
|  | 2944 |  | 
|  | 2945 | ocfs2_get_dlm_debug(dlm_debug); | 
|  | 2946 | out: | 
|  | 2947 | return ret; | 
|  | 2948 | } | 
|  | 2949 |  | 
|  | 2950 | static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) | 
|  | 2951 | { | 
|  | 2952 | struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; | 
|  | 2953 |  | 
|  | 2954 | if (dlm_debug) { | 
|  | 2955 | debugfs_remove(dlm_debug->d_locking_state); | 
|  | 2956 | ocfs2_put_dlm_debug(dlm_debug); | 
|  | 2957 | } | 
|  | 2958 | } | 
|  | 2959 |  | 
|  | 2960 | int ocfs2_dlm_init(struct ocfs2_super *osb) | 
|  | 2961 | { | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2962 | int status = 0; | 
| Joel Becker | 4670c46 | 2008-02-01 14:39:35 -0800 | [diff] [blame] | 2963 | struct ocfs2_cluster_connection *conn = NULL; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2964 |  | 
| Mark Fasheh | 0abd6d1 | 2008-01-29 16:59:56 -0800 | [diff] [blame] | 2965 | if (ocfs2_mount_local(osb)) { | 
|  | 2966 | osb->node_num = 0; | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2967 | goto local; | 
| Mark Fasheh | 0abd6d1 | 2008-01-29 16:59:56 -0800 | [diff] [blame] | 2968 | } | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 2969 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2970 | status = ocfs2_dlm_init_debug(osb); | 
|  | 2971 | if (status < 0) { | 
|  | 2972 | mlog_errno(status); | 
|  | 2973 | goto bail; | 
|  | 2974 | } | 
|  | 2975 |  | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 2976 | /* launch downconvert thread */ | 
|  | 2977 | osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc"); | 
|  | 2978 | if (IS_ERR(osb->dc_task)) { | 
|  | 2979 | status = PTR_ERR(osb->dc_task); | 
|  | 2980 | osb->dc_task = NULL; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2981 | mlog_errno(status); | 
|  | 2982 | goto bail; | 
|  | 2983 | } | 
|  | 2984 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2985 | /* for now, uuid == domain */ | 
| Joel Becker | 9c6c877 | 2008-02-01 15:17:30 -0800 | [diff] [blame] | 2986 | status = ocfs2_cluster_connect(osb->osb_cluster_stack, | 
|  | 2987 | osb->uuid_str, | 
| Joel Becker | 4670c46 | 2008-02-01 14:39:35 -0800 | [diff] [blame] | 2988 | strlen(osb->uuid_str), | 
| Joel Becker | 553b5eb | 2010-01-29 17:19:06 -0800 | [diff] [blame] | 2989 | &lproto, ocfs2_do_node_down, osb, | 
| Joel Becker | 4670c46 | 2008-02-01 14:39:35 -0800 | [diff] [blame] | 2990 | &conn); | 
|  | 2991 | if (status) { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 2992 | mlog_errno(status); | 
|  | 2993 | goto bail; | 
|  | 2994 | } | 
|  | 2995 |  | 
| Mark Fasheh | 0abd6d1 | 2008-01-29 16:59:56 -0800 | [diff] [blame] | 2996 | status = ocfs2_cluster_this_node(&osb->node_num); | 
|  | 2997 | if (status < 0) { | 
|  | 2998 | mlog_errno(status); | 
|  | 2999 | mlog(ML_ERROR, | 
|  | 3000 | "could not find this host's node number\n"); | 
| Joel Becker | 286eaa9 | 2008-02-01 15:03:57 -0800 | [diff] [blame] | 3001 | ocfs2_cluster_disconnect(conn, 0); | 
| Mark Fasheh | 0abd6d1 | 2008-01-29 16:59:56 -0800 | [diff] [blame] | 3002 | goto bail; | 
|  | 3003 | } | 
|  | 3004 |  | 
| Sunil Mushran | c271c5c | 2006-12-05 17:56:35 -0800 | [diff] [blame] | 3005 | local: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3006 | ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); | 
|  | 3007 | ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); | 
| wengang wang | 6ca497a | 2009-03-06 21:29:10 +0800 | [diff] [blame] | 3008 | ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb); | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 3009 | ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3010 |  | 
| Joel Becker | 4670c46 | 2008-02-01 14:39:35 -0800 | [diff] [blame] | 3011 | osb->cconn = conn; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3012 |  | 
|  | 3013 | status = 0; | 
|  | 3014 | bail: | 
|  | 3015 | if (status < 0) { | 
|  | 3016 | ocfs2_dlm_shutdown_debug(osb); | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 3017 | if (osb->dc_task) | 
|  | 3018 | kthread_stop(osb->dc_task); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3019 | } | 
|  | 3020 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3021 | return status; | 
|  | 3022 | } | 
|  | 3023 |  | 
| Joel Becker | 286eaa9 | 2008-02-01 15:03:57 -0800 | [diff] [blame] | 3024 | void ocfs2_dlm_shutdown(struct ocfs2_super *osb, | 
|  | 3025 | int hangup_pending) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3026 | { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3027 | ocfs2_drop_osb_locks(osb); | 
|  | 3028 |  | 
| Joel Becker | 4670c46 | 2008-02-01 14:39:35 -0800 | [diff] [blame] | 3029 | /* | 
|  | 3030 | * Now that we have dropped all locks and ocfs2_dismount_volume() | 
|  | 3031 | * has disabled recovery, the DLM won't be talking to us.  It's | 
|  | 3032 | * safe to tear things down before disconnecting the cluster. | 
|  | 3033 | */ | 
|  | 3034 |  | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 3035 | if (osb->dc_task) { | 
|  | 3036 | kthread_stop(osb->dc_task); | 
|  | 3037 | osb->dc_task = NULL; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3038 | } | 
|  | 3039 |  | 
|  | 3040 | ocfs2_lock_res_free(&osb->osb_super_lockres); | 
|  | 3041 | ocfs2_lock_res_free(&osb->osb_rename_lockres); | 
| wengang wang | 6ca497a | 2009-03-06 21:29:10 +0800 | [diff] [blame] | 3042 | ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres); | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 3043 | ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3044 |  | 
| Joel Becker | 286eaa9 | 2008-02-01 15:03:57 -0800 | [diff] [blame] | 3045 | ocfs2_cluster_disconnect(osb->cconn, hangup_pending); | 
| Joel Becker | 4670c46 | 2008-02-01 14:39:35 -0800 | [diff] [blame] | 3046 | osb->cconn = NULL; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3047 |  | 
|  | 3048 | ocfs2_dlm_shutdown_debug(osb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3049 | } | 
|  | 3050 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3051 | static int ocfs2_drop_lock(struct ocfs2_super *osb, | 
| Mark Fasheh | 0d5dc6c | 2006-09-14 14:44:51 -0700 | [diff] [blame] | 3052 | struct ocfs2_lock_res *lockres) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3053 | { | 
| Joel Becker | 7431cd7 | 2008-02-01 12:15:37 -0800 | [diff] [blame] | 3054 | int ret; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3055 | unsigned long flags; | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 3056 | u32 lkm_flags = 0; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3057 |  | 
|  | 3058 | /* We didn't get anywhere near actually using this lockres. */ | 
|  | 3059 | if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) | 
|  | 3060 | goto out; | 
|  | 3061 |  | 
| Mark Fasheh | b80fc01 | 2006-09-12 22:08:14 -0700 | [diff] [blame] | 3062 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 3063 | lkm_flags |= DLM_LKF_VALBLK; | 
| Mark Fasheh | b80fc01 | 2006-09-12 22:08:14 -0700 | [diff] [blame] | 3064 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3065 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 3066 |  | 
|  | 3067 | mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING), | 
|  | 3068 | "lockres %s, flags 0x%lx\n", | 
|  | 3069 | lockres->l_name, lockres->l_flags); | 
|  | 3070 |  | 
|  | 3071 | while (lockres->l_flags & OCFS2_LOCK_BUSY) { | 
|  | 3072 | mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = " | 
|  | 3073 | "%u, unlock_action = %u\n", | 
|  | 3074 | lockres->l_name, lockres->l_flags, lockres->l_action, | 
|  | 3075 | lockres->l_unlock_action); | 
|  | 3076 |  | 
|  | 3077 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 3078 |  | 
|  | 3079 | /* XXX: Today we just wait on any busy | 
|  | 3080 | * locks... Perhaps we need to cancel converts in the | 
|  | 3081 | * future? */ | 
|  | 3082 | ocfs2_wait_on_busy_lock(lockres); | 
|  | 3083 |  | 
|  | 3084 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 3085 | } | 
|  | 3086 |  | 
| Mark Fasheh | 0d5dc6c | 2006-09-14 14:44:51 -0700 | [diff] [blame] | 3087 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { | 
|  | 3088 | if (lockres->l_flags & OCFS2_LOCK_ATTACHED && | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 3089 | lockres->l_level == DLM_LOCK_EX && | 
| Mark Fasheh | 0d5dc6c | 2006-09-14 14:44:51 -0700 | [diff] [blame] | 3090 | !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) | 
|  | 3091 | lockres->l_ops->set_lvb(lockres); | 
|  | 3092 | } | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3093 |  | 
|  | 3094 | if (lockres->l_flags & OCFS2_LOCK_BUSY) | 
|  | 3095 | mlog(ML_ERROR, "destroying busy lock: \"%s\"\n", | 
|  | 3096 | lockres->l_name); | 
|  | 3097 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) | 
|  | 3098 | mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name); | 
|  | 3099 |  | 
|  | 3100 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { | 
|  | 3101 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 3102 | goto out; | 
|  | 3103 | } | 
|  | 3104 |  | 
|  | 3105 | lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED); | 
|  | 3106 |  | 
|  | 3107 | /* make sure we never get here while waiting for an ast to | 
|  | 3108 | * fire. */ | 
|  | 3109 | BUG_ON(lockres->l_action != OCFS2_AST_INVALID); | 
|  | 3110 |  | 
|  | 3111 | /* is this necessary? */ | 
|  | 3112 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 
|  | 3113 | lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK; | 
|  | 3114 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 3115 |  | 
|  | 3116 | mlog(0, "lock %s\n", lockres->l_name); | 
|  | 3117 |  | 
| Joel Becker | a796d28 | 2010-01-28 19:22:39 -0800 | [diff] [blame] | 3118 | ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags); | 
| Joel Becker | 7431cd7 | 2008-02-01 12:15:37 -0800 | [diff] [blame] | 3119 | if (ret) { | 
|  | 3120 | ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3121 | mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); | 
| Joel Becker | cf0acdc | 2008-01-29 16:59:55 -0800 | [diff] [blame] | 3122 | ocfs2_dlm_dump_lksb(&lockres->l_lksb); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3123 | BUG(); | 
|  | 3124 | } | 
| Coly Li | 73ac36e | 2009-01-07 18:09:16 -0800 | [diff] [blame] | 3125 | mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n", | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3126 | lockres->l_name); | 
|  | 3127 |  | 
|  | 3128 | ocfs2_wait_on_busy_lock(lockres); | 
|  | 3129 | out: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3130 | return 0; | 
|  | 3131 | } | 
|  | 3132 |  | 
|  | 3133 | /* Mark the lockres as being dropped. It will no longer be | 
|  | 3134 | * queued if blocking, but we still may have to wait on it | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 3135 | * being dequeued from the downconvert thread before we can consider | 
| Sunil Mushran | 2bd6321 | 2010-01-25 16:57:38 -0800 | [diff] [blame] | 3136 | * it safe to drop. | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3137 | * | 
|  | 3138 | * You can *not* attempt to call cluster_lock on this lockres anymore. */ | 
|  | 3139 | void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres) | 
|  | 3140 | { | 
|  | 3141 | int status; | 
|  | 3142 | struct ocfs2_mask_waiter mw; | 
|  | 3143 | unsigned long flags; | 
|  | 3144 |  | 
|  | 3145 | ocfs2_init_mask_waiter(&mw); | 
|  | 3146 |  | 
|  | 3147 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 3148 | lockres->l_flags |= OCFS2_LOCK_FREEING; | 
|  | 3149 | while (lockres->l_flags & OCFS2_LOCK_QUEUED) { | 
|  | 3150 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0); | 
|  | 3151 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 3152 |  | 
|  | 3153 | mlog(0, "Waiting on lockres %s\n", lockres->l_name); | 
|  | 3154 |  | 
|  | 3155 | status = ocfs2_wait_for_mask(&mw); | 
|  | 3156 | if (status) | 
|  | 3157 | mlog_errno(status); | 
|  | 3158 |  | 
|  | 3159 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 3160 | } | 
|  | 3161 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 3162 | } | 
|  | 3163 |  | 
/*
 * Mark @lockres as freeing and then drop its DLM lock.  Any error from
 * the drop is only logged.
 */
void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
			       struct ocfs2_lock_res *lockres)
{
	int err;

	ocfs2_mark_lockres_freeing(lockres);

	err = ocfs2_drop_lock(osb, lockres);
	if (err)
		mlog_errno(err);
}
|  | 3174 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3175 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) | 
|  | 3176 | { | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 3177 | ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres); | 
|  | 3178 | ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres); | 
| wengang wang | 6ca497a | 2009-03-06 21:29:10 +0800 | [diff] [blame] | 3179 | ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres); | 
| Srinivas Eeda | 8327393 | 2009-06-03 17:02:55 -0700 | [diff] [blame] | 3180 | ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3181 | } | 
|  | 3182 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3183 | int ocfs2_drop_inode_locks(struct inode *inode) | 
|  | 3184 | { | 
|  | 3185 | int status, err; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3186 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3187 | /* No need to call ocfs2_mark_lockres_freeing here - | 
|  | 3188 | * ocfs2_clear_inode has done it for us. */ | 
|  | 3189 |  | 
|  | 3190 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | 
| Tiger Yang | 5000863 | 2007-03-20 16:01:38 -0700 | [diff] [blame] | 3191 | &OCFS2_I(inode)->ip_open_lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3192 | if (err < 0) | 
|  | 3193 | mlog_errno(err); | 
|  | 3194 |  | 
|  | 3195 | status = err; | 
|  | 3196 |  | 
|  | 3197 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | 
| Mark Fasheh | e63aecb6 | 2007-10-18 15:30:42 -0700 | [diff] [blame] | 3198 | &OCFS2_I(inode)->ip_inode_lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3199 | if (err < 0) | 
|  | 3200 | mlog_errno(err); | 
|  | 3201 | if (err < 0 && !status) | 
|  | 3202 | status = err; | 
|  | 3203 |  | 
|  | 3204 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | 
| Mark Fasheh | 0d5dc6c | 2006-09-14 14:44:51 -0700 | [diff] [blame] | 3205 | &OCFS2_I(inode)->ip_rw_lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3206 | if (err < 0) | 
|  | 3207 | mlog_errno(err); | 
|  | 3208 | if (err < 0 && !status) | 
|  | 3209 | status = err; | 
|  | 3210 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3211 | return status; | 
|  | 3212 | } | 
|  | 3213 |  | 
| Joel Becker | de55124 | 2008-02-01 14:45:08 -0800 | [diff] [blame] | 3214 | static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, | 
|  | 3215 | int new_level) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3216 | { | 
|  | 3217 | assert_spin_locked(&lockres->l_lock); | 
|  | 3218 |  | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 3219 | BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3220 |  | 
|  | 3221 | if (lockres->l_level <= new_level) { | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3222 | mlog(ML_ERROR, "lockres %s, lvl %d <= %d, blcklst %d, mask %d, " | 
|  | 3223 | "type %d, flags 0x%lx, hold %d %d, act %d %d, req %d, " | 
|  | 3224 | "block %d, pgen %d\n", lockres->l_name, lockres->l_level, | 
|  | 3225 | new_level, list_empty(&lockres->l_blocked_list), | 
|  | 3226 | list_empty(&lockres->l_mask_waiters), lockres->l_type, | 
|  | 3227 | lockres->l_flags, lockres->l_ro_holders, | 
|  | 3228 | lockres->l_ex_holders, lockres->l_action, | 
|  | 3229 | lockres->l_unlock_action, lockres->l_requested, | 
|  | 3230 | lockres->l_blocking, lockres->l_pending_gen); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3231 | BUG(); | 
|  | 3232 | } | 
|  | 3233 |  | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3234 | mlog(ML_BASTS, "lockres %s, level %d => %d, blocking %d\n", | 
|  | 3235 | lockres->l_name, lockres->l_level, new_level, lockres->l_blocking); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3236 |  | 
|  | 3237 | lockres->l_action = OCFS2_AST_DOWNCONVERT; | 
|  | 3238 | lockres->l_requested = new_level; | 
|  | 3239 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 
| Joel Becker | de55124 | 2008-02-01 14:45:08 -0800 | [diff] [blame] | 3240 | return lockres_set_pending(lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3241 | } | 
|  | 3242 |  | 
|  | 3243 | static int ocfs2_downconvert_lock(struct ocfs2_super *osb, | 
|  | 3244 | struct ocfs2_lock_res *lockres, | 
|  | 3245 | int new_level, | 
| Joel Becker | de55124 | 2008-02-01 14:45:08 -0800 | [diff] [blame] | 3246 | int lvb, | 
|  | 3247 | unsigned int generation) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3248 | { | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 3249 | int ret; | 
|  | 3250 | u32 dlm_flags = DLM_LKF_CONVERT; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3251 |  | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3252 | mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name, | 
|  | 3253 | lockres->l_level, new_level); | 
|  | 3254 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3255 | if (lvb) | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 3256 | dlm_flags |= DLM_LKF_VALBLK; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3257 |  | 
| Joel Becker | 4670c46 | 2008-02-01 14:39:35 -0800 | [diff] [blame] | 3258 | ret = ocfs2_dlm_lock(osb->cconn, | 
| Joel Becker | 7431cd7 | 2008-02-01 12:15:37 -0800 | [diff] [blame] | 3259 | new_level, | 
|  | 3260 | &lockres->l_lksb, | 
|  | 3261 | dlm_flags, | 
|  | 3262 | lockres->l_name, | 
| Joel Becker | a796d28 | 2010-01-28 19:22:39 -0800 | [diff] [blame] | 3263 | OCFS2_LOCK_ID_MAX_LEN - 1); | 
| Joel Becker | de55124 | 2008-02-01 14:45:08 -0800 | [diff] [blame] | 3264 | lockres_clear_pending(lockres, generation, osb); | 
| Joel Becker | 7431cd7 | 2008-02-01 12:15:37 -0800 | [diff] [blame] | 3265 | if (ret) { | 
|  | 3266 | ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3267 | ocfs2_recover_from_dlm_error(lockres, 1); | 
|  | 3268 | goto bail; | 
|  | 3269 | } | 
|  | 3270 |  | 
|  | 3271 | ret = 0; | 
|  | 3272 | bail: | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3273 | return ret; | 
|  | 3274 | } | 
|  | 3275 |  | 
| Joel Becker | 24ef181 | 2008-01-29 17:37:32 -0800 | [diff] [blame] | 3276 | /* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */ | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3277 | static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, | 
|  | 3278 | struct ocfs2_lock_res *lockres) | 
|  | 3279 | { | 
|  | 3280 | assert_spin_locked(&lockres->l_lock); | 
|  | 3281 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3282 | if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) { | 
|  | 3283 | /* If we're already trying to cancel a lock conversion | 
|  | 3284 | * then just drop the spinlock and allow the caller to | 
|  | 3285 | * requeue this lock. */ | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3286 | mlog(ML_BASTS, "lockres %s, skip convert\n", lockres->l_name); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3287 | return 0; | 
|  | 3288 | } | 
|  | 3289 |  | 
|  | 3290 | /* were we in a convert when we got the bast fire? */ | 
|  | 3291 | BUG_ON(lockres->l_action != OCFS2_AST_CONVERT && | 
|  | 3292 | lockres->l_action != OCFS2_AST_DOWNCONVERT); | 
|  | 3293 | /* set things up for the unlockast to know to just | 
|  | 3294 | * clear out the ast_action and unset busy, etc. */ | 
|  | 3295 | lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT; | 
|  | 3296 |  | 
|  | 3297 | mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY), | 
|  | 3298 | "lock %s, invalid flags: 0x%lx\n", | 
|  | 3299 | lockres->l_name, lockres->l_flags); | 
|  | 3300 |  | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3301 | mlog(ML_BASTS, "lockres %s\n", lockres->l_name); | 
|  | 3302 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3303 | return 1; | 
|  | 3304 | } | 
|  | 3305 |  | 
|  | 3306 | static int ocfs2_cancel_convert(struct ocfs2_super *osb, | 
|  | 3307 | struct ocfs2_lock_res *lockres) | 
|  | 3308 | { | 
|  | 3309 | int ret; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3310 |  | 
| Joel Becker | 4670c46 | 2008-02-01 14:39:35 -0800 | [diff] [blame] | 3311 | ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, | 
| Joel Becker | a796d28 | 2010-01-28 19:22:39 -0800 | [diff] [blame] | 3312 | DLM_LKF_CANCEL); | 
| Joel Becker | 7431cd7 | 2008-02-01 12:15:37 -0800 | [diff] [blame] | 3313 | if (ret) { | 
|  | 3314 | ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3315 | ocfs2_recover_from_dlm_error(lockres, 0); | 
|  | 3316 | } | 
|  | 3317 |  | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3318 | mlog(ML_BASTS, "lockres %s\n", lockres->l_name); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3319 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3320 | return ret; | 
|  | 3321 | } | 
|  | 3322 |  | 
| Mark Fasheh | b5e500e | 2006-09-13 22:01:16 -0700 | [diff] [blame] | 3323 | static int ocfs2_unblock_lock(struct ocfs2_super *osb, | 
|  | 3324 | struct ocfs2_lock_res *lockres, | 
|  | 3325 | struct ocfs2_unblock_ctl *ctl) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3326 | { | 
|  | 3327 | unsigned long flags; | 
|  | 3328 | int blocking; | 
|  | 3329 | int new_level; | 
| Sunil Mushran | 079b805 | 2010-02-03 10:16:54 -0800 | [diff] [blame] | 3330 | int level; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3331 | int ret = 0; | 
| Mark Fasheh | 5ef0d4e | 2006-09-13 21:21:52 -0700 | [diff] [blame] | 3332 | int set_lvb = 0; | 
| Joel Becker | de55124 | 2008-02-01 14:45:08 -0800 | [diff] [blame] | 3333 | unsigned int gen; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3334 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3335 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 3336 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3337 | recheck: | 
| Sunil Mushran | db0f6ce | 2010-02-01 16:55:50 -0800 | [diff] [blame] | 3338 | /* | 
|  | 3339 | * Is it still blocking? If not, we have no more work to do. | 
|  | 3340 | */ | 
|  | 3341 | if (!(lockres->l_flags & OCFS2_LOCK_BLOCKED)) { | 
|  | 3342 | BUG_ON(lockres->l_blocking != DLM_LOCK_NL); | 
|  | 3343 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 3344 | ret = 0; | 
|  | 3345 | goto leave; | 
|  | 3346 | } | 
|  | 3347 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3348 | if (lockres->l_flags & OCFS2_LOCK_BUSY) { | 
| Joel Becker | de55124 | 2008-02-01 14:45:08 -0800 | [diff] [blame] | 3349 | /* XXX | 
|  | 3350 | * This is a *big* race.  The OCFS2_LOCK_PENDING flag | 
|  | 3351 | * exists entirely for one reason - another thread has set | 
|  | 3352 | * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock(). | 
|  | 3353 | * | 
|  | 3354 | * If we do ocfs2_cancel_convert() before the other thread | 
|  | 3355 | * calls dlm_lock(), our cancel will do nothing.  We will | 
|  | 3356 | * get no ast, and we will have no way of knowing the | 
|  | 3357 | * cancel failed.  Meanwhile, the other thread will call | 
|  | 3358 | * into dlm_lock() and wait...forever. | 
|  | 3359 | * | 
|  | 3360 | * Why forever?  Because another node has asked for the | 
|  | 3361 | * lock first; that's why we're here in unblock_lock(). | 
|  | 3362 | * | 
|  | 3363 | * The solution is OCFS2_LOCK_PENDING.  When PENDING is | 
|  | 3364 | * set, we just requeue the unblock.  Only when the other | 
|  | 3365 | * thread has called dlm_lock() and cleared PENDING will | 
|  | 3366 | * we then cancel their request. | 
|  | 3367 | * | 
|  | 3368 | * All callers of dlm_lock() must set OCFS2_DLM_PENDING | 
|  | 3369 | * at the same time they set OCFS2_DLM_BUSY.  They must | 
|  | 3370 | * clear OCFS2_DLM_PENDING after dlm_lock() returns. | 
|  | 3371 | */ | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3372 | if (lockres->l_flags & OCFS2_LOCK_PENDING) { | 
|  | 3373 | mlog(ML_BASTS, "lockres %s, ReQ: Pending\n", | 
|  | 3374 | lockres->l_name); | 
| Joel Becker | de55124 | 2008-02-01 14:45:08 -0800 | [diff] [blame] | 3375 | goto leave_requeue; | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3376 | } | 
| Joel Becker | de55124 | 2008-02-01 14:45:08 -0800 | [diff] [blame] | 3377 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 3378 | ctl->requeue = 1; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3379 | ret = ocfs2_prepare_cancel_convert(osb, lockres); | 
|  | 3380 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 3381 | if (ret) { | 
|  | 3382 | ret = ocfs2_cancel_convert(osb, lockres); | 
|  | 3383 | if (ret < 0) | 
|  | 3384 | mlog_errno(ret); | 
|  | 3385 | } | 
|  | 3386 | goto leave; | 
|  | 3387 | } | 
|  | 3388 |  | 
| Sunil Mushran | a191282 | 2010-01-21 10:50:03 -0800 | [diff] [blame] | 3389 | /* | 
|  | 3390 | * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is | 
|  | 3391 | * set when the ast is received for an upconvert just before the | 
|  | 3392 | * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast | 
|  | 3393 | * on the heels of the ast, we want to delay the downconvert just | 
|  | 3394 | * enough to allow the up requestor to do its task. Because this | 
|  | 3395 | * lock is in the blocked queue, the lock will be downconverted | 
|  | 3396 | * as soon as the requestor is done with the lock. | 
|  | 3397 | */ | 
|  | 3398 | if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) | 
|  | 3399 | goto leave_requeue; | 
|  | 3400 |  | 
| Sunil Mushran | 0d74125 | 2010-01-29 09:44:11 -0800 | [diff] [blame] | 3401 | /* | 
|  | 3402 | * How can we block and yet be at NL?  We were trying to upconvert | 
|  | 3403 | * from NL and got canceled.  The code comes back here, and now | 
|  | 3404 | * we notice and clear BLOCKING. | 
|  | 3405 | */ | 
|  | 3406 | if (lockres->l_level == DLM_LOCK_NL) { | 
|  | 3407 | BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders); | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3408 | mlog(ML_BASTS, "lockres %s, Aborting dc\n", lockres->l_name); | 
| Sunil Mushran | 0d74125 | 2010-01-29 09:44:11 -0800 | [diff] [blame] | 3409 | lockres->l_blocking = DLM_LOCK_NL; | 
|  | 3410 | lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); | 
|  | 3411 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 3412 | goto leave; | 
|  | 3413 | } | 
|  | 3414 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3415 | /* if we're blocking an exclusive and we have *any* holders, | 
|  | 3416 | * then requeue. */ | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 3417 | if ((lockres->l_blocking == DLM_LOCK_EX) | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3418 | && (lockres->l_ex_holders || lockres->l_ro_holders)) { | 
|  | 3419 | mlog(ML_BASTS, "lockres %s, ReQ: EX/PR Holders %u,%u\n", | 
|  | 3420 | lockres->l_name, lockres->l_ex_holders, | 
|  | 3421 | lockres->l_ro_holders); | 
| Mark Fasheh | f7fbfdd | 2006-09-13 21:02:29 -0700 | [diff] [blame] | 3422 | goto leave_requeue; | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3423 | } | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3424 |  | 
|  | 3425 | /* If it's a PR we're blocking, then only | 
|  | 3426 | * requeue if we've got any EX holders */ | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 3427 | if (lockres->l_blocking == DLM_LOCK_PR && | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3428 | lockres->l_ex_holders) { | 
|  | 3429 | mlog(ML_BASTS, "lockres %s, ReQ: EX Holders %u\n", | 
|  | 3430 | lockres->l_name, lockres->l_ex_holders); | 
| Mark Fasheh | f7fbfdd | 2006-09-13 21:02:29 -0700 | [diff] [blame] | 3431 | goto leave_requeue; | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3432 | } | 
| Mark Fasheh | f7fbfdd | 2006-09-13 21:02:29 -0700 | [diff] [blame] | 3433 |  | 
|  | 3434 | /* | 
|  | 3435 | * Can we get a lock in this state if the holder counts are | 
|  | 3436 | * zero? The meta data unblock code used to check this. | 
|  | 3437 | */ | 
|  | 3438 | if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3439 | && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) { | 
|  | 3440 | mlog(ML_BASTS, "lockres %s, ReQ: Lock Refreshing\n", | 
|  | 3441 | lockres->l_name); | 
| Mark Fasheh | f7fbfdd | 2006-09-13 21:02:29 -0700 | [diff] [blame] | 3442 | goto leave_requeue; | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3443 | } | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3444 |  | 
| Mark Fasheh | 16d5b95 | 2006-09-13 21:10:12 -0700 | [diff] [blame] | 3445 | new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); | 
|  | 3446 |  | 
|  | 3447 | if (lockres->l_ops->check_downconvert | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3448 | && !lockres->l_ops->check_downconvert(lockres, new_level)) { | 
|  | 3449 | mlog(ML_BASTS, "lockres %s, ReQ: Checkpointing\n", | 
|  | 3450 | lockres->l_name); | 
| Mark Fasheh | 16d5b95 | 2006-09-13 21:10:12 -0700 | [diff] [blame] | 3451 | goto leave_requeue; | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3452 | } | 
| Mark Fasheh | 16d5b95 | 2006-09-13 21:10:12 -0700 | [diff] [blame] | 3453 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3454 | /* If we get here, then we know that there are no more | 
|  | 3455 | * incompatible holders (and anyone asking for an incompatible | 
|  | 3456 | * lock is blocked). We can now downconvert the lock */ | 
| Mark Fasheh | cc567d8 | 2006-09-13 21:52:21 -0700 | [diff] [blame] | 3457 | if (!lockres->l_ops->downconvert_worker) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3458 | goto downconvert; | 
|  | 3459 |  | 
|  | 3460 | /* Some lockres types want to do a bit of work before | 
|  | 3461 | * downconverting a lock. Allow that here. The worker function | 
|  | 3462 | * may sleep, so we save off a copy of what we're blocking as | 
|  | 3463 | * it may change while we're not holding the spin lock. */ | 
|  | 3464 | blocking = lockres->l_blocking; | 
| Sunil Mushran | 079b805 | 2010-02-03 10:16:54 -0800 | [diff] [blame] | 3465 | level = lockres->l_level; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3466 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 3467 |  | 
| Mark Fasheh | cc567d8 | 2006-09-13 21:52:21 -0700 | [diff] [blame] | 3468 | ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking); | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 3469 |  | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3470 | if (ctl->unblock_action == UNBLOCK_STOP_POST) { | 
|  | 3471 | mlog(ML_BASTS, "lockres %s, UNBLOCK_STOP_POST\n", | 
|  | 3472 | lockres->l_name); | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 3473 | goto leave; | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3474 | } | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3475 |  | 
|  | 3476 | spin_lock_irqsave(&lockres->l_lock, flags); | 
| Sunil Mushran | 079b805 | 2010-02-03 10:16:54 -0800 | [diff] [blame] | 3477 | if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3478 | /* If this changed underneath us, then we can't drop | 
|  | 3479 | * it just yet. */ | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3480 | mlog(ML_BASTS, "lockres %s, block=%d:%d, level=%d:%d, " | 
|  | 3481 | "Recheck\n", lockres->l_name, blocking, | 
|  | 3482 | lockres->l_blocking, level, lockres->l_level); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3483 | goto recheck; | 
|  | 3484 | } | 
|  | 3485 |  | 
|  | 3486 | downconvert: | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 3487 | ctl->requeue = 0; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3488 |  | 
| Mark Fasheh | 5ef0d4e | 2006-09-13 21:21:52 -0700 | [diff] [blame] | 3489 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 3490 | if (lockres->l_level == DLM_LOCK_EX) | 
| Mark Fasheh | 5ef0d4e | 2006-09-13 21:21:52 -0700 | [diff] [blame] | 3491 | set_lvb = 1; | 
|  | 3492 |  | 
|  | 3493 | /* | 
|  | 3494 | * We only set the lvb if the lock has been fully | 
|  | 3495 | * refreshed - otherwise we risk setting stale | 
|  | 3496 | * data. Otherwise, there's no need to actually clear | 
|  | 3497 | * out the lvb here as it's value is still valid. | 
|  | 3498 | */ | 
|  | 3499 | if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) | 
|  | 3500 | lockres->l_ops->set_lvb(lockres); | 
|  | 3501 | } | 
|  | 3502 |  | 
| Joel Becker | de55124 | 2008-02-01 14:45:08 -0800 | [diff] [blame] | 3503 | gen = ocfs2_prepare_downconvert(lockres, new_level); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3504 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
| Joel Becker | de55124 | 2008-02-01 14:45:08 -0800 | [diff] [blame] | 3505 | ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb, | 
|  | 3506 | gen); | 
|  | 3507 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3508 | leave: | 
| Tao Ma | c1e8d35 | 2011-03-07 16:43:21 +0800 | [diff] [blame] | 3509 | if (ret) | 
|  | 3510 | mlog_errno(ret); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3511 | return ret; | 
| Mark Fasheh | f7fbfdd | 2006-09-13 21:02:29 -0700 | [diff] [blame] | 3512 |  | 
|  | 3513 | leave_requeue: | 
|  | 3514 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 3515 | ctl->requeue = 1; | 
|  | 3516 |  | 
| Mark Fasheh | f7fbfdd | 2006-09-13 21:02:29 -0700 | [diff] [blame] | 3517 | return 0; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3518 | } | 
|  | 3519 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 3520 | static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | 
|  | 3521 | int blocking) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3522 | { | 
|  | 3523 | struct inode *inode; | 
|  | 3524 | struct address_space *mapping; | 
| Goldwyn Rodrigues | 5e98d49 | 2010-06-28 10:04:32 -0500 | [diff] [blame] | 3525 | struct ocfs2_inode_info *oi; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3526 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3527 | inode = ocfs2_lock_res_inode(lockres); | 
|  | 3528 | mapping = inode->i_mapping; | 
|  | 3529 |  | 
| Goldwyn Rodrigues | 5e98d49 | 2010-06-28 10:04:32 -0500 | [diff] [blame] | 3530 | if (S_ISDIR(inode->i_mode)) { | 
|  | 3531 | oi = OCFS2_I(inode); | 
|  | 3532 | oi->ip_dir_lock_gen++; | 
|  | 3533 | mlog(0, "generation: %u\n", oi->ip_dir_lock_gen); | 
|  | 3534 | goto out; | 
|  | 3535 | } | 
|  | 3536 |  | 
| Mark Fasheh | 1044e40 | 2008-02-28 17:16:03 -0800 | [diff] [blame] | 3537 | if (!S_ISREG(inode->i_mode)) | 
| Mark Fasheh | f1f5406 | 2007-10-18 15:13:59 -0700 | [diff] [blame] | 3538 | goto out; | 
|  | 3539 |  | 
| Mark Fasheh | 7f4a2a9 | 2006-12-11 11:06:36 -0800 | [diff] [blame] | 3540 | /* | 
|  | 3541 | * We need this before the filemap_fdatawrite() so that it can | 
|  | 3542 | * transfer the dirty bit from the PTE to the | 
|  | 3543 | * page. Unfortunately this means that even for EX->PR | 
|  | 3544 | * downconverts, we'll lose our mappings and have to build | 
|  | 3545 | * them up again. | 
|  | 3546 | */ | 
|  | 3547 | unmap_mapping_range(mapping, 0, 0, 0); | 
|  | 3548 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3549 | if (filemap_fdatawrite(mapping)) { | 
| Mark Fasheh | b0697053 | 2006-03-03 10:24:33 -0800 | [diff] [blame] | 3550 | mlog(ML_ERROR, "Could not sync inode %llu for downconvert!", | 
|  | 3551 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3552 | } | 
|  | 3553 | sync_mapping_buffers(mapping); | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 3554 | if (blocking == DLM_LOCK_EX) { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3555 | truncate_inode_pages(mapping, 0); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3556 | } else { | 
|  | 3557 | /* We only need to wait on the I/O if we're not also | 
|  | 3558 | * truncating pages because truncate_inode_pages waits | 
|  | 3559 | * for us above. We don't truncate pages if we're | 
|  | 3560 | * blocking anything < EXMODE because we want to keep | 
|  | 3561 | * them around in that case. */ | 
|  | 3562 | filemap_fdatawait(mapping); | 
|  | 3563 | } | 
|  | 3564 |  | 
| Mark Fasheh | f1f5406 | 2007-10-18 15:13:59 -0700 | [diff] [blame] | 3565 | out: | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 3566 | return UNBLOCK_CONTINUE; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3567 | } | 
|  | 3568 |  | 
| Tao Ma | a433848 | 2009-08-18 11:19:29 +0800 | [diff] [blame] | 3569 | static int ocfs2_ci_checkpointed(struct ocfs2_caching_info *ci, | 
|  | 3570 | struct ocfs2_lock_res *lockres, | 
|  | 3571 | int new_level) | 
| Mark Fasheh | 810d5ae | 2006-09-13 21:39:52 -0700 | [diff] [blame] | 3572 | { | 
| Tao Ma | a433848 | 2009-08-18 11:19:29 +0800 | [diff] [blame] | 3573 | int checkpointed = ocfs2_ci_fully_checkpointed(ci); | 
| Mark Fasheh | 810d5ae | 2006-09-13 21:39:52 -0700 | [diff] [blame] | 3574 |  | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 3575 | BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR); | 
|  | 3576 | BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed); | 
| Mark Fasheh | 810d5ae | 2006-09-13 21:39:52 -0700 | [diff] [blame] | 3577 |  | 
|  | 3578 | if (checkpointed) | 
|  | 3579 | return 1; | 
|  | 3580 |  | 
| Tao Ma | a433848 | 2009-08-18 11:19:29 +0800 | [diff] [blame] | 3581 | ocfs2_start_checkpoint(OCFS2_SB(ocfs2_metadata_cache_get_super(ci))); | 
| Mark Fasheh | 810d5ae | 2006-09-13 21:39:52 -0700 | [diff] [blame] | 3582 | return 0; | 
|  | 3583 | } | 
|  | 3584 |  | 
/*
 * check_downconvert hook for a lockres attached to an inode: defers to
 * ocfs2_ci_checkpointed() on the inode's metadata cache.
 */
static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
					int new_level)
{
	struct ocfs2_caching_info *ci;

	ci = INODE_CACHE(ocfs2_lock_res_inode(lockres));

	return ocfs2_ci_checkpointed(ci, lockres, new_level);
}
|  | 3592 |  | 
/*
 * set_lvb hook for a lockres attached to an inode: hands the inode to
 * __ocfs2_stuff_meta_lvb().
 */
static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
{
	__ocfs2_stuff_meta_lvb(ocfs2_lock_res_inode(lockres));
}
|  | 3599 |  | 
/*
 * Drops a reference on the dentry lock behind @lockres once unblock
 * processing is over.  Today this runs in the downconvert thread; the
 * dlmglue API could be simplified by pushing it off to the ocfs2_wq
 * instead.
 */
static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres)
{
	ocfs2_dentry_lock_put(osb, ocfs2_lock_res_dl(lockres));
}
|  | 3611 |  | 
|  | 3612 | /* | 
|  | 3613 | * d_delete() matching dentries before the lock downconvert. | 
|  | 3614 | * | 
|  | 3615 | * At this point, any process waiting to destroy the | 
|  | 3616 | * dentry_lock due to last ref count is stopped by the | 
|  | 3617 | * OCFS2_LOCK_QUEUED flag. | 
|  | 3618 | * | 
|  | 3619 | * We have two potential problems | 
|  | 3620 | * | 
|  | 3621 | * 1) If we do the last reference drop on our dentry_lock (via dput) | 
|  | 3622 | *    we'll wind up in ocfs2_release_dentry_lock(), waiting on | 
|  | 3623 | *    the downconvert to finish. Instead we take an elevated | 
|  | 3624 | *    reference and push the drop until after we've completed our | 
|  | 3625 | *    unblock processing. | 
|  | 3626 | * | 
|  | 3627 | * 2) There might be another process with a final reference, | 
|  | 3628 | *    waiting on us to finish processing. If this is the case, we | 
|  | 3629 | *    detect it and exit out - there's no more dentries anyway. | 
|  | 3630 | */ | 
|  | 3631 | static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, | 
|  | 3632 | int blocking) | 
|  | 3633 | { | 
|  | 3634 | struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); | 
|  | 3635 | struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode); | 
|  | 3636 | struct dentry *dentry; | 
|  | 3637 | unsigned long flags; | 
|  | 3638 | int extra_ref = 0; | 
|  | 3639 |  | 
|  | 3640 | /* | 
|  | 3641 | * This node is blocking another node from getting a read | 
|  | 3642 | * lock. This happens when we've renamed within a | 
|  | 3643 | * directory. We've forced the other nodes to d_delete(), but | 
|  | 3644 | * we never actually dropped our lock because it's still | 
|  | 3645 | * valid. The downconvert code will retain a PR for this node, | 
|  | 3646 | * so there's no further work to do. | 
|  | 3647 | */ | 
| Joel Becker | bd3e761 | 2008-02-01 12:14:57 -0800 | [diff] [blame] | 3648 | if (blocking == DLM_LOCK_PR) | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 3649 | return UNBLOCK_CONTINUE; | 
|  | 3650 |  | 
|  | 3651 | /* | 
|  | 3652 | * Mark this inode as potentially orphaned. The code in | 
|  | 3653 | * ocfs2_delete_inode() will figure out whether it actually | 
|  | 3654 | * needs to be freed or not. | 
|  | 3655 | */ | 
|  | 3656 | spin_lock(&oi->ip_lock); | 
|  | 3657 | oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; | 
|  | 3658 | spin_unlock(&oi->ip_lock); | 
|  | 3659 |  | 
|  | 3660 | /* | 
|  | 3661 | * Yuck. We need to make sure however that the check of | 
|  | 3662 | * OCFS2_LOCK_FREEING and the extra reference are atomic with | 
|  | 3663 | * respect to a reference decrement or the setting of that | 
|  | 3664 | * flag. | 
|  | 3665 | */ | 
|  | 3666 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 3667 | spin_lock(&dentry_attach_lock); | 
|  | 3668 | if (!(lockres->l_flags & OCFS2_LOCK_FREEING) | 
|  | 3669 | && dl->dl_count) { | 
|  | 3670 | dl->dl_count++; | 
|  | 3671 | extra_ref = 1; | 
|  | 3672 | } | 
|  | 3673 | spin_unlock(&dentry_attach_lock); | 
|  | 3674 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 3675 |  | 
|  | 3676 | mlog(0, "extra_ref = %d\n", extra_ref); | 
|  | 3677 |  | 
|  | 3678 | /* | 
|  | 3679 | * We have a process waiting on us in ocfs2_dentry_iput(), | 
|  | 3680 | * which means we can't have any more outstanding | 
|  | 3681 | * aliases. There's no need to do any more work. | 
|  | 3682 | */ | 
|  | 3683 | if (!extra_ref) | 
|  | 3684 | return UNBLOCK_CONTINUE; | 
|  | 3685 |  | 
|  | 3686 | spin_lock(&dentry_attach_lock); | 
|  | 3687 | while (1) { | 
|  | 3688 | dentry = ocfs2_find_local_alias(dl->dl_inode, | 
|  | 3689 | dl->dl_parent_blkno, 1); | 
|  | 3690 | if (!dentry) | 
|  | 3691 | break; | 
|  | 3692 | spin_unlock(&dentry_attach_lock); | 
|  | 3693 |  | 
|  | 3694 | mlog(0, "d_delete(%.*s);\n", dentry->d_name.len, | 
|  | 3695 | dentry->d_name.name); | 
|  | 3696 |  | 
|  | 3697 | /* | 
|  | 3698 | * The following dcache calls may do an | 
|  | 3699 | * iput(). Normally we don't want that from the | 
|  | 3700 | * downconverting thread, but in this case it's ok | 
|  | 3701 | * because the requesting node already has an | 
|  | 3702 | * exclusive lock on the inode, so it can't be queued | 
|  | 3703 | * for a downconvert. | 
|  | 3704 | */ | 
|  | 3705 | d_delete(dentry); | 
|  | 3706 | dput(dentry); | 
|  | 3707 |  | 
|  | 3708 | spin_lock(&dentry_attach_lock); | 
|  | 3709 | } | 
|  | 3710 | spin_unlock(&dentry_attach_lock); | 
|  | 3711 |  | 
|  | 3712 | /* | 
|  | 3713 | * If we are the last holder of this dentry lock, there is no | 
|  | 3714 | * reason to downconvert so skip straight to the unlock. | 
|  | 3715 | */ | 
|  | 3716 | if (dl->dl_count == 1) | 
|  | 3717 | return UNBLOCK_STOP_POST; | 
|  | 3718 |  | 
|  | 3719 | return UNBLOCK_CONTINUE_POST; | 
|  | 3720 | } | 
|  | 3721 |  | 
| Tao Ma | 8dec98e | 2009-08-18 11:19:58 +0800 | [diff] [blame] | 3722 | static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres, | 
|  | 3723 | int new_level) | 
|  | 3724 | { | 
|  | 3725 | struct ocfs2_refcount_tree *tree = | 
|  | 3726 | ocfs2_lock_res_refcount_tree(lockres); | 
|  | 3727 |  | 
|  | 3728 | return ocfs2_ci_checkpointed(&tree->rf_ci, lockres, new_level); | 
|  | 3729 | } | 
|  | 3730 |  | 
|  | 3731 | static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres, | 
|  | 3732 | int blocking) | 
|  | 3733 | { | 
|  | 3734 | struct ocfs2_refcount_tree *tree = | 
|  | 3735 | ocfs2_lock_res_refcount_tree(lockres); | 
|  | 3736 |  | 
|  | 3737 | ocfs2_metadata_cache_purge(&tree->rf_ci); | 
|  | 3738 |  | 
|  | 3739 | return UNBLOCK_CONTINUE; | 
|  | 3740 | } | 
|  | 3741 |  | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 3742 | static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres) | 
|  | 3743 | { | 
|  | 3744 | struct ocfs2_qinfo_lvb *lvb; | 
|  | 3745 | struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres); | 
|  | 3746 | struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, | 
|  | 3747 | oinfo->dqi_gi.dqi_type); | 
|  | 3748 |  | 
| Mark Fasheh | a641dc2 | 2008-12-24 16:03:48 -0800 | [diff] [blame] | 3749 | lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 3750 | lvb->lvb_version = OCFS2_QINFO_LVB_VERSION; | 
|  | 3751 | lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace); | 
|  | 3752 | lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace); | 
|  | 3753 | lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms); | 
|  | 3754 | lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks); | 
|  | 3755 | lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk); | 
|  | 3756 | lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry); | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 3757 | } | 
|  | 3758 |  | 
|  | 3759 | void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex) | 
|  | 3760 | { | 
|  | 3761 | struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; | 
|  | 3762 | struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); | 
|  | 3763 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 
|  | 3764 |  | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 3765 | if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) | 
|  | 3766 | ocfs2_cluster_unlock(osb, lockres, level); | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 3767 | } | 
|  | 3768 |  | 
|  | 3769 | static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo) | 
|  | 3770 | { | 
|  | 3771 | struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, | 
|  | 3772 | oinfo->dqi_gi.dqi_type); | 
|  | 3773 | struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; | 
|  | 3774 | struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | 
| Joel Becker | 85eb8b7 | 2008-11-25 15:31:27 +0100 | [diff] [blame] | 3775 | struct buffer_head *bh = NULL; | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 3776 | struct ocfs2_global_disk_dqinfo *gdinfo; | 
|  | 3777 | int status = 0; | 
|  | 3778 |  | 
| Joel Becker | 1c520df | 2009-06-19 15:14:13 -0700 | [diff] [blame] | 3779 | if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && | 
|  | 3780 | lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) { | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 3781 | info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace); | 
|  | 3782 | info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace); | 
|  | 3783 | oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms); | 
|  | 3784 | oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks); | 
|  | 3785 | oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk); | 
|  | 3786 | oinfo->dqi_gi.dqi_free_entry = | 
|  | 3787 | be32_to_cpu(lvb->lvb_free_entry); | 
|  | 3788 | } else { | 
| Jan Kara | ae4f6ef | 2010-04-28 19:04:29 +0200 | [diff] [blame] | 3789 | status = ocfs2_read_quota_phys_block(oinfo->dqi_gqinode, | 
|  | 3790 | oinfo->dqi_giblk, &bh); | 
| Joel Becker | 85eb8b7 | 2008-11-25 15:31:27 +0100 | [diff] [blame] | 3791 | if (status) { | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 3792 | mlog_errno(status); | 
|  | 3793 | goto bail; | 
|  | 3794 | } | 
|  | 3795 | gdinfo = (struct ocfs2_global_disk_dqinfo *) | 
|  | 3796 | (bh->b_data + OCFS2_GLOBAL_INFO_OFF); | 
|  | 3797 | info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace); | 
|  | 3798 | info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace); | 
|  | 3799 | oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms); | 
|  | 3800 | oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks); | 
|  | 3801 | oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk); | 
|  | 3802 | oinfo->dqi_gi.dqi_free_entry = | 
|  | 3803 | le32_to_cpu(gdinfo->dqi_free_entry); | 
|  | 3804 | brelse(bh); | 
|  | 3805 | ocfs2_track_lock_refresh(lockres); | 
|  | 3806 | } | 
|  | 3807 |  | 
|  | 3808 | bail: | 
|  | 3809 | return status; | 
|  | 3810 | } | 
|  | 3811 |  | 
|  | 3812 | /* Lock quota info, this function expects at least shared lock on the quota file | 
|  | 3813 | * so that we can safely refresh quota info from disk. */ | 
|  | 3814 | int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex) | 
|  | 3815 | { | 
|  | 3816 | struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; | 
|  | 3817 | struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); | 
|  | 3818 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 
|  | 3819 | int status = 0; | 
|  | 3820 |  | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 3821 | /* On RO devices, locking really isn't needed... */ | 
|  | 3822 | if (ocfs2_is_hard_readonly(osb)) { | 
|  | 3823 | if (ex) | 
|  | 3824 | status = -EROFS; | 
|  | 3825 | goto bail; | 
|  | 3826 | } | 
|  | 3827 | if (ocfs2_mount_local(osb)) | 
|  | 3828 | goto bail; | 
|  | 3829 |  | 
|  | 3830 | status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); | 
|  | 3831 | if (status < 0) { | 
|  | 3832 | mlog_errno(status); | 
|  | 3833 | goto bail; | 
|  | 3834 | } | 
|  | 3835 | if (!ocfs2_should_refresh_lock_res(lockres)) | 
|  | 3836 | goto bail; | 
|  | 3837 | /* OK, we have the lock but we need to refresh the quota info */ | 
|  | 3838 | status = ocfs2_refresh_qinfo(oinfo); | 
|  | 3839 | if (status) | 
|  | 3840 | ocfs2_qinfo_unlock(oinfo, ex); | 
|  | 3841 | ocfs2_complete_lock_res_refresh(lockres, status); | 
|  | 3842 | bail: | 
| Jan Kara | 9e33d69 | 2008-08-25 19:56:50 +0200 | [diff] [blame] | 3843 | return status; | 
|  | 3844 | } | 
|  | 3845 |  | 
| Tao Ma | 8dec98e | 2009-08-18 11:19:58 +0800 | [diff] [blame] | 3846 | int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex) | 
|  | 3847 | { | 
|  | 3848 | int status; | 
|  | 3849 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 
|  | 3850 | struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres; | 
|  | 3851 | struct ocfs2_super *osb = lockres->l_priv; | 
|  | 3852 |  | 
|  | 3853 |  | 
|  | 3854 | if (ocfs2_is_hard_readonly(osb)) | 
|  | 3855 | return -EROFS; | 
|  | 3856 |  | 
|  | 3857 | if (ocfs2_mount_local(osb)) | 
|  | 3858 | return 0; | 
|  | 3859 |  | 
|  | 3860 | status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); | 
|  | 3861 | if (status < 0) | 
|  | 3862 | mlog_errno(status); | 
|  | 3863 |  | 
|  | 3864 | return status; | 
|  | 3865 | } | 
|  | 3866 |  | 
|  | 3867 | void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex) | 
|  | 3868 | { | 
|  | 3869 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 
|  | 3870 | struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres; | 
|  | 3871 | struct ocfs2_super *osb = lockres->l_priv; | 
|  | 3872 |  | 
|  | 3873 | if (!ocfs2_mount_local(osb)) | 
|  | 3874 | ocfs2_cluster_unlock(osb, lockres, level); | 
|  | 3875 | } | 
|  | 3876 |  | 
| Adrian Bunk | 0060005 | 2008-01-29 00:11:41 +0200 | [diff] [blame] | 3877 | static void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | 
|  | 3878 | struct ocfs2_lock_res *lockres) | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3879 | { | 
|  | 3880 | int status; | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 3881 | struct ocfs2_unblock_ctl ctl = {0, 0,}; | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3882 | unsigned long flags; | 
|  | 3883 |  | 
|  | 3884 | /* Our reference to the lockres in this function can be | 
|  | 3885 | * considered valid until we remove the OCFS2_LOCK_QUEUED | 
|  | 3886 | * flag. */ | 
|  | 3887 |  | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3888 | BUG_ON(!lockres); | 
|  | 3889 | BUG_ON(!lockres->l_ops); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3890 |  | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3891 | mlog(ML_BASTS, "lockres %s blocked\n", lockres->l_name); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3892 |  | 
|  | 3893 | /* Detect whether a lock has been marked as going away while | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 3894 | * the downconvert thread was processing other things. A lock can | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3895 | * still be marked with OCFS2_LOCK_FREEING after this check, | 
|  | 3896 | * but short circuiting here will still save us some | 
|  | 3897 | * performance. */ | 
|  | 3898 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 3899 | if (lockres->l_flags & OCFS2_LOCK_FREEING) | 
|  | 3900 | goto unqueue; | 
|  | 3901 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 3902 |  | 
| Mark Fasheh | b5e500e | 2006-09-13 22:01:16 -0700 | [diff] [blame] | 3903 | status = ocfs2_unblock_lock(osb, lockres, &ctl); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3904 | if (status < 0) | 
|  | 3905 | mlog_errno(status); | 
|  | 3906 |  | 
|  | 3907 | spin_lock_irqsave(&lockres->l_lock, flags); | 
|  | 3908 | unqueue: | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 3909 | if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3910 | lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED); | 
|  | 3911 | } else | 
|  | 3912 | ocfs2_schedule_blocked_lock(osb, lockres); | 
|  | 3913 |  | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3914 | mlog(ML_BASTS, "lockres %s, requeue = %s.\n", lockres->l_name, | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 3915 | ctl.requeue ? "yes" : "no"); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3916 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
|  | 3917 |  | 
| Mark Fasheh | d680efe | 2006-09-08 14:14:34 -0700 | [diff] [blame] | 3918 | if (ctl.unblock_action != UNBLOCK_CONTINUE | 
|  | 3919 | && lockres->l_ops->post_unlock) | 
|  | 3920 | lockres->l_ops->post_unlock(osb, lockres); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3921 | } | 
|  | 3922 |  | 
|  | 3923 | static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, | 
|  | 3924 | struct ocfs2_lock_res *lockres) | 
|  | 3925 | { | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3926 | assert_spin_locked(&lockres->l_lock); | 
|  | 3927 |  | 
|  | 3928 | if (lockres->l_flags & OCFS2_LOCK_FREEING) { | 
|  | 3929 | /* Do not schedule a lock for downconvert when it's on | 
|  | 3930 | * the way to destruction - any nodes wanting access | 
|  | 3931 | * to the resource will get it soon. */ | 
| Sunil Mushran | 9b91518 | 2010-02-26 19:42:44 -0800 | [diff] [blame] | 3932 | mlog(ML_BASTS, "lockres %s won't be scheduled: flags 0x%lx\n", | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3933 | lockres->l_name, lockres->l_flags); | 
|  | 3934 | return; | 
|  | 3935 | } | 
|  | 3936 |  | 
|  | 3937 | lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); | 
|  | 3938 |  | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 3939 | spin_lock(&osb->dc_task_lock); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3940 | if (list_empty(&lockres->l_blocked_list)) { | 
|  | 3941 | list_add_tail(&lockres->l_blocked_list, | 
|  | 3942 | &osb->blocked_lock_list); | 
|  | 3943 | osb->blocked_lock_count++; | 
|  | 3944 | } | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 3945 | spin_unlock(&osb->dc_task_lock); | 
| Mark Fasheh | ccd979b | 2005-12-15 14:31:24 -0800 | [diff] [blame] | 3946 | } | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 3947 |  | 
|  | 3948 | static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) | 
|  | 3949 | { | 
|  | 3950 | unsigned long processed; | 
|  | 3951 | struct ocfs2_lock_res *lockres; | 
|  | 3952 |  | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 3953 | spin_lock(&osb->dc_task_lock); | 
|  | 3954 | /* grab this early so we know to try again if a state change and | 
|  | 3955 | * wake happens part-way through our work  */ | 
|  | 3956 | osb->dc_work_sequence = osb->dc_wake_sequence; | 
|  | 3957 |  | 
|  | 3958 | processed = osb->blocked_lock_count; | 
|  | 3959 | while (processed) { | 
|  | 3960 | BUG_ON(list_empty(&osb->blocked_lock_list)); | 
|  | 3961 |  | 
|  | 3962 | lockres = list_entry(osb->blocked_lock_list.next, | 
|  | 3963 | struct ocfs2_lock_res, l_blocked_list); | 
|  | 3964 | list_del_init(&lockres->l_blocked_list); | 
|  | 3965 | osb->blocked_lock_count--; | 
|  | 3966 | spin_unlock(&osb->dc_task_lock); | 
|  | 3967 |  | 
|  | 3968 | BUG_ON(!processed); | 
|  | 3969 | processed--; | 
|  | 3970 |  | 
|  | 3971 | ocfs2_process_blocked_lock(osb, lockres); | 
|  | 3972 |  | 
|  | 3973 | spin_lock(&osb->dc_task_lock); | 
|  | 3974 | } | 
|  | 3975 | spin_unlock(&osb->dc_task_lock); | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 3976 | } | 
|  | 3977 |  | 
|  | 3978 | static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) | 
|  | 3979 | { | 
|  | 3980 | int empty = 0; | 
|  | 3981 |  | 
|  | 3982 | spin_lock(&osb->dc_task_lock); | 
|  | 3983 | if (list_empty(&osb->blocked_lock_list)) | 
|  | 3984 | empty = 1; | 
|  | 3985 |  | 
|  | 3986 | spin_unlock(&osb->dc_task_lock); | 
|  | 3987 | return empty; | 
|  | 3988 | } | 
|  | 3989 |  | 
|  | 3990 | static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) | 
|  | 3991 | { | 
|  | 3992 | int should_wake = 0; | 
|  | 3993 |  | 
|  | 3994 | spin_lock(&osb->dc_task_lock); | 
|  | 3995 | if (osb->dc_work_sequence != osb->dc_wake_sequence) | 
|  | 3996 | should_wake = 1; | 
|  | 3997 | spin_unlock(&osb->dc_task_lock); | 
|  | 3998 |  | 
|  | 3999 | return should_wake; | 
|  | 4000 | } | 
|  | 4001 |  | 
| Adrian Bunk | 200bfae | 2008-02-17 10:20:38 +0200 | [diff] [blame] | 4002 | static int ocfs2_downconvert_thread(void *arg) | 
| Mark Fasheh | 34d024f | 2007-09-24 15:56:19 -0700 | [diff] [blame] | 4003 | { | 
|  | 4004 | int status = 0; | 
|  | 4005 | struct ocfs2_super *osb = arg; | 
|  | 4006 |  | 
|  | 4007 | /* only quit once we've been asked to stop and there is no more | 
|  | 4008 | * work available */ | 
|  | 4009 | while (!(kthread_should_stop() && | 
|  | 4010 | ocfs2_downconvert_thread_lists_empty(osb))) { | 
|  | 4011 |  | 
|  | 4012 | wait_event_interruptible(osb->dc_event, | 
|  | 4013 | ocfs2_downconvert_thread_should_wake(osb) || | 
|  | 4014 | kthread_should_stop()); | 
|  | 4015 |  | 
|  | 4016 | mlog(0, "downconvert_thread: awoken\n"); | 
|  | 4017 |  | 
|  | 4018 | ocfs2_downconvert_thread_do_work(osb); | 
|  | 4019 | } | 
|  | 4020 |  | 
|  | 4021 | osb->dc_task = NULL; | 
|  | 4022 | return status; | 
|  | 4023 | } | 
|  | 4024 |  | 
|  | 4025 | void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb) | 
|  | 4026 | { | 
|  | 4027 | spin_lock(&osb->dc_task_lock); | 
|  | 4028 | /* make sure the voting thread gets a swipe at whatever changes | 
|  | 4029 | * the caller may have made to the voting state */ | 
|  | 4030 | osb->dc_wake_sequence++; | 
|  | 4031 | spin_unlock(&osb->dc_task_lock); | 
|  | 4032 | wake_up(&osb->dc_event); | 
|  | 4033 | } |