| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 1 | /****************************************************************************** | 
|  | 2 | ******************************************************************************* | 
|  | 3 | ** | 
|  | 4 | **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved. | 
| David Teigland | 3ae1acf | 2007-05-18 08:59:31 -0500 | [diff] [blame] | 5 | **  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved. | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 6 | ** | 
|  | 7 | **  This copyrighted material is made available to anyone wishing to use, | 
|  | 8 | **  modify, copy, or redistribute it subject to the terms and conditions | 
|  | 9 | **  of the GNU General Public License v.2. | 
|  | 10 | ** | 
|  | 11 | ******************************************************************************* | 
|  | 12 | ******************************************************************************/ | 
|  | 13 |  | 
|  | 14 | #include "dlm_internal.h" | 
|  | 15 | #include "lockspace.h" | 
|  | 16 | #include "member.h" | 
|  | 17 | #include "dir.h" | 
|  | 18 | #include "ast.h" | 
|  | 19 | #include "recover.h" | 
|  | 20 | #include "lowcomms.h" | 
|  | 21 | #include "lock.h" | 
|  | 22 | #include "requestqueue.h" | 
|  | 23 | #include "recoverd.h" | 
|  | 24 |  | 
|  | 25 |  | 
|  | 26 | /* If the start for which we're re-enabling locking (seq) has been superseded | 
| David Teigland | c36258b | 2007-09-27 15:53:38 -0500 | [diff] [blame] | 27 | by a newer stop (ls_recover_seq), we need to leave locking disabled. | 
|  | 28 |  | 
|  | 29 | We suspend dlm_recv threads here to avoid the race where dlm_recv a) sees | 
|  | 30 | locking stopped and b) adds a message to the requestqueue, but dlm_recoverd | 
|  | 31 | enables locking and clears the requestqueue between a and b. */ | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 32 |  | 
|  | 33 | static int enable_locking(struct dlm_ls *ls, uint64_t seq) | 
|  | 34 | { | 
|  | 35 | int error = -EINTR; | 
|  | 36 |  | 
| David Teigland | c36258b | 2007-09-27 15:53:38 -0500 | [diff] [blame] | 37 | down_write(&ls->ls_recv_active); | 
|  | 38 |  | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 39 | spin_lock(&ls->ls_recover_lock); | 
|  | 40 | if (ls->ls_recover_seq == seq) { | 
|  | 41 | set_bit(LSFL_RUNNING, &ls->ls_flags); | 
| David Teigland | c36258b | 2007-09-27 15:53:38 -0500 | [diff] [blame] | 42 | /* unblocks processes waiting to enter the dlm */ | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 43 | up_write(&ls->ls_in_recovery); | 
|  | 44 | error = 0; | 
|  | 45 | } | 
|  | 46 | spin_unlock(&ls->ls_recover_lock); | 
| David Teigland | c36258b | 2007-09-27 15:53:38 -0500 | [diff] [blame] | 47 |  | 
|  | 48 | up_write(&ls->ls_recv_active); | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 49 | return error; | 
|  | 50 | } | 
|  | 51 |  | 
|  | 52 | static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | 
|  | 53 | { | 
|  | 54 | unsigned long start; | 
|  | 55 | int error, neg = 0; | 
|  | 56 |  | 
| Ryusuke Konishi | 57adf7e | 2006-11-29 09:33:48 -0500 | [diff] [blame] | 57 | log_debug(ls, "recover %llx", (unsigned long long)rv->seq); | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 58 |  | 
| David Teigland | 9013592 | 2006-01-20 08:47:07 +0000 | [diff] [blame] | 59 | mutex_lock(&ls->ls_recoverd_active); | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 60 |  | 
|  | 61 | /* | 
|  | 62 | * Suspending and resuming dlm_astd ensures that no lkb's from this ls | 
|  | 63 | * will be processed by dlm_astd during recovery. | 
|  | 64 | */ | 
|  | 65 |  | 
|  | 66 | dlm_astd_suspend(); | 
|  | 67 | dlm_astd_resume(); | 
|  | 68 |  | 
|  | 69 | /* | 
| David Teigland | 85f0379 | 2008-01-16 13:02:31 -0600 | [diff] [blame] | 70 | * Free non-master tossed rsb's.  Master rsb's are kept on toss | 
|  | 71 | * list and put on root list to be included in resdir recovery. | 
|  | 72 | */ | 
|  | 73 |  | 
|  | 74 | dlm_clear_toss_list(ls); | 
|  | 75 |  | 
|  | 76 | /* | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 77 | * This list of root rsb's will be the basis of most of the recovery | 
|  | 78 | * routines. | 
|  | 79 | */ | 
|  | 80 |  | 
|  | 81 | dlm_create_root_list(ls); | 
|  | 82 |  | 
|  | 83 | /* | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 84 | * Add or remove nodes from the lockspace's ls_nodes list. | 
|  | 85 | * Also waits for all nodes to complete dlm_recover_members. | 
|  | 86 | */ | 
|  | 87 |  | 
|  | 88 | error = dlm_recover_members(ls, rv, &neg); | 
|  | 89 | if (error) { | 
| David Teigland | 8ec6886 | 2007-01-09 09:38:39 -0600 | [diff] [blame] | 90 | log_debug(ls, "recover_members failed %d", error); | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 91 | goto fail; | 
|  | 92 | } | 
|  | 93 | start = jiffies; | 
|  | 94 |  | 
|  | 95 | /* | 
|  | 96 | * Rebuild our own share of the directory by collecting from all other | 
|  | 97 | * nodes their master rsb names that hash to us. | 
|  | 98 | */ | 
|  | 99 |  | 
|  | 100 | error = dlm_recover_directory(ls); | 
|  | 101 | if (error) { | 
| David Teigland | 8ec6886 | 2007-01-09 09:38:39 -0600 | [diff] [blame] | 102 | log_debug(ls, "recover_directory failed %d", error); | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 103 | goto fail; | 
|  | 104 | } | 
|  | 105 |  | 
|  | 106 | /* | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 107 | * Wait for all nodes to complete directory rebuild. | 
|  | 108 | */ | 
|  | 109 |  | 
|  | 110 | error = dlm_recover_directory_wait(ls); | 
|  | 111 | if (error) { | 
| David Teigland | 8ec6886 | 2007-01-09 09:38:39 -0600 | [diff] [blame] | 112 | log_debug(ls, "recover_directory_wait failed %d", error); | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 113 | goto fail; | 
|  | 114 | } | 
|  | 115 |  | 
|  | 116 | /* | 
|  | 117 | * We may have outstanding operations that are waiting for a reply from | 
|  | 118 | * a failed node.  Mark these to be resent after recovery.  Unlock and | 
|  | 119 | * cancel ops can just be completed. | 
|  | 120 | */ | 
|  | 121 |  | 
|  | 122 | dlm_recover_waiters_pre(ls); | 
|  | 123 |  | 
|  | 124 | error = dlm_recovery_stopped(ls); | 
|  | 125 | if (error) | 
|  | 126 | goto fail; | 
|  | 127 |  | 
|  | 128 | if (neg || dlm_no_directory(ls)) { | 
|  | 129 | /* | 
|  | 130 | * Clear lkb's for departed nodes. | 
|  | 131 | */ | 
|  | 132 |  | 
|  | 133 | dlm_purge_locks(ls); | 
|  | 134 |  | 
|  | 135 | /* | 
|  | 136 | * Get new master nodeid's for rsb's that were mastered on | 
|  | 137 | * departed nodes. | 
|  | 138 | */ | 
|  | 139 |  | 
|  | 140 | error = dlm_recover_masters(ls); | 
|  | 141 | if (error) { | 
| David Teigland | 8ec6886 | 2007-01-09 09:38:39 -0600 | [diff] [blame] | 142 | log_debug(ls, "recover_masters failed %d", error); | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 143 | goto fail; | 
|  | 144 | } | 
|  | 145 |  | 
|  | 146 | /* | 
|  | 147 | * Send our locks on remastered rsb's to the new masters. | 
|  | 148 | */ | 
|  | 149 |  | 
|  | 150 | error = dlm_recover_locks(ls); | 
|  | 151 | if (error) { | 
| David Teigland | 8ec6886 | 2007-01-09 09:38:39 -0600 | [diff] [blame] | 152 | log_debug(ls, "recover_locks failed %d", error); | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 153 | goto fail; | 
|  | 154 | } | 
|  | 155 |  | 
|  | 156 | error = dlm_recover_locks_wait(ls); | 
|  | 157 | if (error) { | 
| David Teigland | 8ec6886 | 2007-01-09 09:38:39 -0600 | [diff] [blame] | 158 | log_debug(ls, "recover_locks_wait failed %d", error); | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 159 | goto fail; | 
|  | 160 | } | 
|  | 161 |  | 
|  | 162 | /* | 
|  | 163 | * Finalize state in master rsb's now that all locks can be | 
|  | 164 | * checked.  This includes conversion resolution and lvb | 
|  | 165 | * settings. | 
|  | 166 | */ | 
|  | 167 |  | 
|  | 168 | dlm_recover_rsbs(ls); | 
| David Teigland | 91c0dc9 | 2006-10-31 11:56:01 -0600 | [diff] [blame] | 169 | } else { | 
|  | 170 | /* | 
|  | 171 | * Other lockspace members may be going through the "neg" steps | 
|  | 172 | * while also adding us to the lockspace, in which case they'll | 
| David Teigland | 4b77f2c | 2006-11-01 09:31:48 -0600 | [diff] [blame] | 173 | * be doing the recover_locks (RS_LOCKS) barrier. | 
| David Teigland | 91c0dc9 | 2006-10-31 11:56:01 -0600 | [diff] [blame] | 174 | */ | 
|  | 175 | dlm_set_recover_status(ls, DLM_RS_LOCKS); | 
| David Teigland | 4b77f2c | 2006-11-01 09:31:48 -0600 | [diff] [blame] | 176 |  | 
|  | 177 | error = dlm_recover_locks_wait(ls); | 
|  | 178 | if (error) { | 
| David Teigland | 8ec6886 | 2007-01-09 09:38:39 -0600 | [diff] [blame] | 179 | log_debug(ls, "recover_locks_wait failed %d", error); | 
| David Teigland | 4b77f2c | 2006-11-01 09:31:48 -0600 | [diff] [blame] | 180 | goto fail; | 
|  | 181 | } | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 182 | } | 
|  | 183 |  | 
|  | 184 | dlm_release_root_list(ls); | 
|  | 185 |  | 
| David Teigland | 2896ee3 | 2006-11-27 11:31:22 -0600 | [diff] [blame] | 186 | /* | 
|  | 187 | * Purge directory-related requests that are saved in requestqueue. | 
|  | 188 | * All dir requests from before recovery are invalid now due to the dir | 
|  | 189 | * rebuild and will be resent by the requesting nodes. | 
|  | 190 | */ | 
|  | 191 |  | 
|  | 192 | dlm_purge_requestqueue(ls); | 
|  | 193 |  | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 194 | dlm_set_recover_status(ls, DLM_RS_DONE); | 
|  | 195 | error = dlm_recover_done_wait(ls); | 
|  | 196 | if (error) { | 
| David Teigland | 8ec6886 | 2007-01-09 09:38:39 -0600 | [diff] [blame] | 197 | log_debug(ls, "recover_done_wait failed %d", error); | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 198 | goto fail; | 
|  | 199 | } | 
|  | 200 |  | 
|  | 201 | dlm_clear_members_gone(ls); | 
|  | 202 |  | 
| David Teigland | 3ae1acf | 2007-05-18 08:59:31 -0500 | [diff] [blame] | 203 | dlm_adjust_timeouts(ls); | 
|  | 204 |  | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 205 | error = enable_locking(ls, rv->seq); | 
|  | 206 | if (error) { | 
| David Teigland | 8ec6886 | 2007-01-09 09:38:39 -0600 | [diff] [blame] | 207 | log_debug(ls, "enable_locking failed %d", error); | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 208 | goto fail; | 
|  | 209 | } | 
|  | 210 |  | 
|  | 211 | error = dlm_process_requestqueue(ls); | 
|  | 212 | if (error) { | 
| David Teigland | 8ec6886 | 2007-01-09 09:38:39 -0600 | [diff] [blame] | 213 | log_debug(ls, "process_requestqueue failed %d", error); | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 214 | goto fail; | 
|  | 215 | } | 
|  | 216 |  | 
|  | 217 | error = dlm_recover_waiters_post(ls); | 
|  | 218 | if (error) { | 
| David Teigland | 8ec6886 | 2007-01-09 09:38:39 -0600 | [diff] [blame] | 219 | log_debug(ls, "recover_waiters_post failed %d", error); | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 220 | goto fail; | 
|  | 221 | } | 
|  | 222 |  | 
|  | 223 | dlm_grant_after_purge(ls); | 
|  | 224 |  | 
|  | 225 | dlm_astd_wake(); | 
|  | 226 |  | 
| Ryusuke Konishi | 57adf7e | 2006-11-29 09:33:48 -0500 | [diff] [blame] | 227 | log_debug(ls, "recover %llx done: %u ms", | 
|  | 228 | (unsigned long long)rv->seq, | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 229 | jiffies_to_msecs(jiffies - start)); | 
| David Teigland | 9013592 | 2006-01-20 08:47:07 +0000 | [diff] [blame] | 230 | mutex_unlock(&ls->ls_recoverd_active); | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 231 |  | 
|  | 232 | return 0; | 
|  | 233 |  | 
|  | 234 | fail: | 
|  | 235 | dlm_release_root_list(ls); | 
| Ryusuke Konishi | 57adf7e | 2006-11-29 09:33:48 -0500 | [diff] [blame] | 236 | log_debug(ls, "recover %llx error %d", | 
|  | 237 | (unsigned long long)rv->seq, error); | 
| David Teigland | 9013592 | 2006-01-20 08:47:07 +0000 | [diff] [blame] | 238 | mutex_unlock(&ls->ls_recoverd_active); | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 239 | return error; | 
|  | 240 | } | 
|  | 241 |  | 
| David Teigland | 2cdc98a | 2006-10-31 11:56:08 -0600 | [diff] [blame] | 242 | /* The dlm_ls_start() that created the rv we take here may already have been | 
|  | 243 | stopped via dlm_ls_stop(); in that case we need to leave the RECOVERY_STOP | 
|  | 244 | flag set. */ | 
|  | 245 |  | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 246 | static void do_ls_recovery(struct dlm_ls *ls) | 
|  | 247 | { | 
|  | 248 | struct dlm_recover *rv = NULL; | 
|  | 249 |  | 
|  | 250 | spin_lock(&ls->ls_recover_lock); | 
|  | 251 | rv = ls->ls_recover_args; | 
|  | 252 | ls->ls_recover_args = NULL; | 
| David Teigland | 2cdc98a | 2006-10-31 11:56:08 -0600 | [diff] [blame] | 253 | if (rv && ls->ls_recover_seq == rv->seq) | 
|  | 254 | clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 255 | spin_unlock(&ls->ls_recover_lock); | 
|  | 256 |  | 
|  | 257 | if (rv) { | 
|  | 258 | ls_recover(ls, rv); | 
|  | 259 | kfree(rv->nodeids); | 
| David Teigland | d44e0fc | 2008-03-18 14:22:11 -0500 | [diff] [blame] | 260 | kfree(rv->new); | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 261 | kfree(rv); | 
|  | 262 | } | 
|  | 263 | } | 
|  | 264 |  | 
|  | 265 | static int dlm_recoverd(void *arg) | 
|  | 266 | { | 
|  | 267 | struct dlm_ls *ls; | 
|  | 268 |  | 
|  | 269 | ls = dlm_find_lockspace_local(arg); | 
| David Teigland | 5f88f1e | 2006-08-24 14:47:20 -0500 | [diff] [blame] | 270 | if (!ls) { | 
|  | 271 | log_print("dlm_recoverd: no lockspace %p", arg); | 
|  | 272 | return -1; | 
|  | 273 | } | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 274 |  | 
|  | 275 | while (!kthread_should_stop()) { | 
|  | 276 | set_current_state(TASK_INTERRUPTIBLE); | 
|  | 277 | if (!test_bit(LSFL_WORK, &ls->ls_flags)) | 
|  | 278 | schedule(); | 
|  | 279 | set_current_state(TASK_RUNNING); | 
|  | 280 |  | 
|  | 281 | if (test_and_clear_bit(LSFL_WORK, &ls->ls_flags)) | 
|  | 282 | do_ls_recovery(ls); | 
|  | 283 | } | 
|  | 284 |  | 
|  | 285 | dlm_put_lockspace(ls); | 
|  | 286 | return 0; | 
|  | 287 | } | 
|  | 288 |  | 
|  | 289 | void dlm_recoverd_kick(struct dlm_ls *ls) | 
|  | 290 | { | 
|  | 291 | set_bit(LSFL_WORK, &ls->ls_flags); | 
|  | 292 | wake_up_process(ls->ls_recoverd_task); | 
|  | 293 | } | 
|  | 294 |  | 
|  | 295 | int dlm_recoverd_start(struct dlm_ls *ls) | 
|  | 296 | { | 
|  | 297 | struct task_struct *p; | 
|  | 298 | int error = 0; | 
|  | 299 |  | 
|  | 300 | p = kthread_run(dlm_recoverd, ls, "dlm_recoverd"); | 
|  | 301 | if (IS_ERR(p)) | 
|  | 302 | error = PTR_ERR(p); | 
|  | 303 | else | 
|  | 304 | ls->ls_recoverd_task = p; | 
|  | 305 | return error; | 
|  | 306 | } | 
|  | 307 |  | 
|  | 308 | void dlm_recoverd_stop(struct dlm_ls *ls) | 
|  | 309 | { | 
|  | 310 | kthread_stop(ls->ls_recoverd_task); | 
|  | 311 | } | 
|  | 312 |  | 
|  | 313 | void dlm_recoverd_suspend(struct dlm_ls *ls) | 
|  | 314 | { | 
| David Teigland | f6db1b8 | 2006-08-08 17:06:07 -0500 | [diff] [blame] | 315 | wake_up(&ls->ls_wait_general); | 
| David Teigland | 9013592 | 2006-01-20 08:47:07 +0000 | [diff] [blame] | 316 | mutex_lock(&ls->ls_recoverd_active); | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 317 | } | 
|  | 318 |  | 
|  | 319 | void dlm_recoverd_resume(struct dlm_ls *ls) | 
|  | 320 | { | 
| David Teigland | 9013592 | 2006-01-20 08:47:07 +0000 | [diff] [blame] | 321 | mutex_unlock(&ls->ls_recoverd_active); | 
| David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 322 | } | 
|  | 323 |  |