| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* | 
|  | 2 | *  scsi_error.c Copyright (C) 1997 Eric Youngdale | 
|  | 3 | * | 
|  | 4 | *  SCSI error/timeout handling | 
|  | 5 | *      Initial versions: Eric Youngdale.  Based upon conversations with | 
|  | 6 | *                        Leonard Zubkoff and David Miller at Linux Expo, | 
|  | 7 | *                        ideas originating from all over the place. | 
|  | 8 | * | 
|  | 9 | *	Restructured scsi_unjam_host and associated functions. | 
|  | 10 | *	September 04, 2002 Mike Anderson (andmike@us.ibm.com) | 
|  | 11 | * | 
|  | 12 | *	Forward port of Russell King's (rmk@arm.linux.org.uk) changes and | 
|  | 13 | *	minor  cleanups. | 
|  | 14 | *	September 30, 2002 Mike Anderson (andmike@us.ibm.com) | 
|  | 15 | */ | 
|  | 16 |  | 
|  | 17 | #include <linux/module.h> | 
|  | 18 | #include <linux/sched.h> | 
|  | 19 | #include <linux/timer.h> | 
|  | 20 | #include <linux/string.h> | 
|  | 21 | #include <linux/slab.h> | 
|  | 22 | #include <linux/kernel.h> | 
| Christoph Hellwig | c5478de | 2005-09-06 14:04:26 +0200 | [diff] [blame] | 23 | #include <linux/kthread.h> | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 24 | #include <linux/interrupt.h> | 
|  | 25 | #include <linux/blkdev.h> | 
|  | 26 | #include <linux/delay.h> | 
|  | 27 |  | 
|  | 28 | #include <scsi/scsi.h> | 
|  | 29 | #include <scsi/scsi_dbg.h> | 
|  | 30 | #include <scsi/scsi_device.h> | 
|  | 31 | #include <scsi/scsi_eh.h> | 
| James Smart | c829c39 | 2006-03-13 08:28:57 -0500 | [diff] [blame] | 32 | #include <scsi/scsi_transport.h> | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 33 | #include <scsi/scsi_host.h> | 
|  | 34 | #include <scsi/scsi_ioctl.h> | 
|  | 35 | #include <scsi/scsi_request.h> | 
|  | 36 |  | 
|  | 37 | #include "scsi_priv.h" | 
|  | 38 | #include "scsi_logging.h" | 
|  | 39 |  | 
/*
 * Timeouts for commands issued by the error handler itself.  They are
 * expressed as multiples of HZ, i.e. in jiffies.
 */
#define SENSE_TIMEOUT		(10 * HZ)
#define START_UNIT_TIMEOUT	(30 * HZ)

/*
 * Settle times used after a bus or host reset.
 * NOTE(review): the unit is not visible in this part of the file -
 * presumably seconds; confirm at the point of use.
 *
 * These should *probably* be handled by the host itself.
 * Since it is allowed to sleep, it probably should.
 */
#define BUS_RESET_SETTLE_TIME	(10)
#define HOST_RESET_SETTLE_TIME	(10)
|  | 49 |  | 
|  | 50 | /* called with shost->host_lock held */ | 
|  | 51 | void scsi_eh_wakeup(struct Scsi_Host *shost) | 
|  | 52 | { | 
|  | 53 | if (shost->host_busy == shost->host_failed) { | 
| James Bottomley | 3ed7a47 | 2005-09-19 09:50:04 -0500 | [diff] [blame] | 54 | wake_up_process(shost->ehandler); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 55 | SCSI_LOG_ERROR_RECOVERY(5, | 
|  | 56 | printk("Waking error handler thread\n")); | 
|  | 57 | } | 
|  | 58 | } | 
|  | 59 |  | 
|  | 60 | /** | 
|  | 61 | * scsi_eh_scmd_add - add scsi cmd to error handling. | 
|  | 62 | * @scmd:	scmd to run eh on. | 
|  | 63 | * @eh_flag:	optional SCSI_EH flag. | 
|  | 64 | * | 
|  | 65 | * Return value: | 
|  | 66 | *	0 on failure. | 
|  | 67 | **/ | 
|  | 68 | int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag) | 
|  | 69 | { | 
|  | 70 | struct Scsi_Host *shost = scmd->device->host; | 
|  | 71 | unsigned long flags; | 
| James Bottomley | 939647e | 2005-09-18 15:05:20 -0500 | [diff] [blame] | 72 | int ret = 0; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 73 |  | 
| James Bottomley | 3ed7a47 | 2005-09-19 09:50:04 -0500 | [diff] [blame] | 74 | if (!shost->ehandler) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 75 | return 0; | 
|  | 76 |  | 
|  | 77 | spin_lock_irqsave(shost->host_lock, flags); | 
| James Bottomley | 939647e | 2005-09-18 15:05:20 -0500 | [diff] [blame] | 78 | if (scsi_host_set_state(shost, SHOST_RECOVERY)) | 
|  | 79 | if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY)) | 
|  | 80 | goto out_unlock; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 81 |  | 
| James Bottomley | 939647e | 2005-09-18 15:05:20 -0500 | [diff] [blame] | 82 | ret = 1; | 
| Christoph Hellwig | 3111b0d | 2005-06-19 13:43:26 +0200 | [diff] [blame] | 83 | scmd->eh_eflags |= eh_flag; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 84 | list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 85 | shost->host_failed++; | 
|  | 86 | scsi_eh_wakeup(shost); | 
| James Bottomley | 939647e | 2005-09-18 15:05:20 -0500 | [diff] [blame] | 87 | out_unlock: | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 88 | spin_unlock_irqrestore(shost->host_lock, flags); | 
| James Bottomley | 939647e | 2005-09-18 15:05:20 -0500 | [diff] [blame] | 89 | return ret; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 90 | } | 
|  | 91 |  | 
|  | 92 | /** | 
|  | 93 | * scsi_add_timer - Start timeout timer for a single scsi command. | 
|  | 94 | * @scmd:	scsi command that is about to start running. | 
|  | 95 | * @timeout:	amount of time to allow this command to run. | 
|  | 96 | * @complete:	timeout function to call if timer isn't canceled. | 
|  | 97 | * | 
|  | 98 | * Notes: | 
|  | 99 | *    This should be turned into an inline function.  Each scsi command | 
|  | 100 | *    has its own timer, and as it is added to the queue, we set up the | 
|  | 101 | *    timer.  When the command completes, we cancel the timer. | 
|  | 102 | **/ | 
|  | 103 | void scsi_add_timer(struct scsi_cmnd *scmd, int timeout, | 
|  | 104 | void (*complete)(struct scsi_cmnd *)) | 
|  | 105 | { | 
|  | 106 |  | 
|  | 107 | /* | 
|  | 108 | * If the clock was already running for this command, then | 
|  | 109 | * first delete the timer.  The timer handling code gets rather | 
|  | 110 | * confused if we don't do this. | 
|  | 111 | */ | 
|  | 112 | if (scmd->eh_timeout.function) | 
|  | 113 | del_timer(&scmd->eh_timeout); | 
|  | 114 |  | 
|  | 115 | scmd->eh_timeout.data = (unsigned long)scmd; | 
|  | 116 | scmd->eh_timeout.expires = jiffies + timeout; | 
|  | 117 | scmd->eh_timeout.function = (void (*)(unsigned long)) complete; | 
|  | 118 |  | 
|  | 119 | SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p, time:" | 
|  | 120 | " %d, (%p)\n", __FUNCTION__, | 
|  | 121 | scmd, timeout, complete)); | 
|  | 122 |  | 
|  | 123 | add_timer(&scmd->eh_timeout); | 
|  | 124 | } | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 125 |  | 
|  | 126 | /** | 
|  | 127 | * scsi_delete_timer - Delete/cancel timer for a given function. | 
|  | 128 | * @scmd:	Cmd that we are canceling timer for | 
|  | 129 | * | 
|  | 130 | * Notes: | 
|  | 131 | *     This should be turned into an inline function. | 
|  | 132 | * | 
|  | 133 | * Return value: | 
|  | 134 | *     1 if we were able to detach the timer.  0 if we blew it, and the | 
|  | 135 | *     timer function has already started to run. | 
|  | 136 | **/ | 
|  | 137 | int scsi_delete_timer(struct scsi_cmnd *scmd) | 
|  | 138 | { | 
|  | 139 | int rtn; | 
|  | 140 |  | 
|  | 141 | rtn = del_timer(&scmd->eh_timeout); | 
|  | 142 |  | 
|  | 143 | SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p," | 
|  | 144 | " rtn: %d\n", __FUNCTION__, | 
|  | 145 | scmd, rtn)); | 
|  | 146 |  | 
|  | 147 | scmd->eh_timeout.data = (unsigned long)NULL; | 
|  | 148 | scmd->eh_timeout.function = NULL; | 
|  | 149 |  | 
|  | 150 | return rtn; | 
|  | 151 | } | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 152 |  | 
|  | 153 | /** | 
|  | 154 | * scsi_times_out - Timeout function for normal scsi commands. | 
|  | 155 | * @scmd:	Cmd that is timing out. | 
|  | 156 | * | 
|  | 157 | * Notes: | 
|  | 158 | *     We do not need to lock this.  There is the potential for a race | 
|  | 159 | *     only in that the normal completion handling might run, but if the | 
|  | 160 | *     normal completion function determines that the timer has already | 
|  | 161 | *     fired, then it mustn't do anything. | 
|  | 162 | **/ | 
|  | 163 | void scsi_times_out(struct scsi_cmnd *scmd) | 
|  | 164 | { | 
|  | 165 | scsi_log_completion(scmd, TIMEOUT_ERROR); | 
|  | 166 |  | 
| James Smart | c829c39 | 2006-03-13 08:28:57 -0500 | [diff] [blame] | 167 | if (scmd->device->host->transportt->eh_timed_out) | 
|  | 168 | switch (scmd->device->host->transportt->eh_timed_out(scmd)) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 169 | case EH_HANDLED: | 
|  | 170 | __scsi_done(scmd); | 
|  | 171 | return; | 
|  | 172 | case EH_RESET_TIMER: | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 173 | scsi_add_timer(scmd, scmd->timeout_per_command, | 
|  | 174 | scsi_times_out); | 
|  | 175 | return; | 
|  | 176 | case EH_NOT_HANDLED: | 
|  | 177 | break; | 
|  | 178 | } | 
|  | 179 |  | 
|  | 180 | if (unlikely(!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) { | 
| James Bottomley | 939647e | 2005-09-18 15:05:20 -0500 | [diff] [blame] | 181 | scmd->result |= DID_TIME_OUT << 16; | 
|  | 182 | __scsi_done(scmd); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 183 | } | 
|  | 184 | } | 
|  | 185 |  | 
|  | 186 | /** | 
|  | 187 | * scsi_block_when_processing_errors - Prevent cmds from being queued. | 
|  | 188 | * @sdev:	Device on which we are performing recovery. | 
|  | 189 | * | 
|  | 190 | * Description: | 
|  | 191 | *     We block until the host is out of error recovery, and then check to | 
|  | 192 | *     see whether the host or the device is offline. | 
|  | 193 | * | 
|  | 194 | * Return value: | 
|  | 195 | *     0 when dev was taken offline by error recovery. 1 OK to proceed. | 
|  | 196 | **/ | 
|  | 197 | int scsi_block_when_processing_errors(struct scsi_device *sdev) | 
|  | 198 | { | 
|  | 199 | int online; | 
|  | 200 |  | 
| James Bottomley | 939647e | 2005-09-18 15:05:20 -0500 | [diff] [blame] | 201 | wait_event(sdev->host->host_wait, !scsi_host_in_recovery(sdev->host)); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 202 |  | 
|  | 203 | online = scsi_device_online(sdev); | 
|  | 204 |  | 
|  | 205 | SCSI_LOG_ERROR_RECOVERY(5, printk("%s: rtn: %d\n", __FUNCTION__, | 
|  | 206 | online)); | 
|  | 207 |  | 
|  | 208 | return online; | 
|  | 209 | } | 
|  | 210 | EXPORT_SYMBOL(scsi_block_when_processing_errors); | 
|  | 211 |  | 
#ifdef CONFIG_SCSI_LOGGING
/**
 * scsi_eh_prt_fail_stats - Log info on failures.
 * @shost:	scsi host being recovered.
 * @work_q:	Queue of scsi cmds to process.
 *
 * Description:
 *     For every device of @shost, counts the commands on @work_q that
 *     belong to it, split into those flagged SCSI_EH_CANCEL_CMD and the
 *     rest, and logs the per-device counts followed by the totals.
 **/
static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost,
					  struct list_head *work_q)
{
	struct scsi_device *sdev;
	struct scsi_cmnd *scmd;
	int nr_cmds = 0;
	int nr_devices = 0;

	shost_for_each_device(sdev, shost) {
		int failed = 0;
		int cancelled = 0;

		list_for_each_entry(scmd, work_q, eh_entry) {
			if (scmd->device != sdev)
				continue;

			++nr_cmds;
			if (scmd->eh_eflags & SCSI_EH_CANCEL_CMD)
				++cancelled;
			else
				++failed;
		}

		/* devices without any command on the queue are not reported */
		if (!failed && !cancelled)
			continue;

		SCSI_LOG_ERROR_RECOVERY(3,
			sdev_printk(KERN_INFO, sdev,
				    "%s: cmds failed: %d, cancel: %d\n",
				    __FUNCTION__, failed, cancelled));
		++nr_devices;
	}

	SCSI_LOG_ERROR_RECOVERY(2,
		printk("Total of %d commands on %d devices require eh work\n",
		       nr_cmds, nr_devices));
}
#endif
|  | 256 |  | 
|  | 257 | /** | 
|  | 258 | * scsi_check_sense - Examine scsi cmd sense | 
|  | 259 | * @scmd:	Cmd to have sense checked. | 
|  | 260 | * | 
|  | 261 | * Return value: | 
|  | 262 | * 	SUCCESS or FAILED or NEEDS_RETRY | 
|  | 263 | * | 
|  | 264 | * Notes: | 
|  | 265 | *	When a deferred error is detected the current command has | 
|  | 266 | *	not been executed and needs retrying. | 
|  | 267 | **/ | 
|  | 268 | static int scsi_check_sense(struct scsi_cmnd *scmd) | 
|  | 269 | { | 
|  | 270 | struct scsi_sense_hdr sshdr; | 
|  | 271 |  | 
|  | 272 | if (! scsi_command_normalize_sense(scmd, &sshdr)) | 
|  | 273 | return FAILED;	/* no valid sense data */ | 
|  | 274 |  | 
|  | 275 | if (scsi_sense_is_deferred(&sshdr)) | 
|  | 276 | return NEEDS_RETRY; | 
|  | 277 |  | 
|  | 278 | /* | 
|  | 279 | * Previous logic looked for FILEMARK, EOM or ILI which are | 
|  | 280 | * mainly associated with tapes and returned SUCCESS. | 
|  | 281 | */ | 
|  | 282 | if (sshdr.response_code == 0x70) { | 
|  | 283 | /* fixed format */ | 
|  | 284 | if (scmd->sense_buffer[2] & 0xe0) | 
|  | 285 | return SUCCESS; | 
|  | 286 | } else { | 
|  | 287 | /* | 
|  | 288 | * descriptor format: look for "stream commands sense data | 
|  | 289 | * descriptor" (see SSC-3). Assume single sense data | 
|  | 290 | * descriptor. Ignore ILI from SBC-2 READ LONG and WRITE LONG. | 
|  | 291 | */ | 
|  | 292 | if ((sshdr.additional_length > 3) && | 
|  | 293 | (scmd->sense_buffer[8] == 0x4) && | 
|  | 294 | (scmd->sense_buffer[11] & 0xe0)) | 
|  | 295 | return SUCCESS; | 
|  | 296 | } | 
|  | 297 |  | 
|  | 298 | switch (sshdr.sense_key) { | 
|  | 299 | case NO_SENSE: | 
|  | 300 | return SUCCESS; | 
|  | 301 | case RECOVERED_ERROR: | 
|  | 302 | return /* soft_error */ SUCCESS; | 
|  | 303 |  | 
|  | 304 | case ABORTED_COMMAND: | 
|  | 305 | return NEEDS_RETRY; | 
|  | 306 | case NOT_READY: | 
|  | 307 | case UNIT_ATTENTION: | 
|  | 308 | /* | 
|  | 309 | * if we are expecting a cc/ua because of a bus reset that we | 
|  | 310 | * performed, treat this just as a retry.  otherwise this is | 
|  | 311 | * information that we should pass up to the upper-level driver | 
|  | 312 | * so that we can deal with it there. | 
|  | 313 | */ | 
|  | 314 | if (scmd->device->expecting_cc_ua) { | 
|  | 315 | scmd->device->expecting_cc_ua = 0; | 
|  | 316 | return NEEDS_RETRY; | 
|  | 317 | } | 
|  | 318 | /* | 
|  | 319 | * if the device is in the process of becoming ready, we | 
|  | 320 | * should retry. | 
|  | 321 | */ | 
|  | 322 | if ((sshdr.asc == 0x04) && (sshdr.ascq == 0x01)) | 
|  | 323 | return NEEDS_RETRY; | 
|  | 324 | /* | 
|  | 325 | * if the device is not started, we need to wake | 
|  | 326 | * the error handler to start the motor | 
|  | 327 | */ | 
|  | 328 | if (scmd->device->allow_restart && | 
|  | 329 | (sshdr.asc == 0x04) && (sshdr.ascq == 0x02)) | 
|  | 330 | return FAILED; | 
|  | 331 | return SUCCESS; | 
|  | 332 |  | 
|  | 333 | /* these three are not supported */ | 
|  | 334 | case COPY_ABORTED: | 
|  | 335 | case VOLUME_OVERFLOW: | 
|  | 336 | case MISCOMPARE: | 
|  | 337 | return SUCCESS; | 
|  | 338 |  | 
|  | 339 | case MEDIUM_ERROR: | 
|  | 340 | return NEEDS_RETRY; | 
|  | 341 |  | 
|  | 342 | case HARDWARE_ERROR: | 
|  | 343 | if (scmd->device->retry_hwerror) | 
|  | 344 | return NEEDS_RETRY; | 
|  | 345 | else | 
|  | 346 | return SUCCESS; | 
|  | 347 |  | 
|  | 348 | case ILLEGAL_REQUEST: | 
|  | 349 | case BLANK_CHECK: | 
|  | 350 | case DATA_PROTECT: | 
|  | 351 | default: | 
|  | 352 | return SUCCESS; | 
|  | 353 | } | 
|  | 354 | } | 
|  | 355 |  | 
|  | 356 | /** | 
|  | 357 | * scsi_eh_completed_normally - Disposition a eh cmd on return from LLD. | 
|  | 358 | * @scmd:	SCSI cmd to examine. | 
|  | 359 | * | 
|  | 360 | * Notes: | 
|  | 361 | *    This is *only* called when we are examining the status of commands | 
|  | 362 | *    queued during error recovery.  the main difference here is that we | 
|  | 363 | *    don't allow for the possibility of retries here, and we are a lot | 
|  | 364 | *    more restrictive about what we consider acceptable. | 
|  | 365 | **/ | 
|  | 366 | static int scsi_eh_completed_normally(struct scsi_cmnd *scmd) | 
|  | 367 | { | 
|  | 368 | /* | 
|  | 369 | * first check the host byte, to see if there is anything in there | 
|  | 370 | * that would indicate what we need to do. | 
|  | 371 | */ | 
|  | 372 | if (host_byte(scmd->result) == DID_RESET) { | 
|  | 373 | /* | 
|  | 374 | * rats.  we are already in the error handler, so we now | 
|  | 375 | * get to try and figure out what to do next.  if the sense | 
|  | 376 | * is valid, we have a pretty good idea of what to do. | 
|  | 377 | * if not, we mark it as FAILED. | 
|  | 378 | */ | 
|  | 379 | return scsi_check_sense(scmd); | 
|  | 380 | } | 
|  | 381 | if (host_byte(scmd->result) != DID_OK) | 
|  | 382 | return FAILED; | 
|  | 383 |  | 
|  | 384 | /* | 
|  | 385 | * next, check the message byte. | 
|  | 386 | */ | 
|  | 387 | if (msg_byte(scmd->result) != COMMAND_COMPLETE) | 
|  | 388 | return FAILED; | 
|  | 389 |  | 
|  | 390 | /* | 
|  | 391 | * now, check the status byte to see if this indicates | 
|  | 392 | * anything special. | 
|  | 393 | */ | 
|  | 394 | switch (status_byte(scmd->result)) { | 
|  | 395 | case GOOD: | 
|  | 396 | case COMMAND_TERMINATED: | 
|  | 397 | return SUCCESS; | 
|  | 398 | case CHECK_CONDITION: | 
|  | 399 | return scsi_check_sense(scmd); | 
|  | 400 | case CONDITION_GOOD: | 
|  | 401 | case INTERMEDIATE_GOOD: | 
|  | 402 | case INTERMEDIATE_C_GOOD: | 
|  | 403 | /* | 
|  | 404 | * who knows?  FIXME(eric) | 
|  | 405 | */ | 
|  | 406 | return SUCCESS; | 
|  | 407 | case BUSY: | 
|  | 408 | case QUEUE_FULL: | 
|  | 409 | case RESERVATION_CONFLICT: | 
|  | 410 | default: | 
|  | 411 | return FAILED; | 
|  | 412 | } | 
|  | 413 | return FAILED; | 
|  | 414 | } | 
|  | 415 |  | 
|  | 416 | /** | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 417 | * scsi_eh_done - Completion function for error handling. | 
|  | 418 | * @scmd:	Cmd that is done. | 
|  | 419 | **/ | 
|  | 420 | static void scsi_eh_done(struct scsi_cmnd *scmd) | 
|  | 421 | { | 
| Michael Reed | 8563167 | 2005-12-07 21:46:27 -0600 | [diff] [blame] | 422 | struct completion     *eh_action; | 
|  | 423 |  | 
| Christoph Hellwig | 7dfdc9a | 2005-10-31 18:49:52 +0100 | [diff] [blame] | 424 | SCSI_LOG_ERROR_RECOVERY(3, | 
|  | 425 | printk("%s scmd: %p result: %x\n", | 
|  | 426 | __FUNCTION__, scmd, scmd->result)); | 
| Michael Reed | 8563167 | 2005-12-07 21:46:27 -0600 | [diff] [blame] | 427 |  | 
|  | 428 | eh_action = scmd->device->host->eh_action; | 
|  | 429 | if (eh_action) | 
|  | 430 | complete(eh_action); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 431 | } | 
|  | 432 |  | 
|  | 433 | /** | 
|  | 434 | * scsi_send_eh_cmnd  - send a cmd to a device as part of error recovery. | 
|  | 435 | * @scmd:	SCSI Cmd to send. | 
|  | 436 | * @timeout:	Timeout for cmd. | 
|  | 437 | * | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 438 | * Return value: | 
|  | 439 | *    SUCCESS or FAILED or NEEDS_RETRY | 
|  | 440 | **/ | 
|  | 441 | static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout) | 
|  | 442 | { | 
|  | f59114b | 2005-04-17 15:00:23 -0500 | [diff] [blame] | 443 | struct scsi_device *sdev = scmd->device; | 
|  | 444 | struct Scsi_Host *shost = sdev->host; | 
| Christoph Hellwig | 7dfdc9a | 2005-10-31 18:49:52 +0100 | [diff] [blame] | 445 | DECLARE_COMPLETION(done); | 
|  | 446 | unsigned long timeleft; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 447 | unsigned long flags; | 
| Christoph Hellwig | 7dfdc9a | 2005-10-31 18:49:52 +0100 | [diff] [blame] | 448 | int rtn; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 449 |  | 
|  | f59114b | 2005-04-17 15:00:23 -0500 | [diff] [blame] | 450 | if (sdev->scsi_level <= SCSI_2) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 451 | scmd->cmnd[1] = (scmd->cmnd[1] & 0x1f) | | 
|  | f59114b | 2005-04-17 15:00:23 -0500 | [diff] [blame] | 452 | (sdev->lun << 5 & 0xe0); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 453 |  | 
| Christoph Hellwig | 7dfdc9a | 2005-10-31 18:49:52 +0100 | [diff] [blame] | 454 | shost->eh_action = &done; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 455 | scmd->request->rq_status = RQ_SCSI_BUSY; | 
|  | 456 |  | 
|  | f59114b | 2005-04-17 15:00:23 -0500 | [diff] [blame] | 457 | spin_lock_irqsave(shost->host_lock, flags); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 458 | scsi_log_send(scmd); | 
|  | f59114b | 2005-04-17 15:00:23 -0500 | [diff] [blame] | 459 | shost->hostt->queuecommand(scmd, scsi_eh_done); | 
|  | 460 | spin_unlock_irqrestore(shost->host_lock, flags); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 461 |  | 
| Christoph Hellwig | 7dfdc9a | 2005-10-31 18:49:52 +0100 | [diff] [blame] | 462 | timeleft = wait_for_completion_timeout(&done, timeout); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 463 |  | 
| Christoph Hellwig | 7dfdc9a | 2005-10-31 18:49:52 +0100 | [diff] [blame] | 464 | scmd->request->rq_status = RQ_SCSI_DONE; | 
|  | f59114b | 2005-04-17 15:00:23 -0500 | [diff] [blame] | 465 | shost->eh_action = NULL; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 466 |  | 
| Christoph Hellwig | 7dfdc9a | 2005-10-31 18:49:52 +0100 | [diff] [blame] | 467 | scsi_log_completion(scmd, SUCCESS); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 468 |  | 
| Christoph Hellwig | 7dfdc9a | 2005-10-31 18:49:52 +0100 | [diff] [blame] | 469 | SCSI_LOG_ERROR_RECOVERY(3, | 
|  | 470 | printk("%s: scmd: %p, timeleft: %ld\n", | 
|  | 471 | __FUNCTION__, scmd, timeleft)); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 472 |  | 
|  | 473 | /* | 
| Christoph Hellwig | 7dfdc9a | 2005-10-31 18:49:52 +0100 | [diff] [blame] | 474 | * If there is time left scsi_eh_done got called, and we will | 
|  | 475 | * examine the actual status codes to see whether the command | 
|  | 476 | * actually did complete normally, else tell the host to forget | 
|  | 477 | * about this command. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 478 | */ | 
| Christoph Hellwig | 7dfdc9a | 2005-10-31 18:49:52 +0100 | [diff] [blame] | 479 | if (timeleft) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 480 | rtn = scsi_eh_completed_normally(scmd); | 
|  | 481 | SCSI_LOG_ERROR_RECOVERY(3, | 
|  | 482 | printk("%s: scsi_eh_completed_normally %x\n", | 
|  | 483 | __FUNCTION__, rtn)); | 
| Christoph Hellwig | 7dfdc9a | 2005-10-31 18:49:52 +0100 | [diff] [blame] | 484 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 485 | switch (rtn) { | 
|  | 486 | case SUCCESS: | 
|  | 487 | case NEEDS_RETRY: | 
|  | 488 | case FAILED: | 
|  | 489 | break; | 
|  | 490 | default: | 
|  | 491 | rtn = FAILED; | 
|  | 492 | break; | 
|  | 493 | } | 
| Christoph Hellwig | 7dfdc9a | 2005-10-31 18:49:52 +0100 | [diff] [blame] | 494 | } else { | 
|  | 495 | /* | 
|  | 496 | * FIXME(eric) - we are not tracking whether we could | 
|  | 497 | * abort a timed out command or not.  not sure how | 
|  | 498 | * we should treat them differently anyways. | 
|  | 499 | */ | 
|  | 500 | if (shost->hostt->eh_abort_handler) | 
|  | 501 | shost->hostt->eh_abort_handler(scmd); | 
|  | 502 | rtn = FAILED; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 503 | } | 
|  | 504 |  | 
|  | 505 | return rtn; | 
|  | 506 | } | 
|  | 507 |  | 
|  | 508 | /** | 
|  | 509 | * scsi_request_sense - Request sense data from a particular target. | 
|  | 510 | * @scmd:	SCSI cmd for request sense. | 
|  | 511 | * | 
|  | 512 | * Notes: | 
|  | 513 | *    Some hosts automatically obtain this information, others require | 
|  | 514 | *    that we obtain it on our own. This function will *not* return until | 
|  | 515 | *    the command either times out, or it completes. | 
|  | 516 | **/ | 
|  | 517 | static int scsi_request_sense(struct scsi_cmnd *scmd) | 
|  | 518 | { | 
|  | 519 | static unsigned char generic_sense[6] = | 
|  | 520 | {REQUEST_SENSE, 0, 0, 0, 252, 0}; | 
|  | 521 | unsigned char *scsi_result; | 
|  | 522 | int saved_result; | 
|  | 523 | int rtn; | 
|  | 524 |  | 
|  | 525 | memcpy(scmd->cmnd, generic_sense, sizeof(generic_sense)); | 
|  | 526 |  | 
| Al Viro | bc86120 | 2005-04-24 12:28:34 -0700 | [diff] [blame] | 527 | scsi_result = kmalloc(252, GFP_ATOMIC | ((scmd->device->host->hostt->unchecked_isa_dma) ? __GFP_DMA : 0)); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 528 |  | 
|  | 529 |  | 
|  | 530 | if (unlikely(!scsi_result)) { | 
|  | 531 | printk(KERN_ERR "%s: cannot allocate scsi_result.\n", | 
|  | 532 | __FUNCTION__); | 
|  | 533 | return FAILED; | 
|  | 534 | } | 
|  | 535 |  | 
|  | 536 | /* | 
|  | 537 | * zero the sense buffer.  some host adapters automatically always | 
|  | 538 | * request sense, so it is not a good idea that | 
|  | 539 | * scmd->request_buffer and scmd->sense_buffer point to the same | 
|  | 540 | * address (db).  0 is not a valid sense code. | 
|  | 541 | */ | 
|  | 542 | memset(scmd->sense_buffer, 0, sizeof(scmd->sense_buffer)); | 
|  | 543 | memset(scsi_result, 0, 252); | 
|  | 544 |  | 
|  | 545 | saved_result = scmd->result; | 
|  | 546 | scmd->request_buffer = scsi_result; | 
|  | 547 | scmd->request_bufflen = 252; | 
|  | 548 | scmd->use_sg = 0; | 
|  | 549 | scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]); | 
|  | 550 | scmd->sc_data_direction = DMA_FROM_DEVICE; | 
|  | 551 | scmd->underflow = 0; | 
|  | 552 |  | 
|  | 553 | rtn = scsi_send_eh_cmnd(scmd, SENSE_TIMEOUT); | 
|  | 554 |  | 
|  | 555 | /* last chance to have valid sense data */ | 
|  | 556 | if(!SCSI_SENSE_VALID(scmd)) { | 
|  | 557 | memcpy(scmd->sense_buffer, scmd->request_buffer, | 
|  | 558 | sizeof(scmd->sense_buffer)); | 
|  | 559 | } | 
|  | 560 |  | 
|  | 561 | kfree(scsi_result); | 
|  | 562 |  | 
|  | 563 | /* | 
|  | 564 | * when we eventually call scsi_finish, we really wish to complete | 
|  | 565 | * the original request, so let's restore the original data. (db) | 
|  | 566 | */ | 
|  | 567 | scsi_setup_cmd_retry(scmd); | 
|  | 568 | scmd->result = saved_result; | 
|  | 569 | return rtn; | 
|  | 570 | } | 
|  | 571 |  | 
|  | 572 | /** | 
|  | 573 | * scsi_eh_finish_cmd - Handle a cmd that eh is finished with. | 
|  | 574 | * @scmd:	Original SCSI cmd that eh has finished. | 
|  | 575 | * @done_q:	Queue for processed commands. | 
|  | 576 | * | 
|  | 577 | * Notes: | 
|  | 578 | *    We don't want to use the normal command completion while we are are | 
|  | 579 | *    still handling errors - it may cause other commands to be queued, | 
|  | 580 | *    and that would disturb what we are doing.  thus we really want to | 
|  | 581 | *    keep a list of pending commands for final completion, and once we | 
|  | 582 | *    are ready to leave error handling we handle completion for real. | 
|  | 583 | **/ | 
| Tejun Heo | 041c5fc | 2006-01-23 13:09:36 +0900 | [diff] [blame] | 584 | void scsi_eh_finish_cmd(struct scsi_cmnd *scmd, struct list_head *done_q) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 585 | { | 
|  | 586 | scmd->device->host->host_failed--; | 
| Christoph Hellwig | 3111b0d | 2005-06-19 13:43:26 +0200 | [diff] [blame] | 587 | scmd->eh_eflags = 0; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 588 |  | 
|  | 589 | /* | 
|  | 590 | * set this back so that the upper level can correctly free up | 
|  | 591 | * things. | 
|  | 592 | */ | 
|  | 593 | scsi_setup_cmd_retry(scmd); | 
|  | 594 | list_move_tail(&scmd->eh_entry, done_q); | 
|  | 595 | } | 
| Tejun Heo | 041c5fc | 2006-01-23 13:09:36 +0900 | [diff] [blame] | 596 | EXPORT_SYMBOL(scsi_eh_finish_cmd); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 597 |  | 
|  | 598 | /** | 
|  | 599 | * scsi_eh_get_sense - Get device sense data. | 
|  | 600 | * @work_q:	Queue of commands to process. | 
|  | 601 | * @done_q:	Queue of proccessed commands.. | 
|  | 602 | * | 
|  | 603 | * Description: | 
|  | 604 | *    See if we need to request sense information.  if so, then get it | 
|  | 605 | *    now, so we have a better idea of what to do. | 
|  | 606 | * | 
|  | 607 | * Notes: | 
|  | 608 | *    This has the unfortunate side effect that if a shost adapter does | 
|  | 609 | *    not automatically request sense information, that we end up shutting | 
|  | 610 | *    it down before we request it. | 
|  | 611 | * | 
|  | 612 | *    All drivers should request sense information internally these days, | 
|  | 613 | *    so for now all I have to say is tough noogies if you end up in here. | 
|  | 614 | * | 
|  | 615 | *    XXX: Long term this code should go away, but that needs an audit of | 
|  | 616 | *         all LLDDs first. | 
|  | 617 | **/ | 
|  | 618 | static int scsi_eh_get_sense(struct list_head *work_q, | 
|  | 619 | struct list_head *done_q) | 
|  | 620 | { | 
| Christoph Hellwig | 937abeaa | 2005-06-19 13:43:56 +0200 | [diff] [blame] | 621 | struct scsi_cmnd *scmd, *next; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 622 | int rtn; | 
|  | 623 |  | 
| Christoph Hellwig | 937abeaa | 2005-06-19 13:43:56 +0200 | [diff] [blame] | 624 | list_for_each_entry_safe(scmd, next, work_q, eh_entry) { | 
| Christoph Hellwig | 3111b0d | 2005-06-19 13:43:26 +0200 | [diff] [blame] | 625 | if ((scmd->eh_eflags & SCSI_EH_CANCEL_CMD) || | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 626 | SCSI_SENSE_VALID(scmd)) | 
|  | 627 | continue; | 
|  | 628 |  | 
| Jeff Garzik | 3bf743e | 2005-10-24 18:04:06 -0400 | [diff] [blame] | 629 | SCSI_LOG_ERROR_RECOVERY(2, scmd_printk(KERN_INFO, scmd, | 
|  | 630 | "%s: requesting sense\n", | 
|  | 631 | current->comm)); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 632 | rtn = scsi_request_sense(scmd); | 
|  | 633 | if (rtn != SUCCESS) | 
|  | 634 | continue; | 
|  | 635 |  | 
|  | 636 | SCSI_LOG_ERROR_RECOVERY(3, printk("sense requested for %p" | 
|  | 637 | " result %x\n", scmd, | 
|  | 638 | scmd->result)); | 
|  | 639 | SCSI_LOG_ERROR_RECOVERY(3, scsi_print_sense("bh", scmd)); | 
|  | 640 |  | 
|  | 641 | rtn = scsi_decide_disposition(scmd); | 
|  | 642 |  | 
|  | 643 | /* | 
|  | 644 | * if the result was normal, then just pass it along to the | 
|  | 645 | * upper level. | 
|  | 646 | */ | 
|  | 647 | if (rtn == SUCCESS) | 
|  | 648 | /* we don't want this command reissued, just | 
|  | 649 | * finished with the sense data, so set | 
|  | 650 | * retries to the max allowed to ensure it | 
|  | 651 | * won't get reissued */ | 
|  | 652 | scmd->retries = scmd->allowed; | 
|  | 653 | else if (rtn != NEEDS_RETRY) | 
|  | 654 | continue; | 
|  | 655 |  | 
|  | 656 | scsi_eh_finish_cmd(scmd, done_q); | 
|  | 657 | } | 
|  | 658 |  | 
|  | 659 | return list_empty(work_q); | 
|  | 660 | } | 
|  | 661 |  | 
|  | 662 | /** | 
|  | 663 | * scsi_try_to_abort_cmd - Ask host to abort a running command. | 
|  | 664 | * @scmd:	SCSI cmd to abort from Lower Level. | 
|  | 665 | * | 
|  | 666 | * Notes: | 
|  | 667 | *    This function will not return until the user's completion function | 
|  | 668 | *    has been called.  there is no timeout on this operation.  if the | 
|  | 669 | *    author of the low-level driver wishes this operation to be timed, | 
|  | 670 | *    they can provide this facility themselves.  helper functions in | 
|  | 671 | *    scsi_error.c can be supplied to make this easier to do. | 
|  | 672 | **/ | 
|  | 673 | static int scsi_try_to_abort_cmd(struct scsi_cmnd *scmd) | 
|  | 674 | { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 675 | if (!scmd->device->host->hostt->eh_abort_handler) | 
| Jeff Garzik | 8fa728a | 2005-05-28 07:54:40 -0400 | [diff] [blame] | 676 | return FAILED; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 677 |  | 
|  | 678 | /* | 
|  | 679 | * scsi_done was called just after the command timed out and before | 
|  | 680 | * we had a chance to process it. (db) | 
|  | 681 | */ | 
|  | 682 | if (scmd->serial_number == 0) | 
|  | 683 | return SUCCESS; | 
| Jeff Garzik | 8fa728a | 2005-05-28 07:54:40 -0400 | [diff] [blame] | 684 | return scmd->device->host->hostt->eh_abort_handler(scmd); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 685 | } | 
|  | 686 |  | 
|  | 687 | /** | 
|  | 688 | * scsi_eh_tur - Send TUR to device. | 
|  | 689 | * @scmd:	Scsi cmd to send TUR | 
|  | 690 | * | 
|  | 691 | * Return value: | 
|  | 692 | *    0 - Device is ready. 1 - Device NOT ready. | 
|  | 693 | **/ | 
|  | 694 | static int scsi_eh_tur(struct scsi_cmnd *scmd) | 
|  | 695 | { | 
|  | 696 | static unsigned char tur_command[6] = {TEST_UNIT_READY, 0, 0, 0, 0, 0}; | 
|  | 697 | int retry_cnt = 1, rtn; | 
| Patrick Mansfield | 793698c | 2005-05-16 17:42:15 -0700 | [diff] [blame] | 698 | int saved_result; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 699 |  | 
|  | 700 | retry_tur: | 
|  | 701 | memcpy(scmd->cmnd, tur_command, sizeof(tur_command)); | 
|  | 702 |  | 
|  | 703 | /* | 
|  | 704 | * zero the sense buffer.  the scsi spec mandates that any | 
|  | 705 | * untransferred sense data should be interpreted as being zero. | 
|  | 706 | */ | 
|  | 707 | memset(scmd->sense_buffer, 0, sizeof(scmd->sense_buffer)); | 
|  | 708 |  | 
| Patrick Mansfield | 793698c | 2005-05-16 17:42:15 -0700 | [diff] [blame] | 709 | saved_result = scmd->result; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 710 | scmd->request_buffer = NULL; | 
|  | 711 | scmd->request_bufflen = 0; | 
|  | 712 | scmd->use_sg = 0; | 
|  | 713 | scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]); | 
|  | 714 | scmd->underflow = 0; | 
|  | 715 | scmd->sc_data_direction = DMA_NONE; | 
|  | 716 |  | 
|  | 717 | rtn = scsi_send_eh_cmnd(scmd, SENSE_TIMEOUT); | 
|  | 718 |  | 
|  | 719 | /* | 
|  | 720 | * when we eventually call scsi_finish, we really wish to complete | 
|  | 721 | * the original request, so let's restore the original data. (db) | 
|  | 722 | */ | 
|  | 723 | scsi_setup_cmd_retry(scmd); | 
| Patrick Mansfield | 793698c | 2005-05-16 17:42:15 -0700 | [diff] [blame] | 724 | scmd->result = saved_result; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 725 |  | 
|  | 726 | /* | 
|  | 727 | * hey, we are done.  let's look to see what happened. | 
|  | 728 | */ | 
|  | 729 | SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd %p rtn %x\n", | 
|  | 730 | __FUNCTION__, scmd, rtn)); | 
|  | 731 | if (rtn == SUCCESS) | 
|  | 732 | return 0; | 
| Alan Stern | e47373e | 2005-03-30 15:05:45 -0500 | [diff] [blame] | 733 | else if (rtn == NEEDS_RETRY) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 734 | if (retry_cnt--) | 
|  | 735 | goto retry_tur; | 
| Alan Stern | e47373e | 2005-03-30 15:05:45 -0500 | [diff] [blame] | 736 | return 0; | 
|  | 737 | } | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 738 | return 1; | 
|  | 739 | } | 
|  | 740 |  | 
|  | 741 | /** | 
|  | 742 | * scsi_eh_abort_cmds - abort canceled commands. | 
|  | 743 | * @shost:	scsi host being recovered. | 
|  | 744 | * @eh_done_q:	list_head for processed commands. | 
|  | 745 | * | 
|  | 746 | * Decription: | 
|  | 747 | *    Try and see whether or not it makes sense to try and abort the | 
|  | 748 | *    running command.  this only works out to be the case if we have one | 
|  | 749 | *    command that has timed out.  if the command simply failed, it makes | 
|  | 750 | *    no sense to try and abort the command, since as far as the shost | 
|  | 751 | *    adapter is concerned, it isn't running. | 
|  | 752 | **/ | 
|  | 753 | static int scsi_eh_abort_cmds(struct list_head *work_q, | 
|  | 754 | struct list_head *done_q) | 
|  | 755 | { | 
| Christoph Hellwig | 937abeaa | 2005-06-19 13:43:56 +0200 | [diff] [blame] | 756 | struct scsi_cmnd *scmd, *next; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 757 | int rtn; | 
|  | 758 |  | 
| Christoph Hellwig | 937abeaa | 2005-06-19 13:43:56 +0200 | [diff] [blame] | 759 | list_for_each_entry_safe(scmd, next, work_q, eh_entry) { | 
| Christoph Hellwig | 3111b0d | 2005-06-19 13:43:26 +0200 | [diff] [blame] | 760 | if (!(scmd->eh_eflags & SCSI_EH_CANCEL_CMD)) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 761 | continue; | 
|  | 762 | SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting cmd:" | 
|  | 763 | "0x%p\n", current->comm, | 
|  | 764 | scmd)); | 
|  | 765 | rtn = scsi_try_to_abort_cmd(scmd); | 
|  | 766 | if (rtn == SUCCESS) { | 
| Christoph Hellwig | 3111b0d | 2005-06-19 13:43:26 +0200 | [diff] [blame] | 767 | scmd->eh_eflags &= ~SCSI_EH_CANCEL_CMD; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 768 | if (!scsi_device_online(scmd->device) || | 
|  | 769 | !scsi_eh_tur(scmd)) { | 
|  | 770 | scsi_eh_finish_cmd(scmd, done_q); | 
|  | 771 | } | 
|  | 772 |  | 
|  | 773 | } else | 
|  | 774 | SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting" | 
|  | 775 | " cmd failed:" | 
|  | 776 | "0x%p\n", | 
|  | 777 | current->comm, | 
|  | 778 | scmd)); | 
|  | 779 | } | 
|  | 780 |  | 
|  | 781 | return list_empty(work_q); | 
|  | 782 | } | 
|  | 783 |  | 
|  | 784 | /** | 
|  | 785 | * scsi_try_bus_device_reset - Ask host to perform a BDR on a dev | 
|  | 786 | * @scmd:	SCSI cmd used to send BDR | 
|  | 787 | * | 
|  | 788 | * Notes: | 
|  | 789 | *    There is no timeout for this operation.  if this operation is | 
|  | 790 | *    unreliable for a given host, then the host itself needs to put a | 
|  | 791 | *    timer on it, and set the host back to a consistent state prior to | 
|  | 792 | *    returning. | 
|  | 793 | **/ | 
|  | 794 | static int scsi_try_bus_device_reset(struct scsi_cmnd *scmd) | 
|  | 795 | { | 
| Jeff Garzik | 94d0e7b8 | 2005-05-28 07:55:48 -0400 | [diff] [blame] | 796 | int rtn; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 797 |  | 
|  | 798 | if (!scmd->device->host->hostt->eh_device_reset_handler) | 
| Jeff Garzik | 94d0e7b8 | 2005-05-28 07:55:48 -0400 | [diff] [blame] | 799 | return FAILED; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 800 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 801 | rtn = scmd->device->host->hostt->eh_device_reset_handler(scmd); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 802 | if (rtn == SUCCESS) { | 
|  | 803 | scmd->device->was_reset = 1; | 
|  | 804 | scmd->device->expecting_cc_ua = 1; | 
|  | 805 | } | 
|  | 806 |  | 
|  | 807 | return rtn; | 
|  | 808 | } | 
|  | 809 |  | 
|  | 810 | /** | 
|  | 811 | * scsi_eh_try_stu - Send START_UNIT to device. | 
|  | 812 | * @scmd:	Scsi cmd to send START_UNIT | 
|  | 813 | * | 
|  | 814 | * Return value: | 
|  | 815 | *    0 - Device is ready. 1 - Device NOT ready. | 
|  | 816 | **/ | 
|  | 817 | static int scsi_eh_try_stu(struct scsi_cmnd *scmd) | 
|  | 818 | { | 
|  | 819 | static unsigned char stu_command[6] = {START_STOP, 0, 0, 0, 1, 0}; | 
|  | 820 | int rtn; | 
| Patrick Mansfield | 793698c | 2005-05-16 17:42:15 -0700 | [diff] [blame] | 821 | int saved_result; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 822 |  | 
|  | 823 | if (!scmd->device->allow_restart) | 
|  | 824 | return 1; | 
|  | 825 |  | 
|  | 826 | memcpy(scmd->cmnd, stu_command, sizeof(stu_command)); | 
|  | 827 |  | 
|  | 828 | /* | 
|  | 829 | * zero the sense buffer.  the scsi spec mandates that any | 
|  | 830 | * untransferred sense data should be interpreted as being zero. | 
|  | 831 | */ | 
|  | 832 | memset(scmd->sense_buffer, 0, sizeof(scmd->sense_buffer)); | 
|  | 833 |  | 
| Patrick Mansfield | 793698c | 2005-05-16 17:42:15 -0700 | [diff] [blame] | 834 | saved_result = scmd->result; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 835 | scmd->request_buffer = NULL; | 
|  | 836 | scmd->request_bufflen = 0; | 
|  | 837 | scmd->use_sg = 0; | 
|  | 838 | scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]); | 
|  | 839 | scmd->underflow = 0; | 
|  | 840 | scmd->sc_data_direction = DMA_NONE; | 
|  | 841 |  | 
|  | 842 | rtn = scsi_send_eh_cmnd(scmd, START_UNIT_TIMEOUT); | 
|  | 843 |  | 
|  | 844 | /* | 
|  | 845 | * when we eventually call scsi_finish, we really wish to complete | 
|  | 846 | * the original request, so let's restore the original data. (db) | 
|  | 847 | */ | 
|  | 848 | scsi_setup_cmd_retry(scmd); | 
| Patrick Mansfield | 793698c | 2005-05-16 17:42:15 -0700 | [diff] [blame] | 849 | scmd->result = saved_result; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 850 |  | 
|  | 851 | /* | 
|  | 852 | * hey, we are done.  let's look to see what happened. | 
|  | 853 | */ | 
|  | 854 | SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd %p rtn %x\n", | 
|  | 855 | __FUNCTION__, scmd, rtn)); | 
|  | 856 | if (rtn == SUCCESS) | 
|  | 857 | return 0; | 
|  | 858 | return 1; | 
|  | 859 | } | 
|  | 860 |  | 
|  | 861 | /** | 
|  | 862 | * scsi_eh_stu - send START_UNIT if needed | 
|  | 863 | * @shost:	scsi host being recovered. | 
|  | 864 | * @eh_done_q:	list_head for processed commands. | 
|  | 865 | * | 
|  | 866 | * Notes: | 
|  | 867 | *    If commands are failing due to not ready, initializing command required, | 
|  | 868 | *	try revalidating the device, which will end up sending a start unit. | 
|  | 869 | **/ | 
|  | 870 | static int scsi_eh_stu(struct Scsi_Host *shost, | 
|  | 871 | struct list_head *work_q, | 
|  | 872 | struct list_head *done_q) | 
|  | 873 | { | 
| Christoph Hellwig | 937abeaa | 2005-06-19 13:43:56 +0200 | [diff] [blame] | 874 | struct scsi_cmnd *scmd, *stu_scmd, *next; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 875 | struct scsi_device *sdev; | 
|  | 876 |  | 
|  | 877 | shost_for_each_device(sdev, shost) { | 
|  | 878 | stu_scmd = NULL; | 
|  | 879 | list_for_each_entry(scmd, work_q, eh_entry) | 
|  | 880 | if (scmd->device == sdev && SCSI_SENSE_VALID(scmd) && | 
|  | 881 | scsi_check_sense(scmd) == FAILED ) { | 
|  | 882 | stu_scmd = scmd; | 
|  | 883 | break; | 
|  | 884 | } | 
|  | 885 |  | 
|  | 886 | if (!stu_scmd) | 
|  | 887 | continue; | 
|  | 888 |  | 
|  | 889 | SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending START_UNIT to sdev:" | 
|  | 890 | " 0x%p\n", current->comm, sdev)); | 
|  | 891 |  | 
|  | 892 | if (!scsi_eh_try_stu(stu_scmd)) { | 
|  | 893 | if (!scsi_device_online(sdev) || | 
|  | 894 | !scsi_eh_tur(stu_scmd)) { | 
| Christoph Hellwig | 937abeaa | 2005-06-19 13:43:56 +0200 | [diff] [blame] | 895 | list_for_each_entry_safe(scmd, next, | 
|  | 896 | work_q, eh_entry) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 897 | if (scmd->device == sdev) | 
|  | 898 | scsi_eh_finish_cmd(scmd, done_q); | 
|  | 899 | } | 
|  | 900 | } | 
|  | 901 | } else { | 
|  | 902 | SCSI_LOG_ERROR_RECOVERY(3, | 
|  | 903 | printk("%s: START_UNIT failed to sdev:" | 
|  | 904 | " 0x%p\n", current->comm, sdev)); | 
|  | 905 | } | 
|  | 906 | } | 
|  | 907 |  | 
|  | 908 | return list_empty(work_q); | 
|  | 909 | } | 
|  | 910 |  | 
|  | 911 |  | 
|  | 912 | /** | 
|  | 913 | * scsi_eh_bus_device_reset - send bdr if needed | 
|  | 914 | * @shost:	scsi host being recovered. | 
|  | 915 | * @eh_done_q:	list_head for processed commands. | 
|  | 916 | * | 
|  | 917 | * Notes: | 
|  | 918 | *    Try a bus device reset.  still, look to see whether we have multiple | 
|  | 919 | *    devices that are jammed or not - if we have multiple devices, it | 
|  | 920 | *    makes no sense to try bus_device_reset - we really would need to try | 
|  | 921 | *    a bus_reset instead. | 
|  | 922 | **/ | 
|  | 923 | static int scsi_eh_bus_device_reset(struct Scsi_Host *shost, | 
|  | 924 | struct list_head *work_q, | 
|  | 925 | struct list_head *done_q) | 
|  | 926 | { | 
| Christoph Hellwig | 937abeaa | 2005-06-19 13:43:56 +0200 | [diff] [blame] | 927 | struct scsi_cmnd *scmd, *bdr_scmd, *next; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 928 | struct scsi_device *sdev; | 
|  | 929 | int rtn; | 
|  | 930 |  | 
|  | 931 | shost_for_each_device(sdev, shost) { | 
|  | 932 | bdr_scmd = NULL; | 
|  | 933 | list_for_each_entry(scmd, work_q, eh_entry) | 
|  | 934 | if (scmd->device == sdev) { | 
|  | 935 | bdr_scmd = scmd; | 
|  | 936 | break; | 
|  | 937 | } | 
|  | 938 |  | 
|  | 939 | if (!bdr_scmd) | 
|  | 940 | continue; | 
|  | 941 |  | 
|  | 942 | SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending BDR sdev:" | 
|  | 943 | " 0x%p\n", current->comm, | 
|  | 944 | sdev)); | 
|  | 945 | rtn = scsi_try_bus_device_reset(bdr_scmd); | 
|  | 946 | if (rtn == SUCCESS) { | 
|  | 947 | if (!scsi_device_online(sdev) || | 
|  | 948 | !scsi_eh_tur(bdr_scmd)) { | 
| Christoph Hellwig | 937abeaa | 2005-06-19 13:43:56 +0200 | [diff] [blame] | 949 | list_for_each_entry_safe(scmd, next, | 
|  | 950 | work_q, eh_entry) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 951 | if (scmd->device == sdev) | 
|  | 952 | scsi_eh_finish_cmd(scmd, | 
|  | 953 | done_q); | 
|  | 954 | } | 
|  | 955 | } | 
|  | 956 | } else { | 
|  | 957 | SCSI_LOG_ERROR_RECOVERY(3, printk("%s: BDR" | 
|  | 958 | " failed sdev:" | 
|  | 959 | "0x%p\n", | 
|  | 960 | current->comm, | 
|  | 961 | sdev)); | 
|  | 962 | } | 
|  | 963 | } | 
|  | 964 |  | 
|  | 965 | return list_empty(work_q); | 
|  | 966 | } | 
|  | 967 |  | 
|  | 968 | /** | 
|  | 969 | * scsi_try_bus_reset - ask host to perform a bus reset | 
|  | 970 | * @scmd:	SCSI cmd to send bus reset. | 
|  | 971 | **/ | 
|  | 972 | static int scsi_try_bus_reset(struct scsi_cmnd *scmd) | 
|  | 973 | { | 
|  | 974 | unsigned long flags; | 
|  | 975 | int rtn; | 
|  | 976 |  | 
|  | 977 | SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Bus RST\n", | 
|  | 978 | __FUNCTION__)); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 979 |  | 
|  | 980 | if (!scmd->device->host->hostt->eh_bus_reset_handler) | 
|  | 981 | return FAILED; | 
|  | 982 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 983 | rtn = scmd->device->host->hostt->eh_bus_reset_handler(scmd); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 984 |  | 
|  | 985 | if (rtn == SUCCESS) { | 
|  | 986 | if (!scmd->device->host->hostt->skip_settle_delay) | 
|  | 987 | ssleep(BUS_RESET_SETTLE_TIME); | 
|  | 988 | spin_lock_irqsave(scmd->device->host->host_lock, flags); | 
| Jeff Garzik | 422c0d6 | 2005-10-24 18:05:09 -0400 | [diff] [blame] | 989 | scsi_report_bus_reset(scmd->device->host, | 
|  | 990 | scmd_channel(scmd)); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 991 | spin_unlock_irqrestore(scmd->device->host->host_lock, flags); | 
|  | 992 | } | 
|  | 993 |  | 
|  | 994 | return rtn; | 
|  | 995 | } | 
|  | 996 |  | 
|  | 997 | /** | 
|  | 998 | * scsi_try_host_reset - ask host adapter to reset itself | 
|  | 999 | * @scmd:	SCSI cmd to send hsot reset. | 
|  | 1000 | **/ | 
|  | 1001 | static int scsi_try_host_reset(struct scsi_cmnd *scmd) | 
|  | 1002 | { | 
|  | 1003 | unsigned long flags; | 
|  | 1004 | int rtn; | 
|  | 1005 |  | 
|  | 1006 | SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Host RST\n", | 
|  | 1007 | __FUNCTION__)); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1008 |  | 
|  | 1009 | if (!scmd->device->host->hostt->eh_host_reset_handler) | 
|  | 1010 | return FAILED; | 
|  | 1011 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1012 | rtn = scmd->device->host->hostt->eh_host_reset_handler(scmd); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1013 |  | 
|  | 1014 | if (rtn == SUCCESS) { | 
|  | 1015 | if (!scmd->device->host->hostt->skip_settle_delay) | 
|  | 1016 | ssleep(HOST_RESET_SETTLE_TIME); | 
|  | 1017 | spin_lock_irqsave(scmd->device->host->host_lock, flags); | 
| Jeff Garzik | 422c0d6 | 2005-10-24 18:05:09 -0400 | [diff] [blame] | 1018 | scsi_report_bus_reset(scmd->device->host, | 
|  | 1019 | scmd_channel(scmd)); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1020 | spin_unlock_irqrestore(scmd->device->host->host_lock, flags); | 
|  | 1021 | } | 
|  | 1022 |  | 
|  | 1023 | return rtn; | 
|  | 1024 | } | 
|  | 1025 |  | 
|  | 1026 | /** | 
|  | 1027 | * scsi_eh_bus_reset - send a bus reset | 
|  | 1028 | * @shost:	scsi host being recovered. | 
|  | 1029 | * @eh_done_q:	list_head for processed commands. | 
|  | 1030 | **/ | 
|  | 1031 | static int scsi_eh_bus_reset(struct Scsi_Host *shost, | 
|  | 1032 | struct list_head *work_q, | 
|  | 1033 | struct list_head *done_q) | 
|  | 1034 | { | 
| Christoph Hellwig | 937abeaa | 2005-06-19 13:43:56 +0200 | [diff] [blame] | 1035 | struct scsi_cmnd *scmd, *chan_scmd, *next; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1036 | unsigned int channel; | 
|  | 1037 | int rtn; | 
|  | 1038 |  | 
|  | 1039 | /* | 
|  | 1040 | * we really want to loop over the various channels, and do this on | 
|  | 1041 | * a channel by channel basis.  we should also check to see if any | 
|  | 1042 | * of the failed commands are on soft_reset devices, and if so, skip | 
|  | 1043 | * the reset. | 
|  | 1044 | */ | 
|  | 1045 |  | 
|  | 1046 | for (channel = 0; channel <= shost->max_channel; channel++) { | 
|  | 1047 | chan_scmd = NULL; | 
|  | 1048 | list_for_each_entry(scmd, work_q, eh_entry) { | 
| Jeff Garzik | 422c0d6 | 2005-10-24 18:05:09 -0400 | [diff] [blame] | 1049 | if (channel == scmd_channel(scmd)) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1050 | chan_scmd = scmd; | 
|  | 1051 | break; | 
|  | 1052 | /* | 
|  | 1053 | * FIXME add back in some support for | 
|  | 1054 | * soft_reset devices. | 
|  | 1055 | */ | 
|  | 1056 | } | 
|  | 1057 | } | 
|  | 1058 |  | 
|  | 1059 | if (!chan_scmd) | 
|  | 1060 | continue; | 
|  | 1061 | SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending BRST chan:" | 
|  | 1062 | " %d\n", current->comm, | 
|  | 1063 | channel)); | 
|  | 1064 | rtn = scsi_try_bus_reset(chan_scmd); | 
|  | 1065 | if (rtn == SUCCESS) { | 
| Christoph Hellwig | 937abeaa | 2005-06-19 13:43:56 +0200 | [diff] [blame] | 1066 | list_for_each_entry_safe(scmd, next, work_q, eh_entry) { | 
| Jeff Garzik | 422c0d6 | 2005-10-24 18:05:09 -0400 | [diff] [blame] | 1067 | if (channel == scmd_channel(scmd)) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1068 | if (!scsi_device_online(scmd->device) || | 
|  | 1069 | !scsi_eh_tur(scmd)) | 
|  | 1070 | scsi_eh_finish_cmd(scmd, | 
|  | 1071 | done_q); | 
|  | 1072 | } | 
|  | 1073 | } else { | 
|  | 1074 | SCSI_LOG_ERROR_RECOVERY(3, printk("%s: BRST" | 
|  | 1075 | " failed chan: %d\n", | 
|  | 1076 | current->comm, | 
|  | 1077 | channel)); | 
|  | 1078 | } | 
|  | 1079 | } | 
|  | 1080 | return list_empty(work_q); | 
|  | 1081 | } | 
|  | 1082 |  | 
|  | 1083 | /** | 
|  | 1084 | * scsi_eh_host_reset - send a host reset | 
|  | 1085 | * @work_q:	list_head for processed commands. | 
|  | 1086 | * @done_q:	list_head for processed commands. | 
|  | 1087 | **/ | 
|  | 1088 | static int scsi_eh_host_reset(struct list_head *work_q, | 
|  | 1089 | struct list_head *done_q) | 
|  | 1090 | { | 
| Christoph Hellwig | 937abeaa | 2005-06-19 13:43:56 +0200 | [diff] [blame] | 1091 | struct scsi_cmnd *scmd, *next; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1092 | int rtn; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1093 |  | 
|  | 1094 | if (!list_empty(work_q)) { | 
|  | 1095 | scmd = list_entry(work_q->next, | 
|  | 1096 | struct scsi_cmnd, eh_entry); | 
|  | 1097 |  | 
|  | 1098 | SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending HRST\n" | 
|  | 1099 | , current->comm)); | 
|  | 1100 |  | 
|  | 1101 | rtn = scsi_try_host_reset(scmd); | 
|  | 1102 | if (rtn == SUCCESS) { | 
| Christoph Hellwig | 937abeaa | 2005-06-19 13:43:56 +0200 | [diff] [blame] | 1103 | list_for_each_entry_safe(scmd, next, work_q, eh_entry) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1104 | if (!scsi_device_online(scmd->device) || | 
|  | 1105 | (!scsi_eh_try_stu(scmd) && !scsi_eh_tur(scmd)) || | 
|  | 1106 | !scsi_eh_tur(scmd)) | 
|  | 1107 | scsi_eh_finish_cmd(scmd, done_q); | 
|  | 1108 | } | 
|  | 1109 | } else { | 
|  | 1110 | SCSI_LOG_ERROR_RECOVERY(3, printk("%s: HRST" | 
|  | 1111 | " failed\n", | 
|  | 1112 | current->comm)); | 
|  | 1113 | } | 
|  | 1114 | } | 
|  | 1115 | return list_empty(work_q); | 
|  | 1116 | } | 
|  | 1117 |  | 
|  | 1118 | /** | 
|  | 1119 | * scsi_eh_offline_sdevs - offline scsi devices that fail to recover | 
|  | 1120 | * @work_q:	list_head for processed commands. | 
|  | 1121 | * @done_q:	list_head for processed commands. | 
|  | 1122 | * | 
|  | 1123 | **/ | 
|  | 1124 | static void scsi_eh_offline_sdevs(struct list_head *work_q, | 
|  | 1125 | struct list_head *done_q) | 
|  | 1126 | { | 
| Christoph Hellwig | 937abeaa | 2005-06-19 13:43:56 +0200 | [diff] [blame] | 1127 | struct scsi_cmnd *scmd, *next; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1128 |  | 
| Christoph Hellwig | 937abeaa | 2005-06-19 13:43:56 +0200 | [diff] [blame] | 1129 | list_for_each_entry_safe(scmd, next, work_q, eh_entry) { | 
| James Bottomley | 9ccfc75 | 2005-10-02 11:45:08 -0500 | [diff] [blame] | 1130 | sdev_printk(KERN_INFO, scmd->device, | 
|  | 1131 | "scsi: Device offlined - not" | 
|  | 1132 | " ready after error recovery\n"); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1133 | scsi_device_set_state(scmd->device, SDEV_OFFLINE); | 
| Christoph Hellwig | 3111b0d | 2005-06-19 13:43:26 +0200 | [diff] [blame] | 1134 | if (scmd->eh_eflags & SCSI_EH_CANCEL_CMD) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1135 | /* | 
|  | 1136 | * FIXME: Handle lost cmds. | 
|  | 1137 | */ | 
|  | 1138 | } | 
|  | 1139 | scsi_eh_finish_cmd(scmd, done_q); | 
|  | 1140 | } | 
|  | 1141 | return; | 
|  | 1142 | } | 
|  | 1143 |  | 
|  | 1144 | /** | 
|  | 1145 | * scsi_decide_disposition - Disposition a cmd on return from LLD. | 
|  | 1146 | * @scmd:	SCSI cmd to examine. | 
|  | 1147 | * | 
|  | 1148 | * Notes: | 
|  | 1149 | *    This is *only* called when we are examining the status after sending | 
|  | 1150 | *    out the actual data command.  any commands that are queued for error | 
|  | 1151 | *    recovery (e.g. test_unit_ready) do *not* come through here. | 
|  | 1152 | * | 
|  | 1153 | *    When this routine returns failed, it means the error handler thread | 
|  | 1154 | *    is woken.  In cases where the error code indicates an error that | 
|  | 1155 | *    doesn't require the error handler read (i.e. we don't need to | 
|  | 1156 | *    abort/reset), this function should return SUCCESS. | 
|  | 1157 | **/ | 
|  | 1158 | int scsi_decide_disposition(struct scsi_cmnd *scmd) | 
|  | 1159 | { | 
|  | 1160 | int rtn; | 
|  | 1161 |  | 
|  | 1162 | /* | 
|  | 1163 | * if the device is offline, then we clearly just pass the result back | 
|  | 1164 | * up to the top level. | 
|  | 1165 | */ | 
|  | 1166 | if (!scsi_device_online(scmd->device)) { | 
|  | 1167 | SCSI_LOG_ERROR_RECOVERY(5, printk("%s: device offline - report" | 
|  | 1168 | " as SUCCESS\n", | 
|  | 1169 | __FUNCTION__)); | 
|  | 1170 | return SUCCESS; | 
|  | 1171 | } | 
|  | 1172 |  | 
|  | 1173 | /* | 
|  | 1174 | * first check the host byte, to see if there is anything in there | 
|  | 1175 | * that would indicate what we need to do. | 
|  | 1176 | */ | 
|  | 1177 | switch (host_byte(scmd->result)) { | 
|  | 1178 | case DID_PASSTHROUGH: | 
|  | 1179 | /* | 
|  | 1180 | * no matter what, pass this through to the upper layer. | 
|  | 1181 | * nuke this special code so that it looks like we are saying | 
|  | 1182 | * did_ok. | 
|  | 1183 | */ | 
|  | 1184 | scmd->result &= 0xff00ffff; | 
|  | 1185 | return SUCCESS; | 
|  | 1186 | case DID_OK: | 
|  | 1187 | /* | 
|  | 1188 | * looks good.  drop through, and check the next byte. | 
|  | 1189 | */ | 
|  | 1190 | break; | 
|  | 1191 | case DID_NO_CONNECT: | 
|  | 1192 | case DID_BAD_TARGET: | 
|  | 1193 | case DID_ABORT: | 
|  | 1194 | /* | 
|  | 1195 | * note - this means that we just report the status back | 
|  | 1196 | * to the top level driver, not that we actually think | 
|  | 1197 | * that it indicates SUCCESS. | 
|  | 1198 | */ | 
|  | 1199 | return SUCCESS; | 
|  | 1200 | /* | 
|  | 1201 | * when the low level driver returns did_soft_error, | 
|  | 1202 | * it is responsible for keeping an internal retry counter | 
|  | 1203 | * in order to avoid endless loops (db) | 
|  | 1204 | * | 
|  | 1205 | * actually this is a bug in this function here.  we should | 
|  | 1206 | * be mindful of the maximum number of retries specified | 
|  | 1207 | * and not get stuck in a loop. | 
|  | 1208 | */ | 
|  | 1209 | case DID_SOFT_ERROR: | 
|  | 1210 | goto maybe_retry; | 
|  | 1211 | case DID_IMM_RETRY: | 
|  | 1212 | return NEEDS_RETRY; | 
|  | 1213 |  | 
|  | bf34191 | 2005-04-12 17:49:09 -0500 | [diff] [blame] | 1214 | case DID_REQUEUE: | 
|  | 1215 | return ADD_TO_MLQUEUE; | 
|  | 1216 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1217 | case DID_ERROR: | 
|  | 1218 | if (msg_byte(scmd->result) == COMMAND_COMPLETE && | 
|  | 1219 | status_byte(scmd->result) == RESERVATION_CONFLICT) | 
|  | 1220 | /* | 
|  | 1221 | * execute reservation conflict processing code | 
|  | 1222 | * lower down | 
|  | 1223 | */ | 
|  | 1224 | break; | 
|  | 1225 | /* fallthrough */ | 
|  | 1226 |  | 
|  | 1227 | case DID_BUS_BUSY: | 
|  | 1228 | case DID_PARITY: | 
|  | 1229 | goto maybe_retry; | 
|  | 1230 | case DID_TIME_OUT: | 
|  | 1231 | /* | 
|  | 1232 | * when we scan the bus, we get timeout messages for | 
|  | 1233 | * these commands if there is no device available. | 
|  | 1234 | * other hosts report did_no_connect for the same thing. | 
|  | 1235 | */ | 
|  | 1236 | if ((scmd->cmnd[0] == TEST_UNIT_READY || | 
|  | 1237 | scmd->cmnd[0] == INQUIRY)) { | 
|  | 1238 | return SUCCESS; | 
|  | 1239 | } else { | 
|  | 1240 | return FAILED; | 
|  | 1241 | } | 
|  | 1242 | case DID_RESET: | 
|  | 1243 | return SUCCESS; | 
|  | 1244 | default: | 
|  | 1245 | return FAILED; | 
|  | 1246 | } | 
|  | 1247 |  | 
|  | 1248 | /* | 
|  | 1249 | * next, check the message byte. | 
|  | 1250 | */ | 
|  | 1251 | if (msg_byte(scmd->result) != COMMAND_COMPLETE) | 
|  | 1252 | return FAILED; | 
|  | 1253 |  | 
|  | 1254 | /* | 
|  | 1255 | * check the status byte to see if this indicates anything special. | 
|  | 1256 | */ | 
|  | 1257 | switch (status_byte(scmd->result)) { | 
|  | 1258 | case QUEUE_FULL: | 
|  | 1259 | /* | 
|  | 1260 | * the case of trying to send too many commands to a | 
|  | 1261 | * tagged queueing device. | 
|  | 1262 | */ | 
|  | 1263 | case BUSY: | 
|  | 1264 | /* | 
|  | 1265 | * device can't talk to us at the moment.  Should only | 
|  | 1266 | * occur (SAM-3) when the task queue is empty, so will cause | 
|  | 1267 | * the empty queue handling to trigger a stall in the | 
|  | 1268 | * device. | 
|  | 1269 | */ | 
|  | 1270 | return ADD_TO_MLQUEUE; | 
|  | 1271 | case GOOD: | 
|  | 1272 | case COMMAND_TERMINATED: | 
|  | 1273 | case TASK_ABORTED: | 
|  | 1274 | return SUCCESS; | 
|  | 1275 | case CHECK_CONDITION: | 
|  | 1276 | rtn = scsi_check_sense(scmd); | 
|  | 1277 | if (rtn == NEEDS_RETRY) | 
|  | 1278 | goto maybe_retry; | 
|  | 1279 | /* if rtn == FAILED, we have no sense information; | 
|  | 1280 | * returning FAILED will wake the error handler thread | 
|  | 1281 | * to collect the sense and redo the decide | 
|  | 1282 | * disposition */ | 
|  | 1283 | return rtn; | 
|  | 1284 | case CONDITION_GOOD: | 
|  | 1285 | case INTERMEDIATE_GOOD: | 
|  | 1286 | case INTERMEDIATE_C_GOOD: | 
|  | 1287 | case ACA_ACTIVE: | 
|  | 1288 | /* | 
|  | 1289 | * who knows?  FIXME(eric) | 
|  | 1290 | */ | 
|  | 1291 | return SUCCESS; | 
|  | 1292 |  | 
|  | 1293 | case RESERVATION_CONFLICT: | 
| James Bottomley | 9ccfc75 | 2005-10-02 11:45:08 -0500 | [diff] [blame] | 1294 | sdev_printk(KERN_INFO, scmd->device, | 
|  | 1295 | "reservation conflict\n"); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1296 | return SUCCESS; /* causes immediate i/o error */ | 
|  | 1297 | default: | 
|  | 1298 | return FAILED; | 
|  | 1299 | } | 
|  | 1300 | return FAILED; | 
|  | 1301 |  | 
|  | 1302 | maybe_retry: | 
|  | 1303 |  | 
|  | 1304 | /* we requeue for retry because the error was retryable, and | 
|  | 1305 | * the request was not marked fast fail.  Note that above, | 
|  | 1306 | * even if the request is marked fast fail, we still requeue | 
|  | 1307 | * for queue congestion conditions (QUEUE_FULL or BUSY) */ | 
| Brian King | 8884efa | 2006-02-24 17:10:04 -0600 | [diff] [blame] | 1308 | if ((++scmd->retries) <= scmd->allowed | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1309 | && !blk_noretry_request(scmd->request)) { | 
|  | 1310 | return NEEDS_RETRY; | 
|  | 1311 | } else { | 
|  | 1312 | /* | 
|  | 1313 | * no more retries - report this one back to upper level. | 
|  | 1314 | */ | 
|  | 1315 | return SUCCESS; | 
|  | 1316 | } | 
|  | 1317 | } | 
|  | 1318 |  | 
|  | 1319 | /** | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1320 | * scsi_eh_lock_door - Prevent medium removal for the specified device | 
|  | 1321 | * @sdev:	SCSI device to prevent medium removal | 
|  | 1322 | * | 
|  | 1323 | * Locking: | 
|  | 1324 | * 	We must be called from process context; scsi_allocate_request() | 
|  | 1325 | * 	may sleep. | 
|  | 1326 | * | 
|  | 1327 | * Notes: | 
|  | 1328 | * 	We queue up an asynchronous "ALLOW MEDIUM REMOVAL" request on the | 
|  | 1329 | * 	head of the devices request queue, and continue. | 
|  | 1330 | * | 
|  | 1331 | * Bugs: | 
|  | 1332 | * 	scsi_allocate_request() may sleep waiting for existing requests to | 
|  | 1333 | * 	be processed.  However, since we haven't kicked off any request | 
|  | 1334 | * 	processing for this host, this may deadlock. | 
|  | 1335 | * | 
|  | 1336 | *	If scsi_allocate_request() fails for what ever reason, we | 
|  | 1337 | *	completely forget to lock the door. | 
|  | 1338 | **/ | 
|  | 1339 | static void scsi_eh_lock_door(struct scsi_device *sdev) | 
|  | 1340 | { | 
| Mike Christie | 6e68af6 | 2005-11-11 05:30:27 -0600 | [diff] [blame] | 1341 | unsigned char cmnd[MAX_COMMAND_SIZE]; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1342 |  | 
| Mike Christie | 6e68af6 | 2005-11-11 05:30:27 -0600 | [diff] [blame] | 1343 | cmnd[0] = ALLOW_MEDIUM_REMOVAL; | 
|  | 1344 | cmnd[1] = 0; | 
|  | 1345 | cmnd[2] = 0; | 
|  | 1346 | cmnd[3] = 0; | 
|  | 1347 | cmnd[4] = SCSI_REMOVAL_PREVENT; | 
|  | 1348 | cmnd[5] = 0; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1349 |  | 
| brking@us.ibm.com | bb1d107 | 2006-01-23 15:03:22 -0600 | [diff] [blame] | 1350 | scsi_execute_async(sdev, cmnd, 6, DMA_NONE, NULL, 0, 0, 10 * HZ, | 
| Mike Christie | 6e68af6 | 2005-11-11 05:30:27 -0600 | [diff] [blame] | 1351 | 5, NULL, NULL, GFP_KERNEL); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1352 | } | 
|  | 1353 |  | 
|  | 1354 |  | 
|  | 1355 | /** | 
|  | 1356 | * scsi_restart_operations - restart io operations to the specified host. | 
|  | 1357 | * @shost:	Host we are restarting. | 
|  | 1358 | * | 
|  | 1359 | * Notes: | 
|  | 1360 | *    When we entered the error handler, we blocked all further i/o to | 
|  | 1361 | *    this device.  we need to 'reverse' this process. | 
|  | 1362 | **/ | 
|  | 1363 | static void scsi_restart_operations(struct Scsi_Host *shost) | 
|  | 1364 | { | 
|  | 1365 | struct scsi_device *sdev; | 
| James Bottomley | 939647e | 2005-09-18 15:05:20 -0500 | [diff] [blame] | 1366 | unsigned long flags; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1367 |  | 
|  | 1368 | /* | 
|  | 1369 | * If the door was locked, we need to insert a door lock request | 
|  | 1370 | * onto the head of the SCSI request queue for the device.  There | 
|  | 1371 | * is no point trying to lock the door of an off-line device. | 
|  | 1372 | */ | 
|  | 1373 | shost_for_each_device(sdev, shost) { | 
|  | 1374 | if (scsi_device_online(sdev) && sdev->locked) | 
|  | 1375 | scsi_eh_lock_door(sdev); | 
|  | 1376 | } | 
|  | 1377 |  | 
|  | 1378 | /* | 
|  | 1379 | * next free up anything directly waiting upon the host.  this | 
|  | 1380 | * will be requests for character device operations, and also for | 
|  | 1381 | * ioctls to queued block devices. | 
|  | 1382 | */ | 
|  | 1383 | SCSI_LOG_ERROR_RECOVERY(3, printk("%s: waking up host to restart\n", | 
|  | 1384 | __FUNCTION__)); | 
|  | 1385 |  | 
| James Bottomley | 939647e | 2005-09-18 15:05:20 -0500 | [diff] [blame] | 1386 | spin_lock_irqsave(shost->host_lock, flags); | 
|  | 1387 | if (scsi_host_set_state(shost, SHOST_RUNNING)) | 
|  | 1388 | if (scsi_host_set_state(shost, SHOST_CANCEL)) | 
|  | 1389 | BUG_ON(scsi_host_set_state(shost, SHOST_DEL)); | 
|  | 1390 | spin_unlock_irqrestore(shost->host_lock, flags); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1391 |  | 
|  | 1392 | wake_up(&shost->host_wait); | 
|  | 1393 |  | 
|  | 1394 | /* | 
|  | 1395 | * finally we need to re-initiate requests that may be pending.  we will | 
|  | 1396 | * have had everything blocked while error handling is taking place, and | 
|  | 1397 | * now that error recovery is done, we will need to ensure that these | 
|  | 1398 | * requests are started. | 
|  | 1399 | */ | 
|  | 1400 | scsi_run_host_queues(shost); | 
|  | 1401 | } | 
|  | 1402 |  | 
/**
 * scsi_eh_ready_devs - check device ready state and recover if not.
 * @shost: 	host to be recovered.
 * @work_q:	list_head handed to every recovery step (presumably the
 *		commands still awaiting recovery - confirm with callers).
 * @done_q:	list_head for processed commands.
 *
 * Notes:
 *    The recovery steps are tried in this order: scsi_eh_stu(),
 *    scsi_eh_bus_device_reset(), scsi_eh_bus_reset(),
 *    scsi_eh_host_reset() and finally scsi_eh_offline_sdevs().  As soon
 *    as one of the first four returns non-zero the remaining steps are
 *    skipped.
 **/
static void scsi_eh_ready_devs(struct Scsi_Host *shost,
			       struct list_head *work_q,
			       struct list_head *done_q)
{
	if (scsi_eh_stu(shost, work_q, done_q))
		return;

	if (scsi_eh_bus_device_reset(shost, work_q, done_q))
		return;

	if (scsi_eh_bus_reset(shost, work_q, done_q))
		return;

	if (scsi_eh_host_reset(work_q, done_q))
		return;

	/* Nothing else helped. */
	scsi_eh_offline_sdevs(work_q, done_q);
}
|  | 1419 |  | 
|  | 1420 | /** | 
|  | 1421 | * scsi_eh_flush_done_q - finish processed commands or retry them. | 
|  | 1422 | * @done_q:	list_head of processed commands. | 
|  | 1423 | * | 
|  | 1424 | **/ | 
| Tejun Heo | 041c5fc | 2006-01-23 13:09:36 +0900 | [diff] [blame] | 1425 | void scsi_eh_flush_done_q(struct list_head *done_q) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1426 | { | 
| Christoph Hellwig | 937abeaa | 2005-06-19 13:43:56 +0200 | [diff] [blame] | 1427 | struct scsi_cmnd *scmd, *next; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1428 |  | 
| Christoph Hellwig | 937abeaa | 2005-06-19 13:43:56 +0200 | [diff] [blame] | 1429 | list_for_each_entry_safe(scmd, next, done_q, eh_entry) { | 
|  | 1430 | list_del_init(&scmd->eh_entry); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1431 | if (scsi_device_online(scmd->device) && | 
|  | 1432 | !blk_noretry_request(scmd->request) && | 
| Brian King | 8884efa | 2006-02-24 17:10:04 -0600 | [diff] [blame] | 1433 | (++scmd->retries <= scmd->allowed)) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1434 | SCSI_LOG_ERROR_RECOVERY(3, printk("%s: flush" | 
|  | 1435 | " retry cmd: %p\n", | 
|  | 1436 | current->comm, | 
|  | 1437 | scmd)); | 
|  | 1438 | scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY); | 
|  | 1439 | } else { | 
| Patrick Mansfield | 793698c | 2005-05-16 17:42:15 -0700 | [diff] [blame] | 1440 | /* | 
|  | 1441 | * If just we got sense for the device (called | 
|  | 1442 | * scsi_eh_get_sense), scmd->result is already | 
|  | 1443 | * set, do not set DRIVER_TIMEOUT. | 
|  | 1444 | */ | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1445 | if (!scmd->result) | 
|  | 1446 | scmd->result |= (DRIVER_TIMEOUT << 24); | 
|  | 1447 | SCSI_LOG_ERROR_RECOVERY(3, printk("%s: flush finish" | 
|  | 1448 | " cmd: %p\n", | 
|  | 1449 | current->comm, scmd)); | 
|  | 1450 | scsi_finish_command(scmd); | 
|  | 1451 | } | 
|  | 1452 | } | 
|  | 1453 | } | 
| Tejun Heo | 041c5fc | 2006-01-23 13:09:36 +0900 | [diff] [blame] | 1454 | EXPORT_SYMBOL(scsi_eh_flush_done_q); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1455 |  | 
|  | 1456 | /** | 
|  | 1457 | * scsi_unjam_host - Attempt to fix a host which has a cmd that failed. | 
|  | 1458 | * @shost:	Host to unjam. | 
|  | 1459 | * | 
|  | 1460 | * Notes: | 
|  | 1461 | *    When we come in here, we *know* that all commands on the bus have | 
|  | 1462 | *    either completed, failed or timed out.  we also know that no further | 
|  | 1463 | *    commands are being sent to the host, so things are relatively quiet | 
|  | 1464 | *    and we have freedom to fiddle with things as we wish. | 
|  | 1465 | * | 
|  | 1466 | *    This is only the *default* implementation.  it is possible for | 
|  | 1467 | *    individual drivers to supply their own version of this function, and | 
|  | 1468 | *    if the maintainer wishes to do this, it is strongly suggested that | 
|  | 1469 | *    this function be taken as a template and modified.  this function | 
|  | 1470 | *    was designed to correctly handle problems for about 95% of the | 
|  | 1471 | *    different cases out there, and it should always provide at least a | 
|  | 1472 | *    reasonable amount of error recovery. | 
|  | 1473 | * | 
|  | 1474 | *    Any command marked 'failed' or 'timeout' must eventually have | 
|  | 1475 | *    scsi_finish_cmd() called for it.  we do all of the retry stuff | 
|  | 1476 | *    here, so when we restart the host after we return it should have an | 
|  | 1477 | *    empty queue. | 
|  | 1478 | **/ | 
|  | 1479 | static void scsi_unjam_host(struct Scsi_Host *shost) | 
|  | 1480 | { | 
|  | 1481 | unsigned long flags; | 
|  | 1482 | LIST_HEAD(eh_work_q); | 
|  | 1483 | LIST_HEAD(eh_done_q); | 
|  | 1484 |  | 
|  | 1485 | spin_lock_irqsave(shost->host_lock, flags); | 
|  | 1486 | list_splice_init(&shost->eh_cmd_q, &eh_work_q); | 
|  | 1487 | spin_unlock_irqrestore(shost->host_lock, flags); | 
|  | 1488 |  | 
|  | 1489 | SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(shost, &eh_work_q)); | 
|  | 1490 |  | 
|  | 1491 | if (!scsi_eh_get_sense(&eh_work_q, &eh_done_q)) | 
|  | 1492 | if (!scsi_eh_abort_cmds(&eh_work_q, &eh_done_q)) | 
|  | 1493 | scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q); | 
|  | 1494 |  | 
|  | 1495 | scsi_eh_flush_done_q(&eh_done_q); | 
|  | 1496 | } | 
|  | 1497 |  | 
|  | 1498 | /** | 
| Christoph Hellwig | ad42eb1 | 2005-10-29 01:01:55 +0200 | [diff] [blame] | 1499 | * scsi_error_handler - SCSI error handler thread | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1500 | * @data:	Host for which we are running. | 
|  | 1501 | * | 
|  | 1502 | * Notes: | 
| Christoph Hellwig | ad42eb1 | 2005-10-29 01:01:55 +0200 | [diff] [blame] | 1503 | *    This is the main error handling loop.  This is run as a kernel thread | 
|  | 1504 | *    for every SCSI host and handles all error handling activity. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1505 | **/ | 
|  | 1506 | int scsi_error_handler(void *data) | 
|  | 1507 | { | 
| Christoph Hellwig | ad42eb1 | 2005-10-29 01:01:55 +0200 | [diff] [blame] | 1508 | struct Scsi_Host *shost = data; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1509 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1510 | current->flags |= PF_NOFREEZE; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1511 |  | 
|  | 1512 | /* | 
| Christoph Hellwig | ad42eb1 | 2005-10-29 01:01:55 +0200 | [diff] [blame] | 1513 | * We use TASK_INTERRUPTIBLE so that the thread is not | 
|  | 1514 | * counted against the load average as a running process. | 
|  | 1515 | * We never actually get interrupted because kthread_run | 
|  | 1516 | * disables singal delivery for the created thread. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1517 | */ | 
| James Bottomley | 3ed7a47 | 2005-09-19 09:50:04 -0500 | [diff] [blame] | 1518 | set_current_state(TASK_INTERRUPTIBLE); | 
|  | 1519 | while (!kthread_should_stop()) { | 
|  | 1520 | if (shost->host_failed == 0 || | 
|  | 1521 | shost->host_failed != shost->host_busy) { | 
| Christoph Hellwig | ad42eb1 | 2005-10-29 01:01:55 +0200 | [diff] [blame] | 1522 | SCSI_LOG_ERROR_RECOVERY(1, | 
|  | 1523 | printk("Error handler scsi_eh_%d sleeping\n", | 
|  | 1524 | shost->host_no)); | 
| James Bottomley | 3ed7a47 | 2005-09-19 09:50:04 -0500 | [diff] [blame] | 1525 | schedule(); | 
|  | 1526 | set_current_state(TASK_INTERRUPTIBLE); | 
|  | 1527 | continue; | 
|  | 1528 | } | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1529 |  | 
| James Bottomley | 3ed7a47 | 2005-09-19 09:50:04 -0500 | [diff] [blame] | 1530 | __set_current_state(TASK_RUNNING); | 
| Christoph Hellwig | ad42eb1 | 2005-10-29 01:01:55 +0200 | [diff] [blame] | 1531 | SCSI_LOG_ERROR_RECOVERY(1, | 
|  | 1532 | printk("Error handler scsi_eh_%d waking up\n", | 
|  | 1533 | shost->host_no)); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1534 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1535 | /* | 
|  | 1536 | * We have a host that is failing for some reason.  Figure out | 
|  | 1537 | * what we need to do to get it up and online again (if we can). | 
|  | 1538 | * If we fail, we end up taking the thing offline. | 
|  | 1539 | */ | 
|  | 1540 | if (shost->hostt->eh_strategy_handler) | 
| Christoph Hellwig | ad42eb1 | 2005-10-29 01:01:55 +0200 | [diff] [blame] | 1541 | shost->hostt->eh_strategy_handler(shost); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1542 | else | 
|  | 1543 | scsi_unjam_host(shost); | 
|  | 1544 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1545 | /* | 
|  | 1546 | * Note - if the above fails completely, the action is to take | 
|  | 1547 | * individual devices offline and flush the queue of any | 
|  | 1548 | * outstanding requests that may have been pending.  When we | 
|  | 1549 | * restart, we restart any I/O to any other devices on the bus | 
|  | 1550 | * which are still online. | 
|  | 1551 | */ | 
|  | 1552 | scsi_restart_operations(shost); | 
| James Bottomley | 3ed7a47 | 2005-09-19 09:50:04 -0500 | [diff] [blame] | 1553 | set_current_state(TASK_INTERRUPTIBLE); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1554 | } | 
| Steven Rostedt | 461a0ff | 2005-10-19 08:22:13 -0400 | [diff] [blame] | 1555 | __set_current_state(TASK_RUNNING); | 
|  | 1556 |  | 
| Christoph Hellwig | ad42eb1 | 2005-10-29 01:01:55 +0200 | [diff] [blame] | 1557 | SCSI_LOG_ERROR_RECOVERY(1, | 
|  | 1558 | printk("Error handler scsi_eh_%d exiting\n", shost->host_no)); | 
| James Bottomley | 3ed7a47 | 2005-09-19 09:50:04 -0500 | [diff] [blame] | 1559 | shost->ehandler = NULL; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1560 | return 0; | 
|  | 1561 | } | 
|  | 1562 |  | 
|  | 1563 | /* | 
|  | 1564 | * Function:    scsi_report_bus_reset() | 
|  | 1565 | * | 
|  | 1566 | * Purpose:     Utility function used by low-level drivers to report that | 
|  | 1567 | *		they have observed a bus reset on the bus being handled. | 
|  | 1568 | * | 
|  | 1569 | * Arguments:   shost       - Host in question | 
|  | 1570 | *		channel     - channel on which reset was observed. | 
|  | 1571 | * | 
|  | 1572 | * Returns:     Nothing | 
|  | 1573 | * | 
|  | 1574 | * Lock status: Host lock must be held. | 
|  | 1575 | * | 
|  | 1576 | * Notes:       This only needs to be called if the reset is one which | 
|  | 1577 | *		originates from an unknown location.  Resets originated | 
|  | 1578 | *		by the mid-level itself don't need to call this, but there | 
|  | 1579 | *		should be no harm. | 
|  | 1580 | * | 
|  | 1581 | *		The main purpose of this is to make sure that a CHECK_CONDITION | 
|  | 1582 | *		is properly treated. | 
|  | 1583 | */ | 
|  | 1584 | void scsi_report_bus_reset(struct Scsi_Host *shost, int channel) | 
|  | 1585 | { | 
|  | 1586 | struct scsi_device *sdev; | 
|  | 1587 |  | 
|  | 1588 | __shost_for_each_device(sdev, shost) { | 
| Jeff Garzik | 422c0d6 | 2005-10-24 18:05:09 -0400 | [diff] [blame] | 1589 | if (channel == sdev_channel(sdev)) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1590 | sdev->was_reset = 1; | 
|  | 1591 | sdev->expecting_cc_ua = 1; | 
|  | 1592 | } | 
|  | 1593 | } | 
|  | 1594 | } | 
|  | 1595 | EXPORT_SYMBOL(scsi_report_bus_reset); | 
|  | 1596 |  | 
|  | 1597 | /* | 
|  | 1598 | * Function:    scsi_report_device_reset() | 
|  | 1599 | * | 
|  | 1600 | * Purpose:     Utility function used by low-level drivers to report that | 
|  | 1601 | *		they have observed a device reset on the device being handled. | 
|  | 1602 | * | 
|  | 1603 | * Arguments:   shost       - Host in question | 
|  | 1604 | *		channel     - channel on which reset was observed | 
|  | 1605 | *		target	    - target on which reset was observed | 
|  | 1606 | * | 
|  | 1607 | * Returns:     Nothing | 
|  | 1608 | * | 
|  | 1609 | * Lock status: Host lock must be held | 
|  | 1610 | * | 
|  | 1611 | * Notes:       This only needs to be called if the reset is one which | 
|  | 1612 | *		originates from an unknown location.  Resets originated | 
|  | 1613 | *		by the mid-level itself don't need to call this, but there | 
|  | 1614 | *		should be no harm. | 
|  | 1615 | * | 
|  | 1616 | *		The main purpose of this is to make sure that a CHECK_CONDITION | 
|  | 1617 | *		is properly treated. | 
|  | 1618 | */ | 
|  | 1619 | void scsi_report_device_reset(struct Scsi_Host *shost, int channel, int target) | 
|  | 1620 | { | 
|  | 1621 | struct scsi_device *sdev; | 
|  | 1622 |  | 
|  | 1623 | __shost_for_each_device(sdev, shost) { | 
| Jeff Garzik | 422c0d6 | 2005-10-24 18:05:09 -0400 | [diff] [blame] | 1624 | if (channel == sdev_channel(sdev) && | 
|  | 1625 | target == sdev_id(sdev)) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1626 | sdev->was_reset = 1; | 
|  | 1627 | sdev->expecting_cc_ua = 1; | 
|  | 1628 | } | 
|  | 1629 | } | 
|  | 1630 | } | 
|  | 1631 | EXPORT_SYMBOL(scsi_report_device_reset); | 
|  | 1632 |  | 
|  | 1633 | static void | 
|  | 1634 | scsi_reset_provider_done_command(struct scsi_cmnd *scmd) | 
|  | 1635 | { | 
|  | 1636 | } | 
|  | 1637 |  | 
|  | 1638 | /* | 
|  | 1639 | * Function:	scsi_reset_provider | 
|  | 1640 | * | 
|  | 1641 | * Purpose:	Send requested reset to a bus or device at any phase. | 
|  | 1642 | * | 
|  | 1643 | * Arguments:	device	- device to send reset to | 
|  | 1644 | *		flag - reset type (see scsi.h) | 
|  | 1645 | * | 
|  | 1646 | * Returns:	SUCCESS/FAILURE. | 
|  | 1647 | * | 
|  | 1648 | * Notes:	This is used by the SCSI Generic driver to provide | 
|  | 1649 | *		Bus/Device reset capability. | 
|  | 1650 | */ | 
|  | 1651 | int | 
|  | 1652 | scsi_reset_provider(struct scsi_device *dev, int flag) | 
|  | 1653 | { | 
|  | 1654 | struct scsi_cmnd *scmd = scsi_get_command(dev, GFP_KERNEL); | 
|  | 1655 | struct request req; | 
|  | 1656 | int rtn; | 
|  | 1657 |  | 
|  | 1658 | scmd->request = &req; | 
|  | 1659 | memset(&scmd->eh_timeout, 0, sizeof(scmd->eh_timeout)); | 
|  | 1660 | scmd->request->rq_status      	= RQ_SCSI_BUSY; | 
| Christoph Hellwig | b4edcbc | 2005-06-19 13:40:52 +0200 | [diff] [blame] | 1661 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1662 | memset(&scmd->cmnd, '\0', sizeof(scmd->cmnd)); | 
|  | 1663 |  | 
|  | 1664 | scmd->scsi_done		= scsi_reset_provider_done_command; | 
|  | 1665 | scmd->done			= NULL; | 
|  | 1666 | scmd->buffer			= NULL; | 
|  | 1667 | scmd->bufflen			= 0; | 
|  | 1668 | scmd->request_buffer		= NULL; | 
|  | 1669 | scmd->request_bufflen		= 0; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1670 |  | 
|  | 1671 | scmd->cmd_len			= 0; | 
|  | 1672 |  | 
|  | 1673 | scmd->sc_data_direction		= DMA_BIDIRECTIONAL; | 
|  | 1674 | scmd->sc_request		= NULL; | 
|  | 1675 | scmd->sc_magic			= SCSI_CMND_MAGIC; | 
|  | 1676 |  | 
|  | 1677 | init_timer(&scmd->eh_timeout); | 
|  | 1678 |  | 
|  | 1679 | /* | 
|  | 1680 | * Sometimes the command can get back into the timer chain, | 
|  | 1681 | * so use the pid as an identifier. | 
|  | 1682 | */ | 
|  | 1683 | scmd->pid			= 0; | 
|  | 1684 |  | 
|  | 1685 | switch (flag) { | 
|  | 1686 | case SCSI_TRY_RESET_DEVICE: | 
|  | 1687 | rtn = scsi_try_bus_device_reset(scmd); | 
|  | 1688 | if (rtn == SUCCESS) | 
|  | 1689 | break; | 
|  | 1690 | /* FALLTHROUGH */ | 
|  | 1691 | case SCSI_TRY_RESET_BUS: | 
|  | 1692 | rtn = scsi_try_bus_reset(scmd); | 
|  | 1693 | if (rtn == SUCCESS) | 
|  | 1694 | break; | 
|  | 1695 | /* FALLTHROUGH */ | 
|  | 1696 | case SCSI_TRY_RESET_HOST: | 
|  | 1697 | rtn = scsi_try_host_reset(scmd); | 
|  | 1698 | break; | 
|  | 1699 | default: | 
|  | 1700 | rtn = FAILED; | 
|  | 1701 | } | 
|  | 1702 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1703 | scsi_next_command(scmd); | 
|  | 1704 | return rtn; | 
|  | 1705 | } | 
|  | 1706 | EXPORT_SYMBOL(scsi_reset_provider); | 
|  | 1707 |  | 
|  | 1708 | /** | 
|  | 1709 | * scsi_normalize_sense - normalize main elements from either fixed or | 
|  | 1710 | *			descriptor sense data format into a common format. | 
|  | 1711 | * | 
|  | 1712 | * @sense_buffer:	byte array containing sense data returned by device | 
|  | 1713 | * @sb_len:		number of valid bytes in sense_buffer | 
|  | 1714 | * @sshdr:		pointer to instance of structure that common | 
|  | 1715 | *			elements are written to. | 
|  | 1716 | * | 
|  | 1717 | * Notes: | 
|  | 1718 | *	The "main elements" from sense data are: response_code, sense_key, | 
|  | 1719 | *	asc, ascq and additional_length (only for descriptor format). | 
|  | 1720 | * | 
|  | 1721 | *	Typically this function can be called after a device has | 
|  | 1722 | *	responded to a SCSI command with the CHECK_CONDITION status. | 
|  | 1723 | * | 
|  | 1724 | * Return value: | 
|  | 1725 | *	1 if valid sense data information found, else 0; | 
|  | 1726 | **/ | 
|  | 1727 | int scsi_normalize_sense(const u8 *sense_buffer, int sb_len, | 
|  | 1728 | struct scsi_sense_hdr *sshdr) | 
|  | 1729 | { | 
| James Bottomley | 33aa687 | 2005-08-28 11:31:14 -0500 | [diff] [blame] | 1730 | if (!sense_buffer || !sb_len) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1731 | return 0; | 
|  | 1732 |  | 
|  | 1733 | memset(sshdr, 0, sizeof(struct scsi_sense_hdr)); | 
|  | 1734 |  | 
|  | 1735 | sshdr->response_code = (sense_buffer[0] & 0x7f); | 
| James Bottomley | 33aa687 | 2005-08-28 11:31:14 -0500 | [diff] [blame] | 1736 |  | 
|  | 1737 | if (!scsi_sense_valid(sshdr)) | 
|  | 1738 | return 0; | 
|  | 1739 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1740 | if (sshdr->response_code >= 0x72) { | 
|  | 1741 | /* | 
|  | 1742 | * descriptor format | 
|  | 1743 | */ | 
|  | 1744 | if (sb_len > 1) | 
|  | 1745 | sshdr->sense_key = (sense_buffer[1] & 0xf); | 
|  | 1746 | if (sb_len > 2) | 
|  | 1747 | sshdr->asc = sense_buffer[2]; | 
|  | 1748 | if (sb_len > 3) | 
|  | 1749 | sshdr->ascq = sense_buffer[3]; | 
|  | 1750 | if (sb_len > 7) | 
|  | 1751 | sshdr->additional_length = sense_buffer[7]; | 
|  | 1752 | } else { | 
|  | 1753 | /* | 
|  | 1754 | * fixed format | 
|  | 1755 | */ | 
|  | 1756 | if (sb_len > 2) | 
|  | 1757 | sshdr->sense_key = (sense_buffer[2] & 0xf); | 
|  | 1758 | if (sb_len > 7) { | 
|  | 1759 | sb_len = (sb_len < (sense_buffer[7] + 8)) ? | 
|  | 1760 | sb_len : (sense_buffer[7] + 8); | 
|  | 1761 | if (sb_len > 12) | 
|  | 1762 | sshdr->asc = sense_buffer[12]; | 
|  | 1763 | if (sb_len > 13) | 
|  | 1764 | sshdr->ascq = sense_buffer[13]; | 
|  | 1765 | } | 
|  | 1766 | } | 
|  | 1767 |  | 
|  | 1768 | return 1; | 
|  | 1769 | } | 
|  | 1770 | EXPORT_SYMBOL(scsi_normalize_sense); | 
|  | 1771 |  | 
|  | 1772 | int scsi_request_normalize_sense(struct scsi_request *sreq, | 
|  | 1773 | struct scsi_sense_hdr *sshdr) | 
|  | 1774 | { | 
|  | 1775 | return scsi_normalize_sense(sreq->sr_sense_buffer, | 
|  | 1776 | sizeof(sreq->sr_sense_buffer), sshdr); | 
|  | 1777 | } | 
|  | 1778 | EXPORT_SYMBOL(scsi_request_normalize_sense); | 
|  | 1779 |  | 
|  | 1780 | int scsi_command_normalize_sense(struct scsi_cmnd *cmd, | 
|  | 1781 | struct scsi_sense_hdr *sshdr) | 
|  | 1782 | { | 
|  | 1783 | return scsi_normalize_sense(cmd->sense_buffer, | 
|  | 1784 | sizeof(cmd->sense_buffer), sshdr); | 
|  | 1785 | } | 
|  | 1786 | EXPORT_SYMBOL(scsi_command_normalize_sense); | 
|  | 1787 |  | 
|  | 1788 | /** | 
|  | 1789 | * scsi_sense_desc_find - search for a given descriptor type in | 
|  | 1790 | *			descriptor sense data format. | 
|  | 1791 | * | 
|  | 1792 | * @sense_buffer:	byte array of descriptor format sense data | 
|  | 1793 | * @sb_len:		number of valid bytes in sense_buffer | 
|  | 1794 | * @desc_type:		value of descriptor type to find | 
|  | 1795 | *			(e.g. 0 -> information) | 
|  | 1796 | * | 
|  | 1797 | * Notes: | 
|  | 1798 | *	only valid when sense data is in descriptor format | 
|  | 1799 | * | 
|  | 1800 | * Return value: | 
|  | 1801 | *	pointer to start of (first) descriptor if found else NULL | 
|  | 1802 | **/ | 
|  | 1803 | const u8 * scsi_sense_desc_find(const u8 * sense_buffer, int sb_len, | 
|  | 1804 | int desc_type) | 
|  | 1805 | { | 
|  | 1806 | int add_sen_len, add_len, desc_len, k; | 
|  | 1807 | const u8 * descp; | 
|  | 1808 |  | 
|  | 1809 | if ((sb_len < 8) || (0 == (add_sen_len = sense_buffer[7]))) | 
|  | 1810 | return NULL; | 
|  | 1811 | if ((sense_buffer[0] < 0x72) || (sense_buffer[0] > 0x73)) | 
|  | 1812 | return NULL; | 
|  | 1813 | add_sen_len = (add_sen_len < (sb_len - 8)) ? | 
|  | 1814 | add_sen_len : (sb_len - 8); | 
|  | 1815 | descp = &sense_buffer[8]; | 
|  | 1816 | for (desc_len = 0, k = 0; k < add_sen_len; k += desc_len) { | 
|  | 1817 | descp += desc_len; | 
|  | 1818 | add_len = (k < (add_sen_len - 1)) ? descp[1]: -1; | 
|  | 1819 | desc_len = add_len + 2; | 
|  | 1820 | if (descp[0] == desc_type) | 
|  | 1821 | return descp; | 
|  | 1822 | if (add_len < 0) // short descriptor ?? | 
|  | 1823 | break; | 
|  | 1824 | } | 
|  | 1825 | return NULL; | 
|  | 1826 | } | 
|  | 1827 | EXPORT_SYMBOL(scsi_sense_desc_find); | 
|  | 1828 |  | 
|  | 1829 | /** | 
|  | 1830 | * scsi_get_sense_info_fld - attempts to get information field from | 
|  | 1831 | *			sense data (either fixed or descriptor format) | 
|  | 1832 | * | 
|  | 1833 | * @sense_buffer:	byte array of sense data | 
|  | 1834 | * @sb_len:		number of valid bytes in sense_buffer | 
|  | 1835 | * @info_out:		pointer to 64 integer where 8 or 4 byte information | 
|  | 1836 | *			field will be placed if found. | 
|  | 1837 | * | 
|  | 1838 | * Return value: | 
|  | 1839 | *	1 if information field found, 0 if not found. | 
|  | 1840 | **/ | 
|  | 1841 | int scsi_get_sense_info_fld(const u8 * sense_buffer, int sb_len, | 
|  | 1842 | u64 * info_out) | 
|  | 1843 | { | 
|  | 1844 | int j; | 
|  | 1845 | const u8 * ucp; | 
|  | 1846 | u64 ull; | 
|  | 1847 |  | 
|  | 1848 | if (sb_len < 7) | 
|  | 1849 | return 0; | 
|  | 1850 | switch (sense_buffer[0] & 0x7f) { | 
|  | 1851 | case 0x70: | 
|  | 1852 | case 0x71: | 
|  | 1853 | if (sense_buffer[0] & 0x80) { | 
|  | 1854 | *info_out = (sense_buffer[3] << 24) + | 
|  | 1855 | (sense_buffer[4] << 16) + | 
|  | 1856 | (sense_buffer[5] << 8) + sense_buffer[6]; | 
|  | 1857 | return 1; | 
|  | 1858 | } else | 
|  | 1859 | return 0; | 
|  | 1860 | case 0x72: | 
|  | 1861 | case 0x73: | 
|  | 1862 | ucp = scsi_sense_desc_find(sense_buffer, sb_len, | 
|  | 1863 | 0 /* info desc */); | 
|  | 1864 | if (ucp && (0xa == ucp[1])) { | 
|  | 1865 | ull = 0; | 
|  | 1866 | for (j = 0; j < 8; ++j) { | 
|  | 1867 | if (j > 0) | 
|  | 1868 | ull <<= 8; | 
|  | 1869 | ull |= ucp[4 + j]; | 
|  | 1870 | } | 
|  | 1871 | *info_out = ull; | 
|  | 1872 | return 1; | 
|  | 1873 | } else | 
|  | 1874 | return 0; | 
|  | 1875 | default: | 
|  | 1876 | return 0; | 
|  | 1877 | } | 
|  | 1878 | } | 
|  | 1879 | EXPORT_SYMBOL(scsi_get_sense_info_fld); |