msm: SSR: Fix problems with concurrent SSRs If one SSR call comes in and queues a work and the work item starts running and then another SSR call comes in we will end up with a running work item and a pending work item. The pending work item will not run until the running work item completes. With the current code the work item will run to completion and then the pending work item will run and restart the subsystem again. This is wrong since we want to 'short-circuit' the code in this case and do nothing if multiple SSR calls come in while the subsystem is in the 'crashed' state. Add state tracking logic so that we know what part of the restart process a particular subsystem is in and use it to fix this problem. Conflicts: arch/arm/mach-msm/subsystem_restart.c Change-Id: Iac8713951af1f290af1df7e94468d1843fc7980b CRs-Fixed: 397848 Signed-off-by: Stephen Boyd <sboyd@codeaurora.org> Signed-off-by: Vikram Mulukutla <markivx@codeaurora.org>

commit: 8f57c0f3f247cf2585401bb369a25dd40fb9c4e3 [log] [tgz]
author: Stephen Boyd <sboyd@codeaurora.org> Tue Sep 18 11:31:02 2012 -0700
committer: Vikram Mulukutla <markivx@codeaurora.org> Wed Apr 24 12:41:13 2013 -0700
tree: 95b2b2f9493c8b829ff374cf003f576ccab53d37
parent: 3e6f690f83d599aecc1ff59a3d7cc237f2ea372f [diff] [blame]
diff --git a/arch/arm/mach-msm/subsystem_restart.c b/arch/arm/mach-msm/subsystem_restart.c
index f2e969a..747276c 100644
--- a/arch/arm/mach-msm/subsystem_restart.c
+++ b/arch/arm/mach-msm/subsystem_restart.c

@@ -52,6 +52,18 @@
 	struct list_head list;
 };
 
+enum subsys_state {
+	SUBSYS_OFFLINE,
+	SUBSYS_ONLINE,
+	SUBSYS_CRASHED,
+};
+
+static const char * const subsys_states[] = {
+	[SUBSYS_OFFLINE] = "OFFLINE",
+	[SUBSYS_ONLINE] = "ONLINE",
+	[SUBSYS_CRASHED] = "CRASHED",
+};
+
 struct subsys_device {
 	struct subsys_desc *desc;
 	struct list_head list;
@@ -59,7 +71,8 @@
 	char wlname[64];
 	struct work_struct work;
 	spinlock_t restart_lock;
-	int restart_count;
+	bool restarting;
+	enum subsys_state state;
 
 	void *notify;
 
@@ -180,6 +193,20 @@
 module_param_call(restart_level, restart_level_set, param_get_int,
 			&restart_level, 0644);
 
+static void subsys_set_state(struct subsys_device *subsys,
+			     enum subsys_state state)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&subsys->restart_lock, flags);
+	if (subsys->state != state) {
+		subsys->state = state;
+		spin_unlock_irqrestore(&subsys->restart_lock, flags);
+		return;
+	}
+	spin_unlock_irqrestore(&subsys->restart_lock, flags);
+}
+
 static struct subsys_soc_restart_order *
 update_restart_order(struct subsys_device *dev)
 {
@@ -303,6 +330,7 @@
 		panic("subsys-restart: [%p]: Failed to shutdown %s!",
 			current, name);
 	}
+	subsys_set_state(dev, SUBSYS_OFFLINE);
 }
 
 static void subsystem_ramdump(struct subsys_device *dev, void *data)
@@ -322,6 +350,7 @@
 	if (dev->desc->powerup(dev->desc) < 0) {
 		panic("[%p]: Failed to powerup %s!", current, name);
 	}
+	subsys_set_state(dev, SUBSYS_ONLINE);
 }
 
 static void subsystem_restart_wq_func(struct work_struct *work)
@@ -371,7 +400,10 @@
 	 * Now that we've acquired the shutdown lock, either we're the first to
 	 * restart these subsystems or some other thread is doing the powerup
 	 * sequence for these subsystems. In the latter case, panic and bail
-	 * out, since a subsystem died in its powerup sequence.
+	 * out, since a subsystem died in its powerup sequence. This catches
+	 * the case where a subsystem in a restart order isn't the one
+	 * who initiated the original restart but has crashed while the restart
+	 * order is being rebooted.
 	 */
 	if (!mutex_trylock(powerup_lock)) {
 		panic("%s[%p]: Subsystem died during powerup!",
@@ -419,32 +451,36 @@
 
 out:
 	spin_lock_irqsave(&dev->restart_lock, flags);
-	dev->restart_count--;
-	if (!dev->restart_count)
-		wake_unlock(&dev->wake_lock);
+	dev->restarting = false;
+	wake_unlock(&dev->wake_lock);
 	spin_unlock_irqrestore(&dev->restart_lock, flags);
 }
 
 static void __subsystem_restart_dev(struct subsys_device *dev)
 {
 	struct subsys_desc *desc = dev->desc;
+	const char *name = dev->desc->name;
 	unsigned long flags;
 
 	pr_debug("Restarting %s [level=%d]!\n", desc->name, restart_level);
 
+	/*
+	 * We want to allow drivers to call subsystem_restart{_dev}() as many
+	 * times as they want up until the point where the subsystem is
+	 * shutdown.
+	 */
 	spin_lock_irqsave(&dev->restart_lock, flags);
-	if (!dev->restart_count)
-		wake_lock(&dev->wake_lock);
-	dev->restart_count++;
-	spin_unlock_irqrestore(&dev->restart_lock, flags);
-
-	if (!queue_work(ssr_wq, &dev->work)) {
-		spin_lock_irqsave(&dev->restart_lock, flags);
-		dev->restart_count--;
-		if (!dev->restart_count)
-			wake_unlock(&dev->wake_lock);
-		spin_unlock_irqrestore(&dev->restart_lock, flags);
+	if (dev->state != SUBSYS_CRASHED) {
+		if (dev->state == SUBSYS_ONLINE && !dev->restarting) {
+			dev->restarting = true;
+			dev->state = SUBSYS_CRASHED;
+			wake_lock(&dev->wake_lock);
+			queue_work(ssr_wq, &dev->work);
+		} else {
+			panic("Subsystem %s crashed during SSR!", name);
+		}
 	}
+	spin_unlock_irqrestore(&dev->restart_lock, flags);
 }
 
 int subsystem_restart_dev(struct subsys_device *dev)
@@ -511,6 +547,7 @@
 	dev->desc = desc;
 	dev->notify = subsys_notif_add_subsys(desc->name);
 	dev->restart_order = update_restart_order(dev);
+	dev->state = SUBSYS_ONLINE; /* Until proper refcounting appears */
 
 	snprintf(dev->wlname, sizeof(dev->wlname), "ssr(%s)", desc->name);
 	wake_lock_init(&dev->wake_lock, WAKE_LOCK_SUSPEND, dev->wlname);
commit	8f57c0f3f247cf2585401bb369a25dd40fb9c4e3	[log] [tgz]
author	Stephen Boyd <sboyd@codeaurora.org>	Tue Sep 18 11:31:02 2012 -0700
committer	Vikram Mulukutla <markivx@codeaurora.org>	Wed Apr 24 12:41:13 2013 -0700
tree	95b2b2f9493c8b829ff374cf003f576ccab53d37
parent	3e6f690f83d599aecc1ff59a3d7cc237f2ea372f [diff] [blame]