msm: SSR: Fix problems with concurrent SSRs
If one SSR call comes in and queues a work item, that work item
starts running, and then another SSR call comes in, we will end
up with a running work item and a pending work item. The pending
work item will not run until the running work item completes.
With the current code, the running work item will run to
completion, and then the pending work item will run and restart
the subsystem again.
This is wrong, since we want to 'short-circuit' the code in this
case and do nothing if multiple SSR calls come in while the
subsystem is in the 'crashed' state. Add state tracking logic so
that we know what part of the restart process a particular
subsystem is in, and use it to fix this problem.
Conflicts:
arch/arm/mach-msm/subsystem_restart.c
Change-Id: Iac8713951af1f290af1df7e94468d1843fc7980b
CRs-Fixed: 397848
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Vikram Mulukutla <markivx@codeaurora.org>
diff --git a/arch/arm/mach-msm/subsystem_restart.c b/arch/arm/mach-msm/subsystem_restart.c
index f2e969a..747276c 100644
--- a/arch/arm/mach-msm/subsystem_restart.c
+++ b/arch/arm/mach-msm/subsystem_restart.c
@@ -52,6 +52,18 @@
struct list_head list;
};
+enum subsys_state {
+ SUBSYS_OFFLINE,
+ SUBSYS_ONLINE,
+ SUBSYS_CRASHED,
+};
+
+static const char * const subsys_states[] = {
+ [SUBSYS_OFFLINE] = "OFFLINE",
+ [SUBSYS_ONLINE] = "ONLINE",
+ [SUBSYS_CRASHED] = "CRASHED",
+};
+
struct subsys_device {
struct subsys_desc *desc;
struct list_head list;
@@ -59,7 +71,8 @@
char wlname[64];
struct work_struct work;
spinlock_t restart_lock;
- int restart_count;
+ bool restarting;
+ enum subsys_state state;
void *notify;
@@ -180,6 +193,20 @@
module_param_call(restart_level, restart_level_set, param_get_int,
&restart_level, 0644);
+static void subsys_set_state(struct subsys_device *subsys,
+ enum subsys_state state)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&subsys->restart_lock, flags);
+ if (subsys->state != state) {
+ subsys->state = state;
+ spin_unlock_irqrestore(&subsys->restart_lock, flags);
+ return;
+ }
+ spin_unlock_irqrestore(&subsys->restart_lock, flags);
+}
+
static struct subsys_soc_restart_order *
update_restart_order(struct subsys_device *dev)
{
@@ -303,6 +330,7 @@
panic("subsys-restart: [%p]: Failed to shutdown %s!",
current, name);
}
+ subsys_set_state(dev, SUBSYS_OFFLINE);
}
static void subsystem_ramdump(struct subsys_device *dev, void *data)
@@ -322,6 +350,7 @@
if (dev->desc->powerup(dev->desc) < 0) {
panic("[%p]: Failed to powerup %s!", current, name);
}
+ subsys_set_state(dev, SUBSYS_ONLINE);
}
static void subsystem_restart_wq_func(struct work_struct *work)
@@ -371,7 +400,10 @@
* Now that we've acquired the shutdown lock, either we're the first to
* restart these subsystems or some other thread is doing the powerup
* sequence for these subsystems. In the latter case, panic and bail
- * out, since a subsystem died in its powerup sequence.
+ * out, since a subsystem died in its powerup sequence. This catches
+ * the case where a subsystem in a restart order isn't the one
+ * who initiated the original restart but has crashed while the restart
+ * order is being rebooted.
*/
if (!mutex_trylock(powerup_lock)) {
panic("%s[%p]: Subsystem died during powerup!",
@@ -419,32 +451,36 @@
out:
spin_lock_irqsave(&dev->restart_lock, flags);
- dev->restart_count--;
- if (!dev->restart_count)
- wake_unlock(&dev->wake_lock);
+ dev->restarting = false;
+ wake_unlock(&dev->wake_lock);
spin_unlock_irqrestore(&dev->restart_lock, flags);
}
static void __subsystem_restart_dev(struct subsys_device *dev)
{
struct subsys_desc *desc = dev->desc;
+ const char *name = dev->desc->name;
unsigned long flags;
pr_debug("Restarting %s [level=%d]!\n", desc->name, restart_level);
+ /*
+ * We want to allow drivers to call subsystem_restart{_dev}() as many
+ * times as they want up until the point where the subsystem is
+ * shutdown.
+ */
spin_lock_irqsave(&dev->restart_lock, flags);
- if (!dev->restart_count)
- wake_lock(&dev->wake_lock);
- dev->restart_count++;
- spin_unlock_irqrestore(&dev->restart_lock, flags);
-
- if (!queue_work(ssr_wq, &dev->work)) {
- spin_lock_irqsave(&dev->restart_lock, flags);
- dev->restart_count--;
- if (!dev->restart_count)
- wake_unlock(&dev->wake_lock);
- spin_unlock_irqrestore(&dev->restart_lock, flags);
+ if (dev->state != SUBSYS_CRASHED) {
+ if (dev->state == SUBSYS_ONLINE && !dev->restarting) {
+ dev->restarting = true;
+ dev->state = SUBSYS_CRASHED;
+ wake_lock(&dev->wake_lock);
+ queue_work(ssr_wq, &dev->work);
+ } else {
+ panic("Subsystem %s crashed during SSR!", name);
+ }
}
+ spin_unlock_irqrestore(&dev->restart_lock, flags);
}
int subsystem_restart_dev(struct subsys_device *dev)
@@ -511,6 +547,7 @@
dev->desc = desc;
dev->notify = subsys_notif_add_subsys(desc->name);
dev->restart_order = update_restart_order(dev);
+ dev->state = SUBSYS_ONLINE; /* Until proper refcounting appears */
snprintf(dev->wlname, sizeof(dev->wlname), "ssr(%s)", desc->name);
wake_lock_init(&dev->wake_lock, WAKE_LOCK_SUSPEND, dev->wlname);