camera: Fix client eviction/disconnect race.

- Add blocking wait in camera service connect call to
  prevent race when client has called disconnect while
  eviction of that client is taking place, resulting
  in early call of device initialization before all
  HAL resources are available.

Bug: 20038135
Change-Id: I7afc5bfa23612ba7f83293fa542ff983a5991230
diff --git a/services/camera/libcameraservice/CameraService.cpp b/services/camera/libcameraservice/CameraService.cpp
index dabfafa..414d563 100644
--- a/services/camera/libcameraservice/CameraService.cpp
+++ b/services/camera/libcameraservice/CameraService.cpp
@@ -866,7 +866,7 @@
         std::shared_ptr<resource_policy::ClientDescriptor<String8, sp<BasicClient>>>* partial) {
 
     status_t ret = NO_ERROR;
-    std::vector<sp<BasicClient>> evictedClients;
+    std::vector<DescriptorPtr> evictedClients;
     DescriptorPtr clientDescriptor;
     {
         if (effectiveApiLevel == API_1) {
@@ -974,7 +974,7 @@
 
             ALOGE("CameraService::connect evicting conflicting client for camera ID %s",
                     i->getKey().string());
-            evictedClients.push_back(clientSp);
+            evictedClients.push_back(i);
 
             // Log the clients evicted
             logEvent(String8::format("EVICT device %s client held by package %s (PID"
@@ -1001,12 +1001,31 @@
     // Destroy evicted clients
     for (auto& i : evictedClients) {
         // Disconnect is blocking, and should only have returned when HAL has cleaned up
-        i->disconnect(); // Clients will remove themselves from the active client list here
+        i->getValue()->disconnect(); // Clients will remove themselves from the active client list
     }
-    evictedClients.clear();
 
     IPCThreadState::self()->restoreCallingIdentity(token);
 
+    for (const auto& i : evictedClients) {
+        ALOGV("%s: Waiting for disconnect to complete for client for device %s (PID %" PRId32 ")",
+                __FUNCTION__, i->getKey().string(), i->getOwnerId());
+        ret = mActiveClientManager.waitUntilRemoved(i, DEFAULT_DISCONNECT_TIMEOUT_NS);
+        if (ret == TIMED_OUT) {
+            ALOGE("%s: Timed out waiting for client for device %s to disconnect, "
+                    "current clients:\n%s", __FUNCTION__, i->getKey().string(),
+                    mActiveClientManager.toString().string());
+            return -EBUSY;
+        }
+        if (ret != NO_ERROR) {
+            ALOGE("%s: Received error waiting for client for device %s to disconnect: %s (%d), "
+                    "current clients:\n%s", __FUNCTION__, i->getKey().string(), strerror(-ret),
+                    ret, mActiveClientManager.toString().string());
+            return ret;
+        }
+    }
+
+    evictedClients.clear();
+
     // Once clients have been disconnected, relock
     mServiceLock.lock();
 
diff --git a/services/camera/libcameraservice/CameraService.h b/services/camera/libcameraservice/CameraService.h
index 9eda205..91c7d59 100644
--- a/services/camera/libcameraservice/CameraService.h
+++ b/services/camera/libcameraservice/CameraService.h
@@ -90,6 +90,9 @@
     // 3 second busy timeout when other clients are connecting
     static const nsecs_t DEFAULT_CONNECT_TIMEOUT_NS = 3000000000;
 
+    // 1 second busy timeout when other clients are disconnecting
+    static const nsecs_t DEFAULT_DISCONNECT_TIMEOUT_NS = 1000000000;
+
     // Default number of messages to store in eviction log
     static const size_t DEFAULT_EVENT_LOG_LENGTH = 100;
 
diff --git a/services/camera/libcameraservice/utils/ClientManager.h b/services/camera/libcameraservice/utils/ClientManager.h
index ad5486d..aa40a2d 100644
--- a/services/camera/libcameraservice/utils/ClientManager.h
+++ b/services/camera/libcameraservice/utils/ClientManager.h
@@ -17,7 +17,9 @@
 #ifndef ANDROID_SERVICE_UTILS_EVICTION_POLICY_MANAGER_H
 #define ANDROID_SERVICE_UTILS_EVICTION_POLICY_MANAGER_H
 
+#include <utils/Condition.h>
 #include <utils/Mutex.h>
+#include <utils/Timers.h>
 
 #include <algorithm>
 #include <utility>
@@ -263,6 +265,16 @@
      */
     std::shared_ptr<ClientDescriptor<KEY, VALUE>> get(const KEY& key) const;
 
+    /**
+     * Block until the given client is no longer in the active clients list, or the timeout
+     * occurred.
+     *
+     * Returns NO_ERROR if this succeeded, -ETIMEDOUT on a timeout, or a negative error code on
+     * failure.
+     */
+    status_t waitUntilRemoved(const std::shared_ptr<ClientDescriptor<KEY, VALUE>> client,
+            nsecs_t timeout) const;
+
 protected:
     ~ClientManager();
 
@@ -284,6 +296,7 @@
     int64_t getCurrentCostLocked() const;
 
     mutable Mutex mLock;
+    mutable Condition mRemovedCondition;
     int32_t mMaxCost;
     // LRU ordered, most recent at end
     std::vector<std::shared_ptr<ClientDescriptor<KEY, VALUE>>> mClients;
@@ -430,6 +443,7 @@
         }), mClients.end());
 
     mClients.push_back(client);
+    mRemovedCondition.broadcast();
 
     return evicted;
 }
@@ -487,6 +501,7 @@
 void ClientManager<KEY, VALUE>::removeAll() {
     Mutex::Autolock lock(mLock);
     mClients.clear();
+    mRemovedCondition.broadcast();
 }
 
 template<class KEY, class VALUE>
@@ -505,6 +520,39 @@
             return false;
         }), mClients.end());
 
+    mRemovedCondition.broadcast();
+    return ret;
+}
+
+template<class KEY, class VALUE>
+status_t ClientManager<KEY, VALUE>::waitUntilRemoved(
+        const std::shared_ptr<ClientDescriptor<KEY, VALUE>> client,
+        nsecs_t timeout) const {
+    status_t ret = NO_ERROR;
+    Mutex::Autolock lock(mLock);
+
+    bool isRemoved = false;
+
+    // Figure out what time in the future we should hit the timeout
+    nsecs_t failTime = systemTime(SYSTEM_TIME_MONOTONIC) + timeout;
+
+    while (!isRemoved) {
+        isRemoved = true;
+        for (const auto& i : mClients) {
+            if (i == client) {
+                isRemoved = false;
+            }
+        }
+
+        if (!isRemoved) {
+            ret = mRemovedCondition.waitRelative(mLock, timeout);
+            if (ret != NO_ERROR) {
+                break;
+            }
+            timeout = failTime - systemTime(SYSTEM_TIME_MONOTONIC);
+        }
+    }
+
     return ret;
 }
 
@@ -520,6 +568,7 @@
             }
             return false;
         }), mClients.end());
+    mRemovedCondition.broadcast();
 }
 
 template<class KEY, class VALUE>