From 0914a38252f205fc04fa50e858b24fa5f535ab11 Mon Sep 17 00:00:00 2001
From: Hiroki Nakagawa <nhiroki@chromium.org>
Date: Wed, 29 Apr 2020 11:46:54 +0900
Subject: [PATCH] ServiceWorker: Avoid double destruction of ServiceWorkerObjectHost on connection error

This CL avoids the case where ServiceWorkerObjectHost is destroyed twice
on ServiceWorkerObjectHost::OnConnectionError() when Chromium is built
with the GCC build toolchain.

> How does the issue happen?

ServiceWorkerObjectHost has a cyclic reference like this:

ServiceWorkerObjectHost
  --([1] scoped_refptr)--> ServiceWorkerVersion
    --([2] std::unique_ptr)--> ServiceWorkerProviderHost
      --([3] std::unique_ptr)--> ServiceWorkerContainerHost
        --([4] std::unique_ptr)--> ServiceWorkerObjectHost

Note that ServiceWorkerContainerHost manages ServiceWorkerObjectHost in
map<int64_t version_id, std::unique_ptr<ServiceWorkerObjectHost>>.

When ServiceWorkerObjectHost::OnConnectionError() is called, the
function removes the reference [4] from the map, and destroys
ServiceWorkerObjectHost. If the object host has the last reference [1]
to ServiceWorkerVersion, the destruction also cuts off the references
[2] and [3], and destroys ServiceWorkerProviderHost and
ServiceWorkerContainerHost.

This seems to work well on the Chromium's default toolchain, but not
work on the GCC toolchain. According to the report, destruction of
ServiceWorkerContainerHost happens while the map owned by the container
host is erasing the ServiceWorkerObjectHost, and this results in crash
due to double destruction of the object host.

I don't know the reason why this happens only on the GCC toolchain, but
I suspect the order of object destruction on std::map::erase() could be
different depending on the toolchains.

> How does this CL fix this?

The ideal fix is to redesign the ownership model of
ServiceWorkerVersion, but it's not feasible in the short term.

Instead, this CL avoids destruction of ServiceWorkerObjectHost on
std::map::erase(). The new code takes the ownership of the object host
from the map first, and then erases the entry from the map. This
separates timings to erase the map entry and to destroy the object host,
so the crash should no longer happen.

Bug: 1056598
Change-Id: Id30654cb575bc557c42044d6f0c6f1f9bfaed613
---

diff --git a/content/browser/service_worker/service_worker_container_host.cc b/content/browser/service_worker/service_worker_container_host.cc
index c631bcd..ff917f8 100644
--- a/content/browser/service_worker/service_worker_container_host.cc
+++ b/content/browser/service_worker/service_worker_container_host.cc
@@ -717,6 +717,16 @@
     int64_t version_id) {
   DCHECK_CURRENTLY_ON(ServiceWorkerContext::GetCoreThreadId());
   DCHECK(base::Contains(service_worker_object_hosts_, version_id));
+
+  // ServiceWorkerObjectHost to be deleted may have the last reference to
+  // ServiceWorkerVersion that indirectly owns this ServiceWorkerContainerHost.
+  // If we erase the object host directly from the map, |this| could be deleted
+  // during the map operation and may crash. To avoid the case, we take the
+  // ownership of the object host from the map first, and then erase the entry
+  // from the map. See https://crbug.com/1056598 for details.
+  std::unique_ptr<ServiceWorkerObjectHost> to_be_deleted =
+      std::move(service_worker_object_hosts_[version_id]);
+  DCHECK(to_be_deleted);
   service_worker_object_hosts_.erase(version_id);
 }
 
diff --git a/content/browser/service_worker/service_worker_object_host_unittest.cc b/content/browser/service_worker/service_worker_object_host_unittest.cc
index 238cb8b..f60c7a2 100644
--- a/content/browser/service_worker/service_worker_object_host_unittest.cc
+++ b/content/browser/service_worker/service_worker_object_host_unittest.cc
@@ -200,6 +200,19 @@
     return registration_info;
   }
 
+  void CallOnConnectionError(ServiceWorkerContainerHost* container_host,
+                             int64_t version_id) {
+    // ServiceWorkerObjectHost has the last reference to the version.
+    ServiceWorkerObjectHost* object_host =
+        GetServiceWorkerObjectHost(container_host, version_id);
+    EXPECT_TRUE(object_host->version_->HasOneRef());
+
+    // Make sure that OnConnectionError induces destruction of the version and
+    // the object host.
+    object_host->receivers_.Clear();
+    object_host->OnConnectionError();
+  }
+
   BrowserTaskEnvironment task_environment_;
   std::unique_ptr<EmbeddedWorkerTestHelper> helper_;
   scoped_refptr<ServiceWorkerRegistration> registration_;
@@ -409,5 +422,30 @@
             events[0]->source_info_for_client->client_type);
 }
 
+// This is a regression test for https://crbug.com/1056598.
+TEST_F(ServiceWorkerObjectHostTest, OnConnectionError) {
+  const GURL scope("https://www.example.com/");
+  const GURL script_url("https://www.example.com/service_worker.js");
+  Initialize(std::make_unique<EmbeddedWorkerTestHelper>(base::FilePath()));
+  SetUpRegistration(scope, script_url);
+
+  // Create the provider host.
+  ASSERT_EQ(blink::ServiceWorkerStatusCode::kOk,
+            StartServiceWorker(version_.get()));
+
+  // Set up the case where the last reference to the version is owned by the
+  // service worker object host.
+  ServiceWorkerContainerHost* container_host =
+      version_->provider_host()->container_host();
+  ServiceWorkerVersion* version_rawptr = version_.get();
+  version_ = nullptr;
+  ASSERT_TRUE(version_rawptr->HasOneRef());
+
+  // Simulate the connection error that induces the object host destruction.
+  // This shouldn't crash.
+  CallOnConnectionError(container_host, version_rawptr->version_id());
+  base::RunLoop().RunUntilIdle();
+}
+
 }  // namespace service_worker_object_host_unittest
 }  // namespace content