| // Copyright 2012 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "content/browser/storage_partition_impl_map.h" |
| |
| #include <unordered_set> |
| #include <utility> |
| |
| #include "base/barrier_closure.h" |
| #include "base/command_line.h" |
| #include "base/containers/contains.h" |
| #include "base/containers/map_util.h" |
| #include "base/files/file_enumerator.h" |
| #include "base/files/file_path.h" |
| #include "base/files/file_util.h" |
| #include "base/functional/bind.h" |
| #include "base/functional/callback.h" |
| #include "base/functional/callback_helpers.h" |
| #include "base/location.h" |
| #include "base/strings/string_number_conversions.h" |
| #include "base/strings/string_util.h" |
| #include "base/task/single_thread_task_runner.h" |
| #include "base/task/thread_pool.h" |
| #include "build/build_config.h" |
| #include "content/browser/background_fetch/background_fetch_context.h" |
| #include "content/browser/blob_storage/chrome_blob_storage_context.h" |
| #include "content/browser/code_cache/generated_code_cache_context.h" |
| #include "content/browser/cookie_store/cookie_store_manager.h" |
| #include "content/browser/file_system/browser_file_system_helper.h" |
| #include "content/browser/loader/subresource_proxying_url_loader_service.h" |
| #include "content/browser/resource_context_impl.h" |
| #include "content/browser/storage_partition_impl.h" |
| #include "content/browser/webui/url_data_manager_backend.h" |
| #include "content/public/browser/browser_context.h" |
| #include "content/public/browser/browser_task_traits.h" |
| #include "content/public/browser/browser_thread.h" |
| #include "content/public/browser/content_browser_client.h" |
| #include "content/public/browser/storage_partition.h" |
| #include "content/public/common/content_client.h" |
| #include "content/public/common/content_features.h" |
| #include "content/public/common/content_switches.h" |
| #include "content/public/common/url_constants.h" |
| #include "crypto/hash.h" |
| #include "services/network/public/cpp/features.h" |
| #include "storage/browser/blob/blob_storage_context.h" |
| #include "third_party/blink/public/common/storage_key/storage_key.h" |
| |
| namespace content { |
| |
| namespace { |
| |
| // These constants are used to create the directory structure under the profile |
| // where renderers with a non-default storage partition keep their persistent |
| // state. This will contain a set of directories that partially mirror the |
| // directory structure of BrowserContext::GetPath(). |
| // |
| // The kStoragePartitionDirname contains an extensions directory, which is |
| // further partitioned by extension id. Each extension id directory contains |
| // one directory for the "default" extension storage partition and one |
| // directory for each persistent partition used by a webview tag. Example: |
| // |
| // Storage/ext/ABCDEF/def |
| // Storage/ext/ABCDEF/hash(partition name) |
| // |
| // The code in GetStoragePartitionPath() constructs these path names. |
| // |
| // TODO(nasko): Move extension related path code out of content. |
| const base::FilePath::CharType kStoragePartitionDirname[] = |
| FILE_PATH_LITERAL("Storage"); |
| const base::FilePath::CharType kExtensionsDirname[] = |
| FILE_PATH_LITERAL("ext"); |
| const base::FilePath::CharType kDefaultPartitionDirname[] = |
| FILE_PATH_LITERAL("def"); |
| const base::FilePath::CharType kTrashDirname[] = |
| FILE_PATH_LITERAL("trash"); |
| const base::FilePath::CharType kWebSQLDirname[] = |
| FILE_PATH_LITERAL("databases"); |
| |
| // Because partition names are user specified, they can be arbitrarily long |
| // which makes them unsuitable for path names. We use a truncation of a |
| // SHA256 hash to perform a deterministic shortening of the string. The |
| // kPartitionNameHashBytes constant controls the length of the truncation. |
| // We use 6 bytes, which gives us 99.999% reliability against collisions over |
| // 1 million partition domains. |
| // |
| // Analysis: |
| // We assume that all partition names within one partition domain are |
| // controlled by the same entity. Thus there is no chance of an adversarial |
| // attack, and all we care about is accidental collision. To get 5 9s over |
| // 1 million domains, we need the probability of a collision in any one domain |
| // to be |
| // |
| // p < 1 - nroot(1000000, .99999) ~= 10^-11 |
| // |
| // We use the following birthday attack approximation to calculate the max |
| // number of unique names for this probability: |
| // |
| // n(p,H) = sqrt(2*H * ln(1/(1-p))) |
| // |
| // For a 6-byte hash, H = 2^(6*8). n(10^-11, H) ~= 75 |
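| // (Working this through: H = 2^48 ~= 2.8 * 10^14 and ln(1/(1 - p)) ~= p for |
| // small p, so n ~= sqrt(2 * 2.8*10^14 * 10^-11) ~= sqrt(5600) ~= 75.) |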
| // |
| // An average partition domain is likely to have fewer than 10 unique |
| // partition names, which is far below 75. |
| // |
| // Note that for 4 9s of reliability, the limit is 237 partition names per |
| // partition domain. |
| const int kPartitionNameHashBytes = 6; |
| |
| // Needed for selecting all files in ObliterateOneDirectory() below. |
| #if BUILDFLAG(IS_POSIX) |
| const int kAllFileTypes = base::FileEnumerator::FILES | |
| base::FileEnumerator::DIRECTORIES | |
| base::FileEnumerator::SHOW_SYM_LINKS; |
| #else |
| const int kAllFileTypes = base::FileEnumerator::FILES | |
| base::FileEnumerator::DIRECTORIES; |
| #endif |
| |
| base::FilePath GetStoragePartitionDomainPath( |
| const std::string& partition_domain) { |
| CHECK(base::IsStringUTF8(partition_domain)); |
| |
| return base::FilePath(kStoragePartitionDirname).Append(kExtensionsDirname) |
| .Append(base::FilePath::FromUTF8Unsafe(partition_domain)); |
| } |
| |
| // Helper function for doing a depth-first deletion of the data on disk. |
| // Examines paths directly in |current_dir| (no recursion) and tries to |
| // delete from disk anything that neither is in |paths_to_keep| nor is an |
| // ancestor of an entry in |paths_to_keep|. Directories that contain an |
| // entry to keep need further expansion and are added to |paths_to_consider|. |
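| // |
| // For example (illustrative paths): if |current_dir| is Storage/ext, |
| // |paths_to_keep| contains Storage/ext/ABCDEF/def, and Storage/ext also |
| // holds GHIJKL, then GHIJKL is deleted and ABCDEF is pushed onto |
| // |paths_to_consider| for the next pass. |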
| void ObliterateOneDirectory(const base::FilePath& current_dir, |
| const std::vector<base::FilePath>& paths_to_keep, |
| std::vector<base::FilePath>* paths_to_consider) { |
| CHECK(current_dir.IsAbsolute()); |
| |
| base::FileEnumerator enumerator(current_dir, false, kAllFileTypes); |
| for (base::FilePath to_delete = enumerator.Next(); !to_delete.empty(); |
| to_delete = enumerator.Next()) { |
| // Enum tracking which of the 3 possible actions to take for |to_delete|. |
| enum { kSkip, kEnqueue, kDelete } action = kDelete; |
| |
| for (const base::FilePath& to_keep : paths_to_keep) { |
| if (to_delete == to_keep) { |
| action = kSkip; |
| break; |
| } else if (to_delete.IsParent(to_keep)) { |
| // |to_delete| contains a path to keep. Add to stack for further |
| // processing. |
| action = kEnqueue; |
| break; |
| } |
| } |
| |
| switch (action) { |
| case kDelete: |
| base::DeletePathRecursively(to_delete); |
| break; |
| |
| case kEnqueue: |
| paths_to_consider->push_back(to_delete); |
| break; |
| |
| case kSkip: |
| break; |
| } |
| } |
| } |
| |
| // Synchronously attempts to delete |unnormalized_root|, preserving only |
| // entries in |paths_to_keep|. If there are no entries in |paths_to_keep| on |
| // disk, then it completely removes |unnormalized_root|. All paths must be |
| // absolute paths. |
| void BlockingObliteratePath( |
| const base::FilePath& unnormalized_browser_context_root, |
| const base::FilePath& unnormalized_root, |
| const std::vector<base::FilePath>& paths_to_keep, |
| const scoped_refptr<base::TaskRunner>& closure_runner, |
| base::OnceClosure on_gc_required) { |
| // Early exit required because MakeAbsoluteFilePath() will fail on POSIX |
| // if |unnormalized_root| does not exist. This is safe because there is |
| // nothing to do in this situation anyway. |
| if (!base::PathExists(unnormalized_root)) { |
| return; |
| } |
| |
| // Never try to obliterate things outside of the browser context root or the |
| // browser context root itself. Die hard. |
| base::FilePath root = base::MakeAbsoluteFilePath(unnormalized_root); |
| base::FilePath browser_context_root = |
| base::MakeAbsoluteFilePath(unnormalized_browser_context_root); |
| CHECK(!root.empty()); |
| CHECK(!browser_context_root.empty()); |
| CHECK(browser_context_root.IsParent(root) && browser_context_root != root); |
| |
| // Reduce |paths_to_keep| set to those under the root and actually on disk. |
| std::vector<base::FilePath> valid_paths_to_keep; |
| for (const base::FilePath& path : paths_to_keep) { |
| if (root.IsParent(path) && base::PathExists(path)) |
| valid_paths_to_keep.push_back(path); |
| } |
| |
| // If none of the |paths_to_keep| are valid anymore then we just whack the |
| // root and be done with it. Otherwise, signal garbage collection and do |
| // a best-effort delete of the on-disk structures. |
| if (valid_paths_to_keep.empty()) { |
| base::DeletePathRecursively(root); |
| return; |
| } |
| closure_runner->PostTask(FROM_HERE, std::move(on_gc_required)); |
| |
| // Otherwise, start at the root and delete everything that is not in |
| // |valid_paths_to_keep|. |
| std::vector<base::FilePath> paths_to_consider; |
| paths_to_consider.push_back(root); |
| while (!paths_to_consider.empty()) { |
| base::FilePath path = paths_to_consider.back(); |
| paths_to_consider.pop_back(); |
| ObliterateOneDirectory(path, valid_paths_to_keep, &paths_to_consider); |
| } |
| } |
| |
| // Ensures each path in |active_paths| is a direct child of |storage_root|. |
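| // For example (illustrative paths): with |storage_root| = Storage/ext and an |
| // active path Storage/ext/ABCDEF/def, the normalized entry is |
| // Storage/ext/ABCDEF. |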
| void NormalizeActivePaths(const base::FilePath& storage_root, |
| std::unordered_set<base::FilePath>* active_paths) { |
| std::unordered_set<base::FilePath> normalized_active_paths; |
| |
| for (const base::FilePath& path : *active_paths) { |
| base::FilePath relative_path; |
| if (!storage_root.AppendRelativePath(path, &relative_path)) |
| continue; |
| |
| std::vector<base::FilePath::StringType> components = |
| relative_path.GetComponents(); |
| |
| DCHECK(!relative_path.empty()); |
| normalized_active_paths.insert(storage_root.Append(components.front())); |
| } |
| |
| active_paths->swap(normalized_active_paths); |
| } |
| |
| // Deletes all entries inside the |storage_root| that are not in the |
| // |active_paths|. Deletion is done in 2 steps: |
| // |
| // (1) Moving all garbage collected paths into a trash directory. |
| // (2) Asynchronously deleting the trash directory. |
| // |
| // The deletion is asynchronous because after (1) completes, calling code can |
| // safely reuse the paths that were just garbage collected without fear of |
| // racing with the deletion. |
| // |
| // This code also ignores failed moves rather than attempting a smarter retry. |
| // Moves shouldn't fail here unless there is some out-of-band error (e.g., |
| // FS corruption). Retry logic is dangerous in the general case because |
| // there is no guarantee that a retry can ever succeed. |
| // |
| // This function is still named BlockingGarbageCollect() because it does |
| // execute a few filesystem operations synchronously. |
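| // |
| // For example (illustrative names): if |storage_root| contains ABCDEF and |
| // GHIJKL but only ABCDEF is in |active_paths|, GHIJKL is moved into a |
| // uniquely named trash directory under |storage_root|, which is then deleted |
| // asynchronously on |file_access_runner|. |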
| void BlockingGarbageCollect( |
| const base::FilePath& storage_root, |
| const scoped_refptr<base::TaskRunner>& file_access_runner, |
| std::unordered_set<base::FilePath> active_paths) { |
| CHECK(storage_root.IsAbsolute()); |
| |
| NormalizeActivePaths(storage_root, &active_paths); |
| |
| base::FileEnumerator enumerator(storage_root, false, kAllFileTypes); |
| base::FilePath trash_directory; |
| if (!base::CreateTemporaryDirInDir(storage_root, kTrashDirname, |
| &trash_directory)) { |
| // Unable to continue without creating the trash directory so give up. |
| return; |
| } |
| for (base::FilePath path = enumerator.Next(); !path.empty(); |
| path = enumerator.Next()) { |
| if (!base::Contains(active_paths, path) && path != trash_directory) { |
| // Since |trash_directory| is unique for each run of this function, there |
| // can be no collisions on the move. |
| base::Move(path, trash_directory.Append(path.BaseName())); |
| } |
| } |
| |
| file_access_runner->PostTask( |
| FROM_HERE, base::GetDeletePathRecursivelyCallback(trash_directory)); |
| } |
| |
| } // namespace |
| |
| // static |
| base::FilePath StoragePartitionImplMap::GetStoragePartitionPath( |
| const std::string& partition_domain, |
| const std::string& partition_name) { |
| if (partition_domain.empty()) |
| return base::FilePath(); |
| |
| base::FilePath path = GetStoragePartitionDomainPath(partition_domain); |
| |
| // TODO(ajwong): Mangle in-memory into this somehow, either by putting |
| // it into the partition_name, or by manually adding another path component |
| // here. Otherwise, it's possible to have an in-memory StoragePartition and |
| // a persistent one that return the same FilePath for GetPath(). |
| if (!partition_name.empty()) { |
| // For analysis of why we can ignore collisions, see the comment above |
| // kPartitionNameHashBytes. |
| auto hash = crypto::hash::Sha256(partition_name); |
| auto truncated_hash = base::span(hash).first<kPartitionNameHashBytes>(); |
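| // The resulting leaf directory name is the 12-character hex encoding of the |
| // first 6 bytes of SHA-256(partition_name), e.g. |
| // Storage/ext/ABCDEF/A1B2C3D4E5F6 (illustrative value). |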
| return path.AppendASCII(base::HexEncode(truncated_hash)); |
| } |
| |
| return path.Append(kDefaultPartitionDirname); |
| } |
| |
| StoragePartitionImplMap::StoragePartitionImplMap( |
| BrowserContext* browser_context) |
| : browser_context_(browser_context), |
| file_access_runner_(base::ThreadPool::CreateSequencedTaskRunner( |
| {base::MayBlock(), base::TaskPriority::BEST_EFFORT})), |
| resource_context_initialized_(false) {} |
| |
| StoragePartitionImplMap::~StoragePartitionImplMap() { |
| } |
| |
| StoragePartitionImpl* StoragePartitionImplMap::Get( |
| const StoragePartitionConfig& partition_config, |
| bool can_create) { |
| // Find the previously created partition if it's available. |
| if (auto* partition = base::FindPtrOrNull(partitions_, partition_config)) { |
| return partition; |
| } |
| |
| if (!can_create) |
| return nullptr; |
| |
| base::FilePath relative_partition_path = GetStoragePartitionPath( |
| partition_config.partition_domain(), partition_config.partition_name()); |
| |
| std::optional<StoragePartitionConfig> fallback_config = |
| partition_config.GetFallbackForBlobUrls(); |
| StoragePartitionImpl* fallback_for_blob_urls = |
| fallback_config.has_value() ? Get(*fallback_config, /*can_create=*/false) |
| : nullptr; |
| |
| std::unique_ptr<StoragePartitionImpl> partition_ptr( |
| StoragePartitionImpl::Create(browser_context_, partition_config, |
| relative_partition_path)); |
| StoragePartitionImpl* partition = partition_ptr.get(); |
| partitions_[partition_config] = std::move(partition_ptr); |
| partition->Initialize(fallback_for_blob_urls); |
| |
| // Arm the service worker cookie change observation API. |
| partition->GetCookieStoreManager()->ListenToCookieChanges( |
| partition->GetNetworkContext(), base::DoNothing()); |
| |
| PostCreateInitialization(partition, partition_config.in_memory()); |
| |
| return partition; |
| } |
| |
| void StoragePartitionImplMap::AsyncObliterate( |
| const std::string& partition_domain, |
| base::OnceClosure on_gc_required, |
| base::OnceClosure done_callback) { |
| // Find the active partitions for the domain. Because these partitions are |
| // active, it is not possible to just delete the directories that contain |
| // the backing data structures without causing the browser to crash. Instead |
| // of deleting the directories, we ask each storage context to remove any |
| // data it has saved. This will leave the directory structure intact, but it |
| // will only contain empty databases. |
| std::vector<StoragePartitionImpl*> active_partitions; |
| std::vector<base::FilePath> paths_to_keep; |
| for (const auto& [config, partition] : partitions_) { |
| if (config.partition_domain() == partition_domain) { |
| active_partitions.push_back(partition.get()); |
| if (!config.in_memory()) { |
| paths_to_keep.push_back(partition->GetPath()); |
| } |
| } |
| } |
| |
| // Create a barrier closure for keeping track of the callbacks in |
| // AsyncObliterate(). We have one callback for each active partition that is |
| // cleared and an additional one for BlockingObliteratePath()'s task reply. |
| int num_tasks = active_partitions.size() + 1; |
| auto subtask_done_callback = |
| base::BarrierClosure(num_tasks, std::move(done_callback)); |
| |
| for (auto* active_partition : active_partitions) { |
| active_partition->ClearData( |
| // All except shader cache. |
| ~StoragePartition::REMOVE_DATA_MASK_SHADER_CACHE, |
| StoragePartition::QUOTA_MANAGED_STORAGE_MASK_ALL, blink::StorageKey(), |
| base::Time(), base::Time::Max(), subtask_done_callback); |
| } |
| |
| // Start a best-effort delete of the on-disk storage excluding paths that are |
| // known to still be in use. This is to delete any previously created |
| // StoragePartition state that just happens to not have been used during this |
| // run of the browser. |
| base::FilePath domain_root = browser_context_->GetPath().Append( |
| GetStoragePartitionDomainPath(partition_domain)); |
| |
| base::ThreadPool::PostTaskAndReply( |
| FROM_HERE, {base::MayBlock(), base::TaskPriority::BEST_EFFORT}, |
| base::BindOnce(&BlockingObliteratePath, browser_context_->GetPath(), |
| domain_root, paths_to_keep, |
| base::SingleThreadTaskRunner::GetCurrentDefault(), |
| std::move(on_gc_required)), |
| subtask_done_callback); |
| } |
| |
| void StoragePartitionImplMap::GarbageCollect( |
| std::unordered_set<base::FilePath> active_paths, |
| base::OnceClosure done) { |
| // Include all paths for current StoragePartitions in the active_paths since |
| // they cannot be deleted safely. |
| for (const auto& [config, partition] : partitions_) { |
| if (!config.in_memory()) |
| active_paths.insert(partition->GetPath()); |
| } |
| |
| // Find the directory holding the StoragePartitions and delete everything in |
| // there that isn't considered active. |
| base::FilePath storage_root = browser_context_->GetPath().Append( |
| GetStoragePartitionDomainPath(std::string())); |
| file_access_runner_->PostTaskAndReply( |
| FROM_HERE, |
| base::BindOnce(&BlockingGarbageCollect, storage_root, file_access_runner_, |
| std::move(active_paths)), |
| std::move(done)); |
| } |
| |
| void StoragePartitionImplMap::ForEach( |
| base::FunctionRef<void(StoragePartition*)> fn) { |
| for (const auto& [config, partition] : partitions_) { |
| fn(partition.get()); |
| } |
| } |
| |
| void StoragePartitionImplMap::PostCreateInitialization( |
| StoragePartitionImpl* partition, |
| bool in_memory) { |
| // TODO(ajwong): ResourceContexts no longer have any storage-related state. |
| // We should move this into a place where it is called once per |
| // BrowserContext creation rather than piggybacking off the default context |
| // creation. |
| // Note: moving this into Get() before partitions_[] is set causes reentrancy. |
| if (!resource_context_initialized_) { |
| resource_context_initialized_ = true; |
| InitializeResourceContext(browser_context_); |
| } |
| |
| if (!in_memory) { |
| // Clean up any lingering WebSQL user data on disk, now that WebSQL |
| // has been deprecated and removed for all platforms. |
| base::ThreadPool::PostTask( |
| FROM_HERE, {base::MayBlock(), base::TaskPriority::BEST_EFFORT}, |
| base::BindOnce( |
| [](const base::FilePath& dir) { base::DeletePathRecursively(dir); }, |
| partition->GetPath().Append(kWebSQLDirname))); |
| } |
| |
| partition->GetBackgroundFetchContext()->Initialize(); |
| } |
| |
| } // namespace content |