blob: 27af9375520727c70fa33feb3078137ebc3b3808 [file] [log] [blame]
// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "content/browser/download/save_package.h"
#include <algorithm>
#include <memory>
#include <utility>
#include <vector>
#include "base/containers/contains.h"
#include "base/containers/flat_map.h"
#include "base/files/file_path.h"
#include "base/files/file_util.h"
#include "base/functional/bind.h"
#include "base/i18n/file_util_icu.h"
#include "base/logging.h"
#include "base/memory/ptr_util.h"
#include "base/not_fatal_until.h"
#include "base/rand_util.h"
#include "base/strings/strcat.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_split.h"
#include "base/strings/stringprintf.h"
#include "base/strings/sys_string_conversions.h"
#include "base/strings/utf_string_conversions.h"
#include "base/threading/thread.h"
#include "base/threading/thread_restrictions.h"
#include "base/unguessable_token.h"
#include "build/build_config.h"
#include "components/download/public/common/download_item_impl.h"
#include "components/download/public/common/download_save_item_data.h"
#include "components/download/public/common/download_stats.h"
#include "components/download/public/common/download_task_runner.h"
#include "components/download/public/common/download_ukm_helper.h"
#include "components/download/public/common/download_utils.h"
#include "components/filename_generation/filename_generation.h"
#include "components/url_formatter/url_formatter.h"
#include "content/browser/bad_message.h"
#include "content/browser/download/download_manager_impl.h"
#include "content/browser/download/save_file.h"
#include "content/browser/download/save_file_manager.h"
#include "content/browser/download/save_item.h"
#include "content/browser/download/save_package_serialization_handler.h"
#include "content/browser/renderer_host/frame_tree.h"
#include "content/browser/renderer_host/frame_tree_node.h"
#include "content/browser/renderer_host/page_impl.h"
#include "content/browser/renderer_host/render_frame_host_impl.h"
#include "content/browser/renderer_host/render_process_host_impl.h"
#include "content/browser/renderer_host/render_view_host_delegate.h"
#include "content/browser/renderer_host/render_view_host_impl.h"
#include "content/public/browser/browser_context.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/browser/content_browser_client.h"
#include "content/public/browser/download_manager_delegate.h"
#include "content/public/browser/navigation_entry.h"
#include "content/public/browser/render_frame_host.h"
#include "content/public/browser/resource_context.h"
#include "content/public/browser/web_contents.h"
#include "content/public/common/mhtml_generation_params.h"
#include "content/public/common/referrer_type_converters.h"
#include "mojo/public/cpp/bindings/pending_remote.h"
#include "mojo/public/cpp/bindings/self_owned_receiver.h"
#include "net/base/filename_util.h"
#include "net/base/mime_util.h"
#include "services/metrics/public/cpp/ukm_recorder.h"
#include "services/network/public/cpp/request_mode.h"
#include "url/url_constants.h"
#if BUILDFLAG(IS_MAC)
#include "base/mac/mac_util.h"
#endif
namespace content {
namespace {
// Hands out the ids stored in SavePackage::unique_id_. A single function-local
// generator is shared by every call.
SavePackageId GetNextSavePackageId() {
  static SavePackageId::Generator id_generator;
  const SavePackageId next_id = id_generator.GenerateNextId();
  return next_id;
}
// Fallback file name passed to net::GenerateFileName() for the case where no
// proper name can be derived from the resource URL.
const char kDefaultSaveName[] = "saved_resource";
// Upper bound (exclusive) for the ordinal number appended to a file name in
// order to resolve conflicts between files sharing the same base file name.
const int32_t kMaxFileOrdinalNumber = 9999;
// Maximum length of a file path. Windows limits paths to MAX_PATH, so the
// path of every saved file must be shorter than that; POSIX and Fuchsia use
// PATH_MAX in the same way.
#if BUILDFLAG(IS_WIN)
const uint32_t kMaxFilePathLength = MAX_PATH - 1;
#elif BUILDFLAG(IS_POSIX) || BUILDFLAG(IS_FUCHSIA)
const uint32_t kMaxFilePathLength = PATH_MAX - 1;
#endif
// Maximum length of the ordinal-number part of a file name. Ordinals stay
// below kMaxFileOrdinalNumber (9999), so the longest possible part is
// "(9998)", which is 6 characters.
const uint32_t kMaxFileOrdinalNumberPartLength = 6;
// Removes an ordinal-number suffix from |base_name|, which must be a pure
// file name (i.e. already stripped of its extension).
//
// If the last '(' comes before the last ')' and every character between them
// is an ASCII digit, returns the part of |base_name| that precedes that '('.
// In every other case |base_name| is returned unchanged.
// TODO(estade): improve this to not choke on alternate encodings.
base::FilePath::StringType StripOrdinalNumber(
    const base::FilePath::StringType& base_name) {
  using StringType = base::FilePath::StringType;

  const StringType::size_type close_pos =
      base_name.rfind(FILE_PATH_LITERAL(')'));
  const StringType::size_type open_pos =
      base_name.rfind(FILE_PATH_LITERAL('('));
  if (open_pos >= close_pos)
    return base_name;

  // Everything strictly between the parentheses has to be a digit.
  StringType::size_type pos = open_pos + 1;
  while (pos != close_pos) {
    if (!base::IsAsciiDigit(base_name[pos]))
      return base_name;
    ++pos;
  }

  return base_name.substr(0, open_pos);
}
// Returns true when contents of the given MIME type can be saved as
// complete-HTML. Only "text/html" and "application/xhtml+xml" qualify; the
// comparison is exact (case-sensitive, no parameters).
bool CanSaveAsComplete(const std::string& contents_mime_type) {
  if (contents_mime_type == "text/html")
    return true;
  return contents_mime_type == "application/xhtml+xml";
}
// Cancels |save_package| if it is still alive and has not been canceled yet.
// |user_cancel| is forwarded to SavePackage::Cancel(); the download item
// itself is not canceled (second argument is false).
void CancelSavePackage(base::WeakPtr<SavePackage> save_package,
                       bool user_cancel) {
  SavePackage* package = save_package.get();
  if (!package || package->canceled())
    return;
  package->Cancel(user_cancel, false);
}
// Maps a save type to the MIME type reported for the resulting download:
// "multipart/related" for MHTML, "text/html" for both HTML variants. Any
// other value is a programming error and hits NOTREACHED().
const std::string GetMimeTypeForSaveType(SavePageType save_type) {
  switch (save_type) {
    case SAVE_PAGE_TYPE_AS_MHTML:
      return "multipart/related";
    case SAVE_PAGE_TYPE_AS_ONLY_HTML:
    case SAVE_PAGE_TYPE_AS_COMPLETE_HTML:
      return "text/html";
    case SAVE_PAGE_TYPE_UNKNOWN:
    case SAVE_PAGE_TYPE_MAX:
      break;
  }
  NOTREACHED();
}
// Returns the WebContents associated with the main document of |page|.
WebContents* GetWebContents(Page* page) {
  RenderFrameHost& main_document = page->GetMainDocument();
  return WebContents::FromRenderFrameHost(&main_document);
}
// Returns the display title of the visible navigation entry of the frame tree
// that hosts |page|'s main document, or the empty string when there is no
// visible entry.
const std::u16string& GetTitle(Page& page) {
  auto* main_rfh = static_cast<RenderFrameHostImpl*>(&page.GetMainDocument());
  NavigationEntry* entry =
      main_rfh->frame_tree()->controller().GetVisibleEntry();
  if (!entry)
    return base::EmptyString16();
  return entry->GetTitleForDisplay();
}
// A frame is savable when its render frame is live and it is not nested
// within a fenced frame.
bool IsSavableFrame(RenderFrameHost* rfh) {
  if (!rfh->IsRenderFrameLive())
    return false;
  return !rfh->IsNestedWithinFencedFrame();
}
} // namespace
// Extension (without a leading dot) that GenerateFileName() applies, via
// ReplaceExtension(), to resources that must be saved as HTML.
const base::FilePath::CharType SavePackage::kDefaultHtmlExtension[] =
FILE_PATH_LITERAL("html");
// Creates a SavePackage for |page|. Captures a weak pointer to the page, the
// URL to be saved, the isolation info computed for that URL, whether the main
// document is the outermost main frame, the page title and the start time,
// and allocates a fresh unique id. The save type and destination paths are
// not set by this constructor. Must be called on the UI thread.
SavePackage::SavePackage(PageImpl& page)
: page_(page.GetWeakPtrImpl()),
page_url_(GetUrlToBeSaved(&page.GetMainDocument())),
page_isolation_info_(
page.GetMainDocument().ComputeIsolationInfoForNavigation(
page_url_,
page.GetMainDocument().IsCredentialless(),
page.GetMainDocument()
.GetIsolationInfoForSubresources()
.nonce())),
page_is_outermost_main_frame_(
page.GetMainDocument().IsOutermostMainFrame()),
title_(GetTitle(page)),
start_tick_(base::TimeTicks::Now()),
// File names are compared case-insensitively when checking for conflicts.
file_name_set_(&base::FilePath::CompareLessIgnoreCase),
unique_id_(GetNextSavePackageId()) {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
// Sets up |file_manager_|, |download_manager_| and the UKM bookkeeping.
InternalInit();
}
// Used for tests.
// Same as the single-argument constructor, but additionally fixes the save
// type and the destination of the main file (|file_full_path|) and of the
// resources directory (|directory_full_path|) up front. |save_type| must be
// one of the three concrete save types, and both paths must be non-empty and
// within kMaxFilePathLength (all enforced by DCHECKs only).
SavePackage::SavePackage(PageImpl& page,
SavePageType save_type,
const base::FilePath& file_full_path,
const base::FilePath& directory_full_path)
: page_(page.GetWeakPtrImpl()),
page_url_(GetUrlToBeSaved(&page.GetMainDocument())),
saved_main_file_path_(file_full_path),
saved_main_directory_path_(directory_full_path),
page_isolation_info_(
page.GetMainDocument().ComputeIsolationInfoForNavigation(
page_url_,
page.GetMainDocument().IsCredentialless(),
page.GetMainDocument()
.GetIsolationInfoForSubresources()
.nonce())),
page_is_outermost_main_frame_(
page.GetMainDocument().IsOutermostMainFrame()),
title_(GetTitle(page)),
start_tick_(base::TimeTicks::Now()),
save_type_(save_type),
// File names are compared case-insensitively when checking for conflicts.
file_name_set_(&base::FilePath::CompareLessIgnoreCase),
unique_id_(GetNextSavePackageId()) {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
DCHECK((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
(save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ||
(save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML))
<< save_type_;
// NOTE(review): the main file path may be exactly kMaxFilePathLength long
// (<=) while the directory path must be strictly shorter (<).
DCHECK(!saved_main_file_path_.empty() &&
saved_main_file_path_.value().length() <= kMaxFilePathLength);
DCHECK(!saved_main_directory_path_.empty() &&
saved_main_directory_path_.value().length() < kMaxFilePathLength);
// Sets up |file_manager_|, |download_manager_| and the UKM bookkeeping.
InternalInit();
}
// Cancels the job if it neither finished nor was canceled, checks the item
// bookkeeping, and releases every SaveItem together with the lookup tables
// that point at them. Must run on the UI thread.
SavePackage::~SavePackage() {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
// Stop receiving saving job's updates
if (!finished_ && !canceled()) {
// Unexpected quit. Treated as a user cancellation.
Cancel(true);
}
// We should no longer be observing the download::DownloadItem at this point.
CHECK(!download_);
// Every item ever created must be in exactly one of: waiting, completed
// (succeeded or failed), or in progress.
DCHECK_EQ(all_save_items_count_,
waiting_item_queue_.size() + completed_count() + in_process_count())
<< "waiting: " << waiting_item_queue_.size()
<< " completed: " << completed_count()
<< " in_progress: " << in_process_count();
// Free all SaveItems.
waiting_item_queue_.clear();
in_progress_items_.clear();
saved_success_items_.clear();
saved_failed_items_.clear();
// Clear containers that contain (now dangling/invalid) pointers to the
// save items freed above. This is not strictly required (as the containers
// will be destructed soon by ~SavePackage), but seems like good code hygiene.
frame_tree_node_id_to_contained_save_items_.clear();
frame_tree_node_id_to_save_item_.clear();
url_to_save_item_.clear();
file_manager_ = nullptr;
}
// Drops the reference to the page being saved. Methods that check |page_|
// (e.g. Init(), StartSave(), CheckFinish()) bail out once it is cleared.
void SavePackage::ClearPage() {
  page_.reset();
}
// static
// Returns the URL that should be saved for the page whose main frame is
// |main_frame|, or an empty GURL when there is no visible navigation entry.
GURL SavePackage::GetUrlToBeSaved(RenderFrameHost* main_frame) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);
  auto* main_rfh = static_cast<RenderFrameHostImpl*>(main_frame);

  // Use the entry's real URL rather than the displayed one (returned by
  // GetVirtualURL), which may differ, e.g. by carrying a "view-source:"
  // prefix.
  NavigationEntry* entry =
      main_rfh->frame_tree()->controller().GetVisibleEntry();
  if (!entry)
    return GURL();
  return entry->GetURL();
}
// Cancels the whole save job unless it is already canceled. The reason is
// recorded as a user cancellation when |user_action| is true and as a disk
// error otherwise; |cancel_download_item| is forwarded to Stop().
void SavePackage::Cancel(bool user_action, bool cancel_download_item) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);
  if (canceled())
    return;

  if (user_action) {
    user_canceled_ = true;
  } else {
    disk_error_occurred_ = true;
  }
  Stop(cancel_download_item);
}
// Init() can be called directly, or indirectly via GetSaveInfo(). In both
// cases, we need file_manager_ to be initialized, so we do this first.
void SavePackage::InternalInit() {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
DCHECK(page_);
file_manager_ = SaveFileManager::Get();
DCHECK(file_manager_);
download_manager_ = static_cast<DownloadManagerImpl*>(
page_->GetMainDocument().GetBrowserContext()->GetDownloadManager());
DCHECK(download_manager_);
// Always constructed with the primary page that GetPageUkmSourceId()
// supports.
CHECK(page_->IsPrimary());
ukm_source_id_ = page_->GetMainDocument().GetPageUkmSourceId();
ukm_download_id_ = download::GetUniqueDownloadId();
download::DownloadUkmHelper::RecordDownloadStarted(
ukm_download_id_, ukm_source_id_, download::DownloadContent::kText,
download::DownloadSource::UNKNOWN,
download::CheckDownloadConnectionSecurity(
page_->GetMainDocument().GetLastCommittedURL(),
std::vector<GURL>{page_->GetMainDocument().GetLastCommittedURL()}),
true /* is_same_host_download */);
}
// Starts the save job by asking the download manager to create the download
// item that represents it; the job continues in InitWithDownloadItem().
// Returns false, without doing anything, when the package is not in its
// initial state or the page is gone; returns true otherwise.
bool SavePackage::Init(
    SavePackageDownloadCreatedCallback download_created_callback) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);
  DCHECK(page_url_.is_valid());

  // Set proper running state.
  if (!page_ || wait_state_ != INITIALIZE)
    return false;
  wait_state_ = START_PROCESS;

  RenderFrameHost& frame_host = page_->GetMainDocument();
  BrowserContext* browser_context = frame_host.GetBrowserContext();
  if (!browser_context) {
    NOTREACHED();
  }

  download_manager_->CreateSavePackageDownloadItem(
      saved_main_file_path_, saved_main_file_display_name_, page_url_,
      GetMimeTypeForSaveType(save_type_),
      frame_host.GetProcess()->GetDeprecatedID(), frame_host.GetRoutingID(),
      base::BindOnce(&CancelSavePackage, weak_ptr_factory_.GetWeakPtr()),
      base::BindOnce(&SavePackage::InitWithDownloadItem,
                     weak_ptr_factory_.GetWeakPtr(),
                     std::move(download_created_callback)));
  return true;
}
// Second half of Init(): bound there as the completion callback passed to
// CreateSavePackageDownloadItem(). Remembers |item| as |download_|, reports
// it through |download_created_callback| (if set), and then kicks off the
// work appropriate for |save_type_|.
void SavePackage::InitWithDownloadItem(
SavePackageDownloadCreatedCallback download_created_callback,
download::DownloadItemImpl* item) {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
DCHECK(item);
// The page may have gone away while the download item was being created.
if (!page_)
return;
download_ = item;
// Let the caller know which download item represents this save.
if (!download_created_callback.is_null())
std::move(download_created_callback).Run(download_.get());
// Check save type and process the save page job.
if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
// Complete HTML: collect the savable resource links first. The resources
// directory must already be known.
DCHECK(!saved_main_directory_path_.empty());
GetSavableResourceLinks();
} else if (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) {
// MHTML: generation is delegated to the WebContents; OnMHTMLGenerated()
// receives the resulting size.
MHTMLGenerationParams mhtml_generation_params(saved_main_file_path_);
GetWebContents(page_.get())
->GenerateMHTML(mhtml_generation_params,
base::BindOnce(&SavePackage::OnMHTMLGenerated, this));
} else {
// HTML only: a single SaveItem for the page URL, fetched from the network.
DCHECK_EQ(SAVE_PAGE_TYPE_AS_ONLY_HTML, save_type_);
wait_state_ = NET_FILES;
// Add this item to waiting list.
waiting_item_queue_.push_back(base::WrapUnique(new SaveItem(
page_url_, Referrer(), page_isolation_info_,
network::mojom::RequestMode::kNavigate, page_is_outermost_main_frame_,
this, SaveFileCreateInfo::SAVE_FILE_FROM_NET, FrameTreeNodeId(),
page_->GetMainDocument().GetFrameTreeNodeId())));
all_save_items_count_ = 1;
// Progress is counted in saved files, so the total is one.
download_->SetTotalBytes(1);
DoSavingProcess();
}
}
// Completion callback for MHTML generation. |size| is the size reported by
// the generator; a non-positive value cancels the job as a non-user
// cancellation. Otherwise the download item is told that all data has been
// saved and the job is finished, either right away or later when the
// delegate runs the Finish() callback handed to ShouldCompleteDownload().
void SavePackage::OnMHTMLGenerated(int64_t size) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);
  if (!download_)
    return;

  CHECK_EQ(download_->GetState(), download::DownloadItem::IN_PROGRESS);
  if (size <= 0) {
    Cancel(false);
    return;
  }

  wrote_to_completed_file_ = true;
  download_->OnAllDataSaved(size, std::unique_ptr<crypto::SecureHash>());

  auto* delegate = download_manager_->GetDelegate();
  const bool complete_now =
      !delegate ||
      delegate->ShouldCompleteDownload(
          download_, base::BindOnce(&SavePackage::Finish, this));
  if (complete_now)
    Finish();
}
// Returns the maximum length a file path inside |base_dir| may have.
// On Windows this is simply kMaxFilePathLength. On POSIX and Fuchsia the
// length of |base_name| + |file_name_ext| is further restricted by NAME_MAX,
// so the longest allowed path looks like:
// '/path/to/save_dir' + '/' + NAME_MAX.
uint32_t SavePackage::GetMaxPathLengthForDirectory(
    const base::FilePath& base_dir) {
#if BUILDFLAG(IS_WIN)
  return kMaxFilePathLength;
#elif BUILDFLAG(IS_POSIX) || BUILDFLAG(IS_FUCHSIA)
  const uint32_t dir_length = static_cast<uint32_t>(base_dir.value().length());
  const uint32_t longest_in_dir = dir_length + NAME_MAX + 1;
  return std::min(kMaxFilePathLength, longest_in_dir);
#endif
}
// static
// Shortens |*base_name| so that |dir_path| + separator + |*base_name| +
// |file_name_ext| does not exceed |max_file_path_len| characters.
// Returns true when the (possibly truncated) name fits. When not even one
// character of the base name fits, clears |*base_name| and returns false.
// |*base_name| must not be empty on entry.
bool SavePackage::TruncateBaseNameToFitPathConstraints(
    const base::FilePath& dir_path,
    const base::FilePath::StringType& file_name_ext,
    uint32_t max_file_path_len,
    base::FilePath::StringType* base_name) {
  DCHECK(!base_name->empty());

  int room = static_cast<int>(max_file_path_len - dir_path.value().length() -
                              file_name_ext.length());
  // One more character is needed for the separator unless the directory path
  // already ends with one.
  if (!dir_path.EndsWithSeparator())
    --room;

  // Plenty of room.
  const int name_length = static_cast<int>(base_name->length());
  if (name_length <= room)
    return true;

  // Not enough room to even use a shortened |base_name|.
  if (room <= 0) {
    base_name->clear();
    return false;
  }

  // Limited room. Keep only the leading |room| characters.
  base_name->resize(room);
  return true;
}
// Generates the file name (without any directory component) under which the
// resource at |url| is saved inside |saved_main_directory_path_|.
//
// The candidate name comes from net::GenerateFileName(), using |disposition|
// and |url| and falling back to kDefaultSaveName. When |need_html_ext| is
// true its extension is replaced with kDefaultHtmlExtension. The base name is
// truncated so the full path respects the path-length limit. A conflict with
// a name already in |file_name_set_| (compared case-insensitively) is
// resolved by adding an ordinal such as "(1)"; if no ordinal is available the
// base name of a freshly created temporary file is used instead.
//
// On success the name is stored in |*generated_name|, recorded in
// |file_name_set_|, and true is returned. Returns false when no name that
// fits the path-length limit could be produced, or when the fallback
// temporary file could not be created.
bool SavePackage::GenerateFileName(const std::string& disposition,
                                   const GURL& url,
                                   bool need_html_ext,
                                   base::FilePath::StringType* generated_name) {
  // TODO(jungshik): Figure out the referrer charset when having one
  // makes sense and pass it to GenerateFileName.
  base::FilePath file_path =
      net::GenerateFileName(url, disposition, std::string(), std::string(),
                            std::string(), kDefaultSaveName);
  DCHECK(!file_path.empty());
  if (need_html_ext)
    file_path = file_path.ReplaceExtension(kDefaultHtmlExtension);

  DownloadManagerDelegate* delegate = download_manager_->GetDelegate();
  if (delegate)
    delegate->SanitizeSavePackageResourceName(&file_path, url);
  DCHECK_EQ(file_path.value(), file_path.BaseName().value())
      << "SanitizeSavePackageResourceName should only return a basename.";

  base::FilePath::StringType base_name =
      file_path.RemoveExtension().BaseName().value();
  base::FilePath::StringType file_name_ext = file_path.Extension();

  // Need to make sure the suggested file name is not too long.
  uint32_t max_path = GetMaxPathLengthForDirectory(saved_main_directory_path_);

  // Get safe pure file name.
  if (!TruncateBaseNameToFitPathConstraints(saved_main_directory_path_,
                                            file_name_ext, max_path,
                                            &base_name)) {
    return false;
  }

  base::FilePath::StringType file_name = base_name + file_name_ext;

  // Check whether we already have same name in a case insensitive manner.
  auto iter = file_name_set_.find(file_name);
  if (iter == file_name_set_.end()) {
    DCHECK(!file_name.empty());
    file_name_set_.insert(file_name);
    generated_name->assign(file_name);
    return true;
  }

  // Found same name, increase the ordinal number for the file name. The base
  // name is taken from the entry already in the set, whose letter case may
  // differ from the candidate's.
  base_name = base::FilePath(*iter).RemoveExtension().BaseName().value();
  base::FilePath::StringType base_file_name = StripOrdinalNumber(base_name);

  // We need to make sure the length of base file name plus maximum ordinal
  // number path will be less than or equal to kMaxFilePathLength.
  if (!TruncateBaseNameToFitPathConstraints(
          saved_main_directory_path_, file_name_ext,
          max_path - kMaxFileOrdinalNumberPartLength, &base_file_name)) {
    return false;
  }

  // Prepare the new ordinal number.
  uint32_t ordinal_number;
  auto it = file_name_count_map_.find(base_file_name);
  if (it == file_name_count_map_.end()) {
    // First base-name-conflict resolving, use 1 as initial ordinal number.
    file_name_count_map_[base_file_name] = 1;
    ordinal_number = 1;
  } else {
    // We have met same base-name conflict, use latest ordinal number.
    ordinal_number = it->second;
  }

  // A temporary-file name is used either when the ordinals for this base name
  // are exhausted, or when every remaining ordinal name is already taken.
  // Without the second case the conflicting |file_name| would be returned as
  // if it were unique.
  bool use_temp_name = ordinal_number > kMaxFileOrdinalNumber - 1;
  if (!use_temp_name) {
    use_temp_name = true;
    for (int i = ordinal_number; i < kMaxFileOrdinalNumber; ++i) {
      base::FilePath new_filepath(base_file_name);
      new_filepath = new_filepath
                         .InsertBeforeExtensionASCII(base::StrCat(
                             {"(", base::NumberToString(i), ")"}))
                         .AddExtension(file_name_ext);
      base::FilePath::StringType new_name = new_filepath.value();
      if (!base::Contains(file_name_set_, new_name)) {
        // Resolved name conflict. Remember the next ordinal to try.
        file_name = new_name;
        file_name_count_map_[base_file_name] = ++i;
        use_temp_name = false;
        break;
      }
    }
  }

  if (use_temp_name) {
    // Use a random file from temporary file. Only its base name is used.
    // NOTE(review): the temporary file itself is created on disk and is not
    // removed here - confirm whether it should be deleted.
    base::FilePath temp_file;
    if (!base::CreateTemporaryFile(&temp_file))
      return false;
    file_name = temp_file.RemoveExtension().BaseName().value();
    // Get safe pure file name.
    if (!TruncateBaseNameToFitPathConstraints(saved_main_directory_path_,
                                              base::FilePath::StringType(),
                                              max_path, &file_name)) {
      return false;
    }
  }

  DCHECK(!file_name.empty());
  file_name_set_.insert(file_name);
  generated_name->assign(file_name);
  return true;
}
// Called when a new saving job described by |info| has started. Looks up the
// matching SaveItem in |in_progress_items_| and assigns its target path: the
// user-chosen main file path for the page itself, or a generated name inside
// |saved_main_directory_path_| for every other URL. Does nothing when the
// page is gone or the item is no longer in progress.
void SavePackage::StartSave(const SaveFileCreateInfo* info) {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
DCHECK(info);
if (!page_)
return;
auto it = in_progress_items_.find(info->save_item_id);
if (it == in_progress_items_.end()) {
// If not found, we must have cancel action.
DCHECK(canceled());
return;
}
SaveItem* save_item = it->second.get();
DCHECK(!saved_main_file_path_.empty());
// Determine the target path for this saving job. The item must not have been
// given a final name yet.
DCHECK(!save_item->has_final_name());
if (info->url != page_url_) {
base::FilePath::StringType generated_name;
// For HTML resource file, make sure it will have the default HTML extension
// (kDefaultHtmlExtension), otherwise, when you open the saved page in Chrome
// again, download file manager will treat it as downloadable resource, and
// download it instead of opening it as HTML.
bool need_html_ext =
info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_DOM;
if (!GenerateFileName(info->content_disposition,
GURL(info->url),
need_html_ext,
&generated_name)) {
// We can not generate file name for this SaveItem, so we cancel the
// saving page job if the save source is from serialized DOM data.
// Otherwise, it means this SaveItem is sub-resource type, we treat it
// as an error happened on saving. We can ignore this type error for
// sub-resource links which will be resolved as absolute links instead
// of local links in final saved contents.
if (info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_DOM)
Cancel(true);
else
SaveFinished(save_item->id(), 0, false);
return;
}
// When saving page as only-HTML, we only have a SaveItem whose url
// must be page_url_.
DCHECK_EQ(SAVE_PAGE_TYPE_AS_COMPLETE_HTML, save_type_);
DCHECK(!saved_main_directory_path_.empty());
// Now we get final name retrieved from GenerateFileName, we will use it
// rename the SaveItem.
base::FilePath final_name =
saved_main_directory_path_.Append(generated_name);
save_item->SetTargetPath(final_name);
} else {
// It is the main HTML file, use the name chosen by the user.
save_item->SetTargetPath(saved_main_file_path_);
}
// Check whether we begin to require serialized HTML data.
if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML &&
wait_state_ == HTML_DATA) {
// Inform backend to serialize the all frames' DOM and send serialized
// HTML data back.
GetSerializedHtmlWithLocalLinks();
}
}
// Returns the in-progress SaveItem with id |save_item_id|, or nullptr when no
// such item is currently in progress. The returned pointer is owned by
// |in_progress_items_|.
SaveItem* SavePackage::LookupInProgressSaveItem(SaveItemId save_item_id) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);
  const auto found = in_progress_items_.find(save_item_id);
  if (found == in_progress_items_.end())
    return nullptr;

  SaveItem* item = found->second.get();
  DCHECK_EQ(SaveItem::IN_PROGRESS, item->state());
  return item;
}
// Transfers ownership of |save_item| from |in_progress_items_| to
// |saved_success_items_| or |saved_failed_items_|, depending on
// save_item->success(). |save_item| must currently be in progress.
void SavePackage::PutInProgressItemToSavedMap(SaveItem* save_item) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);
  auto entry = in_progress_items_.find(save_item->id());
  CHECK(entry != in_progress_items_.end(), base::NotFatalUntil::M130);
  DCHECK_EQ(save_item, entry->second.get());

  // Take the item out of the in-progress map first; |save_item| stays valid
  // because |owned_item| keeps it alive.
  std::unique_ptr<SaveItem> owned_item = std::move(entry->second);
  in_progress_items_.erase(entry);

  SaveItemIdMap* destination = &saved_failed_items_;
  if (save_item->success())
    destination = &saved_success_items_;
  DCHECK(!base::Contains(*destination, save_item->id()));
  (*destination)[save_item->id()] = std::move(owned_item);
}
// Called for updating saving state of the item with id |save_item_id|.
// Returns false when the item is not in progress (the job might have been
// canceled already); otherwise updates the item with |size| and returns
// true. A failed write (|write_success| == false) cancels the whole save job
// with a disk-error reason.
bool SavePackage::UpdateSaveProgress(SaveItemId save_item_id,
                                     int64_t size,
                                     bool write_success) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);
  SaveItem* item = LookupInProgressSaveItem(save_item_id);
  if (item == nullptr)
    return false;

  item->Update(size);
  if (write_success)
    return true;

  // If we got disk error, cancel whole save page job.
  Cancel(false);
  return true;
}
// Stop all page saving jobs that are in progress and instruct the download
// sequence to delete all saved files. Expects the cancellation reason to have
// been recorded already (see Cancel()). |cancel_download_item| controls
// whether the associated download item is canceled as well.
void SavePackage::Stop(bool cancel_download_item) {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
// If we haven't moved out of the initial state, there's nothing to cancel and
// there won't be valid pointers for |file_manager_| or |download_|.
if (wait_state_ == INITIALIZE)
return;
// When stopping, if it still has some items in in_progress, cancel them.
DCHECK(canceled());
if (in_process_count()) {
for (const auto& it : in_progress_items_) {
SaveItem* save_item = it.second.get();
DCHECK_EQ(SaveItem::IN_PROGRESS, save_item->state());
save_item->Cancel();
}
// Remove all in progress item to saved map. For failed items, they will
// be put into saved_failed_items_, for successful item, they will be put
// into saved_success_items_.
while (in_process_count())
PutInProgressItemToSavedMap(in_progress_items_.begin()->second.get());
}
// This vector contains the save ids of the save files which SaveFileManager
// needs to remove from its save_file_map_.
std::vector<SaveItemId> save_item_ids;
for (const auto& it : saved_success_items_)
save_item_ids.push_back(it.first);
for (const auto& it : saved_failed_items_)
save_item_ids.push_back(it.first);
// The removal itself runs on the download task runner.
download::GetDownloadTaskRunner()->PostTask(
FROM_HERE, base::BindOnce(&SaveFileManager::RemoveSavedFileFromFileMap,
file_manager_, save_item_ids));
finished_ = true;
wait_state_ = FAILED;
// Inform the download::DownloadItem we have canceled whole save page job.
if (download_) {
if (cancel_download_item)
download_->Cancel(false);
FinalizeDownloadEntry();
}
}
// Checks whether the whole job can be completed, i.e. nothing is in progress,
// the job has not finished yet and the page is still alive. If so, moves on
// to the rename step: with a delegate, the temporary paths of the saved files
// are first resolved on the download task runner and handed to
// CheckRenameAllowedForPaths(); without one, renaming is allowed right away.
void SavePackage::CheckFinish() {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);
  if (in_process_count() || finished_ || !page_)
    return;

  DownloadManagerDelegate* delegate = download_manager_->GetDelegate();
  if (delegate) {
    // Reserve instead of size-constructing: the entries are appended below,
    // and size-constructing would leave that many default-constructed pairs
    // in front of the real ones.
    std::vector<std::pair<SaveItemId, base::FilePath>> ids_and_final_paths;
    ids_and_final_paths.reserve(saved_success_items_.size());
    for (const auto& it : saved_success_items_)
      ids_and_final_paths.emplace_back(it.first, it.second->full_path());

    download::GetDownloadTaskRunner()->PostTask(
        FROM_HERE,
        base::BindOnce(&SaveFileManager::GetSaveFilePaths, file_manager_,
                       std::move(ids_and_final_paths),
                       base::BindOnce(&SavePackage::CheckRenameAllowedForPaths,
                                      weak_ptr_factory_.GetWeakPtr())));
  } else {
    RenameIfAllowed(true);
  }
}
// Asks the download manager delegate whether the saved files, given as a map
// from temporary path to final path, may be moved into place. The answer is
// delivered to RenameIfAllowed(). Without a delegate the rename is allowed
// unconditionally.
void SavePackage::CheckRenameAllowedForPaths(
    base::flat_map<base::FilePath, base::FilePath> tmp_paths_to_final_paths) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);
  DownloadManagerDelegate* delegate = download_manager_->GetDelegate();
  if (!delegate) {
    RenameIfAllowed(true);
    return;
  }

  delegate->CheckSavePackageAllowed(
      download_, std::move(tmp_paths_to_final_paths),
      base::BindOnce(&SavePackage::RenameIfAllowed,
                     weak_ptr_factory_.GetWeakPtr()));
}
void SavePackage::RenameIfAllowed(bool allowed) {
DCHECK(page_);
if (!allowed) {
Cancel(true);
return;
}
base::FilePath dir = (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML &&
saved_success_items_.size() > 1) ?
saved_main_directory_path_ : base::FilePath();
FinalNamesMap final_names;
for (const auto& it : saved_success_items_)
final_names.insert(std::make_pair(it.first, it.second->full_path()));
download::GetDownloadTaskRunner()->PostTask(
FROM_HERE,
base::BindOnce(&SaveFileManager::RenameAllFiles, file_manager_,
final_names, dir,
page_->GetMainDocument().GetProcess()->GetDeprecatedID(),
page_->GetMainDocument().GetRoutingID(), id()));
}
// Successfully finished all items of this SavePackage. Marks the job as
// SUCCESSFUL, attaches the list of saved files to the download item, records
// completion in UKM, asks the SaveFileManager to forget the failed items, and
// completes the download item. Does nothing if the job was canceled.
void SavePackage::Finish() {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
// User may cancel the job when we're moving files to the final directory.
if (canceled())
return;
wait_state_ = SUCCESSFUL;
finished_ = true;
#if BUILDFLAG(IS_MAC)
// Always set tags on the main HTML file, and if there is an associated
// "_files" directory, set the tags on it, too.
base::mac::SetFileTags(saved_main_file_path_, file_tags_);
if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
base::mac::SetFileTags(saved_main_directory_path_, file_tags_);
}
#endif // BUILDFLAG(IS_MAC)
if (download_) {
// Record path, URL and referrer URL of every successfully saved item on the
// download item.
std::vector<download::DownloadSaveItemData::ItemInfo> files;
for (auto& item : saved_success_items_) {
files.emplace_back(item.second->full_path(), item.second->url(),
item.second->referrer().url);
}
download::DownloadSaveItemData::AttachItemData(download_, std::move(files));
}
// TODO(qinmin): report the actual file size and duration for the download.
download::DownloadUkmHelper::RecordDownloadCompleted(ukm_download_id_, 1,
base::TimeDelta(), 0);
// This vector contains the save ids of the save files which SaveFileManager
// needs to remove from its |save_file_map_|.
std::vector<SaveItemId> list_of_failed_save_item_ids;
for (const auto& it : saved_failed_items_) {
const SaveItem* save_item = it.second.get();
DCHECK_EQ(it.first, save_item->id());
list_of_failed_save_item_ids.push_back(save_item->id());
}
download::GetDownloadTaskRunner()->PostTask(
FROM_HERE, base::BindOnce(&SaveFileManager::RemoveSavedFileFromFileMap,
file_manager_, list_of_failed_save_item_ids));
if (download_) {
// For MHTML, OnMHTMLGenerated() has already reported that all data was
// saved, so only the other save types report it here. Progress is expressed
// in number of saved items.
if (save_type_ != SAVE_PAGE_TYPE_AS_MHTML) {
CHECK_EQ(download_->GetState(), download::DownloadItem::IN_PROGRESS);
download_->DestinationUpdate(
all_save_items_count_, CurrentSpeed(),
std::vector<download::DownloadItem::ReceivedSlice>());
download_->OnAllDataSaved(all_save_items_count_,
std::unique_ptr<crypto::SecureHash>());
}
download_->MarkAsComplete();
if (download_->GetOpenWhenComplete())
download_->OpenDownload();
FinalizeDownloadEntry();
}
}
// Called when the saving job for the item with id |save_item_id| has ended.
// |size| and |is_success| are passed to SaveItem::Finish(). The item is moved
// out of the in-progress map, the download item's progress is updated, and
// the save process continues with the next item or completes.
void SavePackage::SaveFinished(SaveItemId save_item_id,
int64_t size,
bool is_success) {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
// Because we might have canceled this saving job before,
// so we might not find corresponding SaveItem. Just ignore it.
SaveItem* save_item = LookupInProgressSaveItem(save_item_id);
if (!save_item)
return;
// Let SaveItem set end state.
save_item->Finish(size, is_success);
// Remove the associated save id and SavePackage.
file_manager_->RemoveSaveFile(save_item->id(), this);
// |save_item| stays valid: ownership moves to one of the saved maps.
PutInProgressItemToSavedMap(save_item);
// Inform the download::DownloadItem to update UI.
// We use the received bytes as number of saved files.
if (download_) {
CHECK_EQ(download_->GetState(), download::DownloadItem::IN_PROGRESS);
download_->DestinationUpdate(
completed_count(), CurrentSpeed(),
std::vector<download::DownloadItem::ReceivedSlice>());
}
if (save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM &&
save_item->url() == page_url_ && !save_item->received_bytes()) {
// If size of main HTML page is 0, treat it as disk error.
Cancel(false);
return;
}
if (canceled()) {
DCHECK(finished_);
return;
}
// Continue processing the save page job.
DoSavingProcess();
// Check whether we can successfully finish whole job.
CheckFinish();
}
// Handles cancellation of a single |save_item|: calls RemoveSaveFile() on the
// SaveFileManager for this package and posts a task to the download task
// runner that cancels the save with that item's id.
void SavePackage::SaveCanceled(const SaveItem* save_item) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);
  const auto canceled_id = save_item->id();
  file_manager_->RemoveSaveFile(canceled_id, this);
  download::GetDownloadTaskRunner()->PostTask(
      FROM_HERE, base::BindOnce(&SaveFileManager::CancelSave, file_manager_,
                                canceled_id));
}
// Takes the item at the front of |waiting_item_queue_|, moves it into
// |in_progress_items_|, and asks the SaveFileManager to start saving it.
// When |process_all_remaining_items| is true this repeats until the waiting
// queue is empty; otherwise exactly one item is processed. The waiting queue
// must not be empty on entry.
void SavePackage::SaveNextFile(bool process_all_remaining_items) {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
DCHECK(page_);
DCHECK(!waiting_item_queue_.empty());
do {
// Pop SaveItem from waiting list.
std::unique_ptr<SaveItem> save_item =
std::move(waiting_item_queue_.front());
// Keep a raw pointer: ownership is about to move into the map.
SaveItem* save_item_ptr = save_item.get();
waiting_item_queue_.pop_front();
// Add the item to |in_progress_items_|.
DCHECK(!base::Contains(in_progress_items_, save_item->id()));
in_progress_items_[save_item_ptr->id()] = std::move(save_item);
save_item_ptr->Start();
// Find the frame responsible for making the network request below - it will
// be used in security checks made later. For items fetched from the network
// this is the containing frame; otherwise it is the item's own frame.
FrameTreeNodeId requester_frame_tree_node_id =
save_item_ptr->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_NET
? save_item_ptr->container_frame_tree_node_id()
: save_item_ptr->frame_tree_node_id();
DCHECK(requester_frame_tree_node_id);
FrameTreeNode* requester_frame_tree_node =
FrameTreeNode::GloballyFindByID(requester_frame_tree_node_id);
if (!requester_frame_tree_node) {
// The requesting frame no longer exists: mark the item as failed and move
// on to the loop condition.
// NOTE(review): the item is left in |in_progress_items_| here - confirm
// that this is intended.
save_item_ptr->Finish(0, /* is_success = */ false);
continue;
}
RenderFrameHostImpl* requester_frame =
requester_frame_tree_node->current_frame_host();
// The quarantine remote is only bound when the download manager provides a
// connection callback.
mojo::PendingRemote<quarantine::mojom::Quarantine> quarantine;
auto quarantine_callback =
download_manager_->GetQuarantineConnectionCallback();
if (quarantine_callback)
quarantine_callback.Run(quarantine.InitWithNewPipeAndPassReceiver());
file_manager_->SaveURL(
save_item_ptr->id(), save_item_ptr->url(), save_item_ptr->referrer(),
save_item_ptr->isolation_info(), save_item_ptr->request_mode(),
save_item_ptr->is_outermost_main_frame(),
requester_frame->GetProcess()->GetDeprecatedID(),
requester_frame->render_view_host()->GetRoutingID(),
requester_frame->GetRoutingID(), save_item_ptr->save_source(),
save_item_ptr->full_path(),
page_->GetMainDocument().GetBrowserContext(),
page_->GetMainDocument()
.GetRenderViewHost()
->GetProcess()
->GetStoragePartition(),
this, download_manager_->GetApplicationClientIdForFileScanning(),
std::move(quarantine));
} while (process_all_remaining_items && !waiting_item_queue_.empty());
}
// Returns the save progress as an integer percentage.
//
// Returns 0 when no save items have been counted yet and 100 when no item is
// currently in process. Otherwise returns the number of completed items scaled
// to a percentage of |all_save_items_count_|.
// NOTE(review): presumably completed_count() never exceeds
// |all_save_items_count_|, which would keep the result within [0, 100] -
// confirm against the bookkeeping of the success/failure maps.
int SavePackage::PercentComplete() {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);
  if (!all_save_items_count_)
    return 0;
  if (!in_process_count())
    return 100;
  // Multiply before dividing: a plain integer division of the completed count
  // by the total truncates to 0 for every partially completed save, so the
  // value would never be a usable percentage.
  return static_cast<int>(completed_count() * 100 / all_save_items_count_);
}
int64_t SavePackage::CurrentSpeed() const {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
base::TimeDelta diff = base::TimeTicks::Now() - start_tick_;
int64_t diff_ms = diff.InMilliseconds();
return diff_ms == 0 ? 0 : completed_count() * 1000 / diff_ms;
}
void SavePackage::DoSavingProcess() {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
if (save_type_ != SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
// Save as HTML only or MHTML.
DCHECK_EQ(NET_FILES, wait_state_);
DCHECK((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
(save_type_ == SAVE_PAGE_TYPE_AS_MHTML))
<< save_type_;
if (waiting_item_queue_.size()) {
DCHECK_EQ(all_save_items_count_, waiting_item_queue_.size());
SaveNextFile(false);
}
return;
}
// We guarantee that images and JavaScripts must be downloaded first.
// So when finishing all those sub-resources, we will know which
// sub-resource's link can be replaced with local file path, which
// sub-resource's link need to be replaced with absolute URL which
// point to its internet address because it got error when saving its data.
// Start a new SaveItem job if we still have job in waiting queue.
if (!waiting_item_queue_.empty()) {
DCHECK_EQ(NET_FILES, wait_state_);
const SaveItem* save_item = waiting_item_queue_.front().get();
if (save_item->save_source() != SaveFileCreateInfo::SAVE_FILE_FROM_DOM) {
SaveNextFile(false);
} else if (!in_process_count()) {
// If there is no in-process SaveItem, it means all sub-resources
// have been processed. Now we need to start serializing HTML DOM
// for the current page to get the generated HTML data.
wait_state_ = HTML_DATA;
// All non-HTML resources have been finished, start all remaining
// HTML files.
SaveNextFile(true);
}
} else if (in_process_count()) {
// Continue asking for HTML data.
DCHECK_EQ(HTML_DATA, wait_state_);
}
}
// After finishing all SaveItems which need to get data from net.
// We collect all URLs which have local storage and send the
// map:(originalURL:currentLocalPath) to render process (backend).
// Then render process will serialize DOM and send data to us.
void SavePackage::GetSerializedHtmlWithLocalLinks() {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
DCHECK(page_);
if (wait_state_ != HTML_DATA)
return;
// First collect the status of all the resource files and check whether they
// have created local files (although they have not been completely saved).
int successful_started_items_count = 0;
for (const auto& item : in_progress_items_) {
DCHECK_EQ(SaveFileCreateInfo::SAVE_FILE_FROM_DOM,
item.second->save_source());
if (item.second->has_final_name())
successful_started_items_count++;
}
// If not all file of HTML resource have been started, then wait.
if (successful_started_items_count != in_process_count())
return;
// Try to serialize all the frames gathered during GetSavableResourceLinks.
DCHECK_EQ(0, number_of_frames_pending_response_);
FrameTree* frame_tree =
static_cast<RenderFrameHostImpl*>(&page_->GetMainDocument())
->frame_tree();
for (const auto& item : frame_tree_node_id_to_save_item_) {
FrameTreeNodeId frame_tree_node_id = item.first;
const SaveItem* save_item = item.second;
FrameTreeNode* frame_tree_node = frame_tree->FindByID(frame_tree_node_id);
if (frame_tree_node &&
IsSavableFrame(frame_tree_node->current_frame_host())) {
// Ask the frame for HTML to be written to the associated SaveItem.
GetSerializedHtmlWithLocalLinksForFrame(frame_tree_node);
number_of_frames_pending_response_++;
} else {
// Notify SaveFileManager about the failure to save this SaveItem.
download::GetDownloadTaskRunner()->PostTask(
FROM_HERE,
base::BindOnce(&SaveFileManager::SaveFinished, file_manager_,
save_item->id(), id(), false));
}
}
if (number_of_frames_pending_response_ == 0) {
// All frames disappeared since gathering of savable resources?
// Treat this as cancellation.
Cancel(false);
}
}
// Asks the current frame host of |target_tree_node| to serialize itself,
// passing along the URL -> local path and frame token -> local path mappings
// for the save items contained in that frame. Serialized data and completion
// are reported back through OnDidReceiveSerializedHtmlData() and
// OnDidFinishedSerializingHtmlData().
void SavePackage::GetSerializedHtmlWithLocalLinksForFrame(
    FrameTreeNode* target_tree_node) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);
  DCHECK(target_tree_node);
  RenderFrameHostImpl* target = target_tree_node->current_frame_host();

  // Collect the successfully saved items.
  // SECURITY NOTE: Only the urls / local paths the given frame already had
  // access to are sent (because it contained the savable resources / subframes
  // associated with those save items), never *all* of them.
  base::flat_map<GURL, base::FilePath> url_to_local_path;
  base::flat_map<blink::FrameToken, base::FilePath> frame_token_to_local_path;

  const auto contained_items = frame_tree_node_id_to_contained_save_items_.find(
      target_tree_node->frame_tree_node_id());
  if (contained_items != frame_tree_node_id_to_contained_save_items_.end()) {
    // Directory part of every relative link generated for this frame: the
    // current directory, plus the name of the main save directory when the
    // target is the outermost main frame.
    base::FilePath link_directory(base::FilePath::kCurrentDirectory);
    if (target_tree_node->IsOutermostMainFrame()) {
      link_directory =
          link_directory.Append(saved_main_directory_path_.BaseName());
    }

    for (const SaveItem* save_item : contained_items->second) {
      // Items without a final name failed to save; leave them out.
      if (!save_item->has_final_name()) {
        DCHECK_EQ(SaveItem::SaveState::COMPLETE, save_item->state());
        DCHECK(!save_item->success());
        continue;
      }

      // Relative path used to refer to |save_item| from the target frame.
      const base::FilePath local_path =
          link_directory.Append(save_item->full_path().BaseName());

      const bool item_is_frame =
          save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM;
      if (!item_is_frame) {
        // Plain resource: keyed by its URL.
        DCHECK(!save_item->frame_tree_node_id());
        url_to_local_path[save_item->url()] = local_path;
        continue;
      }

      // Subframe: keyed by the frame token the target frame knows it by.
      FrameTreeNode* save_item_frame_tree_node =
          target_tree_node->frame_tree().FindByID(
              save_item->frame_tree_node_id());
      if (!save_item_frame_tree_node) {
        // crbug.com/541354: Raciness when saving a dynamically changing page.
        continue;
      }
      std::optional<blink::FrameToken> frame_token =
          save_item_frame_tree_node->render_manager()
              ->GetFrameTokenForSiteInstanceGroup(
                  target->GetSiteInstance()->group());
      DCHECK(frame_token.has_value());
      frame_token_to_local_path[frame_token.value()] = local_path;
    }
  }

  // Create a SavePackageSerializationHandler for the target RenderFrameHost
  // together with the callbacks that report progress, owned by a mojo receiver
  // that stays alive while serialization is in progress. The Done() callback is
  // expected right after the last DidReceiveData() callback, at which point the
  // remote end of the pipe is closed and the receiver is disposed.
  auto on_data_received =
      base::BindRepeating(&SavePackage::OnDidReceiveSerializedHtmlData,
                          weak_ptr_factory_.GetWeakPtr(), target->GetWeakPtr());
  auto on_finished =
      base::BindOnce(&SavePackage::OnDidFinishedSerializingHtmlData,
                     weak_ptr_factory_.GetWeakPtr(), target->GetWeakPtr());

  mojo::PendingRemote<mojom::FrameHTMLSerializerHandler> serializer_handler;
  mojo::MakeSelfOwnedReceiver(
      std::make_unique<SavePackageSerializationHandler>(
          std::move(on_data_received), std::move(on_finished)),
      serializer_handler.InitWithNewPipeAndPassReceiver());

  // Ask target frame to serialize itself.
  const bool is_off_the_record =
      page_->GetMainDocument().GetBrowserContext()->IsOffTheRecord();
  target->GetSerializedHtmlWithLocalLinks(
      url_to_local_path, frame_token_to_local_path, is_off_the_record,
      std::move(serializer_handler));
}
// Receives a chunk of serialized HTML |data| from the frame |sender| and
// forwards it to SaveFileManager on the download task runner. Ignored when
// |sender| is gone or |wait_state_| is not HTML_DATA. When the matching
// SaveItem is no longer in progress, only the |wrote_to_completed_file_| /
// |wrote_to_failed_file_| flags are updated.
void SavePackage::OnDidReceiveSerializedHtmlData(
    base::WeakPtr<RenderFrameHostImpl> sender,
    const std::string& data) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);

  // Check current state.
  if (!sender)
    return;
  if (wait_state_ != HTML_DATA)
    return;

  // This method is only invoked in response to the serialization request that
  // the browser previously sent to the renderer for a given FrameTreeNode.
  const SaveItem* save_item = LookupSaveItemForSender(sender);
  DCHECK(save_item);

  if (save_item->state() != SaveItem::IN_PROGRESS) {
    // Record that data arrived for an item that had already been finished.
    const bool url_already_saved = std::any_of(
        saved_success_items_.begin(), saved_success_items_.end(),
        [save_item](const auto& saved_entry) {
          return saved_entry.second->url() == save_item->url();
        });
    if (url_already_saved)
      wrote_to_completed_file_ = true;
    if (base::Contains(saved_failed_items_, save_item->id()))
      wrote_to_failed_file_ = true;
    return;
  }

  if (data.empty())
    return;

  // Call write file functionality in download sequence.
  download::GetDownloadTaskRunner()->PostTask(
      FROM_HERE, base::BindOnce(&SaveFileManager::UpdateSaveProgress,
                                file_manager_, save_item->id(), data));
}
// Called when the frame |sender| has finished serializing itself. Posts a
// successful SaveFinished for the matching SaveItem to the download task
// runner and decrements |number_of_frames_pending_response_|. Ignored when
// |sender| is gone or |wait_state_| is not HTML_DATA.
void SavePackage::OnDidFinishedSerializingHtmlData(
    base::WeakPtr<RenderFrameHostImpl> sender) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);

  // Check current state.
  if (!sender)
    return;
  if (wait_state_ != HTML_DATA)
    return;

  // This method is only invoked in response to the serialization request that
  // the browser previously sent to the renderer for a given FrameTreeNode.
  const SaveItem* save_item = LookupSaveItemForSender(sender);
  DCHECK(save_item);

  DVLOG(20) << __func__ << "() save_item_id = " << save_item->id()
            << " url = \"" << save_item->url().spec() << "\"";

  // This frame has been completely saved: finish it in the download sequence.
  download::GetDownloadTaskRunner()->PostTask(
      FROM_HERE, base::BindOnce(&SaveFileManager::SaveFinished, file_manager_,
                                save_item->id(), id(), true));

  --number_of_frames_pending_response_;
  DCHECK_LE(0, number_of_frames_pending_response_);
}
// Returns the SaveItem registered in |frame_tree_node_id_to_save_item_| for the
// frame tree node of |sender|, or nullptr when |sender| is gone or no item is
// registered for that node.
const SaveItem* SavePackage::LookupSaveItemForSender(
    base::WeakPtr<RenderFrameHostImpl> sender) {
  if (!sender)
    return nullptr;

  const auto entry = frame_tree_node_id_to_save_item_.find(
      sender->frame_tree_node()->frame_tree_node_id());
  if (entry == frame_tree_node_id_to_save_item_.end())
    return nullptr;

  const SaveItem* found_item = entry->second;
  // Items in this map are expected to be frames.
  DCHECK_EQ(SaveFileCreateInfo::SAVE_FILE_FROM_DOM, found_item->save_source());
  return found_item;
}
// Requests the savable resource links from |rfh| when it is a savable frame,
// and counts it as one more frame whose response is pending. Does nothing for
// frames that are not savable.
void SavePackage::GetSavableResourceLinksForRenderFrameHost(
    RenderFrameHostImpl* rfh) {
  if (IsSavableFrame(rfh)) {
    ++number_of_frames_pending_response_;
    rfh->GetSavableResourceLinksFromRenderer();
  }
}
// Ask for all savable resource links from backend, include main frame and
// sub-frame.
void SavePackage::GetSavableResourceLinks() {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
DCHECK(page_);
if (wait_state_ != START_PROCESS)
return;
wait_state_ = RESOURCES_LIST;
DCHECK_EQ(0, number_of_frames_pending_response_);
page_->GetMainDocument().ForEachRenderFrameHostImpl(
[this](RenderFrameHostImpl* rfh) {
GetSavableResourceLinksForRenderFrameHost(rfh);
});
DCHECK_LT(0, number_of_frames_pending_response_);
// Enqueue the main frame separately (because this frame won't show up in any
// of GetsSavableResourceLinks callbacks).
FrameTreeNode* main_frame_tree_node =
static_cast<RenderFrameHostImpl*>(&page_->GetMainDocument())
->frame_tree_node();
EnqueueFrame(FrameTreeNodeId(), // No container.
main_frame_tree_node->frame_tree_node_id(),
main_frame_tree_node->current_url());
all_save_items_count_ = 1;
}
// Handles the savable-links response from frame |sender|: enqueues every URL in
// |resources_list| as a resource contained in that frame, enqueues every
// verified entry of |subframes| as a frame, and then records that one pending
// response has arrived. Ignored unless |wait_state_| is RESOURCES_LIST.
void SavePackage::SavableResourceLinksResponse(
    RenderFrameHostImpl* sender,
    const std::vector<GURL>& resources_list,
    blink::mojom::ReferrerPtr referrer,
    const std::vector<blink::mojom::SavableSubframePtr>& subframes) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);
  if (wait_state_ != RESOURCES_LIST)
    return;

  // Everything reported here is contained in the sender's frame.
  const FrameTreeNodeId container_frame_tree_node_id =
      sender->frame_tree_node()->frame_tree_node_id();

  // Add all sub-resources to the wait list.
  for (const GURL& resource_url : resources_list) {
    EnqueueSavableResource(container_frame_tree_node_id, resource_url,
                           referrer.To<content::Referrer>());
  }

  // Add the subframes that can still be found and verified as children.
  for (const auto& subframe : subframes) {
    FrameTreeNode* subframe_ftn = sender->FindAndVerifyChild(
        subframe->subframe_token,
        bad_message::DWNLD_INVALID_SAVABLE_RESOURCE_LINKS_RESPONSE);
    // A missing node is expected occasionally: crbug.com/541354 - Raciness
    // when saving a dynamically changing page.
    if (subframe_ftn) {
      EnqueueFrame(container_frame_tree_node_id,
                   subframe_ftn->frame_tree_node_id(), subframe->original_url);
    }
  }

  CompleteSavableResourceLinksResponse();
}
// Creates a SaveItem for |url|, appends it to |waiting_item_queue_| (which
// takes ownership) and records it as contained in the frame identified by
// |container_frame_tree_node_id|. Returns a non-owning pointer to the new item.
SaveItem* SavePackage::CreatePendingSaveItem(
    FrameTreeNodeId container_frame_tree_node_id,
    FrameTreeNodeId save_item_frame_tree_node_id,
    const GURL& url,
    const Referrer& referrer,
    SaveFileCreateInfo::SaveFileSource save_source) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);

  Referrer request_referrer = Referrer::SanitizeForRequest(url, referrer);

  // Use an empty Isolation Info for subresources.
  // TODO(crbug.com/41485695): Populate the correct site isolation and fetch
  // mode per-resource.
  const net::IsolationInfo empty_isolation_info;

  std::unique_ptr<SaveItem> owned_item = base::WrapUnique(new SaveItem(
      url, request_referrer, empty_isolation_info,
      network::mojom::RequestMode::kNavigate, false, this, save_source,
      save_item_frame_tree_node_id, container_frame_tree_node_id));
  // Keep a raw pointer for the bookkeeping below before giving up ownership.
  SaveItem* pending_item = owned_item.get();

  waiting_item_queue_.push_back(std::move(owned_item));
  frame_tree_node_id_to_contained_save_items_[container_frame_tree_node_id]
      .push_back(pending_item);
  return pending_item;
}
// Ensures there is exactly one SaveItem per |url|. If an item for |url| already
// exists it is additionally recorded as contained in the frame identified by
// |container_frame_tree_node_id|; otherwise a new pending item is created and
// remembered in |url_to_save_item_|. Must not be used for frames.
void SavePackage::CreatePendingSaveItemDeduplicatingByUrl(
    FrameTreeNodeId container_frame_tree_node_id,
    FrameTreeNodeId save_item_frame_tree_node_id,
    const GURL& url,
    const Referrer& referrer,
    SaveFileCreateInfo::SaveFileSource save_source) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);
  DCHECK(url.is_valid());  // |url| should be validated by the callers.

  // Frames should not be deduplicated by URL.
  DCHECK_NE(SaveFileCreateInfo::SAVE_FILE_FROM_DOM, save_source);

  const auto existing = url_to_save_item_.find(url);
  if (existing == url_to_save_item_.end()) {
    // First time this URL is seen: create the item and index it by URL.
    url_to_save_item_[url] = CreatePendingSaveItem(
        container_frame_tree_node_id, save_item_frame_tree_node_id, url,
        referrer, save_source);
    return;
  }

  // Already queued: only note that this container frame references it too.
  frame_tree_node_id_to_contained_save_items_[container_frame_tree_node_id]
      .push_back(existing->second);
}
// Queues the resource at |url| (contained in the frame identified by
// |container_frame_tree_node_id|) as a SAVE_FILE_FROM_NET item, deduplicated by
// URL. Invalid URLs are silently ignored.
void SavePackage::EnqueueSavableResource(
    FrameTreeNodeId container_frame_tree_node_id,
    const GURL& url,
    const Referrer& referrer) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);
  if (url.is_valid()) {
    // A plain resource has no frame tree node of its own, hence the
    // default-constructed FrameTreeNodeId.
    CreatePendingSaveItemDeduplicatingByUrl(
        container_frame_tree_node_id, FrameTreeNodeId(), url, referrer,
        SaveFileCreateInfo::SAVE_FILE_FROM_NET);
  }
}
// Queues the frame identified by |frame_tree_node_id| as a SAVE_FILE_FROM_DOM
// item with an empty referrer, and registers the new item in
// |frame_tree_node_id_to_save_item_|.
void SavePackage::EnqueueFrame(FrameTreeNodeId container_frame_tree_node_id,
                               FrameTreeNodeId frame_tree_node_id,
                               const GURL& frame_original_url) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);
  SaveItem* frame_item = CreatePendingSaveItem(
      container_frame_tree_node_id, frame_tree_node_id, frame_original_url,
      Referrer(), SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
  DCHECK(frame_item);
  frame_tree_node_id_to_save_item_[frame_tree_node_id] = frame_item;
}
// Called when a frame failed to report its savable resource links. |sender| is
// not inspected; the failure simply counts as that frame's response so the
// pending-response bookkeeping can still complete.
void SavePackage::SavableResourceLinksError(RenderFrameHostImpl* sender) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);

  CompleteSavableResourceLinksResponse();
}
// Records that one frame has answered the savable-links request. When the last
// pending response arrives, orders the waiting queue so frames come last,
// publishes the item count and either starts saving or cancels when there is
// nothing to save.
void SavePackage::CompleteSavableResourceLinksResponse() {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);

  --number_of_frames_pending_response_;
  DCHECK_LE(0, number_of_frames_pending_response_);
  if (number_of_frames_pending_response_ != 0)
    return;  // Need to wait for more responses from RenderFrames.

  // Move frames (items with SAVE_FILE_FROM_DOM) behind all other items while
  // keeping the relative order inside each of the two groups.
  std::stable_sort(
      waiting_item_queue_.begin(), waiting_item_queue_.end(),
      [](const std::unique_ptr<SaveItem>& lhs,
         const std::unique_ptr<SaveItem>& rhs) {
        DCHECK(lhs);
        DCHECK(rhs);
        const bool lhs_is_frame =
            lhs->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM;
        const bool rhs_is_frame =
            rhs->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM;
        return !lhs_is_frame && rhs_is_frame;
      });

  all_save_items_count_ = static_cast<int>(waiting_item_queue_.size());

  // The download's "total bytes" is used to carry the total number of files
  // to save.
  if (download_)
    download_->SetTotalBytes(all_save_items_count_);

  if (!all_save_items_count_) {
    // No savable frames and/or resources - treat it as user cancel.
    Cancel(true);
    return;
  }

  // Start processing the queued items: network resources first, after which
  // the frames are serialized with links pointing at the local copies.
  wait_state_ = NET_FILES;
  DoSavingProcess();
}
void SavePackage::GetSaveInfo() {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
if (!page_)
return;
// Can't use |page_| in the download sequence, so get the data that we
// need before calling to it.
base::FilePath website_save_dir;
base::FilePath download_save_dir;
auto* delegate = download_manager_->GetDelegate();
if (delegate) {
delegate->GetSaveDir(page_->GetMainDocument().GetBrowserContext(),
&website_save_dir, &download_save_dir);
}
std::string mime_type =
static_cast<PageImpl*>(page_.get())->GetContentsMimeType();
bool can_save_as_complete = CanSaveAsComplete(mime_type);
download::GetDownloadTaskRunner()->PostTaskAndReplyWithResult(
FROM_HERE,
base::BindOnce(&SavePackage::CreateDirectoryOnFileThread, title_,
page_url_, can_save_as_complete, mime_type,
website_save_dir, download_save_dir),
base::BindOnce(&SavePackage::ContinueGetSaveInfo, this,
can_save_as_complete));
}
// static
// Runs on the download task runner. Picks the directory to save into
// (|website_save_dir| when it exists, otherwise |download_save_dir|, which is
// created if missing) and returns that directory joined with a file name
// generated from |title|, |page_url|, |can_save_as_complete| and |mime_type|,
// truncated to fit the directory's maximum path length when possible.
base::FilePath SavePackage::CreateDirectoryOnFileThread(
    const std::u16string& title,
    const GURL& page_url,
    bool can_save_as_complete,
    const std::string& mime_type,
    const base::FilePath& website_save_dir,
    const base::FilePath& download_save_dir) {
  DCHECK(download::GetDownloadTaskRunner()->RunsTasksInCurrentSequence());

  const base::FilePath suggested_filename =
      filename_generation::GenerateFilename(title, page_url,
                                            can_save_as_complete, mime_type);

  base::FilePath save_dir;
  if (base::DirectoryExists(website_save_dir)) {
    // The default html/websites save folder exists: use it.
    save_dir = website_save_dir;
  } else {
    // Fall back to the default download dir, creating it when necessary.
    if (!base::DirectoryExists(download_save_dir)) {
      bool created = base::CreateDirectory(download_save_dir);
      DCHECK(created);
    }
    save_dir = download_save_dir;
  }

  base::FilePath::StringType base_name =
      suggested_filename.RemoveExtension().BaseName().value();
  const base::FilePath::StringType file_name_ext =
      suggested_filename.Extension();

  // Make sure the suggested file name is not too long for |save_dir|.
  const uint32_t max_path = GetMaxPathLengthForDirectory(save_dir);
  const bool fits = TruncateBaseNameToFitPathConstraints(
      save_dir, file_name_ext, max_path, &base_name);
  if (!fits) {
    // A shorter file name cannot be produced. The save will fail unless the
    // user picks a shorter name, but the untruncated suggestion is still
    // returned: bailing out would mean no "save as" popup at all, which is
    // even more confusing. This case should be rare.
    return save_dir.Append(suggested_filename);
  }
  return save_dir.Append(base_name + file_name_ext);
}
// Second half of GetSaveInfo(), back on the UI thread: asks the download
// manager delegate to choose the save path, starting from |suggested_path|, and
// routes the choice to OnPathPicked().
void SavePackage::ContinueGetSaveInfo(bool can_save_as_complete,
                                      const base::FilePath& suggested_path) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);

  // The WebContents which owns this SavePackage may have disappeared during
  // the UI->download sequence->UI thread hop of
  // GetSaveInfo->CreateDirectoryOnFileThread->ContinueGetSaveInfo.
  if (!page_)
    return;
  auto* delegate = download_manager_->GetDelegate();
  if (!delegate)
    return;

  // The HTML extension is offered as a default only for pages that can be
  // saved as complete HTML; otherwise the default extension stays empty.
  base::FilePath::StringType default_extension;
  if (can_save_as_complete)
    default_extension = kDefaultHtmlExtension;

  delegate->ChooseSavePath(GetWebContents(page_.get()), suggested_path,
                           default_extension, can_save_as_complete,
                           base::BindOnce(&SavePackage::OnPathPicked,
                                          weak_ptr_factory_.GetWeakPtr()));
}
// Called with the path and save type chosen for this save. Stores the main file
// path, derives the directory for the page's files and the save type, and then
// calls Init() with |download_created_callback|. Does nothing when |page_| is
// gone.
void SavePackage::OnPathPicked(
    SavePackagePathPickedParams params,
    SavePackageDownloadCreatedCallback download_created_callback) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);
  DCHECK((params.save_type == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
         (params.save_type == SAVE_PAGE_TYPE_AS_MHTML) ||
         (params.save_type == SAVE_PAGE_TYPE_AS_COMPLETE_HTML))
      << params.save_type;
  if (!page_)
    return;

  saved_main_file_path_ = params.file_path;

#if BUILDFLAG(IS_ANDROID)
  // Content URIs are always saved as MHTML and skip the file name and
  // directory handling below.
  if (saved_main_file_path_.IsContentUri()) {
    save_type_ = SAVE_PAGE_TYPE_AS_MHTML;
    saved_main_file_display_name_ = params.display_name;
    Init(std::move(download_created_callback));
    return;
  }
#endif

  // Ensure the filename is safe.
  // TODO(asanka): This call may block on IO and shouldn't be made
  // from the UI thread. See http://crbug.com/61827.
  PageImpl* page_impl = static_cast<PageImpl*>(page_.get());
  const std::string mime_type = page_impl->GetContentsMimeType();
  net::GenerateSafeFileName(mime_type, false, &saved_main_file_path_);

  save_type_ = params.save_type;
  saved_main_directory_path_ = saved_main_file_path_.DirName();
  if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
    // A complete save stores its files in a sibling directory named after
    // the main file (without extension) plus a "_files" suffix.
    const base::FilePath::StringType files_directory_name =
        saved_main_file_path_.RemoveExtension().BaseName().value() +
        FILE_PATH_LITERAL("_files");
    saved_main_directory_path_ =
        saved_main_directory_path_.Append(files_directory_name);
  }

#if BUILDFLAG(IS_MAC)
  file_tags_ = params.file_tags;
#endif

  Init(std::move(download_created_callback));
}
// Drops this SavePackage's references to the download item and the download
// manager. Both are expected to be set when this is called.
void SavePackage::FinalizeDownloadEntry() {
  DCHECK(download_);
  DCHECK(download_manager_);

  // Clear both pointers so neither object is used through this package again.
  download_ = nullptr;
  download_manager_ = nullptr;
}
} // namespace content