blob: 44119c4526b167144c61c17fb072d50f07c007cc [file] [log] [blame]
// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "content/browser/download/mhtml_generation_manager.h"
#include <tuple>
#include <utility>
#include "base/containers/queue.h"
#include "base/containers/span.h"
#include "base/files/file.h"
#include "base/functional/bind.h"
#include "base/memory/ptr_util.h"
#include "base/metrics/histogram_macros.h"
#include "base/numerics/safe_conversions.h"
#include "base/stl_util.h"
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
#include "base/time/time.h"
#include "base/trace_event/trace_event.h"
#include "base/types/optional_util.h"
#include "base/uuid.h"
#include "components/download/public/common/download_task_runner.h"
#include "content/browser/bad_message.h"
#include "content/browser/download/mhtml_extra_parts_impl.h"
#include "content/browser/renderer_host/frame_tree_node.h"
#include "content/browser/renderer_host/render_frame_host_impl.h"
#include "content/browser/web_contents/web_contents_impl.h"
#include "content/common/download/mhtml_file_writer.mojom.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/browser/mhtml_extra_parts.h"
#include "content/public/browser/mhtml_generation_result.h"
#include "content/public/browser/render_frame_host.h"
#include "content/public/browser/render_process_host.h"
#include "content/public/browser/web_contents.h"
#include "content/public/common/mhtml_generation_params.h"
#include "crypto/secure_hash.h"
#include "crypto/sha2.h"
#include "mojo/core/embedder/embedder.h"
#include "mojo/public/cpp/bindings/associated_remote.h"
#include "net/base/mime_util.h"
#include "third_party/blink/public/common/associated_interfaces/associated_interface_provider.h"
#if BUILDFLAG(IS_WIN)
#include <windows.h>
#include "base/win/security_util.h"
#include "base/win/sid.h"
#endif // BUILDFLAG(IS_WIN)
namespace {
// Callback used to notify the UI thread that writing to the MHTML file is
// complete. It receives the save status the write finished with.
using MHTMLWriteCompleteCallback =
base::RepeatingCallback<void(content::mojom::MhtmlSaveStatus)>;
// Header-name prefixes emitted in front of each extra data part's location
// and type.
constexpr char kContentLocation[] = "Content-Location: ";
constexpr char kContentType[] = "Content-Type: ";
// Sentinel file size reported when the save did not succeed. Declared as an
// immutable int64_t so it cannot be modified at runtime and matches the type
// of the file sizes it is compared against.
constexpr int64_t kInvalidFileSize = -1;
#if BUILDFLAG(IS_WIN)
// Tries to add a deny entry for FILE_EXECUTE, applying to the SID parsed from
// the SDDL string "WD", on the file at `path`. Returns false if the SID could
// not be parsed or if the access change itself failed.
bool DenyExecuteAccessToMHTMLFile(const base::FilePath& path) {
  static constexpr wchar_t kEveryoneSid[] = L"WD";
  const auto everyone = base::win::Sid::FromSddlStringVector({kEveryoneSid});
  return everyone &&
         base::win::DenyAccessToPath(path, *everyone, FILE_EXECUTE,
                                     /*NO_INHERITANCE=*/0,
                                     /*recursive=*/false);
}
#endif // BUILDFLAG(IS_WIN)
// Outcome of closing the generated MHTML file: the final save status, the
// resulting file size, and (optionally) the digest of the file contents.
// Status and size are stored by value; the digest, when provided, is copied so
// the result does not depend on the lifetime of the caller's string.
struct CloseFileResult {
  // `digest` may be null, in which case no digest is recorded.
  CloseFileResult(content::mojom::MhtmlSaveStatus status,
                  int64_t size,
                  std::string* digest)
      : save_status(status),
        file_size(size),
        file_digest(digest ? std::optional<std::string>(*digest)
                           : std::nullopt) {}

  content::mojom::MhtmlSaveStatus save_status;
  int64_t file_size;
  std::optional<std::string> file_digest;

  // Repackages the size and (possibly absent) digest into the public result
  // type handed to the generation callback.
  content::MHTMLGenerationResult toMHTMLGenerationResult() const {
    const auto* digest_ptr = base::OptionalToPtr(file_digest);
    return content::MHTMLGenerationResult(file_size, digest_ptr);
  }
};
// Opens the output file at `file_path` with FLAG_CREATE_ALWAYS | FLAG_WRITE
// and returns the handle. Must be called on the download task runner. The
// returned base::File is invalid when creation failed.
base::File CreateMHTMLFile(const base::FilePath& file_path) {
  DCHECK(download::GetDownloadTaskRunner()->RunsTasksInCurrentSequence());

  // SECURITY NOTE: A descriptor for this file is handed to several renderer
  // processes which (with out-of-process iframes) may act for different web
  // principals. The file must therefore be write-only: read access would let
  // one renderer observe content produced by another renderer / principal.
  const uint32_t write_only_flags =
      base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE;
  base::File mhtml_file(file_path, write_only_flags);
  if (!mhtml_file.IsValid()) {
    DLOG(ERROR) << "Failed to create file to save MHTML at: "
                << file_path.value();
  }

#if BUILDFLAG(IS_WIN)
  // SECURITY NOTE: On Windows a writeable handle that could be re-opened as
  // executable must not be passed to a renderer; mojo DCHECKs on that.
  //
  // base::PreventExecuteMapping or base::File::Flags::FLAG_WIN_NO_EXECUTE
  // would normally be preferred, but both DCHECK when the file lives outside
  // a set of safe directories, and MHTML files usually land in a
  // user-controlled location such as the Downloads directory.
  //
  // Marking an MHTML file no-execute has no side effects: it is never mapped
  // into memory as executable and is not a real 'executable' file.
  //
  // Note that this does not stop the file from being double-clicked or opened
  // by any application, because that goes through ShellExecute, which does
  // not require FILE_EXECUTE on the file.
  //
  // The result is deliberately ignored: if this fails, other filesystem
  // operations are likely to fail too, mojo will refuse to transfer the
  // handle to the renderer, and the MHTML file creation will fail.
  std::ignore = DenyExecuteAccessToMHTMLFile(file_path);
#endif

  return mhtml_file;
}
} // namespace
namespace content {
// The class and all of its members live on the UI thread. Only static methods
// are executed on other threads.
// Job instances are created in MHTMLGenerationManager::Job::StartNewJob(),
// proceeding with the MHTML saving process unmanaged. Every instance is
// self-owned and responsible for deleting itself upon invoking OnFinished.
// Because of the self-ownership lifetime concerns, we take the following
// precautions:
// - SerializeAsMHTMLResponse() always proceeds with finalizing upon detecting
// Job completion/cancellation.
// - Jobs are prematurely finalized and deleted upon detecting a connection
// error with the message pipe during serialization.
// - Any pending callbacks after deletion are invalidated using weak pointers.
class MHTMLGenerationManager::Job {
public:
// Creates and registers a new job.
static void StartNewJob(
WebContents* web_contents,
const MHTMLGenerationParams& params,
MHTMLGenerationResult::GenerateMHTMLCallback callback);
Job(const Job&) = delete;
Job& operator=(const Job&) = delete;
private:
// Private because instances are self-owned: they are only created through
// StartNewJob() and delete themselves in OnFinished().
Job(WebContents* web_contents,
const MHTMLGenerationParams& params,
MHTMLGenerationResult::GenerateMHTMLCallback callback);
~Job();
// Begins queuing frames from web_contents, creates a new MHTML file and
// begins page serialization to created file.
void initializeJob(WebContents* web_contents);
// Writes the string |to_write| to the file. If successful, updates hash and
// returns true, otherwise, returns false. Does not take ownership of |file|
// nor |secure_hash|. |secure_hash| may be null, in which case only the file
// is written.
static bool WriteToFileAndUpdateHash(base::File* file,
crypto::SecureHash* secure_hash,
std::string to_write);
// Writes the MHTML footer to the file and closes it. It also receives the
// SimpleWatcher instance used to watch the data pipe and the current hash
// state so that both are destroyed on the download sequence.
//
// Note: The same |boundary| marker must be used for all "boundaries" -- in
// the header, parts and footer -- that belong to the same MHTML document (see
// also rfc1341, section 7.2.1, "boundary" description).
static CloseFileResult FinalizeOnFileThread(
mojom::MhtmlSaveStatus save_status,
const std::string& boundary,
base::File file,
const std::vector<MHTMLExtraDataPart>& extra_data_parts,
std::unique_ptr<mojo::SimpleWatcher> watcher,
std::unique_ptr<crypto::SecureHash> secure_hash);
// Creates a string that encompasses any remaining extra data parts to write
// to the file.
static std::string CreateExtraDataParts(
const std::string& boundary,
const std::vector<MHTMLExtraDataPart>& extra_data_parts);
// Creates a string with the contents of the MHTML file footer.
static std::string CreateFooter(const std::string& boundary);
// Called on the UI thread when the file that should hold the MHTML data has
// been created.
void OnFileAvailable(base::File browser_file);
// Called on the UI thread after the file got finalized and we have its size,
// or an error occurred while creating a new file. Runs the completion
// callback and deletes this Job.
void OnFinished(const CloseFileResult& result);
// Starts watching a handle on the file thread. Instantiates a new instance
// of |watcher_| upon call.
void BeginWatchingHandle(MHTMLWriteCompleteCallback callback);
// Writes data from the consumer handle to the new MHTML file. Only done
// with on the fly hash computation.
// Bound to the data pipe watcher and called upon notification of write
// completion to producer pipe sent to the Renderer.
// TODO(crbug.com/40606905): Eventually simplify this implementation
// with a DataPipeDrainer once error signalling is implemented there.
void WriteMHTMLToDisk(MHTMLWriteCompleteCallback callback,
MojoResult result,
const mojo::HandleSignalsState& state);
// Destroys |watcher_| instance and notifies UI thread of write completion.
void OnWriteComplete(MHTMLWriteCompleteCallback callback,
mojom::MhtmlSaveStatus save_status);
// Notifies Job of frame write completion and sends request to next render
// frame if the response was blocked by the write operation.
void DoneWritingToDisk(mojom::MhtmlSaveStatus save_status);
// Called when the message pipe to the renderer is disconnected.
void OnConnectionError();
// Handler for the Mojo interface callback (a notification from the
// renderer that the MHTML generation for previous frame has finished).
void SerializeAsMHTMLResponse(
mojom::MhtmlSaveStatus save_status,
const std::vector<std::string>& digests_of_uris_of_serialized_resources);
// Records newly serialized resource digests into
// |digests_of_already_serialized_uris_|.
void RecordDigests(
const std::vector<std::string>& digests_of_uris_of_serialized_resources);
// Continues sending serialization requests to the next frame if ready and
// there are more frames to be serialized.
void MaybeSendToNextRenderFrame(mojom::MhtmlSaveStatus save_status);
// Packs up the current status of the MHTML file save operation into a Mojo
// struct to send to the renderer process.
mojom::SerializeAsMHTMLParamsPtr CreateMojoParams();
// Sends Mojo interface call to the renderer, asking for MHTML
// generation of the next frame. Returns MhtmlSaveStatus::kSuccess or a
// specific error status.
mojom::MhtmlSaveStatus SendToNextRenderFrame();
// Indicates if the writing operation on the download sequence is complete,
// and we have received a response from the Renderer.
// This check is necessary to provide synchronization between file writing
// operations and MHTML serialization.
bool CurrentFrameDone() const;
// Called on the UI thread when a job has been finished.
void Finalize(mojom::MhtmlSaveStatus save_status);
// Write the MHTML footer and close the file on the file thread and respond
// back on the UI thread with the updated status and file size (which will be
// negative in case of errors).
void CloseFile(mojom::MhtmlSaveStatus save_status);
// Marks the Job as completed, preventing any further notifications from the
// Renderer. This prevents the race/crash from https://crbug.com/612098.
void MarkAsFinished();
// Close the MHTML file if it looks good, setting the size param. Returns
// false for failure.
static bool CloseFileIfValid(base::File& file, int64_t* file_size);
// User-configurable parameters. Includes the file location, binary encoding
// choices.
MHTMLGenerationParams params_;
// The IDs of frames that still need to be processed.
base::queue<FrameTreeNodeId> pending_frame_tree_node_ids_;
// Identifies a frame to which we've sent through
// MhtmlFileWriter::SerializeAsMHTML but for which we didn't yet process
// the response via SerializeAsMHTMLResponse.
FrameTreeNodeId frame_tree_node_id_of_busy_frame_;
// The handle to the file the MHTML is saved to for the browser process.
base::File browser_file_;
// MIME multipart boundary to use in the MHTML doc.
const std::string mhtml_boundary_marker_;
// Digests of URIs of already generated MHTML parts.
std::set<std::string> digests_of_already_serialized_uris_;
// Random per-job salt, sent to the renderer together with the digests of
// URIs to skip (see CreateMojoParams()).
std::string salt_;
// The callback to call once generation is complete.
MHTMLGenerationResult::GenerateMHTMLCallback callback_;
// Whether the job is finished (set to true only for the short duration of
// time between MHTMLGenerationManager::Job::Finalize is called and the job is
// destroyed by MHTMLGenerationManager::Job::OnFinished).
bool is_finished_;
// Any extra data parts that should be emitted into the output MHTML.
std::vector<MHTMLExtraDataPart> extra_data_parts_;
// MHTMLFileWriter instance for the frame being currently serialized.
mojo::AssociatedRemote<mojom::MhtmlFileWriter> writer_;
// Watcher to detect new data written to |mhtml_data_consumer_|.
// This is instantiated and destroyed in the download sequence for each frame.
std::unique_ptr<mojo::SimpleWatcher> watcher_;
// Consumer handle for data pipe streaming.
mojo::ScopedDataPipeConsumerHandle mhtml_data_consumer_;
// Indicates whether there is currently data being streamed from the Renderer.
// Not used when the renderer is writing directly to file.
bool waiting_on_data_streaming_;
// Current state of contents hash computation.
// This is updated upon every successful file write and finalized in the
// download sequence.
std::unique_ptr<crypto::SecureHash> secure_hash_;
// Hands out the weak pointers bound into reply callbacks.
base::WeakPtrFactory<Job> weak_factory_{this};
};
// Copies the generation parameters, generates a fresh MIME boundary marker and
// a random salt for this job, takes ownership of the completion callback, and
// immediately starts the job via initializeJob().
MHTMLGenerationManager::Job::Job(
    WebContents* web_contents,
    const MHTMLGenerationParams& params,
    MHTMLGenerationResult::GenerateMHTMLCallback callback)
    : params_(params),
      mhtml_boundary_marker_(net::GenerateMimeMultipartBoundary()),
      salt_(base::Uuid::GenerateRandomV4().AsLowercaseString()),
      callback_(std::move(callback)),
      is_finished_(false),
      waiting_on_data_streaming_(false) {
  // Queues the frames and posts the file-creation task.
  initializeJob(web_contents);
}
// Jobs must be destroyed on the UI thread, and only once |watcher_| has been
// released (it is reset in OnWriteComplete() or moved out in CloseFile()).
MHTMLGenerationManager::Job::~Job() {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);
  DCHECK(!watcher_);
}
// Queues every serializable frame of |web_contents|, snapshots any extra data
// parts attached to it, and posts the file-creation task to the download task
// runner. OnFileAvailable() is invoked with the result (if the Job is still
// alive).
void MHTMLGenerationManager::Job::initializeJob(WebContents* web_contents) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);
  TRACE_EVENT_NESTABLE_ASYNC_BEGIN2(
      "page-serialization", "SavingMhtmlJob", this, "url",
      web_contents->GetLastCommittedURL().possibly_invalid_spec(), "file",
      params_.file_path.AsUTF8Unsafe());

  // Only nodes of the primary frame tree are queued: an MHTML document cannot
  // load inner frame trees (e.g. fenced frames).
  auto* const contents_impl = static_cast<WebContentsImpl*>(web_contents);
  for (FrameTreeNode* node : contents_impl->GetPrimaryFrameTree().Nodes()) {
    // Placeholder nodes for inner trees are not serialized.
    const bool is_inner_tree_placeholder = static_cast<bool>(
        node->current_frame_host()->inner_tree_main_frame_tree_node_id());
    if (!is_inner_tree_placeholder) {
      pending_frame_tree_node_ids_.push(node->frame_tree_node_id());
    }
  }

  // The main frame has to be at the head of the queue.
  DCHECK(!pending_frame_tree_node_ids_.empty());
  DCHECK(FrameTreeNode::GloballyFindByID(pending_frame_tree_node_ids_.front())
             ->parent() == nullptr);

  // Keep a copy of any extra data parts registered on the WebContents.
  if (auto* extra_parts = static_cast<MHTMLExtraPartsImpl*>(
          MHTMLExtraParts::FromWebContents(web_contents))) {
    extra_data_parts_ = extra_parts->parts();
  }

  auto create_file_task = base::BindOnce(&CreateMHTMLFile, params_.file_path);
  auto on_file_created =
      base::BindOnce(&Job::OnFileAvailable, weak_factory_.GetWeakPtr());
  download::GetDownloadTaskRunner()->PostTaskAndReplyWithResult(
      FROM_HERE, std::move(create_file_task), std::move(on_file_created));
}
// Builds the per-frame serialization request: the shared boundary marker, the
// encoding / popup-overlay options from |params_|, the job salt, and the
// digests of every URI that has already been serialized.
mojom::SerializeAsMHTMLParamsPtr
MHTMLGenerationManager::Job::CreateMojoParams() {
  auto request = mojom::SerializeAsMHTMLParams::New();

  request->mhtml_boundary_marker = mhtml_boundary_marker_;
  request->mhtml_binary_encoding = params_.use_binary_encoding;
  request->mhtml_popup_overlay_removal = params_.remove_popup_overlay;

  // The salt and digest list let the renderer skip (= deduplicate) MHTML parts
  // that were already covered.
  request->salt = salt_;
  request->digests_of_uris_to_skip.assign(
      digests_of_already_serialized_uris_.begin(),
      digests_of_already_serialized_uris_.end());

  return request;
}
// Pops the next pending frame and asks its renderer to serialize it into a
// duplicate of |browser_file_|. Returns kFrameNoLongerExists if the frame tree
// node has disappeared, kSuccess once the request has been sent.
mojom::MhtmlSaveStatus MHTMLGenerationManager::Job::SendToNextRenderFrame() {
  DCHECK(browser_file_.IsValid());
  DCHECK(!pending_frame_tree_node_ids_.empty());

  const FrameTreeNodeId next_id = pending_frame_tree_node_ids_.front();
  pending_frame_tree_node_ids_.pop();

  FrameTreeNode* const node = FrameTreeNode::GloballyFindByID(next_id);
  if (!node) {
    // The contents went away.
    return mojom::MhtmlSaveStatus::kFrameNoLongerExists;
  }
  RenderFrameHost* const frame_host = node->current_frame_host();

  // Reaching this point means the previous frame is done, so its connection
  // can safely be dropped before rebinding.
  if (writer_) {
    writer_.reset();
  }

  // Bind the Mojo interface to the RenderFrame.
  frame_host->GetRemoteAssociatedInterfaces()->GetInterface(&writer_);

  // base::Unretained is safe: |writer_| is owned by this Job instance.
  writer_.set_disconnect_handler(
      base::BindOnce(&Job::OnConnectionError, base::Unretained(this)));

  mojom::SerializeAsMHTMLParamsPtr request = CreateMojoParams();
  // File::Duplicate() gives the renderer its own reference to the file.
  request->output_handle =
      mojom::MhtmlOutputHandle::NewFileHandle(browser_file_.Duplicate());

  // Record which frame is in flight, then send the serialization request.
  DCHECK(frame_tree_node_id_of_busy_frame_.is_null());
  frame_tree_node_id_of_busy_frame_ = next_id;
  writer_->SerializeAsMHTML(
      std::move(request), base::BindOnce(&Job::SerializeAsMHTMLResponse,
                                         weak_factory_.GetWeakPtr()));

  TRACE_EVENT_NESTABLE_ASYNC_BEGIN1("page-serialization", "WaitingOnRenderer",
                                    this, "frame tree node id",
                                    frame_tree_node_id_of_busy_frame_);
  return mojom::MhtmlSaveStatus::kSuccess;
}
// Creates |watcher_| on the download sequence and arms it on
// |mhtml_data_consumer_| so WriteMHTMLToDisk() runs whenever new data is
// readable or the peer closes. If the watch cannot be set up, completion is
// reported immediately with kStreamingError.
void MHTMLGenerationManager::Job::BeginWatchingHandle(
    MHTMLWriteCompleteCallback callback) {
  DCHECK(download::GetDownloadTaskRunner()->RunsTasksInCurrentSequence());
  DCHECK(!watcher_);

  watcher_ = std::make_unique<mojo::SimpleWatcher>(
      FROM_HERE, mojo::SimpleWatcher::ArmingPolicy::AUTOMATIC,
      download::GetDownloadTaskRunner());

  // base::Unretained is safe, as |this| owns |mhtml_data_consumer_|, which
  // is responsible for invoking |watcher_| callbacks.
  const MojoResult watch_result = watcher_->Watch(
      mhtml_data_consumer_.get(),
      MOJO_HANDLE_SIGNAL_NEW_DATA_READABLE | MOJO_HANDLE_SIGNAL_PEER_CLOSED,
      MOJO_WATCH_CONDITION_SATISFIED,
      base::BindRepeating(&Job::WriteMHTMLToDisk, base::Unretained(this),
                          callback));
  if (watch_result == MOJO_RESULT_OK) {
    return;
  }

  DLOG(ERROR) << "Failed to strap watcher to consumer handle.";
  OnWriteComplete(callback, mojom::MhtmlSaveStatus::kStreamingError);
}
// Watcher callback: drains whatever is currently readable from
// |mhtml_data_consumer_| into |browser_file_|, feeding each chunk to
// |secure_hash_| when a hash is being computed.
//
// |callback| is forwarded to OnWriteComplete() when streaming ends:
//   - kFileWritingError if a chunk could not be written in full,
//   - kStreamingError if the pipe read failed with an unexpected result,
//   - kSuccess once the peer has closed without any of the above.
// If the pipe merely has no more data for now, nothing is reported and the
// watcher is left to fire again.
void MHTMLGenerationManager::Job::WriteMHTMLToDisk(
    MHTMLWriteCompleteCallback callback,
    MojoResult result,
    const mojo::HandleSignalsState& state) {
  DCHECK(download::GetDownloadTaskRunner()->RunsTasksInCurrentSequence());
  DCHECK_NE(result, MOJO_RESULT_FAILED_PRECONDITION);

  // Begin consumer data pipe handle read and file write loop.
  std::vector<uint8_t> buffer(1024, 0x00);
  size_t actually_read_bytes = 0;
  while (result == MOJO_RESULT_OK && state.readable()) {
    result = mhtml_data_consumer_->ReadData(MOJO_READ_DATA_FLAG_NONE,
                                            base::as_writable_byte_span(buffer),
                                            actually_read_bytes);
    if (result != MOJO_RESULT_OK) {
      break;
    }

    std::string_view read_chars =
        base::as_string_view(buffer).substr(0, actually_read_bytes);
    if (secure_hash_) {
      secure_hash_->Update(read_chars.data(), read_chars.size());
    }

    // WriteAtCurrentPosAndCheck() only succeeds when the whole chunk was
    // written. Checking just for a negative byte count would let a short
    // write (e.g. disk full part-way through) be reported as success and
    // leave a truncated MHTML file behind.
    if (!browser_file_.WriteAtCurrentPosAndCheck(
            base::as_byte_span(read_chars))) {
      DLOG(ERROR) << "Error writing to file handle.";
      OnWriteComplete(std::move(callback),
                      mojom::MhtmlSaveStatus::kFileWritingError);
      return;
    }
  }

  // SHOULD_WAIT (pipe drained for now) and FAILED_PRECONDITION (peer closed)
  // are expected ways for the read loop to end; anything else is an error.
  if (result != MOJO_RESULT_OK && result != MOJO_RESULT_FAILED_PRECONDITION &&
      result != MOJO_RESULT_SHOULD_WAIT) {
    DLOG(ERROR) << "Error streaming MHTML data to the Browser.";
    OnWriteComplete(std::move(callback),
                    mojom::MhtmlSaveStatus::kStreamingError);
    return;
  }

  // Only notify successful write completion if peer handle is closed without
  // any errors.
  if (state.peer_closed()) {
    OnWriteComplete(std::move(callback), mojom::MhtmlSaveStatus::kSuccess);
  }
}
// Runs on the download sequence once streaming for a frame has ended: tears
// down |watcher_| there and posts |callback| with |save_status| to the UI
// thread.
void MHTMLGenerationManager::Job::OnWriteComplete(
    MHTMLWriteCompleteCallback callback,
    mojom::MhtmlSaveStatus save_status) {
  DCHECK(download::GetDownloadTaskRunner()->RunsTasksInCurrentSequence());

  watcher_.reset();

  auto notify_ui = base::BindOnce(std::move(callback), save_status);
  GetUIThreadTaskRunner({})->PostTask(FROM_HERE, std::move(notify_ui));
}
// UI-thread notification that streaming for the current frame has completed
// with |save_status|. Ignored when the Job was already finalized early and
// marked as finished.
void MHTMLGenerationManager::Job::DoneWritingToDisk(
    mojom::MhtmlSaveStatus save_status) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);

  if (!is_finished_) {
    waiting_on_data_streaming_ = false;
    MaybeSendToNextRenderFrame(save_status);
  }
}
// Disconnect handler for |writer_|. A closed message pipe end is treated as an
// unexpected renderer crash, so the job is finalized with
// kRenderProcessExited.
void MHTMLGenerationManager::Job::OnConnectionError() {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);

  DLOG(ERROR) << "Message pipe to renderer closed while expecting response";
  Finalize(mojom::MhtmlSaveStatus::kRenderProcessExited);
}
// Reply to the file-creation task. Finalizes with kFileCreationError when the
// file is invalid; otherwise stores the handle and sends the first
// serialization request, finalizing right away if that request cannot be sent.
void MHTMLGenerationManager::Job::OnFileAvailable(base::File browser_file) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);

  if (!browser_file.IsValid()) {
    DLOG(ERROR) << "Failed to create file";
    Finalize(mojom::MhtmlSaveStatus::kFileCreationError);
    return;
  }

  browser_file_ = std::move(browser_file);

  const mojom::MhtmlSaveStatus first_send_status = SendToNextRenderFrame();
  if (first_send_status == mojom::MhtmlSaveStatus::kSuccess) {
    return;
  }
  Finalize(first_send_status);
}
void MHTMLGenerationManager::Job::OnFinished(
const CloseFileResult& close_file_result) {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
mojom::MhtmlSaveStatus save_status = close_file_result.save_status;
int64_t file_size = close_file_result.file_size;
TRACE_EVENT_NESTABLE_ASYNC_END2("page-serialization", "SavingMhtmlJob", this,
"job save status", save_status, "file size",
file_size);
std::move(callback_).Run(close_file_result.toMHTMLGenerationResult());
delete this; // This is the last time the Job is referenced.
}
// Flags the Job as finished and disconnects from the renderer so that no
// further renderer notifications are delivered.
void MHTMLGenerationManager::Job::MarkAsFinished() {
  // A second call could only happen if writer_.reset() failed to stop
  // OnConnectionError notifications, i.e. the scenario described in
  // https://crbug.com/612098.
  if (is_finished_) {
    NOTREACHED();
  }

  is_finished_ = true;
  writer_.reset();

  // |watcher_| lives on the download sequence, so its notifications cannot be
  // switched off here the way |writer_|'s are. It may therefore still invoke
  // DoneWritingToDisk() afterwards; that case is handled in
  // DoneWritingToDisk() itself.
  TRACE_EVENT_NESTABLE_ASYNC_INSTANT0("page-serialization", "JobFinished",
                                      this);
}
// Moves the file, extra data parts, watcher and hash state over to the
// download task runner, where FinalizeOnFileThread() writes the footer (on
// success) and closes the file. OnFinished() receives the result on the UI
// thread if the Job is still alive.
void MHTMLGenerationManager::Job::CloseFile(
    mojom::MhtmlSaveStatus save_status) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);
  DCHECK(!mhtml_boundary_marker_.empty());

  // An invalid file turns success into a writing error, but an error that was
  // reported earlier is never overwritten.
  const bool had_earlier_error =
      save_status != mojom::MhtmlSaveStatus::kSuccess;
  if (!had_earlier_error && !browser_file_.IsValid()) {
    save_status = mojom::MhtmlSaveStatus::kFileWritingError;
  }

  // If no previous error occurred the boundary should be sent.
  auto finalize_task = base::BindOnce(
      &MHTMLGenerationManager::Job::FinalizeOnFileThread, save_status,
      mhtml_boundary_marker_, std::move(browser_file_),
      std::move(extra_data_parts_), std::move(watcher_),
      std::move(secure_hash_));
  auto on_finalized =
      base::BindOnce(&Job::OnFinished, weak_factory_.GetWeakPtr());
  download::GetDownloadTaskRunner()->PostTaskAndReplyWithResult(
      FROM_HERE, std::move(finalize_task), std::move(on_finalized));
}
// Mojo reply from the renderer for the frame currently being serialized.
// Clears the busy-frame marker, records the newly serialized resource digests
// when the renderer succeeded, and then moves on to the next frame or
// finalizes.
void MHTMLGenerationManager::Job::SerializeAsMHTMLResponse(
    mojom::MhtmlSaveStatus save_status,
    const std::vector<std::string>& digests_of_uris_of_serialized_resources) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);
  TRACE_EVENT_NESTABLE_ASYNC_END0("page-serialization", "WaitingOnRenderer",
                                  this);

  // No frame is awaiting a renderer response any more.
  frame_tree_node_id_of_busy_frame_ = FrameTreeNodeId();

  const bool renderer_succeeded =
      save_status == mojom::MhtmlSaveStatus::kSuccess;
  if (renderer_succeeded) {
    RecordDigests(digests_of_uris_of_serialized_resources);
  }

  MaybeSendToNextRenderFrame(save_status);
}
// Adds the digests reported by the renderer to
// |digests_of_already_serialized_uris_|, so later frames are told to skip
// them.
void MHTMLGenerationManager::Job::RecordDigests(
    const std::vector<std::string>& digests_of_uris_of_serialized_resources) {
  // The renderer is expected to have deduplicated resources with identical
  // URIs, so none of the incoming digests should already be known.
  DCHECK_EQ(0u, base::STLSetIntersection<std::set<std::string>>(
                    digests_of_already_serialized_uris_,
                    std::set<std::string>(
                        digests_of_uris_of_serialized_resources.begin(),
                        digests_of_uris_of_serialized_resources.end()))
                    .size());

  for (const std::string& digest : digests_of_uris_of_serialized_resources) {
    digests_of_already_serialized_uris_.insert(digest);
  }
}
// Advances the job after a renderer response or a completed disk write:
// sends the next frame when the current one is fully done, finalizes on any
// failure, and finalizes with success once no frames remain.
void MHTMLGenerationManager::Job::MaybeSendToNextRenderFrame(
    mojom::MhtmlSaveStatus save_status) {
  constexpr auto kOk = mojom::MhtmlSaveStatus::kSuccess;

  // With a successful current step and frames still pending, the overall
  // status becomes whatever sending the next request yields.
  const bool can_send_next = save_status == kOk &&
                             !pending_frame_tree_node_ids_.empty() &&
                             CurrentFrameDone();
  if (can_send_next) {
    save_status = SendToNextRenderFrame();
  }

  // Any failure, from the renderer or from the job itself, ends the job here.
  if (save_status != kOk) {
    Finalize(save_status);
    return;
  }

  // CurrentFrameDone() is deliberately queried again: a request sent above
  // makes the current frame busy, which must suppress completion here.
  const bool everything_serialized =
      pending_frame_tree_node_ids_.empty() && CurrentFrameDone();
  if (everything_serialized) {
    Finalize(kOk);
  }
}
// True when no frame is awaiting a renderer response and no data is still
// being streamed for the current frame.
bool MHTMLGenerationManager::Job::CurrentFrameDone() const {
  if (frame_tree_node_id_of_busy_frame_) {
    // Still waiting for the renderer's response.
    return false;
  }
  return !waiting_on_data_streaming_;
}
// Ends the job with |save_status|: first marks it finished (which disconnects
// |writer_|), then starts closing the output file.
void MHTMLGenerationManager::Job::Finalize(
    mojom::MhtmlSaveStatus save_status) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);

  MarkAsFinished();
  CloseFile(save_status);
}
// static
// Allocates a self-owned Job. Construction itself starts serialization, and
// the instance deletes itself in OnFinished(), so the pointer is intentionally
// not retained.
void MHTMLGenerationManager::Job::StartNewJob(
    WebContents* web_contents,
    const MHTMLGenerationParams& params,
    MHTMLGenerationResult::GenerateMHTMLCallback callback) {
  new Job(web_contents, params, std::move(callback));
}
// static
// Appends |to_write| at the file's current position. The hash (when one is
// supplied) is only updated after the complete string has been written.
// Returns whether the write succeeded.
bool MHTMLGenerationManager::Job::WriteToFileAndUpdateHash(
    base::File* file,
    crypto::SecureHash* secure_hash,
    std::string to_write) {
  const bool fully_written =
      file->WriteAtCurrentPosAndCheck(base::as_byte_span(to_write));
  if (fully_written && secure_hash) {
    secure_hash->Update(to_write.data(), to_write.size());
  }
  return fully_written;
}
// static
// Runs on the download sequence. Destroys |watcher| there, and, if
// |save_status| is still kSuccess, appends the extra data parts and the MHTML
// footer (both delimited by |boundary|) to |file|. The file is then closed.
//
// Returns the final status together with the file size and, when
// |secure_hash| was provided and the save succeeded, the finished digest.
// On any failure the size is kInvalidFileSize and no digest is returned. An
// earlier error in |save_status| is never replaced by a later one.
CloseFileResult MHTMLGenerationManager::Job::FinalizeOnFileThread(
    mojom::MhtmlSaveStatus save_status,
    const std::string& boundary,
    base::File file,
    const std::vector<MHTMLExtraDataPart>& extra_data_parts,
    std::unique_ptr<mojo::SimpleWatcher> watcher,
    std::unique_ptr<crypto::SecureHash> secure_hash) {
  DCHECK(download::GetDownloadTaskRunner()->RunsTasksInCurrentSequence());
  watcher.reset();
  DCHECK(!boundary.empty());

  if (save_status == mojom::MhtmlSaveStatus::kSuccess) {
    TRACE_EVENT0("page-serialization",
                 "MHTMLGenerationManager::Job MHTML footer writing");
#if BUILDFLAG(IS_FUCHSIA)
    // TODO(crbug.com/42050414): Remove the Seek call.
    // On fuchsia, fds do not share state. As the fd has been duped and sent to
    // the renderer process, it must be seeked to the end to ensure the data is
    // appended.
    if (file.Seek(base::File::FROM_END, 0) == -1) {
      save_status = mojom::MhtmlSaveStatus::kFileWritingError;
    }
#endif  // BUILDFLAG(IS_FUCHSIA)

    // Write the extra data into a part of its own, if we have any. This is
    // skipped after a failed seek above, because the write would land at the
    // wrong offset of a file that is already being reported as failed.
    if (save_status == mojom::MhtmlSaveStatus::kSuccess) {
      const std::string serialized_extra_data_parts =
          CreateExtraDataParts(boundary, extra_data_parts);
      // Short circuit to prevent file IO if nothing to write.
      if (!serialized_extra_data_parts.empty() &&
          !WriteToFileAndUpdateHash(&file, secure_hash.get(),
                                    serialized_extra_data_parts)) {
        save_status = mojom::MhtmlSaveStatus::kFileWritingError;
      }
    }

    // Write out the footer at the bottom of the file.
    if (save_status == mojom::MhtmlSaveStatus::kSuccess &&
        !WriteToFileAndUpdateHash(&file, secure_hash.get(),
                                  CreateFooter(boundary))) {
      save_status = mojom::MhtmlSaveStatus::kFileWritingError;
    }
  }

  // If the file is still valid try to close it. Only update the status if that
  // won't hide an earlier error. The size starts out as the sentinel so it is
  // never left uninitialized when the file cannot be closed.
  int64_t file_size = kInvalidFileSize;
  if (!CloseFileIfValid(file, &file_size) &&
      save_status == mojom::MhtmlSaveStatus::kSuccess) {
    save_status = mojom::MhtmlSaveStatus::kFileClosingError;
  }
  if (save_status != mojom::MhtmlSaveStatus::kSuccess) {
    file_size = kInvalidFileSize;
  }

  // If we do not have a pending hash or the file is invalid, finalize operation
  // with an empty digest result.
  if (!secure_hash || file_size == kInvalidFileSize) {
    return CloseFileResult(save_status, file_size, nullptr);
  }

  // Record hash and finish operation.
  std::string file_digest(secure_hash->GetHashLength(), '\0');
  secure_hash->Finish(file_digest.data(), file_digest.size());
  secure_hash.reset();
  return CloseFileResult(save_status, file_size, &file_digest);
}
// static
// Serializes every entry of |extra_data_parts| as its own MIME part delimited
// by |boundary| and returns the concatenation. The result is empty when there
// are no extra parts.
std::string MHTMLGenerationManager::Job::CreateExtraDataParts(
    const std::string& boundary,
    const std::vector<MHTMLExtraDataPart>& extra_data_parts) {
  DCHECK(download::GetDownloadTaskRunner()->RunsTasksInCurrentSequence());

  std::string all_parts;
  for (const auto& part : extra_data_parts) {
    // Layout of one part: newline, boundary, newline, content location,
    // newline, content type, newline, extra headers, two newlines, body,
    // trailing newline.
    const std::string one_part = base::StringPrintf(
        "\r\n--%s\r\n%s%s\r\n%s%s\r\n%s\r\n\r\n%s\r\n", boundary.c_str(),
        kContentLocation, part.content_location.c_str(), kContentType,
        part.content_type.c_str(), part.extra_headers.c_str(),
        part.body.c_str());
    DCHECK(base::IsStringASCII(one_part));
    all_parts += one_part;
  }
  return all_parts;
}
// static
// Returns the closing delimiter of the MHTML document for |boundary|.
std::string MHTMLGenerationManager::Job::CreateFooter(
    const std::string& boundary) {
  DCHECK(download::GetDownloadTaskRunner()->RunsTasksInCurrentSequence());

  // The leading CRLF is there because, per the spec, the boundary must occur
  // at the beginning of a line.
  const std::string closing_delimiter =
      base::StringPrintf("\r\n--%s--\r\n", boundary.c_str());
  DCHECK(base::IsStringASCII(closing_delimiter));
  return closing_delimiter;
}
// static
// If |file| is valid, stores its length in |*file_size|, closes it and returns
// true. Otherwise returns false and leaves |*file_size| untouched.
bool MHTMLGenerationManager::Job::CloseFileIfValid(base::File& file,
                                                   int64_t* file_size) {
  DCHECK(download::GetDownloadTaskRunner()->RunsTasksInCurrentSequence());
  DCHECK(file_size);

  if (!file.IsValid()) {
    return false;
  }

  // The length has to be read before the handle is closed.
  *file_size = file.GetLength();
  file.Close();
  return true;
}
// Returns the MHTMLGenerationManager instance managed by base::Singleton.
MHTMLGenerationManager* MHTMLGenerationManager::GetInstance() {
  using ManagerSingleton = base::Singleton<MHTMLGenerationManager>;
  return ManagerSingleton::get();
}
// Construction and destruction are compiler-generated.
MHTMLGenerationManager::MHTMLGenerationManager() = default;
MHTMLGenerationManager::~MHTMLGenerationManager() = default;
// Public entry point, UI thread only. Starts a self-owned Job that serializes
// |web_contents| according to |params| and eventually runs |callback| with the
// result.
void MHTMLGenerationManager::SaveMHTML(
    WebContents* web_contents,
    const MHTMLGenerationParams& params,
    MHTMLGenerationResult::GenerateMHTMLCallback callback) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);

  Job::StartNewJob(web_contents, params, std::move(callback));
}
} // namespace content