| // Copyright 2012 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "content/browser/download/mhtml_generation_manager.h" |
| |
| #include <tuple> |
| #include <utility> |
| |
| #include "base/containers/queue.h" |
| #include "base/containers/span.h" |
| #include "base/files/file.h" |
| #include "base/functional/bind.h" |
| #include "base/memory/ptr_util.h" |
| #include "base/metrics/histogram_macros.h" |
| #include "base/numerics/safe_conversions.h" |
| #include "base/stl_util.h" |
| #include "base/strings/string_util.h" |
| #include "base/strings/stringprintf.h" |
| #include "base/time/time.h" |
| #include "base/trace_event/trace_event.h" |
| #include "base/types/optional_util.h" |
| #include "base/uuid.h" |
| #include "components/download/public/common/download_task_runner.h" |
| #include "content/browser/bad_message.h" |
| #include "content/browser/download/mhtml_extra_parts_impl.h" |
| #include "content/browser/renderer_host/frame_tree_node.h" |
| #include "content/browser/renderer_host/render_frame_host_impl.h" |
| #include "content/browser/web_contents/web_contents_impl.h" |
| #include "content/common/download/mhtml_file_writer.mojom.h" |
| #include "content/public/browser/browser_thread.h" |
| #include "content/public/browser/mhtml_extra_parts.h" |
| #include "content/public/browser/mhtml_generation_result.h" |
| #include "content/public/browser/render_frame_host.h" |
| #include "content/public/browser/render_process_host.h" |
| #include "content/public/browser/web_contents.h" |
| #include "content/public/common/mhtml_generation_params.h" |
| #include "crypto/secure_hash.h" |
| #include "crypto/sha2.h" |
| #include "mojo/core/embedder/embedder.h" |
| #include "mojo/public/cpp/bindings/associated_remote.h" |
| #include "net/base/mime_util.h" |
| #include "third_party/blink/public/common/associated_interfaces/associated_interface_provider.h" |
| |
| #if BUILDFLAG(IS_WIN) |
| #include <windows.h> |
| |
| #include "base/win/security_util.h" |
| #include "base/win/sid.h" |
| #endif // BUILDFLAG(IS_WIN) |
| |
| namespace { |
| |
| // Callback to notify the UI thread that writing to the MHTML file is complete. |
| using MHTMLWriteCompleteCallback = |
| base::RepeatingCallback<void(content::mojom::MhtmlSaveStatus)>; |
| |
// Header-name prefixes written in front of the content location and content
// type of every extra data part.
constexpr char kContentLocation[] = "Content-Location: ";
constexpr char kContentType[] = "Content-Type: ";

// Sentinel reported as the file size when the save did not succeed. It is a
// compile-time constant of the same type as the sizes it is compared against,
// so it can neither be modified at runtime nor trigger mixed-width
// comparisons.
constexpr int64_t kInvalidFileSize = -1;
| |
| #if BUILDFLAG(IS_WIN) |
| // Attempts to deny execute access to the file at `path`. |
| bool DenyExecuteAccessToMHTMLFile(const base::FilePath& path) { |
| static constexpr wchar_t kEveryoneSid[] = L"WD"; |
| auto sids = base::win::Sid::FromSddlStringVector({kEveryoneSid}); |
| if (!sids) { |
| return false; |
| } |
| return base::win::DenyAccessToPath(path, *sids, FILE_EXECUTE, |
| /*NO_INHERITANCE=*/0, /*recursive=*/false); |
| } |
| #endif // BUILDFLAG(IS_WIN) |
| |
| // CloseFileResult holds the result of closing the generated file using the |
| // status of the operation, a file size and a pointer to a file digest. It |
| // stores the values of the status and size directly, and makes a copy of the |
| // digest if present. |
| struct CloseFileResult { |
| CloseFileResult(content::mojom::MhtmlSaveStatus status, |
| int64_t size, |
| std::string* digest) |
| : save_status(status), file_size(size) { |
| if (digest) |
| file_digest = std::optional<std::string>(*digest); |
| } |
| |
| content::mojom::MhtmlSaveStatus save_status; |
| int64_t file_size; |
| std::optional<std::string> file_digest; |
| |
| content::MHTMLGenerationResult toMHTMLGenerationResult() const { |
| return content::MHTMLGenerationResult(file_size, |
| base::OptionalToPtr(file_digest)); |
| } |
| }; |
| |
| base::File CreateMHTMLFile(const base::FilePath& file_path) { |
| DCHECK(download::GetDownloadTaskRunner()->RunsTasksInCurrentSequence()); |
| |
| // SECURITY NOTE: A file descriptor to the file created below will be passed |
| // to multiple renderer processes which (in out-of-process iframes mode) can |
| // act on behalf of separate web principals. Therefore it is important to |
| // only allow writing to the file and forbid reading from the file (as this |
| // would allow reading content generated by other renderers / other web |
| // principals). |
| uint32_t file_flags = base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE; |
| |
| base::File browser_file(file_path, file_flags); |
| if (!browser_file.IsValid()) { |
| DLOG(ERROR) << "Failed to create file to save MHTML at: " |
| << file_path.value(); |
| } |
| #if BUILDFLAG(IS_WIN) |
| // SECURITY NOTE: On Windows, it is not safe to pass a writeable file handle |
| // to a renderer that could be re-opened executable. Attempting to do so will |
| // cause a DCHECK in mojo. |
| // |
| // Normally it would be best to use base::PreventExecuteMapping or the |
| // base::File::Flags::FLAG_WIN_NO_EXECUTE flag, but both of these will |
| // DCHECK if the File is outside of a set of safe directories, and the MHTML |
| // files are usually located in a user-controlled directory e.g. |
| // the Downloads directory. |
| // |
| // In this case, however, the file is an MHTML file, which we can mark |
| // no-execute with no side-effects as it will never be mapped into memory |
| // executable and it is not a real 'executable' file. |
| // |
| // It's important to note that this does not prevent the file being |
| // double-clicked on or opened in any application, since that is done via |
| // ShellExecute which does not need the FILE_EXECUTE permission on the file. |
| // |
| // If this fails, then it's likely other filesystem operations will also fail, |
| // so there isn't much that can be done. In this case, mojo will also deny the |
| // transit of the file handle to the renderer, and the MHTML file creation |
| // will fail. |
| std::ignore = DenyExecuteAccessToMHTMLFile(file_path); |
| #endif |
| return browser_file; |
| } |
| |
| } // namespace |
| |
| namespace content { |
| |
| // The class and all of its members live on the UI thread. Only static methods |
| // are executed on other threads. |
| // Job instances are created in MHTMLGenerationManager::Job::StartNewJob(), |
| // proceeding with the MHTML saving process unmanaged. Every instance is |
| // self-owned and responsible for deleting itself upon invoking OnFinished. |
| // With self-ownership lifetime concerns, we make the following precautions: |
| // - SerializeAsMHTMLResponse() always proceeds with finalizing upon detecting |
| // Job completion/cancellation. |
| // - Jobs are prematurely finalized and deleted upon detecting a connection |
| // error with the message pipe during serialization. |
| // - Any pending callbacks after deletion are invalidated using weak pointers. |
| class MHTMLGenerationManager::Job { |
| public: |
| // Creates and registers a new job. |
| static void StartNewJob( |
| WebContents* web_contents, |
| const MHTMLGenerationParams& params, |
| MHTMLGenerationResult::GenerateMHTMLCallback callback); |
| |
| Job(const Job&) = delete; |
| Job& operator=(const Job&) = delete; |
| |
| private: |
| Job(WebContents* web_contents, |
| const MHTMLGenerationParams& params, |
| MHTMLGenerationResult::GenerateMHTMLCallback callback); |
| ~Job(); |
| |
| // Begins queuing frames from web_contents, creates a new MHTML file and |
| // begins page serialization to created file. |
| void initializeJob(WebContents* web_contents); |
| |
| // Writes the string |to_write| to the file. If successful, updates hash and |
| // returns true, otherwise, returns false. Does not take ownership of |file| |
| // nor |raw_secure_hash|. |
| static bool WriteToFileAndUpdateHash(base::File* file, |
| crypto::SecureHash* secure_hash, |
| std::string to_write); |
| |
| // Writes the MHTML footer to the file and closes it. It also receives the |
| // SimpleWatcher instance used to watch the data pipe and the current hash |
| // state for safe destruction on the IO thread. |
| // |
| // Note: The same |boundary| marker must be used for all "boundaries" -- in |
| // the header, parts and footer -- that belong to the same MHTML document (see |
| // also rfc1341, section 7.2.1, "boundary" description). |
| static CloseFileResult FinalizeOnFileThread( |
| mojom::MhtmlSaveStatus save_status, |
| const std::string& boundary, |
| base::File file, |
| const std::vector<MHTMLExtraDataPart>& extra_data_parts, |
| std::unique_ptr<mojo::SimpleWatcher> watcher, |
| std::unique_ptr<crypto::SecureHash> secure_hash); |
| |
| // Creates a string that encompasses any remaining extra data parts to write |
| // to the file. |
| static std::string CreateExtraDataParts( |
| const std::string& boundary, |
| const std::vector<MHTMLExtraDataPart>& extra_data_parts); |
| |
| // Creates a string with the contents if htem MHTML file footer. |
| static std::string CreateFooter(const std::string& boundary); |
| |
| // Called on the UI thread when the file that should hold the MHTML data has |
| // been created. |
| void OnFileAvailable(base::File browser_file); |
| |
| // Called on the UI thread after the file got finalized and we have its size, |
| // or an error occurred while creating a new file. |
| void OnFinished(const CloseFileResult& result); |
| |
| // Starts watching a handle on the file thread. Instantiates a new instance |
| // of |watcher_| upon call. |
| void BeginWatchingHandle(MHTMLWriteCompleteCallback callback); |
| |
| // Writes data from the consumer handle to the new MHTML file. Only done |
| // with on the fly hash computation. |
| // Bound to the data pipe watcher and called upon notification of write |
| // completion to producer pipe sent to the Renderer. |
| // TODO(crbug.com/40606905): Eventually simplify this implementation |
| // with a DataPipeDrainer once error signalling is implemented there. |
| void WriteMHTMLToDisk(MHTMLWriteCompleteCallback callback, |
| MojoResult result, |
| const mojo::HandleSignalsState& state); |
| |
| // Destroys |watcher_| instance and notifies UI thread of write completion. |
| void OnWriteComplete(MHTMLWriteCompleteCallback callback, |
| mojom::MhtmlSaveStatus save_status); |
| |
| // Notifies Job of frame write completion and sends request to next render |
| // frame if the response was blocked by the write operation. |
| void DoneWritingToDisk(mojom::MhtmlSaveStatus save_status); |
| |
| // Called when the message pipe to the renderer is disconnected. |
| void OnConnectionError(); |
| |
| // Handler for the Mojo interface callback (a notification from the |
| // renderer that the MHTML generation for previous frame has finished). |
| void SerializeAsMHTMLResponse( |
| mojom::MhtmlSaveStatus save_status, |
| const std::vector<std::string>& digests_of_uris_of_serialized_resources); |
| |
| // Records newly serialized resource digests into |
| // |digests_of_already_serialized_uris_|. |
| void RecordDigests( |
| const std::vector<std::string>& digests_of_uris_of_serialized_resources); |
| |
| // Continues sending serialization requests to the next frame if ready and |
| // there are more frames to be serialized. |
| void MaybeSendToNextRenderFrame(mojom::MhtmlSaveStatus save_status); |
| |
| // Packs up the current status of the MHTML file save operation into a Mojo |
| // struct to send to the renderer process. |
| mojom::SerializeAsMHTMLParamsPtr CreateMojoParams(); |
| |
| // Sends Mojo interface call to the renderer, asking for MHTML |
| // generation of the next frame. Returns MhtmlSaveStatus::kSuccess or a |
| // specific error status. |
| mojom::MhtmlSaveStatus SendToNextRenderFrame(); |
| |
| // Indicates if the writing operation on the IO thread is complete, and |
| // we have received a response from the Renderer. |
| // This check is necessary to provide synchronization between file writing |
| // operations and MHTML serialization. |
| bool CurrentFrameDone() const; |
| |
| // Called on the UI thread when a job has been finished. |
| void Finalize(mojom::MhtmlSaveStatus save_status); |
| |
| // Write the MHTML footer and close the file on the file thread and respond |
| // back on the UI thread with the updated status and file size (which will be |
| // negative in case of errors). |
| void CloseFile(mojom::MhtmlSaveStatus save_status); |
| |
| // Marks the Job as completed, preventing any further notifications from the |
| // Renderer. This prevents the race/crash from https://crbug.com/612098. |
| void MarkAsFinished(); |
| |
| // Close the MHTML file if it looks good, setting the size param. Returns |
| // false for failure. |
| static bool CloseFileIfValid(base::File& file, int64_t* file_size); |
| |
| // User-configurable parameters. Includes the file location, binary encoding |
| // choices. |
| MHTMLGenerationParams params_; |
| |
| // The IDs of frames that still need to be processed. |
| base::queue<FrameTreeNodeId> pending_frame_tree_node_ids_; |
| |
| // Identifies a frame to which we've sent through |
| // MhtmlFileWriter::SerializeAsMHTML but for which we didn't yet process |
| // the response via SerializeAsMHTMLResponse. |
| FrameTreeNodeId frame_tree_node_id_of_busy_frame_; |
| |
| // The handle to the file the MHTML is saved to for the browser process. |
| base::File browser_file_; |
| |
| // MIME multipart boundary to use in the MHTML doc. |
| const std::string mhtml_boundary_marker_; |
| |
| // Digests of URIs of already generated MHTML parts. |
| std::set<std::string> digests_of_already_serialized_uris_; |
| std::string salt_; |
| |
| // The callback to call once generation is complete. |
| MHTMLGenerationResult::GenerateMHTMLCallback callback_; |
| |
| // Whether the job is finished (set to true only for the short duration of |
| // time between MHTMLGenerationManager::Job::Finalize is called and the job is |
| // destroyed by MHTMLGenerationManager::Job::OnFinished). |
| bool is_finished_; |
| |
| // Any extra data parts that should be emitted into the output MHTML. |
| std::vector<MHTMLExtraDataPart> extra_data_parts_; |
| |
| // MHTMLFileWriter instance for the frame being currently serialized. |
| mojo::AssociatedRemote<mojom::MhtmlFileWriter> writer_; |
| |
| // Watcher to detect new data written to |mhtml_data_consumer_|. |
| // This is instantiated and destroyed in the download sequence for each frame. |
| std::unique_ptr<mojo::SimpleWatcher> watcher_; |
| |
| // Consumer handle for data pipe streaming. |
| mojo::ScopedDataPipeConsumerHandle mhtml_data_consumer_; |
| |
| // Indicates whether there is currently data being streamed from the Renderer. |
| // Not used when the renderer is writing directly to file. |
| bool waiting_on_data_streaming_; |
| |
| // Current state of contents hash computation. |
| // This is updated upon every successful file write and finalized in the |
| // download sequence. |
| std::unique_ptr<crypto::SecureHash> secure_hash_; |
| |
| base::WeakPtrFactory<Job> weak_factory_{this}; |
| }; |
| |
| MHTMLGenerationManager::Job::Job( |
| WebContents* web_contents, |
| const MHTMLGenerationParams& params, |
| MHTMLGenerationResult::GenerateMHTMLCallback callback) |
| : params_(params), |
| mhtml_boundary_marker_(net::GenerateMimeMultipartBoundary()), |
| salt_(base::Uuid::GenerateRandomV4().AsLowercaseString()), |
| callback_(std::move(callback)), |
| is_finished_(false), |
| waiting_on_data_streaming_(false) { |
| initializeJob(web_contents); |
| } |
| |
| MHTMLGenerationManager::Job::~Job() { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| DCHECK(!watcher_); |
| } |
| |
| void MHTMLGenerationManager::Job::initializeJob(WebContents* web_contents) { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| |
| TRACE_EVENT_NESTABLE_ASYNC_BEGIN2( |
| "page-serialization", "SavingMhtmlJob", this, "url", |
| web_contents->GetLastCommittedURL().possibly_invalid_spec(), "file", |
| params_.file_path.AsUTF8Unsafe()); |
| |
| // Only include nodes from the primary frame tree, since an MHTML document |
| // would not be able to load inner frame trees (e.g. fenced frames). |
| for (FrameTreeNode* node : static_cast<WebContentsImpl*>(web_contents) |
| ->GetPrimaryFrameTree() |
| .Nodes()) { |
| if (node->current_frame_host()->inner_tree_main_frame_tree_node_id()) { |
| // Skip inner tree placeholder nodes. |
| continue; |
| } |
| pending_frame_tree_node_ids_.push(node->frame_tree_node_id()); |
| } |
| |
| // Main frame needs to be processed first. |
| DCHECK(!pending_frame_tree_node_ids_.empty()); |
| DCHECK(FrameTreeNode::GloballyFindByID(pending_frame_tree_node_ids_.front()) |
| ->parent() == nullptr); |
| |
| // Save off any extra data. |
| auto* extra_parts = static_cast<MHTMLExtraPartsImpl*>( |
| MHTMLExtraParts::FromWebContents(web_contents)); |
| if (extra_parts) |
| extra_data_parts_ = extra_parts->parts(); |
| |
| download::GetDownloadTaskRunner()->PostTaskAndReplyWithResult( |
| FROM_HERE, base::BindOnce(&CreateMHTMLFile, params_.file_path), |
| base::BindOnce(&Job::OnFileAvailable, weak_factory_.GetWeakPtr())); |
| } |
| |
| mojom::SerializeAsMHTMLParamsPtr |
| MHTMLGenerationManager::Job::CreateMojoParams() { |
| mojom::SerializeAsMHTMLParamsPtr mojo_params = |
| mojom::SerializeAsMHTMLParams::New(); |
| mojo_params->mhtml_boundary_marker = mhtml_boundary_marker_; |
| mojo_params->mhtml_binary_encoding = params_.use_binary_encoding; |
| mojo_params->mhtml_popup_overlay_removal = params_.remove_popup_overlay; |
| |
| // Tell the renderer to skip (= deduplicate) already covered MHTML parts. |
| mojo_params->salt = salt_; |
| mojo_params->digests_of_uris_to_skip.assign( |
| digests_of_already_serialized_uris_.begin(), |
| digests_of_already_serialized_uris_.end()); |
| |
| return mojo_params; |
| } |
| |
| mojom::MhtmlSaveStatus MHTMLGenerationManager::Job::SendToNextRenderFrame() { |
| DCHECK(browser_file_.IsValid()); |
| DCHECK(!pending_frame_tree_node_ids_.empty()); |
| |
| FrameTreeNodeId frame_tree_node_id = pending_frame_tree_node_ids_.front(); |
| pending_frame_tree_node_ids_.pop(); |
| |
| FrameTreeNode* ftn = FrameTreeNode::GloballyFindByID(frame_tree_node_id); |
| if (!ftn) // The contents went away. |
| return mojom::MhtmlSaveStatus::kFrameNoLongerExists; |
| RenderFrameHost* rfh = ftn->current_frame_host(); |
| |
| if (writer_) { |
| // If we reached here, means the work for previous frame is done, so it is |
| // safe to cut the connection to the previous frame. |
| writer_.reset(); |
| } |
| |
| // Bind Mojo interface to the RenderFrame |
| rfh->GetRemoteAssociatedInterfaces()->GetInterface(&writer_); |
| |
| // Safe, as |writer_| is owned by this Job instance. |
| auto error_callback = |
| base::BindOnce(&Job::OnConnectionError, base::Unretained(this)); |
| writer_.set_disconnect_handler(std::move(error_callback)); |
| |
| mojom::SerializeAsMHTMLParamsPtr params(CreateMojoParams()); |
| |
| // File::Duplicate() creates a reference to this file for use in the |
| // Renderer. |
| params->output_handle = |
| mojom::MhtmlOutputHandle::NewFileHandle(browser_file_.Duplicate()); |
| |
| // Send a Mojo request to Renderer to serialize its frame. |
| DCHECK(frame_tree_node_id_of_busy_frame_.is_null()); |
| frame_tree_node_id_of_busy_frame_ = frame_tree_node_id; |
| |
| auto response_callback = base::BindOnce(&Job::SerializeAsMHTMLResponse, |
| weak_factory_.GetWeakPtr()); |
| writer_->SerializeAsMHTML(std::move(params), std::move(response_callback)); |
| |
| TRACE_EVENT_NESTABLE_ASYNC_BEGIN1("page-serialization", "WaitingOnRenderer", |
| this, "frame tree node id", |
| frame_tree_node_id_of_busy_frame_); |
| return mojom::MhtmlSaveStatus::kSuccess; |
| } |
| |
| void MHTMLGenerationManager::Job::BeginWatchingHandle( |
| MHTMLWriteCompleteCallback callback) { |
| DCHECK(download::GetDownloadTaskRunner()->RunsTasksInCurrentSequence()); |
| |
| DCHECK(!watcher_); |
| watcher_ = std::make_unique<mojo::SimpleWatcher>( |
| FROM_HERE, mojo::SimpleWatcher::ArmingPolicy::AUTOMATIC, |
| download::GetDownloadTaskRunner()); |
| |
| // base::Unretained is safe, as |this| owns |mhtml_data_consumer_|, which |
| // is responsible for invoking |watcher_| callbacks. |
| if (watcher_->Watch( |
| mhtml_data_consumer_.get(), |
| MOJO_HANDLE_SIGNAL_NEW_DATA_READABLE | MOJO_HANDLE_SIGNAL_PEER_CLOSED, |
| MOJO_WATCH_CONDITION_SATISFIED, |
| base::BindRepeating(&Job::WriteMHTMLToDisk, base::Unretained(this), |
| callback)) != MOJO_RESULT_OK) { |
| DLOG(ERROR) << "Failed to strap watcher to consumer handle."; |
| OnWriteComplete(callback, mojom::MhtmlSaveStatus::kStreamingError); |
| } |
| } |
| |
| void MHTMLGenerationManager::Job::WriteMHTMLToDisk( |
| MHTMLWriteCompleteCallback callback, |
| MojoResult result, |
| const mojo::HandleSignalsState& state) { |
| DCHECK(download::GetDownloadTaskRunner()->RunsTasksInCurrentSequence()); |
| DCHECK_NE(result, MOJO_RESULT_FAILED_PRECONDITION); |
| // Begin consumer data pipe handle read and file write loop. |
| std::vector<uint8_t> buffer(1024, 0x00); |
| size_t actually_read_bytes; |
| while (result == MOJO_RESULT_OK && state.readable()) { |
| result = mhtml_data_consumer_->ReadData(MOJO_READ_DATA_FLAG_NONE, |
| base::as_writable_byte_span(buffer), |
| actually_read_bytes); |
| if (result == MOJO_RESULT_OK) { |
| std::string_view read_chars = |
| base::as_string_view(buffer).substr(0, actually_read_bytes); |
| if (secure_hash_) |
| secure_hash_->Update(read_chars.data(), read_chars.size()); |
| if (UNSAFE_TODO(browser_file_.WriteAtCurrentPos( |
| read_chars.data(), base::checked_cast<int>(read_chars.size()))) < |
| 0) { |
| DLOG(ERROR) << "Error writing to file handle."; |
| OnWriteComplete(std::move(callback), |
| mojom::MhtmlSaveStatus::kFileWritingError); |
| return; |
| } |
| } |
| } |
| |
| if (result != MOJO_RESULT_OK && result != MOJO_RESULT_FAILED_PRECONDITION && |
| result != MOJO_RESULT_SHOULD_WAIT) { |
| DLOG(ERROR) << "Error streaming MHTML data to the Browser."; |
| OnWriteComplete(std::move(callback), |
| mojom::MhtmlSaveStatus::kStreamingError); |
| return; |
| } |
| |
| // Only notify successful write completion if peer handle is closed without |
| // any errors. |
| if (state.peer_closed()) |
| OnWriteComplete(std::move(callback), mojom::MhtmlSaveStatus::kSuccess); |
| } |
| |
| void MHTMLGenerationManager::Job::OnWriteComplete( |
| MHTMLWriteCompleteCallback callback, |
| mojom::MhtmlSaveStatus save_status) { |
| DCHECK(download::GetDownloadTaskRunner()->RunsTasksInCurrentSequence()); |
| |
| watcher_.reset(); |
| GetUIThreadTaskRunner({})->PostTask( |
| FROM_HERE, base::BindOnce(std::move(callback), save_status)); |
| } |
| |
| void MHTMLGenerationManager::Job::DoneWritingToDisk( |
| mojom::MhtmlSaveStatus save_status) { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| |
| // If the Job has prematurely finalized and marked as finished, make this |
| // response no-op. |
| if (is_finished_) |
| return; |
| |
| waiting_on_data_streaming_ = false; |
| MaybeSendToNextRenderFrame(save_status); |
| } |
| |
| void MHTMLGenerationManager::Job::OnConnectionError() { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| // If message pipe end closes, then it is an unexpected crash. |
| DLOG(ERROR) << "Message pipe to renderer closed while expecting response"; |
| Finalize(mojom::MhtmlSaveStatus::kRenderProcessExited); |
| } |
| |
| void MHTMLGenerationManager::Job::OnFileAvailable(base::File browser_file) { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| |
| if (!browser_file.IsValid()) { |
| DLOG(ERROR) << "Failed to create file"; |
| Finalize(mojom::MhtmlSaveStatus::kFileCreationError); |
| return; |
| } |
| |
| browser_file_ = std::move(browser_file); |
| |
| mojom::MhtmlSaveStatus save_status = SendToNextRenderFrame(); |
| if (save_status != mojom::MhtmlSaveStatus::kSuccess) |
| Finalize(save_status); |
| } |
| |
| void MHTMLGenerationManager::Job::OnFinished( |
| const CloseFileResult& close_file_result) { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| mojom::MhtmlSaveStatus save_status = close_file_result.save_status; |
| int64_t file_size = close_file_result.file_size; |
| |
| TRACE_EVENT_NESTABLE_ASYNC_END2("page-serialization", "SavingMhtmlJob", this, |
| "job save status", save_status, "file size", |
| file_size); |
| |
| std::move(callback_).Run(close_file_result.toMHTMLGenerationResult()); |
| |
| delete this; // This is the last time the Job is referenced. |
| } |
| |
| void MHTMLGenerationManager::Job::MarkAsFinished() { |
| // MarkAsFinished() may be called twice only in the case which |
| // writer_.reset() does not correctly stop OnConnectionError |
| // notifications for the case described in https://crbug.com/612098. |
| if (is_finished_) { |
| NOTREACHED(); |
| } |
| is_finished_ = true; |
| writer_.reset(); |
| |
| // Additionally, |watcher_| may also invoke DoneWritingToDisk() from |
| // the download sequence, potentially calling this twice. We cannot disable |
| // |watcher_| notifications similar to |writer_|, since it exists in |
| // the download sequence, so we handle the case in DoneWritingToDisk(). |
| |
| TRACE_EVENT_NESTABLE_ASYNC_INSTANT0("page-serialization", "JobFinished", |
| this); |
| } |
| |
| void MHTMLGenerationManager::Job::CloseFile( |
| mojom::MhtmlSaveStatus save_status) { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| DCHECK(!mhtml_boundary_marker_.empty()); |
| |
| // Only update the status if that won't hide an earlier error. |
| if (!browser_file_.IsValid() && |
| save_status == mojom::MhtmlSaveStatus::kSuccess) |
| save_status = mojom::MhtmlSaveStatus::kFileWritingError; |
| |
| // If no previous error occurred the boundary should be sent. |
| download::GetDownloadTaskRunner()->PostTaskAndReplyWithResult( |
| FROM_HERE, |
| base::BindOnce(&MHTMLGenerationManager::Job::FinalizeOnFileThread, |
| save_status, mhtml_boundary_marker_, |
| std::move(browser_file_), std::move(extra_data_parts_), |
| std::move(watcher_), std::move(secure_hash_)), |
| base::BindOnce(&Job::OnFinished, weak_factory_.GetWeakPtr())); |
| } |
| |
| void MHTMLGenerationManager::Job::SerializeAsMHTMLResponse( |
| mojom::MhtmlSaveStatus save_status, |
| const std::vector<std::string>& digests_of_uris_of_serialized_resources) { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| |
| TRACE_EVENT_NESTABLE_ASYNC_END0("page-serialization", "WaitingOnRenderer", |
| this); |
| |
| frame_tree_node_id_of_busy_frame_ = FrameTreeNodeId(); |
| |
| // If the renderer succeeded, update the resource digests. |
| if (save_status == mojom::MhtmlSaveStatus::kSuccess) |
| RecordDigests(digests_of_uris_of_serialized_resources); |
| |
| MaybeSendToNextRenderFrame(save_status); |
| } |
| |
| void MHTMLGenerationManager::Job::RecordDigests( |
| const std::vector<std::string>& digests_of_uris_of_serialized_resources) { |
| // Renderer should be deduping resources with the same uris. |
| DCHECK_EQ(0u, base::STLSetIntersection<std::set<std::string>>( |
| digests_of_already_serialized_uris_, |
| std::set<std::string>( |
| digests_of_uris_of_serialized_resources.begin(), |
| digests_of_uris_of_serialized_resources.end())) |
| .size()); |
| digests_of_already_serialized_uris_.insert( |
| digests_of_uris_of_serialized_resources.begin(), |
| digests_of_uris_of_serialized_resources.end()); |
| } |
| |
| void MHTMLGenerationManager::Job::MaybeSendToNextRenderFrame( |
| mojom::MhtmlSaveStatus save_status) { |
| // If current operation is successful and there are more frames to process, |
| // let save status depend on the result of sending the next request. |
| if (save_status == mojom::MhtmlSaveStatus::kSuccess && |
| !pending_frame_tree_node_ids_.empty() && CurrentFrameDone()) { |
| save_status = SendToNextRenderFrame(); |
| } |
| |
| // If there was a failure (either from the renderer or from the job) then |
| // terminate the job and return. |
| if (save_status != mojom::MhtmlSaveStatus::kSuccess) { |
| Finalize(save_status); |
| return; |
| } |
| |
| // Otherwise report completion if there are no more frames to process |
| // and Job is done processing the current frame. |
| if (pending_frame_tree_node_ids_.empty() && CurrentFrameDone()) |
| Finalize(mojom::MhtmlSaveStatus::kSuccess); |
| } |
| |
| bool MHTMLGenerationManager::Job::CurrentFrameDone() const { |
| bool waiting_for_response_from_renderer = !!frame_tree_node_id_of_busy_frame_; |
| return !waiting_for_response_from_renderer && !waiting_on_data_streaming_; |
| } |
| |
| void MHTMLGenerationManager::Job::Finalize(mojom::MhtmlSaveStatus save_status) { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| MarkAsFinished(); |
| CloseFile(save_status); |
| } |
| |
| // static |
| void MHTMLGenerationManager::Job::StartNewJob( |
| WebContents* web_contents, |
| const MHTMLGenerationParams& params, |
| MHTMLGenerationResult::GenerateMHTMLCallback callback) { |
| // Creates a new Job. |
| // The constructor starts the serialization process and it will delete |
| // itself upon finishing. |
| new Job(web_contents, params, std::move(callback)); |
| } |
| |
| // static |
| bool MHTMLGenerationManager::Job::WriteToFileAndUpdateHash( |
| base::File* file, |
| crypto::SecureHash* secure_hash, |
| std::string to_write) { |
| if (!file->WriteAtCurrentPosAndCheck(base::as_byte_span(to_write))) { |
| return false; |
| } |
| if (secure_hash) { |
| secure_hash->Update(to_write.data(), to_write.size()); |
| } |
| return true; |
| } |
| |
| // static |
| CloseFileResult MHTMLGenerationManager::Job::FinalizeOnFileThread( |
| mojom::MhtmlSaveStatus save_status, |
| const std::string& boundary, |
| base::File file, |
| const std::vector<MHTMLExtraDataPart>& extra_data_parts, |
| std::unique_ptr<mojo::SimpleWatcher> watcher, |
| std::unique_ptr<crypto::SecureHash> secure_hash) { |
| DCHECK(download::GetDownloadTaskRunner()->RunsTasksInCurrentSequence()); |
| |
| watcher.reset(); |
| DCHECK(!boundary.empty()); |
| |
| if (save_status == mojom::MhtmlSaveStatus::kSuccess) { |
| TRACE_EVENT0("page-serialization", |
| "MHTMLGenerationManager::Job MHTML footer writing"); |
| |
| #if BUILDFLAG(IS_FUCHSIA) |
| // TODO(crbug.com/42050414): Remove the Seek call. |
| // On fuchsia, fds do not share state. As the fd has been duped and sent to |
| // the renderer process, it must be seeked to the end to ensure the data is |
| // appended. |
| if (file.Seek(base::File::FROM_END, 0) == -1) { |
| save_status = mojom::MhtmlSaveStatus::kFileWritingError; |
| } |
| #endif // BUILDFLAG(IS_FUCHSIA) |
| |
| // Write the extra data into a part of its own, if we have any. |
| std::string serialized_extra_data_parts = |
| CreateExtraDataParts(boundary, extra_data_parts); |
| // Short circuit to prevent file IO if nothing to write. |
| if (!serialized_extra_data_parts.empty() && |
| !WriteToFileAndUpdateHash(&file, secure_hash.get(), |
| serialized_extra_data_parts)) { |
| save_status = mojom::MhtmlSaveStatus::kFileWritingError; |
| } |
| |
| // Write out the footer at the bottom of the file. |
| std::string footer = CreateFooter(boundary); |
| if (save_status == mojom::MhtmlSaveStatus::kSuccess && |
| !WriteToFileAndUpdateHash(&file, secure_hash.get(), footer)) { |
| save_status = mojom::MhtmlSaveStatus::kFileWritingError; |
| } |
| } |
| |
| // If the file is still valid try to close it. Only update the status if that |
| // won't hide an earlier error. |
| int64_t file_size; |
| if (!CloseFileIfValid(file, &file_size) && |
| save_status == mojom::MhtmlSaveStatus::kSuccess) { |
| save_status = mojom::MhtmlSaveStatus::kFileClosingError; |
| } |
| |
| file_size = save_status == mojom::MhtmlSaveStatus::kSuccess |
| ? file_size |
| : kInvalidFileSize; |
| // If we do not have a pending hash or the file is invalid, finalize operation |
| // with an empty digest result. |
| if (!secure_hash || file_size == kInvalidFileSize) |
| return CloseFileResult(save_status, file_size, nullptr); |
| |
| // Record hash and finish operation. |
| std::string file_digest = std::string(secure_hash->GetHashLength(), 0); |
| secure_hash->Finish(&(file_digest[0]), file_digest.size()); |
| secure_hash.reset(); |
| return CloseFileResult(save_status, file_size, &file_digest); |
| } |
| |
| // static |
| std::string MHTMLGenerationManager::Job::CreateExtraDataParts( |
| const std::string& boundary, |
| const std::vector<MHTMLExtraDataPart>& extra_data_parts) { |
| DCHECK(download::GetDownloadTaskRunner()->RunsTasksInCurrentSequence()); |
| std::string serialized_extra_data_parts; |
| |
| // Don't write an extra data part if there is none. |
| if (extra_data_parts.empty()) |
| return serialized_extra_data_parts; |
| |
| // For each extra part, serialize that part and add to our accumulator |
| // string. |
| for (const auto& part : extra_data_parts) { |
| // Write a newline, then a boundary, a newline, then the content |
| // location, a newline, the content type, a newline, extra_headers, |
| // two newlines, the body, and end with a newline. |
| std::string serialized_extra_data_part = base::StringPrintf( |
| "\r\n--%s\r\n%s%s\r\n%s%s\r\n%s\r\n\r\n%s\r\n", boundary.c_str(), |
| kContentLocation, part.content_location.c_str(), kContentType, |
| part.content_type.c_str(), part.extra_headers.c_str(), |
| part.body.c_str()); |
| DCHECK(base::IsStringASCII(serialized_extra_data_part)); |
| |
| serialized_extra_data_parts += serialized_extra_data_part; |
| } |
| return serialized_extra_data_parts; |
| } |
| |
| // static |
| std::string MHTMLGenerationManager::Job::CreateFooter( |
| const std::string& boundary) { |
| DCHECK(download::GetDownloadTaskRunner()->RunsTasksInCurrentSequence()); |
| // Per the spec, the boundary must occur at the beginning of a line. |
| std::string footer = base::StringPrintf("\r\n--%s--\r\n", boundary.c_str()); |
| DCHECK(base::IsStringASCII(footer)); |
| return footer; |
| } |
| |
| // static |
| bool MHTMLGenerationManager::Job::CloseFileIfValid(base::File& file, |
| int64_t* file_size) { |
| DCHECK(download::GetDownloadTaskRunner()->RunsTasksInCurrentSequence()); |
| DCHECK(file_size); |
| if (file.IsValid()) { |
| *file_size = file.GetLength(); |
| file.Close(); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| MHTMLGenerationManager* MHTMLGenerationManager::GetInstance() { |
| return base::Singleton<MHTMLGenerationManager>::get(); |
| } |
| |
| MHTMLGenerationManager::MHTMLGenerationManager() = default; |
| |
| MHTMLGenerationManager::~MHTMLGenerationManager() = default; |
| |
| void MHTMLGenerationManager::SaveMHTML( |
| WebContents* web_contents, |
| const MHTMLGenerationParams& params, |
| MHTMLGenerationResult::GenerateMHTMLCallback callback) { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| Job::StartNewJob(web_contents, params, std::move(callback)); |
| } |
| |
| } // namespace content |