| // Copyright 2012 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include <stdint.h> |
| |
| #include <memory> |
| |
| #include "base/containers/span.h" |
| #include "base/files/file_path.h" |
| #include "base/files/file_util.h" |
| #include "base/files/scoped_temp_dir.h" |
| #include "base/functional/bind.h" |
| #include "base/functional/callback.h" |
| #include "base/path_service.h" |
| #include "base/run_loop.h" |
| #include "base/strings/string_split.h" |
| #include "base/strings/utf_string_conversions.h" |
| #include "base/test/metrics/histogram_tester.h" |
| #include "base/test/scoped_feature_list.h" |
| #include "base/threading/thread_restrictions.h" |
| #include "build/build_config.h" |
| #include "components/download/public/common/download_task_runner.h" |
| #include "content/browser/download/mhtml_generation_manager.h" |
| #include "content/browser/renderer_host/render_process_host_impl.h" |
| #include "content/common/download/mhtml_file_writer.mojom.h" |
| #include "content/public/browser/browser_task_traits.h" |
| #include "content/public/browser/browser_thread.h" |
| #include "content/public/browser/mhtml_extra_parts.h" |
| #include "content/public/browser/mhtml_generation_result.h" |
| #include "content/public/browser/render_frame_host.h" |
| #include "content/public/browser/render_process_host.h" |
| #include "content/public/browser/web_contents.h" |
| #include "content/public/common/content_paths.h" |
| #include "content/public/common/mhtml_generation_params.h" |
| #include "content/public/test/browser_test.h" |
| #include "content/public/test/browser_test_utils.h" |
| #include "content/public/test/content_browser_test.h" |
| #include "content/public/test/content_browser_test_utils.h" |
| #include "content/public/test/test_utils.h" |
| #include "content/shell/browser/shell.h" |
| #include "crypto/secure_hash.h" |
| #include "crypto/sha2.h" |
| #include "mojo/public/cpp/bindings/associated_receiver.h" |
| #include "mojo/public/cpp/bindings/pending_associated_receiver.h" |
| #include "net/base/filename_util.h" |
| #include "net/dns/mock_host_resolver.h" |
| #include "net/test/embedded_test_server/embedded_test_server.h" |
| #include "testing/gmock/include/gmock/gmock.h" |
| #include "testing/gtest/include/gtest/gtest.h" |
| #include "third_party/abseil-cpp/absl/cleanup/cleanup.h" |
| #include "third_party/blink/public/common/associated_interfaces/associated_interface_provider.h" |
| #include "third_party/blink/public/common/features.h" |
| #include "third_party/blink/public/mojom/frame/find_in_page.mojom.h" |
| |
| #if BUILDFLAG(IS_WIN) |
| #include "base/functional/callback_helpers.h" |
| #include "base/test/bind.h" |
| #endif // BUILDFLAG(IS_WIN) |
| |
| using testing::Contains; |
| using testing::EndsWith; |
| using testing::HasSubstr; |
| using testing::IsEmpty; |
| using testing::IsSupersetOf; |
| using testing::Not; |
| using testing::Pointee; |
| |
| namespace content { |
| |
| namespace { |
| |
// JavaScript snippet that summarizes the currently loaded document as a plain
// object (title, inner text, font load state, image load state and a few
// computed styles for elements that have an id). It is evaluated by
// MHTMLGenerationTest::GetPageInfo().
constexpr char kGetPageInfoScript[] = R"js(

// This script is evaluated after loading the original page, and after loading
// the saved page. It returns an object that should usually be equivalent
// between the original and saved pages.
(() => {
  // Pick a subset of styles to export to keep the output size down.
  const styleKeys = ['font-family', 'line-height', 'display'];
  function elementStyles(el) {
    const styles = window.getComputedStyle(el);
    const result = Object.fromEntries(styleKeys
      .map(name => [name, styles[name]])
      .filter(v=>v[1]));
    // add background-image, but only the file name because the full path will
    // change in the saved page.
    let m = styles.backgroundImage.match(/url\((.*)\)/);
    if (m) {
      const url = m[1];
      const parts = url.split('/');
      result['backgroundImageFile'] = parts[parts.length-1];
    }
    return result;
  }
  function isVisible(el) {
    const styles = window.getComputedStyle(el);
    return styles.display !== 'none';
  }
  function sorted(a) {
    const result = Array.from(a);
    result.sort()
    return result;
  }

  return {
    title: document.title,
    innerText: document.body.innerText,
    fonts: sorted(Array.from(document.fonts)
                  .map(f=>`${f.family}: ${document.fonts.check("12px "
                       + f.family) ? "loaded" : "not_loaded"}`)),
    // loaded state of visible image elements.
    images: Array.from(document.querySelectorAll('img'))
      .filter(isVisible)
      .map(i => i.complete ? 'loaded' : 'not_loaded'),
    // Computed styles for elements with ids.
    computedStyles: Array.from(document.querySelectorAll('*'))
      .filter(e => e.id && isVisible(e))
      .map(e => [e.id, elementStyles(e)])
      .filter(e => e),
  };
})()

)js";
| |
| // Information about the MHTML file. |
| class MHTMLFileInfo { |
| public: |
| MHTMLFileInfo() = default; |
| explicit MHTMLFileInfo(const base::FilePath& path) : path_(path) { |
| base::ScopedAllowBlockingForTesting allow_blocking; |
| base::ReadFileToString(path, &content_); |
| } |
| |
| const base::FilePath& path() const { return path_; } |
| |
| const std::string& content() const { return content_; } |
| |
| std::vector<std::string> ContentLocations() { |
| std::vector<std::string> parts = base::SplitStringUsingSubstr( |
| content_, |
| "\r\nContent-Location: ", base::WhitespaceHandling::KEEP_WHITESPACE, |
| base::SplitResult::SPLIT_WANT_ALL); |
| std::vector<std::string> result; |
| for (size_t i = 1; i < parts.size(); ++i) { |
| const std::string& part = parts[i]; |
| auto pos = part.find('\r'); |
| if (pos != std::string::npos) { |
| result.push_back(part.substr(0, pos)); |
| } |
| } |
| return result; |
| } |
| |
| private: |
| base::FilePath path_; |
| std::string content_; |
| }; |
| |
// Expectations checked by MHTMLGenerationTest::TestOriginalVsSavedPage().
struct CompareOptions {
  // When set, the number of frames both pages must have.
  std::optional<int> expected_number_of_frames;

  // Each of these must be found in the original page and in the saved page.
  std::vector<std::string> expected_substrings;

  // None of these may be found in the original page or in the saved page.
  std::vector<std::string> forbidden_substrings;

  // None of these may be found in the saved page; the original page is not
  // checked against them.
  std::vector<std::string> forbidden_substrings_in_saved_page;
};
| |
| struct CompareResult { |
| // Output of `kGetPageInfoScript` for the original page. |
| base::Value original_info; |
| // Output of `kGetPageInfoScript` for the saved page. |
| base::Value saved_info; |
| MHTMLFileInfo file; |
| }; |
| |
| // A dummy WebContentsDelegate which tracks the results of a find operation. |
| class FindTrackingDelegate : public WebContentsDelegate { |
| public: |
| explicit FindTrackingDelegate(const std::string& search) : search_(search) {} |
| |
| FindTrackingDelegate(const FindTrackingDelegate&) = delete; |
| FindTrackingDelegate& operator=(const FindTrackingDelegate&) = delete; |
| |
| // Returns number of results. |
| int Wait(WebContents* web_contents) { |
| WebContentsDelegate* old_delegate = web_contents->GetDelegate(); |
| web_contents->SetDelegate(this); |
| |
| auto options = blink::mojom::FindOptions::New(); |
| options->run_synchronously_for_testing = true; |
| options->match_case = false; |
| |
| web_contents->Find(global_request_id++, base::UTF8ToUTF16(search_), |
| std::move(options), /*skip_delay=*/false); |
| run_loop_.Run(); |
| |
| web_contents->SetDelegate(old_delegate); |
| |
| return matches_; |
| } |
| |
| void FindReply(WebContents* web_contents, |
| int request_id, |
| int number_of_matches, |
| const gfx::Rect& selection_rect, |
| int active_match_ordinal, |
| bool final_update) override { |
| if (final_update) { |
| matches_ = number_of_matches; |
| run_loop_.Quit(); |
| } |
| } |
| |
| static int global_request_id; |
| |
| private: |
| std::string search_; |
| int matches_ = -1; |
| base::RunLoop run_loop_; |
| }; |
| |
| // static |
| int FindTrackingDelegate::global_request_id = 0; |
| |
// Payload that the mock MhtmlFileWriter implementations write to the output
// handle in place of real serialized MHTML.
constexpr std::string_view kTestData =
    "Sample Text to write on a generated MHTML "
    "file for tests to validate whether the implementation is able to access "
    "and write to the file.";
| |
| class MockWriterBase : public mojom::MhtmlFileWriter { |
| public: |
| MockWriterBase() = default; |
| |
| MockWriterBase(const MockWriterBase&) = delete; |
| MockWriterBase& operator=(const MockWriterBase&) = delete; |
| |
| ~MockWriterBase() override = default; |
| |
| void BindReceiver(mojo::ScopedInterfaceEndpointHandle handle) { |
| receiver_.Bind(mojo::PendingAssociatedReceiver<mojom::MhtmlFileWriter>( |
| std::move(handle))); |
| } |
| |
| protected: |
| void SendResponse(SerializeAsMHTMLCallback callback) { |
| std::vector<std::string> dummy_digests; |
| std::move(callback).Run(mojom::MhtmlSaveStatus::kSuccess, dummy_digests); |
| } |
| |
| void WriteDataToDestinationFile(base::File& destination_file) { |
| base::ScopedAllowBlockingForTesting allow_blocking; |
| destination_file.WriteAtCurrentPos(base::as_byte_span(kTestData)); |
| destination_file.Close(); |
| } |
| |
| void WriteDataToProducerPipe( |
| mojo::ScopedDataPipeProducerHandle producer_pipe) { |
| base::ScopedAllowBlockingForTesting allow_blocking; |
| size_t actually_written_bytes = 0; |
| producer_pipe->WriteData(base::as_byte_span(kTestData), |
| MOJO_WRITE_DATA_FLAG_NONE, actually_written_bytes); |
| producer_pipe.reset(); |
| } |
| |
| mojo::AssociatedReceiver<mojom::MhtmlFileWriter> receiver_{this}; |
| }; |
| |
| // This Mock injects our overwritten interface, running the callback |
| // SerializeAsMHTMLResponse and immediately disconnecting the message pipe. |
| class RespondAndDisconnectMockWriter |
| : public MockWriterBase, |
| public base::RefCountedThreadSafe<RespondAndDisconnectMockWriter> { |
| public: |
| RespondAndDisconnectMockWriter() = default; |
| |
| RespondAndDisconnectMockWriter(const RespondAndDisconnectMockWriter&) = |
| delete; |
| RespondAndDisconnectMockWriter& operator=( |
| const RespondAndDisconnectMockWriter&) = delete; |
| |
| void SerializeAsMHTML(mojom::SerializeAsMHTMLParamsPtr params, |
| SerializeAsMHTMLCallback callback) override { |
| // Upon using the overridden mock interface implementation, this will be |
| // handled by the product code as illustrated below. (1), (2), (3) depict |
| // points in time when product code runs on UI thread and download sequence. |
| // For the repro, the message pipe disconnection needs to happen between (1) |
| // and (3). |
| // |
| // Test instance UI thread download sequence |
| // --------- --------- ----------- |
| // | | | |
| // WE ARE HERE | | |
| // | | | |
| // | | | |
| // +--------------->+ | |
| // | | | |
| // | | | |
| // | | | |
| // | | | |
| // | | | |
| // | | | |
| // (1) | MHTMLGenerationManager::Job | |
| // | ::SerializeAsMHTMLResponse | |
| // | +-------------------->+ |
| // | | | |
| // | | | |
| // | | | |
| // (2) | | MHTMLGenerationManager::Job |
| // | | ::CloseFileOnFileThread |
| // | | | |
| // | | | |
| // | test needs to | |
| // | disconnect message pipe | |
| // | HERE - between (1) and (3) | |
| // | | | |
| // | | | |
| // | +<--------------------+ |
| // | | | |
| // (3) | MHTMLGenerationManager | |
| // | Job::OnFinished | |
| // | | | |
| // |
| // We hope that the error handler is invoked between (1) and (3) by doing |
| // the following: |
| // - From here, run the callback response to the UI thread. This queues |
| // the response message onto the bound message pipe. |
| // - After running the callback response, immediately unbind the message |
| // pipe in order to queue a message onto the bound message pipe to notify |
| // the Browser the connection was closed and invoke the error handler. |
| // - Upon resuming operation, the FIFO ordering property of associated |
| // interfaces guarantees the execution of (1) before the error handler. |
| // (1) posts (2) to the download sequence and terminates. The client end |
| // then accepts the error notification and invokes the connection error |
| // handler, guaranteeing its execution before (3). |
| |
| bool compute_contents_hash = params->output_handle->is_producer_handle(); |
| |
| // Write a valid MHTML file to its respective handle, since we are not |
| // actively running a serialization pipeline in the mock implementation. |
| if (compute_contents_hash) { |
| WriteDataToProducerPipe( |
| std::move(params->output_handle->get_producer_handle())); |
| } else { |
| WriteDataToDestinationFile(params->output_handle->get_file_handle()); |
| } |
| |
| SendResponse(std::move(callback)); |
| |
| // Reset the message pipe connection to invoke the disconnect callback. The |
| // disconnect handler from here will finalize the Job and attempt to call |
| // MHTMLGenerationManager::Job::CloseFile a second time. If this situation |
| // is handled correctly, the browser file should be invalidated and |
| // idempotent. |
| if (!compute_contents_hash) { |
| receiver_.reset(); |
| return; |
| } |
| |
| // In the case we are using a data pipe to stream serialized MHTML data, |
| // we must ensure the write complete notification arrives before the |
| // connection error notification, otherwise the Browser will report |
| // an MhtmlSaveStatus != kSuccess. We can guarantee this by potentially |
| // running tasks after each watcher invocation to send notifications that |
| // it has been completed. We need at least two tasks to guarantee this, |
| // as there can be at most two watcher invocations to write a block of |
| // data smaller than the data pipe buffer to file. |
| download::GetDownloadTaskRunner()->PostTask( |
| FROM_HERE, |
| base::BindOnce(&RespondAndDisconnectMockWriter::TaskX, |
| scoped_refptr<RespondAndDisconnectMockWriter>(this))); |
| } |
| |
| void TaskX() { |
| download::GetDownloadTaskRunner()->PostTask( |
| FROM_HERE, |
| base::BindOnce(&RespondAndDisconnectMockWriter::TaskY, |
| scoped_refptr<RespondAndDisconnectMockWriter>(this))); |
| } |
| |
| void TaskY() { |
| GetUIThreadTaskRunner({})->PostTask( |
| FROM_HERE, |
| base::BindOnce(&RespondAndDisconnectMockWriter::TaskZ, |
| scoped_refptr<RespondAndDisconnectMockWriter>(this))); |
| } |
| |
| void TaskZ() { receiver_.reset(); } |
| |
| private: |
| friend base::RefCountedThreadSafe<RespondAndDisconnectMockWriter>; |
| |
| ~RespondAndDisconnectMockWriter() override = default; |
| }; |
| |
| class MHTMLGenerationTest : public ContentBrowserTest, |
| public testing::WithParamInterface<bool> { |
| public: |
| MHTMLGenerationTest() = default; |
| |
| enum TaskOrder { WriteThenRespond, RespondThenWrite }; |
| |
| protected: |
| void SetUpOnMainThread() override { |
| ASSERT_TRUE(temp_dir_.CreateUniqueTempDir()); |
| ASSERT_TRUE(embedded_test_server()->Start()); |
| ContentBrowserTest::SetUpOnMainThread(); |
| } |
| |
| void OverrideInterface(MockWriterBase* mock_writer) { |
| blink::AssociatedInterfaceProvider* remote_interfaces = |
| shell() |
| ->web_contents() |
| ->GetPrimaryMainFrame() |
| ->GetRemoteAssociatedInterfaces(); |
| remote_interfaces->OverrideBinderForTesting( |
| mojom::MhtmlFileWriter::Name_, |
| base::BindRepeating(&MockWriterBase::BindReceiver, |
| base::Unretained(mock_writer))); |
| } |
| |
| MHTMLFileInfo GenerateMHTML(base::FilePath& path, const GURL& url) { |
| MHTMLGenerationParams params(path); |
| return GenerateMHTML(params, url); |
| } |
| |
| MHTMLFileInfo GenerateMHTML(const MHTMLGenerationParams& params, |
| const GURL& url) { |
| EXPECT_TRUE(NavigateToURL(shell(), url)); |
| return GenerateMHTMLForCurrentPage(params); |
| } |
| |
| MHTMLFileInfo GenerateMHTML(const GURL& url) { |
| return GenerateMHTML(DefaultGenerationParams(), url); |
| } |
| |
| // Loads the generated MHTML file to check if it is well formed. |
| void NavigateToMHTML(const base::FilePath& path) { |
| // Loads the generated file to check if it is well formed. |
| WebContentsConsoleObserver console_observer(shell()->web_contents()); |
| console_observer.SetPattern("Malformed multipart archive: *"); |
| |
| EXPECT_TRUE(NavigateToURL(shell(), net::FilePathToFileURL(path))) |
| << "Error navigating to the generated MHTML file"; |
| EXPECT_TRUE(console_observer.messages().empty()) |
| << "The generated MHTML file is malformed"; |
| } |
| |
| // Extracts various information from the loaded page. Extracts information |
| // that should be equivalent in the original and saved pages. |
| base::Value GetPageInfo() { |
| auto result = EvalJs(shell(), kGetPageInfoScript); |
| EXPECT_EQ(result.error, ""); |
| return result.value.Clone(); |
| } |
| |
| MHTMLFileInfo GenerateMHTMLForCurrentPage() { |
| return GenerateMHTMLForCurrentPage(DefaultGenerationParams()); |
| } |
| |
| MHTMLFileInfo GenerateMHTMLForCurrentPage( |
| const MHTMLGenerationParams& params) { |
| base::RunLoop run_loop; |
| histogram_tester_ = std::make_unique<base::HistogramTester>(); |
| |
| bool use_result_callback = GetParam(); |
| |
| if (use_result_callback) { |
| shell()->web_contents()->GenerateMHTMLWithResult( |
| params, |
| base::BindOnce(&MHTMLGenerationTest::MHTMLGeneratedWithResult, |
| base::Unretained(this), run_loop.QuitClosure())); |
| } else { |
| shell()->web_contents()->GenerateMHTML( |
| params, |
| base::BindOnce(&MHTMLGenerationTest::MHTMLGenerated, |
| base::Unretained(this), run_loop.QuitClosure())); |
| } |
| |
| // Block until the MHTML is generated. |
| run_loop.Run(); |
| |
| EXPECT_TRUE(has_mhtml_callback_run()) |
| << "Unexpected error generating MHTML file"; |
| if (!has_mhtml_callback_run()) { |
| return MHTMLFileInfo(params.file_path); |
| } |
| |
| // TODO(crbug.com/40641976): Add tests which will let MHTMLGeneration |
| // manager fail during file write operation. This will allow us to actually |
| // test if we receive a bogus hash instead of a std::nullopt. |
| EXPECT_EQ(std::nullopt, file_digest()); |
| |
| MHTMLFileInfo info(params.file_path); |
| |
| // Skip well formedness check if explicitly disabled or there was a |
| // generation error. |
| if (well_formedness_check_) { |
| EXPECT_NE(file_size(), -1) << "GenerateMHTML callback wasn't called"; |
| EXPECT_THAT(info.content(), Not(IsEmpty())); |
| } |
| |
| return info; |
| } |
| |
| void TwoStepSyncTestFor(const TaskOrder order); |
| |
| int64_t ReadFileSizeFromDisk(base::FilePath path) { |
| base::ScopedAllowBlockingForTesting allow_blocking; |
| std::optional<int64_t> file_size = base::GetFileSize(path); |
| if (!file_size.has_value()) { |
| return -1; |
| } |
| return file_size.value(); |
| } |
| |
| CompareResult TestOriginalVsSavedPage(const GURL& url, |
| const CompareOptions& options = {}) { |
| return TestOriginalVsSavedPage(url, DefaultGenerationParams(), options); |
| } |
| |
| CompareResult TestOriginalVsSavedPage(const GURL& url, |
| MHTMLGenerationParams params, |
| const CompareOptions& options = {}) { |
| CompareResult result; |
| // Navigate to the test page and verify if test expectations |
| // are met (this is mostly a sanity check - a failure to meet |
| // expectations would probably mean that there is a test bug |
| // (i.e. that we got called with wrong expected_foo argument). |
| EXPECT_TRUE(NavigateToURL(shell(), url)); |
| |
| result.file = GenerateMHTML(params, url); |
| result.original_info = GetPageInfo(); |
| AssertExpectationsAboutCurrentTab(options.expected_number_of_frames, |
| options.expected_substrings, |
| options.forbidden_substrings); |
| |
| // Stop the test server (to make sure the locally saved page |
| // is self-contained / won't try to open original resources). |
| if (!embedded_test_server()->ShutdownAndWaitUntilComplete()) { |
| EXPECT_FALSE(true) << "ShutdownAndWaitUntilComplete failed"; |
| return result; |
| } |
| |
| // Open the saved page and verify if test expectations are |
| // met (i.e. if the same expectations are met for "after" |
| // [saved version of the page] as for the "before" |
| // [the original version of the page]. |
| NavigateToMHTML(params.file_path); |
| result.saved_info = GetPageInfo(); |
| |
| auto forbidden_strings = options.forbidden_substrings_in_saved_page; |
| forbidden_strings.insert(forbidden_strings.end(), |
| options.forbidden_substrings.begin(), |
| options.forbidden_substrings.end()); |
| AssertExpectationsAboutCurrentTab(options.expected_number_of_frames, |
| options.expected_substrings, |
| forbidden_strings); |
| return result; |
| } |
| |
| void AssertExpectationsAboutCurrentTab( |
| std::optional<int> expected_number_of_frames, |
| const std::vector<std::string>& expected_substrings, |
| const std::vector<std::string>& forbidden_substrings) { |
| int actual_number_of_frames = |
| CollectAllRenderFrameHosts(shell()->web_contents()->GetPrimaryPage()) |
| .size(); |
| if (expected_number_of_frames) { |
| EXPECT_EQ(*expected_number_of_frames, actual_number_of_frames); |
| } |
| |
| for (const auto& expected_substring : expected_substrings) { |
| FindTrackingDelegate delegate(expected_substring); |
| int actual_number_of_matches = delegate.Wait(shell()->web_contents()); |
| EXPECT_EQ(1, actual_number_of_matches) |
| << "Verifying that \"" << expected_substring << "\" appears " |
| << "exactly once in the text of web contents of " |
| << shell()->web_contents()->GetLastCommittedURL().spec(); |
| } |
| |
| for (const auto& forbidden_substring : forbidden_substrings) { |
| FindTrackingDelegate delegate(forbidden_substring); |
| int actual_number_of_matches = delegate.Wait(shell()->web_contents()); |
| EXPECT_EQ(0, actual_number_of_matches) |
| << "Verifying that \"" << forbidden_substring << "\" doesn't " |
| << "appear in the text of web contents of " |
| << shell()->web_contents()->GetLastCommittedURL().spec(); |
| } |
| } |
| |
| // Tests that the result of setting compute_contents_hash is the same as |
| // manually hashing the file. Because MHTMLGenerationManager depends on |
| // net::GenerateMimeMultipartBoundary() to write the boundary, we cannot |
| // compute the digest in advance. Therefore, we must compute the hash of the |
| // whole file and assert that the computed hash is the same as the hash |
| // produced here. |
| void TestComputeContentsHash(base::FilePath& path) { |
| base::ScopedAllowBlockingForTesting allow_blocking; |
| |
| // Reload the file to an mhtml string for hashing |
| std::string test_mhtml; |
| ASSERT_TRUE(base::ReadFileToString(path, &test_mhtml)); |
| |
| // Hash the file in one big step. This is not recommended to do outside of |
| // tests because the files being hashed could be too large. |
| std::unique_ptr<crypto::SecureHash> secure_hash = |
| crypto::SecureHash::Create(crypto::SecureHash::Algorithm::SHA256); |
| secure_hash->Update(test_mhtml.c_str(), test_mhtml.size()); |
| std::string expected_digest(secure_hash->GetHashLength(), 0); |
| secure_hash->Finish(&(expected_digest[0]), expected_digest.size()); |
| secure_hash.reset(); |
| |
| ASSERT_TRUE(file_digest()); |
| EXPECT_EQ(file_digest().value(), expected_digest); |
| } |
| |
| // In the case that we are using a pre-generated .mhtml file, we do |
| // not have any control over the final mhtml_boundary_marker write |
| // operation. This results in the post-generation verification tests |
| // reporting a malformed multipart archive, unintentionally failing the |
| // test. |
| void DisableWellformednessCheck() { well_formedness_check_ = false; } |
| |
| MHTMLGenerationParams DefaultGenerationParams() const { |
| return MHTMLGenerationParams( |
| temp_dir_.GetPath().Append(FILE_PATH_LITERAL("test.mht"))); |
| } |
| |
| bool has_mhtml_callback_run() const { return has_mhtml_callback_run_; } |
| int64_t file_size() const { return file_size_; } |
| std::optional<std::string> file_digest() const { return file_digest_; } |
| base::HistogramTester* histogram_tester() { return histogram_tester_.get(); } |
| |
| base::ScopedTempDir temp_dir_; |
| |
| private: |
| void MHTMLGenerated(base::OnceClosure quit_closure, int64_t size) { |
| has_mhtml_callback_run_ = true; |
| file_size_ = size; |
| std::move(quit_closure).Run(); |
| } |
| void MHTMLGeneratedWithResult(base::OnceClosure quit_closure, |
| const MHTMLGenerationResult& result) { |
| has_mhtml_callback_run_ = true; |
| file_size_ = result.file_size; |
| file_digest_ = result.file_digest; |
| std::move(quit_closure).Run(); |
| } |
| |
| bool has_mhtml_callback_run_ = false; |
| int64_t file_size_ = 0; |
| std::optional<std::string> file_digest_; |
| bool well_formedness_check_ = true; |
| std::unique_ptr<base::HistogramTester> histogram_tester_; |
| }; |
| |
| class MHTMLGenerationImprovedTest : public MHTMLGenerationTest { |
| public: |
| MHTMLGenerationImprovedTest() { |
| feature_list_.InitAndEnableFeature(blink::features::kMHTML_Improvements); |
| } |
| |
| private: |
| base::test::ScopedFeatureList feature_list_; |
| }; |
| |
| // Tests that generating a MHTML does create contents. |
| // Note that the actual content of the file is not tested, the purpose of this |
| // test is to ensure we were successful in creating the MHTML data from the |
| // renderer. |
| IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest, GenerateMHTML) { |
| MHTMLFileInfo info = |
| GenerateMHTML(embedded_test_server()->GetURL("/simple_page.html")); |
| |
| // Make sure the actual generated file has some contents. |
| EXPECT_THAT(info.content(), |
| HasSubstr("Content-Transfer-Encoding: quoted-printable")); |
| } |
| |
| #if BUILDFLAG(IS_WIN) |
| // This Windows only test generates an MHTML file in a path that is explicitly |
| // not in the temp directory and not in the user data dir. This is to test that |
| // the mojo security constraints correctly allow this writeable handle to a |
| // renderer process. See `mojo/core/platform_handle_security_util_win.cc`. |
| IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest, GenerateMHTMLInNonTempDir) { |
| base::FilePath local_app_data; |
| // This test creates a temporary directory in %LocalAppData% then deletes it |
| // afterwards. |
| EXPECT_TRUE( |
| base::PathService::Get(base::DIR_LOCAL_APP_DATA, &local_app_data)); |
| base::FilePath new_dir; |
| { |
| base::ScopedAllowBlockingForTesting allow_blocking; |
| EXPECT_TRUE(base::CreateTemporaryDirInDir( |
| local_app_data, FILE_PATH_LITERAL("MHTMLGenerationTest"), &new_dir)); |
| } |
| absl::Cleanup delete_dir = [new_dir] { |
| base::ScopedAllowBlockingForTesting allow_blocking; |
| base::DeletePathRecursively(new_dir); |
| }; |
| |
| base::FilePath path = new_dir.Append(FILE_PATH_LITERAL("test.mht")); |
| |
| MHTMLFileInfo info = |
| GenerateMHTML(path, embedded_test_server()->GetURL("/simple_page.html")); |
| |
| EXPECT_THAT(info.content(), |
| HasSubstr("Content-Transfer-Encoding: quoted-printable")); |
| } |
| #endif // BUILDFLAG(IS_WIN) |
| |
| // Regression test for the crash/race from https://crbug.com/612098. |
| // |
| // TODO(crbug.com/41456635): Flaky on Android. |
| #if BUILDFLAG(IS_ANDROID) |
| #define MAYBE_GenerateMHTMLAndCloseConnection \ |
| DISABLED_GenerateMHTMLAndCloseConnection |
| #else |
| #define MAYBE_GenerateMHTMLAndCloseConnection GenerateMHTMLAndCloseConnection |
| #endif |
| IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest, |
| MAYBE_GenerateMHTMLAndCloseConnection) { |
| scoped_refptr<RespondAndDisconnectMockWriter> mock_writer = |
| base::MakeRefCounted<RespondAndDisconnectMockWriter>(); |
| |
| EXPECT_TRUE(NavigateToURL( |
| shell(), embedded_test_server()->GetURL("/simple_page.html"))); |
| base::FilePath path(temp_dir_.GetPath()); |
| path = path.Append(FILE_PATH_LITERAL("test.mht")); |
| |
| OverrideInterface(mock_writer.get()); |
| DisableWellformednessCheck(); |
| |
| GenerateMHTMLForCurrentPage(); |
| |
| // Verify the file has some contents written to it. |
| EXPECT_GT(ReadFileSizeFromDisk(path), 100); |
| // Verify the reported file size matches the file written to disk. |
| EXPECT_EQ(ReadFileSizeFromDisk(path), file_size()); |
| } |
| |
| // TODO(crbug.com/41290169): Flaky on Windows. |
| #if BUILDFLAG(IS_WIN) |
| #define MAYBE_InvalidPath DISABLED_InvalidPath |
| #else |
| #define MAYBE_InvalidPath InvalidPath |
| #endif |
| IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest, MAYBE_InvalidPath) { |
| base::FilePath path(FILE_PATH_LITERAL("/invalid/file/path")); |
| DisableWellformednessCheck(); |
| |
| GenerateMHTML(path, embedded_test_server()->GetURL("/page_with_image.html")); |
| |
| EXPECT_EQ(file_size(), -1); // Expecting that the callback reported failure. |
| } |
| |
| // Tests that MHTML generated using the default 'quoted-printable' encoding does |
| // not contain the 'binary' Content-Transfer-Encoding header, and generates |
| // base64 encoding for the image part. |
| IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest, GenerateNonBinaryMHTMLWithImage) { |
| GURL url(embedded_test_server()->GetURL("/page_with_image.html")); |
| MHTMLFileInfo info = GenerateMHTML(url); |
| |
| EXPECT_THAT(info.content(), HasSubstr("Content-Transfer-Encoding: base64")); |
| EXPECT_THAT(info.content(), |
| Not(HasSubstr("Content-Transfer-Encoding: binary"))); |
| EXPECT_THAT(info.ContentLocations(), Contains(EndsWith("blank.jpg"))); |
| // Verify the boundary should start with CRLF. |
| EXPECT_THAT(info.content(), HasSubstr("\r\n------MultipartBoundary")); |
| } |
| |
| // Tests that MHTML generated using the binary encoding contains the 'binary' |
| // Content-Transfer-Encoding header, and does not contain any base64 encoded |
| // parts. |
| IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest, GenerateBinaryMHTMLWithImage) { |
| GURL url(embedded_test_server()->GetURL("/page_with_image.html")); |
| MHTMLGenerationParams params = DefaultGenerationParams(); |
| params.use_binary_encoding = true; |
| |
| MHTMLFileInfo info = GenerateMHTML(params, url); |
| |
| EXPECT_THAT(info.content(), HasSubstr("Content-Transfer-Encoding: binary")); |
| EXPECT_THAT(info.content(), |
| Not(HasSubstr("Content-Transfer-Encoding: base64"))); |
| EXPECT_THAT(info.ContentLocations(), Contains(EndsWith("blank.jpg"))); |
| // Verify the boundary should start with CRLF. |
| EXPECT_THAT(info.content(), HasSubstr("\r\n------MultipartBoundary")); |
| } |
| |
| IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest, GenerateMHTMLIgnoreNoStore) { |
| GURL url(embedded_test_server()->GetURL("/nostore.html")); |
| |
| // Generate MHTML without specifying the FailForNoStoreMainFrame policy. |
| MHTMLFileInfo info = GenerateMHTML(url); |
| |
| // Make sure the contents of the body are present. |
| EXPECT_THAT(info.content(), HasSubstr("test body")); |
| |
| // Make sure that URL of the content is present. |
| EXPECT_THAT(info.ContentLocations(), Contains(EndsWith("/nostore.html"))); |
| } |
| |
| // TODO(crbug.com/40470937): These fail on Android under some circumstances. |
| #if BUILDFLAG(IS_ANDROID) |
| #define MAYBE_ViewedMHTMLContainsNoStoreContent \ |
| DISABLED_ViewedMHTMLContainsNoStoreContent |
| #else |
| #define MAYBE_ViewedMHTMLContainsNoStoreContent \ |
| ViewedMHTMLContainsNoStoreContent |
| #endif |
| |
| IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest, |
| MAYBE_ViewedMHTMLContainsNoStoreContent) { |
| // Generate MHTML. |
| CompareOptions options; |
| options.expected_number_of_frames = 2; |
| // We should see both frames. |
| options.expected_substrings = { |
| "Main Frame, normal headers.", |
| "Cache-Control: no-store test body", |
| }; |
| TestOriginalVsSavedPage( |
| embedded_test_server()->GetURL("/page_with_nostore_iframe.html"), |
| options); |
| } |
| |
| // Test suite that allows testing --site-per-process against cross-site frames. |
| // See http://dev.chromium.org/developers/design-documents/site-isolation. |
| class MHTMLGenerationSitePerProcessTest : public MHTMLGenerationTest { |
| public: |
| MHTMLGenerationSitePerProcessTest() = default; |
| |
| // The fixture can be neither copied nor assigned. |
| MHTMLGenerationSitePerProcessTest& operator=( |
| const MHTMLGenerationSitePerProcessTest&) = delete; |
| MHTMLGenerationSitePerProcessTest(const MHTMLGenerationSitePerProcessTest&) = |
| delete; |
| |
| protected: |
| // Applies the base fixture's command-line setup first, then appends the |
| // --site-per-process flag on top of it. |
| void SetUpCommandLine(base::CommandLine* command_line) override { |
| MHTMLGenerationTest::SetUpCommandLine(command_line); |
| content::IsolateAllSitesForTesting(command_line); |
| } |
| |
| // Adds a catch-all ("*") host resolver rule pointing at 127.0.0.1 and sets |
| // up the cross-site redirector on the embedded test server, and only then |
| // runs the base fixture's main-thread setup. |
| void SetUpOnMainThread() override { |
| host_resolver()->AddRule("*", "127.0.0.1"); |
| content::SetupCrossSiteRedirector(embedded_test_server()); |
| MHTMLGenerationTest::SetUpOnMainThread(); |
| } |
| }; |
| |
| // Test for crbug.com/538766. |
| // Generates MHTML for a page on a.com that embeds one iframe and checks that |
| // text and URLs of both frames end up in the result. |
| IN_PROC_BROWSER_TEST_P(MHTMLGenerationSitePerProcessTest, GenerateMHTML) { |
| GURL main_url = embedded_test_server()->GetURL( |
| "a.com", "/frame_tree/page_with_one_frame.html"); |
| base::FilePath mhtml_path(temp_dir_.GetPath()); |
| mhtml_path = mhtml_path.Append(FILE_PATH_LITERAL("test.mht")); |
| MHTMLFileInfo mhtml = GenerateMHTML(mhtml_path, main_url); |
| |
| // Text of the main frame and of the iframe (title1.html) must both be |
| // present in the generated content. |
| EXPECT_THAT(mhtml.content(), |
| AllOf(HasSubstr("This page has one cross-site iframe"), |
| HasSubstr("This page has no title"))); |
| |
| // The URL of each frame must be among the content locations; entries are |
| // matched by suffix. |
| EXPECT_THAT( |
| mhtml.ContentLocations(), |
| testing::IsSupersetOf({EndsWith("/frame_tree/page_with_one_frame.html"), |
| EndsWith("/title1.html")})); |
| } |
| |
| // Generates MHTML for /popup.html with `remove_popup_overlay` switched on in |
| // the generation params and checks that neither an "overlay" nor a "modal" |
| // class attribute is left in the output. |
| IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest, RemovePopupOverlay) { |
| GURL url(embedded_test_server()->GetURL("/popup.html")); |
| |
| // Start from the fixture's default params; only the overlay flag differs. |
| MHTMLGenerationParams params = DefaultGenerationParams(); |
| params.remove_popup_overlay = true; |
| |
| MHTMLFileInfo info = GenerateMHTML(params, url); |
| |
| // Make sure the overlay is removed. `=3D` is how `=` is written in |
| // quoted-printable text (presumably the encoding of the serialized markup). |
| EXPECT_THAT(info.content(), Not(HasSubstr("class=3D\"overlay"))); |
| EXPECT_THAT(info.content(), Not(HasSubstr("class=3D\"modal"))); |
| } |
| |
| // Registers two extra MHTML parts for the shell's WebContents, generates the |
| // MHTML and expects the body of each part to appear in the generated content. |
| IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest, GenerateMHTMLWithExtraData) { |
| constexpr char kFakeSignalData1[] = "FakeSignalData1"; |
| constexpr char kFakeSignalData2[] = "OtherMockDataForSignals"; |
| constexpr char kFakeContentType[] = "text/plain"; |
| constexpr char kFakeContentLocation[] = |
| "cid:signal-data-62691-645341c4-62b3-478e-a8c5-e0dfccc3ca02@mhtml.blink"; |
| |
| // Both parts share content type, content location and (empty) extra |
| // headers; only their bodies differ. |
| std::string part_type = kFakeContentType; |
| std::string part_location = kFakeContentLocation; |
| std::string no_extra_headers; |
| |
| // The extra parts are looked up through the web contents. |
| MHTMLExtraParts* parts = |
| MHTMLExtraParts::FromWebContents(shell()->web_contents()); |
| parts->AddExtraMHTMLPart(part_type, part_location, no_extra_headers, |
| kFakeSignalData1); |
| parts->AddExtraMHTMLPart(part_type, part_location, no_extra_headers, |
| kFakeSignalData2); |
| EXPECT_EQ(parts->size(), 2); |
| |
| GURL page_url = embedded_test_server()->GetURL("/page_with_image.html"); |
| base::FilePath mhtml_path = |
| temp_dir_.GetPath().Append(FILE_PATH_LITERAL("test.mht")); |
| MHTMLFileInfo mhtml = GenerateMHTML(mhtml_path, page_url); |
| |
| EXPECT_TRUE(has_mhtml_callback_run()); |
| |
| // Each of the two extra bodies has to be found in the generated MHTML. |
| EXPECT_THAT(mhtml.content(), HasSubstr(kFakeSignalData1)); |
| EXPECT_THAT(mhtml.content(), HasSubstr(kFakeSignalData2)); |
| } |
| |
| // Runs TestOriginalVsSavedPage on /page_with_multiple_iframes.html with 11 |
| // expected frames, then checks the page info and the content locations. |
| IN_PROC_BROWSER_TEST_P(MHTMLGenerationTest, GenerateMHTMLWithMultipleFrames) { |
| CompareOptions compare; |
| compare.expected_number_of_frames = 11; |
| |
| GURL page_url = |
| embedded_test_server()->GetURL("/page_with_multiple_iframes.html"); |
| CompareResult comparison = TestOriginalVsSavedPage(page_url, compare); |
| |
| EXPECT_EQ(comparison.original_info, comparison.saved_info); |
| |
| // Every frame document referenced by the .html has to show up as a content |
| // location of the generated file. |
| EXPECT_THAT( |
| comparison.file.ContentLocations(), |
| testing::IsSupersetOf({EndsWith("/page_with_image.html"), |
| EndsWith("/page_with_popup.html"), |
| EndsWith("/page_with_frameset.html"), |
| EndsWith("/page_with_allowfullscreen_frame.html"), |
| EndsWith("/page_with_iframe_and_link.html")})); |
| } |
| |
| // Runs TestOriginalVsSavedPage on /mhtml/custom_element_defined.html with one |
| // expected frame and the expected/forbidden substrings listed below, and |
| // requires the original and saved page info to compare equal. |
| IN_PROC_BROWSER_TEST_P(MHTMLGenerationImprovedTest, CustomElement) { |
| CompareOptions options; |
| options.expected_number_of_frames = 1; |
| options.expected_substrings = { |
| // If this isn't shown, the custom element is either not created, or |
| // not defined through customElements.define. |
| "Inside an Autonomous Custom Element", |
| "This is a defined built-in custom element", |
| "This is an undefined built-in custom element", |
| }; |
| options.forbidden_substrings = { |
| "Hidden with adopted stylesheet on shadowRoot", |
| "Hidden because undefined-test-element is not defined.", |
| "Hidden with adopted stylesheet on document", |
| "Hidden with stylesheet on shadowRoot", |
| }; |
| CompareResult result = TestOriginalVsSavedPage( |
| embedded_test_server()->GetURL("/mhtml/custom_element_defined.html"), |
| options); |
| EXPECT_EQ(result.original_info, result.saved_info); |
| } |
| |
| // Same string assertions as the `CustomElement` test, run against |
| // /mhtml/custom_element_defined_in_frame.html with two expected frames, plus |
| // one extra forbidden string. |
| IN_PROC_BROWSER_TEST_P(MHTMLGenerationImprovedTest, CustomElementInFrame) { |
| CompareOptions compare; |
| compare.forbidden_substrings = { |
| "Hidden with adopted stylesheet on shadowRoot", |
| "Hidden because undefined-test-element is not defined.", |
| "Hidden with adopted stylesheet on document", |
| "Hidden with stylesheet on shadowRoot", |
| |
| // Guards against <test-element> accidentally being defined outside of |
| // the frame. |
| "Hidden because not defined outside of frame.", |
| }; |
| compare.expected_substrings = { |
| "Inside an Autonomous Custom Element", |
| "This is a defined built-in custom element", |
| "This is an undefined built-in custom element", |
| }; |
| compare.expected_number_of_frames = 2; |
| |
| GURL page_url = embedded_test_server()->GetURL( |
| "/mhtml/custom_element_defined_in_frame.html"); |
| CompareResult comparison = TestOriginalVsSavedPage(page_url, compare); |
| EXPECT_EQ(comparison.original_info, comparison.saved_info); |
| } |
| |
| // Runs TestOriginalVsSavedPage on /mhtml/styles.html with one expected frame |
| // and the expected/forbidden substrings below, and requires the original and |
| // saved page info to compare equal. |
| IN_PROC_BROWSER_TEST_P(MHTMLGenerationImprovedTest, Styles) { |
| CompareOptions compare; |
| compare.forbidden_substrings = {"hidden2", "hidden3"}; |
| compare.expected_substrings = {"hidden1", "hidden4", |
| "This should show if inline CSS is escaped."}; |
| compare.expected_number_of_frames = 1; |
| |
| GURL styles_url = embedded_test_server()->GetURL("/mhtml/styles.html"); |
| CompareResult comparison = TestOriginalVsSavedPage(styles_url, compare); |
| EXPECT_EQ(comparison.original_info, comparison.saved_info); |
| } |
| |
| // Runs TestOriginalVsSavedPage on /mhtml/fonts.html, requires the original |
| // and saved page info to compare equal, and checks the "fonts" list that the |
| // saved page reports. |
| IN_PROC_BROWSER_TEST_P(MHTMLGenerationImprovedTest, Fonts) { |
| GURL fonts_url = embedded_test_server()->GetURL("/mhtml/fonts.html"); |
| CompareResult comparison = TestOriginalVsSavedPage(fonts_url); |
| |
| EXPECT_EQ(comparison.original_info, comparison.saved_info); |
| // The list must exist and contain at least these two entries. |
| EXPECT_THAT(comparison.saved_info.GetDict().FindList("fonts"), |
| Pointee(IsSupersetOf({"ahem: loaded", "notexist: not_loaded"}))); |
| } |
| |
| // Runs TestOriginalVsSavedPage on /mhtml/elements.html with empty-braced |
| // options, then checks the content locations and the page info equality. |
| IN_PROC_BROWSER_TEST_P(MHTMLGenerationImprovedTest, Elements) { |
| GURL elements_url = embedded_test_server()->GetURL("/mhtml/elements.html"); |
| CompareResult comparison = TestOriginalVsSavedPage(elements_url, {}); |
| |
| // Both the ?img and the ?svg variant of the inline image have to be listed |
| // as content locations of the generated file. |
| EXPECT_THAT(comparison.file.ContentLocations(), |
| AllOf(Contains(EndsWith("/image-inline.png?img")), |
| Contains(EndsWith("/image-inline.png?svg")))); |
| EXPECT_EQ(comparison.original_info, comparison.saved_info); |
| } |
| |
| // We instantiate the MHTML Generation Tests both using and not using the |
| // GenerateMHTMLWithResults callback. |
| // Each of the three fixtures below is instantiated under its own name with |
| // testing::Bool(), i.e. every TEST_P in this file runs once with the |
| // parameter set to false and once with it set to true. |
| INSTANTIATE_TEST_SUITE_P(MHTMLGenerationTest, |
| MHTMLGenerationTest, |
| testing::Bool()); |
| INSTANTIATE_TEST_SUITE_P(MHTMLGenerationSitePerProcessTest, |
| MHTMLGenerationSitePerProcessTest, |
| testing::Bool()); |
| INSTANTIATE_TEST_SUITE_P(MHTMLGenerationImprovedTest, |
| MHTMLGenerationImprovedTest, |
| testing::Bool()); |
| |
| } // namespace |
| } // namespace content |