[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 1 | // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #include "pdf/document_loader.h" |
| 6 | |
avi | a7c09d5 | 2015-12-21 19:49:43 | [diff] [blame^] | 7 | #include <stddef.h> |
| 8 | #include <stdint.h> |
| 9 | |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 10 | #include "base/logging.h" |
| 11 | #include "base/strings/string_util.h" |
| 12 | #include "net/http/http_util.h" |
| 13 | #include "ppapi/c/pp_errors.h" |
| 14 | #include "ppapi/cpp/url_loader.h" |
| 15 | #include "ppapi/cpp/url_request_info.h" |
| 16 | #include "ppapi/cpp/url_response_info.h" |
| 17 | |
| 18 | namespace chrome_pdf { |
| 19 | |
thestig | 945cd0cb | 2015-05-28 01:58:05 | [diff] [blame] | 20 | namespace { |
| 21 | |
thestig | 945cd0cb | 2015-05-28 01:58:05 | [diff] [blame] | 22 | // If the headers have a byte-range response, writes the start and end |
| 23 | // positions and returns true if at least the start position was parsed. |
| 24 | // The end position will be set to 0 if it was not found or parsed from the |
| 25 | // response. |
| 26 | // Returns false if not even a start position could be parsed. |
| 27 | bool GetByteRange(const std::string& headers, uint32_t* start, uint32_t* end) { |
| 28 | net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\n"); |
| 29 | while (it.GetNext()) { |
brettw | bc17d2c8 | 2015-06-09 22:39:08 | [diff] [blame] | 30 | if (base::LowerCaseEqualsASCII(it.name(), "content-range")) { |
thestig | 945cd0cb | 2015-05-28 01:58:05 | [diff] [blame] | 31 | std::string range = it.values().c_str(); |
brettw | 9550931 | 2015-07-16 23:57:33 | [diff] [blame] | 32 | if (base::StartsWith(range, "bytes", |
| 33 | base::CompareCase::INSENSITIVE_ASCII)) { |
thestig | 945cd0cb | 2015-05-28 01:58:05 | [diff] [blame] | 34 | range = range.substr(strlen("bytes")); |
| 35 | std::string::size_type pos = range.find('-'); |
| 36 | std::string range_end; |
| 37 | if (pos != std::string::npos) |
| 38 | range_end = range.substr(pos + 1); |
tfarina | 023b1dcc | 2015-12-06 13:25:41 | [diff] [blame] | 39 | base::TrimWhitespaceASCII(range, base::TRIM_LEADING, &range); |
| 40 | base::TrimWhitespaceASCII(range_end, base::TRIM_LEADING, &range_end); |
thestig | 945cd0cb | 2015-05-28 01:58:05 | [diff] [blame] | 41 | *start = atoi(range.c_str()); |
| 42 | *end = atoi(range_end.c_str()); |
| 43 | return true; |
| 44 | } |
| 45 | } |
| 46 | } |
| 47 | return false; |
| 48 | } |
| 49 | |
| 50 | // If the headers have a multi-part response, returns the boundary name. |
| 51 | // Otherwise returns an empty string. |
| 52 | std::string GetMultiPartBoundary(const std::string& headers) { |
| 53 | net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\n"); |
| 54 | while (it.GetNext()) { |
brettw | bc17d2c8 | 2015-06-09 22:39:08 | [diff] [blame] | 55 | if (base::LowerCaseEqualsASCII(it.name(), "content-type")) { |
brettw | 8e2106d | 2015-08-11 19:30:22 | [diff] [blame] | 56 | std::string type = base::ToLowerASCII(it.values()); |
brettw | 9550931 | 2015-07-16 23:57:33 | [diff] [blame] | 57 | if (base::StartsWith(type, "multipart/", base::CompareCase::SENSITIVE)) { |
thestig | 945cd0cb | 2015-05-28 01:58:05 | [diff] [blame] | 58 | const char* boundary = strstr(type.c_str(), "boundary="); |
| 59 | if (!boundary) { |
| 60 | NOTREACHED(); |
| 61 | break; |
| 62 | } |
| 63 | |
| 64 | return std::string(boundary + 9); |
| 65 | } |
| 66 | } |
| 67 | } |
| 68 | return std::string(); |
| 69 | } |
| 70 | |
thestig | 488102f | 2015-05-29 03:25:26 | [diff] [blame] | 71 | bool IsValidContentType(const std::string& type) { |
brettw | a7ff1b29 | 2015-07-16 17:49:29 | [diff] [blame] | 72 | return (base::EndsWith(type, "/pdf", base::CompareCase::INSENSITIVE_ASCII) || |
| 73 | base::EndsWith(type, ".pdf", base::CompareCase::INSENSITIVE_ASCII) || |
| 74 | base::EndsWith(type, "/x-pdf", |
| 75 | base::CompareCase::INSENSITIVE_ASCII) || |
| 76 | base::EndsWith(type, "/*", base::CompareCase::INSENSITIVE_ASCII) || |
| 77 | base::EndsWith(type, "/acrobat", |
| 78 | base::CompareCase::INSENSITIVE_ASCII) || |
| 79 | base::EndsWith(type, "/unknown", |
| 80 | base::CompareCase::INSENSITIVE_ASCII)); |
thestig | 488102f | 2015-05-29 03:25:26 | [diff] [blame] | 81 | } |
| 82 | |
thestig | 945cd0cb | 2015-05-28 01:58:05 | [diff] [blame] | 83 | } // namespace |
| 84 | |
| 85 | DocumentLoader::Client::~Client() { |
| 86 | } |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 87 | |
| 88 | DocumentLoader::DocumentLoader(Client* client) |
| 89 | : client_(client), partial_document_(false), request_pending_(false), |
| 90 | current_pos_(0), current_chunk_size_(0), current_chunk_read_(0), |
| 91 | document_size_(0), header_request_(true), is_multipart_(false) { |
| 92 | loader_factory_.Initialize(this); |
| 93 | } |
| 94 | |
| 95 | DocumentLoader::~DocumentLoader() { |
| 96 | } |
| 97 | |
| 98 | bool DocumentLoader::Init(const pp::URLLoader& loader, |
| 99 | const std::string& url, |
| 100 | const std::string& headers) { |
| 101 | DCHECK(url_.empty()); |
| 102 | url_ = url; |
| 103 | loader_ = loader; |
| 104 | |
| 105 | std::string response_headers; |
| 106 | if (!headers.empty()) { |
| 107 | response_headers = headers; |
| 108 | } else { |
| 109 | pp::URLResponseInfo response = loader_.GetResponseInfo(); |
| 110 | pp::Var headers_var = response.GetHeaders(); |
| 111 | |
| 112 | if (headers_var.is_string()) { |
| 113 | response_headers = headers_var.AsString(); |
| 114 | } |
| 115 | } |
| 116 | |
| 117 | bool accept_ranges_bytes = false; |
| 118 | bool content_encoded = false; |
thestig | 945cd0cb | 2015-05-28 01:58:05 | [diff] [blame] | 119 | uint32_t content_length = 0; |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 120 | std::string type; |
| 121 | std::string disposition; |
thestig | 488102f | 2015-05-29 03:25:26 | [diff] [blame] | 122 | |
| 123 | // This happens for PDFs not loaded from http(s) sources. |
| 124 | if (response_headers == "Content-Type: text/plain") { |
brettw | 9550931 | 2015-07-16 23:57:33 | [diff] [blame] | 125 | if (!base::StartsWith(url, "http://", |
| 126 | base::CompareCase::INSENSITIVE_ASCII) && |
| 127 | !base::StartsWith(url, "https://", |
| 128 | base::CompareCase::INSENSITIVE_ASCII)) { |
thestig | 488102f | 2015-05-29 03:25:26 | [diff] [blame] | 129 | type = "application/pdf"; |
| 130 | } |
| 131 | } |
| 132 | if (type.empty() && !response_headers.empty()) { |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 133 | net::HttpUtil::HeadersIterator it(response_headers.begin(), |
| 134 | response_headers.end(), "\n"); |
| 135 | while (it.GetNext()) { |
brettw | bc17d2c8 | 2015-06-09 22:39:08 | [diff] [blame] | 136 | if (base::LowerCaseEqualsASCII(it.name(), "content-length")) { |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 137 | content_length = atoi(it.values().c_str()); |
brettw | bc17d2c8 | 2015-06-09 22:39:08 | [diff] [blame] | 138 | } else if (base::LowerCaseEqualsASCII(it.name(), "accept-ranges")) { |
| 139 | accept_ranges_bytes = base::LowerCaseEqualsASCII(it.values(), "bytes"); |
| 140 | } else if (base::LowerCaseEqualsASCII(it.name(), "content-encoding")) { |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 141 | content_encoded = true; |
brettw | bc17d2c8 | 2015-06-09 22:39:08 | [diff] [blame] | 142 | } else if (base::LowerCaseEqualsASCII(it.name(), "content-type")) { |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 143 | type = it.values(); |
| 144 | size_t semi_colon_pos = type.find(';'); |
| 145 | if (semi_colon_pos != std::string::npos) { |
| 146 | type = type.substr(0, semi_colon_pos); |
| 147 | } |
tfarina | 023b1dcc | 2015-12-06 13:25:41 | [diff] [blame] | 148 | TrimWhitespaceASCII(type, base::TRIM_ALL, &type); |
brettw | bc17d2c8 | 2015-06-09 22:39:08 | [diff] [blame] | 149 | } else if (base::LowerCaseEqualsASCII(it.name(), "content-disposition")) { |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 150 | disposition = it.values(); |
| 151 | } |
| 152 | } |
| 153 | } |
thestig | 488102f | 2015-05-29 03:25:26 | [diff] [blame] | 154 | if (!type.empty() && !IsValidContentType(type)) |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 155 | return false; |
brettw | 9550931 | 2015-07-16 23:57:33 | [diff] [blame] | 156 | if (base::StartsWith(disposition, "attachment", |
| 157 | base::CompareCase::INSENSITIVE_ASCII)) |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 158 | return false; |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 159 | |
| 160 | if (content_length > 0) |
| 161 | chunk_stream_.Preallocate(content_length); |
| 162 | |
| 163 | document_size_ = content_length; |
| 164 | requests_count_ = 0; |
| 165 | |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 166 | // Enable partial loading only if file size is above the threshold. |
| 167 | // It will allow avoiding latency for multiple requests. |
| 168 | if (content_length > kMinFileSize && |
| 169 | accept_ranges_bytes && |
| 170 | !content_encoded) { |
| 171 | LoadPartialDocument(); |
| 172 | } else { |
| 173 | LoadFullDocument(); |
| 174 | } |
| 175 | return true; |
| 176 | } |
| 177 | |
| 178 | void DocumentLoader::LoadPartialDocument() { |
spelchat | 3ba2a281 | 2015-12-10 00:44:15 | [diff] [blame] | 179 | // The current request is a full request (not a range request) so it starts at |
| 180 | // 0 and ends at |document_size_|. |
| 181 | current_chunk_size_ = document_size_; |
| 182 | current_pos_ = 0; |
| 183 | current_request_offset_ = 0; |
| 184 | current_request_size_ = 0; |
| 185 | current_request_extended_size_ = document_size_; |
| 186 | request_pending_ = true; |
| 187 | |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 188 | partial_document_ = true; |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 189 | header_request_ = true; |
spelchat | 3ba2a281 | 2015-12-10 00:44:15 | [diff] [blame] | 190 | ReadMore(); |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 191 | } |
| 192 | |
| 193 | void DocumentLoader::LoadFullDocument() { |
| 194 | partial_document_ = false; |
| 195 | chunk_buffer_.clear(); |
| 196 | ReadMore(); |
| 197 | } |
| 198 | |
| 199 | bool DocumentLoader::IsDocumentComplete() const { |
| 200 | if (document_size_ == 0) // Document size unknown. |
| 201 | return false; |
| 202 | return IsDataAvailable(0, document_size_); |
| 203 | } |
| 204 | |
thestig | 945cd0cb | 2015-05-28 01:58:05 | [diff] [blame] | 205 | uint32_t DocumentLoader::GetAvailableData() const { |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 206 | if (document_size_ == 0) { // If document size is unknown. |
| 207 | return current_pos_; |
| 208 | } |
| 209 | |
| 210 | std::vector<std::pair<size_t, size_t> > ranges; |
| 211 | chunk_stream_.GetMissedRanges(0, document_size_, &ranges); |
thestig | 945cd0cb | 2015-05-28 01:58:05 | [diff] [blame] | 212 | uint32_t available = document_size_; |
| 213 | for (const auto& range : ranges) |
| 214 | available -= range.second; |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 215 | return available; |
| 216 | } |
| 217 | |
| 218 | void DocumentLoader::ClearPendingRequests() { |
spelchat | 3ba2a281 | 2015-12-10 00:44:15 | [diff] [blame] | 219 | pending_requests_.erase(pending_requests_.begin(), |
| 220 | pending_requests_.end()); |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 221 | } |
| 222 | |
thestig | 945cd0cb | 2015-05-28 01:58:05 | [diff] [blame] | 223 | bool DocumentLoader::GetBlock(uint32_t position, |
| 224 | uint32_t size, |
| 225 | void* buf) const { |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 226 | return chunk_stream_.ReadData(position, size, buf); |
| 227 | } |
| 228 | |
thestig | 945cd0cb | 2015-05-28 01:58:05 | [diff] [blame] | 229 | bool DocumentLoader::IsDataAvailable(uint32_t position, uint32_t size) const { |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 230 | return chunk_stream_.IsRangeAvailable(position, size); |
| 231 | } |
| 232 | |
thestig | 945cd0cb | 2015-05-28 01:58:05 | [diff] [blame] | 233 | void DocumentLoader::RequestData(uint32_t position, uint32_t size) { |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 234 | DCHECK(partial_document_); |
| 235 | |
| 236 | // We have some artefact request from |
| 237 | // PDFiumEngine::OnDocumentComplete() -> FPDFAvail_IsPageAvail after |
| 238 | // document is complete. |
| 239 | // We need this fix in PDFIum. Adding this as a work around. |
| 240 | // Bug: http://code.google.com/p/chromium/issues/detail?id=79996 |
| 241 | // Test url: |
| 242 | // http://www.icann.org/en/correspondence/holtzman-to-jeffrey-02mar11-en.pdf |
| 243 | if (IsDocumentComplete()) |
| 244 | return; |
| 245 | |
| 246 | pending_requests_.push_back(std::pair<size_t, size_t>(position, size)); |
| 247 | DownloadPendingRequests(); |
| 248 | } |
| 249 | |
spelchat | 3ba2a281 | 2015-12-10 00:44:15 | [diff] [blame] | 250 | void DocumentLoader::RemoveCompletedRanges() { |
| 251 | // Split every request that has been partially downloaded already into smaller |
| 252 | // requests. |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 253 | std::vector<std::pair<size_t, size_t> > ranges; |
spelchat | 3ba2a281 | 2015-12-10 00:44:15 | [diff] [blame] | 254 | auto it = pending_requests_.begin(); |
| 255 | while (it != pending_requests_.end()) { |
| 256 | chunk_stream_.GetMissedRanges(it->first, it->second, &ranges); |
| 257 | pending_requests_.insert(it, ranges.begin(), ranges.end()); |
| 258 | ranges.clear(); |
| 259 | pending_requests_.erase(it++); |
| 260 | } |
| 261 | } |
| 262 | |
| 263 | void DocumentLoader::DownloadPendingRequests() { |
| 264 | if (request_pending_) |
| 265 | return; |
| 266 | |
| 267 | uint32_t pos; |
| 268 | uint32_t size; |
| 269 | if (pending_requests_.empty()) { |
| 270 | // If the document is not complete and we have no outstanding requests, |
| 271 | // download what's left for as long as no other request gets added to |
| 272 | // |pending_requests_|. |
| 273 | pos = chunk_stream_.GetFirstMissingByte(); |
| 274 | if (pos >= document_size_) { |
| 275 | // We're done downloading the document. |
| 276 | return; |
| 277 | } |
| 278 | // Start with size 0, we'll set |current_request_extended_size_| to > 0. |
| 279 | // This way this request will get cancelled as soon as the renderer wants |
| 280 | // another portion of the document. |
| 281 | size = 0; |
| 282 | } else { |
| 283 | RemoveCompletedRanges(); |
| 284 | |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 285 | pos = pending_requests_.front().first; |
| 286 | size = pending_requests_.front().second; |
spelchat | 3ba2a281 | 2015-12-10 00:44:15 | [diff] [blame] | 287 | if (IsDataAvailable(pos, size)) { |
| 288 | ReadComplete(); |
| 289 | return; |
| 290 | } |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 291 | } |
| 292 | |
spelchat | 3ba2a281 | 2015-12-10 00:44:15 | [diff] [blame] | 293 | size_t last_byte_before = chunk_stream_.GetFirstMissingByteInInterval(pos); |
| 294 | if (size < kDefaultRequestSize) { |
| 295 | // Try to extend before pos, up to size |kDefaultRequestSize|. |
| 296 | if (pos + size - last_byte_before > kDefaultRequestSize) { |
| 297 | pos += size - kDefaultRequestSize; |
| 298 | size = kDefaultRequestSize; |
| 299 | } else { |
| 300 | size += pos - last_byte_before; |
| 301 | pos = last_byte_before; |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 302 | } |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 303 | } |
spelchat | 3ba2a281 | 2015-12-10 00:44:15 | [diff] [blame] | 304 | if (pos - last_byte_before < kDefaultRequestSize) { |
| 305 | // Don't leave a gap smaller than |kDefaultRequestSize|. |
| 306 | size += pos - last_byte_before; |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 307 | pos = last_byte_before; |
| 308 | } |
| 309 | |
spelchat | 3ba2a281 | 2015-12-10 00:44:15 | [diff] [blame] | 310 | current_request_offset_ = pos; |
| 311 | current_request_size_ = size; |
| 312 | |
| 313 | // Extend the request until the next downloaded byte or the end of the |
| 314 | // document. |
| 315 | size_t last_missing_byte = |
| 316 | chunk_stream_.GetLastMissingByteInInterval(pos + size - 1); |
| 317 | current_request_extended_size_ = last_missing_byte - pos + 1; |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 318 | |
| 319 | request_pending_ = true; |
| 320 | |
| 321 | // Start downloading first pending request. |
| 322 | loader_.Close(); |
| 323 | loader_ = client_->CreateURLLoader(); |
| 324 | pp::CompletionCallback callback = |
| 325 | loader_factory_.NewCallback(&DocumentLoader::DidOpen); |
spelchat | 3ba2a281 | 2015-12-10 00:44:15 | [diff] [blame] | 326 | pp::URLRequestInfo request = GetRequest(pos, current_request_extended_size_); |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 327 | requests_count_++; |
| 328 | int rv = loader_.Open(request, callback); |
| 329 | if (rv != PP_OK_COMPLETIONPENDING) |
| 330 | callback.Run(rv); |
| 331 | } |
| 332 | |
thestig | 945cd0cb | 2015-05-28 01:58:05 | [diff] [blame] | 333 | pp::URLRequestInfo DocumentLoader::GetRequest(uint32_t position, |
| 334 | uint32_t size) const { |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 335 | pp::URLRequestInfo request(client_->GetPluginInstance()); |
thestig | a9ceb72 | 2015-04-30 02:06:09 | [diff] [blame] | 336 | request.SetURL(url_); |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 337 | request.SetMethod("GET"); |
| 338 | request.SetFollowRedirects(true); |
thestig | a9ceb72 | 2015-04-30 02:06:09 | [diff] [blame] | 339 | request.SetCustomReferrerURL(url_); |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 340 | |
| 341 | const size_t kBufSize = 100; |
| 342 | char buf[kBufSize]; |
| 343 | // According to rfc2616, byte range specifies position of the first and last |
| 344 | // bytes in the requested range inclusively. Therefore we should subtract 1 |
| 345 | // from the position + size, to get index of the last byte that needs to be |
| 346 | // downloaded. |
| 347 | base::snprintf(buf, kBufSize, "Range: bytes=%d-%d", position, |
| 348 | position + size - 1); |
| 349 | pp::Var header(buf); |
| 350 | request.SetHeaders(header); |
| 351 | |
| 352 | return request; |
| 353 | } |
| 354 | |
| 355 | void DocumentLoader::DidOpen(int32_t result) { |
| 356 | if (result != PP_OK) { |
| 357 | NOTREACHED(); |
| 358 | return; |
| 359 | } |
| 360 | |
gene | 7cafb2ce6 | 2014-10-24 00:56:53 | [diff] [blame] | 361 | int32_t http_code = loader_.GetResponseInfo().GetStatusCode(); |
| 362 | if (http_code >= 400 && http_code < 500) { |
| 363 | // Error accessing resource. 4xx error indicate subsequent requests |
| 364 | // will fail too. |
| 365 | // E.g. resource has been removed from the server while loading it. |
| 366 | // https://code.google.com/p/chromium/issues/detail?id=414827 |
| 367 | return; |
| 368 | } |
| 369 | |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 370 | is_multipart_ = false; |
| 371 | current_chunk_size_ = 0; |
| 372 | current_chunk_read_ = 0; |
| 373 | |
| 374 | pp::Var headers_var = loader_.GetResponseInfo().GetHeaders(); |
| 375 | std::string headers; |
| 376 | if (headers_var.is_string()) |
| 377 | headers = headers_var.AsString(); |
| 378 | |
| 379 | std::string boundary = GetMultiPartBoundary(headers); |
thestig | 945cd0cb | 2015-05-28 01:58:05 | [diff] [blame] | 380 | if (!boundary.empty()) { |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 381 | // Leave position untouched for now, when we read the data we'll get it. |
| 382 | is_multipart_ = true; |
| 383 | multipart_boundary_ = boundary; |
| 384 | } else { |
| 385 | // Need to make sure that the server returned a byte-range, since it's |
| 386 | // possible for a server to just ignore our bye-range request and just |
| 387 | // return the entire document even if it supports byte-range requests. |
| 388 | // i.e. sniff response to |
| 389 | // http://www.act.org/compass/sample/pdf/geometry.pdf |
| 390 | current_pos_ = 0; |
thestig | 945cd0cb | 2015-05-28 01:58:05 | [diff] [blame] | 391 | uint32_t start_pos, end_pos; |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 392 | if (GetByteRange(headers, &start_pos, &end_pos)) { |
| 393 | current_pos_ = start_pos; |
| 394 | if (end_pos && end_pos > start_pos) |
| 395 | current_chunk_size_ = end_pos - start_pos + 1; |
| 396 | } |
| 397 | } |
| 398 | |
| 399 | ReadMore(); |
| 400 | } |
| 401 | |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 402 | void DocumentLoader::ReadMore() { |
| 403 | pp::CompletionCallback callback = |
| 404 | loader_factory_.NewCallback(&DocumentLoader::DidRead); |
| 405 | int rv = loader_.ReadResponseBody(buffer_, sizeof(buffer_), callback); |
| 406 | if (rv != PP_OK_COMPLETIONPENDING) |
| 407 | callback.Run(rv); |
| 408 | } |
| 409 | |
// Completion callback for ReadMore(). |result| is the number of bytes placed
// in |buffer_| when positive; PP_OK or PP_ERROR_ABORTED ends the current
// response and hands over to ReadComplete(). Any other value is unexpected.
void DocumentLoader::DidRead(int32_t result) {
  if (result > 0) {
    // |start|/|length| describe the payload inside |buffer_|; they are
    // adjusted below if the buffer begins with multipart part headers.
    char* start = buffer_;
    size_t length = result;
    if (is_multipart_ && result > 2) {
      // Look for the blank line ("\n\n" or "\r\n\r\n") that terminates the
      // part headers. |i| is the index of the first byte after it.
      for (int i = 2; i < result; ++i) {
        if ((buffer_[i - 1] == '\n' && buffer_[i - 2] == '\n') ||
            (i >= 4 &&
             buffer_[i - 1] == '\n' && buffer_[i - 2] == '\r' &&
             buffer_[i - 3] == '\n' && buffer_[i - 4] == '\r')) {
          uint32_t start_pos, end_pos;
          if (GetByteRange(std::string(buffer_, i), &start_pos, &end_pos)) {
            // The part headers carry the byte range: position the write
            // cursor there and skip the headers in the payload.
            current_pos_ = start_pos;
            start += i;
            length -= i;
            if (end_pos && end_pos > start_pos)
              current_chunk_size_ = end_pos - start_pos + 1;
          }
          break;
        }
      }

      // Reset this flag so we don't look inside the buffer in future calls of
      // DidRead for this response. Note that this code DOES NOT handle multi-
      // part responses with more than one part (we don't issue them at the
      // moment, so they shouldn't arrive).
      is_multipart_ = false;
    }

    // Never consume more than the chunk size announced for this response.
    if (current_chunk_size_ &&
        current_chunk_read_ + length > current_chunk_size_)
      length = current_chunk_size_ - current_chunk_read_;

    if (length) {
      if (document_size_ > 0) {
        chunk_stream_.WriteData(current_pos_, start, length);
      } else {
        // If we did not get content-length in the response, we can't
        // preallocate buffer for the entire document. Resizing array causing
        // memory fragmentation issues on the large files and OOM exceptions.
        // To fix this, we collect all chunks of the file to the list and
        // concatenate them together after request is complete.
        chunk_buffer_.push_back(std::vector<unsigned char>());
        chunk_buffer_.back().resize(length);
        memcpy(&(chunk_buffer_.back()[0]), start, length);
      }
      current_pos_ += length;
      current_chunk_read_ += length;
      client_->OnNewDataAvailable();
    }

    // Only call the renderer if we allow partial loading.
    if (!partial_document_) {
      ReadMore();
      return;
    }

    UpdateRendering();
    RemoveCompletedRanges();

    if (!pending_requests_.empty()) {
      // If there are pending requests and the current content we're downloading
      // doesn't satisfy any of these requests, cancel the current request to
      // fulfill those more important requests.
      bool satisfying_pending_request =
          SatisfyingRequest(current_request_offset_, current_request_size_);
      for (const auto& pending_request : pending_requests_) {
        if (SatisfyingRequest(pending_request.first, pending_request.second)) {
          satisfying_pending_request = true;
          break;
        }
      }
      // Cancel the request as it's not satisfying any request from the
      // renderer, unless the current request is finished in which case we let
      // it finish cleanly.
      if (!satisfying_pending_request &&
          current_pos_ < current_request_offset_ +
          current_request_extended_size_) {
        loader_.Close();
      }
    }

    // This runs even after the loader was closed just above; the
    // PP_ERROR_ABORTED branch below then routes to ReadComplete().
    // NOTE(review): presumably a read on a closed loader reports
    // PP_ERROR_ABORTED — confirm against the PPAPI URLLoader docs.
    ReadMore();
  } else if (result == PP_OK || result == PP_ERROR_ABORTED) {
    ReadComplete();
  } else {
    NOTREACHED();
  }
}
| 499 | |
spelchat | 3ba2a281 | 2015-12-10 00:44:15 | [diff] [blame] | 500 | bool DocumentLoader::SatisfyingRequest(size_t offset, size_t size) const { |
| 501 | return offset <= current_pos_ + kDefaultRequestSize && |
| 502 | current_pos_ < offset + size; |
| 503 | } |
| 504 | |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 505 | void DocumentLoader::ReadComplete() { |
| 506 | if (!partial_document_) { |
| 507 | if (document_size_ == 0) { |
| 508 | // For the document with no 'content-length" specified we've collected all |
| 509 | // the chunks already. Let's allocate final document buffer and copy them |
| 510 | // over. |
| 511 | chunk_stream_.Preallocate(current_pos_); |
thestig | 945cd0cb | 2015-05-28 01:58:05 | [diff] [blame] | 512 | uint32_t pos = 0; |
| 513 | for (auto& chunk : chunk_buffer_) { |
| 514 | chunk_stream_.WriteData(pos, &(chunk[0]), chunk.size()); |
| 515 | pos += chunk.size(); |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 516 | } |
| 517 | chunk_buffer_.clear(); |
| 518 | } |
| 519 | document_size_ = current_pos_; |
| 520 | client_->OnDocumentComplete(); |
| 521 | return; |
| 522 | } |
| 523 | |
| 524 | request_pending_ = false; |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 525 | |
| 526 | if (IsDocumentComplete()) { |
| 527 | client_->OnDocumentComplete(); |
| 528 | return; |
| 529 | } |
| 530 | |
spelchat | 3ba2a281 | 2015-12-10 00:44:15 | [diff] [blame] | 531 | UpdateRendering(); |
| 532 | DownloadPendingRequests(); |
| 533 | } |
| 534 | |
| 535 | void DocumentLoader::UpdateRendering() { |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 536 | if (header_request_) |
| 537 | client_->OnPartialDocumentLoaded(); |
| 538 | else |
| 539 | client_->OnPendingRequestComplete(); |
| 540 | header_request_ = false; |
[email protected] | 1b1e9eff | 2014-05-20 01:56:40 | [diff] [blame] | 541 | } |
| 542 | |
| 543 | } // namespace chrome_pdf |