danakj | c06d5f4 | 2024-02-29 17:54:11 | [diff] [blame] | 1 | // Copyright 2024 The Chromium Authors |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #include "clang/AST/ASTConsumer.h" |
| 6 | #include "clang/Basic/DiagnosticSema.h" |
| 7 | #include "clang/Frontend/CompilerInstance.h" |
| 8 | #include "clang/Frontend/FrontendAction.h" |
| 9 | #include "clang/Frontend/FrontendPluginRegistry.h" |
| 10 | #include "llvm/ADT/StringMap.h" |
| 11 | #include "llvm/ADT/StringRef.h" |
| 12 | #include "llvm/Support/MemoryBuffer.h" |
| 13 | |
| 14 | #include "Util.h" |
| 15 | |
| 16 | namespace chrome_checker { |
| 17 | |
| 18 | struct CheckFilePrefixes { |
| 19 | // Owns the memory holding the strings. |
| 20 | std::unique_ptr<llvm::MemoryBuffer> buffer; |
| 21 | // Pointers into the `buffer`, in sorted order. |
| 22 | std::vector<llvm::StringRef> prefixes; |
| 23 | }; |
| 24 | |
| 25 | class UnsafeBuffersDiagnosticConsumer : public clang::DiagnosticConsumer { |
| 26 | public: |
| 27 | UnsafeBuffersDiagnosticConsumer(clang::DiagnosticsEngine* engine, |
| 28 | clang::DiagnosticConsumer* next, |
| 29 | clang::CompilerInstance* instance, |
| 30 | CheckFilePrefixes check_file_prefixes) |
| 31 | : engine_(engine), |
| 32 | next_(next), |
| 33 | instance_(instance), |
| 34 | check_file_prefixes_(std::move(check_file_prefixes)) {} |
| 35 | ~UnsafeBuffersDiagnosticConsumer() override = default; |
| 36 | |
| 37 | void clear() override { |
| 38 | if (next_) { |
| 39 | next_->clear(); |
| 40 | NumErrors = next_->getNumErrors(); |
| 41 | NumWarnings = next_->getNumWarnings(); |
| 42 | } |
| 43 | } |
| 44 | |
| 45 | void BeginSourceFile(const clang::LangOptions& opts, |
| 46 | const clang::Preprocessor* pp) override { |
| 47 | if (next_) { |
| 48 | next_->BeginSourceFile(opts, pp); |
| 49 | NumErrors = next_->getNumErrors(); |
| 50 | NumWarnings = next_->getNumWarnings(); |
| 51 | } |
| 52 | } |
| 53 | |
| 54 | void EndSourceFile() override { |
| 55 | if (next_) { |
| 56 | next_->EndSourceFile(); |
| 57 | NumErrors = next_->getNumErrors(); |
| 58 | NumWarnings = next_->getNumWarnings(); |
| 59 | } |
| 60 | } |
| 61 | |
| 62 | void finish() override { |
| 63 | if (next_) { |
| 64 | next_->finish(); |
| 65 | NumErrors = next_->getNumErrors(); |
| 66 | NumWarnings = next_->getNumWarnings(); |
| 67 | } |
| 68 | } |
| 69 | |
| 70 | bool IncludeInDiagnosticCounts() const override { |
| 71 | return next_ && next_->IncludeInDiagnosticCounts(); |
| 72 | } |
| 73 | |
| 74 | void HandleDiagnostic(clang::DiagnosticsEngine::Level level, |
| 75 | const clang::Diagnostic& diag) override { |
| 76 | const unsigned diag_id = diag.getID(); |
| 77 | |
| 78 | if (inside_handle_diagnostic_) { |
| 79 | // Avoid handling the diagnostics which we emit in here. |
| 80 | return PassthroughDiagnostic(level, diag); |
| 81 | } |
| 82 | |
danakj | a8ecec3 | 2024-03-08 19:47:47 | [diff] [blame^] | 83 | // The `-Runsafe-buffer-usage-in-container` warning gets enabled along with |
| 84 | // `-Runsafe-buffer-usage`, but it's a hardcoded warning about std::span |
| 85 | // constructor. We don't want to emit these, we instead want the span ctor |
| 86 | // (and our own base::span ctor) to be marked [[clang::unsafe_buffer_usage]] |
| 87 | // and have that work: https://github.com/llvm/llvm-project/issues/80482 |
| 88 | if (diag_id == clang::diag::warn_unsafe_buffer_usage_in_container) { |
| 89 | return; |
| 90 | } |
| 91 | |
danakj | c06d5f4 | 2024-02-29 17:54:11 | [diff] [blame] | 92 | if (!(diag_id == clang::diag::warn_unsafe_buffer_variable || |
| 93 | diag_id == clang::diag::warn_unsafe_buffer_operation || |
| 94 | diag_id == clang::diag::note_unsafe_buffer_operation || |
| 95 | diag_id == clang::diag::note_unsafe_buffer_variable_fixit_group || |
| 96 | diag_id == clang::diag::note_unsafe_buffer_variable_fixit_together || |
| 97 | diag_id == clang::diag::note_safe_buffer_usage_suggestions_disabled || |
| 98 | diag_id == clang::diag::note_safe_buffer_debug_mode)) { |
| 99 | return PassthroughDiagnostic(level, diag); |
| 100 | } |
| 101 | |
| 102 | // Note that we promote from Remark directly to Error, rather than to |
| 103 | // Warning, as -Werror will not get applied to whatever we choose here. |
| 104 | const auto elevated_level = |
| 105 | (diag_id == clang::diag::warn_unsafe_buffer_variable || |
| 106 | diag_id == clang::diag::warn_unsafe_buffer_operation) |
| 107 | ? (engine_->getWarningsAsErrors() |
| 108 | ? clang::DiagnosticsEngine::Level::Error |
| 109 | : clang::DiagnosticsEngine::Level::Warning) |
| 110 | : clang::DiagnosticsEngine::Level::Note; |
| 111 | |
| 112 | const clang::SourceManager& sm = instance_->getSourceManager(); |
| 113 | const clang::SourceLocation loc = diag.getLocation(); |
| 114 | |
| 115 | // -Wunsage-buffer-usage errors are omitted conditionally based on what file |
| 116 | // they are coming from. |
| 117 | if (FileHasSafeBuffersWarnings(sm, loc)) { |
| 118 | // Elevate the Remark to a Warning, and pass along its Notes without |
| 119 | // changing them. Otherwise, do nothing, and the Remark (and its notes) |
| 120 | // will not be displayed. |
| 121 | // |
| 122 | // We don't count warnings/errors in this DiagnosticConsumer, so we don't |
| 123 | // call up to the base class here. Instead, whenever we pass through to |
| 124 | // the `next_` DiagnosticConsumer, we record its counts. |
| 125 | // |
| 126 | // Construct the StoredDiagnostic before Clear() or we get bad data from |
| 127 | // `diag`. |
| 128 | auto stored = clang::StoredDiagnostic(elevated_level, diag); |
| 129 | engine_->Clear(); |
| 130 | inside_handle_diagnostic_ = true; |
| 131 | engine_->Report(stored); |
| 132 | inside_handle_diagnostic_ = false; |
| 133 | } |
| 134 | } |
| 135 | |
| 136 | private: |
| 137 | void PassthroughDiagnostic(clang::DiagnosticsEngine::Level level, |
| 138 | const clang::Diagnostic& diag) { |
| 139 | if (next_) { |
| 140 | next_->HandleDiagnostic(level, diag); |
| 141 | NumErrors = next_->getNumErrors(); |
| 142 | NumWarnings = next_->getNumWarnings(); |
| 143 | } |
| 144 | } |
| 145 | |
| 146 | // Depending on where the diagnostic is coming from, we may ignore it or |
| 147 | // cause it to generate a warning. |
| 148 | bool FileHasSafeBuffersWarnings(const clang::SourceManager& sm, |
| 149 | clang::SourceLocation loc) { |
| 150 | // TODO(crbug.com/40284755): Expand this diagnostic to more code. It should |
| 151 | // include everything except kThirdParty and kSystem eventually. |
| 152 | LocationClassification loc_class = ClassifySourceLocation(sm, loc); |
| 153 | switch (loc_class) { |
| 154 | case LocationClassification::kThirdParty: |
| 155 | return false; |
| 156 | case LocationClassification::kSystem: |
| 157 | return false; |
| 158 | case LocationClassification::kGenerated: |
| 159 | return false; |
| 160 | case LocationClassification::kChromiumThirdParty: |
| 161 | return false; |
| 162 | case LocationClassification::kMacro: |
| 163 | break; |
| 164 | case LocationClassification::kFirstParty: |
| 165 | break; |
| 166 | case LocationClassification::kBlink: |
| 167 | break; |
| 168 | } |
| 169 | |
| 170 | // TODO(crbug.com/40284755): Currently we default to everything being |
| 171 | // known-bad except for a list of clean files. Eventually this should become |
| 172 | // default known-good with a list of bad files (which should become empty in |
| 173 | // time). |
| 174 | // |
| 175 | // TODO(danakj): It would be an optimization to find a way to avoid creating |
| 176 | // a std::string here. |
| 177 | std::string filename = GetFilename(sm, loc); |
| 178 | |
| 179 | // Avoid searching `check_file_prefixes_` more than once for a file. |
| 180 | auto cache_it = checked_files_cache_.find(filename); |
| 181 | if (cache_it != checked_files_cache_.end()) { |
| 182 | return cache_it->second; |
| 183 | } |
| 184 | |
| 185 | // Drop the ../ prefixes. |
| 186 | llvm::StringRef cmp_filename = filename; |
| 187 | while (cmp_filename.consume_front("./") || |
| 188 | cmp_filename.consume_front("../")) |
| 189 | ; |
| 190 | if (cmp_filename.empty()) { |
| 191 | return false; |
| 192 | } |
| 193 | |
| 194 | // Look for prefix match (whether any of `check_file_prefixes_` is a prefix |
| 195 | // of the filename). |
| 196 | if (!check_file_prefixes_.prefixes.empty()) { |
| 197 | const auto begin = check_file_prefixes_.prefixes.begin(); |
| 198 | const auto end = check_file_prefixes_.prefixes.end(); |
| 199 | auto it = std::upper_bound(begin, end, cmp_filename); |
| 200 | if (it != begin) { |
| 201 | --it; // Now `it` will be either the exact or prefix match. |
| 202 | if (*it == cmp_filename.take_front(it->size())) { |
| 203 | checked_files_cache_.insert({filename, true}); |
| 204 | return true; |
| 205 | } |
| 206 | } |
| 207 | } |
| 208 | checked_files_cache_.insert({filename, false}); |
| 209 | return false; |
| 210 | } |
| 211 | |
| 212 | // Used to prevent recursing into HandleDiagnostic() when we're emitting a |
| 213 | // diagnostic from that function. |
| 214 | bool inside_handle_diagnostic_ = false; |
| 215 | clang::DiagnosticsEngine* engine_; |
| 216 | clang::DiagnosticConsumer* next_; |
| 217 | clang::CompilerInstance* instance_; |
| 218 | CheckFilePrefixes check_file_prefixes_; |
| 219 | // Stores `true` if the filename (key) matches against the |
| 220 | // check_file_prefixes_, and `false` if it does not. Used as a shortcut to |
| 221 | // avoid looking through `check_file_prefixes_` for any file in this map. |
| 222 | // |
| 223 | // TODO(danakj): Another form of optimization here would be to replace this |
| 224 | // and the `check_file_prefixes_` vector with a string-prefix-matching data |
| 225 | // structure. |
| 226 | llvm::StringMap<bool> checked_files_cache_; |
| 227 | }; |
| 228 | |
| 229 | class UnsafeBuffersASTConsumer : public clang::ASTConsumer { |
| 230 | public: |
| 231 | UnsafeBuffersASTConsumer(clang::CompilerInstance* instance, |
| 232 | CheckFilePrefixes check_file_prefixes) |
| 233 | : instance_(instance) { |
| 234 | // Replace the DiagnosticConsumer with our own that sniffs diagnostics and |
| 235 | // can omit them. |
| 236 | clang::DiagnosticsEngine& engine = instance_->getDiagnostics(); |
| 237 | old_client_ = engine.getClient(); |
| 238 | old_owned_client_ = engine.takeClient(); |
| 239 | engine.setClient( |
| 240 | new UnsafeBuffersDiagnosticConsumer(&engine, old_client_, instance_, |
| 241 | std::move(check_file_prefixes)), |
| 242 | /*owned=*/true); |
| 243 | |
| 244 | // Enable the -Wunsafe-buffer-usage warning as a remark. This prevents it |
| 245 | // from stopping compilation, even with -Werror. If we see the remark go by, |
| 246 | // we can re-emit it as a warning for the files we want to include in the |
| 247 | // check. |
| 248 | engine.setSeverityForGroup(clang::diag::Flavor::WarningOrError, |
| 249 | "unsafe-buffer-usage", |
| 250 | clang::diag::Severity::Remark); |
| 251 | } |
| 252 | |
| 253 | ~UnsafeBuffersASTConsumer() { |
| 254 | // Restore the original DiagnosticConsumer that we replaced with our own. |
| 255 | clang::DiagnosticsEngine& engine = instance_->getDiagnostics(); |
| 256 | if (old_owned_client_) { |
| 257 | engine.setClient(old_owned_client_.release(), |
| 258 | /*owned=*/true); |
| 259 | } else { |
| 260 | engine.setClient(old_client_, /*owned=*/false); |
| 261 | } |
| 262 | } |
| 263 | |
| 264 | private: |
| 265 | clang::CompilerInstance* instance_; |
| 266 | clang::DiagnosticConsumer* old_client_; |
| 267 | std::unique_ptr<clang::DiagnosticConsumer> old_owned_client_; |
| 268 | }; |
| 269 | |
| 270 | class UnsafeBuffersASTAction : public clang::PluginASTAction { |
| 271 | public: |
| 272 | std::unique_ptr<clang::ASTConsumer> CreateASTConsumer( |
| 273 | clang::CompilerInstance& instance, |
| 274 | llvm::StringRef ref) override { |
| 275 | assert(!moved_prefixes_); // This would mean we move the prefixes twice. |
| 276 | moved_prefixes_ = true; |
| 277 | |
| 278 | // The ASTConsumer can outlive `this`, so we can't give it references to |
| 279 | // members here and must move the `check_file_prefixes_` vector instead. |
| 280 | return std::make_unique<UnsafeBuffersASTConsumer>( |
| 281 | &instance, std::move(check_file_prefixes_)); |
| 282 | } |
| 283 | |
| 284 | bool ParseArgs(const clang::CompilerInstance& instance, |
| 285 | const std::vector<std::string>& args) override { |
| 286 | bool found_file_arg = false; |
| 287 | for (size_t i = 0u; i < args.size(); ++i) { |
| 288 | // Look for any switches first (there are currently none). |
| 289 | |
| 290 | if (found_file_arg) { |
| 291 | llvm::errs() |
| 292 | << "[unsafe-buffers] Extra argument to unsafe-buffers plugin: '" |
| 293 | << args[i] << ". Usage: [SWITCHES] PATH_TO_CHECK_FILE'\n"; |
| 294 | return false; |
| 295 | } else { |
| 296 | found_file_arg = true; |
| 297 | if (!LoadCheckFilePrefixes(args[i])) { |
| 298 | llvm::errs() << "[unsafe-buffers] Failed to load paths from file '" |
| 299 | << args[i] << "'\n"; |
| 300 | } |
| 301 | } |
| 302 | } |
| 303 | return true; |
| 304 | } |
| 305 | |
| 306 | bool LoadCheckFilePrefixes(std::string_view path) { |
| 307 | if (auto buffer = llvm::MemoryBuffer::getFileAsStream(path)) { |
| 308 | check_file_prefixes_.buffer = std::move(buffer.get()); |
| 309 | } else { |
| 310 | llvm::errs() << "[unsafe-buffers] Error reading file: '" |
| 311 | << buffer.getError().message() << "'\n"; |
| 312 | return false; |
| 313 | } |
| 314 | |
| 315 | // Parse out the paths into `check_file_prefixes_.prefixes`. |
| 316 | // |
| 317 | // The file format is as follows: |
| 318 | // * Lines that begin with `#` are comments are are ignored. |
| 319 | // * Empty lines are ignored. |
| 320 | // * Every other line is a path prefix from the source tree root using |
| 321 | // unix-style delimiters. |
| 322 | // * For instance `a/b` will match the file at `//a/b/c.h` but will *not* |
| 323 | // match `//other/a/b/c.h`. |
| 324 | // * Exact file paths look like `a/b/c.h` and directory prefixes should end |
| 325 | // with a `/` such as `a/b/`. |
| 326 | // |
| 327 | // Example: |
| 328 | // ``` |
| 329 | // # A file of path prefixes. |
| 330 | // # Matches anything under the directory //foo/bar. |
| 331 | // foo/bar/ |
| 332 | // # Matches a specific file at //my/file.cc. |
| 333 | // my/file.cc |
| 334 | |
| 335 | llvm::StringRef string = check_file_prefixes_.buffer->getBuffer(); |
| 336 | while (!string.empty()) { |
| 337 | auto [lhs, rhs] = string.split('\n'); |
| 338 | string = rhs; |
| 339 | bool keep_lhs = false; |
| 340 | for (char c : lhs) { |
| 341 | if (c != ' ' && c != '#') { |
| 342 | keep_lhs = true; |
| 343 | break; |
| 344 | } |
| 345 | } |
| 346 | if (keep_lhs) { |
| 347 | check_file_prefixes_.prefixes.push_back(lhs); |
| 348 | } |
| 349 | } |
| 350 | |
| 351 | // TODO(danakj): Use std::ranges::sort when Clang is build with C++20. |
| 352 | std::sort(check_file_prefixes_.prefixes.begin(), |
| 353 | check_file_prefixes_.prefixes.end()); |
| 354 | return true; |
| 355 | } |
| 356 | |
| 357 | private: |
| 358 | CheckFilePrefixes check_file_prefixes_; |
| 359 | bool moved_prefixes_ = false; |
| 360 | }; |
| 361 | |
| 362 | static clang::FrontendPluginRegistry::Add<UnsafeBuffersASTAction> X( |
| 363 | "unsafe-buffers", |
| 364 | "Enforces -Wunsafe-buffer-usage during incremental rollout"); |
| 365 | |
| 366 | } // namespace chrome_checker |