kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/extractor/CommandLineUtils.cc (about) 1 /* 2 * Copyright 2014 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 // This file uses the Clang style conventions. 17 18 #include "CommandLineUtils.h" 19 20 #include <stddef.h> 21 #include <stdio.h> 22 #include <string.h> 23 24 #include <algorithm> 25 #include <cassert> 26 #include <mutex> 27 #include <string> 28 #include <string_view> 29 #include <vector> 30 31 #include "absl/functional/function_ref.h" 32 #include "absl/strings/str_format.h" 33 #include "llvm/ADT/SmallVector.h" 34 #include "llvm/ADT/StringRef.h" 35 #include "llvm/Support/Regex.h" 36 37 namespace kythe { 38 namespace common { 39 namespace { 40 41 /// \brief A `llvm::Regex` wrapper that only performs full matches on 42 /// non-empty strings. 43 /// 44 /// The second restriction makes it easier to write long chains of 'or'-ed 45 /// regular expressions which may contain empty options without those silently 46 /// matching empty strings. 47 class FullMatchRegex { 48 public: 49 /// \param Regex an extended-syntax regex to match. 50 explicit FullMatchRegex(llvm::StringRef Regex) 51 : InnerRegex("^(" + Regex.str() + ")$", llvm::Regex::NoFlags) { 52 std::string st; 53 if (!InnerRegex.isValid(st)) { 54 absl::FPrintF(stderr, "%s (regex was %s)\n", st, Regex.str()); 55 assert(0 && "!InnerRegex.isValid()"); 56 } 57 } 58 59 /// \return true if `String` is nonempty and a full match of this regex. 60 bool FullMatch(llvm::StringRef String) const { 61 std::lock_guard<std::mutex> MutexLock(RegexMutex); 62 llvm::SmallVector<llvm::StringRef, 1> Matches; 63 return !String.empty() && InnerRegex.match(String, &Matches); 64 } 65 66 private: 67 mutable llvm::Regex InnerRegex; 68 /// This mutex protects `InnerRegex`, since `llvm::Regex` is not threadsafe. 69 mutable std::mutex RegexMutex; 70 }; 71 72 } // anonymous namespace 73 74 // Decide what will the driver do based on the inputs found on the command 75 // line. 76 DriverAction DetermineDriverAction(const std::vector<std::string>& args) { 77 const FullMatchRegex c_file_re("[^-].*\\.(c|i)"); 78 const FullMatchRegex cxx_file_re("[^-].*\\.(C|c\\+\\+|cc|cp|cpp|cxx|CPP|ii)"); 79 const FullMatchRegex fortran_file_re( 80 "[^-].*\\.(f|for|ftn|F|FOR|fpp|FPP|FTN|f90|f95|f03|f08|F90|F95|F03|F08)"); 81 const FullMatchRegex go_file_re("[^-].*\\.go"); 82 const FullMatchRegex asm_file_re("[^-].*\\.(s|S|sx)"); 83 84 enum DriverAction action = UNKNOWN; 85 bool is_link = true; 86 for (size_t i = 0; i < args.size(); ++i) { 87 const std::string& arg = args[i]; 88 if (arg == "-c") { 89 is_link = false; 90 } else if (arg == "-x" && i < args.size() - 1) { 91 // If we find -x, the language is being overridden by the user. 92 const std::string& language = args[i + 1]; 93 if (language == "c++" || language == "c++-header" || 94 language == "c++-cpp-output") 95 action = CXX_COMPILE; 96 else if (language == "c" || language == "c-header" || 97 language == "cpp-output") 98 action = C_COMPILE; 99 else if (language == "assembler" || language == "assembler-with-cpp") 100 action = ASSEMBLY; 101 else if (language == "f77" || language == "f77-cpp-input" || 102 language == "f95" || language == "f95-cpp-input") 103 action = FORTRAN_COMPILE; 104 else if (language == "go") 105 action = GO_COMPILE; 106 } else if (action == UNKNOWN) { 107 // If we still have not recognized the input language, try to 108 // recognize it from the input file (in order of relative frequency). 109 if (cxx_file_re.FullMatch(arg)) { 110 action = CXX_COMPILE; 111 } else if (c_file_re.FullMatch(arg)) { 112 action = C_COMPILE; 113 } else if (asm_file_re.FullMatch(arg)) { 114 action = ASSEMBLY; 115 } else if (go_file_re.FullMatch(arg)) { 116 action = GO_COMPILE; 117 } else if (fortran_file_re.FullMatch(arg)) { 118 action = FORTRAN_COMPILE; 119 } 120 } 121 } 122 123 // If the user did not specify -c, then the linker will be invoked. 124 // Note that if the command line was something like "clang foo.cc", 125 // it will be considered a LINK action. 126 if (is_link) return LINK; 127 128 return action; 129 } 130 131 // Returns true if a C or C++ source file (or other files we want Clang 132 // diagnostics for) appears in the given command line or args. 133 bool HasCxxInputInCommandLineOrArgs( 134 const std::vector<std::string>& command_line_or_args) { 135 const enum DriverAction action = DetermineDriverAction(command_line_or_args); 136 return action == CXX_COMPILE || action == C_COMPILE; 137 } 138 139 static std::vector<std::string> CopySkippingN( 140 const std::vector<std::string>& input, 141 absl::FunctionRef<size_t(std::string_view)> skip) { 142 std::vector<std::string> output; 143 output.reserve(input.size()); 144 for (auto iter = input.begin(), end = input.end(); iter < end;) { 145 if (size_t count = skip(*iter)) { 146 if (count >= end - iter) { 147 break; 148 } 149 iter += count; 150 } else { 151 output.push_back(*iter++); 152 } 153 } 154 return output; 155 } 156 157 // Returns a copy of the input vector with every string which matches the 158 // regular expression removed. 159 static std::vector<std::string> CopyOmittingMatches( 160 const FullMatchRegex& re, const std::vector<std::string>& input) { 161 return CopySkippingN(input, [&](std::string_view arg) -> size_t { 162 return re.FullMatch(arg) ? 1 : 0; 163 }); 164 } 165 166 // Returns a copy of the input vector after removing each string which matches 167 // the regular expression and one string immediately following the matching 168 // string. 169 static std::vector<std::string> CopyOmittingMatchesAndFollowers( 170 const FullMatchRegex& re, const std::vector<std::string>& input) { 171 return CopySkippingN(input, [&](std::string_view arg) -> size_t { 172 return re.FullMatch(arg) ? 2 : 0; 173 }); 174 } 175 176 // Returns a copy of the input vector with the supplied prefix string removed 177 // from any element of which it was a prefix. 178 static std::vector<std::string> StripPrefix( 179 const std::string& prefix, const std::vector<std::string>& input) { 180 std::vector<std::string> output; 181 const size_t prefix_size = prefix.size(); 182 for (const auto& arg : input) { 183 if (arg.compare(0, prefix_size, prefix) == 0) { 184 output.push_back(arg.substr(prefix_size)); 185 } else { 186 output.push_back(arg); 187 } 188 } 189 return output; 190 } 191 192 static int ReplaceAllMatches(std::vector<std::string>& input, 193 const FullMatchRegex& re, llvm::StringRef repl) { 194 int count = 0; 195 for (auto& arg : input) { 196 if (re.FullMatch(arg)) { 197 arg = repl; 198 ++count; 199 } 200 } 201 return count; 202 } 203 204 std::vector<std::string> GCCArgsToClangArgs( 205 const std::vector<std::string>& gcc_args) { 206 // These are GCC-specific arguments which Clang does not yet understand or 207 // support without issuing ugly warnings, and cannot otherwise be suppressed. 208 const FullMatchRegex unsupported_args_re( 209 "-W(no-)?(error=)?coverage-mismatch" 210 "|-W(no-)?(error=)?frame-larger-than.*" 211 "|-W(no-)?(error=)?maybe-uninitialized" 212 "|-W(no-)?(error=)?thread-safety" 213 "|-W(no-)?(error=)?thread-unsupported-lock-name" 214 "|-W(no-)?(error=)?unused-but-set-parameter" 215 "|-W(no-)?(error=)?unused-but-set-variable" 216 "|-W(no-)?(error=)?unused-local-typedefs" 217 "|-enable-libstdcxx-debug" 218 "|-f(no-)?align-functions.*" 219 "|-f(no-)?asynchronous-unwind-tables" 220 "|-f(no-)?builtin-.*" 221 "|-f(no-)?callgraph-profiles-sections" 222 "|-f(no-)?float-store" 223 "|-f(no-)?canonical-system-headers" 224 "|-f(no-)?eliminate-unused-debug-types" 225 "|-f(no-)?gcse" 226 "|-f(no-)?ident" 227 "|-f(no-)?inline-small-functions" 228 "|-f(no-)?ivopts" 229 "|-f(no-)?non-call-exceptions" 230 "|-f(no-)?optimize-locality" 231 "|-f(no-)?permissive" 232 "|-f(no-)?plugin-arg-.*" 233 "|-f(no-)?plugin=.*" 234 "|-f(no-)?prefetch-loop-arrays" 235 "|-f(no-)?profile-correction" 236 "|-f(no-)?profile-dir.*" 237 "|-f(no-)?profile-generate.*" 238 "|-f(no-)?profile-use.*" 239 "|-f(no-)?profile-reusedist" 240 "|-f(no-)?profile-values" 241 "|-f(no-)?record-compilation-info-in-elf" 242 "|-f(no-)?reorder-functions=.*" 243 "|-f(no-)?rounding-math" 244 "|-f(no-)?ripa" 245 "|-f(no-)?ripa-disallow-asm-modules" 246 "|-f(no-)?sanitize.*" 247 "|-f(no-)?see" 248 "|-f(no-)?strict-enum-precision" 249 "|-f(no-)?tracer" 250 "|-f(no-)?tree-.*" 251 "|-f(no-)?unroll-all-loops" 252 "|-f(no-)?warn-incomplete-patterns" // Why do we see this haskell flag? 253 "|-g(:lines,source|gdb)" 254 "|-m(no-)?align-double" 255 "|-m(no-)?fpmath=.*" 256 "|-m(no-)?cld" 257 "|-m(no-)?red-zone" 258 "|--param=.*" 259 "|-mcpu=.*" // For -mcpu=armv7-a, this leads to an assertion failure 260 // in llvm::ARM::getSubArch (and an error about an 261 // unsupported -mcpu); for cortex-a15, we get no such 262 // failure. TODO(zarko): Leave this filtered out for now, 263 // but figure out what to do to make this work properly. 264 "|-mapcs-frame" 265 "|-pass-exit-codes"); 266 const FullMatchRegex unsupported_args_with_values_re("-wrapper"); 267 268 return StripPrefix("-Xclang-only=", 269 CopyOmittingMatchesAndFollowers( 270 unsupported_args_with_values_re, 271 CopyOmittingMatches(unsupported_args_re, gcc_args))); 272 } 273 274 std::vector<std::string> GCCArgsToClangSyntaxOnlyArgs( 275 const std::vector<std::string>& gcc_args) { 276 return AdjustClangArgsForSyntaxOnly(GCCArgsToClangArgs(gcc_args)); 277 } 278 279 std::vector<std::string> GCCArgsToClangAnalyzeArgs( 280 const std::vector<std::string>& gcc_args) { 281 return AdjustClangArgsForAnalyze(GCCArgsToClangArgs(gcc_args)); 282 } 283 284 std::vector<std::string> AdjustClangArgsForSyntaxOnly( 285 const std::vector<std::string>& clang_args) { 286 // These are arguments which are inapplicable to '-fsyntax-only' behavior, but 287 // are applicable to regular compilation. 288 const FullMatchRegex inapplicable_args_re( 289 "--analyze" 290 "|-CC?" 291 "|-E" 292 "|-L.*" 293 "|-MM?D" 294 "|-M[MGP]?" 295 "|-S" 296 "|-W[al],.*" 297 "|-Xlinker=.*" 298 "|--for-linker=.*" 299 "|--mllvm=.*" 300 "|-f(no-)?data-sections" 301 "|-f(no-)?function-sections" 302 "|-f(no-)?omit-frame-pointer" 303 "|-f(no-)?profile-arcs" 304 "|-f(no-)?stack-protector(-all)?" 305 "|-f(no-)?strict-aliasing" 306 "|-f(no-)?test-coverage" 307 "|-f(no-)?unroll-loops" 308 "|-g.+" 309 "|-nostartfiles" 310 "|-s" 311 "|-shared" 312 "|-Xcrosstool.*"); 313 const FullMatchRegex inapplicable_args_with_values_re( 314 "-M[FTQ]" 315 "|-Xlinker" 316 "|--for-linker" 317 "|-Xassembler" 318 "|-Xarch_.*" 319 "|-mllvm"); 320 // Arguments which may match one of the above lists which we want to keep 321 // regardless. 322 const FullMatchRegex keep_args("-X(clang|preprocessor).*"); 323 324 std::vector<std::string> result = 325 CopySkippingN(clang_args, [&](std::string_view arg) -> size_t { 326 if (keep_args.FullMatch(arg)) { 327 return 0; 328 } 329 if (inapplicable_args_re.FullMatch(arg)) { 330 return 1; 331 } 332 if (inapplicable_args_with_values_re.FullMatch(arg)) { 333 return 2; 334 } 335 return 0; 336 }); 337 338 const FullMatchRegex action_args( 339 "-E|--preprocess" 340 "|-S|--assemble" 341 "|-c|--compile" 342 "|-fdriver-only" 343 "|-fsyntax-only" 344 "--precompile"); 345 346 // Attempt to preserve the location of any extant action args so that 347 // subsequent path arguments aren't misinterpreted as arguments to preceding 348 // flags. 349 if (ReplaceAllMatches(result, action_args, "-fsyntax-only") == 0) { 350 result.push_back("-fsyntax-only"); 351 } 352 353 return result; 354 } 355 356 std::vector<std::string> AdjustClangArgsForAnalyze( 357 const std::vector<std::string>& clang_args) { 358 // --analyze is just like -fsyntax-only, except for the name of the 359 // flag itself. 360 std::vector<std::string> args = AdjustClangArgsForSyntaxOnly(clang_args); 361 std::replace(args.begin(), args.end(), std::string("-fsyntax-only"), 362 std::string("--analyze")); 363 364 // cfg-temporary-dtors is still off by default in the analyzer, but analyzing 365 // that way would give us lots of false positives. This can go away once the 366 // temporary destructors support switches to on. 367 args.insert(args.end(), {"-Xanalyzer", "-analyzer-config", "-Xanalyzer", 368 "cfg-temporary-dtors=true"}); 369 370 return args; 371 } 372 373 std::vector<std::string> ClangArgsToGCCArgs( 374 const std::vector<std::string>& clang_args) { 375 // These are Clang-specific args which GCC does not understand. 376 const FullMatchRegex unsupported_args_re( 377 "--target=.*" 378 "|-W(no-)?(error=)?ambiguous-member-template" 379 "|-W(no-)?(error=)?bind-to-temporary-copy" 380 "|-W(no-)?(error=)?bool-conversions" 381 "|-W(no-)?(error=)?c\\+\\+0x-static-nonintegral-init" 382 "|-W(no-)?(error=)?constant-conversion" 383 "|-W(no-)?(error=)?constant-logical-operand" 384 "|-W(no-)?(error=)?gnu" 385 "|-W(no-)?(error=)?gnu-designator" 386 "|-W(no-)?(error=)?initializer-overrides" 387 "|-W(no-)?(error=)?invalid-noreturn" 388 "|-W(no-)?(error=)?local-type-template-args" 389 "|-W(no-)?(error=)?mismatched-tags" 390 "|-W(no-)?(error=)?null-dereference" 391 "|-W(no-)?(error=)?out-of-line-declaration" 392 "|-W(no-)?(error=)?really-dont-use-clang-diagnostics" 393 "|-W(no-)?(error=)?tautological-compare" 394 "|-W(no-)?(error=)?unknown-attributes" 395 "|-W(no-)?(error=)?unnamed-type-template-args" 396 "|-W(no-)?(error=)?thread-safety-.*" 397 "|-Xclang=.*" 398 "|-Xclang-only=.*" 399 "|-f(no-)?assume-sane-operator-new" 400 "|-f(no-)?caret-diagnostics" 401 "|-f(no-)?catch-undefined-behavior" 402 "|-f(no-)?color-diagnostics" 403 "|-f(no-)?diagnostics-fixit-info" 404 "|-f(no-)?diagnostics-parseable-fixits" 405 "|-f(no-)?diagnostics-print-source-range-info" 406 "|-f(no-)?diagnostics-show-category.*" 407 "|-f(no-)?heinous-gnu-extensions" 408 "|-f(no-)?macro-backtrace-limit.*" 409 "|-f(no-)?sanitize-address-zero-base-shadow" 410 "|-f(no-)?sanitize-blacklist" 411 "|-f(no-)?sanitize-memory-track-origins" 412 "|-f(no-)?sanitize-recover" 413 "|-f(no-)?sanitize=.*" 414 "|-f(no-)?show-overloads.*" 415 "|-f(no-)?use-init-array" 416 "|-f(no-)?template-backtrace-limit.*" 417 418 // TODO(zarko): Are plugin arguments sensible to keep? 419 "|-fplugin=.*" 420 "|-fplugin-arg-.*" 421 "|-gline-tables-only"); 422 const FullMatchRegex unsupported_args_with_values_re( 423 "-Xclang" 424 "|-target"); 425 426 // It's important to remove the matches that have followers first -- those 427 // followers might match one of the flag regular expressions, and removing 428 // just the follower completely changes the semantics of the command. 429 return CopyOmittingMatches(unsupported_args_re, 430 CopyOmittingMatchesAndFollowers( 431 unsupported_args_with_values_re, clang_args)); 432 } 433 434 std::vector<std::string> AdjustClangArgsForAddressSanitizer( 435 const std::vector<std::string>& input) { 436 const FullMatchRegex inapplicable_flags_re("-static"); 437 const FullMatchRegex inapplicable_flags_with_shared_re("-pie"); 438 439 for (const auto& arg : input) { 440 if (arg == "-shared") { 441 return CopyOmittingMatches( 442 inapplicable_flags_with_shared_re, 443 CopyOmittingMatches(inapplicable_flags_re, input)); 444 } 445 } 446 447 return CopyOmittingMatches(inapplicable_flags_re, input); 448 } 449 450 std::vector<char*> CommandLineToArgv(const std::vector<std::string>& command) { 451 std::vector<char*> result; 452 result.reserve(command.size() + 1); 453 for (const auto& arg : command) { 454 result.push_back(const_cast<char*>(arg.c_str())); 455 } 456 result.push_back(nullptr); 457 return result; 458 } 459 460 } // namespace common 461 } // namespace kythe