kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/extractor/CommandLineUtils.cc (about)

     1  /*
     2   * Copyright 2014 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  // This file uses the Clang style conventions.
    17  
    18  #include "CommandLineUtils.h"
    19  
    20  #include <stddef.h>
    21  #include <stdio.h>
    22  #include <string.h>
    23  
    24  #include <algorithm>
    25  #include <cassert>
    26  #include <mutex>
    27  #include <string>
    28  #include <string_view>
    29  #include <vector>
    30  
    31  #include "absl/functional/function_ref.h"
    32  #include "absl/strings/str_format.h"
    33  #include "llvm/ADT/SmallVector.h"
    34  #include "llvm/ADT/StringRef.h"
    35  #include "llvm/Support/Regex.h"
    36  
    37  namespace kythe {
    38  namespace common {
    39  namespace {
    40  
    41  /// \brief A `llvm::Regex` wrapper that only performs full matches on
    42  /// non-empty strings.
    43  ///
    44  /// The second restriction makes it easier to write long chains of 'or'-ed
    45  /// regular expressions which may contain empty options without those silently
    46  /// matching empty strings.
    47  class FullMatchRegex {
    48   public:
    49    /// \param Regex an extended-syntax regex to match.
    50    explicit FullMatchRegex(llvm::StringRef Regex)
    51        : InnerRegex("^(" + Regex.str() + ")$", llvm::Regex::NoFlags) {
    52      std::string st;
    53      if (!InnerRegex.isValid(st)) {
    54        absl::FPrintF(stderr, "%s (regex was %s)\n", st, Regex.str());
    55        assert(0 && "!InnerRegex.isValid()");
    56      }
    57    }
    58  
    59    /// \return true if `String` is nonempty and a full match of this regex.
    60    bool FullMatch(llvm::StringRef String) const {
    61      std::lock_guard<std::mutex> MutexLock(RegexMutex);
    62      llvm::SmallVector<llvm::StringRef, 1> Matches;
    63      return !String.empty() && InnerRegex.match(String, &Matches);
    64    }
    65  
    66   private:
    67    mutable llvm::Regex InnerRegex;
    68    /// This mutex protects `InnerRegex`, since `llvm::Regex` is not threadsafe.
    69    mutable std::mutex RegexMutex;
    70  };
    71  
    72  }  // anonymous namespace
    73  
    74  // Decide what will the driver do based on the inputs found on the command
    75  // line.
    76  DriverAction DetermineDriverAction(const std::vector<std::string>& args) {
    77    const FullMatchRegex c_file_re("[^-].*\\.(c|i)");
    78    const FullMatchRegex cxx_file_re("[^-].*\\.(C|c\\+\\+|cc|cp|cpp|cxx|CPP|ii)");
    79    const FullMatchRegex fortran_file_re(
    80        "[^-].*\\.(f|for|ftn|F|FOR|fpp|FPP|FTN|f90|f95|f03|f08|F90|F95|F03|F08)");
    81    const FullMatchRegex go_file_re("[^-].*\\.go");
    82    const FullMatchRegex asm_file_re("[^-].*\\.(s|S|sx)");
    83  
    84    enum DriverAction action = UNKNOWN;
    85    bool is_link = true;
    86    for (size_t i = 0; i < args.size(); ++i) {
    87      const std::string& arg = args[i];
    88      if (arg == "-c") {
    89        is_link = false;
    90      } else if (arg == "-x" && i < args.size() - 1) {
    91        // If we find -x, the language is being overridden by the user.
    92        const std::string& language = args[i + 1];
    93        if (language == "c++" || language == "c++-header" ||
    94            language == "c++-cpp-output")
    95          action = CXX_COMPILE;
    96        else if (language == "c" || language == "c-header" ||
    97                 language == "cpp-output")
    98          action = C_COMPILE;
    99        else if (language == "assembler" || language == "assembler-with-cpp")
   100          action = ASSEMBLY;
   101        else if (language == "f77" || language == "f77-cpp-input" ||
   102                 language == "f95" || language == "f95-cpp-input")
   103          action = FORTRAN_COMPILE;
   104        else if (language == "go")
   105          action = GO_COMPILE;
   106      } else if (action == UNKNOWN) {
   107        // If we still have not recognized the input language, try to
   108        // recognize it from the input file (in order of relative frequency).
   109        if (cxx_file_re.FullMatch(arg)) {
   110          action = CXX_COMPILE;
   111        } else if (c_file_re.FullMatch(arg)) {
   112          action = C_COMPILE;
   113        } else if (asm_file_re.FullMatch(arg)) {
   114          action = ASSEMBLY;
   115        } else if (go_file_re.FullMatch(arg)) {
   116          action = GO_COMPILE;
   117        } else if (fortran_file_re.FullMatch(arg)) {
   118          action = FORTRAN_COMPILE;
   119        }
   120      }
   121    }
   122  
   123    // If the user did not specify -c, then the linker will be invoked.
   124    // Note that if the command line was something like "clang foo.cc",
   125    // it will be considered a LINK action.
   126    if (is_link) return LINK;
   127  
   128    return action;
   129  }
   130  
   131  // Returns true if a C or C++ source file (or other files we want Clang
   132  // diagnostics for) appears in the given command line or args.
   133  bool HasCxxInputInCommandLineOrArgs(
   134      const std::vector<std::string>& command_line_or_args) {
   135    const enum DriverAction action = DetermineDriverAction(command_line_or_args);
   136    return action == CXX_COMPILE || action == C_COMPILE;
   137  }
   138  
   139  static std::vector<std::string> CopySkippingN(
   140      const std::vector<std::string>& input,
   141      absl::FunctionRef<size_t(std::string_view)> skip) {
   142    std::vector<std::string> output;
   143    output.reserve(input.size());
   144    for (auto iter = input.begin(), end = input.end(); iter < end;) {
   145      if (size_t count = skip(*iter)) {
   146        if (count >= end - iter) {
   147          break;
   148        }
   149        iter += count;
   150      } else {
   151        output.push_back(*iter++);
   152      }
   153    }
   154    return output;
   155  }
   156  
   157  // Returns a copy of the input vector with every string which matches the
   158  // regular expression removed.
   159  static std::vector<std::string> CopyOmittingMatches(
   160      const FullMatchRegex& re, const std::vector<std::string>& input) {
   161    return CopySkippingN(input, [&](std::string_view arg) -> size_t {
   162      return re.FullMatch(arg) ? 1 : 0;
   163    });
   164  }
   165  
   166  // Returns a copy of the input vector after removing each string which matches
   167  // the regular expression and one string immediately following the matching
   168  // string.
   169  static std::vector<std::string> CopyOmittingMatchesAndFollowers(
   170      const FullMatchRegex& re, const std::vector<std::string>& input) {
   171    return CopySkippingN(input, [&](std::string_view arg) -> size_t {
   172      return re.FullMatch(arg) ? 2 : 0;
   173    });
   174  }
   175  
   176  // Returns a copy of the input vector with the supplied prefix string removed
   177  // from any element of which it was a prefix.
   178  static std::vector<std::string> StripPrefix(
   179      const std::string& prefix, const std::vector<std::string>& input) {
   180    std::vector<std::string> output;
   181    const size_t prefix_size = prefix.size();
   182    for (const auto& arg : input) {
   183      if (arg.compare(0, prefix_size, prefix) == 0) {
   184        output.push_back(arg.substr(prefix_size));
   185      } else {
   186        output.push_back(arg);
   187      }
   188    }
   189    return output;
   190  }
   191  
   192  static int ReplaceAllMatches(std::vector<std::string>& input,
   193                               const FullMatchRegex& re, llvm::StringRef repl) {
   194    int count = 0;
   195    for (auto& arg : input) {
   196      if (re.FullMatch(arg)) {
   197        arg = repl;
   198        ++count;
   199      }
   200    }
   201    return count;
   202  }
   203  
   204  std::vector<std::string> GCCArgsToClangArgs(
   205      const std::vector<std::string>& gcc_args) {
   206    // These are GCC-specific arguments which Clang does not yet understand or
   207    // support without issuing ugly warnings, and cannot otherwise be suppressed.
   208    const FullMatchRegex unsupported_args_re(
   209        "-W(no-)?(error=)?coverage-mismatch"
   210        "|-W(no-)?(error=)?frame-larger-than.*"
   211        "|-W(no-)?(error=)?maybe-uninitialized"
   212        "|-W(no-)?(error=)?thread-safety"
   213        "|-W(no-)?(error=)?thread-unsupported-lock-name"
   214        "|-W(no-)?(error=)?unused-but-set-parameter"
   215        "|-W(no-)?(error=)?unused-but-set-variable"
   216        "|-W(no-)?(error=)?unused-local-typedefs"
   217        "|-enable-libstdcxx-debug"
   218        "|-f(no-)?align-functions.*"
   219        "|-f(no-)?asynchronous-unwind-tables"
   220        "|-f(no-)?builtin-.*"
   221        "|-f(no-)?callgraph-profiles-sections"
   222        "|-f(no-)?float-store"
   223        "|-f(no-)?canonical-system-headers"
   224        "|-f(no-)?eliminate-unused-debug-types"
   225        "|-f(no-)?gcse"
   226        "|-f(no-)?ident"
   227        "|-f(no-)?inline-small-functions"
   228        "|-f(no-)?ivopts"
   229        "|-f(no-)?non-call-exceptions"
   230        "|-f(no-)?optimize-locality"
   231        "|-f(no-)?permissive"
   232        "|-f(no-)?plugin-arg-.*"
   233        "|-f(no-)?plugin=.*"
   234        "|-f(no-)?prefetch-loop-arrays"
   235        "|-f(no-)?profile-correction"
   236        "|-f(no-)?profile-dir.*"
   237        "|-f(no-)?profile-generate.*"
   238        "|-f(no-)?profile-use.*"
   239        "|-f(no-)?profile-reusedist"
   240        "|-f(no-)?profile-values"
   241        "|-f(no-)?record-compilation-info-in-elf"
   242        "|-f(no-)?reorder-functions=.*"
   243        "|-f(no-)?rounding-math"
   244        "|-f(no-)?ripa"
   245        "|-f(no-)?ripa-disallow-asm-modules"
   246        "|-f(no-)?sanitize.*"
   247        "|-f(no-)?see"
   248        "|-f(no-)?strict-enum-precision"
   249        "|-f(no-)?tracer"
   250        "|-f(no-)?tree-.*"
   251        "|-f(no-)?unroll-all-loops"
   252        "|-f(no-)?warn-incomplete-patterns"  // Why do we see this haskell flag?
   253        "|-g(:lines,source|gdb)"
   254        "|-m(no-)?align-double"
   255        "|-m(no-)?fpmath=.*"
   256        "|-m(no-)?cld"
   257        "|-m(no-)?red-zone"
   258        "|--param=.*"
   259        "|-mcpu=.*"  // For -mcpu=armv7-a, this leads to an assertion failure
   260                     // in llvm::ARM::getSubArch (and an error about an
   261                     // unsupported -mcpu); for cortex-a15, we get no such
   262                     // failure. TODO(zarko): Leave this filtered out for now,
   263                     // but figure out what to do to make this work properly.
   264        "|-mapcs-frame"
   265        "|-pass-exit-codes");
   266    const FullMatchRegex unsupported_args_with_values_re("-wrapper");
   267  
   268    return StripPrefix("-Xclang-only=",
   269                       CopyOmittingMatchesAndFollowers(
   270                           unsupported_args_with_values_re,
   271                           CopyOmittingMatches(unsupported_args_re, gcc_args)));
   272  }
   273  
   274  std::vector<std::string> GCCArgsToClangSyntaxOnlyArgs(
   275      const std::vector<std::string>& gcc_args) {
   276    return AdjustClangArgsForSyntaxOnly(GCCArgsToClangArgs(gcc_args));
   277  }
   278  
   279  std::vector<std::string> GCCArgsToClangAnalyzeArgs(
   280      const std::vector<std::string>& gcc_args) {
   281    return AdjustClangArgsForAnalyze(GCCArgsToClangArgs(gcc_args));
   282  }
   283  
   284  std::vector<std::string> AdjustClangArgsForSyntaxOnly(
   285      const std::vector<std::string>& clang_args) {
   286    // These are arguments which are inapplicable to '-fsyntax-only' behavior, but
   287    // are applicable to regular compilation.
   288    const FullMatchRegex inapplicable_args_re(
   289        "--analyze"
   290        "|-CC?"
   291        "|-E"
   292        "|-L.*"
   293        "|-MM?D"
   294        "|-M[MGP]?"
   295        "|-S"
   296        "|-W[al],.*"
   297        "|-Xlinker=.*"
   298        "|--for-linker=.*"
   299        "|--mllvm=.*"
   300        "|-f(no-)?data-sections"
   301        "|-f(no-)?function-sections"
   302        "|-f(no-)?omit-frame-pointer"
   303        "|-f(no-)?profile-arcs"
   304        "|-f(no-)?stack-protector(-all)?"
   305        "|-f(no-)?strict-aliasing"
   306        "|-f(no-)?test-coverage"
   307        "|-f(no-)?unroll-loops"
   308        "|-g.+"
   309        "|-nostartfiles"
   310        "|-s"
   311        "|-shared"
   312        "|-Xcrosstool.*");
   313    const FullMatchRegex inapplicable_args_with_values_re(
   314        "-M[FTQ]"
   315        "|-Xlinker"
   316        "|--for-linker"
   317        "|-Xassembler"
   318        "|-Xarch_.*"
   319        "|-mllvm");
   320    // Arguments which may match one of the above lists which we want to keep
   321    // regardless.
   322    const FullMatchRegex keep_args("-X(clang|preprocessor).*");
   323  
   324    std::vector<std::string> result =
   325        CopySkippingN(clang_args, [&](std::string_view arg) -> size_t {
   326          if (keep_args.FullMatch(arg)) {
   327            return 0;
   328          }
   329          if (inapplicable_args_re.FullMatch(arg)) {
   330            return 1;
   331          }
   332          if (inapplicable_args_with_values_re.FullMatch(arg)) {
   333            return 2;
   334          }
   335          return 0;
   336        });
   337  
   338    const FullMatchRegex action_args(
   339        "-E|--preprocess"
   340        "|-S|--assemble"
   341        "|-c|--compile"
   342        "|-fdriver-only"
   343        "|-fsyntax-only"
   344        "--precompile");
   345  
   346    // Attempt to preserve the location of any extant action args so that
   347    // subsequent path arguments aren't misinterpreted as arguments to preceding
   348    // flags.
   349    if (ReplaceAllMatches(result, action_args, "-fsyntax-only") == 0) {
   350      result.push_back("-fsyntax-only");
   351    }
   352  
   353    return result;
   354  }
   355  
   356  std::vector<std::string> AdjustClangArgsForAnalyze(
   357      const std::vector<std::string>& clang_args) {
   358    // --analyze is just like -fsyntax-only, except for the name of the
   359    // flag itself.
   360    std::vector<std::string> args = AdjustClangArgsForSyntaxOnly(clang_args);
   361    std::replace(args.begin(), args.end(), std::string("-fsyntax-only"),
   362                 std::string("--analyze"));
   363  
   364    // cfg-temporary-dtors is still off by default in the analyzer, but analyzing
   365    // that way would give us lots of false positives. This can go away once the
   366    // temporary destructors support switches to on.
   367    args.insert(args.end(), {"-Xanalyzer", "-analyzer-config", "-Xanalyzer",
   368                             "cfg-temporary-dtors=true"});
   369  
   370    return args;
   371  }
   372  
   373  std::vector<std::string> ClangArgsToGCCArgs(
   374      const std::vector<std::string>& clang_args) {
   375    // These are Clang-specific args which GCC does not understand.
   376    const FullMatchRegex unsupported_args_re(
   377        "--target=.*"
   378        "|-W(no-)?(error=)?ambiguous-member-template"
   379        "|-W(no-)?(error=)?bind-to-temporary-copy"
   380        "|-W(no-)?(error=)?bool-conversions"
   381        "|-W(no-)?(error=)?c\\+\\+0x-static-nonintegral-init"
   382        "|-W(no-)?(error=)?constant-conversion"
   383        "|-W(no-)?(error=)?constant-logical-operand"
   384        "|-W(no-)?(error=)?gnu"
   385        "|-W(no-)?(error=)?gnu-designator"
   386        "|-W(no-)?(error=)?initializer-overrides"
   387        "|-W(no-)?(error=)?invalid-noreturn"
   388        "|-W(no-)?(error=)?local-type-template-args"
   389        "|-W(no-)?(error=)?mismatched-tags"
   390        "|-W(no-)?(error=)?null-dereference"
   391        "|-W(no-)?(error=)?out-of-line-declaration"
   392        "|-W(no-)?(error=)?really-dont-use-clang-diagnostics"
   393        "|-W(no-)?(error=)?tautological-compare"
   394        "|-W(no-)?(error=)?unknown-attributes"
   395        "|-W(no-)?(error=)?unnamed-type-template-args"
   396        "|-W(no-)?(error=)?thread-safety-.*"
   397        "|-Xclang=.*"
   398        "|-Xclang-only=.*"
   399        "|-f(no-)?assume-sane-operator-new"
   400        "|-f(no-)?caret-diagnostics"
   401        "|-f(no-)?catch-undefined-behavior"
   402        "|-f(no-)?color-diagnostics"
   403        "|-f(no-)?diagnostics-fixit-info"
   404        "|-f(no-)?diagnostics-parseable-fixits"
   405        "|-f(no-)?diagnostics-print-source-range-info"
   406        "|-f(no-)?diagnostics-show-category.*"
   407        "|-f(no-)?heinous-gnu-extensions"
   408        "|-f(no-)?macro-backtrace-limit.*"
   409        "|-f(no-)?sanitize-address-zero-base-shadow"
   410        "|-f(no-)?sanitize-blacklist"
   411        "|-f(no-)?sanitize-memory-track-origins"
   412        "|-f(no-)?sanitize-recover"
   413        "|-f(no-)?sanitize=.*"
   414        "|-f(no-)?show-overloads.*"
   415        "|-f(no-)?use-init-array"
   416        "|-f(no-)?template-backtrace-limit.*"
   417  
   418        // TODO(zarko): Are plugin arguments sensible to keep?
   419        "|-fplugin=.*"
   420        "|-fplugin-arg-.*"
   421        "|-gline-tables-only");
   422    const FullMatchRegex unsupported_args_with_values_re(
   423        "-Xclang"
   424        "|-target");
   425  
   426    // It's important to remove the matches that have followers first -- those
   427    // followers might match one of the flag regular expressions, and removing
   428    // just the follower completely changes the semantics of the command.
   429    return CopyOmittingMatches(unsupported_args_re,
   430                               CopyOmittingMatchesAndFollowers(
   431                                   unsupported_args_with_values_re, clang_args));
   432  }
   433  
   434  std::vector<std::string> AdjustClangArgsForAddressSanitizer(
   435      const std::vector<std::string>& input) {
   436    const FullMatchRegex inapplicable_flags_re("-static");
   437    const FullMatchRegex inapplicable_flags_with_shared_re("-pie");
   438  
   439    for (const auto& arg : input) {
   440      if (arg == "-shared") {
   441        return CopyOmittingMatches(
   442            inapplicable_flags_with_shared_re,
   443            CopyOmittingMatches(inapplicable_flags_re, input));
   444      }
   445    }
   446  
   447    return CopyOmittingMatches(inapplicable_flags_re, input);
   448  }
   449  
   450  std::vector<char*> CommandLineToArgv(const std::vector<std::string>& command) {
   451    std::vector<char*> result;
   452    result.reserve(command.size() + 1);
   453    for (const auto& arg : command) {
   454      result.push_back(const_cast<char*>(arg.c_str()));
   455    }
   456    result.push_back(nullptr);
   457    return result;
   458  }
   459  
   460  }  // namespace common
   461  }  // namespace kythe