kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/common/path_utils.cc (about)

     1  /*
     2   * Copyright 2018 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  #include "kythe/cxx/common/path_utils.h"
    18  
    19  #include <stdlib.h>
    20  #include <unistd.h>
    21  
    22  #include <cerrno>
    23  #include <cstdlib>
    24  #include <cstring>
    25  #include <memory>
    26  #include <optional>
    27  #include <string>
    28  #include <utility>
    29  #include <variant>
    30  #include <vector>
    31  
    32  #include "absl/algorithm/container.h"
    33  #include "absl/log/log.h"
    34  #include "absl/status/statusor.h"
    35  #include "absl/strings/match.h"
    36  #include "absl/strings/str_cat.h"
    37  #include "absl/strings/str_join.h"
    38  #include "absl/strings/str_split.h"
    39  #include "absl/strings/string_view.h"
    40  #include "absl/strings/strip.h"
    41  #include "absl/synchronization/mutex.h"
    42  #include "absl/types/span.h"
    43  #include "kythe/cxx/common/regex.h"
    44  #include "kythe/cxx/common/status.h"
    45  
    46  namespace kythe {
    47  namespace {
    48  
    49  struct FreeDeleter {
    50    void operator()(void* pointer) const { free(pointer); }
    51  };
    52  
    53  // Predicate used in CleanPath for skipping empty components
    54  // and components consistening of a single '.'.
    55  struct SkipEmptyDot {
    56    bool operator()(absl::string_view sp) { return !(sp.empty() || sp == "."); }
    57  };
    58  
    59  // Deal with relative paths as well as '/' and '//'.
    60  absl::string_view PathPrefix(absl::string_view path) {
    61    int slash_count = 0;
    62    for (char ch : path) {
    63      if (ch == '/' && ++slash_count <= 2) continue;
    64      break;
    65    }
    66    switch (slash_count) {
    67      case 0:
    68        return "";
    69      case 2:
    70        return "//";
    71      default:
    72        return "/";
    73    }
    74  }
    75  
    76  absl::string_view TrimPathPrefix(const absl::string_view path,
    77                                   absl::string_view prefix) {
    78    absl::string_view result = path;
    79    if (absl::ConsumePrefix(&result, prefix) &&
    80        (result.empty() || prefix == "/" || absl::ConsumePrefix(&result, "/"))) {
    81      return result;
    82    }
    83    return path;
    84  }
    85  
    86  absl::StatusOr<std::optional<PathRealizer>> MaybeMakeRealizer(
    87      PathCanonicalizer::Policy policy, absl::string_view root) {
    88    switch (policy) {
    89      case PathCanonicalizer::Policy::kCleanOnly:
    90        return {std::nullopt};
    91      case PathCanonicalizer::Policy::kPreferRelative:
    92      case PathCanonicalizer::Policy::kPreferReal:
    93        if (auto realizer = PathRealizer::Create(root); realizer.ok()) {
    94          return {*std::move(realizer)};
    95        } else {
    96          return realizer.status();
    97        }
    98    }
    99    return {std::nullopt};
   100  }
   101  
   102  std::optional<std::string> MaybeRealPath(
   103      const std::optional<PathRealizer>& realizer, absl::string_view root) {
   104    if (realizer.has_value()) {
   105      if (auto result = realizer->Relativize(root); result.ok()) {
   106        return *std::move(result);
   107      } else {
   108        LOG(ERROR) << "Unable to resolve " << root << ": " << result.status();
   109      }
   110    }
   111    return std::nullopt;
   112  }
   113  
   114  struct PathParts {
   115    absl::string_view dir, base;
   116  };
   117  
   118  PathParts SplitPath(absl::string_view path) {
   119    std::string::difference_type pos = path.find_last_of('/');
   120  
   121    // Handle the case with no '/' in 'path'.
   122    if (pos == absl::string_view::npos) return {path.substr(0, 0), path};
   123  
   124    // Handle the case with a single leading '/' in 'path'.
   125    if (pos == 0) return {path.substr(0, 1), absl::ClippedSubstr(path, 1)};
   126  
   127    return {path.substr(0, pos), absl::ClippedSubstr(path, pos + 1)};
   128  }
   129  
   130  constexpr struct VisitPattern {
   131    absl::string_view operator()(absl::string_view pattern) const {
   132      return pattern;
   133    }
   134    absl::string_view operator()(const Regex& pattern) const {
   135      return pattern.pattern();
   136    }
   137  } kVisitPattern;
   138  
   139  }  // namespace
   140  
   141  absl::StatusOr<PathCleaner> PathCleaner::Create(absl::string_view root) {
   142    if (absl::StatusOr<std::string> resolved = MakeCleanAbsolutePath(root);
   143        resolved.ok()) {
   144      return PathCleaner(*std::move(resolved));
   145    } else {
   146      return resolved.status();
   147    }
   148  }
   149  
   150  absl::StatusOr<std::string> PathCleaner::Relativize(
   151      absl::string_view path) const {
   152    if (absl::StatusOr<std::string> resolved = MakeCleanAbsolutePath(path);
   153        resolved.ok()) {
   154      return std::string(TrimPathPrefix(*std::move(resolved), root_));
   155    } else {
   156      return resolved.status();
   157    }
   158  }
   159  
   160  absl::StatusOr<PathRealizer> PathRealizer::Create(absl::string_view root) {
   161    if (absl::StatusOr<std::string> resolved = RealPath(root); resolved.ok()) {
   162      return PathRealizer(*std::move(resolved));
   163    } else {
   164      return resolved.status();
   165    }
   166  }
   167  
   168  // We do not copy the cache on assignment or construction to retain thread
   169  // safety.
   170  PathRealizer::PathRealizer(const PathRealizer& other) : root_(other.root_) {}
   171  PathRealizer& PathRealizer::operator=(const PathRealizer& other) {
   172    root_ = other.root_;
   173    return *this;
   174  }
   175  
   176  template <typename K, typename Fn>
   177  absl::StatusOr<std::string> PathRealizer::PathCache::FindOrInsert(K&& key,
   178                                                                    Fn&& make) {
   179    absl::MutexLock lock(&mu_);
   180    auto [iter, inserted] = cache_.try_emplace(std::forward<K>(key), "");
   181    if (inserted) {
   182      iter->second = std::forward<Fn>(make)();
   183    }
   184    return iter->second;
   185  }
   186  
   187  absl::StatusOr<std::string> PathRealizer::Relativize(
   188      absl::string_view path) const {
   189    return cache_->FindOrInsert(
   190        CleanPath(path), [this, path]() -> absl::StatusOr<std::string> {
   191          if (absl::StatusOr<std::string> resolved = RealPath(path);
   192              resolved.ok()) {
   193            return std::string(TrimPathPrefix(*std::move(resolved), root_));
   194          } else {
   195            return resolved.status();
   196          }
   197        });
   198  }
   199  
   200  absl::StatusOr<PathCanonicalizer> PathCanonicalizer::Create(
   201      absl::string_view root, Policy policy,
   202      absl::Span<const PathEntry> path_map) {
   203    absl::StatusOr<PathCleaner> cleaner = PathCleaner::Create(root);
   204    if (!cleaner.ok()) {
   205      return cleaner.status();
   206    }
   207    absl::StatusOr<std::optional<PathRealizer>> realizer =
   208        MaybeMakeRealizer(policy, root);
   209    if (!realizer.ok()) {
   210      return realizer.status();
   211    }
   212  
   213    std::vector<Policy> override_policies;
   214    std::vector<absl::string_view> override_paths;
   215    for (const auto& [path, policy] : path_map) {
   216      if (!realizer->has_value()) {
   217        realizer = MaybeMakeRealizer(policy, root);
   218        if (!realizer.ok()) {
   219          return realizer.status();
   220        }
   221      }
   222      override_policies.push_back(policy);
   223      override_paths.push_back(std::visit(kVisitPattern, path));
   224    }
   225    absl::StatusOr<RegexSet> override_set = RegexSet::Build(override_paths);
   226    if (!override_set.ok()) {
   227      return override_set.status();
   228    }
   229    return PathCanonicalizer(policy, *std::move(cleaner), *std::move(realizer),
   230                             *std::move(override_set),
   231                             std::move(override_policies));
   232  }
   233  
   234  absl::StatusOr<std::string> PathCanonicalizer::Relativize(
   235      absl::string_view path) const {
   236    absl::StatusOr<Policy> policy = PolicyFor(path);
   237    if (!policy.ok()) return policy.status();
   238  
   239    switch (*policy) {
   240      case Policy::kPreferRelative:
   241        if (auto resolved = MaybeRealPath(realizer_, path)) {
   242          if (!IsAbsolutePath(*resolved)) {
   243            return *std::move(resolved);
   244          }
   245        }
   246        return cleaner_.Relativize(path);
   247      case Policy::kPreferReal:
   248        if (auto resolved = MaybeRealPath(realizer_, path)) {
   249          return *std::move(resolved);
   250        }
   251        return cleaner_.Relativize(path);
   252      case Policy::kCleanOnly:
   253        return cleaner_.Relativize(path);
   254    }
   255    LOG(FATAL) << "Unknown policy: " << static_cast<int>(*policy);
   256    return std::string(path);
   257  }
   258  
   259  absl::StatusOr<PathCanonicalizer::Policy> PathCanonicalizer::PolicyFor(
   260      absl::string_view path) const {
   261    absl::StatusOr<std::vector<int>> match = override_set_.ExplainMatch(path);
   262    if (!match.ok()) {
   263      return match.status();
   264    }
   265    if (match->empty()) {
   266      return policy_;
   267    }
   268    return override_policy_[*absl::c_min_element(*match)];
   269  }
   270  
   271  std::optional<PathCanonicalizer::Policy> ParseCanonicalizationPolicy(
   272      absl::string_view policy) {
   273    using Policy = PathCanonicalizer::Policy;
   274    if (policy == "0" || policy == "clean-only") {
   275      return Policy::kCleanOnly;
   276    }
   277    if (policy == "1" || policy == "prefer-relative") {
   278      return Policy::kPreferRelative;
   279    }
   280    if (policy == "2" || policy == "prefer-real") {
   281      return Policy::kPreferReal;
   282    }
   283    return std::nullopt;
   284  }
   285  
   286  bool AbslParseFlag(absl::string_view text, PathCanonicalizer::Policy* policy,
   287                     std::string* error) {
   288    if (auto parsed = ParseCanonicalizationPolicy(text)) {
   289      *policy = *parsed;
   290      return true;
   291    }
   292    *error = "policy not one of: clean-only, prefer-relative, prefer-real";
   293    return false;
   294  }
   295  
   296  std::string AbslUnparseFlag(PathCanonicalizer::Policy policy) {
   297    using Policy = PathCanonicalizer::Policy;
   298    switch (policy) {
   299      case Policy::kCleanOnly:
   300        return "clean-only";
   301      case Policy::kPreferRelative:
   302        return "prefer-relative";
   303      case Policy::kPreferReal:
   304        return "prefer-real";
   305    }
   306    LOG(FATAL) << "Invalid path policy provided: " << static_cast<int>(policy);
   307    return "(unknown)";
   308  }
   309  
   310  std::string JoinPath(absl::string_view a, absl::string_view b) {
   311    return absl::StrCat(absl::StripSuffix(a, "/"), "/",
   312                        absl::StripPrefix(b, "/"));
   313  }
   314  
   315  bool AbslParseFlag(absl::string_view text, PathCanonicalizer::PathEntry* entry,
   316                     std::string* error) {
   317    size_t pos = text.find('@');
   318    if (pos == text.npos) {
   319      *error = "missing @ delimiter between path and policy";
   320      return false;
   321    }
   322    absl::StatusOr<Regex> path = Regex::Compile(text.substr(0, pos));
   323    if (!path.ok()) {
   324      *error = path.status().message();
   325      return false;
   326    }
   327    entry->path = *std::move(path);
   328    return AbslParseFlag(text.substr(pos + 1), &entry->policy, error);
   329  }
   330  
   331  std::string AbslUnparseFlag(const PathCanonicalizer::PathEntry& entry) {
   332    return absl::StrCat(std::visit(kVisitPattern, entry.path), "@",
   333                        AbslUnparseFlag(entry.policy));
   334  }
   335  
   336  bool AbslParseFlag(absl::string_view text,
   337                     std::vector<PathCanonicalizer::PathEntry>* entries,
   338                     std::string* error) {
   339    for (const auto& entry : absl::StrSplit(text, ' ', absl::SkipEmpty())) {
   340      if (!AbslParseFlag(entry, &entries->emplace_back(), error)) {
   341        entries->pop_back();
   342        return false;
   343      }
   344    }
   345    return true;
   346  }
   347  
   348  std::string AbslUnparseFlag(
   349      const std::vector<PathCanonicalizer::PathEntry>& entries) {
   350    return absl::StrJoin(entries, " ", [](std::string* out, const auto& entry) {
   351      absl::StrAppend(out, AbslUnparseFlag(entry));
   352    });
   353  }
   354  
   355  std::string CleanPath(absl::string_view input) {
   356    const bool is_absolute_path = absl::StartsWith(input, "/");
   357    std::vector<absl::string_view> parts;
   358    for (absl::string_view comp : absl::StrSplit(input, '/', SkipEmptyDot{})) {
   359      if (comp == "..") {
   360        if (!parts.empty() && parts.back() != "..") {
   361          parts.pop_back();
   362          continue;
   363        }
   364        if (is_absolute_path) continue;
   365      }
   366      parts.push_back(comp);
   367    }
   368    // Deal with leading '//' as well as '/'.
   369    return absl::StrCat(PathPrefix(input), absl::StrJoin(parts, "/"));
   370  }
   371  
   372  bool IsAbsolutePath(absl::string_view path) {
   373    return absl::StartsWith(path, "/");
   374  }
   375  
   376  absl::StatusOr<std::string> GetCurrentDirectory() {
   377    std::string result(128, '\0');
   378    while (::getcwd(&result.front(), result.size() + 1) == nullptr) {
   379      if (errno != ERANGE) {
   380        return ErrnoToStatus(errno);
   381      }
   382      result.resize(result.size() * 2);
   383    }
   384    result.resize(::strlen(result.data()));
   385    return result;
   386  }
   387  
   388  absl::StatusOr<std::string> MakeCleanAbsolutePath(absl::string_view path) {
   389    if (IsAbsolutePath(path)) {
   390      return CleanPath(path);
   391    }
   392    if (absl::StatusOr<std::string> dir = GetCurrentDirectory(); dir.ok()) {
   393      return CleanPath(JoinPath(*std::move(dir), path));
   394    } else {
   395      return dir.status();
   396    }
   397  }
   398  
   399  absl::string_view Dirname(absl::string_view path) {
   400    return SplitPath(path).dir;
   401  }
   402  
   403  absl::string_view Basename(absl::string_view path) {
   404    return SplitPath(path).base;
   405  }
   406  
   407  std::string RelativizePath(absl::string_view to_relativize,
   408                             absl::string_view relativize_against) {
   409    absl::StatusOr<PathCleaner> cleaner = PathCleaner::Create(relativize_against);
   410    if (!cleaner.ok()) {
   411      return "";
   412    }
   413    return cleaner->Relativize(to_relativize).value_or("");
   414  }
   415  
   416  absl::StatusOr<std::string> RealPath(absl::string_view path) {
   417    // realpath requires a null-terminated cstring, but string_view may not be.
   418    // checking whether or not it is null-terminated is potentially UB.
   419    std::string zpath(path);
   420  
   421    std::unique_ptr<char, FreeDeleter> resolved(
   422        ::realpath(zpath.c_str(), nullptr));
   423    if (resolved == nullptr) {
   424      return ErrnoToStatus(errno);
   425    }
   426    return std::string(resolved.get());
   427  }
   428  
   429  }  // namespace kythe