kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/common/file_vname_generator.cc (about)

     1  /*
     2   * Copyright 2014 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  #include "kythe/cxx/common/file_vname_generator.h"
    18  
    19  #include <algorithm>
    20  #include <memory>
    21  #include <optional>
    22  #include <string>
    23  #include <utility>
    24  #include <vector>
    25  
    26  #include "absl/status/status.h"
    27  #include "absl/status/statusor.h"
    28  #include "absl/strings/numbers.h"
    29  #include "absl/strings/str_cat.h"
    30  #include "absl/strings/str_replace.h"
    31  #include "absl/strings/string_view.h"
    32  #include "google/protobuf/io/zero_copy_stream.h"
    33  #include "google/protobuf/io/zero_copy_stream_impl.h"
    34  #include "google/protobuf/io/zero_copy_stream_impl_lite.h"
    35  #include "kythe/cxx/common/json_proto.h"
    36  #include "kythe/proto/storage.pb.h"
    37  #include "kythe/proto/vnames_config.pb.h"
    38  #include "re2/re2.h"
    39  
    40  namespace kythe {
    41  namespace {
    42  using ::google::protobuf::io::ArrayInputStream;
    43  using ::google::protobuf::io::ConcatenatingInputStream;
    44  using ::google::protobuf::io::ZeroCopyInputStream;
    45  
// Matches one substitution marker inside a VName template: one or more word
// characters enclosed in a pair of '@' signs, e.g. "@1@" or "@name@".
const LazyRE2 kSubstitutionsPattern = {R"(@\w+@)"};
    47  
    48  std::string EscapeBackslashes(absl::string_view value) {
    49    return absl::StrReplaceAll(value, {{R"(\)", R"(\\)"}});
    50  }
    51  
    52  std::optional<absl::string_view> FindMatch(absl::string_view text,
    53                                             const RE2& pattern) {
    54    absl::string_view match;
    55    if (pattern.Match(text, 0, text.size(), RE2::UNANCHORED, &match, 1)) {
    56      return match;
    57    }
    58    return std::nullopt;
    59  }
    60  
    61  absl::StatusOr<std::string> ParseTemplate(const RE2& pattern,
    62                                            absl::string_view input) {
    63    std::string result;
    64    while (std::optional<absl::string_view> match =
    65               FindMatch(input, *kSubstitutionsPattern)) {
    66      absl::string_view group = match->substr(1, match->size() - 2);
    67  
    68      int index = 0;
    69      if (!absl::SimpleAtoi(group, &index)) {
    70        auto iter = pattern.NamedCapturingGroups().find(std::string(group));
    71        if (iter == pattern.NamedCapturingGroups().end()) {
    72          return absl::InvalidArgumentError(
    73              absl::StrCat("Unknown named capture: ", group));
    74        }
    75        index = iter->second;
    76      }
    77      if (index == 0 || index > pattern.NumberOfCapturingGroups()) {
    78        return absl::InvalidArgumentError(
    79            absl::StrCat("Capture index out of range: ", index));
    80      }
    81      absl::string_view prefix = input.substr(0, match->begin() - input.begin());
    82      absl::StrAppend(&result, EscapeBackslashes(prefix), "\\", index);
    83      input.remove_prefix(prefix.size() + match->size());
    84    }
    85    // Include the unmatched tail.
    86    absl::StrAppend(&result, EscapeBackslashes(input));
    87    return result;
    88  }
    89  
    90  absl::StatusOr<proto::VNamesConfiguration> ParseConfigurationFromRuleArray(
    91      ZeroCopyInputStream& input) {
    92    static constexpr absl::string_view kOpen = R"({ "rules": )";
    93    ArrayInputStream open(kOpen.data(), kOpen.size());
    94  
    95    static constexpr absl::string_view kClose = "}";
    96    ArrayInputStream close(kClose.data(), kClose.size());
    97  
    98    // The vnames.json format is as an array of Rules, so we need to enclose it
    99    // in a top-level object and parse it as a field.
   100    ZeroCopyInputStream* streams[] = {&open, &input, &close};
   101    ConcatenatingInputStream stream(streams, std::size(streams));
   102  
   103    proto::VNamesConfiguration config;
   104    if (absl::Status status = ParseFromJsonStream(&stream, &config);
   105        !status.ok()) {
   106      return status;
   107    }
   108    return config;
   109  }
   110  
   111  }  // namespace
   112  
   113  kythe::proto::VName FileVNameGenerator::LookupBaseVName(
   114      absl::string_view path) const {
   115    for (const auto& rule : rules_) {
   116      std::vector<absl::string_view> captures(
   117          1 +
   118          std::max({RE2::MaxSubmatch(rule.corpus), RE2::MaxSubmatch(rule.root),
   119                    RE2::MaxSubmatch(rule.path)}));
   120      if (rule.pattern->Match(path, 0, path.size(), RE2::ANCHOR_BOTH,
   121                              captures.data(), captures.size())) {
   122        kythe::proto::VName result;
   123        if (!rule.corpus.empty()) {
   124          rule.pattern->Rewrite(result.mutable_corpus(), rule.corpus,
   125                                captures.data(), captures.size());
   126        }
   127        if (!rule.root.empty()) {
   128          rule.pattern->Rewrite(result.mutable_root(), rule.root, captures.data(),
   129                                captures.size());
   130        }
   131        if (!rule.path.empty()) {
   132          rule.pattern->Rewrite(result.mutable_path(), rule.path, captures.data(),
   133                                captures.size());
   134        }
   135        return result;
   136      }
   137    }
   138    return default_vname_;
   139  }
   140  
   141  kythe::proto::VName FileVNameGenerator::LookupVName(
   142      absl::string_view path) const {
   143    kythe::proto::VName vname = LookupBaseVName(path);
   144    if (vname.path().empty()) {
   145      vname.set_path(path);
   146    }
   147    return vname;
   148  }
   149  
   150  bool FileVNameGenerator::LoadJsonString(absl::string_view data,
   151                                          std::string* error_text) {
   152    absl::Status status = LoadJsonString(data);
   153    if (!status.ok() && error_text != nullptr) {
   154      *error_text = status.ToString();
   155    }
   156    return status.ok();
   157  }
   158  
   159  absl::Status FileVNameGenerator::LoadJsonStream(ZeroCopyInputStream& input) {
   160    absl::StatusOr<proto::VNamesConfiguration> config =
   161        ParseConfigurationFromRuleArray(input);
   162    if (!config.ok()) {
   163      return config.status();
   164    }
   165    for (const auto& entry : config->rules()) {
   166      if (entry.pattern().empty()) {
   167        return absl::InvalidArgumentError("VName rule is missing its pattern.");
   168      }
   169  
   170      auto pattern = std::make_shared<const RE2>(entry.pattern());
   171      if (pattern->error_code() != RE2::NoError) {
   172        return absl::InvalidArgumentError(pattern->error());
   173      }
   174      if (!entry.has_vname()) {
   175        return absl::InvalidArgumentError("VName rule is missing its template.");
   176      }
   177  
   178      absl::StatusOr<std::string> corpus =
   179          ParseTemplate(*pattern, entry.vname().corpus());
   180      if (!corpus.ok()) {
   181        return corpus.status();
   182      }
   183      absl::StatusOr<std::string> root =
   184          ParseTemplate(*pattern, entry.vname().root());
   185      if (!root.ok()) {
   186        return root.status();
   187      }
   188      absl::StatusOr<std::string> path =
   189          ParseTemplate(*pattern, entry.vname().path());
   190      if (!path.ok()) {
   191        return path.status();
   192      }
   193      rules_.push_back(VNameRule{
   194          .pattern = std::move(pattern),
   195          .corpus = *std::move(corpus),
   196          .root = *std::move(root),
   197          .path = *std::move(path),
   198      });
   199    }
   200    return absl::OkStatus();
   201  }
   202  
   203  absl::Status FileVNameGenerator::LoadJsonString(absl::string_view data) {
   204    ArrayInputStream stream(data.data(), data.size());
   205    return LoadJsonStream(stream);
   206  }
   207  }  // namespace kythe