kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/common/file_vname_generator.cc (about) 1 /* 2 * Copyright 2014 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "kythe/cxx/common/file_vname_generator.h" 18 19 #include <algorithm> 20 #include <memory> 21 #include <optional> 22 #include <string> 23 #include <utility> 24 #include <vector> 25 26 #include "absl/status/status.h" 27 #include "absl/status/statusor.h" 28 #include "absl/strings/numbers.h" 29 #include "absl/strings/str_cat.h" 30 #include "absl/strings/str_replace.h" 31 #include "absl/strings/string_view.h" 32 #include "google/protobuf/io/zero_copy_stream.h" 33 #include "google/protobuf/io/zero_copy_stream_impl.h" 34 #include "google/protobuf/io/zero_copy_stream_impl_lite.h" 35 #include "kythe/cxx/common/json_proto.h" 36 #include "kythe/proto/storage.pb.h" 37 #include "kythe/proto/vnames_config.pb.h" 38 #include "re2/re2.h" 39 40 namespace kythe { 41 namespace { 42 using ::google::protobuf::io::ArrayInputStream; 43 using ::google::protobuf::io::ConcatenatingInputStream; 44 using ::google::protobuf::io::ZeroCopyInputStream; 45 46 const LazyRE2 kSubstitutionsPattern = {R"(@\w+@)"}; 47 48 std::string EscapeBackslashes(absl::string_view value) { 49 return absl::StrReplaceAll(value, {{R"(\)", R"(\\)"}}); 50 } 51 52 std::optional<absl::string_view> FindMatch(absl::string_view text, 53 const RE2& pattern) { 54 absl::string_view match; 55 if (pattern.Match(text, 0, text.size(), RE2::UNANCHORED, &match, 1)) { 56 return match; 57 } 58 return std::nullopt; 59 } 60 61 absl::StatusOr<std::string> ParseTemplate(const RE2& pattern, 62 absl::string_view input) { 63 std::string result; 64 while (std::optional<absl::string_view> match = 65 FindMatch(input, *kSubstitutionsPattern)) { 66 absl::string_view group = match->substr(1, match->size() - 2); 67 68 int index = 0; 69 if (!absl::SimpleAtoi(group, &index)) { 70 auto iter = pattern.NamedCapturingGroups().find(std::string(group)); 71 if (iter == pattern.NamedCapturingGroups().end()) { 72 return absl::InvalidArgumentError( 73 absl::StrCat("Unknown named capture: ", group)); 74 } 75 index = iter->second; 76 } 77 if (index == 0 || index > pattern.NumberOfCapturingGroups()) { 78 return absl::InvalidArgumentError( 79 absl::StrCat("Capture index out of range: ", index)); 80 } 81 absl::string_view prefix = input.substr(0, match->begin() - input.begin()); 82 absl::StrAppend(&result, EscapeBackslashes(prefix), "\\", index); 83 input.remove_prefix(prefix.size() + match->size()); 84 } 85 // Include the unmatched tail. 86 absl::StrAppend(&result, EscapeBackslashes(input)); 87 return result; 88 } 89 90 absl::StatusOr<proto::VNamesConfiguration> ParseConfigurationFromRuleArray( 91 ZeroCopyInputStream& input) { 92 static constexpr absl::string_view kOpen = R"({ "rules": )"; 93 ArrayInputStream open(kOpen.data(), kOpen.size()); 94 95 static constexpr absl::string_view kClose = "}"; 96 ArrayInputStream close(kClose.data(), kClose.size()); 97 98 // The vnames.json format is as an array of Rules, so we need to enclose it 99 // in a top-level object and parse it as a field. 100 ZeroCopyInputStream* streams[] = {&open, &input, &close}; 101 ConcatenatingInputStream stream(streams, std::size(streams)); 102 103 proto::VNamesConfiguration config; 104 if (absl::Status status = ParseFromJsonStream(&stream, &config); 105 !status.ok()) { 106 return status; 107 } 108 return config; 109 } 110 111 } // namespace 112 113 kythe::proto::VName FileVNameGenerator::LookupBaseVName( 114 absl::string_view path) const { 115 for (const auto& rule : rules_) { 116 std::vector<absl::string_view> captures( 117 1 + 118 std::max({RE2::MaxSubmatch(rule.corpus), RE2::MaxSubmatch(rule.root), 119 RE2::MaxSubmatch(rule.path)})); 120 if (rule.pattern->Match(path, 0, path.size(), RE2::ANCHOR_BOTH, 121 captures.data(), captures.size())) { 122 kythe::proto::VName result; 123 if (!rule.corpus.empty()) { 124 rule.pattern->Rewrite(result.mutable_corpus(), rule.corpus, 125 captures.data(), captures.size()); 126 } 127 if (!rule.root.empty()) { 128 rule.pattern->Rewrite(result.mutable_root(), rule.root, captures.data(), 129 captures.size()); 130 } 131 if (!rule.path.empty()) { 132 rule.pattern->Rewrite(result.mutable_path(), rule.path, captures.data(), 133 captures.size()); 134 } 135 return result; 136 } 137 } 138 return default_vname_; 139 } 140 141 kythe::proto::VName FileVNameGenerator::LookupVName( 142 absl::string_view path) const { 143 kythe::proto::VName vname = LookupBaseVName(path); 144 if (vname.path().empty()) { 145 vname.set_path(path); 146 } 147 return vname; 148 } 149 150 bool FileVNameGenerator::LoadJsonString(absl::string_view data, 151 std::string* error_text) { 152 absl::Status status = LoadJsonString(data); 153 if (!status.ok() && error_text != nullptr) { 154 *error_text = status.ToString(); 155 } 156 return status.ok(); 157 } 158 159 absl::Status FileVNameGenerator::LoadJsonStream(ZeroCopyInputStream& input) { 160 absl::StatusOr<proto::VNamesConfiguration> config = 161 ParseConfigurationFromRuleArray(input); 162 if (!config.ok()) { 163 return config.status(); 164 } 165 for (const auto& entry : config->rules()) { 166 if (entry.pattern().empty()) { 167 return absl::InvalidArgumentError("VName rule is missing its pattern."); 168 } 169 170 auto pattern = std::make_shared<const RE2>(entry.pattern()); 171 if (pattern->error_code() != RE2::NoError) { 172 return absl::InvalidArgumentError(pattern->error()); 173 } 174 if (!entry.has_vname()) { 175 return absl::InvalidArgumentError("VName rule is missing its template."); 176 } 177 178 absl::StatusOr<std::string> corpus = 179 ParseTemplate(*pattern, entry.vname().corpus()); 180 if (!corpus.ok()) { 181 return corpus.status(); 182 } 183 absl::StatusOr<std::string> root = 184 ParseTemplate(*pattern, entry.vname().root()); 185 if (!root.ok()) { 186 return root.status(); 187 } 188 absl::StatusOr<std::string> path = 189 ParseTemplate(*pattern, entry.vname().path()); 190 if (!path.ok()) { 191 return path.status(); 192 } 193 rules_.push_back(VNameRule{ 194 .pattern = std::move(pattern), 195 .corpus = *std::move(corpus), 196 .root = *std::move(root), 197 .path = *std::move(path), 198 }); 199 } 200 return absl::OkStatus(); 201 } 202 203 absl::Status FileVNameGenerator::LoadJsonString(absl::string_view data) { 204 ArrayInputStream stream(data.data(), data.size()); 205 return LoadJsonStream(stream); 206 } 207 } // namespace kythe