kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/common/kythe_metadata_file.cc (about) 1 /* 2 * Copyright 2015 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "kythe/cxx/common/kythe_metadata_file.h" 18 19 #include <cctype> 20 #include <cstddef> 21 #include <memory> 22 #include <optional> 23 #include <string> 24 #include <utility> 25 #include <vector> 26 27 #include "absl/log/log.h" 28 #include "absl/strings/escaping.h" 29 #include "absl/strings/string_view.h" 30 #include "absl/strings/strip.h" 31 #include "google/protobuf/util/json_util.h" 32 #include "kythe/cxx/common/json_proto.h" 33 #include "kythe/cxx/common/schema/edges.h" 34 #include "kythe/proto/metadata.pb.h" 35 #include "kythe/proto/storage.pb.h" 36 37 namespace kythe { 38 namespace { 39 bool CheckVName(const proto::VName& vname) { 40 if (vname.corpus().empty() && vname.path().empty() && vname.root().empty() && 41 vname.signature().empty() && vname.language().empty()) { 42 LOG(WARNING) << "When loading metadata: empty vname."; 43 return false; 44 } 45 return true; 46 } 47 48 /// \brief Reads the contents of a C or C++ comment. 49 /// \param buf_string a buffer containing the text to read. 50 /// \param comment_slash_pos the offset of the first / starting the comment 51 /// in buf_string 52 /// \param data_start_pos the offset of the first byte of payload in 53 /// buf_string 54 std::optional<std::string> LoadCommentMetadata(absl::string_view buf_string, 55 size_t comment_slash_pos, 56 size_t data_start_pos) { 57 std::string raw_data; 58 // Over-reserves--though we expect the comment to be the only thing in the 59 // file or the last thing in the file, so this approximation is reasonable. 60 raw_data.reserve(buf_string.size() - comment_slash_pos); 61 size_t pos = data_start_pos; 62 // Tolerate single-line comments as well as multi-line comments. 63 // If there's a single-line comment, it should be the only thing in the 64 // file. 65 bool single_line = buf_string[comment_slash_pos + 1] == '/'; 66 auto next_term = 67 single_line ? absl::string_view::npos : buf_string.find("*/", pos); 68 for (; pos < buf_string.size();) { 69 while (pos < buf_string.size() && isspace(buf_string[pos])) ++pos; 70 auto next_newline = buf_string.find('\n', pos); 71 if (next_term != absl::string_view::npos && 72 (next_newline == absl::string_view::npos || next_term < next_newline)) { 73 raw_data.append(buf_string.data() + pos, next_term - pos); 74 } else if (next_newline != absl::string_view::npos) { 75 raw_data.append(buf_string.data() + pos, next_newline - pos); 76 pos = next_newline + 1; 77 if (!single_line) { 78 continue; 79 } 80 } else { 81 raw_data.append(buf_string.data() + pos, buf_string.size() - pos); 82 } 83 break; 84 } 85 std::string decoded; 86 return absl::Base64Unescape(raw_data, &decoded) 87 ? std::optional<std::string>(std::string(decoded)) 88 : std::nullopt; 89 } 90 91 /// \brief Attempts to load buffer as a header-style metadata file. 92 /// \param buffer data to try and parse. 93 /// \return the decoded metadata on success or std::nullopt on failure. 94 std::optional<std::string> LoadHeaderMetadata(absl::string_view buffer) { 95 if (buffer.size() < 2) { 96 return std::nullopt; 97 } 98 auto buf_string = buffer.data(); 99 // If the header doesn't start with a comment, it's invalid. 100 if (buf_string[0] != '/' || !(buf_string[1] == '*' || buf_string[1] == '/')) { 101 return std::nullopt; 102 } 103 return LoadCommentMetadata(buf_string, 0, 2); 104 } 105 106 /// \brief Attempts to load buffer as an inline metadata file 107 /// \param buffer data to try and parse. 108 /// \param search_string the string identifying the data. 109 /// \return the decoded metadata on success or std::nullopt on failure. 110 std::optional<std::string> FindCommentMetadata( 111 absl::string_view buffer, const std::string& search_string) { 112 auto comment_start = buffer.find("/* " + search_string); 113 if (comment_start == absl::string_view::npos) { 114 comment_start = buffer.find("// " + search_string); 115 if (comment_start == absl::string_view::npos) { 116 return std::nullopt; 117 } 118 } 119 // Data starts after the comment token, a space, and the user-provided 120 // marker. 121 return LoadCommentMetadata(buffer, comment_start, 122 comment_start + 3 + search_string.size()); 123 } 124 } // anonymous namespace 125 126 std::optional<MetadataFile::Rule> MetadataFile::LoadMetaElement( 127 const proto::metadata::MappingRule& mapping) { 128 using ::kythe::proto::metadata::MappingRule; 129 if (mapping.type() == MappingRule::NOP) { 130 return MetadataFile::Rule{}; 131 } 132 133 absl::string_view edge_string = mapping.edge(); 134 if (edge_string.empty() && !(mapping.type() == MappingRule::ANCHOR_DEFINES && 135 mapping.semantic() != MappingRule::SEMA_NONE)) { 136 LOG(WARNING) << "When loading metadata: empty edge."; 137 return std::nullopt; 138 } 139 bool reverse_edge = absl::ConsumePrefix(&edge_string, "%"); 140 if (mapping.type() == MappingRule::ANCHOR_DEFINES) { 141 if (!CheckVName(mapping.vname())) { 142 return std::nullopt; 143 } 144 Semantic sema; 145 switch (mapping.semantic()) { 146 case MappingRule::SEMA_WRITE: 147 sema = Semantic::kWrite; 148 break; 149 case MappingRule::SEMA_READ_WRITE: 150 sema = Semantic::kReadWrite; 151 break; 152 case MappingRule::SEMA_TAKE_ALIAS: 153 sema = Semantic::kTakeAlias; 154 break; 155 default: 156 sema = Semantic::kNone; 157 } 158 return MetadataFile::Rule{mapping.begin(), 159 mapping.end(), 160 kythe::common::schema::kDefinesBinding, 161 std::string(edge_string), 162 mapping.vname(), 163 reverse_edge, 164 false, 165 0, 166 0, 167 false, 168 sema}; 169 } else if (mapping.type() == MappingRule::ANCHOR_ANCHOR) { 170 if (!CheckVName(mapping.source_vname())) { 171 return std::nullopt; 172 } 173 return MetadataFile::Rule{mapping.target_begin(), 174 mapping.target_end(), 175 kythe::common::schema::kDefinesBinding, 176 std::string(edge_string), 177 mapping.source_vname(), 178 !reverse_edge, 179 true, 180 mapping.source_begin(), 181 mapping.source_end()}; 182 } else { 183 LOG(WARNING) << "When loading metadata: unknown meta type " 184 << mapping.type(); 185 return std::nullopt; 186 } 187 } 188 189 std::unique_ptr<MetadataFile> KytheMetadataSupport::LoadFromJSON( 190 absl::string_view id, absl::string_view json) { 191 proto::metadata::GeneratedCodeInfo metadata; 192 google::protobuf::util::JsonParseOptions options; 193 // Existing implementations specify message types using lower-case enum names, 194 // so ensure we can parse those. 195 options.case_insensitive_enum_parsing = true; 196 auto status = ParseFromJsonString(json, options, &metadata); 197 if (!status.ok()) { 198 LOG(WARNING) << "Error parsing JSON metadata: " << status; 199 return nullptr; 200 } 201 202 std::vector<MetadataFile::Rule> rules; 203 rules.reserve(metadata.meta().size()); 204 for (const auto& meta_element : metadata.meta()) { 205 if (auto rule = MetadataFile::LoadMetaElement(meta_element)) { 206 rules.push_back(*std::move(rule)); 207 } else { 208 return nullptr; 209 } 210 } 211 return MetadataFile::LoadFromRules(id, rules.begin(), rules.end()); 212 } 213 214 std::unique_ptr<kythe::MetadataFile> KytheMetadataSupport::ParseFile( 215 const std::string& raw_filename, const std::string& filename, 216 absl::string_view buffer, absl::string_view target_buffer) { 217 auto metadata = LoadFromJSON(raw_filename, buffer); 218 if (!metadata) { 219 LOG(WARNING) << "Failed loading " << raw_filename; 220 } 221 return metadata; 222 } 223 224 void MetadataSupports::UseVNameLookup(VNameLookup lookup) const { 225 for (auto& support : supports_) { 226 support->UseVNameLookup(lookup); 227 } 228 } 229 230 std::unique_ptr<kythe::MetadataFile> MetadataSupports::ParseFile( 231 const std::string& filename, absl::string_view buffer, 232 const std::string& search_string, absl::string_view target_buffer) const { 233 std::string modified_filename = filename; 234 std::optional<std::string> decoded_buffer_storage; 235 absl::string_view decoded_buffer = buffer; 236 if (!search_string.empty()) { 237 decoded_buffer_storage = FindCommentMetadata(buffer, search_string); 238 if (!decoded_buffer_storage) { 239 return nullptr; 240 } 241 decoded_buffer = *decoded_buffer_storage; 242 } 243 if (!decoded_buffer_storage && filename.size() >= 2 && 244 filename.find(".h", filename.size() - 2) != std::string::npos) { 245 decoded_buffer_storage = LoadHeaderMetadata(buffer); 246 if (!decoded_buffer_storage) { 247 LOG(WARNING) << filename << " wasn't a metadata header."; 248 } else { 249 decoded_buffer = *decoded_buffer_storage; 250 modified_filename = filename.substr(0, filename.size() - 2); 251 } 252 } 253 for (const auto& support : supports_) { 254 if (auto metadata = support->ParseFile(filename, modified_filename, 255 decoded_buffer, target_buffer)) { 256 return metadata; 257 } 258 } 259 return nullptr; 260 } 261 262 } // namespace kythe