kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/common/kythe_metadata_file.cc (about)

     1  /*
     2   * Copyright 2015 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  #include "kythe/cxx/common/kythe_metadata_file.h"
    18  
    19  #include <cctype>
    20  #include <cstddef>
    21  #include <memory>
    22  #include <optional>
    23  #include <string>
    24  #include <utility>
    25  #include <vector>
    26  
    27  #include "absl/log/log.h"
    28  #include "absl/strings/escaping.h"
    29  #include "absl/strings/string_view.h"
    30  #include "absl/strings/strip.h"
    31  #include "google/protobuf/util/json_util.h"
    32  #include "kythe/cxx/common/json_proto.h"
    33  #include "kythe/cxx/common/schema/edges.h"
    34  #include "kythe/proto/metadata.pb.h"
    35  #include "kythe/proto/storage.pb.h"
    36  
    37  namespace kythe {
    38  namespace {
    39  bool CheckVName(const proto::VName& vname) {
    40    if (vname.corpus().empty() && vname.path().empty() && vname.root().empty() &&
    41        vname.signature().empty() && vname.language().empty()) {
    42      LOG(WARNING) << "When loading metadata: empty vname.";
    43      return false;
    44    }
    45    return true;
    46  }
    47  
    48  /// \brief Reads the contents of a C or C++ comment.
    49  /// \param buf_string a buffer containing the text to read.
    50  /// \param comment_slash_pos the offset of the first / starting the comment
    51  /// in buf_string
    52  /// \param data_start_pos the offset of the first byte of payload in
    53  /// buf_string
    54  std::optional<std::string> LoadCommentMetadata(absl::string_view buf_string,
    55                                                 size_t comment_slash_pos,
    56                                                 size_t data_start_pos) {
    57    std::string raw_data;
    58    // Over-reserves--though we expect the comment to be the only thing in the
    59    // file or the last thing in the file, so this approximation is reasonable.
    60    raw_data.reserve(buf_string.size() - comment_slash_pos);
    61    size_t pos = data_start_pos;
    62    // Tolerate single-line comments as well as multi-line comments.
    63    // If there's a single-line comment, it should be the only thing in the
    64    // file.
    65    bool single_line = buf_string[comment_slash_pos + 1] == '/';
    66    auto next_term =
    67        single_line ? absl::string_view::npos : buf_string.find("*/", pos);
    68    for (; pos < buf_string.size();) {
    69      while (pos < buf_string.size() && isspace(buf_string[pos])) ++pos;
    70      auto next_newline = buf_string.find('\n', pos);
    71      if (next_term != absl::string_view::npos &&
    72          (next_newline == absl::string_view::npos || next_term < next_newline)) {
    73        raw_data.append(buf_string.data() + pos, next_term - pos);
    74      } else if (next_newline != absl::string_view::npos) {
    75        raw_data.append(buf_string.data() + pos, next_newline - pos);
    76        pos = next_newline + 1;
    77        if (!single_line) {
    78          continue;
    79        }
    80      } else {
    81        raw_data.append(buf_string.data() + pos, buf_string.size() - pos);
    82      }
    83      break;
    84    }
    85    std::string decoded;
    86    return absl::Base64Unescape(raw_data, &decoded)
    87               ? std::optional<std::string>(std::string(decoded))
    88               : std::nullopt;
    89  }
    90  
    91  /// \brief Attempts to load buffer as a header-style metadata file.
    92  /// \param buffer data to try and parse.
    93  /// \return the decoded metadata on success or std::nullopt on failure.
    94  std::optional<std::string> LoadHeaderMetadata(absl::string_view buffer) {
    95    if (buffer.size() < 2) {
    96      return std::nullopt;
    97    }
    98    auto buf_string = buffer.data();
    99    // If the header doesn't start with a comment, it's invalid.
   100    if (buf_string[0] != '/' || !(buf_string[1] == '*' || buf_string[1] == '/')) {
   101      return std::nullopt;
   102    }
   103    return LoadCommentMetadata(buf_string, 0, 2);
   104  }
   105  
   106  /// \brief Attempts to load buffer as an inline metadata file
   107  /// \param buffer data to try and parse.
   108  /// \param search_string the string identifying the data.
   109  /// \return the decoded metadata on success or std::nullopt on failure.
   110  std::optional<std::string> FindCommentMetadata(
   111      absl::string_view buffer, const std::string& search_string) {
   112    auto comment_start = buffer.find("/* " + search_string);
   113    if (comment_start == absl::string_view::npos) {
   114      comment_start = buffer.find("// " + search_string);
   115      if (comment_start == absl::string_view::npos) {
   116        return std::nullopt;
   117      }
   118    }
   119    // Data starts after the comment token, a space, and the user-provided
   120    // marker.
   121    return LoadCommentMetadata(buffer, comment_start,
   122                               comment_start + 3 + search_string.size());
   123  }
   124  }  // anonymous namespace
   125  
   126  std::optional<MetadataFile::Rule> MetadataFile::LoadMetaElement(
   127      const proto::metadata::MappingRule& mapping) {
   128    using ::kythe::proto::metadata::MappingRule;
   129    if (mapping.type() == MappingRule::NOP) {
   130      return MetadataFile::Rule{};
   131    }
   132  
   133    absl::string_view edge_string = mapping.edge();
   134    if (edge_string.empty() && !(mapping.type() == MappingRule::ANCHOR_DEFINES &&
   135                                 mapping.semantic() != MappingRule::SEMA_NONE)) {
   136      LOG(WARNING) << "When loading metadata: empty edge.";
   137      return std::nullopt;
   138    }
   139    bool reverse_edge = absl::ConsumePrefix(&edge_string, "%");
   140    if (mapping.type() == MappingRule::ANCHOR_DEFINES) {
   141      if (!CheckVName(mapping.vname())) {
   142        return std::nullopt;
   143      }
   144      Semantic sema;
   145      switch (mapping.semantic()) {
   146        case MappingRule::SEMA_WRITE:
   147          sema = Semantic::kWrite;
   148          break;
   149        case MappingRule::SEMA_READ_WRITE:
   150          sema = Semantic::kReadWrite;
   151          break;
   152        case MappingRule::SEMA_TAKE_ALIAS:
   153          sema = Semantic::kTakeAlias;
   154          break;
   155        default:
   156          sema = Semantic::kNone;
   157      }
   158      return MetadataFile::Rule{mapping.begin(),
   159                                mapping.end(),
   160                                kythe::common::schema::kDefinesBinding,
   161                                std::string(edge_string),
   162                                mapping.vname(),
   163                                reverse_edge,
   164                                false,
   165                                0,
   166                                0,
   167                                false,
   168                                sema};
   169    } else if (mapping.type() == MappingRule::ANCHOR_ANCHOR) {
   170      if (!CheckVName(mapping.source_vname())) {
   171        return std::nullopt;
   172      }
   173      return MetadataFile::Rule{mapping.target_begin(),
   174                                mapping.target_end(),
   175                                kythe::common::schema::kDefinesBinding,
   176                                std::string(edge_string),
   177                                mapping.source_vname(),
   178                                !reverse_edge,
   179                                true,
   180                                mapping.source_begin(),
   181                                mapping.source_end()};
   182    } else {
   183      LOG(WARNING) << "When loading metadata: unknown meta type "
   184                   << mapping.type();
   185      return std::nullopt;
   186    }
   187  }
   188  
   189  std::unique_ptr<MetadataFile> KytheMetadataSupport::LoadFromJSON(
   190      absl::string_view id, absl::string_view json) {
   191    proto::metadata::GeneratedCodeInfo metadata;
   192    google::protobuf::util::JsonParseOptions options;
   193    // Existing implementations specify message types using lower-case enum names,
   194    // so ensure we can parse those.
   195    options.case_insensitive_enum_parsing = true;
   196    auto status = ParseFromJsonString(json, options, &metadata);
   197    if (!status.ok()) {
   198      LOG(WARNING) << "Error parsing JSON metadata: " << status;
   199      return nullptr;
   200    }
   201  
   202    std::vector<MetadataFile::Rule> rules;
   203    rules.reserve(metadata.meta().size());
   204    for (const auto& meta_element : metadata.meta()) {
   205      if (auto rule = MetadataFile::LoadMetaElement(meta_element)) {
   206        rules.push_back(*std::move(rule));
   207      } else {
   208        return nullptr;
   209      }
   210    }
   211    return MetadataFile::LoadFromRules(id, rules.begin(), rules.end());
   212  }
   213  
   214  std::unique_ptr<kythe::MetadataFile> KytheMetadataSupport::ParseFile(
   215      const std::string& raw_filename, const std::string& filename,
   216      absl::string_view buffer, absl::string_view target_buffer) {
   217    auto metadata = LoadFromJSON(raw_filename, buffer);
   218    if (!metadata) {
   219      LOG(WARNING) << "Failed loading " << raw_filename;
   220    }
   221    return metadata;
   222  }
   223  
   224  void MetadataSupports::UseVNameLookup(VNameLookup lookup) const {
   225    for (auto& support : supports_) {
   226      support->UseVNameLookup(lookup);
   227    }
   228  }
   229  
   230  std::unique_ptr<kythe::MetadataFile> MetadataSupports::ParseFile(
   231      const std::string& filename, absl::string_view buffer,
   232      const std::string& search_string, absl::string_view target_buffer) const {
   233    std::string modified_filename = filename;
   234    std::optional<std::string> decoded_buffer_storage;
   235    absl::string_view decoded_buffer = buffer;
   236    if (!search_string.empty()) {
   237      decoded_buffer_storage = FindCommentMetadata(buffer, search_string);
   238      if (!decoded_buffer_storage) {
   239        return nullptr;
   240      }
   241      decoded_buffer = *decoded_buffer_storage;
   242    }
   243    if (!decoded_buffer_storage && filename.size() >= 2 &&
   244        filename.find(".h", filename.size() - 2) != std::string::npos) {
   245      decoded_buffer_storage = LoadHeaderMetadata(buffer);
   246      if (!decoded_buffer_storage) {
   247        LOG(WARNING) << filename << " wasn't a metadata header.";
   248      } else {
   249        decoded_buffer = *decoded_buffer_storage;
   250        modified_filename = filename.substr(0, filename.size() - 2);
   251      }
   252    }
   253    for (const auto& support : supports_) {
   254      if (auto metadata = support->ParseFile(filename, modified_filename,
   255                                             decoded_buffer, target_buffer)) {
   256        return metadata;
   257      }
   258    }
   259    return nullptr;
   260  }
   261  
   262  }  // namespace kythe