kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/common/kzip_reader.cc (about)

     1  /*
     2   * Copyright 2018 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  #include "kythe/cxx/common/kzip_reader.h"
    18  
    19  #include <zip.h>
    20  #include <zipconf.h>
    21  
    22  #include <algorithm>
    23  #include <cstdint>
    24  #include <cstdio>
    25  #include <iterator>
    26  #include <memory>
    27  #include <optional>
    28  #include <set>
    29  #include <string>
    30  #include <utility>
    31  #include <vector>
    32  
    33  #include "absl/log/check.h"
    34  #include "absl/log/log.h"
    35  #include "absl/memory/memory.h"
    36  #include "absl/status/status.h"
    37  #include "absl/status/statusor.h"
    38  #include "absl/strings/str_cat.h"
    39  #include "absl/strings/string_view.h"
    40  #include "absl/strings/strip.h"
    41  #include "google/protobuf/io/zero_copy_stream.h"
    42  #include "google/protobuf/io/zero_copy_stream_impl_lite.h"
    43  #include "kythe/cxx/common/index_reader.h"
    44  #include "kythe/cxx/common/json_proto.h"
    45  #include "kythe/cxx/common/kzip_encoding.h"
    46  #include "kythe/cxx/common/libzip/error.h"
    47  #include "kythe/proto/analysis.pb.h"
    48  
    49  namespace kythe {
    50  namespace {
    51  
    52  constexpr absl::string_view kJsonUnitsDir = "/units/";
    53  constexpr absl::string_view kProtoUnitsDir = "/pbunits/";
    54  
    55  struct ZipFileClose {
    56    void operator()(zip_file_t* file) {
    57      if (file != nullptr) {
    58        CHECK_EQ(zip_fclose(file), 0);
    59      }
    60    }
    61  };
    62  using ZipFile = std::unique_ptr<zip_file_t, ZipFileClose>;
    63  
    64  class ZipFileInputStream : public google::protobuf::io::ZeroCopyInputStream {
    65   public:
    66    explicit ZipFileInputStream(zip_file_t* file) : input_(file) {}
    67  
    68    bool Next(const void** data, int* size) override {
    69      return impl_.Next(data, size);
    70    }
    71  
    72    void BackUp(int count) override { impl_.BackUp(count); }
    73    bool Skip(int count) override { return impl_.Skip(count); }
    74    int64_t ByteCount() const override { return impl_.ByteCount(); }
    75  
    76   private:
    77    class CopyingZipInputStream
    78        : public google::protobuf::io::CopyingInputStream {
    79     public:
    80      explicit CopyingZipInputStream(zip_file_t* file) : file_(file) {}
    81  
    82      int Read(void* buffer, int size) override {
    83        return zip_fread(file_, buffer, size);
    84      }
    85  
    86      int Skip(int count) override {
    87        zip_int64_t start = zip_ftell(file_);
    88        if (start < 0) {
    89          return 0;
    90        }
    91        if (zip_fseek(file_, count, SEEK_CUR) < 0) {
    92          return 0;
    93        }
    94        zip_int64_t end = zip_ftell(file_);
    95        if (end < 0) {
    96          return 0;
    97        }
    98        return end - start;
    99      }
   100  
   101     private:
   102      zip_file_t* file_;
   103    };
   104  
   105    CopyingZipInputStream input_;
   106    google::protobuf::io::CopyingInputStreamAdaptor impl_{&input_};
   107  };
   108  
   109  struct KzipOptions {
   110    absl::string_view root;
   111    KzipEncoding encoding;
   112  };
   113  
   114  absl::StatusOr<KzipOptions> Validate(zip_t* archive) {
   115    if (!zip_get_num_entries(archive, 0)) {
   116      return absl::InvalidArgumentError("Empty kzip archive");
   117    }
   118  
   119    // Pull the root directory from an arbitrary entry.
   120    absl::string_view root = zip_get_name(archive, 0, 0);
   121    auto slashpos = root.find('/');
   122    if (slashpos == 0 || slashpos == absl::string_view::npos) {
   123      return absl::InvalidArgumentError(
   124          absl::StrCat("Malformed kzip: invalid root: ", root));
   125    }
   126    root.remove_suffix(root.size() - slashpos);
   127    VLOG(1) << "Using archive root: " << root;
   128    std::set<absl::string_view> proto_units;
   129    std::set<absl::string_view> json_units;
   130    for (int i = 0; i < zip_get_num_entries(archive, 0); ++i) {
   131      absl::string_view name = zip_get_name(archive, i, 0);
   132      if (!absl::ConsumePrefix(&name, root)) {
   133        return absl::InvalidArgumentError(
   134            absl::StrCat("Malformed kzip: invalid entry: ", name));
   135      }
   136      if (absl::ConsumePrefix(&name, kJsonUnitsDir)) {
   137        json_units.insert(name);
   138      } else if (absl::ConsumePrefix(&name, kProtoUnitsDir)) {
   139        proto_units.insert(name);
   140      }
   141    }
   142    KzipEncoding encoding = KzipEncoding::kJson;
   143    if (json_units.empty()) {
   144      encoding = KzipEncoding::kProto;
   145    } else if (!proto_units.empty()) {
   146      std::vector<absl::string_view> diff;
   147      std::set_symmetric_difference(json_units.begin(), json_units.end(),
   148                                    proto_units.begin(), proto_units.end(),
   149                                    std::inserter(diff, diff.end()));
   150      if (!diff.empty()) {
   151        return absl::InvalidArgumentError(absl::StrCat(
   152            "Malformed kzip: multiple unit encodings but different entries"));
   153      }
   154    }
   155    return KzipOptions{root, encoding};
   156  }
   157  
   158  std::optional<zip_uint64_t> FileSize(zip_t* archive, zip_uint64_t index) {
   159    zip_stat_t sb;
   160    zip_stat_init(&sb);
   161  
   162    if (zip_stat_index(archive, index, ZIP_STAT_SIZE, &sb) < 0) {
   163      return std::nullopt;
   164    }
   165    return sb.size;
   166  }
   167  
   168  absl::StatusOr<std::string> ReadTextFile(zip_t* archive,
   169                                           const std::string& path) {
   170    zip_int64_t index = zip_name_locate(archive, path.c_str(), 0);
   171    if (index >= 0) {
   172      if (auto file = ZipFile(zip_fopen_index(archive, index, 0))) {
   173        if (auto size = FileSize(archive, index)) {
   174          std::string result(*size, '\0');
   175          if (*size == 0 ||
   176              zip_fread(file.get(), result.data(), *size) == *size) {
   177            return result;
   178          } else {
   179            return libzip::ToStatus(zip_file_get_error(file.get()));
   180          }
   181        }
   182      }
   183    }
   184    absl::Status status = libzip::ToStatus(zip_get_error(archive));
   185    if (!status.ok()) {
   186      return status;
   187    }
   188    return absl::UnknownError(absl::StrCat("Unable to read: ", path));
   189  }
   190  
   191  absl::string_view DirNameForEncoding(KzipEncoding encoding) {
   192    switch (encoding) {
   193      case KzipEncoding::kJson:
   194        return kJsonUnitsDir;
   195      case KzipEncoding::kProto:
   196        return kProtoUnitsDir;
   197      default:
   198        LOG(FATAL) << "Unsupported encoding: " << static_cast<int>(encoding);
   199    }
   200    return "";
   201  }
   202  
   203  }  // namespace
   204  
   205  std::optional<absl::string_view> KzipReader::UnitDigest(
   206      absl::string_view path) {
   207    if (!absl::ConsumePrefix(&path, unit_prefix_) || path.empty()) {
   208      return std::nullopt;
   209    }
   210    return path;
   211  }
   212  
   213  /* static */
   214  absl::StatusOr<IndexReader> KzipReader::Open(absl::string_view path) {
   215    int error;
   216    if (auto archive =
   217            ZipHandle(zip_open(std::string(path).c_str(), ZIP_RDONLY, &error))) {
   218      if (auto options = Validate(archive.get()); options.ok()) {
   219        return IndexReader(absl::WrapUnique(new KzipReader(
   220            std::move(archive), options->root, options->encoding)));
   221      } else {
   222        return options.status();
   223      }
   224    }
   225    return libzip::Error(error).ToStatus();
   226  }
   227  
   228  /* static */
   229  absl::StatusOr<IndexReader> KzipReader::FromSource(zip_source_t* source) {
   230    libzip::Error error;
   231    if (auto archive =
   232            ZipHandle(zip_open_from_source(source, ZIP_RDONLY, error.get()))) {
   233      if (auto options = Validate(archive.get()); options.ok()) {
   234        return IndexReader(absl::WrapUnique(new KzipReader(
   235            std::move(archive), options->root, options->encoding)));
   236      } else {
   237        // Ensure source is retained when `archive` is deleted.
   238        // It is the callers responsitility to free it on error.
   239        zip_source_keep(source);
   240        return options.status();
   241      }
   242    }
   243    return error.ToStatus();
   244  }
   245  
   246  KzipReader::KzipReader(ZipHandle archive, absl::string_view root,
   247                         KzipEncoding encoding)
   248      : archive_(std::move(archive)),
   249        encoding_(encoding),
   250        files_prefix_(absl::StrCat(root, "/files/")),
   251        unit_prefix_(absl::StrCat(root, DirNameForEncoding(encoding))) {}
   252  
   253  absl::StatusOr<proto::IndexedCompilation> KzipReader::ReadUnit(
   254      absl::string_view digest) {
   255    std::string path = absl::StrCat(unit_prefix_, digest);
   256  
   257    if (auto file = ZipFile(zip_fopen(archive(), path.c_str(), 0))) {
   258      proto::IndexedCompilation unit;
   259      ZipFileInputStream input(file.get());
   260      absl::Status status;
   261      if (encoding_ == KzipEncoding::kJson) {
   262        status = ParseFromJsonStream(&input, &unit);
   263      } else {
   264        if (!unit.ParseFromZeroCopyStream(&input)) {
   265          status = absl::InvalidArgumentError("Failure parsing proto unit");
   266        }
   267      }
   268      if (!status.ok()) {
   269        absl::Status zip_status =
   270            libzip::ToStatus(zip_file_get_error(file.get()));
   271        if (!zip_status.ok()) {
   272          // Prefer the underlying zip error, if present.
   273          return zip_status;
   274        }
   275        return status;
   276      }
   277      return unit;
   278    }
   279    absl::Status status = libzip::ToStatus(zip_get_error(archive()));
   280    if (!status.ok()) {
   281      return status;
   282    }
   283    return absl::UnknownError(absl::StrCat("Unable to open unit ", digest));
   284  }
   285  
   286  absl::StatusOr<std::string> KzipReader::ReadFile(absl::string_view digest) {
   287    return ReadTextFile(archive(), absl::StrCat(files_prefix_, digest));
   288  }
   289  
   290  absl::Status KzipReader::Scan(const ScanCallback& callback) {
   291    for (int i = 0; i < zip_get_num_entries(archive(), 0); ++i) {
   292      if (auto digest = UnitDigest(zip_get_name(archive(), i, 0))) {
   293        if (!callback(*digest)) {
   294          break;
   295        }
   296      }
   297    }
   298    return absl::OkStatus();
   299  }
   300  
   301  }  // namespace kythe