kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/common/kzip_writer.cc (about)

     1  /*
     2   * Copyright 2018 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  #include "kythe/cxx/common/kzip_writer.h"
    18  
    19  #include <openssl/sha.h>
    20  #include <zip.h>
    21  
    22  #include <array>
    23  #include <cstdlib>
    24  #include <ctime>
    25  #include <string>
    26  #include <type_traits>
    27  #include <vector>
    28  
    29  #include "absl/log/check.h"
    30  #include "absl/log/log.h"
    31  #include "absl/memory/memory.h"
    32  #include "absl/status/status.h"
    33  #include "absl/status/statusor.h"
    34  #include "absl/strings/ascii.h"
    35  #include "absl/strings/escaping.h"
    36  #include "absl/strings/str_cat.h"
    37  #include "absl/strings/string_view.h"
    38  #include "kythe/cxx/common/index_writer.h"
    39  #include "kythe/cxx/common/json_proto.h"
    40  #include "kythe/cxx/common/kzip_encoding.h"
    41  #include "kythe/cxx/common/libzip/error.h"
    42  #include "kythe/proto/analysis.pb.h"
    43  
    44  namespace kythe {
    45  namespace {
    46  
    47  constexpr absl::string_view kRoot = "root/";
    48  constexpr absl::string_view kJsonUnitRoot = "root/units/";
    49  constexpr absl::string_view kProtoUnitRoot = "root/pbunits/";
    50  constexpr absl::string_view kFileRoot = "root/files/";
    51  // Set all file modified times to 0 so zip file diffs only show content diffs,
    52  // not zip creation time diffs.
    53  constexpr time_t kModTime = 0;
    54  
    55  std::string SHA256Digest(absl::string_view content) {
    56    std::array<unsigned char, SHA256_DIGEST_LENGTH> buf;
    57    ::SHA256(reinterpret_cast<const unsigned char*>(content.data()),
    58             content.size(), buf.data());
    59    return absl::BytesToHexString(
    60        absl::string_view(reinterpret_cast<const char*>(buf.data()), buf.size()));
    61  }
    62  
    63  absl::Status WriteTextFile(zip_t* archive, const std::string& path,
    64                             absl::string_view content) {
    65    if (auto source =
    66            zip_source_buffer(archive, content.data(), content.size(), 0)) {
    67      auto idx = zip_file_add(archive, path.c_str(), source, ZIP_FL_ENC_UTF_8);
    68      if (idx >= 0) {
    69        // If a file was added, set the last modified time.
    70        if (zip_file_set_mtime(archive, idx, kModTime, 0) == 0) {
    71          return absl::OkStatus();
    72        }
    73      }
    74      zip_source_free(source);
    75    }
    76    return libzip::ToStatus(zip_get_error(archive));
    77  }
    78  
    79  absl::string_view Basename(absl::string_view path) {
    80    auto pos = path.find_last_of('/');
    81    if (pos == absl::string_view::npos) {
    82      return path;
    83    }
    84    return absl::ClippedSubstr(path, pos + 1);
    85  }
    86  
    87  bool HasEncoding(KzipEncoding lhs, KzipEncoding rhs) {
    88    return static_cast<typename std::underlying_type<KzipEncoding>::type>(lhs) &
    89           static_cast<typename std::underlying_type<KzipEncoding>::type>(rhs);
    90  }
    91  
    92  }  // namespace
    93  
    94  /* static */
    95  absl::StatusOr<IndexWriter> KzipWriter::Create(absl::string_view path,
    96                                                 KzipEncoding encoding) {
    97    int error;
    98    if (auto archive =
    99            zip_open(std::string(path).c_str(), ZIP_CREATE | ZIP_EXCL, &error)) {
   100      return IndexWriter(absl::WrapUnique(new KzipWriter(archive, encoding)));
   101    }
   102    return libzip::Error(error).ToStatus();
   103  }
   104  
   105  /* static */
   106  absl::StatusOr<IndexWriter> KzipWriter::FromSource(zip_source_t* source,
   107                                                     KzipEncoding encoding,
   108                                                     const int flags) {
   109    libzip::Error error;
   110    if (auto archive = zip_open_from_source(source, flags, error.get())) {
   111      return IndexWriter(absl::WrapUnique(new KzipWriter(archive, encoding)));
   112    }
   113    return error.ToStatus();
   114  }
   115  
   116  KzipWriter::KzipWriter(zip_t* archive, KzipEncoding encoding)
   117      : archive_(archive), encoding_(encoding) {}
   118  
   119  KzipWriter::~KzipWriter() {
   120    DCHECK(archive_ == nullptr) << "Disposing of open KzipWriter!";
   121  }
   122  
   123  // Creates entries for the three directories if not already present.
   124  absl::Status KzipWriter::InitializeArchive(zip_t* archive) {
   125    std::vector<absl::string_view> dirs = {kRoot, kFileRoot};
   126    if (HasEncoding(encoding_, KzipEncoding::kJson)) {
   127      dirs.push_back(kJsonUnitRoot);
   128    }
   129    if (HasEncoding(encoding_, KzipEncoding::kProto)) {
   130      dirs.push_back(kProtoUnitRoot);
   131    }
   132    for (const auto& name : dirs) {
   133      auto idx = zip_dir_add(archive, name.data(), ZIP_FL_ENC_UTF_8);
   134      if (idx < 0) {
   135        absl::Status status = libzip::ToStatus(zip_get_error(archive));
   136        zip_error_clear(archive);
   137        return status;
   138      }
   139      if (zip_file_set_mtime(archive, idx, kModTime, 0) < 0) {
   140        absl::Status status = libzip::ToStatus(zip_get_error(archive));
   141        zip_error_clear(archive);
   142        return status;
   143      }
   144    }
   145    return absl::OkStatus();
   146  }
   147  
   148  absl::StatusOr<std::string> KzipWriter::WriteUnit(
   149      const kythe::proto::IndexedCompilation& unit) {
   150    if (!initialized_) {
   151      auto status = InitializeArchive(archive_);
   152      if (!status.ok()) {
   153        return status;
   154      }
   155      initialized_ = true;
   156    }
   157    auto json = WriteMessageAsJsonToString(unit);
   158    if (!json.ok()) {
   159      return json.status();
   160    }
   161    auto digest = SHA256Digest(*json);
   162    absl::StatusOr<std::string> result =
   163        absl::InternalError("unsupported encoding");
   164    if (HasEncoding(encoding_, KzipEncoding::kJson)) {
   165      result = InsertFile(absl::StrCat(kJsonUnitRoot, digest), *json);
   166      if (!result.ok()) {
   167        return result;
   168      }
   169    }
   170    if (HasEncoding(encoding_, KzipEncoding::kProto)) {
   171      std::string contents;
   172      if (!unit.SerializeToString(&contents)) {
   173        return absl::InternalError("Failure serializing compilation unit");
   174      }
   175      result = InsertFile(absl::StrCat(kProtoUnitRoot, digest), contents);
   176    }
   177    return result;
   178  }
   179  
   180  absl::StatusOr<std::string> KzipWriter::WriteFile(absl::string_view content) {
   181    if (!initialized_) {
   182      auto status = InitializeArchive(archive_);
   183      if (!status.ok()) {
   184        return status;
   185      }
   186      initialized_ = true;
   187    }
   188    return InsertFile(absl::StrCat(kFileRoot, SHA256Digest(content)), content);
   189  }
   190  
   191  absl::Status KzipWriter::Close() {
   192    DCHECK(archive_ != nullptr);
   193  
   194    absl::Status result = absl::OkStatus();
   195    if (zip_close(archive_) != 0) {
   196      result = libzip::ToStatus(zip_get_error(archive_));
   197      zip_discard(archive_);
   198    }
   199  
   200    archive_ = nullptr;
   201    contents_.clear();
   202    return result;
   203  }
   204  
   205  absl::StatusOr<std::string> KzipWriter::InsertFile(absl::string_view path,
   206                                                     absl::string_view content) {
   207    // Initially insert an empty string for the file content.
   208    auto insertion = contents_.emplace(std::string(path), "");
   209    if (insertion.second) {
   210      // Only copy in the real content if it was actually inserted into the map.
   211      auto& entry = insertion.first;
   212      entry->second = std::string(content);
   213      auto status = WriteTextFile(archive_, entry->first, entry->second);
   214      if (!status.ok()) {
   215        contents_.erase(entry->first);
   216        return status;
   217      }
   218    }
   219    return std::string(Basename(path));
   220  }
   221  
   222  /* static */
   223  KzipEncoding KzipWriter::DefaultEncoding() {
   224    if (const char* env_enc = getenv("KYTHE_KZIP_ENCODING")) {
   225      std::string enc = absl::AsciiStrToUpper(env_enc);
   226      if (enc == "JSON") {
   227        return KzipEncoding::kJson;
   228      }
   229      if (enc == "PROTO") {
   230        return KzipEncoding::kProto;
   231      }
   232      if (enc == "ALL") {
   233        return KzipEncoding::kAll;
   234      }
   235      LOG(ERROR) << "Unknown encoding '" << enc << "', using PROTO";
   236    }
   237    return KzipEncoding::kProto;
   238  }
   239  
   240  }  // namespace kythe