kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/cxx/indexer/textproto/recordio_textparser.cc (about) 1 /* 2 * Copyright 2023 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "kythe/cxx/indexer/textproto/recordio_textparser.h" 18 19 #include <optional> 20 #include <sstream> 21 22 #include "absl/functional/function_ref.h" 23 #include "absl/log/log.h" 24 #include "absl/strings/ascii.h" 25 #include "absl/strings/match.h" 26 #include "absl/strings/str_split.h" 27 #include "absl/strings/string_view.h" 28 #include "absl/strings/strip.h" 29 30 namespace kythe { 31 namespace lang_textproto { 32 33 namespace { 34 35 // WithChar is a delimiter for absl::StrSplit() that splits on given char but 36 // also includes the delimiter char. 37 struct WithChar { 38 explicit WithChar(char ch) : delimiter_(ch) {} 39 absl::string_view Find(absl::string_view text, size_t pos) const { 40 absl::string_view sep = delimiter_.Find(text, pos); 41 // Always return a zero-width span after the delimiter, so that it's 42 // included if present. 43 sep.remove_prefix(sep.size()); 44 return sep; 45 } 46 47 private: 48 absl::ByChar delimiter_; 49 }; 50 51 class ProtoLineDelimiter { 52 public: 53 explicit ProtoLineDelimiter(absl::string_view delimiter, 54 int* line_count = nullptr) 55 : delimiter_(delimiter), line_count_(line_count), current_line_(0) {} 56 57 /// \brief Finds the next occurrence of the configured delimiter 58 /// on a line by itself, after the first non-comment, non-empty line. 59 absl::string_view Find(absl::string_view text, size_t pos) { 60 // Store the start line of chunk. 61 if (line_count_) { 62 *line_count_ = current_line_; 63 } 64 for (absl::string_view line : 65 absl::StrSplit(text.substr(pos), WithChar('\n'))) { 66 current_line_++; 67 // Don't look for the delimiter until we've seen our first non-empty, 68 // non-comment line. 69 data_seen_ = data_seen_ || !(absl::StartsWith(line, "#") || 70 absl::StripPrefix(line, "\n").empty()); 71 bool is_delimiter = 72 // The line consists entirely of the delimiter and delimiter may 73 // start with a comment. 74 absl::StripPrefix(absl::StripPrefix(line, delimiter_), "\n").empty(); 75 if (!data_seen_ && is_delimiter) continue; 76 77 if (is_delimiter) { 78 return line; 79 } 80 } 81 return text.substr(text.size()); 82 } 83 84 private: 85 std::string delimiter_; 86 int* line_count_; 87 int current_line_; 88 89 bool data_seen_ = false; 90 }; 91 92 } // namespace 93 94 void ParseRecordTextChunks( 95 absl::string_view content, absl::string_view record_delimiter, 96 absl::FunctionRef<void(absl::string_view chunk, int chunk_start_line)> 97 callback) { 98 int line_count = 0; 99 for (absl::string_view chunk : absl::StrSplit( 100 content, ProtoLineDelimiter(record_delimiter, &line_count))) { 101 callback(chunk, line_count); 102 } 103 } 104 105 } // namespace lang_textproto 106 } // namespace kythe