go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/logdog/common/archive/archive.go

// Copyright 2015 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package archive constructs a LogDog archive out of log stream components.
// Records are read from the stream and emitted as an archive.
package archive

import (
	"crypto/sha256"
	"encoding/hex"
	"io"
	"reflect"

	cl "cloud.google.com/go/logging"
	"github.com/golang/protobuf/proto"

	"go.chromium.org/luci/common/data/recordio"
	"go.chromium.org/luci/common/logging"
	"go.chromium.org/luci/common/sync/parallel"
	"go.chromium.org/luci/logdog/api/logpb"
	"go.chromium.org/luci/logdog/common/renderer"
)

// A CloudLogging entry is limited to 256KB in its internal byte
// representation. If an entry is larger, CloudLogging will reject it with an
// error.
//
// To minimize the chance of a LogEntry exceeding the limit, Archive applies
// the following limits to each entry before exporting a logpb.LogEntry to
// CloudLogging.
const (
	// maxPayload is the maximum size for the payload of a CloudLogging entry.
	//
	// If a single line exceeds this limit, it likely contains a dump of a
	// serialized object, which wouldn't be useful in searches; the line will
	// be truncated when exported to CloudLogging.
	maxPayload = 128 * 1024

	// maxTagSum is the maximum combined size of the tag keys and values that
	// can be attached to a CloudLogging entry. If the sum exceeds this limit,
	// the stream won't be exported to CloudLogging.
	maxTagSum = 96 * 1024
)

// CLLogger is a general interface for the CloudLogging logger, intended to
// let unit tests stub out CloudLogging.
type CLLogger interface {
	Log(cl.Entry)
}
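
// memCLLogger is a minimal sketch of such a stub: an in-memory CLLogger that
// a unit test might use to capture the entries that would have been exported
// to Cloud Logging. The type and its name are illustrative, not part of this
// package's API.
type memCLLogger struct {
	entries []cl.Entry
}

// Log records the entry instead of sending it to Cloud Logging.
func (m *memCLLogger) Log(e cl.Entry) {
	m.entries = append(m.entries, e)
}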

// Manifest is a set of archival parameters.
type Manifest struct {
	// LUCIProject is the LUCI project for the stream.
	LUCIProject string

	// Desc is the logpb.LogStreamDescriptor for the stream.
	Desc *logpb.LogStreamDescriptor
	// Source is the LogEntry Source for the stream.
	Source renderer.Source

	// LogWriter, if not nil, is the Writer to which the log stream record
	// stream will be written.
	LogWriter io.Writer
	// IndexWriter, if not nil, is the Writer to which the log stream Index
	// protobuf stream will be written.
	IndexWriter io.Writer

	// StreamIndexRange, if >0, is the maximum number of log entry stream
	// indices in between successive index entries.
	//
	// If no index constraints are set, an index entry will be emitted for
	// each LogEntry.
	StreamIndexRange int
	// PrefixIndexRange, if >0, is the maximum number of log entry prefix
	// indices in between successive index entries.
	PrefixIndexRange int
	// ByteRange, if >0, is the maximum number of log entry bytes in between
	// successive index entries.
	ByteRange int

	// Logger, if not nil, will be used to log status during archival.
	Logger logging.Logger

	// CloudLogger, if not nil, will be used to export archived log entries to
	// Cloud Logging.
	CloudLogger CLLogger

	// sizeFunc is a size method override used for testing.
	sizeFunc func(proto.Message) int
}

func (m *Manifest) logger() logging.Logger {
	if m.Logger == nil ||
		(reflect.ValueOf(m.Logger).Kind() == reflect.Ptr &&
			reflect.ValueOf(m.Logger).IsNil()) {
		return logging.Null
	}
	return m.Logger
}

// Archive performs the log archival described in the supplied Manifest.
func Archive(m Manifest) error {
	// Wrap our log source in a safeLogEntrySource to protect our index order.
	m.Source = &safeLogEntrySource{
		Manifest: &m,
		Source:   m.Source,
	}

	// If no constraints are applied, index every LogEntry.
	if m.StreamIndexRange <= 0 && m.PrefixIndexRange <= 0 && m.ByteRange <= 0 {
		m.StreamIndexRange = 1
	}

	if m.LogWriter == nil {
		return nil
	}

	// If we're constructing an index, allocate a stateful index builder.
	var idx *indexBuilder
	if m.IndexWriter != nil {
		idx = &indexBuilder{
			Manifest: &m,
			index: logpb.LogIndex{
				Desc: m.Desc,
			},
			sizeFunc: m.sizeFunc,
		}
	}

	// Compute a hash to be used as the ID of the stream in Cloud Logging.
	sha := sha256.New()
	sha.Write([]byte(m.LUCIProject))
	sha.Write([]byte(m.Desc.Prefix))
	sha.Write([]byte(m.Desc.Name))
	streamIDHash := sha.Sum(nil)

	return parallel.FanOutIn(func(taskC chan<- func() error) {
		logC := make(chan *logpb.LogEntry)

		taskC <- func() error {
			if err := archiveLogs(m.LogWriter, m.Desc, logC, idx, m.CloudLogger, streamIDHash, m.logger()); err != nil {
				return err
			}

			// If we're building an index, emit it now that the log stream has
			// finished.
			if idx != nil {
				return idx.emit(m.IndexWriter)
			}
			return nil
		}

		// Iterate through all of our Source's logs and process them.
		taskC <- func() error {
			defer close(logC)

			for {
				le, err := m.Source.NextLogEntry()
				if le != nil {
					logC <- le
				}

				switch err {
				case nil:
				case io.EOF:
					return nil
				default:
					return err
				}
			}
		}
	})
}
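
// sliceSource is a minimal illustrative sketch of a renderer.Source: it
// replays a fixed slice of entries and then reports io.EOF, which is how
// Archive's drain loop above detects the end of the stream. The type and its
// name are hypothetical, not part of the LogDog API.
type sliceSource struct {
	entries []*logpb.LogEntry
}

// NextLogEntry returns the next buffered entry, or io.EOF once exhausted.
func (s *sliceSource) NextLogEntry() (*logpb.LogEntry, error) {
	if len(s.entries) == 0 {
		return nil, io.EOF
	}
	le := s.entries[0]
	s.entries = s.entries[1:]
	return le, nil
}

// archiveToDiscard sketches how a caller might drive Archive: populate a
// Manifest with a descriptor, a Source, and destination Writers, then call
// Archive. io.Discard stands in for real archive destinations here; note
// that Archive returns immediately when LogWriter is nil.
func archiveToDiscard(desc *logpb.LogStreamDescriptor, entries []*logpb.LogEntry) error {
	return Archive(Manifest{
		LUCIProject: "example-project",
		Desc:        desc,
		Source:      &sliceSource{entries: entries},
		LogWriter:   io.Discard,
		IndexWriter: io.Discard,
	})
}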

func archiveLogs(w io.Writer, d *logpb.LogStreamDescriptor, logC <-chan *logpb.LogEntry, idx *indexBuilder, cloudLogger CLLogger, streamIDHash []byte, logger logging.Logger) error {
	offset := int64(0)
	out := func(pb proto.Message) error {
		d, err := proto.Marshal(pb)
		if err != nil {
			return err
		}

		count, err := recordio.WriteFrame(w, d)
		offset += int64(count)
		return err
	}

	isCLDisabled := (cloudLogger == nil ||
		(reflect.ValueOf(cloudLogger).Kind() == reflect.Ptr &&
			reflect.ValueOf(cloudLogger).IsNil()))

	if !isCLDisabled {
		tsum := 0
		for k, v := range d.GetTags() {
			tsum += len(k)
			tsum += len(v)

			if tsum > maxTagSum {
				logger.Errorf("sum(tags) > %d; skipping the stream for CloudLogging export", maxTagSum)
				isCLDisabled = true
				break
			}
		}
	}

	// Start with our descriptor protobuf. Defer error handling until later, as
	// we are still responsible for draining "logC".
	err := out(d)

	eb := newEntryBuffer(maxPayload, hex.EncodeToString(streamIDHash), d)
	for le := range logC {
		if err != nil {
			continue
		}

		// Add this LogEntry to our index, noting the current offset.
		if idx != nil {
			idx.addLogEntry(le, offset)
		}
		err = out(le)

		// Skip CloudLogging export, if disabled.
		if isCLDisabled {
			continue
		}
		for _, entry := range eb.append(le) {
			cloudLogger.Log(*entry)
		}
	}

	// Export the last entry.
	//
	// If there was an error, the buffer may hold an incomplete line that would
	// have been completed by the next LogEntry. In that case, skip flushing the
	// buffered line, so that the complete version of the line isn't deduped on
	// the next retry of the archival task.
	if err == nil {
		if entry := eb.flush(); entry != nil {
			cloudLogger.Log(*entry)
		}
	}
	return err
}
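
// exportForTest is a sketch tying the illustrative pieces above together: it
// archives a fixed set of entries with the in-memory memCLLogger stub wired
// in as the CloudLogger, then returns whatever would have been exported to
// Cloud Logging. LogWriter must be non-nil (io.Discard here), since Archive
// returns early, without exporting anything, when it is nil.
func exportForTest(desc *logpb.LogStreamDescriptor, entries []*logpb.LogEntry) ([]cl.Entry, error) {
	stub := &memCLLogger{}
	err := Archive(Manifest{
		LUCIProject: "example-project",
		Desc:        desc,
		Source:      &sliceSource{entries: entries},
		LogWriter:   io.Discard,
		CloudLogger: stub,
	})
	return stub.entries, err
}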