go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/logdog/client/butler/bundler/stream.go (about) 1 // Copyright 2015 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package bundler 16 17 import ( 18 "fmt" 19 "sync" 20 "sync/atomic" 21 "time" 22 23 "go.chromium.org/luci/logdog/api/logpb" 24 ) 25 26 var ( 27 // dataBufferSize is the size (in bytes) of the Data objects that a Stream 28 // will lease. 29 dataBufferSize = 4096 30 ) 31 32 // Stream is an individual Bundler Stream. Data is added to the Stream as a 33 // series of ordered binary chunks. 34 // 35 // A Stream is not goroutine-safe. 36 type Stream interface { 37 // LeaseData allocates and returns a Data block that stream data can be 38 // loaded into. The caller should Release() the Data, or transfer ownership to 39 // something that will (e.g., Append()). 40 // 41 // If the leased data is not Released, it is merely inefficient, not fatal. 42 LeaseData() Data 43 44 // Append adds a sequential chunk of data to the Stream. Append may block if 45 // the data isn't ready to be consumed. 46 // 47 // Append takes ownership of the data regardless of whether or not it returns 48 // an error. The supplied Data must not be referenced after calling Append. 49 Append(Data) error 50 51 // Close closes the Stream, flushing any remaining data. 52 Close() 53 } 54 55 // streamConfig is the set of static configuration parameters for the stream. 56 type streamConfig struct { 57 // name is the name of this stream. 58 name string 59 60 // parser is the stream parser to use. 61 parser parser 62 63 // maximumBufferedBytes is the maximum number of bytes that this stream will 64 // retain in its parser before blocking subsequent Append attempts. 65 maximumBufferedBytes int64 66 // maximumBufferDuration is the maximum amount of time that a block of data 67 // can be comfortably buffered in the stream. 68 maximumBufferDuration time.Duration 69 70 // template is the minimally-populated Butler log bundle entry. 71 template logpb.ButlerLogBundle_Entry 72 73 // onAppend, if not nil, is invoked when an attempt to append data to the 74 // stream occurs. If true is passed, the data was successfully appended. If 75 // false was passed, the data could not be appended immediately and the stream 76 // will block pending data consumption. 77 // 78 // The stream's append lock will be held when this method is called. 79 onAppend func(bool) 80 } 81 82 // streamImpl is a Stream implementation that is bound to a Bundler. 83 type streamImpl struct { 84 c *streamConfig 85 86 // drained is true if the stream is finished emitting data, including its 87 // terminal state. 88 // 89 // It is an atomic value, with zero indicating not drained and non-zero 90 // indicating drained. It should be accessed via isDrained, and set with 91 // setDrained. 92 drained int32 93 94 // parserLock is a Mutex protecting the stream's parser instance and its 95 // underlying chunk.Buffer. Any access to either of these fields must be done 96 // while holding this lock. 97 parserLock sync.Mutex 98 99 // dataConsumedSignalC is a channel that can be used to signal when data has 100 // been consumed. It is set via signalDataConsumed. 101 dataConsumedSignalC chan struct{} 102 103 // stateLock protects stream state against concurrent access. 104 stateLock sync.Mutex 105 106 // closed, if non-zero, indicates that we have been closed and our stream has 107 // finished reading. 108 // 109 // stateLock must be held when accessing this field. 110 closed bool 111 112 // lastLogEntry is a pointer to the last LogEntry that was exported. 113 // 114 // stateLock must be held when accessing this field. 115 lastLogEntry *logpb.LogEntry 116 117 // appendErr is the error that should be returned by Append. It is set when 118 // stream content processing hits a fatal state. 119 appendErr error 120 } 121 122 func newStream(c streamConfig) *streamImpl { 123 return &streamImpl{ 124 c: &c, 125 126 dataConsumedSignalC: make(chan struct{}, 1), 127 } 128 } 129 130 func (s *streamImpl) LeaseData() Data { 131 return globalDataPoolRegistry.getPool(dataBufferSize).getData() 132 } 133 134 func (s *streamImpl) Append(d Data) error { 135 // Block/loop until we've successfully appended the data. 136 for { 137 dLen := int64(len(d.Bytes())) 138 if err := s.appendError(); err != nil || dLen == 0 { 139 d.Release() 140 return err 141 } 142 143 s.withParserLock(func() error { 144 if s.c.parser.bufferedBytes() == 0 || 145 s.c.parser.bufferedBytes()+dLen <= s.c.maximumBufferedBytes { 146 s.c.parser.appendData(d) 147 d = nil 148 } 149 return nil 150 }) 151 152 // The data was appended; we're done. 153 if s.c.onAppend != nil { 154 s.c.onAppend(d == nil) 155 } 156 if d == nil { 157 break 158 } 159 160 // Not ready to append; wait for a data event and re-evaluate. 161 <-s.dataConsumedSignalC 162 } 163 164 if d != nil { 165 d.Release() 166 } 167 return nil 168 } 169 170 // Signals our Append loop that data has been consumed. 171 func (s *streamImpl) signalDataConsumed() { 172 select { 173 case s.dataConsumedSignalC <- struct{}{}: 174 break 175 176 default: 177 break 178 } 179 } 180 181 func (s *streamImpl) Close() { 182 s.stateLock.Lock() 183 defer s.stateLock.Unlock() 184 s.closeLocked() 185 } 186 187 func (s *streamImpl) closeLocked() { 188 s.closed = true 189 if s.c.onAppend != nil { 190 // If anyone is listening, notify that our state has changed; it doesn't 191 // actually matter WHEN this state notification happens, just that it 192 // happens after closed=true. 193 // 194 // The current implementation of Bundler has this as b.signalStreamUpdate(), 195 // which is synchronized with Bundler.streamsLock so doing this without 196 // a goroutine can lead to deadlock. 197 go s.c.onAppend(true) 198 } 199 } 200 201 func (s *streamImpl) name() string { 202 return s.c.name 203 } 204 205 // isDrained returns true if this stream is finished emitting data, including 206 // its terminal state. 207 // 208 // This can happen if either: 209 // - The stream is closed and has no more buffered data, or 210 // - The stream has encountered a fatal error during processing. 211 func (s *streamImpl) isDrained() bool { 212 return atomic.LoadInt32(&s.drained) != 0 213 } 214 215 // setDrained marks this stream as drained. 216 func (s *streamImpl) setDrained() { 217 atomic.StoreInt32(&s.drained, 1) 218 } 219 220 // noMoreDataLocked returns true if our stream has been closed and its buffer 221 // is empty. 222 // 223 // The stream's stateLock must be held when calling this method. 224 func (s *streamImpl) noMoreDataLocked() bool { 225 if !s.closed { 226 return false 227 } 228 229 // If we have an append error, we will no longer accept or consume data. 230 if s.appendErr != nil { 231 return true 232 } 233 234 var bufSize int64 235 s.withParserLock(func() error { 236 bufSize = s.c.parser.bufferedBytes() 237 return nil 238 }) 239 return bufSize == 0 240 } 241 242 // expireTime returns the Time when the oldest chunk in the stream will expire. 243 // 244 // This is calculated ask: 245 // oldest.Timestamp + stream.maximumBufferDuration 246 // If there is no buffered data, oldest will return nil. 247 func (s *streamImpl) expireTime() (t time.Time, has bool) { 248 s.withParserLock(func() error { 249 t, has = s.c.parser.firstChunkTime() 250 return nil 251 }) 252 253 if has { 254 t = t.Add(s.c.maximumBufferDuration) 255 } 256 return 257 } 258 259 // nextBundleEntry generates bundles for this stream. The total bundle data size 260 // must not exceed the supplied size. 261 // 262 // If no bundle entry could be generated given the constraints, nil will be 263 // returned. 264 // 265 // It is possible for some entries to be returned alongside an error. 266 func (s *streamImpl) nextBundleEntry(bb *builder, aggressive bool) bool { 267 s.stateLock.Lock() 268 defer s.stateLock.Unlock() 269 270 // If we're not drained, try and get the next bundle. 271 modified := false 272 if !s.noMoreDataLocked() { 273 err := error(nil) 274 modified, err = s.nextBundleEntryLocked(bb, aggressive) 275 if err != nil { 276 s.setAppendErrorLocked(err) 277 } 278 279 if modified { 280 s.signalDataConsumed() 281 } 282 } 283 284 // If we're drained, populate our terminal state. 285 if s.noMoreDataLocked() { 286 if s.lastLogEntry != nil { 287 bb.setStreamTerminal(&s.c.template, s.lastLogEntry.StreamIndex) 288 } 289 s.setDrained() 290 } 291 292 return modified 293 } 294 295 func (s *streamImpl) nextBundleEntryLocked(bb *builder, aggressive bool) (bool, error) { 296 c := constraints{ 297 allowSplit: aggressive, 298 closed: s.closed, 299 } 300 301 // Extract as many entries as possible from the stream. As we extract, adjust 302 // our byte size. 303 // 304 // If we're closed, this will continue to consume until finished. If an error 305 // occurs, shut down data collection. 306 modified := false 307 308 for c.limit = bb.remaining(); c.limit > 0; c.limit = bb.remaining() { 309 emittedLog := false 310 err := s.withParserLock(func() error { 311 le, err := s.c.parser.nextEntry(&c) 312 if err != nil { 313 return err 314 } 315 316 if le == nil { 317 return nil 318 } 319 320 // Enforce basic log entry consistency. 321 if err := s.fixupLogEntry(s.lastLogEntry, le); err != nil { 322 return err 323 } 324 325 emittedLog = true 326 modified = true 327 328 bb.add(&s.c.template, le) 329 s.lastLogEntry = le 330 return nil 331 }) 332 333 if err != nil || !emittedLog { 334 return modified, err 335 } 336 } 337 return modified, nil 338 } 339 340 // fixupLogEntry asserts and corrects a log entry's stream offset and ordering 341 // given the previous entry in the stream. 342 // 343 // If prev is nil, that means that cur is expected to be the first log entry 344 // in the stream. 345 func (s *streamImpl) fixupLogEntry(prev, cur *logpb.LogEntry) error { 346 if prev == nil { 347 if cur.StreamIndex != 0 { 348 return fmt.Errorf("first log entry is not zero index (%d)", cur.StreamIndex) 349 } 350 } else { 351 if cur.StreamIndex != prev.StreamIndex+1 { 352 return fmt.Errorf("non-contiguous stream indices (%d != %d)", cur.StreamIndex, prev.StreamIndex+1) 353 } 354 355 if cur.TimeOffset.AsDuration() < prev.TimeOffset.AsDuration() { 356 to := *prev.TimeOffset 357 cur.TimeOffset = &to 358 } 359 } 360 361 return nil 362 } 363 364 func (s *streamImpl) withParserLock(f func() error) error { 365 s.parserLock.Lock() 366 defer s.parserLock.Unlock() 367 368 return f() 369 } 370 371 func (s *streamImpl) appendError() error { 372 s.stateLock.Lock() 373 defer s.stateLock.Unlock() 374 375 return s.appendErr 376 } 377 378 func (s *streamImpl) setAppendErrorLocked(err error) { 379 s.appendErr = err 380 381 s.closeLocked() 382 s.signalDataConsumed() 383 } 384 385 func (s *streamImpl) streamDesc() *logpb.LogStreamDescriptor { 386 return s.c.template.Desc 387 }