github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cli/debug_merge_logs.go

// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package cli

import (
	"bufio"
	"bytes"
	"container/heap"
	"context"
	"io"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"sync"
	"time"

	"github.com/cockroachdb/cockroach/pkg/util/log"
	"golang.org/x/sync/errgroup"
)

type logStream interface {
	fileInfo() *fileInfo // FileInfo for the current entry available in peek.
	peek() (log.Entry, bool)
	pop() (log.Entry, bool) // If called after peek, must return the same values.
	error() error
}

// writeLogStream pops messages off of s and writes them to out, prepending
// prefix (expanded per file) to each message. If filter is non-nil, only
// messages which match it are written; if filter contains capturing groups,
// only the captured submatches are written.
func writeLogStream(s logStream, out io.Writer, filter *regexp.Regexp, prefix string) error {
	const chanSize = 1 << 16        // 64k
	const maxWriteBufSize = 1 << 18 // 256kB

	prefixCache := map[*fileInfo][]byte{}
	getPrefix := func(fi *fileInfo) ([]byte, error) {
		if prefixBuf, ok := prefixCache[fi]; ok {
			return prefixBuf, nil
		}
		prefixCache[fi] = fi.pattern.ExpandString(nil, prefix, fi.path, fi.matches)
		return prefixCache[fi], nil
	}

	type entryInfo struct {
		log.Entry
		*fileInfo
	}
	render := func(ei entryInfo, w io.Writer) (err error) {
		var prefixBytes []byte
		if prefixBytes, err = getPrefix(ei.fileInfo); err != nil {
			return err
		}
		if _, err = w.Write(prefixBytes); err != nil {
			return err
		}
		return ei.Format(w)
	}

	g, ctx := errgroup.WithContext(context.Background())
	entryChan := make(chan entryInfo, chanSize) // read -> bufferWrites
	writeChan := make(chan *bytes.Buffer)       // bufferWrites -> write
	read := func() error {
		defer close(entryChan)
		for e, ok := s.peek(); ok; e, ok = s.peek() {
			select {
			case entryChan <- entryInfo{Entry: e, fileInfo: s.fileInfo()}:
			case <-ctx.Done():
				return nil
			}
			s.pop()
		}
		return s.error()
	}
	bufferWrites := func() error {
		defer close(writeChan)
		writing, pending := &bytes.Buffer{}, &bytes.Buffer{}
		for {
			send, recv := writeChan, entryChan
			var scratch []byte
			if pending.Len() == 0 {
				send = nil
				if recv == nil {
					return nil
				}
			} else if pending.Len() > maxWriteBufSize {
				recv = nil
			}
			select {
			case ei, open := <-recv:
				if !open {
					entryChan = nil
					break
				}
				startLen := pending.Len()
				if err := render(ei, pending); err != nil {
					return err
				}
				if filter != nil {
					matches := filter.FindSubmatch(pending.Bytes()[startLen:])
					if matches == nil {
						// Did not match.
						pending.Truncate(startLen)
					} else if len(matches) > 1 {
						// Matched and there are capturing groups (if there
						// aren't any we'll want to print the whole message). We
						// only want to print what was captured. This is mildly
						// awkward since we can't output anything directly, so
						// we write the submatches back into the buffer and then
						// discard what we've looked at so far.
						//
						// NB: it's tempting to try to write into `pending`
						// directly, and while it's probably safe to do so
						// (despite truncating before writing back from the
						// truncated buffer into itself), it's not worth the
						// complexity in this code. Just use a scratch buffer.
						scratch = scratch[:0]
						for _, b := range matches[1:] {
							scratch = append(scratch, b...)
						}
						pending.Truncate(startLen)
						_, _ = pending.Write(scratch)
						pending.WriteByte('\n')
					}
				}
			case send <- pending:
				writing.Reset()
				pending, writing = writing, pending
			case <-ctx.Done():
				return nil
			}
		}
	}
	write := func() error {
		for buf := range writeChan {
			if _, err := out.Write(buf.Bytes()); err != nil {
				return err
			}
		}
		return nil
	}
	g.Go(read)
	g.Go(bufferWrites)
	g.Go(write)
	return g.Wait()
}
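
// For illustration only: a hypothetical caller (the locals and the file
// pattern below are made up, not part of this file) might wire the pieces
// of this file together roughly as follows, using the merged-stream
// constructor defined below:
//
//	filePattern := regexp.MustCompile(`(?P<program>[^/.]+)\.[^/]*log`)
//	s, err := newMergedStreamFromPatterns(
//		context.Background(),
//		[]string{"logs/*"}, // glob patterns
//		filePattern,
//		regexp.MustCompile(`^cockroach`), // programFilter
//		time.Time{}, time.Time{},         // no [from, to] bounds
//	)
//	if err != nil {
//		return err
//	}
//	// The prefix "${program}> " is expanded per file via Regexp.ExpandString.
//	return writeLogStream(s, os.Stdout, nil /* filter */, "${program}> ")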

// mergedStream is a merged heap of log streams.
type mergedStream []logStream

// newMergedStreamFromPatterns creates a new logStream by first
// glob-expanding the given patterns and then filtering for files which
// match filePattern. If filePattern contains a capture group named
// "program", files are further filtered to those whose extracted program
// matches programFilter. If no "program" capture group exists, all
// matching files are used. The returned stream only returns log entries
// in [from, to].
func newMergedStreamFromPatterns(
	ctx context.Context,
	patterns []string,
	filePattern, programFilter *regexp.Regexp,
	from, to time.Time,
) (logStream, error) {
	paths, err := expandPatterns(patterns)
	if err != nil {
		return nil, err
	}
	files, err := findLogFiles(paths, filePattern, programFilter,
		groupIndex(filePattern, "program"), to)
	if err != nil {
		return nil, err
	}
	return newMergedStream(ctx, files, from, to)
}

func groupIndex(re *regexp.Regexp, groupName string) int {
	for i, n := range re.SubexpNames() {
		if n == groupName {
			return i
		}
	}
	return -1
}

func newMergedStream(
	ctx context.Context, files []fileInfo, from, to time.Time,
) (*mergedStream, error) {
	// TODO(ajwerner): think about clock movement and PID
	const maxConcurrentFiles = 256 // should be far less than the FD limit
	sem := make(chan struct{}, maxConcurrentFiles)
	res := make(mergedStream, len(files))
	g, _ := errgroup.WithContext(ctx)
	createFileStream := func(i int) func() error {
		return func() error {
			sem <- struct{}{}
			defer func() { <-sem }()
			s, err := newFileLogStream(files[i], from, to)
			if s != nil {
				res[i] = s
			}
			return err
		}
	}
	for i := range files {
		g.Go(createFileStream(i))
	}
	if err := g.Wait(); err != nil {
		return nil, err
	}
	g, ctx = errgroup.WithContext(ctx)
	filtered := res[:0] // filter nil streams
	for _, s := range res {
		if s != nil {
			s = newBufferedLogStream(ctx, g, s)
			filtered = append(filtered, s)
		}
	}
	res = filtered
	heap.Init(&res)
	return &res, nil
}

func (l mergedStream) Len() int      { return len(l) }
func (l mergedStream) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
func (l mergedStream) Less(i, j int) bool {
	ie, iok := l[i].peek()
	je, jok := l[j].peek()
	if iok && jok {
		return ie.Time < je.Time
	}
	return !iok && jok
}

func (l *mergedStream) Push(s interface{}) {
	*l = append(*l, s.(logStream))
}
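
// Pop implements heap.Interface (together with Len, Less, Swap, and Push
// above); note that Less orders streams by the timestamp of their next
// entry and sorts exhausted streams ahead of non-exhausted ones.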
func (l *mergedStream) Pop() (v interface{}) {
	n := len(*l) - 1
	v = (*l)[n]
	*l = (*l)[:n]
	return
}

func (l *mergedStream) peek() (log.Entry, bool) {
	if len(*l) == 0 {
		return log.Entry{}, false
	}
	return (*l)[0].peek()
}

func (l *mergedStream) pop() (log.Entry, bool) {
	e, ok := l.peek()
	if !ok {
		return log.Entry{}, false
	}
	s := (*l)[0]
	s.pop()
	if _, stillOk := s.peek(); stillOk {
		heap.Push(l, heap.Pop(l))
	} else if err := s.error(); err != nil && err != io.EOF {
		return log.Entry{}, false
	} else {
		heap.Pop(l)
	}
	return e, true
}

func (l *mergedStream) fileInfo() *fileInfo {
	if len(*l) == 0 {
		return nil
	}
	return (*l)[0].fileInfo()
}

func (l *mergedStream) error() error {
	if len(*l) == 0 {
		return nil
	}
	return (*l)[0].error()
}

func expandPatterns(patterns []string) ([]string, error) {
	var paths []string
	for _, p := range patterns {
		matches, err := filepath.Glob(p)
		if err != nil {
			return nil, err
		}
		paths = append(paths, matches...)
	}
	return removeDuplicates(paths), nil
}

func removeDuplicates(strings []string) (filtered []string) {
	filtered = strings[:0]
	prev := ""
	for _, s := range strings {
		if s == prev {
			continue
		}
		filtered = append(filtered, s)
		prev = s
	}
	return filtered
}

func getLogFileInfo(path string, filePattern *regexp.Regexp) (fileInfo, bool) {
	if matches := filePattern.FindStringSubmatchIndex(path); matches != nil {
		return fileInfo{path: path, matches: matches, pattern: filePattern}, true
	}
	return fileInfo{}, false
}

type fileInfo struct {
	path    string
	pattern *regexp.Regexp
	matches []int
}

func findLogFiles(
	paths []string, filePattern, programFilter *regexp.Regexp, programGroup int, to time.Time,
) ([]fileInfo, error) {
	to = to.Truncate(time.Second) // log files only have second resolution
	fileChan := make(chan fileInfo, len(paths))
	var wg sync.WaitGroup
	wg.Add(len(paths))
	for _, p := range paths {
		go func(p string) {
			defer wg.Done()
			fi, ok := getLogFileInfo(p, filePattern)
			if !ok {
				return
			}
			if programGroup > 0 {
				program := fi.path[fi.matches[2*programGroup]:fi.matches[2*programGroup+1]]
				if !programFilter.MatchString(program) {
					return
				}
			}
			fileChan <- fi
		}(p)
	}
	wg.Wait()
	files := make([]fileInfo, 0, len(fileChan))
	close(fileChan)
	for f := range fileChan {
		files = append(files, f)
	}
	return files, nil
}
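
// For example (hypothetical pattern, not part of this file): with
//
//	filePattern = (?P<program>[a-z-]+)\.(?P<host>[^.]+)\.log
//
// groupIndex(filePattern, "program") returns 1, and findLogFiles keeps
// "cockroach.node1.log" only if programFilter matches the extracted
// program name "cockroach".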

// newBufferedLogStream creates a new logStream which will create a goroutine
// to pop off of s and buffer up to readChanSize entries in memory. The
// channel and goroutine are created lazily, upon the first peek after the
// first call to pop; the initial peek does not consume these resources.
func newBufferedLogStream(ctx context.Context, g *errgroup.Group, s logStream) logStream {
	bs := &bufferedLogStream{ctx: ctx, logStream: s, g: g}
	// Fill the initial entry value to prevent initial peek from running the
	// goroutine.
	bs.e, bs.ok = s.peek()
	bs.read = true
	s.pop()
	return bs
}

type bufferedLogStream struct {
	logStream
	runOnce sync.Once
	e       log.Entry
	read    bool
	ok      bool
	c       chan log.Entry
	ctx     context.Context
	g       *errgroup.Group
}

func (bs *bufferedLogStream) run() {
	const readChanSize = 512
	bs.c = make(chan log.Entry, readChanSize)
	bs.g.Go(func() error {
		defer close(bs.c)
		for {
			e, ok := bs.logStream.pop()
			if !ok {
				if err := bs.error(); err != io.EOF {
					return err
				}
				return nil
			}
			select {
			case bs.c <- e:
			case <-bs.ctx.Done():
				return nil
			}
		}
	})
}

func (bs *bufferedLogStream) peek() (log.Entry, bool) {
	if bs.ok && !bs.read {
		if bs.c == nil { // indicates that run has not been called
			bs.runOnce.Do(bs.run)
		}
		bs.e, bs.ok = <-bs.c
		bs.read = true
	}
	if !bs.ok {
		return log.Entry{}, false
	}
	return bs.e, true
}

func (bs *bufferedLogStream) pop() (log.Entry, bool) {
	e, ok := bs.peek()
	bs.read = false
	return e, ok
}

// fileLogStream represents a logStream from a single file.
type fileLogStream struct {
	from, to time.Time
	prevTime int64
	fi       fileInfo
	f        *os.File
	d        *log.EntryDecoder
	read     bool

	e   log.Entry
	err error
}

// newFileLogStream constructs a *fileLogStream and then peeks at the file to
// ensure that it contains a valid entry after from. If the file contains
// only data which precedes from, (nil, nil) is returned. If an I/O error is
// encountered during the initial peek, that error is returned. The
// underlying file is always closed before returning from this constructor,
// so the initial peek does not consume resources.
func newFileLogStream(fi fileInfo, from, to time.Time) (logStream, error) {
	s := &fileLogStream{
		fi:   fi,
		from: from,
		to:   to,
	}
	if _, ok := s.peek(); !ok {
		if err := s.error(); err != io.EOF {
			return nil, err
		}
		return nil, nil
	}
	s.close()
	return s, nil
}

func (s *fileLogStream) close() {
	s.f.Close()
	s.f = nil
	s.d = nil
}

func (s *fileLogStream) open() bool {
	const readBufSize = 1024
	if s.f, s.err = os.Open(s.fi.path); s.err != nil {
		return false
	}
	if s.err = seekToFirstAfterFrom(s.f, s.from); s.err != nil {
		return false
	}
	s.d = log.NewEntryDecoder(bufio.NewReaderSize(s.f, readBufSize))
	return true
}
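
// peek decodes entries until it reaches one in [from, to]. Entry timestamps
// are clamped to be non-decreasing within a file (via prevTime), and the
// file is closed again once an entry past to is seen.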
func (s *fileLogStream) peek() (log.Entry, bool) {
	for !s.read && s.err == nil {
		justOpened := false
		if s.d == nil {
			if !s.open() {
				return log.Entry{}, false
			}
			justOpened = true
		}
		var e log.Entry
		if s.err = s.d.Decode(&e); s.err != nil {
			s.close()
			s.e = log.Entry{}
			break
		}
		// Upon re-opening the file, we'll read s.e again.
		if justOpened && e == s.e {
			continue
		}
		s.e = e
		if s.e.Time < s.prevTime {
			s.e.Time = s.prevTime
		} else {
			s.prevTime = s.e.Time
		}
		afterTo := !s.to.IsZero() && s.e.Time > s.to.UnixNano()
		if afterTo {
			s.close()
			s.e = log.Entry{}
			s.err = io.EOF
		} else {
			beforeFrom := !s.from.IsZero() && s.e.Time < s.from.UnixNano()
			s.read = !beforeFrom
		}
	}
	return s.e, s.err == nil
}

func (s *fileLogStream) pop() (e log.Entry, ok bool) {
	if e, ok = s.peek(); !ok {
		return
	}
	s.read = false
	return e, ok
}

func (s *fileLogStream) fileInfo() *fileInfo { return &s.fi }
func (s *fileLogStream) error() error        { return s.err }

// seekToFirstAfterFrom uses binary search to seek to an offset after all
// entries which occur before from.
func seekToFirstAfterFrom(f *os.File, from time.Time) (err error) {
	if from.IsZero() {
		return nil
	}
	fi, err := f.Stat()
	if err != nil {
		return err
	}
	size := fi.Size()
	defer func() {
		if r := recover(); r != nil {
			err = r.(error)
		}
	}()
	// NB: decoding from an arbitrary byte offset assumes that entries
	// within the file appear in non-decreasing timestamp order and that
	// the decoder can find the next well-formed entry from that offset.
	offset := sort.Search(int(size), func(i int) bool {
		if _, err := f.Seek(int64(i), io.SeekStart); err != nil {
			panic(err)
		}
		var e log.Entry
		err := log.NewEntryDecoder(f).Decode(&e)
		if err != nil {
			if err == io.EOF {
				return true
			}
			panic(err)
		}
		return e.Time >= from.UnixNano()
	})
	if _, err := f.Seek(int64(offset), io.SeekStart); err != nil {
		return err
	}
	var e log.Entry
	if err := log.NewEntryDecoder(f).Decode(&e); err != nil {
		return err
	}
	_, err = f.Seek(int64(offset), io.SeekStart)
	return err
}