// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package gcslog is an implementation of maintner.MutationSource and Logger for Google Cloud Storage.
package gcslog

import (
	"bytes"
	"context"
	"crypto/sha256"
	"encoding/json"
	"fmt"
	"hash"
	"io"
	"log"
	"net/http"
	"path"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"sync"
	"time"

	"cloud.google.com/go/storage"
	"github.com/golang/protobuf/proto"
	"golang.org/x/build/maintner"
	"golang.org/x/build/maintner/maintpb"
	"golang.org/x/build/maintner/reclog"
	"google.golang.org/api/iterator"
)

// targetObjectSize is the goal maximum size for each log segment on
// GCS. In the unlikely case that a single record is larger than this
// then a segment would be bigger than this (records are never split
// between log segments). But otherwise this is the max size.
const targetObjectSize = 16 << 20

// flushInterval is how long a partially written segment may sit in
// memory before being flushed to GCS by the background timer.
const flushInterval = 10 * time.Minute

// GCSLog implements MutationLogger and MutationSource.
var _ maintner.MutationLogger = &GCSLog{}
var _ maintner.MutationSource = &GCSLog{}

// GCSLog logs mutations to GCS.
type GCSLog struct {
	sc            *storage.Client
	bucketName    string
	bucket        *storage.BucketHandle
	segmentPrefix string // optional object-name prefix (the part after "/" in the bucket spec)
	debug         bool

	mu         sync.Mutex // guards the following
	cond       *sync.Cond // broadcast on writes (and on context expiry) to wake long-pollers
	seg        map[int]gcsLogSegment
	curNum     int          // current (in-memory, not yet flushed) segment number
	logBuf     bytes.Buffer // contents of the current segment
	logSHA224  hash.Hash    // running hash of logBuf; reset when logBuf is emptied
	flushTimer *time.Timer  // non-nil if flush timer is active
}

// gcsLogSegment describes one flushed log segment object on GCS.
type gcsLogSegment struct {
	num     int // starting with 0
	size    int64
	sha224  string // in lowercase hex
	created time.Time
}

// ObjectName returns the GCS object name for the segment, e.g.
// "0001.<sha224-hex>.mutlog" (without any segmentPrefix).
func (s gcsLogSegment) ObjectName() string {
	return fmt.Sprintf("%04d.%s.mutlog", s.num, s.sha224)
}

// String implements fmt.Stringer for debug logging.
func (s gcsLogSegment) String() string {
	return fmt.Sprintf("{gcsLogSegment num=%v, size=%v, sha=%v, created=%v}", s.num, s.size, s.sha224, s.created.Format(time.RFC3339))
}

// newGCSLogBase returns a new gcsLog instance without any association
// with Google Cloud Storage.
func newGCSLogBase() *GCSLog {
	gl := &GCSLog{
		seg: map[int]gcsLogSegment{},
	}
	gl.cond = sync.NewCond(&gl.mu)
	return gl
}

// NewGCSLog creates a GCSLog that logs mutations to a given GCS bucket.
// If the bucket name contains a "/", the part after the slash will be a
// prefix for the segments.
func NewGCSLog(ctx context.Context, bucketName string) (*GCSLog, error) {
	sc, err := storage.NewClient(ctx)
	if err != nil {
		return nil, fmt.Errorf("storage.NewClient: %v", err)
	}
	gl := newGCSLogBase()
	gl.sc = sc

	prefix := ""
	if f := strings.SplitN(bucketName, "/", 2); len(f) > 1 {
		bucketName, prefix = f[0], f[1]
	}

	gl.bucketName = bucketName
	gl.segmentPrefix = prefix
	gl.bucket = sc.Bucket(gl.bucketName)
	if err := gl.initLoad(ctx); err != nil {
		return nil, err
	}
	return gl, nil
}

// objnameRx is used to identify a mutation log file by suffix:
// a 4-digit segment number, the lowercase hex SHA-224 of the segment
// contents, and the ".mutlog" extension.
var objnameRx = regexp.MustCompile(`(\d{4})\.([0-9a-f]{56})\.mutlog$`)

// initLoad populates gl.seg and gl.curNum from the objects already in
// the bucket. If several objects exist for the same segment number,
// the most recently created one wins and stale ones are deleted.
// If the newest segment is still small enough to grow, its contents
// are read back into logBuf so new records append to it.
func (gl *GCSLog) initLoad(ctx context.Context) error {
	it := gl.bucket.Objects(ctx, nil)
	maxNum := 0
	for {
		objAttrs, err := it.Next()
		if err == iterator.Done {
			break
		}
		if err != nil {
			return fmt.Errorf("iterating over %s bucket: %v", gl.bucketName, err)
		}
		if !strings.HasPrefix(objAttrs.Name, gl.segmentPrefix) {
			log.Printf("Ignoring GCS object with invalid prefix %q", objAttrs.Name)
			continue
		}
		m := objnameRx.FindStringSubmatch(objAttrs.Name)
		if m == nil {
			log.Printf("Ignoring unrecognized GCS object %q", objAttrs.Name)
			continue
		}
		// m[1] is exactly 4 decimal digits, so ParseInt can't fail here.
		n, _ := strconv.ParseInt(m[1], 10, 32)
		seg := gcsLogSegment{
			num:     int(n),
			sha224:  m[2],
			size:    objAttrs.Size,
			created: objAttrs.Created,
		}
		if seg.num > maxNum {
			maxNum = seg.num
		}
		// Keep only the newest object per segment number; delete the
		// older duplicate it replaces.
		if prevSeg, ok := gl.seg[int(n)]; !ok || prevSeg.created.Before(seg.created) {
			gl.seg[int(n)] = seg
			log.Printf("seg[%v] = %s", n, seg)
			if ok {
				gl.deleteOldSegment(ctx, gl.objectPath(prevSeg))
			}
		}
	}
	gl.curNum = maxNum

	if len(gl.seg) == 0 {
		return nil
	}

	// Check for any missing segments.
	for i := 0; i < maxNum; i++ {
		if _, ok := gl.seg[i]; !ok {
			return fmt.Errorf("saw max segment number %d but missing segment %d", maxNum, i)
		}
	}

	// Should we resume writing to the latest entry?
	// If the latest one is big enough, leave it be.
	// Otherwise slurp it in and we'll append to it.
	if gl.seg[maxNum].size >= targetObjectSize-(4<<10) {
		gl.curNum++
		return nil
	}

	r, err := gl.bucket.Object(gl.objectPath(gl.seg[maxNum])).NewReader(ctx)
	if err != nil {
		return err
	}
	defer r.Close()
	// Copy through gcsLogWriter so logSHA224 stays in sync with logBuf.
	if _, err = io.Copy(gcsLogWriter{gl}, r); err != nil {
		return err
	}

	return nil
}

// objectPath returns the full object name of seg within the bucket,
// including the optional segmentPrefix.
func (gl *GCSLog) objectPath(seg gcsLogSegment) string {
	return path.Join(gl.segmentPrefix, seg.ObjectName())
}

// serveLogFile handles GET/HEAD requests for "/logs/<num>".
// Flushed segments are redirected to their public GCS URL; the
// current in-memory segment is served directly from logBuf.
func (gl *GCSLog) serveLogFile(w http.ResponseWriter, r *http.Request) {
	if r.Method != "GET" && r.Method != "HEAD" {
		http.Error(w, "bad method", http.StatusBadRequest)
		return
	}

	num, err := strconv.Atoi(strings.TrimPrefix(r.URL.Path, "/logs/"))
	if err != nil {
		http.Error(w, "bad path", http.StatusBadRequest)
		return
	}

	gl.mu.Lock()
	if num > gl.curNum {
		gl.mu.Unlock()
		http.Error(w, "bad segment number", http.StatusBadRequest)
		return
	}
	if num != gl.curNum {
		obj := gl.objectPath(gl.seg[num])
		gl.mu.Unlock()
		http.Redirect(w, r, "https://storage.googleapis.com/"+gl.bucketName+"/"+obj, http.StatusFound)
		return
	}
	// Snapshot the buffer under the lock so ServeContent can run
	// (and seek, for range requests) without holding gl.mu.
	content := gl.logBuf.String()

	gl.mu.Unlock()
	w.Header().Set("Content-Type", "application/octet-stream")
	http.ServeContent(w, r, "", time.Time{}, strings.NewReader(content))
}

// serveJSONLogsIndex handles GET/HEAD requests for "/logs": a JSON
// index of log segments starting at the optional "startseg" form
// value, with optional long-polling via "waitsizenot".
func (gl *GCSLog) serveJSONLogsIndex(w http.ResponseWriter, r *http.Request) {
	if r.Method != "GET" && r.Method != "HEAD" {
		http.Error(w, "bad method", http.StatusBadRequest)
		return
	}

	startSeg, _ := strconv.Atoi(r.FormValue("startseg"))
	if startSeg < 0 {
		http.Error(w, "bad startseg", http.StatusBadRequest)
		return
	}

	// Long poll if request contains non-zero waitsizenot parameter.
	// The client's provided 'waitsizenot' value is the sum of the segment
	// sizes they already know. They're waiting for something new.
	if s := r.FormValue("waitsizenot"); s != "" {
		oldSize, err := strconv.ParseInt(s, 10, 64)
		if err != nil || oldSize < 0 {
			http.Error(w, "bad waitsizenot", http.StatusBadRequest)
			return
		}
		// Return a 304 if there's no activity in just under a minute.
		// This keeps some occasional activity on the TCP connection
		// so we (and any proxies) know it's alive, and can fit
		// within reasonable read/write deadlines on either side.
		ctx, cancel := context.WithTimeout(r.Context(), 55*time.Second)
		defer cancel()
		changed := gl.waitSizeNot(ctx, oldSize)
		if !changed {
			w.WriteHeader(http.StatusNotModified)
			return
		}
	}

	segs := gl.getJSONLogs(startSeg)

	w.Header().Set("Content-Type", "application/json")
	w.Header().Set("X-Sum-Segment-Size", fmt.Sprint(sumSegmentSizes(segs)))

	// Marshaling a []LogSegmentJSON can't fail, so the error is ignored.
	body, _ := json.MarshalIndent(segs, "", "\t")
	w.Write(body)
}

// sumSegmentSizes returns the sum of each seg.Size in segs.
func sumSegmentSizes(segs []maintner.LogSegmentJSON) (sum int64) {
	for _, seg := range segs {
		sum += seg.Size
	}
	return sum
}

// waitSizeNot blocks until the sum of GCSLog is not v, or the context expires.
// It reports whether the size changed.
func (gl *GCSLog) waitSizeNot(ctx context.Context, v int64) (changed bool) {
	returned := make(chan struct{})
	defer close(returned)
	// The helper goroutine broadcasts on gl.cond when ctx expires,
	// so the cond.Wait below can't block past the deadline.
	go gl.waitSizeNotAwaitContextOrChange(ctx, returned)
	gl.mu.Lock()
	defer gl.mu.Unlock()
	for {
		if curSize := gl.sumSizeLocked(); curSize != v {
			if gl.debug {
				log.Printf("gcslog: waitSize fired. from %d => %d", v, curSize)
			}
			return true
		}
		select {
		case <-ctx.Done():
			return false
		default:
			// Loop re-checks the size after every wakeup, so
			// spurious broadcasts are harmless.
			gl.cond.Wait()
		}
	}
}

// waitSizeNotAwaitContextOrChange is part of waitSizeNot.
// It's a goroutine that selects on two channels and calls
// sync.Cond.Broadcast to wake up the waitSizeNot waiter if the
// context expires.
func (gl *GCSLog) waitSizeNotAwaitContextOrChange(ctx context.Context, returned <-chan struct{}) {
	select {
	case <-ctx.Done():
		gl.cond.Broadcast()
	case <-returned:
		// No need to do a wakeup. Caller is already gone.
	}
}

// sumSizeLocked returns the total log size: all flushed segments
// (excluding any stale flush of the current segment) plus the
// in-memory buffer. gl.mu must be held.
func (gl *GCSLog) sumSizeLocked() int64 {
	var sum int64
	for n, seg := range gl.seg {
		if n != gl.curNum {
			sum += seg.size
		}
	}
	sum += int64(gl.logBuf.Len())
	return sum
}

// getJSONLogs returns the JSON index entries for segments starting at
// startSeg: all flushed segments via their GCS URLs, plus the current
// in-memory segment (if non-empty) via its local "/logs/<num>" URL.
func (gl *GCSLog) getJSONLogs(startSeg int) (segs []maintner.LogSegmentJSON) {
	gl.mu.Lock()
	defer gl.mu.Unlock()
	if startSeg > gl.curNum || startSeg < 0 {
		startSeg = 0
	}
	segs = make([]maintner.LogSegmentJSON, 0, gl.curNum-startSeg)
	for i := startSeg; i < gl.curNum; i++ {
		seg := gl.seg[i]
		segs = append(segs, maintner.LogSegmentJSON{
			Number: i,
			Size:   seg.size,
			SHA224: seg.sha224,
			URL:    fmt.Sprintf("https://storage.googleapis.com/%s/%s", gl.bucketName, gl.objectPath(seg)),
		})
	}
	if gl.logBuf.Len() > 0 {
		segs = append(segs, maintner.LogSegmentJSON{
			Number: gl.curNum,
			Size:   int64(gl.logBuf.Len()),
			SHA224: fmt.Sprintf("%x", gl.logSHA224.Sum(nil)),
			URL:    fmt.Sprintf("/logs/%d", gl.curNum),
		})
	}
	return
}

// gcsLogWriter is the io.Writer used to write to GCSLog.logBuf. It
// keeps the sha224 in sync. Caller must hold gl.mu.
type gcsLogWriter struct{ gl *GCSLog }

// Write appends p to gl.logBuf and feeds the same bytes to the running
// SHA-224. The hash is re-initialized whenever the buffer starts empty
// (i.e. at the beginning of each new segment). Caller must hold gl.mu.
func (w gcsLogWriter) Write(p []byte) (n int, err error) {
	gl := w.gl
	if gl.logBuf.Len() == 0 {
		gl.logSHA224 = sha256.New224()
	}
	// Writes to a hash.Hash and bytes.Buffer never fail; a short or
	// failed write indicates a programmer error, hence the panics.
	n, err = gl.logSHA224.Write(p)
	if n != len(p) || err != nil {
		panic(fmt.Sprintf("unexpected write (%v, %v) for %v bytes", n, err, len(p)))
	}
	n, err = gl.logBuf.Write(p)
	if n != len(p) || err != nil {
		panic(fmt.Sprintf("unexpected write (%v, %v) for %v bytes", n, err, len(p)))
	}
	return len(p), nil
}

// SetDebug controls whether verbose debugging is enabled on this log.
//
// It must only be called before it's used.
func (gl *GCSLog) SetDebug(v bool) { gl.debug = v }

// Log writes m to GCS after the buffer is full or after a periodic flush.
func (gl *GCSLog) Log(m *maintpb.Mutation) error {
	data, err := proto.Marshal(m)
	if err != nil {
		return err
	}

	gl.mu.Lock()
	defer gl.mu.Unlock()

	// If we have some data and this item would push us over, flush.
	// Records are never split between segments, so the record goes
	// entirely into the next segment instead.
	if gl.logBuf.Len()+len(data) > targetObjectSize {
		log.Printf("Log: record requires buffer flush.")
		if err := gl.flushLocked(context.TODO()); err != nil {
			return err
		}
		gl.curNum++
		gl.logBuf.Reset()
		log.Printf("cur log file now %d", gl.curNum)
	}

	if err := reclog.WriteRecord(gcsLogWriter{gl}, int64(gl.logBuf.Len()), data); err != nil {
		return err
	}
	gl.cond.Broadcast() // wake any long-polling subscribers

	// Otherwise schedule a periodic flush.
	if gl.flushTimer == nil {
		log.Printf("wrote record; flush timer registered.")
		gl.flushTimer = time.AfterFunc(flushInterval, gl.onFlushTimer)
	} else {
		log.Printf("wrote record; using existing flush timer.")
	}
	return nil
}

// onFlushTimer runs (off the main goroutines) when the periodic flush
// timer fires, flushing the in-memory buffer with a 5-minute budget.
func (gl *GCSLog) onFlushTimer() {
	log.Printf("flush timer fired.")
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
	defer cancel()
	gl.flush(ctx)
}

// flush acquires gl.mu, cancels any pending flush timer, and writes
// the current buffer to GCS. On failure it schedules a retry in one
// minute and returns the error.
func (gl *GCSLog) flush(ctx context.Context) error {
	gl.mu.Lock()

	defer gl.mu.Unlock()
	if gl.flushTimer != nil {
		gl.flushTimer.Stop()
		gl.flushTimer = nil
	}

	if err := gl.flushLocked(ctx); err != nil {
		gl.flushTimer = time.AfterFunc(1*time.Minute, gl.onFlushTimer)
		log.Printf("Error background flushing: %v", err)
		return err
	}

	return nil
}

// flushLocked uploads the current in-memory buffer as the object for
// segment gl.curNum, retrying with backoff, then records the new
// segment and deletes any superseded object at the same position.
// It does not advance curNum or reset the buffer; callers do that.
// gl.mu must be held.
func (gl *GCSLog) flushLocked(ctx context.Context) error {
	buf := gl.logBuf.Bytes()
	if len(buf) == 0 {
		return nil
	}
	seg := gcsLogSegment{
		num:    gl.curNum,
		sha224: fmt.Sprintf("%x", sha256.Sum224(buf)),
		size:   int64(len(buf)),
	}
	objName := gl.objectPath(seg)
	log.Printf("flushing %s (%d bytes)", objName, len(buf))
	err := try(4, time.Second, func() error {
		w := gl.bucket.Object(objName).NewWriter(ctx)
		w.ContentType = "application/octet-stream"
		if _, err := w.Write(buf); err != nil {
			return err
		}
		if err := w.Close(); err != nil {
			return err
		}
		// Attrs are only valid after a successful Close.
		attrs := w.Attrs()
		seg.created = attrs.Created
		return nil
	})
	if err != nil {
		return err
	}

	old := gl.seg[seg.num]
	gl.seg[seg.num] = seg

	// Atomically update the manifest file. If we lose the CAS
	// race, that means some other instance of this process is
	// running at the same time (e.g. accidental replica count > 1
	// in k8s config) and we should fail hard.
	// TODO: that^

	// Delete any old segment from the same position.
	if old.sha224 != "" && old.sha224 != seg.sha224 {
		gl.deleteOldSegment(ctx, gl.objectPath(old))
	}
	return nil
}

// deleteOldSegment best-effort deletes a superseded segment object,
// logging (but not returning) any error.
func (gl *GCSLog) deleteOldSegment(ctx context.Context, objName string) {
	err := gl.bucket.Object(objName).Delete(ctx)
	if err != nil {
		// Can ignore, though. Probably ephemeral, and not critical.
		// It'll be deleted by new versions or next start-up anyway.
		log.Printf("Warning: error deleting old segment version %v: %v", objName, err)
	} else {
		log.Printf("deleted old segment version %v", objName)
	}
}

// objectNames returns the sorted object paths of all known segments.
// Sorting the zero-padded names yields segment-number order.
func (gl *GCSLog) objectNames() (names []string) {
	gl.mu.Lock()
	defer gl.mu.Unlock()
	for _, seg := range gl.seg {
		names = append(names, gl.objectPath(seg))
	}
	sort.Strings(names)
	return
}

// foreachSegmentReader opens each segment object in order and calls fn
// with its reader, stopping at the first error.
func (gl *GCSLog) foreachSegmentReader(ctx context.Context, fn func(r io.Reader) error) error {
	objs := gl.objectNames()
	for i, obj := range objs {
		log.Printf("Reading %d/%d: %s ...", i+1, len(objs), obj)
		rd, err := gl.bucket.Object(obj).NewReader(ctx)
		if err != nil {
			return fmt.Errorf("failed to open %v: %v", obj, err)
		}
		err = fn(rd)
		rd.Close()
		if err != nil {
			return fmt.Errorf("error processing %v: %v", obj, err)
		}
	}
	return nil
}

// GetMutations returns a channel of mutations or related events.
// The channel will never be closed.
// All sends on the returned channel should select on the provided context.
func (gl *GCSLog) GetMutations(ctx context.Context) <-chan maintner.MutationStreamEvent {
	ch := make(chan maintner.MutationStreamEvent, 50) // buffered: overlap gunzip/unmarshal with loading
	go func() {
		err := gl.foreachSegmentReader(ctx, func(r io.Reader) error {
			return reclog.ForeachRecord(r, 0, func(off int64, hdr, rec []byte) error {
				m := new(maintpb.Mutation)
				if err := proto.Unmarshal(rec, m); err != nil {
					return err
				}
				select {
				case ch <- maintner.MutationStreamEvent{Mutation: m}:
					return nil
				case <-ctx.Done():
					return ctx.Err()
				}
			})
		})
		// Send a final event: End on success, Err on failure.
		final := maintner.MutationStreamEvent{Err: err}
		if err == nil {
			final.End = true
		}
		select {
		case ch <- final:
		case <-ctx.Done():
		}
	}()
	return ch
}

// try calls fn up to tries times, sleeping between attempts with an
// exponentially growing delay starting at firstDelay. It returns nil
// on the first success, or the last error.
func try(tries int, firstDelay time.Duration, fn func() error) error {
	var err error
	delay := firstDelay
	for i := 0; i < tries; i++ {
		err = fn()
		if err == nil {
			return nil
		}
		time.Sleep(delay)
		delay *= 2
	}
	return err
}

// CopyFrom is only used for the one-time migrate from disk-to-GCS code path.
func (gl *GCSLog) CopyFrom(src maintner.MutationSource) error {
	gl.curNum = 0
	ctx := context.Background()
	for e := range src.GetMutations(ctx) {
		if e.Err != nil {
			log.Printf("Corpus.Initialize: %v", e.Err)
			return e.Err
		}
		if e.End {
			log.Printf("reached end. flushing.")
			err := gl.flush(ctx)
			log.Printf("final flush = %v", err)
			return nil
		}
		if err := gl.Log(e.Mutation); err != nil {
			return err
		}
	}
	// GetMutations documents that its channel is never closed.
	panic("unexpected channel close")
}

// RegisterHandlers adds handlers for the default paths (/logs and /logs/).
func (gl *GCSLog) RegisterHandlers(mux *http.ServeMux) {
	mux.HandleFunc("/logs", gl.serveJSONLogsIndex)
	mux.HandleFunc("/logs/", gl.serveLogFile)
}