golang.org/x/build@v0.0.0-20240506185731-218518f32b70/maintner/maintnerd/gcslog/gcslog.go

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package gcslog is an implementation of maintner.MutationSource and Logger for Google Cloud Storage.
     6  package gcslog
     7  
     8  import (
     9  	"bytes"
    10  	"context"
    11  	"crypto/sha256"
    12  	"encoding/json"
    13  	"fmt"
    14  	"hash"
    15  	"io"
    16  	"log"
    17  	"net/http"
    18  	"path"
    19  	"regexp"
    20  	"sort"
    21  	"strconv"
    22  	"strings"
    23  	"sync"
    24  	"time"
    25  
    26  	"cloud.google.com/go/storage"
    27  	"github.com/golang/protobuf/proto"
    28  	"golang.org/x/build/maintner"
    29  	"golang.org/x/build/maintner/maintpb"
    30  	"golang.org/x/build/maintner/reclog"
    31  	"google.golang.org/api/iterator"
    32  )
    33  
    34  // targetObjectSize is the goal maximum size for each log segment on
    35  // GCS. In the unlikely case that a single record exceeds this size,
    36  // its segment will be larger, since records are never split between
    37  // log segments. Otherwise this is the maximum segment size.
    38  const targetObjectSize = 16 << 20
    39  
    40  const flushInterval = 10 * time.Minute
    41  
    42  // GCSLog implements MutationLogger and MutationSource.
    43  var _ maintner.MutationLogger = &GCSLog{}
    44  var _ maintner.MutationSource = &GCSLog{}
    45  
    46  // GCSLog logs mutations to GCS.
    47  type GCSLog struct {
    48  	sc            *storage.Client
    49  	bucketName    string
    50  	bucket        *storage.BucketHandle
    51  	segmentPrefix string
    52  	debug         bool
    53  
    54  	mu         sync.Mutex // guards the following
    55  	cond       *sync.Cond
    56  	seg        map[int]gcsLogSegment
    57  	curNum     int
    58  	logBuf     bytes.Buffer
    59  	logSHA224  hash.Hash
    60  	flushTimer *time.Timer // non-nil if flush timer is active
    61  }
    62  
    63  type gcsLogSegment struct {
    64  	num     int // starting with 0
    65  	size    int64
    66  	sha224  string // in lowercase hex
    67  	created time.Time
    68  }
    69  
    70  func (s gcsLogSegment) ObjectName() string {
    71  	return fmt.Sprintf("%04d.%s.mutlog", s.num, s.sha224)
    72  }
    73  
    74  func (s gcsLogSegment) String() string {
    75  	return fmt.Sprintf("{gcsLogSegment num=%v, size=%v, sha=%v, created=%v}", s.num, s.size, s.sha224, s.created.Format(time.RFC3339))
    76  }
    77  
    78  // newGCSLogBase returns a new GCSLog instance without any association
    79  // with Google Cloud Storage.
    80  func newGCSLogBase() *GCSLog {
    81  	gl := &GCSLog{
    82  		seg: map[int]gcsLogSegment{},
    83  	}
    84  	gl.cond = sync.NewCond(&gl.mu)
    85  	return gl
    86  }
    87  
    88  // NewGCSLog creates a GCSLog that logs mutations to a given GCS bucket.
    89  // If the bucket name contains a "/", the part after the slash will be a
    90  // prefix for the segment object names.
    91  func NewGCSLog(ctx context.Context, bucketName string) (*GCSLog, error) {
    92  	sc, err := storage.NewClient(ctx)
    93  	if err != nil {
    94  		return nil, fmt.Errorf("storage.NewClient: %v", err)
    95  	}
    96  	gl := newGCSLogBase()
    97  	gl.sc = sc
    98  
    99  	prefix := ""
   100  	if f := strings.SplitN(bucketName, "/", 2); len(f) > 1 {
   101  		bucketName, prefix = f[0], f[1]
   102  	}
   103  
   104  	gl.bucketName = bucketName
   105  	gl.segmentPrefix = prefix
   106  	gl.bucket = sc.Bucket(gl.bucketName)
   107  	if err := gl.initLoad(ctx); err != nil {
   108  		return nil, err
   109  	}
   110  	return gl, nil
   111  }
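
        // Illustrative usage sketch (not part of the original file): constructing a
        // GCSLog for a bucket whose segment objects live under a prefix. The bucket
        // and prefix names below are hypothetical.
        //
        //	ctx := context.Background()
        //	gl, err := gcslog.NewGCSLog(ctx, "my-maintner-bucket/maintner")
        //	if err != nil {
        //		log.Fatalf("NewGCSLog: %v", err)
        //	}
        //	// gl now satisfies both maintner.MutationLogger and maintner.MutationSource.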
   112  
   113  // objnameRx identifies a mutation log object by suffix: a 4-digit segment number and the lowercase hex SHA-224 of its contents.
   114  var objnameRx = regexp.MustCompile(`(\d{4})\.([0-9a-f]{56})\.mutlog$`)
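
        // Illustrative sketch (assumption, not in the original file): how a name
        // produced by gcsLogSegment.ObjectName maps onto objnameRx. The SHA-224
        // value below is a made-up placeholder.
        //
        //	name := "0012.0123456789abcdef0123456789abcdef0123456789abcdef01234567.mutlog"
        //	if m := objnameRx.FindStringSubmatch(name); m != nil {
        //		num, _ := strconv.ParseInt(m[1], 10, 32) // 12: zero-padded segment number
        //		sha := m[2]                              // 56 lowercase hex chars (SHA-224)
        //		_, _ = num, sha
        //	}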
   115  
   116  func (gl *GCSLog) initLoad(ctx context.Context) error {
   117  	it := gl.bucket.Objects(ctx, nil)
   118  	maxNum := 0
   119  	for {
   120  		objAttrs, err := it.Next()
   121  		if err == iterator.Done {
   122  			break
   123  		}
   124  		if err != nil {
   125  			return fmt.Errorf("iterating over %s bucket: %v", gl.bucketName, err)
   126  		}
   127  		if !strings.HasPrefix(objAttrs.Name, gl.segmentPrefix) {
   128  			log.Printf("Ignoring GCS object with invalid prefix %q", objAttrs.Name)
   129  			continue
   130  		}
   131  		m := objnameRx.FindStringSubmatch(objAttrs.Name)
   132  		if m == nil {
   133  			log.Printf("Ignoring unrecognized GCS object %q", objAttrs.Name)
   134  			continue
   135  		}
   136  		n, _ := strconv.ParseInt(m[1], 10, 32)
   137  		seg := gcsLogSegment{
   138  			num:     int(n),
   139  			sha224:  m[2],
   140  			size:    objAttrs.Size,
   141  			created: objAttrs.Created,
   142  		}
   143  		if seg.num > maxNum {
   144  			maxNum = seg.num
   145  		}
   146  		if prevSeg, ok := gl.seg[int(n)]; !ok || prevSeg.created.Before(seg.created) {
   147  			gl.seg[int(n)] = seg
   148  			log.Printf("seg[%v] = %s", n, seg)
   149  			if ok {
   150  				gl.deleteOldSegment(ctx, gl.objectPath(prevSeg))
   151  			}
   152  		}
   153  	}
   154  	gl.curNum = maxNum
   155  
   156  	if len(gl.seg) == 0 {
   157  		return nil
   158  	}
   159  
   160  	// Check for any missing segments.
   161  	for i := 0; i < maxNum; i++ {
   162  		if _, ok := gl.seg[i]; !ok {
   163  			return fmt.Errorf("saw max segment number %d but missing segment %d", maxNum, i)
   164  		}
   165  	}
   166  
   167  	// Should we resume writing to the latest segment?
   168  	// If it's already within 4 KiB of the target size, leave it be.
   169  	// Otherwise slurp it into the buffer and append to it.
   170  	if gl.seg[maxNum].size >= targetObjectSize-(4<<10) {
   171  		gl.curNum++
   172  		return nil
   173  	}
   174  
   175  	r, err := gl.bucket.Object(gl.objectPath(gl.seg[maxNum])).NewReader(ctx)
   176  	if err != nil {
   177  		return err
   178  	}
   179  	defer r.Close()
   180  	if _, err = io.Copy(gcsLogWriter{gl}, r); err != nil {
   181  		return err
   182  	}
   183  
   184  	return nil
   185  }
   186  
   187  func (gl *GCSLog) objectPath(seg gcsLogSegment) string {
   188  	return path.Join(gl.segmentPrefix, seg.ObjectName())
   189  }
   190  
   191  func (gl *GCSLog) serveLogFile(w http.ResponseWriter, r *http.Request) {
   192  	if r.Method != "GET" && r.Method != "HEAD" {
   193  		http.Error(w, "bad method", http.StatusBadRequest)
   194  		return
   195  	}
   196  
   197  	num, err := strconv.Atoi(strings.TrimPrefix(r.URL.Path, "/logs/"))
   198  	if err != nil {
   199  		http.Error(w, "bad path", http.StatusBadRequest)
   200  		return
   201  	}
   202  
   203  	gl.mu.Lock()
   204  	if num > gl.curNum {
   205  		gl.mu.Unlock()
   206  		http.Error(w, "bad segment number", http.StatusBadRequest)
   207  		return
   208  	}
   209  	if num != gl.curNum {
   210  		obj := gl.objectPath(gl.seg[num])
   211  		gl.mu.Unlock()
   212  		http.Redirect(w, r, "https://storage.googleapis.com/"+gl.bucketName+"/"+obj, http.StatusFound)
   213  		return
   214  	}
   215  	content := gl.logBuf.String()
   216  
   217  	gl.mu.Unlock()
   218  	w.Header().Set("Content-Type", "application/octet-stream")
   219  	http.ServeContent(w, r, "", time.Time{}, strings.NewReader(content))
   220  }
   221  
   222  func (gl *GCSLog) serveJSONLogsIndex(w http.ResponseWriter, r *http.Request) {
   223  	if r.Method != "GET" && r.Method != "HEAD" {
   224  		http.Error(w, "bad method", http.StatusBadRequest)
   225  		return
   226  	}
   227  
   228  	startSeg, _ := strconv.Atoi(r.FormValue("startseg"))
   229  	if startSeg < 0 {
   230  		http.Error(w, "bad startseg", http.StatusBadRequest)
   231  		return
   232  	}
   233  
   234  	// Long poll if the request contains a waitsizenot parameter.
   235  	// The client's 'waitsizenot' value is the sum of the segment
   236  	// sizes it already knows about; it's waiting for something new.
   237  	if s := r.FormValue("waitsizenot"); s != "" {
   238  		oldSize, err := strconv.ParseInt(s, 10, 64)
   239  		if err != nil || oldSize < 0 {
   240  			http.Error(w, "bad waitsizenot", http.StatusBadRequest)
   241  			return
   242  		}
   243  		// Return a 304 if there's no activity in just under a minute.
   244  		// This keeps some occasional activity on the TCP connection
   245  		// so we (and any proxies) know it's alive, and can fit
   246  		// within reasonable read/write deadlines on either side.
   247  		ctx, cancel := context.WithTimeout(r.Context(), 55*time.Second)
   248  		defer cancel()
   249  		changed := gl.waitSizeNot(ctx, oldSize)
   250  		if !changed {
   251  			w.WriteHeader(http.StatusNotModified)
   252  			return
   253  		}
   254  	}
   255  
   256  	segs := gl.getJSONLogs(startSeg)
   257  
   258  	w.Header().Set("Content-Type", "application/json")
   259  	w.Header().Set("X-Sum-Segment-Size", fmt.Sprint(sumSegmentSizes(segs)))
   260  
   261  	body, _ := json.MarshalIndent(segs, "", "\t")
   262  	w.Write(body)
   263  }
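
        // Protocol sketch (illustrative, not part of the original file): a follower
        // of this log fetches /logs, remembers the sum of the returned segment sizes
        // (also echoed in the X-Sum-Segment-Size header), and then long-polls with
        // waitsizenot set to that sum until the server reports growth or replies 304.
        // The host below is a placeholder; maintner's own network client is the real
        // consumer of this endpoint.
        //
        //	base := "https://maintner.example.com"
        //	var sum int64
        //	for {
        //		res, err := http.Get(fmt.Sprintf("%s/logs?startseg=0&waitsizenot=%d", base, sum))
        //		if err != nil {
        //			log.Fatal(err)
        //		}
        //		if res.StatusCode == http.StatusNotModified {
        //			res.Body.Close()
        //			continue // nothing new yet; poll again
        //		}
        //		var segs []maintner.LogSegmentJSON
        //		if err := json.NewDecoder(res.Body).Decode(&segs); err != nil {
        //			log.Fatal(err)
        //		}
        //		res.Body.Close()
        //		sum, _ = strconv.ParseInt(res.Header.Get("X-Sum-Segment-Size"), 10, 64)
        //		// ... download any new or changed segments here ...
        //	}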
   264  
   265  // sumSegmentSizes returns the sum of each seg.Size in segs.
   266  func sumSegmentSizes(segs []maintner.LogSegmentJSON) (sum int64) {
   267  	for _, seg := range segs {
   268  		sum += seg.Size
   269  	}
   270  	return sum
   271  }
   272  
   273  // waitSizeNot blocks until the sum of the GCSLog's segment sizes is not v,
   274  // or the context expires. It reports whether the size changed.
   275  func (gl *GCSLog) waitSizeNot(ctx context.Context, v int64) (changed bool) {
   276  	returned := make(chan struct{})
   277  	defer close(returned)
   278  	go gl.waitSizeNotAwaitContextOrChange(ctx, returned)
   279  	gl.mu.Lock()
   280  	defer gl.mu.Unlock()
   281  	for {
   282  		if curSize := gl.sumSizeLocked(); curSize != v {
   283  			if gl.debug {
   284  				log.Printf("gcslog: waitSize fired. from %d => %d", v, curSize)
   285  			}
   286  			return true
   287  		}
   288  		select {
   289  		case <-ctx.Done():
   290  			return false
   291  		default:
   292  			gl.cond.Wait()
   293  		}
   294  	}
   295  }
   296  
   297  // waitSizeNotAwaitContextOrChange is part of waitSizeNot.
   298  // It's a goroutine that selects on two channels and calls
   299  // sync.Cond.Broadcast to wake up the waitSizeNot waiter if the
   300  // context expires.
   301  func (gl *GCSLog) waitSizeNotAwaitContextOrChange(ctx context.Context, returned <-chan struct{}) {
   302  	select {
   303  	case <-ctx.Done():
   304  		gl.cond.Broadcast()
   305  	case <-returned:
   306  		// No need to do a wakeup. Caller is already gone.
   307  	}
   308  }
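
        // The two functions above work around sync.Cond having no way to abandon a
        // Wait when a context is canceled: a helper goroutine broadcasts on
        // cancellation, and the waiter re-checks both its condition and the context
        // after every wakeup. A minimal standalone sketch of the same pattern
        // (hypothetical names, not from this package):
        //
        //	func waitUntil(ctx context.Context, cond *sync.Cond, done func() bool) bool {
        //		stop := make(chan struct{})
        //		defer close(stop)
        //		go func() {
        //			select {
        //			case <-ctx.Done():
        //				cond.Broadcast()
        //			case <-stop:
        //			}
        //		}()
        //		cond.L.Lock()
        //		defer cond.L.Unlock()
        //		for !done() {
        //			if ctx.Err() != nil {
        //				return false
        //			}
        //			cond.Wait()
        //		}
        //		return true
        //	}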
   309  
   310  func (gl *GCSLog) sumSizeLocked() int64 {
   311  	var sum int64
   312  	for n, seg := range gl.seg {
   313  		if n != gl.curNum {
   314  			sum += seg.size
   315  		}
   316  	}
   317  	sum += int64(gl.logBuf.Len())
   318  	return sum
   319  }
   320  
   321  func (gl *GCSLog) getJSONLogs(startSeg int) (segs []maintner.LogSegmentJSON) {
   322  	gl.mu.Lock()
   323  	defer gl.mu.Unlock()
   324  	if startSeg > gl.curNum || startSeg < 0 {
   325  		startSeg = 0
   326  	}
   327  	segs = make([]maintner.LogSegmentJSON, 0, gl.curNum-startSeg)
   328  	for i := startSeg; i < gl.curNum; i++ {
   329  		seg := gl.seg[i]
   330  		segs = append(segs, maintner.LogSegmentJSON{
   331  			Number: i,
   332  			Size:   seg.size,
   333  			SHA224: seg.sha224,
   334  			URL:    fmt.Sprintf("https://storage.googleapis.com/%s/%s", gl.bucketName, gl.objectPath(seg)),
   335  		})
   336  	}
   337  	if gl.logBuf.Len() > 0 {
   338  		segs = append(segs, maintner.LogSegmentJSON{
   339  			Number: gl.curNum,
   340  			Size:   int64(gl.logBuf.Len()),
   341  			SHA224: fmt.Sprintf("%x", gl.logSHA224.Sum(nil)),
   342  			URL:    fmt.Sprintf("/logs/%d", gl.curNum),
   343  		})
   344  	}
   345  	return
   346  }
   347  
   348  // gcsLogWriter is the io.Writer used to write to GCSLog.logBuf. It
   349  // keeps the sha224 in sync. Caller must hold gl.mu.
   350  type gcsLogWriter struct{ gl *GCSLog }
   351  
   352  func (w gcsLogWriter) Write(p []byte) (n int, err error) {
   353  	gl := w.gl
   354  	if gl.logBuf.Len() == 0 {
   355  		gl.logSHA224 = sha256.New224()
   356  	}
   357  	n, err = gl.logSHA224.Write(p)
   358  	if n != len(p) || err != nil {
   359  		panic(fmt.Sprintf("unexpected write (%v, %v) for %v bytes", n, err, len(p)))
   360  	}
   361  	n, err = gl.logBuf.Write(p)
   362  	if n != len(p) || err != nil {
   363  		panic(fmt.Sprintf("unexpected write (%v, %v) for %v bytes", n, err, len(p)))
   364  	}
   365  	return len(p), nil
   366  }
   367  
   368  // SetDebug controls whether verbose debugging is enabled on this log.
   369  //
   370  // It must only be called before it's used.
   371  func (gl *GCSLog) SetDebug(v bool) { gl.debug = v }
   372  
   373  // Log buffers the mutation m; the buffer is written to GCS once it fills or when the periodic flush fires.
   374  func (gl *GCSLog) Log(m *maintpb.Mutation) error {
   375  	data, err := proto.Marshal(m)
   376  	if err != nil {
   377  		return err
   378  	}
   379  
   380  	gl.mu.Lock()
   381  	defer gl.mu.Unlock()
   382  
   383  	// If this record would push the current segment over the target size, flush first.
   384  	if gl.logBuf.Len()+len(data) > targetObjectSize {
   385  		log.Printf("Log: record requires buffer flush.")
   386  		if err := gl.flushLocked(context.TODO()); err != nil {
   387  			return err
   388  		}
   389  		gl.curNum++
   390  		gl.logBuf.Reset()
   391  		log.Printf("cur log file now %d", gl.curNum)
   392  	}
   393  
   394  	if err := reclog.WriteRecord(gcsLogWriter{gl}, int64(gl.logBuf.Len()), data); err != nil {
   395  		return err
   396  	}
   397  	gl.cond.Broadcast() // wake any long-polling subscribers
   398  
   399  	// Schedule a periodic flush if one isn't already pending.
   400  	if gl.flushTimer == nil {
   401  		log.Printf("wrote record; flush timer registered.")
   402  		gl.flushTimer = time.AfterFunc(flushInterval, gl.onFlushTimer)
   403  	} else {
   404  		log.Printf("wrote record; using existing flush timer.")
   405  	}
   406  	return nil
   407  }
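
        // Usage sketch (illustrative): callers hand Log an already-populated
        // *maintpb.Mutation; the record is buffered immediately and reaches GCS on
        // the next size-triggered or timer-triggered flush.
        //
        //	m := new(maintpb.Mutation) // populate one of its mutation fields first
        //	if err := gl.Log(m); err != nil {
        //		log.Printf("logging mutation: %v", err)
        //	}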
   408  
   409  func (gl *GCSLog) onFlushTimer() {
   410  	log.Printf("flush timer fired.")
   411  	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
   412  	defer cancel()
   413  	gl.flush(ctx)
   414  }
   415  
   416  func (gl *GCSLog) flush(ctx context.Context) error {
   417  	gl.mu.Lock()
   418  
   419  	defer gl.mu.Unlock()
   420  	if gl.flushTimer != nil {
   421  		gl.flushTimer.Stop()
   422  		gl.flushTimer = nil
   423  	}
   424  
   425  	if err := gl.flushLocked(ctx); err != nil {
   426  		gl.flushTimer = time.AfterFunc(1*time.Minute, gl.onFlushTimer)
   427  		log.Printf("Error background flushing: %v", err)
   428  		return err
   429  	}
   430  
   431  	return nil
   432  }
   433  
   434  func (gl *GCSLog) flushLocked(ctx context.Context) error {
   435  	buf := gl.logBuf.Bytes()
   436  	if len(buf) == 0 {
   437  		return nil
   438  	}
   439  	seg := gcsLogSegment{
   440  		num:    gl.curNum,
   441  		sha224: fmt.Sprintf("%x", sha256.Sum224(buf)),
   442  		size:   int64(len(buf)),
   443  	}
   444  	objName := gl.objectPath(seg)
   445  	log.Printf("flushing %s (%d bytes)", objName, len(buf))
   446  	err := try(4, time.Second, func() error {
   447  		w := gl.bucket.Object(objName).NewWriter(ctx)
   448  		w.ContentType = "application/octet-stream"
   449  		if _, err := w.Write(buf); err != nil {
   450  			return err
   451  		}
   452  		if err := w.Close(); err != nil {
   453  			return err
   454  		}
   455  		attrs := w.Attrs()
   456  		seg.created = attrs.Created
   457  		return nil
   458  	})
   459  	if err != nil {
   460  		return err
   461  	}
   462  
   463  	old := gl.seg[seg.num]
   464  	gl.seg[seg.num] = seg
   465  
   466  	// Atomically update the manifest file. If we lose the CAS
   467  	// race, that means some other instance of this process is
   468  	// running at the same time (e.g. accidental replica count > 1
   469  	// in k8s config) and we should fail hard.
   470  	// TODO: that^
   471  
   472  	// Delete any old segment from the same position.
   473  	if old.sha224 != "" && old.sha224 != seg.sha224 {
   474  		gl.deleteOldSegment(ctx, gl.objectPath(old))
   475  	}
   476  	return nil
   477  }
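
        // Because a segment's SHA-224 is embedded in its object name, a reader can
        // verify a downloaded segment independently of GCS metadata. Illustrative
        // sketch (assumes a gcsLogSegment named seg is in scope within this package):
        //
        //	r, err := gl.bucket.Object(gl.objectPath(seg)).NewReader(ctx)
        //	if err != nil {
        //		return err
        //	}
        //	data, err := io.ReadAll(r)
        //	r.Close()
        //	if err != nil {
        //		return err
        //	}
        //	if got := fmt.Sprintf("%x", sha256.Sum224(data)); got != seg.sha224 {
        //		return fmt.Errorf("segment %d: content SHA-224 %s != %s from object name", seg.num, got, seg.sha224)
        //	}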
   478  
   479  func (gl *GCSLog) deleteOldSegment(ctx context.Context, objName string) {
   480  	err := gl.bucket.Object(objName).Delete(ctx)
   481  	if err != nil {
   482  		// This error can be ignored: it's probably ephemeral and not critical.
   483  		// It'll be deleted by new versions or next start-up anyway.
   484  		log.Printf("Warning: error deleting old segment version %v: %v", objName, err)
   485  	} else {
   486  		log.Printf("deleted old segment version %v", objName)
   487  	}
   488  }
   489  
   490  func (gl *GCSLog) objectNames() (names []string) {
   491  	gl.mu.Lock()
   492  	defer gl.mu.Unlock()
   493  	for _, seg := range gl.seg {
   494  		names = append(names, gl.objectPath(seg))
   495  	}
   496  	sort.Strings(names)
   497  	return
   498  }
   499  
   500  func (gl *GCSLog) foreachSegmentReader(ctx context.Context, fn func(r io.Reader) error) error {
   501  	objs := gl.objectNames()
   502  	for i, obj := range objs {
   503  		log.Printf("Reading %d/%d: %s ...", i+1, len(objs), obj)
   504  		rd, err := gl.bucket.Object(obj).NewReader(ctx)
   505  		if err != nil {
   506  			return fmt.Errorf("failed to open %v: %v", obj, err)
   507  		}
   508  		err = fn(rd)
   509  		rd.Close()
   510  		if err != nil {
   511  			return fmt.Errorf("error processing %v: %v", obj, err)
   512  		}
   513  	}
   514  	return nil
   515  }
   516  
   517  // GetMutations returns a channel of mutations or related events.
   518  // The channel will never be closed.
   519  // All sends on the returned channel should select on the provided context.
   520  func (gl *GCSLog) GetMutations(ctx context.Context) <-chan maintner.MutationStreamEvent {
   521  	ch := make(chan maintner.MutationStreamEvent, 50) // buffered: overlap gunzip/unmarshal with loading
   522  	go func() {
   523  		err := gl.foreachSegmentReader(ctx, func(r io.Reader) error {
   524  			return reclog.ForeachRecord(r, 0, func(off int64, hdr, rec []byte) error {
   525  				m := new(maintpb.Mutation)
   526  				if err := proto.Unmarshal(rec, m); err != nil {
   527  					return err
   528  				}
   529  				select {
   530  				case ch <- maintner.MutationStreamEvent{Mutation: m}:
   531  					return nil
   532  				case <-ctx.Done():
   533  					return ctx.Err()
   534  				}
   535  			})
   536  		})
   537  		final := maintner.MutationStreamEvent{Err: err}
   538  		if err == nil {
   539  			final.End = true
   540  		}
   541  		select {
   542  		case ch <- final:
   543  		case <-ctx.Done():
   544  		}
   545  	}()
   546  	return ch
   547  }
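
        // Consumption sketch (illustrative): a caller drains the returned channel
        // until it sees either an error or the End marker. The process function is a
        // hypothetical callback; in practice maintner's Corpus consumes this source.
        //
        //	for e := range gl.GetMutations(ctx) {
        //		if e.Err != nil {
        //			log.Fatalf("mutation stream: %v", e.Err)
        //		}
        //		if e.End {
        //			break // caught up with everything currently in the log
        //		}
        //		process(e.Mutation)
        //	}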
   548  
   549  func try(tries int, firstDelay time.Duration, fn func() error) error {
   550  	var err error
   551  	delay := firstDelay
   552  	for i := 0; i < tries; i++ {
   553  		err = fn()
   554  		if err == nil {
   555  			return nil
   556  		}
   557  		time.Sleep(delay)
   558  		delay *= 2
   559  	}
   560  	return err
   561  }
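
        // try retries fn up to tries times, sleeping firstDelay after the first
        // failure and doubling the delay after each subsequent one (it also sleeps
        // once after the final failure before returning the last error).
        // Usage sketch with a hypothetical operation:
        //
        //	err := try(4, time.Second, func() error {
        //		return doFlakyGCSWrite() // retried with 1s, 2s, 4s pauses between attempts
        //	})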
   562  
   563  // CopyFrom is only used for the one-time migration from disk to GCS.
   564  func (gl *GCSLog) CopyFrom(src maintner.MutationSource) error {
   565  	gl.curNum = 0
   566  	ctx := context.Background()
   567  	for e := range src.GetMutations(ctx) {
   568  		if e.Err != nil {
   569  			log.Printf("Corpus.Initialize: %v", e.Err)
   570  			return e.Err
   571  		}
   572  		if e.End {
   573  			log.Printf("reached end. flushing.")
   574  			err := gl.flush(ctx)
   575  			log.Printf("final flush = %v", err)
   576  			return nil
   577  		}
   578  		if err := gl.Log(e.Mutation); err != nil {
   579  			return err
   580  		}
   581  	}
   582  	panic("unexpected channel close")
   583  }
   584  
   585  // RegisterHandlers adds handlers for the default paths (/logs and /logs/).
   586  func (gl *GCSLog) RegisterHandlers(mux *http.ServeMux) {
   587  	mux.HandleFunc("/logs", gl.serveJSONLogsIndex)
   588  	mux.HandleFunc("/logs/", gl.serveLogFile)
   589  }
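
        // Wiring sketch (illustrative; the real server setup lives in maintnerd):
        // the handlers registered above expose the JSON segment index at /logs and
        // the in-progress segment at /logs/<n>.
        //
        //	mux := http.NewServeMux()
        //	gl.RegisterHandlers(mux)
        //	log.Fatal(http.ListenAndServe(":8080", mux))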