go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/scheduler/appengine/task/gitiles/state.go (about)

     1  // Copyright 2016 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package gitiles
    16  
import (
	"bytes"
	"compress/gzip"
	"context"
	"encoding/hex"
	"fmt"
	"io"
	"sort"
	"strings"

	"google.golang.org/protobuf/proto"

	"go.chromium.org/luci/common/api/gitiles"
	"go.chromium.org/luci/common/retry/transient"
	"go.chromium.org/luci/common/tsmon/field"
	"go.chromium.org/luci/common/tsmon/metric"
	"go.chromium.org/luci/common/tsmon/types"
	ds "go.chromium.org/luci/gae/service/datastore"

	"go.chromium.org/luci/scheduler/appengine/task/gitiles/pb"
)
    37  
    38  var (
    39  	metricTaskGitilesStoredSize = metric.NewInt(
    40  		"luci/scheduler/task/gitiles/stored/size",
    41  		"Size of serialized state in bytes.",
    42  		&types.MetricMetadata{Units: types.Bytes},
    43  		field.String("jobID"),
    44  	)
    45  
    46  	metricTaskGitilesStoredRefs = metric.NewInt(
    47  		"luci/scheduler/task/gitiles/stored/refs",
    48  		"Number of refs stored in a serialized state.",
    49  		nil,
    50  		field.String("jobID"),
    51  	)
    52  )
    53  
    54  // Repository is used to store the repository status.
    55  type Repository struct {
    56  	_kind  string         `gae:"$kind,gitiles.Repository"`
    57  	_extra ds.PropertyMap `gae:"-,extra"`
    58  
    59  	// ID is uniquely derived from jobID and repository URL, see repositoryID().
    60  	ID string `gae:"$id"`
    61  
    62  	// CompressedState stores gzip-compressed proto-serialized list of watched
    63  	// refs with hashes of their tips.
    64  	CompressedState []byte `gae:",noindex"`
    65  }
    66  
    67  func repositoryID(jobID, repo string) (string, error) {
    68  	host, proj, err := gitiles.ParseRepoURL(repo)
    69  	if err != nil {
    70  		return "", err
    71  	}
    72  	return strings.Join([]string{jobID, host, proj}, "\x00 "), nil
    73  }
    74  
    75  // loadStateEntry loads Repository instance from datastore.
    76  func loadStateEntry(c context.Context, jobID, repo string) (*Repository, error) {
    77  	id, err := repositoryID(jobID, repo)
    78  	if err != nil {
    79  		return nil, err
    80  	}
    81  	entry := &Repository{ID: id}
    82  	if err := ds.Get(c, entry); err == ds.ErrNoSuchEntity {
    83  		return nil, err
    84  	}
    85  	return entry, transient.Tag.Apply(err)
    86  }
    87  
    88  func saveStateEntry(c context.Context, jobID, repo string, compressedBytes []byte) error {
    89  	id, err := repositoryID(jobID, repo)
    90  	if err != nil {
    91  		return err
    92  	}
    93  	entry := Repository{ID: id, CompressedState: compressedBytes}
    94  	return transient.Tag.Apply(ds.Put(c, &entry))
    95  }
    96  
    97  func loadState(c context.Context, jobID, repo string) (map[string]string, error) {
    98  	switch stored, err := loadStateEntry(c, jobID, repo); {
    99  	case err == ds.ErrNoSuchEntity:
   100  		return map[string]string{}, nil
   101  	case err != nil:
   102  		return nil, err
   103  	case len(stored.CompressedState) > 0:
   104  		unGzip, err := gzip.NewReader(bytes.NewBuffer(stored.CompressedState))
   105  		if err != nil {
   106  			return nil, err
   107  		}
   108  		uncompressed, err := io.ReadAll(unGzip)
   109  		if err != nil {
   110  			return nil, err
   111  		}
   112  		if err = unGzip.Close(); err != nil {
   113  			return nil, err
   114  		}
   115  
   116  		var state pb.RepositoryState
   117  		if err = proto.Unmarshal(uncompressed, &state); err != nil {
   118  			return nil, err
   119  		}
   120  
   121  		heads := map[string]string{}
   122  		for _, space := range state.Spaces {
   123  			for _, child := range space.Children {
   124  				heads[space.Prefix+"/"+child.Suffix] = hex.EncodeToString(child.Sha1)
   125  			}
   126  		}
   127  		return heads, nil
   128  
   129  	default:
   130  		return map[string]string{}, nil
   131  	}
   132  }
   133  
   134  func saveState(c context.Context, jobID, repo string, refTips map[string]string) error {
   135  	// There could be many refTips in repos, though most will share some prefix.
   136  	// So we trade CPU to save this efficiently.
   137  
   138  	byNamespace := map[string]*pb.RefSpace{}
   139  	for ref, sha1 := range refTips {
   140  		sha1bytes, err := hex.DecodeString(sha1)
   141  		if err != nil {
   142  			return err
   143  		}
   144  		lastSlash := strings.LastIndex(ref, "/")
   145  		ns, suffix := ref[:lastSlash], ref[lastSlash+1:]
   146  		child := &pb.Child{Sha1: sha1bytes, Suffix: suffix}
   147  		if namespace, exists := byNamespace[ns]; exists {
   148  			namespace.Children = append(namespace.Children, child)
   149  		} else {
   150  			byNamespace[ns] = &pb.RefSpace{
   151  				Prefix:   ns,
   152  				Children: []*pb.Child{child},
   153  			}
   154  		}
   155  	}
   156  
   157  	spaces := make(sortedSpaces, 0, len(byNamespace))
   158  	for _, space := range byNamespace {
   159  		cs := sortedChildren(space.Children)
   160  		sort.Sort(cs)
   161  		spaces = append(spaces, space)
   162  	}
   163  	sort.Sort(spaces)
   164  
   165  	serialized, err := proto.Marshal(&pb.RepositoryState{Spaces: spaces})
   166  	if err != nil {
   167  		return err
   168  	}
   169  	compressed := &bytes.Buffer{}
   170  	w := gzip.NewWriter(compressed)
   171  	if _, err := w.Write(serialized); err != nil {
   172  		return err
   173  	}
   174  	if err = w.Close(); err != nil {
   175  		return err
   176  	}
   177  
   178  	metricTaskGitilesStoredRefs.Set(c, int64(len(refTips)), jobID)
   179  	metricTaskGitilesStoredSize.Set(c, int64(compressed.Len()), jobID)
   180  	return saveStateEntry(c, jobID, repo, compressed.Bytes())
   181  }
   182  
   183  type sortedSpaces []*pb.RefSpace
   184  
   185  func (s sortedSpaces) Len() int           { return len(s) }
   186  func (s sortedSpaces) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
   187  func (s sortedSpaces) Less(i, j int) bool { return s[i].Prefix < s[j].Prefix }
   188  
   189  type sortedChildren []*pb.Child
   190  
   191  func (s sortedChildren) Len() int           { return len(s) }
   192  func (s sortedChildren) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
   193  func (s sortedChildren) Less(i, j int) bool { return s[i].Suffix < s[j].Suffix }