go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cipd/appengine/impl/repo/processing/client_extractor.go (about)

     1  // Copyright 2018 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package processing
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"io"
    21  	"strings"
    22  
    23  	"go.chromium.org/luci/common/errors"
    24  	"go.chromium.org/luci/common/logging"
    25  	"go.chromium.org/luci/common/retry/transient"
    26  	"go.chromium.org/luci/gae/service/datastore"
    27  
    28  	api "go.chromium.org/luci/cipd/api/cipd/v1"
    29  	"go.chromium.org/luci/cipd/appengine/impl/cas"
    30  	"go.chromium.org/luci/cipd/appengine/impl/model"
    31  	"go.chromium.org/luci/cipd/common"
    32  )
    33  
    34  // ClientExtractorProcID is identifier of ClientExtractor processor.
    35  const ClientExtractorProcID = "cipd_client_binary:v1"
    36  
    37  const clientPkgPrefix = "infra/tools/cipd/"
    38  
    39  // GetClientPackage returns the name of the client package for CIPD client for
    40  // the given platform.
    41  //
    42  // Returns an error if the platform name is invalid.
    43  func GetClientPackage(platform string) (string, error) {
    44  	pkg := clientPkgPrefix + platform
    45  	if err := common.ValidatePackageName(pkg); err != nil {
    46  		return "", err
    47  	}
    48  	return pkg, nil
    49  }
    50  
    51  // IsClientPackage returns true if the given package stores a CIPD client.
    52  func IsClientPackage(pkg string) bool {
    53  	return strings.HasPrefix(pkg, clientPkgPrefix)
    54  }
    55  
    56  // GetClientBinaryName returns name of CIPD binary inside the package.
    57  //
    58  // Either 'cipd' or 'cipd.exe'.
    59  func GetClientBinaryName(pkg string) string {
    60  	if strings.HasPrefix(pkg, clientPkgPrefix+"windows-") {
    61  		return "cipd.exe"
    62  	}
    63  	return "cipd"
    64  }
    65  
    66  // ClientExtractorResult is stored in JSON form as a result of ClientExtractor
    67  // execution.
    68  //
    69  // Compatible with Python version of the backend.
    70  //
    71  // If format of this struct changes in a non backward compatible way, the
    72  // version number in ClientExtractorProcID should change too.
    73  type ClientExtractorResult struct {
    74  	ClientBinary struct {
    75  		Size int64 `json:"size"`
    76  
    77  		// Algo used to name the extracted file, matches the client package algo.
    78  		HashAlgo   string `json:"hash_algo"`   // cas.HashAlgo enum serialized to string
    79  		HashDigest string `json:"hash_digest"` // as hex string
    80  
    81  		// AllHashDigests are hex digests of the extracted file calculated using all
    82  		// algos known to the server at the time the file was uploaded.
    83  		//
    84  		// Keys are cas.HashAlgo enum values as strings ('SHA1', 'SHA256', ...).
    85  		//
    86  		// If empty (for old records), only supported algo is HashAlgo from above
    87  		// (which for old records is always SHA1).
    88  		AllHashDigests map[string]string `json:"all_hash_digests"`
    89  	} `json:"client_binary"`
    90  }
    91  
    92  // ToObjectRef returns a reference to the extracted client binary in CAS.
    93  //
    94  // The returned ObjectRef is validated to be syntactically correct already.
    95  func (r *ClientExtractorResult) ToObjectRef() (*api.ObjectRef, error) {
    96  	algo := api.HashAlgo_value[r.ClientBinary.HashAlgo]
    97  	if algo == 0 {
    98  		// Note: this means OLD version of the server may not be able to serve
    99  		// NEW ClientExtractorResult entries due to unknown hash algo. Many other
   100  		// things will also break in this situation. If this is really happening,
   101  		// all new entries can be manually removed from the datastore, to stop
   102  		// confusing the old server version.
   103  		return nil, fmt.Errorf("unrecognized hash algo %q", r.ClientBinary.HashAlgo)
   104  	}
   105  	ref := &api.ObjectRef{
   106  		HashAlgo:  api.HashAlgo(algo),
   107  		HexDigest: r.ClientBinary.HashDigest,
   108  	}
   109  	if err := common.ValidateObjectRef(ref, common.KnownHash); err != nil {
   110  		return nil, err
   111  	}
   112  	return ref, nil
   113  }
   114  
   115  // ObjectRefAliases is list of ObjectRefs calculated using all hash algos known
   116  // to the server when the client binary was extracted.
   117  //
   118  // Additionally all algos not understood by the server right NOW are skipped
   119  // too. This may arise if the server was rolled back, but some files have
   120  // already been uploaded with a newer algo.
   121  func (r *ClientExtractorResult) ObjectRefAliases() []*api.ObjectRef {
   122  	all := r.ClientBinary.AllHashDigests
   123  
   124  	// Older entries do not have AllHashDigests field at all.
   125  	if len(all) == 0 {
   126  		ref := &api.ObjectRef{
   127  			HashAlgo:  api.HashAlgo(api.HashAlgo_value[r.ClientBinary.HashAlgo]),
   128  			HexDigest: r.ClientBinary.HashDigest,
   129  		}
   130  		if common.ValidateObjectRef(ref, common.KnownHash) == nil {
   131  			return []*api.ObjectRef{ref}
   132  		}
   133  		return nil // welp, have 0 supported algos, should not really happen
   134  	}
   135  
   136  	// Order the result by HashAlgo enum values. This loop also naturally skips
   137  	// algos not understood by the current version of the server, since they are
   138  	// not in HashAlgo_name map.
   139  	refs := make([]*api.ObjectRef, 0, len(all))
   140  	for algo := int32(1); api.HashAlgo_name[algo] != ""; algo++ { // skip UNSPECIFIED
   141  		if digest := all[api.HashAlgo_name[algo]]; digest != "" {
   142  			ref := &api.ObjectRef{HashAlgo: api.HashAlgo(algo), HexDigest: digest}
   143  			if common.ValidateObjectRef(ref, common.KnownHash) == nil {
   144  				refs = append(refs, ref)
   145  			}
   146  		}
   147  	}
   148  	return refs
   149  }
   150  
   151  // ClientExtractor is a processor that extracts CIPD client binary from CIPD
   152  // client packages (infra/tools/cipd/...) and stores it in the CAS, so it can be
   153  // fetched directly.
   154  //
   155  // This is needed to support CIPD client bootstrap using e.g. 'curl'.
   156  type ClientExtractor struct {
   157  	CAS cas.StorageServer
   158  
   159  	// uploader returns an io.Writer to push all extracted data to.
   160  	//
   161  	// Default is gsUploader, but can be mocked in tests.
   162  	uploader func(ctx context.Context, size int64, uploadURL string) io.Writer
   163  
   164  	// bufferSize is size of the buffer for GS uploads (default is 2 Mb).
   165  	bufferSize int
   166  }
   167  
   168  // ID is part of Processor interface.
   169  func (e *ClientExtractor) ID() string {
   170  	return ClientExtractorProcID
   171  }
   172  
   173  // Applicable is part of Processor interface.
   174  func (e *ClientExtractor) Applicable(ctx context.Context, inst *model.Instance) (bool, error) {
   175  	return IsClientPackage(inst.Package.StringID()), nil
   176  }
   177  
   178  // Run is part of Processor interface.
   179  func (e *ClientExtractor) Run(ctx context.Context, inst *model.Instance, pkg *PackageReader) (res Result, err error) {
   180  	// Put fatal errors into 'res' and return transient ones as is.
   181  	defer func() {
   182  		if err != nil && !transient.Tag.In(err) {
   183  			res.Err = err
   184  			err = nil
   185  		}
   186  	}()
   187  
   188  	// We use same hash algo for naming the extracted file as was used to name
   189  	// the package instance it is in. This avoid some confusion during the
   190  	// transition to a new hash.
   191  	if err = common.ValidateInstanceID(inst.InstanceID, common.KnownHash); err != nil {
   192  		err = errors.Annotate(err, "unrecognized client instance ID format").Err()
   193  		return
   194  	}
   195  	instRef := common.InstanceIDToObjectRef(inst.InstanceID)
   196  
   197  	// We also always calculate all other hashes we know about at the same time,
   198  	// for old bootstrap scripts that may not understand the most recent hash
   199  	// algo.
   200  	hashes := make([]api.HashAlgo, 0, len(api.HashAlgo_name))
   201  	for algo := range api.HashAlgo_name {
   202  		if a := api.HashAlgo(algo); a != api.HashAlgo_HASH_ALGO_UNSPECIFIED {
   203  			hashes = append(hashes, a)
   204  		}
   205  	}
   206  
   207  	// Execute the extraction.
   208  	result, err := (&Extractor{
   209  		Reader:            pkg,
   210  		CAS:               e.CAS,
   211  		PrimaryHash:       instRef.HashAlgo,
   212  		AlternativeHashes: hashes,
   213  		Uploader:          e.uploader,
   214  		BufferSize:        e.bufferSize,
   215  	}).Run(ctx, GetClientBinaryName(inst.Package.StringID()))
   216  	if err != nil {
   217  		return
   218  	}
   219  
   220  	// Store the results in the appropriate format.
   221  	hexDigests := make(map[string]string, len(result.Hashes))
   222  	for algo, hash := range result.Hashes {
   223  		hexDigests[algo.String()] = common.HexDigest(hash)
   224  	}
   225  
   226  	r := ClientExtractorResult{}
   227  	r.ClientBinary.Size = result.Size
   228  	r.ClientBinary.HashAlgo = result.Ref.HashAlgo.String()
   229  	r.ClientBinary.HashDigest = result.Ref.HexDigest
   230  	r.ClientBinary.AllHashDigests = hexDigests
   231  
   232  	logging.Infof(ctx, "Uploaded CIPD client binary %s with %s %s (%d bytes)",
   233  		inst.Package.StringID(), result.Ref.HashAlgo, result.Ref.HexDigest, result.Size)
   234  
   235  	res.Result = r
   236  	return
   237  }
   238  
   239  // GetClientExtractorResult returns results of client extractor processor.
   240  //
   241  // They contain a reference to the unpacked CIPD binary object in the Google
   242  // Storage.
   243  //
   244  // Returns:
   245  //
   246  //	(result, nil) on success.
   247  //	(nil, datastore.ErrNoSuchEntity) if results are not available.
   248  //	(nil, transient-tagged error) on retrieval errors.
   249  //	(nil, non-transient-tagged error) if the client extractor failed.
   250  func GetClientExtractorResult(ctx context.Context, inst *api.Instance) (*ClientExtractorResult, error) {
   251  	r := &model.ProcessingResult{
   252  		ProcID:   ClientExtractorProcID,
   253  		Instance: datastore.KeyForObj(ctx, (&model.Instance{}).FromProto(ctx, inst)),
   254  	}
   255  	switch err := datastore.Get(ctx, r); {
   256  	case err == datastore.ErrNoSuchEntity:
   257  		return nil, err
   258  	case err != nil:
   259  		return nil, transient.Tag.Apply(err)
   260  	case !r.Success:
   261  		return nil, errors.Reason("client extraction failed: %s", r.Error).Err()
   262  	}
   263  	out := &ClientExtractorResult{}
   264  	if err := r.ReadResult(out); err != nil {
   265  		return nil, errors.Annotate(err, "failed to parse the client extractor status").Err()
   266  	}
   267  	return out, nil
   268  }