go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cipd/appengine/impl/repo/processing/client_extractor.go (about) 1 // Copyright 2018 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package processing 16 17 import ( 18 "context" 19 "fmt" 20 "io" 21 "strings" 22 23 "go.chromium.org/luci/common/errors" 24 "go.chromium.org/luci/common/logging" 25 "go.chromium.org/luci/common/retry/transient" 26 "go.chromium.org/luci/gae/service/datastore" 27 28 api "go.chromium.org/luci/cipd/api/cipd/v1" 29 "go.chromium.org/luci/cipd/appengine/impl/cas" 30 "go.chromium.org/luci/cipd/appengine/impl/model" 31 "go.chromium.org/luci/cipd/common" 32 ) 33 34 // ClientExtractorProcID is identifier of ClientExtractor processor. 35 const ClientExtractorProcID = "cipd_client_binary:v1" 36 37 const clientPkgPrefix = "infra/tools/cipd/" 38 39 // GetClientPackage returns the name of the client package for CIPD client for 40 // the given platform. 41 // 42 // Returns an error if the platform name is invalid. 43 func GetClientPackage(platform string) (string, error) { 44 pkg := clientPkgPrefix + platform 45 if err := common.ValidatePackageName(pkg); err != nil { 46 return "", err 47 } 48 return pkg, nil 49 } 50 51 // IsClientPackage returns true if the given package stores a CIPD client. 52 func IsClientPackage(pkg string) bool { 53 return strings.HasPrefix(pkg, clientPkgPrefix) 54 } 55 56 // GetClientBinaryName returns name of CIPD binary inside the package. 57 // 58 // Either 'cipd' or 'cipd.exe'. 59 func GetClientBinaryName(pkg string) string { 60 if strings.HasPrefix(pkg, clientPkgPrefix+"windows-") { 61 return "cipd.exe" 62 } 63 return "cipd" 64 } 65 66 // ClientExtractorResult is stored in JSON form as a result of ClientExtractor 67 // execution. 68 // 69 // Compatible with Python version of the backend. 70 // 71 // If format of this struct changes in a non backward compatible way, the 72 // version number in ClientExtractorProcID should change too. 73 type ClientExtractorResult struct { 74 ClientBinary struct { 75 Size int64 `json:"size"` 76 77 // Algo used to name the extracted file, matches the client package algo. 78 HashAlgo string `json:"hash_algo"` // cas.HashAlgo enum serialized to string 79 HashDigest string `json:"hash_digest"` // as hex string 80 81 // AllHashDigests are hex digests of the extracted file calculated using all 82 // algos known to the server at the time the file was uploaded. 83 // 84 // Keys are cas.HashAlgo enum values as strings ('SHA1', 'SHA256', ...). 85 // 86 // If empty (for old records), only supported algo is HashAlgo from above 87 // (which for old records is always SHA1). 88 AllHashDigests map[string]string `json:"all_hash_digests"` 89 } `json:"client_binary"` 90 } 91 92 // ToObjectRef returns a reference to the extracted client binary in CAS. 93 // 94 // The returned ObjectRef is validated to be syntactically correct already. 95 func (r *ClientExtractorResult) ToObjectRef() (*api.ObjectRef, error) { 96 algo := api.HashAlgo_value[r.ClientBinary.HashAlgo] 97 if algo == 0 { 98 // Note: this means OLD version of the server may not be able to serve 99 // NEW ClientExtractorResult entries due to unknown hash algo. Many other 100 // things will also break in this situation. If this is really happening, 101 // all new entries can be manually removed from the datastore, to stop 102 // confusing the old server version. 103 return nil, fmt.Errorf("unrecognized hash algo %q", r.ClientBinary.HashAlgo) 104 } 105 ref := &api.ObjectRef{ 106 HashAlgo: api.HashAlgo(algo), 107 HexDigest: r.ClientBinary.HashDigest, 108 } 109 if err := common.ValidateObjectRef(ref, common.KnownHash); err != nil { 110 return nil, err 111 } 112 return ref, nil 113 } 114 115 // ObjectRefAliases is list of ObjectRefs calculated using all hash algos known 116 // to the server when the client binary was extracted. 117 // 118 // Additionally all algos not understood by the server right NOW are skipped 119 // too. This may arise if the server was rolled back, but some files have 120 // already been uploaded with a newer algo. 121 func (r *ClientExtractorResult) ObjectRefAliases() []*api.ObjectRef { 122 all := r.ClientBinary.AllHashDigests 123 124 // Older entries do not have AllHashDigests field at all. 125 if len(all) == 0 { 126 ref := &api.ObjectRef{ 127 HashAlgo: api.HashAlgo(api.HashAlgo_value[r.ClientBinary.HashAlgo]), 128 HexDigest: r.ClientBinary.HashDigest, 129 } 130 if common.ValidateObjectRef(ref, common.KnownHash) == nil { 131 return []*api.ObjectRef{ref} 132 } 133 return nil // welp, have 0 supported algos, should not really happen 134 } 135 136 // Order the result by HashAlgo enum values. This loop also naturally skips 137 // algos not understood by the current version of the server, since they are 138 // not in HashAlgo_name map. 139 refs := make([]*api.ObjectRef, 0, len(all)) 140 for algo := int32(1); api.HashAlgo_name[algo] != ""; algo++ { // skip UNSPECIFIED 141 if digest := all[api.HashAlgo_name[algo]]; digest != "" { 142 ref := &api.ObjectRef{HashAlgo: api.HashAlgo(algo), HexDigest: digest} 143 if common.ValidateObjectRef(ref, common.KnownHash) == nil { 144 refs = append(refs, ref) 145 } 146 } 147 } 148 return refs 149 } 150 151 // ClientExtractor is a processor that extracts CIPD client binary from CIPD 152 // client packages (infra/tools/cipd/...) and stores it in the CAS, so it can be 153 // fetched directly. 154 // 155 // This is needed to support CIPD client bootstrap using e.g. 'curl'. 156 type ClientExtractor struct { 157 CAS cas.StorageServer 158 159 // uploader returns an io.Writer to push all extracted data to. 160 // 161 // Default is gsUploader, but can be mocked in tests. 162 uploader func(ctx context.Context, size int64, uploadURL string) io.Writer 163 164 // bufferSize is size of the buffer for GS uploads (default is 2 Mb). 165 bufferSize int 166 } 167 168 // ID is part of Processor interface. 169 func (e *ClientExtractor) ID() string { 170 return ClientExtractorProcID 171 } 172 173 // Applicable is part of Processor interface. 174 func (e *ClientExtractor) Applicable(ctx context.Context, inst *model.Instance) (bool, error) { 175 return IsClientPackage(inst.Package.StringID()), nil 176 } 177 178 // Run is part of Processor interface. 179 func (e *ClientExtractor) Run(ctx context.Context, inst *model.Instance, pkg *PackageReader) (res Result, err error) { 180 // Put fatal errors into 'res' and return transient ones as is. 181 defer func() { 182 if err != nil && !transient.Tag.In(err) { 183 res.Err = err 184 err = nil 185 } 186 }() 187 188 // We use same hash algo for naming the extracted file as was used to name 189 // the package instance it is in. This avoid some confusion during the 190 // transition to a new hash. 191 if err = common.ValidateInstanceID(inst.InstanceID, common.KnownHash); err != nil { 192 err = errors.Annotate(err, "unrecognized client instance ID format").Err() 193 return 194 } 195 instRef := common.InstanceIDToObjectRef(inst.InstanceID) 196 197 // We also always calculate all other hashes we know about at the same time, 198 // for old bootstrap scripts that may not understand the most recent hash 199 // algo. 200 hashes := make([]api.HashAlgo, 0, len(api.HashAlgo_name)) 201 for algo := range api.HashAlgo_name { 202 if a := api.HashAlgo(algo); a != api.HashAlgo_HASH_ALGO_UNSPECIFIED { 203 hashes = append(hashes, a) 204 } 205 } 206 207 // Execute the extraction. 208 result, err := (&Extractor{ 209 Reader: pkg, 210 CAS: e.CAS, 211 PrimaryHash: instRef.HashAlgo, 212 AlternativeHashes: hashes, 213 Uploader: e.uploader, 214 BufferSize: e.bufferSize, 215 }).Run(ctx, GetClientBinaryName(inst.Package.StringID())) 216 if err != nil { 217 return 218 } 219 220 // Store the results in the appropriate format. 221 hexDigests := make(map[string]string, len(result.Hashes)) 222 for algo, hash := range result.Hashes { 223 hexDigests[algo.String()] = common.HexDigest(hash) 224 } 225 226 r := ClientExtractorResult{} 227 r.ClientBinary.Size = result.Size 228 r.ClientBinary.HashAlgo = result.Ref.HashAlgo.String() 229 r.ClientBinary.HashDigest = result.Ref.HexDigest 230 r.ClientBinary.AllHashDigests = hexDigests 231 232 logging.Infof(ctx, "Uploaded CIPD client binary %s with %s %s (%d bytes)", 233 inst.Package.StringID(), result.Ref.HashAlgo, result.Ref.HexDigest, result.Size) 234 235 res.Result = r 236 return 237 } 238 239 // GetClientExtractorResult returns results of client extractor processor. 240 // 241 // They contain a reference to the unpacked CIPD binary object in the Google 242 // Storage. 243 // 244 // Returns: 245 // 246 // (result, nil) on success. 247 // (nil, datastore.ErrNoSuchEntity) if results are not available. 248 // (nil, transient-tagged error) on retrieval errors. 249 // (nil, non-transient-tagged error) if the client extractor failed. 250 func GetClientExtractorResult(ctx context.Context, inst *api.Instance) (*ClientExtractorResult, error) { 251 r := &model.ProcessingResult{ 252 ProcID: ClientExtractorProcID, 253 Instance: datastore.KeyForObj(ctx, (&model.Instance{}).FromProto(ctx, inst)), 254 } 255 switch err := datastore.Get(ctx, r); { 256 case err == datastore.ErrNoSuchEntity: 257 return nil, err 258 case err != nil: 259 return nil, transient.Tag.Apply(err) 260 case !r.Success: 261 return nil, errors.Reason("client extraction failed: %s", r.Error).Err() 262 } 263 out := &ClientExtractorResult{} 264 if err := r.ReadResult(out); err != nil { 265 return nil, errors.Annotate(err, "failed to parse the client extractor status").Err() 266 } 267 return out, nil 268 }