go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/lucicfg/output.go (about) 1 // Copyright 2019 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package lucicfg 16 17 import ( 18 "bytes" 19 "context" 20 "fmt" 21 "os" 22 "path" 23 "path/filepath" 24 "runtime" 25 "sort" 26 "strings" 27 "sync" 28 29 "go.starlark.net/starlark" 30 31 "google.golang.org/protobuf/encoding/prototext" 32 "google.golang.org/protobuf/proto" 33 "google.golang.org/protobuf/types/dynamicpb" 34 35 "go.chromium.org/luci/common/errors" 36 "go.chromium.org/luci/common/logging" 37 "go.chromium.org/luci/common/proto/textpb" 38 "go.chromium.org/luci/common/sync/parallel" 39 "go.chromium.org/luci/starlark/starlarkproto" 40 ) 41 42 // Output is an in-memory representation of all generated output files. 43 // 44 // Output may span zero or more config sets, each defined by its root directory. 45 // Config sets may intersect (though this is rare). 46 type Output struct { 47 // Data is all output files. 48 // 49 // Keys are slash-separated filenames, values are corresponding file bodies. 50 Data map[string]Datum 51 52 // Roots is mapping "config set name => its root". 53 // 54 // Roots are given as slash-separated paths relative to the output root, e.g. 55 // '.' matches ALL output files. 56 Roots map[string]string 57 } 58 59 // CompareResult is returned by Datum.Compare. 60 type CompareResult int 61 62 const ( 63 UnknownResult CompareResult = iota // used as a placeholder on errors 64 Identical // datums are byte-to-byte identical 65 SemanticallyEqual // datums are byte-to-byte different, but semantically equal 66 Different // datums are semantically different 67 ) 68 69 // Datum represents one generated output file. 70 type Datum interface { 71 // Bytes is a raw file body to put on disk. 72 Bytes() ([]byte, error) 73 // Compare semantically compares this datum to 'other'. 74 Compare(other []byte) (CompareResult, error) 75 } 76 77 // BlobDatum is a Datum which is just a raw byte blob. 78 type BlobDatum []byte 79 80 // Bytes is a raw file body to put on disk. 81 func (b BlobDatum) Bytes() ([]byte, error) { return b, nil } 82 83 // Compare is Identical if 'other == b' else it is Different. 84 func (b BlobDatum) Compare(other []byte) (CompareResult, error) { 85 if bytes.Equal(b, other) { 86 return Identical, nil 87 } 88 return Different, nil 89 } 90 91 // MessageDatum is a Datum constructed from a proto message. 92 type MessageDatum struct { 93 Header string 94 Message *starlarkproto.Message 95 96 // Cache proto.Message and serialized representations, since we need them 97 // in multiple places: when constructing ConfigSet for sending to the 98 // validation, when comparing with configs on disk and when writing them 99 // to disk. 100 once sync.Once 101 pmsg proto.Message 102 blob []byte 103 err error 104 } 105 106 // ensureConverted populates `pmsg` and `blob`. 107 func (m *MessageDatum) ensureConverted() error { 108 m.once.Do(func() { 109 // Grab it as proto.Message for comparisons in Compare. 110 m.pmsg = m.Message.ToProto() 111 112 // And convert to a text for strict comparisons and the final output. 113 opts := prototext.MarshalOptions{ 114 AllowPartial: true, 115 Indent: " ", 116 Resolver: m.Message.MessageType().Loader().Types(), // used for google.protobuf.Any fields 117 } 118 blob, err := opts.Marshal(m.pmsg) 119 if err == nil { 120 blob, err = textpb.Format(blob, m.Message.MessageType().Descriptor()) 121 } 122 123 if err != nil { 124 m.err = err 125 } else { 126 m.blob = make([]byte, 0, len(m.Header)+len(blob)) 127 m.blob = append(m.blob, m.Header...) 128 m.blob = append(m.blob, blob...) 129 } 130 }) 131 return m.err 132 } 133 134 // Bytes is a raw file body to put on disk. 135 func (m *MessageDatum) Bytes() ([]byte, error) { 136 if err := m.ensureConverted(); err != nil { 137 return nil, err 138 } 139 return m.blob, nil 140 } 141 142 // Compare deserializes `other` and compares it to `m.Message`. 143 // 144 // If `other` can't be deserialized as a proto message at all returns Different. 145 // Returns an error if `m` can't be serialized. 146 func (m *MessageDatum) Compare(other []byte) (CompareResult, error) { 147 // This populates m.blob and m.pmsg. 148 if err := m.ensureConverted(); err != nil { 149 return UnknownResult, err 150 } 151 152 if bytes.Equal(m.blob, other) { 153 return Identical, nil 154 } 155 156 // Try to load `other` as a proto message of the same type. 157 otherpb := dynamicpb.NewMessage(m.Message.MessageType().Descriptor()) 158 opts := prototext.UnmarshalOptions{ 159 AllowPartial: true, 160 Resolver: m.Message.MessageType().Loader().Types(), // used for google.protobuf.Any fields 161 } 162 if err := opts.Unmarshal(other, otherpb); err != nil { 163 return Different, nil // e.g. the schema has changed or the file is totally bogus 164 } 165 166 // Compare them semantically as protos. 167 if semanticallyEqual(m.pmsg, otherpb) { 168 return SemanticallyEqual, nil 169 } 170 return Different, nil 171 } 172 173 // ConfigSets partitions this output into 0 or more config sets based on Roots. 174 // 175 // Returns an error if some output Datum can't be serialized. 176 func (o Output) ConfigSets() ([]ConfigSet, error) { 177 names := make([]string, 0, len(o.Roots)) 178 for name := range o.Roots { 179 names = append(names, name) 180 } 181 sort.Strings(names) // order is important for logs 182 183 cs := make([]ConfigSet, len(names)) 184 for i, nm := range names { 185 root := o.Roots[nm] 186 187 // Normalize in preparation for prefix matching. 188 root = path.Clean(root) 189 if root == "." { 190 root = "" // match EVERYTHING 191 } else { 192 root = root + "/" // match only what's under 'root/...' 193 } 194 195 files := map[string][]byte{} 196 for f, body := range o.Data { 197 f = path.Clean(f) 198 if strings.HasPrefix(f, root) { 199 var err error 200 if files[f[len(root):]], err = body.Bytes(); err != nil { 201 return nil, errors.Annotate(err, "serializing %s", f).Err() 202 } 203 } 204 } 205 206 cs[i] = ConfigSet{Name: nm, Data: files} 207 } 208 209 return cs, nil 210 } 211 212 // Compare compares files on disk to what's in the output. 213 // 214 // If 'semantic' is true, for output files based on proto messages uses semantic 215 // comparison, i.e. loads the file on disk as a proto message and compares 216 // it to the output message. If 'semantic' is false, just always compares files 217 // as byte blobs. 218 // 219 // For each file in the output set, the resulting map has a CompareResult 220 // describing how it compares to the file on disk. They can either be identical 221 // as byte blobs (Identical), different as byte blobs, but semantically 222 // the same (SemanticallyEqual), or totally different (Different). 223 // 224 // Note that when 'semantic' is false, only Identical and Different can appear 225 // in the result, since we compare files as byte blobs only, so there's no 226 // notion of being "semantically the same". 227 // 228 // Files on disk that are not in the output set are totally ignored. Files in 229 // the output set that are missing on disk as Different. 230 // 231 // Returns an error if some file on disk can't be read or some output file can't 232 // be serialized. 233 func (o Output) Compare(dir string, semantic bool) (map[string]CompareResult, error) { 234 compare := func(d Datum, b []byte) (CompareResult, error) { 235 if semantic { 236 return d.Compare(b) 237 } 238 switch a, err := d.Bytes(); { 239 case err != nil: 240 return UnknownResult, err 241 case bytes.Equal(a, b): 242 return Identical, nil 243 default: 244 return Different, nil 245 } 246 } 247 248 out := make(map[string]CompareResult, len(o.Data)) 249 m := sync.Mutex{} 250 251 err := parallel.WorkPool(runtime.NumCPU()+4, func(tasks chan<- func() error) { 252 for name, datum := range o.Data { 253 name := name 254 datum := datum 255 256 tasks <- func() error { 257 path := filepath.Join(dir, filepath.FromSlash(name)) 258 259 var res CompareResult 260 switch existing, err := os.ReadFile(path); { 261 case os.IsNotExist(err): 262 res = Different // new output file 263 case err != nil: 264 return errors.Annotate(err, "when checking diff of %q", name).Err() 265 default: 266 if res, err = compare(datum, existing); err != nil { 267 return errors.Annotate(err, "when checking diff of %q", name).Err() 268 } 269 } 270 271 m.Lock() 272 out[name] = res 273 m.Unlock() 274 275 return nil 276 } 277 } 278 }) 279 280 if err != nil { 281 return nil, err 282 } 283 return out, nil 284 } 285 286 // Write updates files on disk to match the output. 287 // 288 // Returns a list of written files and a list of files that were left untouched. 289 // 290 // If 'force' is false, compares files on disk to the generated files using 291 // the semantic comparison. If they are all up-to-date (semantically) does 292 // nothing. If at least one file is stale, rewrites *all* not already identical 293 // files. That way all output files always have consistent formatting, but 294 // `lucicfg generate` still doesn't produce noop formatting changes by default 295 // (it piggy backs formatting changes onto real changes). 296 // 297 // If 'force' is true, compares files as byte blobs and rewrites all files 298 // that changed as blobs. No semantic comparison is done. 299 // 300 // Creates missing directories. Not atomic. All files have mode 0666. 301 func (o Output) Write(dir string, force bool) (written, untouched []string, err error) { 302 // Find which files we definitely need to rewrite and which can be skipped. 303 cmp, err := o.Compare(dir, !force) 304 if err != nil { 305 return 306 } 307 308 // If nothing has *semantically* changed, don't touch any outputs at all. 309 // Note that when 'force' is true, we compare files as byte blobs, so even if 310 // files are equal semantically, but different as byte blobs, they'll end up 311 // as Different and we'll proceed to overwrite them. 312 different := false 313 for _, res := range cmp { 314 if res == Different { 315 different = true 316 break 317 } 318 } 319 if !different { 320 untouched = make([]string, 0, len(cmp)) 321 for name := range cmp { 322 untouched = append(untouched, name) 323 } 324 sort.Strings(untouched) 325 return 326 } 327 328 // We are going to overwrite all files that are not already byte-to-byte 329 // identical to existing files on disk (even if they are semantically the 330 // same) and left byte-to-byte identical files untouched. 331 for name, res := range cmp { 332 switch res { 333 case Identical: 334 untouched = append(untouched, name) 335 case SemanticallyEqual, Different: 336 written = append(written, name) 337 default: 338 panic("impossible") 339 } 340 } 341 sort.Strings(untouched) 342 sort.Strings(written) 343 344 for _, name := range written { 345 path := filepath.Join(dir, filepath.FromSlash(name)) 346 if err = os.MkdirAll(filepath.Dir(path), 0777); err != nil { 347 return 348 } 349 var blob []byte 350 if blob, err = o.Data[name].Bytes(); err != nil { 351 return 352 } 353 if err = os.WriteFile(path, blob, 0666); err != nil { 354 return 355 } 356 } 357 358 return 359 } 360 361 // Read replaces values in o.Data by reading them from disk as blobs. 362 // 363 // Returns an error if some file can't be read. 364 func (o Output) Read(dir string) error { 365 for name := range o.Data { 366 path := filepath.Join(dir, filepath.FromSlash(name)) 367 blob, err := os.ReadFile(path) 368 if err != nil { 369 return errors.Annotate(err, "reading %q", name).Err() 370 } 371 o.Data[name] = BlobDatum(blob) 372 } 373 return nil 374 } 375 376 // Files returns a sorted list of file names in the output. 377 func (o Output) Files() []string { 378 f := make([]string, 0, len(o.Data)) 379 for k := range o.Data { 380 f = append(f, k) 381 } 382 sort.Strings(f) 383 return f 384 } 385 386 // DebugDump writes the output to stdout in a format useful for debugging. 387 func (o Output) DebugDump() { 388 for _, f := range o.Files() { 389 fmt.Println("--------------------------------------------------") 390 fmt.Println(f) 391 fmt.Println("--------------------------------------------------") 392 if blob, err := o.Data[f].Bytes(); err == nil { 393 fmt.Print(string(blob)) 394 } else { 395 fmt.Printf("ERROR: %s\n", err) 396 } 397 fmt.Println("--------------------------------------------------") 398 } 399 } 400 401 // DiscardChangesToUntracked replaces bodies of the files that are in the output 402 // set, but not in the `tracked` set (per TrackedSet semantics) with what's on 403 // disk in the given `dir`. 404 // 405 // This allows to construct partially generated output: some configs (the ones 406 // in the tracked set) are generated, others are loaded from disk. 407 // 408 // If `dir` is "-" (which indicates that the output is going to be dumped to 409 // stdout rather then to disk), just removes untracked files from the output. 410 func (o Output) DiscardChangesToUntracked(ctx context.Context, tracked []string, dir string) error { 411 isTracked := TrackedSet(tracked) 412 413 for _, path := range o.Files() { 414 yes, err := isTracked(path) 415 if err != nil { 416 return err 417 } 418 if yes { 419 continue 420 } 421 422 logging.Warningf(ctx, "Discarding changes to %s, not in the tracked set", path) 423 424 if dir == "-" { 425 // When using stdout as destination, there's nowhere to read existing 426 // files from. 427 delete(o.Data, path) 428 continue 429 } 430 431 switch body, err := os.ReadFile(filepath.Join(dir, filepath.FromSlash(path))); { 432 case err == nil: 433 o.Data[path] = BlobDatum(body) 434 case os.IsNotExist(err): 435 delete(o.Data, path) 436 case err != nil: 437 return errors.Annotate(err, "when discarding changes to %s", path).Err() 438 } 439 } 440 441 return nil 442 } 443 444 //////////////////////////////////////////////////////////////////////////////// 445 // Constructing Output from Starlark (tested through starlark_test.go). 446 447 // outputBuilder is a map-like starlark.Value that has file names as keys and 448 // strings or protobuf messages as values. 449 // 450 // At the end of the execution all protos are serialized to strings too, using 451 // textpb encoding, to get the final Output. 452 type outputBuilder struct { 453 starlark.Dict 454 } 455 456 func newOutputBuilder() *outputBuilder { 457 return &outputBuilder{} 458 } 459 460 func (o *outputBuilder) Type() string { return "output" } 461 462 func (o *outputBuilder) SetKey(k, v starlark.Value) error { 463 key, ok := k.(starlark.String) 464 if !ok { 465 return fmt.Errorf("output set key should be a string, not %s", k.Type()) 466 } 467 468 // Paths must be within the config output directory, "../" is not allowed. 469 if _, err := cleanRelativePath("", key.GoString(), false); err != nil { 470 return err 471 } 472 473 _, str := v.(starlark.String) 474 _, msg := v.(*starlarkproto.Message) 475 if !str && !msg { 476 return fmt.Errorf("output set value should be either a string or a proto message, not %s", v.Type()) 477 } 478 479 // Use the exact same key (not a version cleaned by cleanRelativePath), so 480 // that Starlark code can read the value back using whatever dirty key it 481 // used. We do the final cleanup of keys in finalize(...). 482 return o.Dict.SetKey(k, v) 483 } 484 485 // finalize returns all output files in a single map. 486 // 487 // Protos are eventually serialized to text proto format (optionally with the 488 // header that tells how the file was generated). 489 // 490 // Configs supplied as strings are serialized using UTF-8 encoding. 491 func (o *outputBuilder) finalize(includePBHeader bool) (map[string]Datum, error) { 492 out := make(map[string]Datum, o.Len()) 493 494 for _, kv := range o.Items() { 495 k, v := kv[0].(starlark.String), kv[1] 496 497 key, err := cleanRelativePath("", k.GoString(), false) 498 if err != nil { 499 panic(err) // already validated in SetKey 500 } 501 502 if s, ok := v.(starlark.String); ok { 503 out[key] = BlobDatum(s.GoString()) 504 continue 505 } 506 507 md := &MessageDatum{Message: v.(*starlarkproto.Message)} 508 if includePBHeader { 509 buf := strings.Builder{} 510 buf.WriteString("# Auto-generated by lucicfg.\n") 511 buf.WriteString("# Do not modify manually.\n") 512 if msgName, docURL := protoMessageDoc(md.Message); docURL != "" { 513 buf.WriteString("#\n") 514 fmt.Fprintf(&buf, "# For the schema of this file, see %s message:\n", msgName) 515 fmt.Fprintf(&buf, "# %s\n", docURL) 516 } 517 buf.WriteString("\n") 518 md.Header = buf.String() 519 } 520 out[key] = md 521 } 522 523 return out, nil 524 } 525 526 func init() { 527 // new_output_builder() makes a new output builder, useful in tests. 528 declNative("new_output_builder", func(call nativeCall) (starlark.Value, error) { 529 if err := call.unpack(0); err != nil { 530 return nil, err 531 } 532 return newOutputBuilder(), nil 533 }) 534 }