go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/lucicfg/output.go (about)

     1  // Copyright 2019 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package lucicfg
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"fmt"
    21  	"os"
    22  	"path"
    23  	"path/filepath"
    24  	"runtime"
    25  	"sort"
    26  	"strings"
    27  	"sync"
    28  
    29  	"go.starlark.net/starlark"
    30  
    31  	"google.golang.org/protobuf/encoding/prototext"
    32  	"google.golang.org/protobuf/proto"
    33  	"google.golang.org/protobuf/types/dynamicpb"
    34  
    35  	"go.chromium.org/luci/common/errors"
    36  	"go.chromium.org/luci/common/logging"
    37  	"go.chromium.org/luci/common/proto/textpb"
    38  	"go.chromium.org/luci/common/sync/parallel"
    39  	"go.chromium.org/luci/starlark/starlarkproto"
    40  )
    41  
// Output is an in-memory representation of all generated output files.
//
// Output may span zero or more config sets, each defined by its root directory.
// Config sets may intersect (though this is rare).
//
// Methods on Output use a value receiver, but both fields are maps, so methods
// that assign to or delete from Data (Read, DiscardChangesToUntracked) are
// visible through every copy of the Output.
type Output struct {
	// Data is all output files.
	//
	// Keys are slash-separated filenames, values are corresponding file bodies.
	// When files are read from or written to disk, keys are interpreted
	// relative to the directory passed to the corresponding method.
	Data map[string]Datum

	// Roots is a mapping "config set name => its root".
	//
	// Roots are given as slash-separated paths relative to the output root, e.g.
	// '.' matches ALL output files. ConfigSets assigns a file to a config set
	// when the file's path lies under that config set's root.
	Roots map[string]string
}
    58  
// CompareResult is returned by Datum.Compare.
//
// It is also the value type of the map returned by Output.Compare, where it
// describes how a generated file relates to the file currently on disk.
type CompareResult int

// Possible values of CompareResult.
const (
	UnknownResult     CompareResult = iota // used as a placeholder on errors
	Identical                              // datums are byte-for-byte identical
	SemanticallyEqual                      // datums differ as byte blobs, but are semantically equal
	Different                              // datums are semantically different
)
    68  
// Datum represents one generated output file.
//
// Implementations in this file are BlobDatum (a raw byte blob) and
// *MessageDatum (a lazily serialized proto message).
type Datum interface {
	// Bytes is a raw file body to put on disk.
	//
	// Returns an error if the body can't be produced (e.g. serialization of
	// the underlying value failed).
	Bytes() ([]byte, error)
	// Compare semantically compares this datum to 'other'.
	//
	// 'other' is a raw file body, usually the one that currently exists on
	// disk. On error the returned CompareResult is UnknownResult.
	Compare(other []byte) (CompareResult, error)
}
    76  
    77  // BlobDatum is a Datum which is just a raw byte blob.
    78  type BlobDatum []byte
    79  
    80  // Bytes is a raw file body to put on disk.
    81  func (b BlobDatum) Bytes() ([]byte, error) { return b, nil }
    82  
    83  // Compare is Identical if 'other == b' else it is Different.
    84  func (b BlobDatum) Compare(other []byte) (CompareResult, error) {
    85  	if bytes.Equal(b, other) {
    86  		return Identical, nil
    87  	}
    88  	return Different, nil
    89  }
    90  
// MessageDatum is a Datum constructed from a proto message.
//
// All methods have pointer receivers and the struct holds a sync.Once, so a
// MessageDatum is meant to be used through a pointer and not copied.
type MessageDatum struct {
	// Header is put verbatim in front of the serialized message when producing
	// the file body. May be empty.
	Header string
	// Message is the Starlark-side proto message to serialize.
	Message *starlarkproto.Message

	// Cache proto.Message and serialized representations, since we need them
	// in multiple places: when constructing ConfigSet for sending to the
	// validation, when comparing with configs on disk and when writing them
	// to disk.
	//
	// All of these are populated by ensureConverted, at most once.
	once sync.Once     // guards the one-time conversion
	pmsg proto.Message // Message converted via ToProto, used for semantic comparison
	blob []byte        // Header followed by the formatted text proto, set only on success
	err  error         // error from the conversion, if it failed
}
   105  
   106  // ensureConverted populates `pmsg` and `blob`.
   107  func (m *MessageDatum) ensureConverted() error {
   108  	m.once.Do(func() {
   109  		// Grab it as proto.Message for comparisons in Compare.
   110  		m.pmsg = m.Message.ToProto()
   111  
   112  		// And convert to a text for strict comparisons and the final output.
   113  		opts := prototext.MarshalOptions{
   114  			AllowPartial: true,
   115  			Indent:       " ",
   116  			Resolver:     m.Message.MessageType().Loader().Types(), // used for google.protobuf.Any fields
   117  		}
   118  		blob, err := opts.Marshal(m.pmsg)
   119  		if err == nil {
   120  			blob, err = textpb.Format(blob, m.Message.MessageType().Descriptor())
   121  		}
   122  
   123  		if err != nil {
   124  			m.err = err
   125  		} else {
   126  			m.blob = make([]byte, 0, len(m.Header)+len(blob))
   127  			m.blob = append(m.blob, m.Header...)
   128  			m.blob = append(m.blob, blob...)
   129  		}
   130  	})
   131  	return m.err
   132  }
   133  
   134  // Bytes is a raw file body to put on disk.
   135  func (m *MessageDatum) Bytes() ([]byte, error) {
   136  	if err := m.ensureConverted(); err != nil {
   137  		return nil, err
   138  	}
   139  	return m.blob, nil
   140  }
   141  
   142  // Compare deserializes `other` and compares it to `m.Message`.
   143  //
   144  // If `other` can't be deserialized as a proto message at all returns Different.
   145  // Returns an error if `m` can't be serialized.
   146  func (m *MessageDatum) Compare(other []byte) (CompareResult, error) {
   147  	// This populates m.blob and m.pmsg.
   148  	if err := m.ensureConverted(); err != nil {
   149  		return UnknownResult, err
   150  	}
   151  
   152  	if bytes.Equal(m.blob, other) {
   153  		return Identical, nil
   154  	}
   155  
   156  	// Try to load `other` as a proto message of the same type.
   157  	otherpb := dynamicpb.NewMessage(m.Message.MessageType().Descriptor())
   158  	opts := prototext.UnmarshalOptions{
   159  		AllowPartial: true,
   160  		Resolver:     m.Message.MessageType().Loader().Types(), // used for google.protobuf.Any fields
   161  	}
   162  	if err := opts.Unmarshal(other, otherpb); err != nil {
   163  		return Different, nil // e.g. the schema has changed or the file is totally bogus
   164  	}
   165  
   166  	// Compare them semantically as protos.
   167  	if semanticallyEqual(m.pmsg, otherpb) {
   168  		return SemanticallyEqual, nil
   169  	}
   170  	return Different, nil
   171  }
   172  
   173  // ConfigSets partitions this output into 0 or more config sets based on Roots.
   174  //
   175  // Returns an error if some output Datum can't be serialized.
   176  func (o Output) ConfigSets() ([]ConfigSet, error) {
   177  	names := make([]string, 0, len(o.Roots))
   178  	for name := range o.Roots {
   179  		names = append(names, name)
   180  	}
   181  	sort.Strings(names) // order is important for logs
   182  
   183  	cs := make([]ConfigSet, len(names))
   184  	for i, nm := range names {
   185  		root := o.Roots[nm]
   186  
   187  		// Normalize in preparation for prefix matching.
   188  		root = path.Clean(root)
   189  		if root == "." {
   190  			root = "" // match EVERYTHING
   191  		} else {
   192  			root = root + "/" // match only what's under 'root/...'
   193  		}
   194  
   195  		files := map[string][]byte{}
   196  		for f, body := range o.Data {
   197  			f = path.Clean(f)
   198  			if strings.HasPrefix(f, root) {
   199  				var err error
   200  				if files[f[len(root):]], err = body.Bytes(); err != nil {
   201  					return nil, errors.Annotate(err, "serializing %s", f).Err()
   202  				}
   203  			}
   204  		}
   205  
   206  		cs[i] = ConfigSet{Name: nm, Data: files}
   207  	}
   208  
   209  	return cs, nil
   210  }
   211  
   212  // Compare compares files on disk to what's in the output.
   213  //
   214  // If 'semantic' is true, for output files based on proto messages uses semantic
   215  // comparison, i.e. loads the file on disk as a proto message and compares
   216  // it to the output message. If 'semantic' is false, just always compares files
   217  // as byte blobs.
   218  //
   219  // For each file in the output set, the resulting map has a CompareResult
   220  // describing how it compares to the file on disk. They can either be identical
   221  // as byte blobs (Identical), different as byte blobs, but semantically
   222  // the same (SemanticallyEqual), or totally different (Different).
   223  //
   224  // Note that when 'semantic' is false, only Identical and Different can appear
   225  // in the result, since we compare files as byte blobs only, so there's no
   226  // notion of being "semantically the same".
   227  //
   228  // Files on disk that are not in the output set are totally ignored. Files in
   229  // the output set that are missing on disk as Different.
   230  //
   231  // Returns an error if some file on disk can't be read or some output file can't
   232  // be serialized.
   233  func (o Output) Compare(dir string, semantic bool) (map[string]CompareResult, error) {
   234  	compare := func(d Datum, b []byte) (CompareResult, error) {
   235  		if semantic {
   236  			return d.Compare(b)
   237  		}
   238  		switch a, err := d.Bytes(); {
   239  		case err != nil:
   240  			return UnknownResult, err
   241  		case bytes.Equal(a, b):
   242  			return Identical, nil
   243  		default:
   244  			return Different, nil
   245  		}
   246  	}
   247  
   248  	out := make(map[string]CompareResult, len(o.Data))
   249  	m := sync.Mutex{}
   250  
   251  	err := parallel.WorkPool(runtime.NumCPU()+4, func(tasks chan<- func() error) {
   252  		for name, datum := range o.Data {
   253  			name := name
   254  			datum := datum
   255  
   256  			tasks <- func() error {
   257  				path := filepath.Join(dir, filepath.FromSlash(name))
   258  
   259  				var res CompareResult
   260  				switch existing, err := os.ReadFile(path); {
   261  				case os.IsNotExist(err):
   262  					res = Different // new output file
   263  				case err != nil:
   264  					return errors.Annotate(err, "when checking diff of %q", name).Err()
   265  				default:
   266  					if res, err = compare(datum, existing); err != nil {
   267  						return errors.Annotate(err, "when checking diff of %q", name).Err()
   268  					}
   269  				}
   270  
   271  				m.Lock()
   272  				out[name] = res
   273  				m.Unlock()
   274  
   275  				return nil
   276  			}
   277  		}
   278  	})
   279  
   280  	if err != nil {
   281  		return nil, err
   282  	}
   283  	return out, nil
   284  }
   285  
   286  // Write updates files on disk to match the output.
   287  //
   288  // Returns a list of written files and a list of files that were left untouched.
   289  //
   290  // If 'force' is false, compares files on disk to the generated files using
   291  // the semantic comparison. If they are all up-to-date (semantically) does
   292  // nothing. If at least one file is stale, rewrites *all* not already identical
   293  // files. That way all output files always have consistent formatting, but
   294  // `lucicfg generate` still doesn't produce noop formatting changes by default
   295  // (it piggy backs formatting changes onto real changes).
   296  //
   297  // If 'force' is true, compares files as byte blobs and rewrites all files
   298  // that changed as blobs. No semantic comparison is done.
   299  //
   300  // Creates missing directories. Not atomic. All files have mode 0666.
   301  func (o Output) Write(dir string, force bool) (written, untouched []string, err error) {
   302  	// Find which files we definitely need to rewrite and which can be skipped.
   303  	cmp, err := o.Compare(dir, !force)
   304  	if err != nil {
   305  		return
   306  	}
   307  
   308  	// If nothing has *semantically* changed, don't touch any outputs at all.
   309  	// Note that when 'force' is true, we compare files as byte blobs, so even if
   310  	// files are equal semantically, but different as byte blobs, they'll end up
   311  	// as Different and we'll proceed to overwrite them.
   312  	different := false
   313  	for _, res := range cmp {
   314  		if res == Different {
   315  			different = true
   316  			break
   317  		}
   318  	}
   319  	if !different {
   320  		untouched = make([]string, 0, len(cmp))
   321  		for name := range cmp {
   322  			untouched = append(untouched, name)
   323  		}
   324  		sort.Strings(untouched)
   325  		return
   326  	}
   327  
   328  	// We are going to overwrite all files that are not already byte-to-byte
   329  	// identical to existing files on disk (even if they are semantically the
   330  	// same) and left byte-to-byte identical files untouched.
   331  	for name, res := range cmp {
   332  		switch res {
   333  		case Identical:
   334  			untouched = append(untouched, name)
   335  		case SemanticallyEqual, Different:
   336  			written = append(written, name)
   337  		default:
   338  			panic("impossible")
   339  		}
   340  	}
   341  	sort.Strings(untouched)
   342  	sort.Strings(written)
   343  
   344  	for _, name := range written {
   345  		path := filepath.Join(dir, filepath.FromSlash(name))
   346  		if err = os.MkdirAll(filepath.Dir(path), 0777); err != nil {
   347  			return
   348  		}
   349  		var blob []byte
   350  		if blob, err = o.Data[name].Bytes(); err != nil {
   351  			return
   352  		}
   353  		if err = os.WriteFile(path, blob, 0666); err != nil {
   354  			return
   355  		}
   356  	}
   357  
   358  	return
   359  }
   360  
   361  // Read replaces values in o.Data by reading them from disk as blobs.
   362  //
   363  // Returns an error if some file can't be read.
   364  func (o Output) Read(dir string) error {
   365  	for name := range o.Data {
   366  		path := filepath.Join(dir, filepath.FromSlash(name))
   367  		blob, err := os.ReadFile(path)
   368  		if err != nil {
   369  			return errors.Annotate(err, "reading %q", name).Err()
   370  		}
   371  		o.Data[name] = BlobDatum(blob)
   372  	}
   373  	return nil
   374  }
   375  
   376  // Files returns a sorted list of file names in the output.
   377  func (o Output) Files() []string {
   378  	f := make([]string, 0, len(o.Data))
   379  	for k := range o.Data {
   380  		f = append(f, k)
   381  	}
   382  	sort.Strings(f)
   383  	return f
   384  }
   385  
   386  // DebugDump writes the output to stdout in a format useful for debugging.
   387  func (o Output) DebugDump() {
   388  	for _, f := range o.Files() {
   389  		fmt.Println("--------------------------------------------------")
   390  		fmt.Println(f)
   391  		fmt.Println("--------------------------------------------------")
   392  		if blob, err := o.Data[f].Bytes(); err == nil {
   393  			fmt.Print(string(blob))
   394  		} else {
   395  			fmt.Printf("ERROR: %s\n", err)
   396  		}
   397  		fmt.Println("--------------------------------------------------")
   398  	}
   399  }
   400  
   401  // DiscardChangesToUntracked replaces bodies of the files that are in the output
   402  // set, but not in the `tracked` set (per TrackedSet semantics) with what's on
   403  // disk in the given `dir`.
   404  //
   405  // This allows to construct partially generated output: some configs (the ones
   406  // in the tracked set) are generated, others are loaded from disk.
   407  //
   408  // If `dir` is "-" (which indicates that the output is going to be dumped to
   409  // stdout rather then to disk), just removes untracked files from the output.
   410  func (o Output) DiscardChangesToUntracked(ctx context.Context, tracked []string, dir string) error {
   411  	isTracked := TrackedSet(tracked)
   412  
   413  	for _, path := range o.Files() {
   414  		yes, err := isTracked(path)
   415  		if err != nil {
   416  			return err
   417  		}
   418  		if yes {
   419  			continue
   420  		}
   421  
   422  		logging.Warningf(ctx, "Discarding changes to %s, not in the tracked set", path)
   423  
   424  		if dir == "-" {
   425  			// When using stdout as destination, there's nowhere to read existing
   426  			// files from.
   427  			delete(o.Data, path)
   428  			continue
   429  		}
   430  
   431  		switch body, err := os.ReadFile(filepath.Join(dir, filepath.FromSlash(path))); {
   432  		case err == nil:
   433  			o.Data[path] = BlobDatum(body)
   434  		case os.IsNotExist(err):
   435  			delete(o.Data, path)
   436  		case err != nil:
   437  			return errors.Annotate(err, "when discarding changes to %s", path).Err()
   438  		}
   439  	}
   440  
   441  	return nil
   442  }
   443  
   444  ////////////////////////////////////////////////////////////////////////////////
   445  // Constructing Output from Starlark (tested through starlark_test.go).
   446  
// outputBuilder is a map-like starlark.Value that has file names as keys and
// strings or protobuf messages as values.
//
// At the end of the execution all protos are serialized to strings too, using
// textpb encoding, to get the final Output.
//
// It embeds starlark.Dict and overrides Type and SetKey; SetKey is where keys
// and values are validated before being stored in the underlying dict.
type outputBuilder struct {
	starlark.Dict
}
   455  
   456  func newOutputBuilder() *outputBuilder {
   457  	return &outputBuilder{}
   458  }
   459  
   460  func (o *outputBuilder) Type() string { return "output" }
   461  
   462  func (o *outputBuilder) SetKey(k, v starlark.Value) error {
   463  	key, ok := k.(starlark.String)
   464  	if !ok {
   465  		return fmt.Errorf("output set key should be a string, not %s", k.Type())
   466  	}
   467  
   468  	// Paths must be within the config output directory, "../" is not allowed.
   469  	if _, err := cleanRelativePath("", key.GoString(), false); err != nil {
   470  		return err
   471  	}
   472  
   473  	_, str := v.(starlark.String)
   474  	_, msg := v.(*starlarkproto.Message)
   475  	if !str && !msg {
   476  		return fmt.Errorf("output set value should be either a string or a proto message, not %s", v.Type())
   477  	}
   478  
   479  	// Use the exact same key (not a version cleaned by cleanRelativePath), so
   480  	// that Starlark code can read the value back using whatever dirty key it
   481  	// used. We do the final cleanup of keys in finalize(...).
   482  	return o.Dict.SetKey(k, v)
   483  }
   484  
   485  // finalize returns all output files in a single map.
   486  //
   487  // Protos are eventually serialized to text proto format (optionally with the
   488  // header that tells how the file was generated).
   489  //
   490  // Configs supplied as strings are serialized using UTF-8 encoding.
   491  func (o *outputBuilder) finalize(includePBHeader bool) (map[string]Datum, error) {
   492  	out := make(map[string]Datum, o.Len())
   493  
   494  	for _, kv := range o.Items() {
   495  		k, v := kv[0].(starlark.String), kv[1]
   496  
   497  		key, err := cleanRelativePath("", k.GoString(), false)
   498  		if err != nil {
   499  			panic(err) // already validated in SetKey
   500  		}
   501  
   502  		if s, ok := v.(starlark.String); ok {
   503  			out[key] = BlobDatum(s.GoString())
   504  			continue
   505  		}
   506  
   507  		md := &MessageDatum{Message: v.(*starlarkproto.Message)}
   508  		if includePBHeader {
   509  			buf := strings.Builder{}
   510  			buf.WriteString("# Auto-generated by lucicfg.\n")
   511  			buf.WriteString("# Do not modify manually.\n")
   512  			if msgName, docURL := protoMessageDoc(md.Message); docURL != "" {
   513  				buf.WriteString("#\n")
   514  				fmt.Fprintf(&buf, "# For the schema of this file, see %s message:\n", msgName)
   515  				fmt.Fprintf(&buf, "#   %s\n", docURL)
   516  			}
   517  			buf.WriteString("\n")
   518  			md.Header = buf.String()
   519  		}
   520  		out[key] = md
   521  	}
   522  
   523  	return out, nil
   524  }
   525  
   526  func init() {
   527  	// new_output_builder() makes a new output builder, useful in tests.
   528  	declNative("new_output_builder", func(call nativeCall) (starlark.Value, error) {
   529  		if err := call.unpack(0); err != nil {
   530  			return nil, err
   531  		}
   532  		return newOutputBuilder(), nil
   533  	})
   534  }