go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/lucicfg/configset.go (about)

     1  // Copyright 2018 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package lucicfg
    16  
    17  import (
    18  	"bufio"
    19  	"bytes"
    20  	"context"
    21  	"crypto/sha256"
    22  	"encoding/base64"
    23  	"encoding/hex"
    24  	"encoding/json"
    25  	"fmt"
    26  	"io"
    27  	"net/http"
    28  	"os"
    29  	"path/filepath"
    30  	"sort"
    31  	"strings"
    32  	"sync"
    33  
    34  	"github.com/dustin/go-humanize"
    35  	"github.com/klauspost/compress/gzip"
    36  	"github.com/klauspost/compress/zlib"
    37  	"golang.org/x/sync/errgroup"
    38  	"google.golang.org/api/googleapi"
    39  	"google.golang.org/grpc"
    40  	"google.golang.org/grpc/status"
    41  
    42  	legacy_config "go.chromium.org/luci/common/api/luci_config/config/v1"
    43  	"go.chromium.org/luci/common/clock"
    44  	"go.chromium.org/luci/common/data/stringset"
    45  	"go.chromium.org/luci/common/errors"
    46  	"go.chromium.org/luci/common/logging"
    47  	"go.chromium.org/luci/common/proto/config"
    48  	"go.chromium.org/luci/common/sync/parallel"
    49  	configpb "go.chromium.org/luci/config_service/proto"
    50  )
    51  
// ConfigSet is an in-memory representation of a single config set: a named
// collection of config files.
type ConfigSet struct {
	// Name is a name of this config set, e.g. "projects/something".
	//
	// It is used by LUCI Config to figure out how to validate files in the set.
	Name string

	// Data is files belonging to the config set.
	//
	// Keys are slash-separated filenames, values are corresponding file bodies.
	Data map[string][]byte
}
    64  
    65  // AsOutput converts this config set into Output that have it at the given root
    66  // path (usually ".").
    67  func (cs ConfigSet) AsOutput(root string) Output {
    68  	data := make(map[string]Datum, len(cs.Data))
    69  	for k, v := range cs.Data {
    70  		data[k] = BlobDatum(v)
    71  	}
    72  	return Output{
    73  		Data:  data,
    74  		Roots: map[string]string{cs.Name: root},
    75  	}
    76  }
    77  
// ValidationResult is what we get after validating a config set.
//
// It can be serialized to JSON; the struct tags define the key used for each
// field, and RPCError is omitted from the output when empty.
type ValidationResult struct {
	ConfigSet string                             `json:"config_set"`          // a config set being validated
	Failed    bool                               `json:"failed"`              // true if the config is bad
	Messages  []*config.ValidationResult_Message `json:"messages"`            // errors, warnings, infos, etc.
	RPCError  string                             `json:"rpc_error,omitempty"` // set if the RPC itself failed
}
    85  
// ConfigSetValidator validates a config set by sending it to a validation
// service.
//
// It is primarily implemented through config.Service, but can also be mocked
// in tests.
type ConfigSetValidator interface {
	// Validate sends the validation request to the service.
	//
	// Returns errors only on RPC errors. Actual validation errors are
	// communicated through []*config.ValidationResult_Message.
	Validate(ctx context.Context, cs ConfigSet) ([]*config.ValidationResult_Message, error)
}
    95  
// remoteValidator is a ConfigSetValidator that validates config sets by
// calling ValidateConfigs on a configpb.ConfigsClient.
type remoteValidator struct {
	cfgClient configpb.ConfigsClient
}
    99  
   100  func NewRemoteValidator(conn *grpc.ClientConn) ConfigSetValidator {
   101  	return &remoteValidator{
   102  		cfgClient: configpb.NewConfigsClient(conn),
   103  	}
   104  }
   105  
   106  // Validate implements ConfigSetValidator
   107  func (r *remoteValidator) Validate(ctx context.Context, cs ConfigSet) ([]*config.ValidationResult_Message, error) {
   108  	if len(cs.Data) == 0 {
   109  		return nil, nil
   110  	}
   111  	validateReq := &configpb.ValidateConfigsRequest{
   112  		ConfigSet:  cs.Name,
   113  		FileHashes: make([]*configpb.ValidateConfigsRequest_FileHash, len(cs.Data)),
   114  	}
   115  	for i, file := range cs.Files() {
   116  		content := cs.Data[file]
   117  		h := sha256.New()
   118  		h.Write(content)
   119  		validateReq.FileHashes[i] = &configpb.ValidateConfigsRequest_FileHash{
   120  			Path:   file,
   121  			Sha256: hex.EncodeToString(h.Sum(nil)),
   122  		}
   123  	}
   124  	res, err := r.cfgClient.ValidateConfigs(ctx, validateReq)
   125  	switch fixInfo := findBadRequestFixInfo(err); {
   126  	case fixInfo != nil:
   127  		if err := uploadMissingFiles(ctx, cs, fixInfo.GetUploadFiles()); err != nil {
   128  			return nil, err
   129  		}
   130  		validateReq.FileHashes = filterOutUnvalidatableFiles(ctx, validateReq.GetFileHashes(), fixInfo.GetUnvalidatableFiles())
   131  		if len(validateReq.FileHashes) == 0 {
   132  			logging.Debugf(ctx, "No config file need to be validated by LUCI Config")
   133  			return nil, nil
   134  		}
   135  		switch res, err := r.cfgClient.ValidateConfigs(ctx, validateReq); { // now try again
   136  		case err != nil:
   137  			return nil, errors.Annotate(err, "failed to call LUCI Config").Err()
   138  		default:
   139  			return res.GetMessages(), nil
   140  		}
   141  	case err != nil:
   142  		return nil, errors.Annotate(err, "failed to call LUCI Config").Err()
   143  	default:
   144  		return res.GetMessages(), nil
   145  	}
   146  }
   147  
   148  func findBadRequestFixInfo(err error) *configpb.BadValidationRequestFixInfo {
   149  	for _, detail := range status.Convert(err).Details() {
   150  		switch t := detail.(type) {
   151  		case *configpb.BadValidationRequestFixInfo:
   152  			return t
   153  		}
   154  	}
   155  	return nil
   156  }
   157  
   158  func filterOutUnvalidatableFiles(ctx context.Context,
   159  	fileHashes []*configpb.ValidateConfigsRequest_FileHash,
   160  	unvalidatableFiles []string) []*configpb.ValidateConfigsRequest_FileHash {
   161  	if len(unvalidatableFiles) == 0 {
   162  		return fileHashes
   163  	}
   164  	logging.Debugf(ctx, "No services can validate following files:\n  - %s", strings.Join(unvalidatableFiles, "\n  - "))
   165  	unvalidatableFileSet := stringset.NewFromSlice(unvalidatableFiles...)
   166  	ret := make([]*configpb.ValidateConfigsRequest_FileHash, 0, len(fileHashes))
   167  	for _, fh := range fileHashes {
   168  		if !unvalidatableFileSet.Has(fh.Path) {
   169  			ret = append(ret, fh)
   170  		}
   171  	}
   172  	return ret
   173  }
   174  
// uploadMissingFiles uploads the files requested by LUCI Config to their
// signed URLs.
//
// For every entry in uploadFiles the body of the file at that path in cs is
// gzip-compressed and streamed with an HTTP PUT to the entry's signed URL.
// Uploads run concurrently in an errgroup; the error returned is whatever
// eg.Wait reports. Any response status other than 200 OK is treated as a
// failure and the response body is included in the error.
//
// Does nothing if uploadFiles is empty.
func uploadMissingFiles(ctx context.Context, cs ConfigSet, uploadFiles []*configpb.BadValidationRequestFixInfo_UploadFile) error {
	if len(uploadFiles) == 0 {
		return nil
	}
	eg, ectx := errgroup.WithContext(ctx)
	for _, uf := range uploadFiles {
		// Per-iteration copy, so the closure below doesn't share the loop variable.
		uf := uf
		eg.Go(func() error {
			logging.Debugf(ectx, "Uploading file %q for validation", uf.GetPath())
			start := clock.Now(ctx)

			// The pipe connects the gzip producer goroutine to the HTTP request body.
			pr, pw := io.Pipe()

			// Read and gzip in background, writing the compressed data into the pipe.
			// Buffer writes, since gzip writer outputs often and writes to a pipe are
			// slow-ish if unbuffered.
			done := make(chan struct{})
			go func() (err error) {
				defer func() {
					// Hand the outcome to the reading side: the named result err is
					// what the reader observes once the written data is consumed.
					_ = pw.CloseWithError(err)
					close(done)
				}()
				bw := bufio.NewWriterSize(pw, 1024*512)
				zw := gzip.NewWriter(bw)
				if _, err := zw.Write(cs.Data[uf.GetPath()]); err != nil {
					_ = zw.Close()
					return errors.Annotate(err, "failed to write gzip data").Err()
				}
				// Close the gzip stream first, then flush what is left in the buffer
				// into the pipe.
				if err := zw.Close(); err != nil {
					return errors.Annotate(err, "failed to close gzip writer").Err()
				}
				if err := bw.Flush(); err != nil {
					return errors.Annotate(err, "failed to flush writer").Err()
				}
				return nil
			}()
			defer func() {
				_ = pr.Close() // unblocks writes in the goroutine, if still blocked
				<-done         // waits for the goroutine to finish running
			}()

			// Read from the pipe and upload.
			req, err := http.NewRequestWithContext(ectx, http.MethodPut, uf.GetSignedUrl(), pr)
			if err != nil {
				return errors.Annotate(err, "failed to create http request to upload file %q", uf.GetPath()).Err()
			}
			req.Header.Add("Content-Encoding", "gzip")
			// Declares the accepted upload size range as 0..MaxConfigSize.
			req.Header.Add("x-goog-content-length-range", fmt.Sprintf("0,%d", uf.GetMaxConfigSize()))

			switch res, err := http.DefaultClient.Do(req); {
			case err != nil:
				return errors.Annotate(err, "failed to execute http request to upload file %q", uf.GetPath()).Err()

			case res.StatusCode != http.StatusOK:
				defer func() { _ = res.Body.Close() }()
				// Include the response body in the error to explain the rejection.
				body, err := io.ReadAll(res.Body)
				if err != nil {
					return errors.Annotate(err, "failed to read response body").Err()
				}
				return errors.Reason("failed to upload file %q;  got http response code: %d, body: %s", uf.GetPath(), res.StatusCode, string(body)).Err()

			default:
				defer func() { _ = res.Body.Close() }()
				logging.Debugf(ectx, "Successfully uploaded file %q for validation in %s", uf.GetPath(), clock.Since(ctx, start))
				return nil
			}
		})
	}
	return eg.Wait()
}
   245  
// legacyRemoteValidator is a ConfigSetValidator built around a validateConfig
// callback.
//
// validateConfig performs a single validation call for one request message.
// requestSizeLimitBytes caps the total size of base64-encoded file contents
// put into a single request; a single file whose encoded content is larger
// than that is rejected by Validate.
type legacyRemoteValidator struct {
	validateConfig        func(context.Context, *legacy_config.LuciConfigValidateConfigRequestMessage) (*legacy_config.LuciConfigValidateConfigResponseMessage, error)
	requestSizeLimitBytes int64
}
   250  
   251  func (r *legacyRemoteValidator) Validate(ctx context.Context, cs ConfigSet) ([]*config.ValidationResult_Message, error) {
   252  	// Sort by size, smaller first, to group small files in a single request.
   253  	files := make([]*legacy_config.LuciConfigValidateConfigRequestMessageFile, 0, len(cs.Data))
   254  	for path, content := range cs.Data {
   255  		files = append(files, &legacy_config.LuciConfigValidateConfigRequestMessageFile{
   256  			Path:    path,
   257  			Content: base64.StdEncoding.EncodeToString(content),
   258  		})
   259  	}
   260  	sort.Slice(files, func(i, j int) bool {
   261  		if len(files[i].Content) == len(files[j].Content) {
   262  			return strings.Compare(files[i].Path, files[j].Path) < 0
   263  		}
   264  		return len(files[i].Content) < len(files[j].Content)
   265  	})
   266  
   267  	// Split all files into a bunch of smallish validation requests to avoid
   268  	// hitting 32MB request size limit.
   269  	var (
   270  		requests []*legacy_config.LuciConfigValidateConfigRequestMessage
   271  		curFiles []*legacy_config.LuciConfigValidateConfigRequestMessageFile
   272  		curSize  int64
   273  	)
   274  	flush := func() {
   275  		if len(curFiles) > 0 {
   276  			requests = append(requests, &legacy_config.LuciConfigValidateConfigRequestMessage{
   277  				ConfigSet: cs.Name,
   278  				Files:     curFiles,
   279  			})
   280  		}
   281  		curFiles = nil
   282  		curSize = 0
   283  	}
   284  	for _, f := range files {
   285  		switch contentSize := int64(len(f.Content)); {
   286  		case contentSize > r.requestSizeLimitBytes:
   287  			return nil, errors.Reason("the size of file %q is %s that is exceeding the limit of %s", f.Path, humanize.Bytes(uint64(contentSize)), humanize.Bytes(uint64(r.requestSizeLimitBytes))).Err()
   288  		case curSize+contentSize > r.requestSizeLimitBytes:
   289  			flush()
   290  			fallthrough
   291  		default:
   292  			curFiles = append(curFiles, f)
   293  			curSize += int64(len(f.Content))
   294  		}
   295  	}
   296  	flush()
   297  
   298  	var (
   299  		lock     sync.Mutex
   300  		messages []*config.ValidationResult_Message
   301  	)
   302  
   303  	// Execute all requests in parallel.
   304  	err := parallel.FanOutIn(func(gen chan<- func() error) {
   305  		for _, req := range requests {
   306  			req := req
   307  			gen <- func() error {
   308  				resp, err := r.validateConfig(ctx, req)
   309  				if resp != nil {
   310  					lock.Lock()
   311  					for _, msg := range resp.Messages {
   312  						if val, ok := config.ValidationResult_Severity_value[strings.ToUpper(msg.Severity)]; ok {
   313  							messages = append(messages, &config.ValidationResult_Message{
   314  								Path:     msg.Path,
   315  								Severity: config.ValidationResult_Severity(val),
   316  								Text:     msg.Text,
   317  							})
   318  						} else {
   319  							logging.Warningf(ctx, "unknown severity %q; full msg: %+v", msg.Severity, msg)
   320  						}
   321  					}
   322  					lock.Unlock()
   323  				}
   324  				return err
   325  			}
   326  		}
   327  	})
   328  
   329  	// Sort messages by path for determinism.
   330  	sort.Slice(messages, func(i, j int) bool {
   331  		return messages[i].Path < messages[j].Path
   332  	})
   333  
   334  	return messages, err
   335  }
   336  
   337  // LegacyRemoteValidator returns ConfigSetValidator that makes RPCs to legacy
   338  // LUCI Config service.
   339  func LegacyRemoteValidator(client *http.Client, host string) ConfigSetValidator {
   340  	validateURL := fmt.Sprintf("https://%s/_ah/api/config/v1/validate-config", host)
   341  	return &legacyRemoteValidator{
   342  		// 160 MiB is picked because compression is done before sending the final
   343  		// request and the real request size limit is 32 MiB. Since config is
   344  		// highly repetitive content, it should easily achieve 5:1 compression
   345  		// ratio.
   346  		requestSizeLimitBytes: 160 * 1024 * 1024,
   347  		validateConfig: func(ctx context.Context, req *legacy_config.LuciConfigValidateConfigRequestMessage) (*legacy_config.LuciConfigValidateConfigResponseMessage, error) {
   348  
   349  			debug := make([]string, len(req.Files))
   350  			for i, f := range req.Files {
   351  				debug[i] = fmt.Sprintf("%s (%s)", f.Path, humanize.Bytes(uint64(len(f.Content))))
   352  			}
   353  			logging.Debugf(ctx, "Sending request to %s to validate %d files: %s",
   354  				validateURL,
   355  				len(req.Files),
   356  				strings.Join(debug, ", "),
   357  			)
   358  
   359  			var body bytes.Buffer
   360  			zlibWriter := zlib.NewWriter(&body)
   361  			if err := json.NewEncoder(zlibWriter).Encode(req); err != nil {
   362  				return nil, errors.Annotate(err, "failed to encode the request").Err()
   363  			}
   364  			if err := zlibWriter.Close(); err != nil {
   365  				return nil, errors.Annotate(err, "failed to close the zlib stream").Err()
   366  			}
   367  			httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, validateURL, &body)
   368  			if err != nil {
   369  				return nil, errors.Annotate(err, "failed to create a new request").Err()
   370  			}
   371  			httpReq.Header.Add("Content-Type", `application/json-zlib`)
   372  			httpReq.Header.Add("User-Agent", UserAgent)
   373  
   374  			res, err := client.Do(httpReq)
   375  			if err != nil {
   376  				return nil, errors.Annotate(err, "failed to execute HTTP request").Err()
   377  			}
   378  			defer func() { _ = res.Body.Close() }()
   379  			if res.StatusCode < 200 || res.StatusCode > 299 {
   380  				return nil, googleapi.CheckResponse(res)
   381  			}
   382  			ret := &legacy_config.LuciConfigValidateConfigResponseMessage{
   383  				ServerResponse: googleapi.ServerResponse{
   384  					Header:         res.Header,
   385  					HTTPStatusCode: res.StatusCode,
   386  				},
   387  			}
   388  			if err := json.NewDecoder(res.Body).Decode(&ret); err != nil {
   389  				return nil, err
   390  			}
   391  			return ret, nil
   392  		},
   393  	}
   394  }
   395  
   396  // ReadConfigSet reads all regular files in the given directory (recursively)
   397  // and returns them as a ConfigSet with given name.
   398  func ReadConfigSet(dir, name string) (ConfigSet, error) {
   399  	configs := map[string][]byte{}
   400  	err := filepath.Walk(dir, func(p string, info os.FileInfo, err error) error {
   401  		if err != nil || !info.Mode().IsRegular() {
   402  			return err
   403  		}
   404  		content, err := os.ReadFile(p)
   405  		if err != nil {
   406  			return err
   407  		}
   408  		relPath, err := filepath.Rel(dir, p)
   409  		if err != nil {
   410  			return err
   411  		}
   412  		configs[filepath.ToSlash(relPath)] = content
   413  		return nil
   414  	})
   415  	if err != nil {
   416  		return ConfigSet{}, errors.Annotate(err, "failed to read config files").Err()
   417  	}
   418  	return ConfigSet{
   419  		Name: name,
   420  		Data: configs,
   421  	}, nil
   422  }
   423  
   424  // Files returns a sorted list of file names in the config set.
   425  func (cs ConfigSet) Files() []string {
   426  	f := make([]string, 0, len(cs.Data))
   427  	for k := range cs.Data {
   428  		f = append(f, k)
   429  	}
   430  	sort.Strings(f)
   431  	return f
   432  }
   433  
   434  // Validate sends the config set for validation to LUCI Config service.
   435  //
   436  // Returns ValidationResult with a list of validation message (errors, warnings,
   437  // etc). The list of messages may be empty if the config set is 100% valid.
   438  //
   439  // If the RPC call itself failed, ValidationResult is still returned, but it has
   440  // only ConfigSet and RPCError fields populated.
   441  func (cs ConfigSet) Validate(ctx context.Context, val ConfigSetValidator) *ValidationResult {
   442  	logging.Infof(ctx, "Sending to LUCI Config for validation as config set %q:", cs.Name)
   443  	for _, f := range cs.Files() {
   444  		logging.Infof(ctx, "  %s (%s)", f, humanize.Bytes(uint64(len(cs.Data[f]))))
   445  	}
   446  
   447  	messages, err := val.Validate(ctx, cs)
   448  	res := &ValidationResult{
   449  		ConfigSet: cs.Name,
   450  		Messages:  messages,
   451  	}
   452  	if err != nil {
   453  		res.RPCError = err.Error()
   454  		res.Failed = true
   455  	}
   456  	return res
   457  }
   458  
   459  // Format formats the validation result as a multi-line string
   460  func (vr *ValidationResult) Format() string {
   461  	buf := bytes.Buffer{}
   462  	for _, msg := range vr.Messages {
   463  		fmt.Fprintf(&buf, "%s: %s: %s: %s\n", msg.Severity, vr.ConfigSet, msg.Path, msg.Text)
   464  	}
   465  	return buf.String()
   466  }
   467  
   468  // OverallError is nil if the validation succeeded or non-nil if failed.
   469  //
   470  // Beware: mutates Failed field accordingly.
   471  func (vr *ValidationResult) OverallError(failOnWarnings bool) error {
   472  	errs, warns := 0, 0
   473  	for _, msg := range vr.Messages {
   474  		switch msg.Severity {
   475  		case config.ValidationResult_WARNING:
   476  			warns++
   477  		case config.ValidationResult_ERROR, config.ValidationResult_CRITICAL:
   478  			errs++
   479  		}
   480  	}
   481  
   482  	switch {
   483  	case errs > 0:
   484  		vr.Failed = true
   485  		return errors.Reason("some files were invalid").Err()
   486  	case warns > 0 && failOnWarnings:
   487  		vr.Failed = true
   488  		return errors.Reason("some files had validation warnings and -fail-on-warnings is set").Err()
   489  	case vr.RPCError != "":
   490  		vr.Failed = true
   491  		return errors.Reason("failed to send RPC to LUCI Config - %s", vr.RPCError).Err()
   492  	}
   493  
   494  	vr.Failed = false
   495  	return nil
   496  }