go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/tools/cmd/bqschemaupdater/main.go (about)

     1  // Copyright 2018 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package main
    16  
    17  import (
    18  	"context"
    19  	"flag"
    20  	"fmt"
    21  	"io/ioutil"
    22  	"log"
    23  	"net/url"
    24  	"os"
    25  	"os/exec"
    26  	"path/filepath"
    27  	"strings"
    28  	"time"
    29  
    30  	"cloud.google.com/go/bigquery"
    31  
    32  	"google.golang.org/api/option"
    33  	"google.golang.org/protobuf/proto"
    34  	"google.golang.org/protobuf/types/descriptorpb"
    35  
    36  	"go.chromium.org/luci/auth"
    37  	"go.chromium.org/luci/common/bq"
    38  	"go.chromium.org/luci/common/errors"
    39  	luciflag "go.chromium.org/luci/common/flag"
    40  	"go.chromium.org/luci/common/flag/stringlistflag"
    41  	"go.chromium.org/luci/common/logging"
    42  	"go.chromium.org/luci/common/logging/gologger"
    43  	"go.chromium.org/luci/common/proto/google/descutil"
    44  	"go.chromium.org/luci/common/proto/protoc"
    45  	"go.chromium.org/luci/hardcoded/chromeinfra"
    46  )
    47  
var (
	// canceledByUser tags errors that mean the user declined a confirmation
	// prompt. main checks for it with canceledByUser.In to exit with status 1
	// without logging the error.
	canceledByUser = errors.BoolTag{
		Key: errors.NewTagKey("operation canceled by user"),
	}
	// errCanceledByUser is the tagged error returned by updateFromTableDef
	// when the user does not confirm the pending operation.
	errCanceledByUser = errors.Reason("operation canceled by user").Tag(canceledByUser).Err()
)
    54  
// tableDef describes the desired state of a BigQuery table.
//
// The identifying fields come from the -table flag and the remaining
// user-settable fields from their own flags (see parseFlags); Schema and
// Description are derived from the proto message in run.
type tableDef struct {
	// ProjectID, DataSetID and TableID are the three dot-separated components
	// of the -table flag.
	ProjectID              string
	DataSetID              string
	TableID                string
	// FriendlyName is stored as the table's metadata Name.
	FriendlyName           string
	// Description is stored as the table's description.
	Description            string
	// PartitioningDisabled makes a newly created table not time-partitioned.
	PartitioningDisabled   bool
	// PartitioningExpiration is the expiration for partitions; 0 means no
	// expiration.
	PartitioningExpiration time.Duration
	// PartitioningField is the name of a timestamp field to partition by.
	PartitioningField      string
	// PartitioningType is converted to bigquery.TimePartitioningType as is;
	// the flag documents HOUR, DAY, MONTH and YEAR.
	PartitioningType       string
	// Schema is the desired table schema.
	Schema                 bigquery.Schema
	// ClusteringFields lists clustering fields; order is significant.
	//
	// Note that updateFromTableDef applies the partitioning and clustering
	// settings only when it creates a table, not when it updates one.
	ClusteringFields       []string
}
    68  
    69  func updateFromTableDef(ctx context.Context, force bool, ts tableStore, td tableDef) error {
    70  	tableID := fmt.Sprintf("%s.%s.%s", td.ProjectID, td.DataSetID, td.TableID)
    71  	shouldContinue := func() bool {
    72  		if force {
    73  			return true
    74  		}
    75  		return confirm("Continue")
    76  	}
    77  
    78  	md, err := ts.getTableMetadata(ctx, td.DataSetID, td.TableID)
    79  	switch {
    80  	case isNotFound(err): // new table
    81  		fmt.Printf("Table %q does not exist.\n", tableID)
    82  		fmt.Println("It will be created with the following schema:")
    83  		fmt.Println(strings.Repeat("=", 80))
    84  		fmt.Println(bq.SchemaString(td.Schema))
    85  		fmt.Println(strings.Repeat("=", 80))
    86  		if !shouldContinue() {
    87  			return errCanceledByUser
    88  		}
    89  
    90  		md = &bigquery.TableMetadata{
    91  			Name:        td.FriendlyName,
    92  			Description: td.Description,
    93  			Schema:      td.Schema,
    94  		}
    95  		if !td.PartitioningDisabled {
    96  			md.TimePartitioning = &bigquery.TimePartitioning{
    97  				Expiration: td.PartitioningExpiration,
    98  				Field:      td.PartitioningField,
    99  				Type:       bigquery.TimePartitioningType(td.PartitioningType),
   100  			}
   101  		}
   102  		if len(td.ClusteringFields) > 0 {
   103  			md.Clustering = &bigquery.Clustering{Fields: td.ClusteringFields}
   104  		}
   105  		if err = ts.createTable(ctx, td.DataSetID, td.TableID, md); err != nil {
   106  			return err
   107  		}
   108  		fmt.Println("Table is created.")
   109  		fmt.Println("Please update the documentation in https://chromium.googlesource.com/infra/infra/+/master/doc/bigquery_tables.md or the internal equivalent.")
   110  		return nil
   111  
   112  	case err != nil:
   113  		return err
   114  
   115  	default: // existing table
   116  		fmt.Printf("Updating table %q\n", tableID)
   117  
   118  		// add fields missing in td.Schema because BigQuery does not support
   119  		// removing fields anyway.
   120  		bq.AddMissingFields(&td.Schema, md.Schema)
   121  
   122  		if diff := bq.SchemaDiff(md.Schema, td.Schema); diff == "" {
   123  			fmt.Println("No changes to schema detected.")
   124  		} else {
   125  			fmt.Println("The following changes to the schema will be made:")
   126  			fmt.Println(strings.Repeat("=", 80))
   127  			fmt.Println(diff)
   128  			fmt.Println(strings.Repeat("=", 80))
   129  			if !shouldContinue() {
   130  				return errCanceledByUser
   131  			}
   132  		}
   133  
   134  		update := bigquery.TableMetadataToUpdate{
   135  			Name:        td.FriendlyName,
   136  			Description: td.Description,
   137  			Schema:      td.Schema,
   138  		}
   139  		if err := ts.updateTable(ctx, td.DataSetID, td.TableID, update); err != nil {
   140  			return err
   141  		}
   142  		fmt.Println("Finished updating the table.")
   143  		return nil
   144  	}
   145  }
   146  
// flags holds the parsed command line: the table definition fields plus the
// options that control proto loading and interaction.
type flags struct {
	tableDef
	// protoDir is the -message-dir flag: the directory with the .proto file
	// that defines the schema message.
	protoDir    string
	// messageName is the -message flag: the full name of the protobuf message,
	// including the proto package name.
	messageName string
	// force is the -force flag: proceed without a user confirmation.
	force       bool
	// verbose is the -verbose flag: run sets the log level to Debug instead of
	// Error when it is set.
	verbose     bool
	// importPaths is the repeatable -I flag: directories with imported .proto
	// files.
	importPaths stringlistflag.Flag
	// noGoMode is the -no-go-mode flag: don't try to recognize the active Go
	// module based on cwd.
	noGoMode    bool
	// goModules is the repeatable -go-module flag: modules whose protos are
	// made available in the proto import path.
	goModules   stringlistflag.Flag
}
   157  
   158  func parseFlags() (*flags, error) {
   159  	var f flags
   160  	table := flag.String("table", "", `Table name with format "<project id>.<dataset id>.<table id>"`)
   161  	flag.StringVar(&f.FriendlyName, "friendly-name", "", "Friendly name for the table.")
   162  	flag.StringVar(&f.PartitioningField, "partitioning-field", "", "Name of a timestamp field to use for table partitioning (beta).")
   163  	// See: https://pkg.go.dev/cloud.google.com/go/bigquery#TimePartitioning
   164  	flag.StringVar(&f.PartitioningType, "partitioning-type", "DAY", "One of HOUR, DAY, MONTH and YEAR.")
   165  	flag.BoolVar(&f.PartitioningDisabled, "disable-partitioning", false, "Makes the table not time-partitioned.")
   166  	flag.DurationVar(&f.PartitioningExpiration, "partitioning-expiration", 0, "Expiration for partitions. 0 for no expiration.")
   167  	flag.Var(luciflag.StringSlice(&f.ClusteringFields), "clustering-field", "Optional, one or more clustering fields. Can be specified multiple times and order is significant.")
   168  	flag.StringVar(&f.protoDir, "message-dir", ".", "Path to directory with the .proto file that defines the schema message.")
   169  	flag.BoolVar(&f.noGoMode, "no-go-mode", false, "Don't try to recognize active Go module based on cwd.")
   170  	flag.Var(&f.goModules, "go-module", "Make protos in the given module available in proto import path. Can be specified multiple times.")
   171  	flag.BoolVar(&f.force, "force", false, "Proceed without a user confirmation.")
   172  	flag.BoolVar(&f.verbose, "verbose", false, "Print more information in the log.")
   173  	// -I matches protoc's flag and its error message suggesting to pass -I.
   174  	flag.Var(&f.importPaths, "I", "Path to directory with the imported .proto file; can be specified multiple times.")
   175  
   176  	flag.StringVar(&f.messageName,
   177  		"message",
   178  		"",
   179  		"Full name of the protobuf message that defines the table schema. The name must contain proto package name.")
   180  
   181  	flag.Parse()
   182  
   183  	switch {
   184  	case len(flag.Args()) > 0:
   185  		return nil, fmt.Errorf("unexpected arguments: %q", flag.Args())
   186  	case *table == "":
   187  		return nil, fmt.Errorf("-table is required")
   188  	case f.messageName == "":
   189  		return nil, fmt.Errorf("-message is required (the name must contain the proto package name)")
   190  	case f.PartitioningField != "" && f.PartitioningDisabled:
   191  		return nil, fmt.Errorf("partitioning field cannot be non-empty with disabled partitioning")
   192  	case f.noGoMode && len(f.goModules) > 0:
   193  		return nil, fmt.Errorf("-no-go-mode and -go-module flags are not compatible")
   194  	}
   195  	if parts := strings.Split(*table, "."); len(parts) == 3 {
   196  		f.ProjectID = parts[0]
   197  		f.DataSetID = parts[1]
   198  		f.TableID = parts[2]
   199  	} else {
   200  		return nil, fmt.Errorf("expected exactly 2 dots in table name %q", *table)
   201  	}
   202  
   203  	return &f, nil
   204  }
   205  
   206  func run(ctx context.Context) error {
   207  	flags, err := parseFlags()
   208  	if err != nil {
   209  		return errors.Annotate(err, "failed to parse flags").Err()
   210  	}
   211  
   212  	if flags.verbose {
   213  		ctx = logging.SetLevel(ctx, logging.Debug)
   214  	} else {
   215  		ctx = logging.SetLevel(ctx, logging.Error)
   216  	}
   217  
   218  	td := flags.tableDef
   219  
   220  	desc, err := loadProtoDescription(ctx, flags.protoDir, !flags.noGoMode, flags.goModules, flags.importPaths)
   221  	if err != nil {
   222  		return errors.Annotate(err, "failed to load proto descriptor").Err()
   223  	}
   224  	td.Schema, td.Description, err = schemaFromMessage(desc, flags.messageName)
   225  	if err != nil {
   226  		return errors.Annotate(err, "could not derive schema from message %q at path %q", flags.messageName, flags.protoDir).Err()
   227  	}
   228  	file, _, _ := descutil.Resolve(desc, flags.messageName)
   229  	td.Description = fmt.Sprintf(
   230  		"Proto: https://cs.chromium.org/%s\nTable Description:\n%s",
   231  		url.PathEscape(fmt.Sprintf("%s file:%s", flags.messageName, file.GetName())),
   232  		td.Description)
   233  
   234  	// Create an Authenticator and use it for BigQuery operations.
   235  	authOpts := chromeinfra.DefaultAuthOptions()
   236  	authOpts.Scopes = []string{bigquery.Scope}
   237  	authenticator := auth.NewAuthenticator(ctx, auth.InteractiveLogin, authOpts)
   238  
   239  	authTS, err := authenticator.TokenSource()
   240  	if err != nil {
   241  		return errors.Annotate(err, "could not get authentication credentials").Err()
   242  	}
   243  
   244  	c, err := bigquery.NewClient(ctx, td.ProjectID, option.WithTokenSource(authTS))
   245  	if err != nil {
   246  		return errors.Annotate(err, "could not create BigQuery client").Err()
   247  	}
   248  	return updateFromTableDef(ctx, flags.force, bqTableStore{c}, td)
   249  }
   250  
   251  func main() {
   252  	ctx := gologger.StdConfig.Use(context.Background())
   253  	switch err := run(ctx); {
   254  	case canceledByUser.In(err):
   255  		os.Exit(1)
   256  	case err != nil:
   257  		log.Fatal(err)
   258  	}
   259  }
   260  
   261  // schemaFromMessage loads a message by name from .proto files in dir
   262  // and converts the message to a bigquery schema.
   263  func schemaFromMessage(desc *descriptorpb.FileDescriptorSet, messageName string) (schema bigquery.Schema, description string, err error) {
   264  	conv := bq.SchemaConverter{
   265  		Desc:           desc,
   266  		SourceCodeInfo: make(map[*descriptorpb.FileDescriptorProto]bq.SourceCodeInfoMap, len(desc.File)),
   267  	}
   268  	for _, f := range desc.File {
   269  		conv.SourceCodeInfo[f], err = descutil.IndexSourceCodeInfo(f)
   270  		if err != nil {
   271  			return nil, "", errors.Annotate(err, "failed to index source code info in file %q", f.GetName()).Err()
   272  		}
   273  	}
   274  	return conv.Schema(messageName)
   275  }
   276  
   277  // checkGoMode returns true if `go` executable is in PATH and `dir` is in
   278  // a Go module.
   279  //
   280  // Note that GOPATH mode is not supported. Returns an error if it sees GOPATH
   281  // env var.
   282  func checkGoMode(dir string) (bool, error) {
   283  	cmd := exec.Command("go", "list", "-m")
   284  	cmd.Dir = dir
   285  	buf, err := cmd.CombinedOutput()
   286  	if err == nil {
   287  		// When `dir` is not a Go package, `go -list -m` returns
   288  		// "command-line-arguments". See https://github.com/golang/go/issues/36793.
   289  		return strings.TrimSpace(string(buf)) != "command-line-arguments", nil
   290  	}
   291  	if os.Getenv("GO111MODULE") != "off" && os.Getenv("GOPATH") != "" {
   292  		return false, errors.Reason("GOPATH mode is not supported").Err()
   293  	}
   294  	return false, nil
   295  }
   296  
   297  // prepInputs prepares inputs for protoc depending on Go vs non-Go mode.
   298  func prepInputs(ctx context.Context, dir string, allowGoMode bool, goModules, importPaths []string) (*protoc.StagedInputs, error) {
   299  	useGo := allowGoMode && len(goModules) > 0
   300  	if !useGo && allowGoMode {
   301  		var err error
   302  		if useGo, err = checkGoMode(dir); err != nil {
   303  			return nil, err
   304  		}
   305  	}
   306  	if useGo {
   307  		logging.Infof(ctx, "Running in Go mode: importing *.proto from Go source tree")
   308  		return protoc.StageGoInputs(ctx, dir, goModules, nil, importPaths)
   309  	}
   310  	logging.Infof(ctx, "Running in generic mode: importing *.proto from explicitly given paths only")
   311  	return protoc.StageGenericInputs(ctx, dir, importPaths)
   312  }
   313  
   314  // loadProtoDescription compiles .proto files in the dir
   315  // and returns their descriptor.
   316  func loadProtoDescription(ctx context.Context, dir string, allowGoMode bool, goModules, importPaths []string) (*descriptorpb.FileDescriptorSet, error) {
   317  	// Stage all requested Go modules under a single root.
   318  	inputs, err := prepInputs(ctx, dir, allowGoMode, goModules, importPaths)
   319  	if err != nil {
   320  		return nil, err
   321  	}
   322  	defer inputs.Cleanup()
   323  
   324  	// Prep the temp directory for the resulting descriptor file.
   325  	tempDir, err := ioutil.TempDir("", "")
   326  	if err != nil {
   327  		return nil, err
   328  	}
   329  	defer os.RemoveAll(tempDir)
   330  	descFile := filepath.Join(tempDir, "desc")
   331  
   332  	// Compile protos to get the descriptor.
   333  	err = protoc.Compile(ctx, &protoc.CompileParams{
   334  		Inputs:              inputs,
   335  		OutputDescriptorSet: descFile,
   336  	})
   337  	if err != nil {
   338  		return nil, err
   339  	}
   340  
   341  	// Read the resulting descriptor.
   342  	descBytes, err := os.ReadFile(descFile)
   343  	if err != nil {
   344  		return nil, err
   345  	}
   346  	var desc descriptorpb.FileDescriptorSet
   347  	err = proto.Unmarshal(descBytes, &desc)
   348  	return &desc, err
   349  }
   350  
   351  // confirm asks for a user confirmation for an action, with No as default.
   352  // Only "y" or "Y" responses is treated as yes.
   353  func confirm(action string) (response bool) {
   354  	fmt.Printf("%s? [y/N] ", action)
   355  	var res string
   356  	fmt.Scanln(&res)
   357  	return res == "y" || res == "Y"
   358  }