go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/tools/cmd/bqschemaupdater/main.go (about) 1 // Copyright 2018 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package main 16 17 import ( 18 "context" 19 "flag" 20 "fmt" 21 "io/ioutil" 22 "log" 23 "net/url" 24 "os" 25 "os/exec" 26 "path/filepath" 27 "strings" 28 "time" 29 30 "cloud.google.com/go/bigquery" 31 32 "google.golang.org/api/option" 33 "google.golang.org/protobuf/proto" 34 "google.golang.org/protobuf/types/descriptorpb" 35 36 "go.chromium.org/luci/auth" 37 "go.chromium.org/luci/common/bq" 38 "go.chromium.org/luci/common/errors" 39 luciflag "go.chromium.org/luci/common/flag" 40 "go.chromium.org/luci/common/flag/stringlistflag" 41 "go.chromium.org/luci/common/logging" 42 "go.chromium.org/luci/common/logging/gologger" 43 "go.chromium.org/luci/common/proto/google/descutil" 44 "go.chromium.org/luci/common/proto/protoc" 45 "go.chromium.org/luci/hardcoded/chromeinfra" 46 ) 47 48 var ( 49 canceledByUser = errors.BoolTag{ 50 Key: errors.NewTagKey("operation canceled by user"), 51 } 52 errCanceledByUser = errors.Reason("operation canceled by user").Tag(canceledByUser).Err() 53 ) 54 55 type tableDef struct { 56 ProjectID string 57 DataSetID string 58 TableID string 59 FriendlyName string 60 Description string 61 PartitioningDisabled bool 62 PartitioningExpiration time.Duration 63 PartitioningField string 64 PartitioningType string 65 Schema bigquery.Schema 66 ClusteringFields []string 67 } 68 69 func updateFromTableDef(ctx context.Context, force bool, ts tableStore, td tableDef) error { 70 tableID := fmt.Sprintf("%s.%s.%s", td.ProjectID, td.DataSetID, td.TableID) 71 shouldContinue := func() bool { 72 if force { 73 return true 74 } 75 return confirm("Continue") 76 } 77 78 md, err := ts.getTableMetadata(ctx, td.DataSetID, td.TableID) 79 switch { 80 case isNotFound(err): // new table 81 fmt.Printf("Table %q does not exist.\n", tableID) 82 fmt.Println("It will be created with the following schema:") 83 fmt.Println(strings.Repeat("=", 80)) 84 fmt.Println(bq.SchemaString(td.Schema)) 85 fmt.Println(strings.Repeat("=", 80)) 86 if !shouldContinue() { 87 return errCanceledByUser 88 } 89 90 md = &bigquery.TableMetadata{ 91 Name: td.FriendlyName, 92 Description: td.Description, 93 Schema: td.Schema, 94 } 95 if !td.PartitioningDisabled { 96 md.TimePartitioning = &bigquery.TimePartitioning{ 97 Expiration: td.PartitioningExpiration, 98 Field: td.PartitioningField, 99 Type: bigquery.TimePartitioningType(td.PartitioningType), 100 } 101 } 102 if len(td.ClusteringFields) > 0 { 103 md.Clustering = &bigquery.Clustering{Fields: td.ClusteringFields} 104 } 105 if err = ts.createTable(ctx, td.DataSetID, td.TableID, md); err != nil { 106 return err 107 } 108 fmt.Println("Table is created.") 109 fmt.Println("Please update the documentation in https://chromium.googlesource.com/infra/infra/+/master/doc/bigquery_tables.md or the internal equivalent.") 110 return nil 111 112 case err != nil: 113 return err 114 115 default: // existing table 116 fmt.Printf("Updating table %q\n", tableID) 117 118 // add fields missing in td.Schema because BigQuery does not support 119 // removing fields anyway. 120 bq.AddMissingFields(&td.Schema, md.Schema) 121 122 if diff := bq.SchemaDiff(md.Schema, td.Schema); diff == "" { 123 fmt.Println("No changes to schema detected.") 124 } else { 125 fmt.Println("The following changes to the schema will be made:") 126 fmt.Println(strings.Repeat("=", 80)) 127 fmt.Println(diff) 128 fmt.Println(strings.Repeat("=", 80)) 129 if !shouldContinue() { 130 return errCanceledByUser 131 } 132 } 133 134 update := bigquery.TableMetadataToUpdate{ 135 Name: td.FriendlyName, 136 Description: td.Description, 137 Schema: td.Schema, 138 } 139 if err := ts.updateTable(ctx, td.DataSetID, td.TableID, update); err != nil { 140 return err 141 } 142 fmt.Println("Finished updating the table.") 143 return nil 144 } 145 } 146 147 type flags struct { 148 tableDef 149 protoDir string 150 messageName string 151 force bool 152 verbose bool 153 importPaths stringlistflag.Flag 154 noGoMode bool 155 goModules stringlistflag.Flag 156 } 157 158 func parseFlags() (*flags, error) { 159 var f flags 160 table := flag.String("table", "", `Table name with format "<project id>.<dataset id>.<table id>"`) 161 flag.StringVar(&f.FriendlyName, "friendly-name", "", "Friendly name for the table.") 162 flag.StringVar(&f.PartitioningField, "partitioning-field", "", "Name of a timestamp field to use for table partitioning (beta).") 163 // See: https://pkg.go.dev/cloud.google.com/go/bigquery#TimePartitioning 164 flag.StringVar(&f.PartitioningType, "partitioning-type", "DAY", "One of HOUR, DAY, MONTH and YEAR.") 165 flag.BoolVar(&f.PartitioningDisabled, "disable-partitioning", false, "Makes the table not time-partitioned.") 166 flag.DurationVar(&f.PartitioningExpiration, "partitioning-expiration", 0, "Expiration for partitions. 0 for no expiration.") 167 flag.Var(luciflag.StringSlice(&f.ClusteringFields), "clustering-field", "Optional, one or more clustering fields. Can be specified multiple times and order is significant.") 168 flag.StringVar(&f.protoDir, "message-dir", ".", "Path to directory with the .proto file that defines the schema message.") 169 flag.BoolVar(&f.noGoMode, "no-go-mode", false, "Don't try to recognize active Go module based on cwd.") 170 flag.Var(&f.goModules, "go-module", "Make protos in the given module available in proto import path. Can be specified multiple times.") 171 flag.BoolVar(&f.force, "force", false, "Proceed without a user confirmation.") 172 flag.BoolVar(&f.verbose, "verbose", false, "Print more information in the log.") 173 // -I matches protoc's flag and its error message suggesting to pass -I. 174 flag.Var(&f.importPaths, "I", "Path to directory with the imported .proto file; can be specified multiple times.") 175 176 flag.StringVar(&f.messageName, 177 "message", 178 "", 179 "Full name of the protobuf message that defines the table schema. The name must contain proto package name.") 180 181 flag.Parse() 182 183 switch { 184 case len(flag.Args()) > 0: 185 return nil, fmt.Errorf("unexpected arguments: %q", flag.Args()) 186 case *table == "": 187 return nil, fmt.Errorf("-table is required") 188 case f.messageName == "": 189 return nil, fmt.Errorf("-message is required (the name must contain the proto package name)") 190 case f.PartitioningField != "" && f.PartitioningDisabled: 191 return nil, fmt.Errorf("partitioning field cannot be non-empty with disabled partitioning") 192 case f.noGoMode && len(f.goModules) > 0: 193 return nil, fmt.Errorf("-no-go-mode and -go-module flags are not compatible") 194 } 195 if parts := strings.Split(*table, "."); len(parts) == 3 { 196 f.ProjectID = parts[0] 197 f.DataSetID = parts[1] 198 f.TableID = parts[2] 199 } else { 200 return nil, fmt.Errorf("expected exactly 2 dots in table name %q", *table) 201 } 202 203 return &f, nil 204 } 205 206 func run(ctx context.Context) error { 207 flags, err := parseFlags() 208 if err != nil { 209 return errors.Annotate(err, "failed to parse flags").Err() 210 } 211 212 if flags.verbose { 213 ctx = logging.SetLevel(ctx, logging.Debug) 214 } else { 215 ctx = logging.SetLevel(ctx, logging.Error) 216 } 217 218 td := flags.tableDef 219 220 desc, err := loadProtoDescription(ctx, flags.protoDir, !flags.noGoMode, flags.goModules, flags.importPaths) 221 if err != nil { 222 return errors.Annotate(err, "failed to load proto descriptor").Err() 223 } 224 td.Schema, td.Description, err = schemaFromMessage(desc, flags.messageName) 225 if err != nil { 226 return errors.Annotate(err, "could not derive schema from message %q at path %q", flags.messageName, flags.protoDir).Err() 227 } 228 file, _, _ := descutil.Resolve(desc, flags.messageName) 229 td.Description = fmt.Sprintf( 230 "Proto: https://cs.chromium.org/%s\nTable Description:\n%s", 231 url.PathEscape(fmt.Sprintf("%s file:%s", flags.messageName, file.GetName())), 232 td.Description) 233 234 // Create an Authenticator and use it for BigQuery operations. 235 authOpts := chromeinfra.DefaultAuthOptions() 236 authOpts.Scopes = []string{bigquery.Scope} 237 authenticator := auth.NewAuthenticator(ctx, auth.InteractiveLogin, authOpts) 238 239 authTS, err := authenticator.TokenSource() 240 if err != nil { 241 return errors.Annotate(err, "could not get authentication credentials").Err() 242 } 243 244 c, err := bigquery.NewClient(ctx, td.ProjectID, option.WithTokenSource(authTS)) 245 if err != nil { 246 return errors.Annotate(err, "could not create BigQuery client").Err() 247 } 248 return updateFromTableDef(ctx, flags.force, bqTableStore{c}, td) 249 } 250 251 func main() { 252 ctx := gologger.StdConfig.Use(context.Background()) 253 switch err := run(ctx); { 254 case canceledByUser.In(err): 255 os.Exit(1) 256 case err != nil: 257 log.Fatal(err) 258 } 259 } 260 261 // schemaFromMessage loads a message by name from .proto files in dir 262 // and converts the message to a bigquery schema. 263 func schemaFromMessage(desc *descriptorpb.FileDescriptorSet, messageName string) (schema bigquery.Schema, description string, err error) { 264 conv := bq.SchemaConverter{ 265 Desc: desc, 266 SourceCodeInfo: make(map[*descriptorpb.FileDescriptorProto]bq.SourceCodeInfoMap, len(desc.File)), 267 } 268 for _, f := range desc.File { 269 conv.SourceCodeInfo[f], err = descutil.IndexSourceCodeInfo(f) 270 if err != nil { 271 return nil, "", errors.Annotate(err, "failed to index source code info in file %q", f.GetName()).Err() 272 } 273 } 274 return conv.Schema(messageName) 275 } 276 277 // checkGoMode returns true if `go` executable is in PATH and `dir` is in 278 // a Go module. 279 // 280 // Note that GOPATH mode is not supported. Returns an error if it sees GOPATH 281 // env var. 282 func checkGoMode(dir string) (bool, error) { 283 cmd := exec.Command("go", "list", "-m") 284 cmd.Dir = dir 285 buf, err := cmd.CombinedOutput() 286 if err == nil { 287 // When `dir` is not a Go package, `go -list -m` returns 288 // "command-line-arguments". See https://github.com/golang/go/issues/36793. 289 return strings.TrimSpace(string(buf)) != "command-line-arguments", nil 290 } 291 if os.Getenv("GO111MODULE") != "off" && os.Getenv("GOPATH") != "" { 292 return false, errors.Reason("GOPATH mode is not supported").Err() 293 } 294 return false, nil 295 } 296 297 // prepInputs prepares inputs for protoc depending on Go vs non-Go mode. 298 func prepInputs(ctx context.Context, dir string, allowGoMode bool, goModules, importPaths []string) (*protoc.StagedInputs, error) { 299 useGo := allowGoMode && len(goModules) > 0 300 if !useGo && allowGoMode { 301 var err error 302 if useGo, err = checkGoMode(dir); err != nil { 303 return nil, err 304 } 305 } 306 if useGo { 307 logging.Infof(ctx, "Running in Go mode: importing *.proto from Go source tree") 308 return protoc.StageGoInputs(ctx, dir, goModules, nil, importPaths) 309 } 310 logging.Infof(ctx, "Running in generic mode: importing *.proto from explicitly given paths only") 311 return protoc.StageGenericInputs(ctx, dir, importPaths) 312 } 313 314 // loadProtoDescription compiles .proto files in the dir 315 // and returns their descriptor. 316 func loadProtoDescription(ctx context.Context, dir string, allowGoMode bool, goModules, importPaths []string) (*descriptorpb.FileDescriptorSet, error) { 317 // Stage all requested Go modules under a single root. 318 inputs, err := prepInputs(ctx, dir, allowGoMode, goModules, importPaths) 319 if err != nil { 320 return nil, err 321 } 322 defer inputs.Cleanup() 323 324 // Prep the temp directory for the resulting descriptor file. 325 tempDir, err := ioutil.TempDir("", "") 326 if err != nil { 327 return nil, err 328 } 329 defer os.RemoveAll(tempDir) 330 descFile := filepath.Join(tempDir, "desc") 331 332 // Compile protos to get the descriptor. 333 err = protoc.Compile(ctx, &protoc.CompileParams{ 334 Inputs: inputs, 335 OutputDescriptorSet: descFile, 336 }) 337 if err != nil { 338 return nil, err 339 } 340 341 // Read the resulting descriptor. 342 descBytes, err := os.ReadFile(descFile) 343 if err != nil { 344 return nil, err 345 } 346 var desc descriptorpb.FileDescriptorSet 347 err = proto.Unmarshal(descBytes, &desc) 348 return &desc, err 349 } 350 351 // confirm asks for a user confirmation for an action, with No as default. 352 // Only "y" or "Y" responses is treated as yes. 353 func confirm(action string) (response bool) { 354 fmt.Printf("%s? [y/N] ", action) 355 var res string 356 fmt.Scanln(&res) 357 return res == "y" || res == "Y" 358 }