go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/resultdb/internal/services/artifactexporter/schema.go (about)

     1  // Copyright 2024 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package artifactexporter handles uploading artifacts to BigQuery.
    16  // Perhaps it makes sense to merge this package into the bqexporter package
    17  // but we intentionally keep it as a separate package because:
    18  // 1. It is still in experimental phase.
    19  // 2. The bqexporter package contains the legacy artifact exporter. Merging
    20  // the 2 packages may cause some confusion/naming conflicts.
    21  package artifactexporter
    22  
    23  import (
    24  	"time"
    25  
    26  	"cloud.google.com/go/bigquery"
    27  	"cloud.google.com/go/bigquery/storage/managedwriter/adapt"
    28  	"github.com/golang/protobuf/descriptor"
    29  	desc "github.com/golang/protobuf/protoc-gen-go/descriptor"
    30  	"google.golang.org/protobuf/types/descriptorpb"
    31  
    32  	"go.chromium.org/luci/resultdb/bqutil"
    33  	bqpb "go.chromium.org/luci/resultdb/proto/bq"
    34  	pb "go.chromium.org/luci/resultdb/proto/v1"
    35  )
    36  
    37  // The table containing test artifacts.
    38  const tableName = "text_artifacts"
    39  
    40  const partitionExpirationTime = 90 * 24 * time.Hour // 90 days.
    41  
    42  const rowMessage = "luci.resultdb.bq.TextArtifactRow"
    43  
    44  var tableMetadata *bigquery.TableMetadata
    45  
    46  // tableSchemaDescriptor is a self-contained DescriptorProto for describing
    47  // row protocol buffers sent to the BigQuery Write API.
    48  var tableSchemaDescriptor *descriptorpb.DescriptorProto
    49  
    50  func init() {
    51  	var err error
    52  	var schema bigquery.Schema
    53  	if schema, err = generateRowSchema(); err != nil {
    54  		panic(err)
    55  	}
    56  	if tableSchemaDescriptor, err = generateRowSchemaDescriptor(); err != nil {
    57  		panic(err)
    58  	}
    59  
    60  	tableMetadata = &bigquery.TableMetadata{
    61  		TimePartitioning: &bigquery.TimePartitioning{
    62  			Type:       bigquery.DayPartitioningType,
    63  			Expiration: partitionExpirationTime,
    64  			Field:      "partition_time",
    65  		},
    66  		// Clustering on these fields will allow good compression rate and query performance.
    67  		Clustering: &bigquery.Clustering{
    68  			Fields: []string{"project", "test_id", "artifact_shard", "invocation_id"},
    69  		},
    70  		// Relax ensures no fields are marked "required".
    71  		Schema: schema.Relax(),
    72  	}
    73  }
    74  
    75  func generateRowSchema() (schema bigquery.Schema, err error) {
    76  	fd, _ := descriptor.MessageDescriptorProto(&bqpb.TextArtifactRow{})
    77  	fdsp, _ := descriptor.MessageDescriptorProto(&pb.StringPair{})
    78  	fdset := &desc.FileDescriptorSet{File: []*desc.FileDescriptorProto{fd, fdsp}}
    79  	return bqutil.GenerateSchema(fdset, rowMessage)
    80  }
    81  
    82  func generateRowSchemaDescriptor() (*desc.DescriptorProto, error) {
    83  	m := &bqpb.TextArtifactRow{}
    84  	descriptorProto, err := adapt.NormalizeDescriptor(m.ProtoReflect().Descriptor())
    85  	if err != nil {
    86  		return nil, err
    87  	}
    88  	return descriptorProto, nil
    89  }