go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/tokenserver/appengine/impl/utils/bq/bq.go (about)

     1  // Copyright 2021 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package bq contains helpers for uploading rows to BigQuery.
    16  package bq
    17  
    18  import (
    19  	"context"
    20  	"encoding/json"
    21  	"fmt"
    22  	"io"
    23  	"net/http"
    24  
    25  	"cloud.google.com/go/bigquery"
    26  
    27  	"google.golang.org/api/option"
    28  	"google.golang.org/protobuf/encoding/protojson"
    29  	"google.golang.org/protobuf/encoding/prototext"
    30  	"google.golang.org/protobuf/proto"
    31  
    32  	"go.chromium.org/luci/common/bq"
    33  	"go.chromium.org/luci/common/errors"
    34  	"go.chromium.org/luci/common/logging"
    35  	"go.chromium.org/luci/common/tsmon/field"
    36  	"go.chromium.org/luci/common/tsmon/metric"
    37  	"go.chromium.org/luci/server/auth"
    38  	"go.chromium.org/luci/server/tq"
    39  )
    40  
    41  var (
    42  	bigQueryInserts = metric.NewCounter(
    43  		"luci/tokenserver/bigquery_inserts",
    44  		"Number of insertAll BQ calls.",
    45  		nil,
    46  		field.String("table"),   // "<projID>/<datasetID>/<tableID>"
    47  		field.String("outcome")) // "ok, "bad_row", "deadline", "error"
    48  )
    49  
    50  // RegisterTokenKind registers a TQ class to log a particular token kind into
    51  // a particular BigQuery table.
    52  func RegisterTokenKind(table string, prototype proto.Message) {
    53  	tq.RegisterTaskClass(tq.TaskClass{
    54  		ID:        table,
    55  		Prototype: prototype,
    56  		Kind:      tq.NonTransactional,
    57  		Topic:     "bigquery-log",
    58  		Custom: func(ctx context.Context, m proto.Message) (*tq.CustomPayload, error) {
    59  			blob, err := (protojson.MarshalOptions{Indent: "\t"}).Marshal(m)
    60  			if err != nil {
    61  				return nil, err
    62  			}
    63  			return &tq.CustomPayload{
    64  				Meta: map[string]string{"table": table},
    65  				Body: blob,
    66  			}, nil
    67  		},
    68  	})
    69  }
    70  
    71  // LogToken emits a PubSub task to record the token to the BigQuery log.
    72  //
    73  // If `dryRun` is true, will just log the token to the local text log.
    74  func LogToken(ctx context.Context, tok proto.Message, dryRun bool) error {
    75  	if logging.IsLogging(ctx, logging.Debug) {
    76  		blob, err := (prototext.MarshalOptions{Indent: "\t"}).Marshal(tok)
    77  		if err != nil {
    78  			logging.Errorf(ctx, "Failed to marshal the row to proto text: %s", err)
    79  		} else {
    80  			logging.Debugf(ctx, "BigQuery row:\n%s", blob)
    81  		}
    82  	}
    83  	if dryRun {
    84  		return nil
    85  	}
    86  	return tq.AddTask(ctx, &tq.Task{Payload: tok})
    87  }
    88  
    89  // Inserter receives PubSub push messages and converts them to BQ inserts.
    90  type Inserter struct {
    91  	bq *bigquery.Client
    92  }
    93  
    94  // NewInserter constructs an instance of Inserter.
    95  func NewInserter(ctx context.Context, projectID string) (*Inserter, error) {
    96  	tr, err := auth.GetRPCTransport(ctx, auth.AsSelf, auth.WithScopes(auth.CloudOAuthScopes...))
    97  	if err != nil {
    98  		return nil, err
    99  	}
   100  	bq, err := bigquery.NewClient(ctx, projectID, option.WithHTTPClient(&http.Client{Transport: tr}))
   101  	if err != nil {
   102  		return nil, err
   103  	}
   104  	return &Inserter{bq: bq}, nil
   105  }
   106  
   107  // HandlePubSubPush handles incoming PubSub push request.
   108  func (ins *Inserter) HandlePubSubPush(ctx context.Context, body io.Reader) error {
   109  	blob, err := io.ReadAll(body)
   110  	if err != nil {
   111  		return errors.Annotate(err, "failed to read the request body").Err()
   112  	}
   113  
   114  	// See https://cloud.google.com/pubsub/docs/push#receiving_messages
   115  	var msg struct {
   116  		Message struct {
   117  			Attributes map[string]string `json:"attributes"`
   118  			Data       []byte            `json:"data"`
   119  			MessageID  string            `json:"messageId"`
   120  		} `json:"message"`
   121  	}
   122  	if json.Unmarshal(blob, &msg); err != nil {
   123  		return errors.Annotate(err, "failed to unmarshal PubSub message").Err()
   124  	}
   125  
   126  	// "table" metadata defines both the destination table and the TQ task class
   127  	// used to push this message, see RegisterTokenKind.
   128  	table := msg.Message.Attributes["table"]
   129  
   130  	// Deserialize the row into a corresponding proto type.
   131  	cls := tq.Default.TaskClassRef(table)
   132  	if cls == nil {
   133  		return errors.Reason("unrecognized task class %q", table).Err()
   134  	}
   135  	row := cls.Definition().Prototype.ProtoReflect().New().Interface()
   136  	if err := protojson.Unmarshal(msg.Message.Data, row); err != nil {
   137  		return errors.Annotate(err, "failed to unmarshal the row for %q", table).Err()
   138  	}
   139  	return ins.insert(ctx, table, row, msg.Message.MessageID)
   140  }
   141  
   142  func (ins *Inserter) insert(ctx context.Context, table string, row proto.Message, messageID string) error {
   143  	tab := ins.bq.Dataset("tokens").Table(table)
   144  
   145  	err := tab.Inserter().Put(ctx, &bq.Row{
   146  		Message:  row,
   147  		InsertID: fmt.Sprintf("v1:%s", messageID),
   148  	})
   149  
   150  	var outcome string
   151  	if err == nil {
   152  		outcome = "ok"
   153  	} else if pme, _ := err.(bigquery.PutMultiError); len(pme) != 0 {
   154  		outcome = "bad_row"
   155  	} else if ctx.Err() != nil {
   156  		outcome = "deadline"
   157  	} else {
   158  		outcome = "error"
   159  	}
   160  
   161  	bigQueryInserts.Add(ctx, 1,
   162  		fmt.Sprintf("%s/%s/%s", tab.ProjectID, tab.DatasetID, tab.TableID),
   163  		outcome)
   164  
   165  	return err
   166  }