github.com/distbuild/reclient@v0.0.0-20240401075343-3de72e395564/cmd/rpl2cloudtrace/rpl2cloudtrace.go (about)

     1  // Copyright 2023 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Binary rpl2cloudtrace converts *.rpl into cloud trace.
    16  //
    17  //	$ rpl2cloudtrace --log_path text:///tmp/reproxy_log.rpl \
    18  //	    --project_id rbe-chromium-trusted
    19  package main
    20  
    21  import (
    22  	"context"
    23  	"crypto/rand"
    24  	"crypto/sha256"
    25  	"encoding/hex"
    26  	"flag"
    27  	"fmt"
    28  	"os"
    29  	"path"
    30  	"strings"
    31  	"time"
    32  
    33  	lpb "github.com/bazelbuild/reclient/api/log"
    34  	"github.com/bazelbuild/reclient/internal/pkg/labels"
    35  	"github.com/bazelbuild/reclient/internal/pkg/logger"
    36  	"github.com/bazelbuild/reclient/internal/pkg/rbeflag"
    37  
    38  	"github.com/bazelbuild/remote-apis-sdks/go/pkg/moreflag"
    39  	"google.golang.org/grpc/codes"
    40  	"google.golang.org/grpc/status"
    41  
    42  	trace "cloud.google.com/go/trace/apiv2"
    43  	cpb "github.com/bazelbuild/remote-apis-sdks/go/api/command"
    44  	log "github.com/golang/glog"
    45  	tpb "google.golang.org/genproto/googleapis/devtools/cloudtrace/v2"
    46  	spb "google.golang.org/genproto/googleapis/rpc/status"
    47  	tspb "google.golang.org/protobuf/types/known/timestamppb"
    48  )
    49  
    50  var (
    51  	proxyLogDir []string
    52  	logPath     = flag.String("log_path", "", "Path to reproxy_log.txt file. E.g., text:///tmp/reproxy_log.txt")
    53  	logFormat   = flag.String("log_format", "text", "Format of proxy log. Currently only text is supported.")
    54  	projectID   = flag.String("project_id", os.Getenv("GOOGLE_CLOUD_PROJECT"), "project id for cloud trace")
    55  
    56  	traceThreshold = flag.Duration("trace_threshold", 1*time.Second, "threshold to emit trace")
    57  	spanThreshold  = flag.Duration("span_threshold", 1*time.Microsecond, "threshold to emit span")
    58  	batchSize      = flag.Int("batch_size", 1500, "batch size for BatchWriteSpans")
    59  
    60  	traceAttributes []string
    61  )
    62  
    63  func init() {
    64  	flag.Var((*moreflag.StringListValue)(&proxyLogDir), "proxy_log_dir", "If provided, the directory path to a proxy log file of executed records.")
    65  	flag.Var((*moreflag.StringListValue)(&traceAttributes), "attributes", "comma separated key=value for attributes")
    66  }
    67  
    68  func truncatableString(s string, limit int) *tpb.TruncatableString {
    69  	n := len(s)
    70  	if n < limit {
    71  		return &tpb.TruncatableString{Value: s}
    72  	}
    73  	return &tpb.TruncatableString{
    74  		Value:              s[:limit],
    75  		TruncatedByteCount: int32(n - limit),
    76  	}
    77  }
    78  
    79  func convertEventTime(cat string, tctx *traceContext, parent string, ets map[string]*cpb.TimeInterval, attrs *tpb.Span_Attributes, st *spb.Status) (spans []*tpb.Span, start, end time.Time) {
    80  	for key, et := range ets {
    81  		// ignore events that doesn't have timestamp
    82  		// e.g. ServerWorkerExecution when RBE execution failed.
    83  		if et.From == nil || et.To == nil {
    84  			continue
    85  		}
    86  		from := et.GetFrom().AsTime()
    87  		to := et.GetTo().AsTime()
    88  		if start.IsZero() || start.After(from) {
    89  			start = from
    90  		}
    91  		if end.IsZero() || end.Before(to) {
    92  			end = to
    93  		}
    94  		dur := to.Sub(from)
    95  		if dur < *spanThreshold {
    96  			log.V(1).Infof("drop short span: %s.%s %s", cat, key, dur)
    97  			continue
    98  		}
    99  		sctx := tctx.newSpanContext(fmt.Sprintf("%s.%s", cat, key))
   100  		spans = append(spans, &tpb.Span{
   101  			Name:         sctx.Name(),
   102  			SpanId:       sctx.SpanID(),
   103  			ParentSpanId: parent,
   104  			DisplayName:  truncatableString(fmt.Sprintf("reproxy.%s.%s", cat, key), 128),
   105  			StartTime:    tspb.New(from),
   106  			EndTime:      tspb.New(to),
   107  			Attributes:   attrs,
   108  			Status:       st,
   109  			SpanKind:     tpb.Span_INTERNAL,
   110  		})
   111  		log.V(1).Infof("%s %s %s.%s %s %s %s", sctx.TraceID(), sctx.SpanID(), cat, key, start, end, dur)
   112  	}
   113  	return spans, start, end
   114  }
   115  
   116  type traceContext struct {
   117  	projectID string
   118  	traceID   [16]byte
   119  	res       map[string]*tpb.AttributeValue
   120  }
   121  
   122  func newTraceContext(projectID string, execID string, res map[string]*tpb.AttributeValue) (*traceContext, error) {
   123  	var traceID [16]byte
   124  	_, err := rand.Read(traceID[:])
   125  	if err != nil {
   126  		return nil, err
   127  	}
   128  	if execID != "" {
   129  		b, err := hex.DecodeString(strings.ReplaceAll(execID, "-", ""))
   130  		if err != nil {
   131  			log.Warningf("Failed to parse execID %q: %v", execID, err)
   132  		} else {
   133  			copy(traceID[:], b)
   134  		}
   135  	} else {
   136  		log.Warningf("execution_id is empty. Use %s as traceID %s", hex.EncodeToString(traceID[:]))
   137  	}
   138  	// can we use execution id or so as trace id?
   139  	return &traceContext{
   140  		projectID: projectID,
   141  		traceID:   traceID,
   142  		res:       res,
   143  	}, nil
   144  }
   145  
   146  func (tc *traceContext) newSpanContext(name string) spanContext {
   147  	var spanID [8]byte
   148  	s := sha256.Sum256([]byte(name))
   149  	copy(spanID[:], s[:])
   150  	return spanContext{
   151  		t:      tc,
   152  		spanID: spanID,
   153  	}
   154  }
   155  
   156  func (tc *traceContext) TraceID() string {
   157  	return hex.EncodeToString(tc.traceID[:])
   158  }
   159  
   160  func (tc *traceContext) attrs(attrs map[string]*tpb.AttributeValue) *tpb.Span_Attributes {
   161  	m := make(map[string]*tpb.AttributeValue)
   162  	for k, v := range tc.res {
   163  		m[k] = v
   164  	}
   165  	for k, v := range attrs {
   166  		m[k] = v
   167  	}
   168  	return &tpb.Span_Attributes{
   169  		AttributeMap: m,
   170  	}
   171  }
   172  
   173  type spanContext struct {
   174  	t      *traceContext
   175  	spanID [8]byte
   176  }
   177  
   178  func (sc spanContext) Name() string {
   179  	return path.Join("projects", sc.t.projectID,
   180  		"traces", hex.EncodeToString(sc.t.traceID[:]),
   181  		"spans", hex.EncodeToString(sc.spanID[:]))
   182  }
   183  
   184  func (sc spanContext) TraceID() string {
   185  	return sc.t.TraceID()
   186  }
   187  
   188  func (sc spanContext) SpanID() string {
   189  	return hex.EncodeToString(sc.spanID[:])
   190  }
   191  
   192  func attrValue(v interface{}) *tpb.AttributeValue {
   193  	var iv int64
   194  	switch v := v.(type) {
   195  	case string:
   196  		return &tpb.AttributeValue{
   197  			Value: &tpb.AttributeValue_StringValue{truncatableString(v, 256)},
   198  		}
   199  	case bool:
   200  		return &tpb.AttributeValue{
   201  			Value: &tpb.AttributeValue_BoolValue{v},
   202  		}
   203  	case int:
   204  		iv = int64(v)
   205  	case uint:
   206  		iv = int64(v)
   207  	case int8:
   208  		iv = int64(v)
   209  	case uint8:
   210  		iv = int64(v)
   211  	case int16:
   212  		iv = int64(v)
   213  	case uint16:
   214  		iv = int64(v)
   215  	case int32:
   216  		iv = int64(v)
   217  	case uint32:
   218  		iv = int64(v)
   219  	case int64:
   220  		iv = v
   221  	case uint64:
   222  		iv = int64(v)
   223  	}
   224  	return &tpb.AttributeValue{
   225  		Value: &tpb.AttributeValue_IntValue{iv},
   226  	}
   227  }
   228  
   229  func statusCode(s cpb.CommandResultStatus_Value) *spb.Status {
   230  	switch s {
   231  	case cpb.CommandResultStatus_SUCCESS, cpb.CommandResultStatus_CACHE_HIT:
   232  		return status.New(codes.OK, s.String()).Proto()
   233  	case cpb.CommandResultStatus_NON_ZERO_EXIT,
   234  		cpb.CommandResultStatus_REMOTE_ERROR,
   235  		cpb.CommandResultStatus_LOCAL_ERROR:
   236  		return status.New(codes.Internal, s.String()).Proto()
   237  
   238  	case cpb.CommandResultStatus_TIMEOUT:
   239  		return status.New(codes.DeadlineExceeded, s.String()).Proto()
   240  	case cpb.CommandResultStatus_INTERRUPTED:
   241  		return status.New(codes.Canceled, s.String()).Proto()
   242  	default:
   243  		return status.New(codes.Unknown, s.String()).Proto()
   244  	}
   245  }
   246  
   247  func convertLogRecord(ctx context.Context, logrec *lpb.LogRecord, tctx *traceContext) []*tpb.Span {
   248  	var spans []*tpb.Span
   249  	var pfrom, pto time.Time
   250  
   251  	sctx := tctx.newSpanContext("RunCommand")
   252  	parent := sctx.SpanID()
   253  
   254  	output := "???"
   255  	if outputs := logrec.GetCommand().GetOutput().GetOutputFiles(); len(outputs) > 0 {
   256  		output = outputs[0]
   257  	}
   258  	wd := logrec.GetCommand().GetWorkingDirectory()
   259  	output = strings.TrimPrefix(output, wd)
   260  
   261  	identifiers := logrec.GetCommand().GetIdentifiers()
   262  	rm := logrec.GetRemoteMetadata()
   263  	if rm != nil {
   264  		sps, rfrom, rto := convertEventTime("Remote", tctx, parent, rm.GetEventTimes(), tctx.attrs(map[string]*tpb.AttributeValue{
   265  			"action_digest":          attrValue(rm.GetActionDigest()),
   266  			"status":                 attrValue(rm.GetResult().GetStatus().String()),
   267  			"exit_code":              attrValue(rm.GetResult().GetExitCode()),
   268  			"cache_hit":              attrValue(rm.GetCacheHit()),
   269  			"num_input_files":        attrValue(rm.GetNumInputFiles()),
   270  			"num_input_directories":  attrValue(rm.GetNumInputDirectories()),
   271  			"total_input_bytes":      attrValue(rm.GetTotalInputBytes()),
   272  			"num_output_files":       attrValue(rm.GetNumOutputFiles()),
   273  			"num_output_directories": attrValue(rm.GetNumOutputDirectories()),
   274  			"total_output_bytes":     attrValue(rm.GetTotalOutputBytes()),
   275  		}), statusCode(rm.GetResult().GetStatus()))
   276  		spans = append(spans, sps...)
   277  		if pfrom.IsZero() || pfrom.After(rfrom) {
   278  			pfrom = rfrom
   279  		}
   280  		if pto.IsZero() || pto.Before(rto) {
   281  			pto = rto
   282  		}
   283  	}
   284  	lm := logrec.GetLocalMetadata()
   285  	if lm != nil {
   286  		sps, lfrom, lto := convertEventTime("Local", tctx, parent, lm.GetEventTimes(), tctx.attrs(map[string]*tpb.AttributeValue{
   287  			"status":           attrValue(lm.GetResult().GetStatus().String()),
   288  			"exit_code":        attrValue(lm.GetResult().GetExitCode()),
   289  			"executed_locally": attrValue(lm.GetExecutedLocally()),
   290  
   291  			"valid_cache_hit": attrValue(lm.GetValidCacheHit()),
   292  			"updated_cache":   attrValue(lm.GetUpdatedCache()),
   293  			"labels":          attrValue(labels.ToKey(lm.GetLabels())),
   294  		}), statusCode(lm.GetResult().GetStatus()))
   295  		spans = append(spans, sps...)
   296  		if pfrom.IsZero() || pfrom.After(lfrom) {
   297  			pfrom = lfrom
   298  		}
   299  		if pto.IsZero() || pto.Before(lto) {
   300  			pto = lto
   301  		}
   302  	}
   303  	st := statusCode(logrec.GetResult().GetStatus())
   304  	attrs := tctx.attrs(map[string]*tpb.AttributeValue{
   305  		"output":                    attrValue(output),
   306  		"command_id":                attrValue(identifiers.GetCommandId()),
   307  		"invocation_id":             attrValue(identifiers.GetInvocationId()),
   308  		"correlated_invocations_id": attrValue(identifiers.GetCorrelatedInvocationsId()),
   309  		"tool_name":                 attrValue(identifiers.GetToolName()),
   310  		"tool_version":              attrValue(identifiers.GetToolVersion()),
   311  		"execution_id":              attrValue(identifiers.GetExecutionId()),
   312  		"exit_code":                 attrValue(logrec.GetResult().GetExitCode()),
   313  	})
   314  	if msg := logrec.GetResult().GetMsg(); msg != "" {
   315  		st.Message = fmt.Sprintf("%s: %s", st.Message, msg)
   316  	}
   317  	spans = append(spans, &tpb.Span{
   318  		Name:         sctx.Name(),
   319  		SpanId:       sctx.SpanID(),
   320  		ParentSpanId: "",
   321  		DisplayName:  truncatableString("reproxy.RunCommand", 128),
   322  		StartTime:    tspb.New(pfrom),
   323  		EndTime:      tspb.New(pto),
   324  		Attributes:   attrs,
   325  		Status:       st,
   326  	})
   327  	dur := pto.Sub(pfrom)
   328  	if dur < *traceThreshold {
   329  		log.V(1).Infof("drop trace %s dur=%s", identifiers, dur)
   330  		return nil
   331  	}
   332  	log.Infof("%s %s reproxy.RunCommand %s %s %s", sctx.TraceID(), sctx.SpanID(), pfrom, pto, dur)
   333  	return spans
   334  }
   335  
   336  func convert(ctx context.Context, logs []*lpb.LogRecord, projectID string, res map[string]*tpb.AttributeValue, client *trace.Client) ([]*tpb.Span, error) {
   337  	var spans []*tpb.Span
   338  	var batch []*tpb.Span
   339  	for _, logrec := range logs {
   340  		identifiers := logrec.GetCommand().GetIdentifiers()
   341  		tctx, err := newTraceContext(projectID, identifiers.GetExecutionId(), res)
   342  		if err != nil {
   343  			return spans, err
   344  		}
   345  		sps := convertLogRecord(ctx, logrec, tctx)
   346  		batch = append(batch, sps...)
   347  		if len(batch) > *batchSize {
   348  			t := time.Now()
   349  			err = client.BatchWriteSpans(ctx, &tpb.BatchWriteSpansRequest{
   350  				Name:  path.Join("projects", projectID),
   351  				Spans: batch,
   352  			})
   353  			if err != nil {
   354  				return spans, fmt.Errorf("failed to export spans: %v", err)
   355  			}
   356  			log.V(1).Infof("exported %d spans: %s", len(batch), time.Since(t))
   357  			spans = append(spans, batch...)
   358  			batch = nil
   359  		}
   360  	}
   361  	if len(batch) > 0 {
   362  		t := time.Now()
   363  		err := client.BatchWriteSpans(ctx, &tpb.BatchWriteSpansRequest{
   364  			Name:  path.Join("projects", projectID),
   365  			Spans: batch,
   366  		})
   367  		if err != nil {
   368  			return spans, fmt.Errorf("failed to export spans: %v", err)
   369  		}
   370  		log.V(1).Infof("exported %d spans: %s", len(batch), time.Since(t))
   371  		spans = append(spans, batch...)
   372  		batch = nil
   373  	}
   374  	return spans, nil
   375  }
   376  
   377  func main() {
   378  	defer log.Flush()
   379  	ctx := context.Background()
   380  	rbeflag.Parse()
   381  	var logRecords []*lpb.LogRecord
   382  	var err error
   383  	switch {
   384  	case *logPath != "":
   385  		log.Infof("Loading log from %v...", *logPath)
   386  		logRecords, err = logger.ParseFromFormatFile(*logPath)
   387  		if err != nil {
   388  			log.Fatalf("Failed reading proxy log: %v", err)
   389  		}
   390  	case len(proxyLogDir) > 0:
   391  		format, err := logger.ParseFormat(*logFormat)
   392  		if err != nil {
   393  			log.Fatal(err)
   394  		}
   395  		log.Infof("Loading log from %v %q...", format, proxyLogDir)
   396  		logRecords, _, err = logger.ParseFromLogDirs(format, proxyLogDir)
   397  		if err != nil {
   398  			log.Fatalf("Failed reading proxy log: %v", err)
   399  		}
   400  	default:
   401  		log.Fatal("Must provide proxy log path or proxy log dir.")
   402  	}
   403  
   404  	log.Infof("export to project=%s", *projectID)
   405  	client, err := trace.NewClient(ctx)
   406  	if err != nil {
   407  		log.Fatalf("Failed to create trace client: %v", err)
   408  	}
   409  	defer client.Close()
   410  	res := map[string]*tpb.AttributeValue{}
   411  	for _, attr := range traceAttributes {
   412  		i := strings.Index(attr, "=")
   413  		if i < 0 {
   414  			log.Fatalf("bad attributes: %q", attr)
   415  		}
   416  		key := attr[:i]
   417  		value := attr[i+1:]
   418  		res[key] = attrValue(value)
   419  	}
   420  
   421  	spans, err := convert(ctx, logRecords, *projectID, res, client)
   422  	if err != nil {
   423  		log.Fatalf("Failed to convert: %v", err)
   424  	}
   425  	log.Infof("%d requests %d spans", len(logRecords), len(spans))
   426  }