github.com/distbuild/reclient@v0.0.0-20240401075343-3de72e395564/cmd/rpl2cloudtrace/rpl2cloudtrace.go (about) 1 // Copyright 2023 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Binary rpl2cloudtrace converts *.rpl into cloud trace. 16 // 17 // $ rpl2cloudtrace --log_path text:///tmp/reproxy_log.rpl \ 18 // --project_id rbe-chromium-trusted 19 package main 20 21 import ( 22 "context" 23 "crypto/rand" 24 "crypto/sha256" 25 "encoding/hex" 26 "flag" 27 "fmt" 28 "os" 29 "path" 30 "strings" 31 "time" 32 33 lpb "github.com/bazelbuild/reclient/api/log" 34 "github.com/bazelbuild/reclient/internal/pkg/labels" 35 "github.com/bazelbuild/reclient/internal/pkg/logger" 36 "github.com/bazelbuild/reclient/internal/pkg/rbeflag" 37 38 "github.com/bazelbuild/remote-apis-sdks/go/pkg/moreflag" 39 "google.golang.org/grpc/codes" 40 "google.golang.org/grpc/status" 41 42 trace "cloud.google.com/go/trace/apiv2" 43 cpb "github.com/bazelbuild/remote-apis-sdks/go/api/command" 44 log "github.com/golang/glog" 45 tpb "google.golang.org/genproto/googleapis/devtools/cloudtrace/v2" 46 spb "google.golang.org/genproto/googleapis/rpc/status" 47 tspb "google.golang.org/protobuf/types/known/timestamppb" 48 ) 49 50 var ( 51 proxyLogDir []string 52 logPath = flag.String("log_path", "", "Path to reproxy_log.txt file. E.g., text:///tmp/reproxy_log.txt") 53 logFormat = flag.String("log_format", "text", "Format of proxy log. Currently only text is supported.") 54 projectID = flag.String("project_id", os.Getenv("GOOGLE_CLOUD_PROJECT"), "project id for cloud trace") 55 56 traceThreshold = flag.Duration("trace_threshold", 1*time.Second, "threshold to emit trace") 57 spanThreshold = flag.Duration("span_threshold", 1*time.Microsecond, "threshold to emit span") 58 batchSize = flag.Int("batch_size", 1500, "batch size for BatchWriteSpans") 59 60 traceAttributes []string 61 ) 62 63 func init() { 64 flag.Var((*moreflag.StringListValue)(&proxyLogDir), "proxy_log_dir", "If provided, the directory path to a proxy log file of executed records.") 65 flag.Var((*moreflag.StringListValue)(&traceAttributes), "attributes", "comma separated key=value for attributes") 66 } 67 68 func truncatableString(s string, limit int) *tpb.TruncatableString { 69 n := len(s) 70 if n < limit { 71 return &tpb.TruncatableString{Value: s} 72 } 73 return &tpb.TruncatableString{ 74 Value: s[:limit], 75 TruncatedByteCount: int32(n - limit), 76 } 77 } 78 79 func convertEventTime(cat string, tctx *traceContext, parent string, ets map[string]*cpb.TimeInterval, attrs *tpb.Span_Attributes, st *spb.Status) (spans []*tpb.Span, start, end time.Time) { 80 for key, et := range ets { 81 // ignore events that doesn't have timestamp 82 // e.g. ServerWorkerExecution when RBE execution failed. 83 if et.From == nil || et.To == nil { 84 continue 85 } 86 from := et.GetFrom().AsTime() 87 to := et.GetTo().AsTime() 88 if start.IsZero() || start.After(from) { 89 start = from 90 } 91 if end.IsZero() || end.Before(to) { 92 end = to 93 } 94 dur := to.Sub(from) 95 if dur < *spanThreshold { 96 log.V(1).Infof("drop short span: %s.%s %s", cat, key, dur) 97 continue 98 } 99 sctx := tctx.newSpanContext(fmt.Sprintf("%s.%s", cat, key)) 100 spans = append(spans, &tpb.Span{ 101 Name: sctx.Name(), 102 SpanId: sctx.SpanID(), 103 ParentSpanId: parent, 104 DisplayName: truncatableString(fmt.Sprintf("reproxy.%s.%s", cat, key), 128), 105 StartTime: tspb.New(from), 106 EndTime: tspb.New(to), 107 Attributes: attrs, 108 Status: st, 109 SpanKind: tpb.Span_INTERNAL, 110 }) 111 log.V(1).Infof("%s %s %s.%s %s %s %s", sctx.TraceID(), sctx.SpanID(), cat, key, start, end, dur) 112 } 113 return spans, start, end 114 } 115 116 type traceContext struct { 117 projectID string 118 traceID [16]byte 119 res map[string]*tpb.AttributeValue 120 } 121 122 func newTraceContext(projectID string, execID string, res map[string]*tpb.AttributeValue) (*traceContext, error) { 123 var traceID [16]byte 124 _, err := rand.Read(traceID[:]) 125 if err != nil { 126 return nil, err 127 } 128 if execID != "" { 129 b, err := hex.DecodeString(strings.ReplaceAll(execID, "-", "")) 130 if err != nil { 131 log.Warningf("Failed to parse execID %q: %v", execID, err) 132 } else { 133 copy(traceID[:], b) 134 } 135 } else { 136 log.Warningf("execution_id is empty. Use %s as traceID %s", hex.EncodeToString(traceID[:])) 137 } 138 // can we use execution id or so as trace id? 139 return &traceContext{ 140 projectID: projectID, 141 traceID: traceID, 142 res: res, 143 }, nil 144 } 145 146 func (tc *traceContext) newSpanContext(name string) spanContext { 147 var spanID [8]byte 148 s := sha256.Sum256([]byte(name)) 149 copy(spanID[:], s[:]) 150 return spanContext{ 151 t: tc, 152 spanID: spanID, 153 } 154 } 155 156 func (tc *traceContext) TraceID() string { 157 return hex.EncodeToString(tc.traceID[:]) 158 } 159 160 func (tc *traceContext) attrs(attrs map[string]*tpb.AttributeValue) *tpb.Span_Attributes { 161 m := make(map[string]*tpb.AttributeValue) 162 for k, v := range tc.res { 163 m[k] = v 164 } 165 for k, v := range attrs { 166 m[k] = v 167 } 168 return &tpb.Span_Attributes{ 169 AttributeMap: m, 170 } 171 } 172 173 type spanContext struct { 174 t *traceContext 175 spanID [8]byte 176 } 177 178 func (sc spanContext) Name() string { 179 return path.Join("projects", sc.t.projectID, 180 "traces", hex.EncodeToString(sc.t.traceID[:]), 181 "spans", hex.EncodeToString(sc.spanID[:])) 182 } 183 184 func (sc spanContext) TraceID() string { 185 return sc.t.TraceID() 186 } 187 188 func (sc spanContext) SpanID() string { 189 return hex.EncodeToString(sc.spanID[:]) 190 } 191 192 func attrValue(v interface{}) *tpb.AttributeValue { 193 var iv int64 194 switch v := v.(type) { 195 case string: 196 return &tpb.AttributeValue{ 197 Value: &tpb.AttributeValue_StringValue{truncatableString(v, 256)}, 198 } 199 case bool: 200 return &tpb.AttributeValue{ 201 Value: &tpb.AttributeValue_BoolValue{v}, 202 } 203 case int: 204 iv = int64(v) 205 case uint: 206 iv = int64(v) 207 case int8: 208 iv = int64(v) 209 case uint8: 210 iv = int64(v) 211 case int16: 212 iv = int64(v) 213 case uint16: 214 iv = int64(v) 215 case int32: 216 iv = int64(v) 217 case uint32: 218 iv = int64(v) 219 case int64: 220 iv = v 221 case uint64: 222 iv = int64(v) 223 } 224 return &tpb.AttributeValue{ 225 Value: &tpb.AttributeValue_IntValue{iv}, 226 } 227 } 228 229 func statusCode(s cpb.CommandResultStatus_Value) *spb.Status { 230 switch s { 231 case cpb.CommandResultStatus_SUCCESS, cpb.CommandResultStatus_CACHE_HIT: 232 return status.New(codes.OK, s.String()).Proto() 233 case cpb.CommandResultStatus_NON_ZERO_EXIT, 234 cpb.CommandResultStatus_REMOTE_ERROR, 235 cpb.CommandResultStatus_LOCAL_ERROR: 236 return status.New(codes.Internal, s.String()).Proto() 237 238 case cpb.CommandResultStatus_TIMEOUT: 239 return status.New(codes.DeadlineExceeded, s.String()).Proto() 240 case cpb.CommandResultStatus_INTERRUPTED: 241 return status.New(codes.Canceled, s.String()).Proto() 242 default: 243 return status.New(codes.Unknown, s.String()).Proto() 244 } 245 } 246 247 func convertLogRecord(ctx context.Context, logrec *lpb.LogRecord, tctx *traceContext) []*tpb.Span { 248 var spans []*tpb.Span 249 var pfrom, pto time.Time 250 251 sctx := tctx.newSpanContext("RunCommand") 252 parent := sctx.SpanID() 253 254 output := "???" 255 if outputs := logrec.GetCommand().GetOutput().GetOutputFiles(); len(outputs) > 0 { 256 output = outputs[0] 257 } 258 wd := logrec.GetCommand().GetWorkingDirectory() 259 output = strings.TrimPrefix(output, wd) 260 261 identifiers := logrec.GetCommand().GetIdentifiers() 262 rm := logrec.GetRemoteMetadata() 263 if rm != nil { 264 sps, rfrom, rto := convertEventTime("Remote", tctx, parent, rm.GetEventTimes(), tctx.attrs(map[string]*tpb.AttributeValue{ 265 "action_digest": attrValue(rm.GetActionDigest()), 266 "status": attrValue(rm.GetResult().GetStatus().String()), 267 "exit_code": attrValue(rm.GetResult().GetExitCode()), 268 "cache_hit": attrValue(rm.GetCacheHit()), 269 "num_input_files": attrValue(rm.GetNumInputFiles()), 270 "num_input_directories": attrValue(rm.GetNumInputDirectories()), 271 "total_input_bytes": attrValue(rm.GetTotalInputBytes()), 272 "num_output_files": attrValue(rm.GetNumOutputFiles()), 273 "num_output_directories": attrValue(rm.GetNumOutputDirectories()), 274 "total_output_bytes": attrValue(rm.GetTotalOutputBytes()), 275 }), statusCode(rm.GetResult().GetStatus())) 276 spans = append(spans, sps...) 277 if pfrom.IsZero() || pfrom.After(rfrom) { 278 pfrom = rfrom 279 } 280 if pto.IsZero() || pto.Before(rto) { 281 pto = rto 282 } 283 } 284 lm := logrec.GetLocalMetadata() 285 if lm != nil { 286 sps, lfrom, lto := convertEventTime("Local", tctx, parent, lm.GetEventTimes(), tctx.attrs(map[string]*tpb.AttributeValue{ 287 "status": attrValue(lm.GetResult().GetStatus().String()), 288 "exit_code": attrValue(lm.GetResult().GetExitCode()), 289 "executed_locally": attrValue(lm.GetExecutedLocally()), 290 291 "valid_cache_hit": attrValue(lm.GetValidCacheHit()), 292 "updated_cache": attrValue(lm.GetUpdatedCache()), 293 "labels": attrValue(labels.ToKey(lm.GetLabels())), 294 }), statusCode(lm.GetResult().GetStatus())) 295 spans = append(spans, sps...) 296 if pfrom.IsZero() || pfrom.After(lfrom) { 297 pfrom = lfrom 298 } 299 if pto.IsZero() || pto.Before(lto) { 300 pto = lto 301 } 302 } 303 st := statusCode(logrec.GetResult().GetStatus()) 304 attrs := tctx.attrs(map[string]*tpb.AttributeValue{ 305 "output": attrValue(output), 306 "command_id": attrValue(identifiers.GetCommandId()), 307 "invocation_id": attrValue(identifiers.GetInvocationId()), 308 "correlated_invocations_id": attrValue(identifiers.GetCorrelatedInvocationsId()), 309 "tool_name": attrValue(identifiers.GetToolName()), 310 "tool_version": attrValue(identifiers.GetToolVersion()), 311 "execution_id": attrValue(identifiers.GetExecutionId()), 312 "exit_code": attrValue(logrec.GetResult().GetExitCode()), 313 }) 314 if msg := logrec.GetResult().GetMsg(); msg != "" { 315 st.Message = fmt.Sprintf("%s: %s", st.Message, msg) 316 } 317 spans = append(spans, &tpb.Span{ 318 Name: sctx.Name(), 319 SpanId: sctx.SpanID(), 320 ParentSpanId: "", 321 DisplayName: truncatableString("reproxy.RunCommand", 128), 322 StartTime: tspb.New(pfrom), 323 EndTime: tspb.New(pto), 324 Attributes: attrs, 325 Status: st, 326 }) 327 dur := pto.Sub(pfrom) 328 if dur < *traceThreshold { 329 log.V(1).Infof("drop trace %s dur=%s", identifiers, dur) 330 return nil 331 } 332 log.Infof("%s %s reproxy.RunCommand %s %s %s", sctx.TraceID(), sctx.SpanID(), pfrom, pto, dur) 333 return spans 334 } 335 336 func convert(ctx context.Context, logs []*lpb.LogRecord, projectID string, res map[string]*tpb.AttributeValue, client *trace.Client) ([]*tpb.Span, error) { 337 var spans []*tpb.Span 338 var batch []*tpb.Span 339 for _, logrec := range logs { 340 identifiers := logrec.GetCommand().GetIdentifiers() 341 tctx, err := newTraceContext(projectID, identifiers.GetExecutionId(), res) 342 if err != nil { 343 return spans, err 344 } 345 sps := convertLogRecord(ctx, logrec, tctx) 346 batch = append(batch, sps...) 347 if len(batch) > *batchSize { 348 t := time.Now() 349 err = client.BatchWriteSpans(ctx, &tpb.BatchWriteSpansRequest{ 350 Name: path.Join("projects", projectID), 351 Spans: batch, 352 }) 353 if err != nil { 354 return spans, fmt.Errorf("failed to export spans: %v", err) 355 } 356 log.V(1).Infof("exported %d spans: %s", len(batch), time.Since(t)) 357 spans = append(spans, batch...) 358 batch = nil 359 } 360 } 361 if len(batch) > 0 { 362 t := time.Now() 363 err := client.BatchWriteSpans(ctx, &tpb.BatchWriteSpansRequest{ 364 Name: path.Join("projects", projectID), 365 Spans: batch, 366 }) 367 if err != nil { 368 return spans, fmt.Errorf("failed to export spans: %v", err) 369 } 370 log.V(1).Infof("exported %d spans: %s", len(batch), time.Since(t)) 371 spans = append(spans, batch...) 372 batch = nil 373 } 374 return spans, nil 375 } 376 377 func main() { 378 defer log.Flush() 379 ctx := context.Background() 380 rbeflag.Parse() 381 var logRecords []*lpb.LogRecord 382 var err error 383 switch { 384 case *logPath != "": 385 log.Infof("Loading log from %v...", *logPath) 386 logRecords, err = logger.ParseFromFormatFile(*logPath) 387 if err != nil { 388 log.Fatalf("Failed reading proxy log: %v", err) 389 } 390 case len(proxyLogDir) > 0: 391 format, err := logger.ParseFormat(*logFormat) 392 if err != nil { 393 log.Fatal(err) 394 } 395 log.Infof("Loading log from %v %q...", format, proxyLogDir) 396 logRecords, _, err = logger.ParseFromLogDirs(format, proxyLogDir) 397 if err != nil { 398 log.Fatalf("Failed reading proxy log: %v", err) 399 } 400 default: 401 log.Fatal("Must provide proxy log path or proxy log dir.") 402 } 403 404 log.Infof("export to project=%s", *projectID) 405 client, err := trace.NewClient(ctx) 406 if err != nil { 407 log.Fatalf("Failed to create trace client: %v", err) 408 } 409 defer client.Close() 410 res := map[string]*tpb.AttributeValue{} 411 for _, attr := range traceAttributes { 412 i := strings.Index(attr, "=") 413 if i < 0 { 414 log.Fatalf("bad attributes: %q", attr) 415 } 416 key := attr[:i] 417 value := attr[i+1:] 418 res[key] = attrValue(value) 419 } 420 421 spans, err := convert(ctx, logRecords, *projectID, res, client) 422 if err != nil { 423 log.Fatalf("Failed to convert: %v", err) 424 } 425 log.Infof("%d requests %d spans", len(logRecords), len(spans)) 426 }