// Copyright 2016 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//	http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package archivist

import (
	"context"
	"crypto/sha256"
	"encoding/base64"
	"io"
	"regexp"

	cl "cloud.google.com/go/logging"
	"github.com/golang/protobuf/proto"
	mrpb "google.golang.org/genproto/googleapis/api/monitoredres"

	"go.chromium.org/luci/common/errors"
	"go.chromium.org/luci/common/gcloud"
	"go.chromium.org/luci/common/gcloud/gs"
	"go.chromium.org/luci/common/logging"
	"go.chromium.org/luci/common/retry/transient"
	"go.chromium.org/luci/common/sync/parallel"
	"go.chromium.org/luci/common/tsmon/distribution"
	"go.chromium.org/luci/common/tsmon/field"
	"go.chromium.org/luci/common/tsmon/metric"
	tsmon_types "go.chromium.org/luci/common/tsmon/types"
	"go.chromium.org/luci/config"

	logdog "go.chromium.org/luci/logdog/api/endpoints/coordinator/services/v1"
	"go.chromium.org/luci/logdog/api/logpb"
	"go.chromium.org/luci/logdog/common/archive"
	"go.chromium.org/luci/logdog/common/storage"
	"go.chromium.org/luci/logdog/common/types"
	"go.chromium.org/luci/logdog/common/viewer"
)

const (
	tsEntriesField = "entries"
	tsIndexField   = "index"
)

// logIDRe validates Cloud Logging log IDs: up to 511 characters of
// alphanumerics and ./_-, where the first character may not be "/".
var logIDRe = regexp.MustCompile(`^[[:alnum:]._\-][[:alnum:]./_\-]{0,510}$`)

// CLClient is a general interface for the Cloud Logging client and is intended
// to enable unit tests to stub out Cloud Logging.
type CLClient interface {
	Close() error
	Logger(logID string, opts ...cl.LoggerOption) *cl.Logger
	Ping(context.Context) error
}
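// A minimal sketch (not part of the original file) of a CLClient stub for unit
// tests; the type name fakeCLClient is hypothetical. A real test double would
// typically also record the Logger calls it receives.
type fakeCLClient struct{}

func (fakeCLClient) Close() error                                            { return nil }
func (fakeCLClient) Logger(logID string, opts ...cl.LoggerOption) *cl.Logger { return nil }
func (fakeCLClient) Ping(context.Context) error                              { return nil }

var _ CLClient = fakeCLClient{} // compile-time interface check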
var (
	// tsCount counts the raw number of archival tasks that this instance has
	// processed, regardless of success/failure.
	tsCount = metric.NewCounter("logdog/archivist/archive/count",
		"The number of archival tasks processed.",
		nil,
		field.String("project"),
		field.Bool("successful"))

	// tsSize tracks the archive binary file size distribution of completed
	// archives.
	//
	// The "archive" field is the specific type of archive (entries, index, data)
	// that is being tracked.
	//
	// The "stream" field is the type of log stream that is being archived.
	tsSize = metric.NewCumulativeDistribution("logdog/archivist/archive/size",
		"The size (in bytes) of each archive file.",
		&tsmon_types.MetricMetadata{Units: tsmon_types.Bytes},
		distribution.DefaultBucketer,
		field.String("project"),
		field.String("archive"),
		field.String("stream"))

	// tsTotalBytes tracks the cumulative total number of bytes that have
	// been archived by this instance.
	//
	// The "archive" field is the specific type of archive (entries, index, data)
	// that is being tracked.
	//
	// The "stream" field is the type of log stream that is being archived.
	tsTotalBytes = metric.NewCounter("logdog/archivist/archive/total_bytes",
		"The total number of archived bytes.",
		&tsmon_types.MetricMetadata{Units: tsmon_types.Bytes},
		field.String("project"),
		field.String("archive"),
		field.String("stream"))

	// tsLogEntries tracks the number of log entries per individual
	// archival.
	//
	// The "stream" field is the type of log stream that is being archived.
	tsLogEntries = metric.NewCumulativeDistribution("logdog/archivist/archive/log_entries",
		"The total number of log entries per archive.",
		nil,
		distribution.DefaultBucketer,
		field.String("project"),
		field.String("stream"))

	// tsTotalLogEntries tracks the total number of log entries that have
	// been archived by this instance.
	//
	// The "stream" field is the type of log stream that is being archived.
	tsTotalLogEntries = metric.NewCounter("logdog/archivist/archive/total_log_entries",
		"The total number of log entries.",
		nil,
		field.String("project"),
		field.String("stream"))
)

// Settings defines the archival parameters for a specific archival operation.
//
// In practice, this will be formed from service and project settings.
type Settings struct {
	// GSBase is the base Google Storage path. This includes the bucket name
	// and any associated path.
	GSBase gs.Path
	// GSStagingBase is the base Google Storage path for archive staging. This
	// includes the bucket name and any associated path.
	GSStagingBase gs.Path

	// IndexStreamRange is the maximum number of stream indexes in between index
	// entries. See archive.Manifest for more information.
	IndexStreamRange int
	// IndexPrefixRange is the maximum number of prefix indexes in between index
	// entries. See archive.Manifest for more information.
	IndexPrefixRange int
	// IndexByteRange is the maximum number of stream data bytes in between index
	// entries. See archive.Manifest for more information.
	IndexByteRange int

	// CloudLoggingProjectID is the ID of the Google Cloud Platform project to
	// export logs to.
	//
	// May be empty, if no export is configured.
	CloudLoggingProjectID string
	// CloudLoggingBufferLimit is the maximum number of megabytes that the
	// Cloud Logger will keep in memory per concurrent-task before flushing them
	// out.
	CloudLoggingBufferLimit int
}

// SettingsLoader returns archival Settings for a given project.
type SettingsLoader func(ctx context.Context, project string) (*Settings, error)
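// A minimal sketch (not part of the original file) of a SettingsLoader that
// serves one fixed configuration for every project; the bucket paths are
// hypothetical. In production, these values are derived from service and
// project config instead.
var staticSettingsLoader SettingsLoader = func(ctx context.Context, project string) (*Settings, error) {
	return &Settings{
		GSBase:        gs.Path("gs://example-archive/logs"),    // hypothetical bucket
		GSStagingBase: gs.Path("gs://example-archive/staging"), // hypothetical bucket
	}, nil
}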
// Archivist is a stateless configuration capable of archiving individual log
// streams.
type Archivist struct {
	// Service is the client to use to communicate with Coordinator's Services
	// endpoint.
	Service logdog.ServicesClient

	// SettingsLoader loads archival settings for a specific project.
	SettingsLoader SettingsLoader

	// Storage is the archival source Storage instance.
	Storage storage.Storage

	// GSClientFactory obtains a Google Storage client for archive generation.
	GSClientFactory func(ctx context.Context, luciProject string) (gs.Client, error)

	// CLClientFactory obtains a Cloud Logging client for log exports.
	// `luciProject` is the ID of the LUCI project to export logs from, and
	// `clProject` is the ID of the Google Cloud project to export logs to.
	CLClientFactory func(ctx context.Context, luciProject, clProject string, onError func(err error)) (CLClient, error)
}

// storageBufferSize is the size, in bytes, of the LogEntry buffer that is used
// during archival. This should be greater than the maximum LogEntry size.
const storageBufferSize = types.MaxLogEntryDataSize * 64

// ArchiveTask processes and executes a single log stream archive task.
//
// If the supplied Context is Done, the operation may terminate before
// completion, returning the Context's error.
func (a *Archivist) ArchiveTask(ctx context.Context, task *logdog.ArchiveTask) error {
	err := a.archiveTaskImpl(ctx, task)

	failure := isFailure(err)

	// Add a result metric.
	tsCount.Add(ctx, 1, task.Project, !failure)

	return err
}
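// A hedged sketch (not part of the original file) of how a task-queue handler
// might act on ArchiveTask's error: transient errors leave the task queued for
// retry, while anything else acknowledges it. The function name
// handleArchiveTask is hypothetical.
func handleArchiveTask(ctx context.Context, a *Archivist, task *logdog.ArchiveTask) (retry bool) {
	switch err := a.ArchiveTask(ctx, task); {
	case err == nil:
		return false // success; ack the task
	case transient.Tag.In(err):
		return true // leave the task queued; a later attempt may succeed
	default:
		logging.WithError(err).Errorf(ctx, "Archival failed permanently.")
		return false // ack; retrying would hit the same error
	}
}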
// archiveTaskImpl performs the actual task archival.
//
// Its error return value is used to indicate how the archive failed. isFailure
// will be called to determine if the returned error value is a failure or a
// status error.
func (a *Archivist) archiveTaskImpl(ctx context.Context, task *logdog.ArchiveTask) error {
	// Validate the project name.
	if err := config.ValidateProjectName(task.Project); err != nil {
		logging.WithError(err).Errorf(ctx, "invalid project name: %q", task.Project)
		return nil
	}

	// Load archival settings for this project.
	settings, err := a.loadSettings(ctx, task.Project)
	switch {
	case err == config.ErrNoConfig:
		logging.WithError(err).Errorf(ctx, "The project config doesn't exist; discarding the task.")
		return nil
	case transient.Tag.In(err):
		// If this is a transient error, exit immediately and do not delete the
		// archival task.
		logging.WithError(err).Warningf(ctx, "TRANSIENT error during loading the project config.")
		return err
	case err != nil:
		// This project has bad or no archival settings. This is non-transient;
		// discard the task.
		logging.WithError(err).Errorf(ctx, "Failed to load settings for project.")
		return nil
	}

	// Load the log stream's current state. If it is already archived, we will
	// return an immediate success.
	ls, err := a.Service.LoadStream(ctx, &logdog.LoadStreamRequest{
		Project: task.Project,
		Id:      task.Id,
		Desc:    true,
	})
	switch {
	case err != nil:
		logging.WithError(err).Errorf(ctx, "Failed to load log stream.")
		return err

	case ls.State == nil:
		logging.Errorf(ctx, "Log stream did not include state.")
		return errors.New("log stream did not include state")

	case ls.State.Purged:
		logging.Warningf(ctx, "Log stream is purged. Discarding archival request.")
		a.expungeStorage(ctx, task.Project, ls.Desc, ls.State.TerminalIndex)
		return nil

	case ls.State.Archived:
		logging.Infof(ctx, "Log stream is already archived. Discarding archival request.")
		a.expungeStorage(ctx, task.Project, ls.Desc, ls.State.TerminalIndex)
		return nil

	case ls.State.ProtoVersion != logpb.Version:
		logging.Fields{
			"protoVersion":    ls.State.ProtoVersion,
			"expectedVersion": logpb.Version,
		}.Errorf(ctx, "Unsupported log stream protobuf version.")
		return errors.New("unsupported log stream protobuf version")

	case ls.Desc == nil:
		logging.Errorf(ctx, "Log stream did not include a descriptor.")
		return errors.New("log stream did not include a descriptor")
	}

	ar := logdog.ArchiveStreamRequest{
		Project: task.Project,
		Id:      task.Id,
	}

	// Build our staged archival plan. This doesn't actually do any archiving.
	staged, err := a.makeStagedArchival(ctx, task.Project, task.Realm, settings, ls)
	if err != nil {
		logging.WithError(err).Errorf(ctx, "Failed to create staged archival plan.")
		return err
	}

	// TODO(crbug.com/1164124) - handle the error from clClient.Close()
	defer staged.Close()

	// Archive to staging.
	//
	// If a non-transient failure occurs here, we will report it to the Archivist
	// under the assumption that it will continue occurring.
	//
	// Errors from both creating and executing the plan are handled in the same
	// switch statement below.
	switch err = staged.stage(); {
	case transient.Tag.In(err):
		// If this is a transient error, exit immediately and do not delete the
		// archival task.
		logging.WithError(err).Warningf(ctx, "TRANSIENT error during archival operation.")
		return err

	case err != nil:
		// This is a non-transient error, so we are confident that any future
		// archival will also encounter this error. We will mark this archival
		// as an error and report it to the Coordinator.
		logging.WithError(err).Errorf(ctx, "Archival failed with non-transient error.")
		ar.Error = err.Error()
		if ar.Error == "" {
			// This needs to be non-empty, so if our actual error has an empty
			// message, fill in a generic one.
			ar.Error = "archival error"
		}

	default:
		// In case something fails, clean up our staged archival (best effort).
		defer staged.cleanup()

		// Finalize the archival.
		if err := staged.finalize(&ar); err != nil {
			logging.WithError(err).Errorf(ctx, "Failed to finalize archival.")
			return err
		}

		// Add metrics for this successful archival.
		streamType := staged.desc.StreamType.String()

		staged.stream.addMetrics(ctx, task.Project, tsEntriesField, streamType)
		staged.index.addMetrics(ctx, task.Project, tsIndexField, streamType)

		tsLogEntries.Add(ctx, float64(staged.logEntryCount), task.Project, streamType)
		tsTotalLogEntries.Add(ctx, staged.logEntryCount, task.Project, streamType)
	}

	if _, err := a.Service.ArchiveStream(ctx, &ar); err != nil {
		logging.WithError(err).Errorf(ctx, "Failed to report archive state.")
		return err
	}
	a.expungeStorage(ctx, task.Project, ls.Desc, ar.TerminalIndex)

	return nil
}
// expungeStorage does a best-effort expunging of the intermediate storage
// (BigTable) rows after successful archival.
//
// `desc` is a binary-encoded LogStreamDescriptor. `terminalIndex` should be
// the terminal index of the archived stream; if it's <0 (an empty stream),
// the expunge is skipped.
func (a *Archivist) expungeStorage(ctx context.Context, project string, desc []byte, terminalIndex int64) {
	if terminalIndex < 0 {
		// no log rows
		return
	}

	if desc == nil {
		logging.Warningf(ctx, "expungeStorage: nil desc")
		return
	}

	var lsd logpb.LogStreamDescriptor
	if err := proto.Unmarshal(desc, &lsd); err != nil {
		logging.WithError(err).Warningf(ctx, "expungeStorage: decoding desc")
		return
	}

	err := a.Storage.Expunge(ctx, storage.ExpungeRequest{
		Path:    lsd.Path(),
		Project: project,
	})
	if err != nil {
		logging.WithError(err).Warningf(ctx, "expungeStorage: failed")
	}
}

// loadSettings loads and validates archival settings.
func (a *Archivist) loadSettings(ctx context.Context, project string) (*Settings, error) {
	if a.SettingsLoader == nil {
		panic("no settings loader configured")
	}

	st, err := a.SettingsLoader(ctx, project)
	switch {
	case err != nil:
		return nil, err

	case st.GSBase.Bucket() == "":
		logging.Fields{
			"gsBase": st.GSBase,
		}.Errorf(ctx, "Invalid storage base.")
		return nil, errors.New("invalid storage base")

	case st.GSStagingBase.Bucket() == "":
		logging.Fields{
			"gsStagingBase": st.GSStagingBase,
		}.Errorf(ctx, "Invalid storage staging base.")
		return nil, errors.New("invalid storage staging base")

	default:
		return st, nil
	}
}

func (a *Archivist) makeStagedArchival(ctx context.Context, project string, realm string,
	st *Settings, ls *logdog.LoadStreamResponse) (*stagedArchival, error) {

	gsClient, err := a.GSClientFactory(ctx, project)
	if err != nil {
		logging.Fields{
			logging.ErrorKey: err,
			"protoVersion":   ls.State.ProtoVersion,
		}.Errorf(ctx, "Failed to obtain GSClient.")
		return nil, err
	}

	sa := stagedArchival{
		Archivist: a,
		Settings:  st,

		ctx:      ctx,
		project:  project,
		realm:    realm,
		gsclient: gsClient,

		terminalIndex: types.MessageIndex(ls.State.TerminalIndex),
	}

	// Deserialize and validate the descriptor protobuf. If this fails, it is a
	// non-transient error.
	if err := proto.Unmarshal(ls.Desc, &sa.desc); err != nil {
		logging.Fields{
			logging.ErrorKey: err,
			"protoVersion":   ls.State.ProtoVersion,
		}.Errorf(ctx, "Failed to unmarshal descriptor protobuf.")
		return nil, err
	}
	sa.path = sa.desc.Path()

	// Construct the staged archival paths sa.stream and sa.index. Each path's
	// length must not exceed 1024 bytes, the GCS object name limit.
	if err = sa.makeStagingPaths(1024); err != nil {
		return nil, err
	}

	// Construct a CloudLogging client, if the config is set and the input
	// stream type is TEXT.
	if st.CloudLoggingProjectID != "" && sa.desc.StreamType == logpb.StreamType_TEXT {
		// Validate the project ID, and ping the project to verify the auth.
		if err = gcloud.ValidateProjectID(st.CloudLoggingProjectID); err != nil {
			return nil, errors.Annotate(err, "CloudLoggingProjectID %q", st.CloudLoggingProjectID).Err()
		}
		onError := func(err error) {
			logging.Fields{
				"luciProject":  project,
				"cloudProject": st.CloudLoggingProjectID,
				"path":         sa.path,
			}.Errorf(ctx, "archiving log to Cloud Logging: %v", err)
		}

		clc, err := a.CLClientFactory(ctx, project, st.CloudLoggingProjectID, onError)
		if err != nil {
			logging.Fields{
				logging.ErrorKey: err,
				"protoVersion":   ls.State.ProtoVersion,
			}.Errorf(ctx, "Failed to obtain CloudLogging client.")
			return nil, err
		}
		if err = clc.Ping(ctx); err != nil {
			return nil, errors.Annotate(
				err, "failed to ping CloudProject %q for Cloud Logging export",
				st.CloudLoggingProjectID).Err()
		}
		sa.clclient = clc
	}

	return &sa, nil
}

type stagedArchival struct {
	*Archivist
	*Settings

	ctx     context.Context
	project string
	realm   string
	path    types.StreamPath
	desc    logpb.LogStreamDescriptor

	stream stagingPaths
	index  stagingPaths

	terminalIndex types.MessageIndex
	logEntryCount int64

	gsclient gs.Client
	clclient CLClient
}

func base64Hash(p types.StreamName) string {
	hasher := sha256.New()
	hasher.Write([]byte(p))
	return base64.RawURLEncoding.EncodeToString(hasher.Sum(nil))
}

// makeStagingPaths populates the `staged` and `final` fields in sa.stream and
// sa.index.
//
// It prefixes the staging GCS paths with a hash of the stream's LogDog prefix
// to spread the load across the GCS namespace and avoid hotspotting its
// metadata server.
//
// These paths may be shared between projects. To avoid conflicts, the project
// name is inserted as part of the path.
func (sa *stagedArchival) makeStagingPaths(maxGSFilenameLength int) error {
	// "<prefix>/+/<name>" => (<prefix>, <name>).
	prefix, name := sa.path.Split()
	if name == "" {
		return errors.Reason("got prefix-only path %q, don't know how to stage it", sa.path).Err()
	}

	// base64-encoded SHA256 hash of the prefix.
	prefixHash := "p/" + base64Hash(prefix)

	// The GCS paths we need to generate are:
	//   <GSStagingBase>/<project>/<prefixHash>/+/<name>/logstream.entries
	//   <GSStagingBase>/<project>/<prefixHash>/+/<name>/logstream.index
	//   <GSBase>/<project>/<prefix>/+/<name>/logstream.entries
	//   <GSBase>/<project>/<prefix>/+/<name>/logstream.index
	//
	// Each path length must be less than maxGSFilenameLength bytes, and we want
	// the <name> component to be identical in all paths. If some path doesn't
	// fit the limit, we replace <name> with "<name-prefix>-TRUNCATED-<hash>"
	// everywhere, making it fit the limit.

	// Note: len("logstream.entries") > len("logstream.index"), so use it when
	// computing the max length.
	maxStagingLen := len(sa.GSStagingBase.Concat(sa.project, prefixHash, "+", string(name), "logstream.entries").Filename())
	maxFinalLen := len(sa.GSBase.Concat(sa.project, string(prefix), "+", string(name), "logstream.entries").Filename())

	// See if we need to truncate <name> to fit the GCS paths into the limits.
	//
	// sa.path is user-provided and unbounded, and is known to sometimes exceed
	// the maximum object name length (https://crbug.com/1138017). So, truncate
	// it if needed, using a crypto hash to avoid collisions (and thus
	// overwrites).
	maxPathLen := maxStagingLen
	if maxFinalLen > maxStagingLen {
		maxPathLen = maxFinalLen
	}
	if bytesToCut := maxPathLen - maxGSFilenameLength; bytesToCut > 0 {
		nameSuffix := types.StreamName("-TRUNCATED-" + base64Hash(name)[:16])
		// Replace the last len(nameSuffix)+bytesToCut bytes with nameSuffix. This
		// reduces the overall name size by `bytesToCut` bytes, as needed.
		if len(nameSuffix)+bytesToCut > len(name) {
			// There's not enough space even to fit nameSuffix: the prefix is too
			// long. This should be rare; abort.
			return errors.Reason("can't stage %q of project %q, prefix is too long", sa.path, sa.project).Err()
		}
		name = name[:len(name)-len(nameSuffix)-bytesToCut] + nameSuffix
	}

	// Everything should fit into the limits now.
	nameMap := map[string]*stagingPaths{
		"logstream.entries": &sa.stream,
		"logstream.index":   &sa.index,
	}
	for file, spaths := range nameMap {
		spaths.staged = sa.GSStagingBase.Concat(sa.project, prefixHash, "+", string(name), file)
		spaths.final = sa.GSBase.Concat(sa.project, string(prefix), "+", string(name), file)
	}
	return nil
}
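// An illustration (not part of the original file; all values hypothetical) of
// the path shapes makeStagingPaths produces for a short stream, before any
// truncation kicks in:
func exampleStagingLayout() {
	sa := stagedArchival{
		Settings: &Settings{
			GSBase:        gs.Path("gs://final-bucket"),
			GSStagingBase: gs.Path("gs://staging-bucket"),
		},
		project: "example-project",
		path:    types.StreamPath("bb/123/+/steps/stdout"),
	}
	// With the 1024-byte GCS limit this yields paths of the shape:
	//   sa.stream.staged = gs://staging-bucket/example-project/p/<base64(sha256("bb/123"))>/+/steps/stdout/logstream.entries
	//   sa.stream.final  = gs://final-bucket/example-project/bb/123/+/steps/stdout/logstream.entries
	// plus the matching logstream.index pair.
	_ = sa.makeStagingPaths(1024)
}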
// stage executes the archival process, archiving to the staged storage paths.
//
// If stage fails, it may return a transient error.
func (sa *stagedArchival) stage() (err error) {
	// Group any transient errors that occur during cleanup. If we aren't
	// returning a non-transient error, return a transient "terr".
	var terr errors.MultiError
	defer func() {
		if err == nil && len(terr) > 0 {
			logging.Errorf(sa.ctx, "Encountered transient errors: %s", terr)
			err = transient.Tag.Apply(terr)
		}
	}()

	// Close our writers on exit. If any of them fail to close, mark the archival
	// as a transient failure.
	closeWriter := func(closer io.Closer, path gs.Path) {
		// Close the Writer. If this results in an error, append it to our
		// transient error MultiError.
		if ierr := closer.Close(); ierr != nil {
			logging.Warningf(sa.ctx, "Error closing writer to %s: %s", path, ierr)
			terr = append(terr, ierr)
		}

		// If we have an archival error, also delete the path associated with this
		// stream. This is a non-fatal failure, since we've already hit a fatal
		// one.
		if err != nil || len(terr) > 0 {
			logging.Warningf(sa.ctx, "Cleaning up %s after error", path)
			if ierr := sa.gsclient.Delete(path); ierr != nil {
				logging.Fields{
					logging.ErrorKey: ierr,
					"path":           path,
				}.Warningf(sa.ctx, "Failed to delete stream on error.")
			}
		}
	}

	// createWriter is a shorthand function for creating a writer to a path and
	// reporting an error if it fails.
	createWriter := func(p gs.Path) (gs.Writer, error) {
		w, ierr := sa.gsclient.NewWriter(p)
		if ierr != nil {
			logging.Fields{
				logging.ErrorKey: ierr,
				"path":           p,
			}.Errorf(sa.ctx, "Failed to create writer.")
			return nil, ierr
		}
		return w, nil
	}

	var streamWriter, indexWriter gs.Writer
	if streamWriter, err = createWriter(sa.stream.staged); err != nil {
		return err
	}
	defer closeWriter(streamWriter, sa.stream.staged)

	if indexWriter, err = createWriter(sa.index.staged); err != nil {
		return err
	}
	defer closeWriter(indexWriter, sa.index.staged)

	// Read our log entries from intermediate storage.
	ss := storageSource{
		Context:       sa.ctx,
		st:            sa.Storage,
		project:       sa.project,
		path:          sa.path,
		terminalIndex: sa.terminalIndex,
		lastIndex:     -1,
	}

	m := archive.Manifest{
		LUCIProject:      sa.project,
		Desc:             &sa.desc,
		Source:           &ss,
		LogWriter:        streamWriter,
		IndexWriter:      indexWriter,
		StreamIndexRange: sa.IndexStreamRange,
		PrefixIndexRange: sa.IndexPrefixRange,
		ByteRange:        sa.IndexByteRange,

		Logger: logging.Get(sa.ctx),
	}

	if sa.clclient != nil {
		logID := "luci-logs"
		tags := sa.desc.GetTags()
		if tags == nil {
			tags = map[string]string{}
		}
		if sa.realm != "" {
			tags["realm"] = sa.realm
		}

		// bbagent adds viewer.LogDogViewerURLTag to log streams for the
		// "back to build" link in the UI.
		//
		// This URL isn't useful in the Cloud Logging UI and doesn't add any value
		// to search capabilities, so remove it.
		delete(tags, viewer.LogDogViewerURLTag)

		switch val, ok := tags["luci.CloudLogExportID"]; {
		case !ok, len(val) == 0: // skip

		// A LogID must be shorter than 512 characters and may contain only
		// alphanumerics and ./_-. If CloudLogExportID is too long or contains
		// unsupported chars, fall back to the default LogID.
		case len(val) > 511:
			logging.Errorf(sa.ctx, "CloudLogExportID: too long - %d", len(val))

		case !logIDRe.MatchString(val):
			logging.Errorf(sa.ctx, "CloudLogExportID(%s): does not match %s", val, logIDRe)

		default:
			logID = val
		}

		m.CloudLogger = sa.clclient.Logger(
			logID,
			cl.CommonLabels(tags),
			cl.CommonResource(&mrpb.MonitoredResource{
				Type: "generic_task",
				Labels: map[string]string{
					"project_id": sa.project,
					"location":   sa.desc.GetName(),
					"namespace":  sa.desc.GetPrefix(),
					"job":        "cloud-logging-export",
				},
			}),
			cl.BufferedByteLimit(sa.CloudLoggingBufferLimit*1024*1024),
		)
	}

	if err = archive.Archive(m); err != nil {
		logging.WithError(err).Errorf(sa.ctx, "Failed to archive log stream.")
		return err
	}

	if ss.logEntryCount == 0 {
		// No log entries were read from intermediate storage; the stream is
		// empty.
		logging.Warningf(sa.ctx, "No log entries were archived.")
	}

	// Update our state with archival results.
	sa.terminalIndex = ss.lastIndex
	sa.logEntryCount = ss.logEntryCount
	sa.stream.bytesWritten = streamWriter.Count()
	sa.index.bytesWritten = indexWriter.Count()
	return nil
}
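// An illustration (hypothetical values, not part of the original file) of how
// the luci.CloudLogExportID tag is applied in stage above:
//
//	"my-ci-logs"       -> used as the Cloud Logging LogID
//	"has spaces!"      -> rejected by logIDRe; falls back to "luci-logs"
//	512+ characters    -> rejected by the length check; falls back to "luci-logs"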
type stagingPaths struct {
	staged       gs.Path
	final        gs.Path
	bytesWritten int64
}

func (d *stagingPaths) clearStaged() { d.staged = "" }

func (d *stagingPaths) enabled() bool { return d.final != "" }

func (d *stagingPaths) addMetrics(ctx context.Context, projectField, archiveField, streamField string) {
	tsSize.Add(ctx, float64(d.bytesWritten), projectField, archiveField, streamField)
	tsTotalBytes.Add(ctx, d.bytesWritten, projectField, archiveField, streamField)
}

func (sa *stagedArchival) finalize(ar *logdog.ArchiveStreamRequest) error {
	err := parallel.FanOutIn(func(taskC chan<- func() error) {
		for _, d := range sa.getStagingPaths() {
			d := d

			// Don't finalize zero-sized streams.
			if !d.enabled() || d.bytesWritten == 0 {
				continue
			}

			taskC <- func() error {
				if err := sa.gsclient.Rename(d.staged, d.final); err != nil {
					logging.Fields{
						logging.ErrorKey: err,
						"stagedPath":     d.staged,
						"finalPath":      d.final,
					}.Errorf(sa.ctx, "Failed to rename GS object.")
					return err
				}

				// Clear the staged value to indicate that it no longer exists.
				d.clearStaged()
				return nil
			}
		}
	})
	if err != nil {
		return err
	}

	ar.TerminalIndex = int64(sa.terminalIndex)
	ar.LogEntryCount = sa.logEntryCount
	ar.StreamUrl = string(sa.stream.final)
	ar.StreamSize = sa.stream.bytesWritten
	ar.IndexUrl = string(sa.index.final)
	ar.IndexSize = sa.index.bytesWritten
	return nil
}

func (sa *stagedArchival) Close() error {
	var clErr error
	if sa.clclient != nil {
		clErr = errors.Annotate(sa.clclient.Close(),
			"while closing CloudLogging client for (%s/%s/+/%s)",
			sa.project, sa.desc.GetPrefix(), sa.desc.GetName()).Err()
	}
	return errors.Flatten(errors.MultiError{sa.gsclient.Close(), clErr})
}

func (sa *stagedArchival) cleanup() {
	for _, d := range sa.getStagingPaths() {
		if d.staged == "" {
			continue
		}

		logging.Warningf(sa.ctx, "Cleaning up staged path %s", d.staged)
		if err := sa.gsclient.Delete(d.staged); err != nil {
			logging.Fields{
				logging.ErrorKey: err,
				"path":           d.staged,
			}.Warningf(sa.ctx, "Failed to clean up staged path.")
		}

		d.clearStaged()
	}
}

func (sa *stagedArchival) getStagingPaths() []*stagingPaths {
	return []*stagingPaths{
		&sa.stream,
		&sa.index,
	}
}

// statusErrorWrapper is an error wrapper. It is detected by isFailure and used
// to determine whether the supplied error represents a failure or just a
// status error.
type statusErrorWrapper struct {
	inner error
}

var _ interface {
	error
	errors.Wrapped
} = (*statusErrorWrapper)(nil)

func (e *statusErrorWrapper) Error() string {
	if e.inner != nil {
		return e.inner.Error()
	}
	return ""
}

func (e *statusErrorWrapper) Unwrap() error {
	return e.inner
}

func isFailure(err error) bool {
	if err == nil {
		return false
	}
	_, ok := err.(*statusErrorWrapper)
	return !ok
}
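// A minimal sketch (not part of the original file) showing how a status error
// would be constructed so that isFailure reports it as a non-failure; the
// helper name asStatusError is hypothetical.
func asStatusError(err error) error {
	if err == nil {
		return nil
	}
	return &statusErrorWrapper{inner: err}
}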