go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/swarming/server/bq/export.go (about) 1 // Copyright 2023 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package bq 16 17 import ( 18 "context" 19 "errors" 20 "fmt" 21 "time" 22 23 "golang.org/x/sync/errgroup" 24 "google.golang.org/protobuf/proto" 25 "google.golang.org/protobuf/types/known/durationpb" 26 "google.golang.org/protobuf/types/known/timestamppb" 27 28 "go.chromium.org/luci/common/clock" 29 "go.chromium.org/luci/common/logging" 30 "go.chromium.org/luci/gae/service/datastore" 31 "go.chromium.org/luci/server/tq" 32 33 "go.chromium.org/luci/swarming/server/bq/taskspb" 34 ) 35 36 // exportDuration is the duration of the time interval to export to bigquery. 37 const exportDuration = 15 * time.Second 38 39 // maxTasksToSchedule is the maximum number of export tasks which may be 40 // scheduled per cron job. 41 const maxTasksToSchedule = 20 42 43 // latestAge represents the latest time in the past which can be scheduled for 44 // export by ScheduleExportTasks 45 const latestAge = 2 * time.Minute 46 47 // maxExportStateAge is the amount of time before an ExportState is garbage 48 // collected. 49 const maxExportStateAge = 24 * time.Hour 50 51 func RegisterTQTasks() { 52 tq.RegisterTaskClass(tq.TaskClass{ 53 ID: "bq-export-interval", 54 Kind: tq.NonTransactional, 55 Prototype: &taskspb.CreateExportTask{}, 56 Queue: "bq-export-interval", 57 Handler: func(ctx context.Context, payload proto.Message) error { 58 return exportTask(ctx, payload.(*taskspb.CreateExportTask)) 59 }, 60 }) 61 } 62 63 func tableID(cloudProject, dataset, tableName string) string { 64 return fmt.Sprintf("%s.%s.%s", cloudProject, dataset, tableName) 65 } 66 67 // CleanupExportState deletes export states which are older than 68 // maxExportStateAge. 69 func CleanupExportState(ctx context.Context) error { 70 // ScheduleExportTasks runs every 1m 71 // * schedules 4 exports per minute 72 // * on 4 tables 73 const batchSize = 4 * 4 * 10 74 // Will need to tune this value 75 const nWorkers = 64 76 g := new(errgroup.Group) 77 g.SetLimit(nWorkers) 78 79 now := clock.Now(ctx).UTC() 80 cutoff := now.Add(-maxExportStateAge) 81 logging.Infof(ctx, "Deleting ExportState created earlier than %s", cutoff) 82 q := datastore.NewQuery(exportStateKind).Lte("CreatedAt", cutoff) 83 84 deleteBatch := func(batch []*datastore.Key) { 85 g.Go(func() error { 86 logging.Debugf(ctx, "Attempting delete of %d ExportStates", len(batch)) 87 return datastore.Delete(ctx, batch) 88 }) 89 } 90 91 // RunInBatch works sequentially, so we can use closure to store the 92 // current batch. 93 batch := make([]*datastore.Key, 0, batchSize) 94 err := datastore.RunBatch(ctx, batchSize, q, func(key *datastore.Key) { 95 batch = append(batch, key) 96 if len(batch) == batchSize { 97 deleteBatch(batch) 98 batch = make([]*datastore.Key, 0, batchSize) 99 } 100 }) 101 // Whatever is left of batches gets deleted in this call. 102 deleteBatch(batch) 103 104 if err != nil { 105 logging.Errorf(ctx, "ExportState cleanup query failed") 106 // Useful work may still happen in g, in that case wait until its done 107 return errors.Join(err, g.Wait()) 108 } 109 return g.Wait() 110 } 111 112 // ScheduleExportTasks creates a series of tasks responsible for 113 // exporting a specific time interval to bigquery. All of the TQ tasks scheduled 114 // will cover the range [NextExport, cutoff). If exports fall behind schedule, 115 // the scheduler will try and catch up as much as possible by spawning as many 116 // tasks as possible. A `DuplicationKey` is used to ensure that no duplicate 117 // tasks are created if there are temporary failures to write to datastore. Will 118 // schedule a maxium of MaxTasksToSchedule export tasks. 119 func ScheduleExportTasks(ctx context.Context, cloudProject, dataset, tableName string) error { 120 now := clock.Now(ctx).UTC() 121 cutoff := now.Add(-latestAge) 122 tableID := tableID(cloudProject, dataset, tableName) 123 logging.Infof(ctx, "Scheduling export tasks: %s - %s", tableID, cutoff) 124 sch := ExportSchedule{Key: exportScheduleKey(ctx, tableName)} 125 err := datastore.Get(ctx, &sch) 126 if err != nil { 127 if errors.Is(err, datastore.ErrNoSuchEntity) { 128 sch.NextExport = now.Truncate(time.Minute) 129 logging.Infof(ctx, "Creating initial ExportSchedule - %+v", &sch) 130 return datastore.Put(ctx, &sch) 131 } else { 132 return err 133 } 134 } 135 i := 0 136 for { 137 // At this point, we have generated exports up until the cutoff point 138 // Or we have reached maximum number of export tasks to schedule. 139 if sch.NextExport.Add(exportDuration).After(cutoff) || i >= maxTasksToSchedule { 140 logging.Infof(ctx, "Scheduling export tasks done: %s", sch.NextExport) 141 break 142 } 143 payload := taskspb.CreateExportTask{ 144 Start: timestamppb.New(sch.NextExport), 145 Duration: durationpb.New(exportDuration), 146 CloudProject: cloudProject, 147 Dataset: dataset, 148 TableName: tableName, 149 } 150 ts := sch.NextExport.Unix() 151 dedupKey := fmt.Sprintf("%s:%d:%d", tableID, ts, exportDuration/time.Second) 152 task := tq.Task{ 153 Title: dedupKey, 154 DeduplicationKey: dedupKey, 155 Payload: &payload, 156 } 157 logging.Debugf(ctx, "Triggering %s: - %+v", 158 dedupKey, 159 &payload) 160 err = tq.AddTask(ctx, &task) 161 if err != nil { 162 logging.Warningf(ctx, "Failed to trigger export task: %+v", &payload) 163 break 164 } 165 sch.NextExport = sch.NextExport.Add(exportDuration) 166 i += 1 167 } 168 logging.Infof(ctx, "Updating export schedule after %d iterations: %+v", i, sch) 169 return errors.Join(err, datastore.Put(ctx, &sch)) 170 } 171 172 func exportTask(ctx context.Context, t *taskspb.CreateExportTask) error { 173 logging.Infof(ctx, "ExportTask started for %s:%s:%d", 174 tableID(t.CloudProject, t.Dataset, t.TableName), 175 t.Start.AsTime(), 176 t.Duration) 177 return nil 178 }