go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/server/dsmapper/dsmapperlite/dsmapperlite.go (about) 1 // Copyright 2023 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package dsmapperlite implements an in-process datastore mapper. 16 // 17 // Unlike its bigger sibling dsmapper, it doesn't distribute mapping 18 // operations across machines, but in exchange has a very simple API. There's 19 // no need to install it as a server module or set up task queue tasks etc. Just 20 // use it as a library. 21 // 22 // Useful for quickly visiting up to 100K entities. 23 package dsmapperlite 24 25 import ( 26 "context" 27 28 "golang.org/x/sync/errgroup" 29 30 "go.chromium.org/luci/common/errors" 31 "go.chromium.org/luci/common/logging" 32 "go.chromium.org/luci/gae/service/datastore" 33 34 "go.chromium.org/luci/server/dsmapper/internal/splitter" 35 ) 36 37 // Map passes all entities matching the query to the callback, in parallel, 38 // in some random order. 39 // 40 // Runs up to `shards` number of parallel goroutines, where each one executes 41 // a datastore query and passes the resulting entities to the callback (along 42 // with the shard index). Each query fetches entities in `batchSize` pages 43 // before handling them. The overall memory consumption is thus 44 // `O(shards * batchSize * averageEntitySize)`. 
45 // 46 // Within a shard, the callback is called sequentially, but different shards 47 // are processed in parallel. If the callback needs to parallelize entity 48 // processing more, it should manage its own goroutine pool and pass entities 49 // to it. 50 // 51 // If the callback returns an error, Map aborts the entire operation ASAP (but 52 // it may take some time to wind down). When this happens, the context passed 53 // to the callback is canceled. 54 func Map[E any](ctx context.Context, q *datastore.Query, shards, batchSize int, cb func(ctx context.Context, shard int, entity E) error) error { 55 logging.Infof(ctx, "Calculating ranges...") 56 ranges, err := splitter.SplitIntoRanges(ctx, q, splitter.Params{ 57 Shards: shards, 58 Samples: 500, 59 }) 60 if err != nil { 61 return errors.Annotate(err, "failed to do the initial __scatter__ query").Err() 62 } 63 logging.Infof(ctx, "Querying %d ranges in parallel...", len(ranges)) 64 eg, ctx := errgroup.WithContext(ctx) 65 for idx, r := range ranges { 66 idx := idx 67 rangedQ := r.Apply(q) 68 eg.Go(func() error { 69 return datastore.RunBatch(ctx, int32(batchSize), rangedQ, func(e E) error { 70 return cb(ctx, idx, e) 71 }) 72 }) 73 } 74 return eg.Wait() 75 }