go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/server/cmd/datastore-delete/main.go (about)

     1  // Copyright 2023 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Executable datastore-delete deletes all data of a specified kind in a
    16  // Datastore database.
    17  //
    18  // First run in dry-run mode to see how many entities would be visited:
    19  //
    20  //	go run main.go -cloud-project <project-id> -kind <kind>
    21  //
    22  // Then run for real:
    23  //
    24  //	go run main.go -cloud-project <project-id> -kind <kind> -delete
    25  package main
    26  
    27  import (
    28  	"context"
    29  	"flag"
    30  	"fmt"
    31  	"os"
    32  	"strings"
    33  	"sync"
    34  	"time"
    35  
    36  	cloudds "cloud.google.com/go/datastore"
    37  	"golang.org/x/sync/errgroup"
    38  	"google.golang.org/api/option"
    39  
    40  	"go.chromium.org/luci/auth"
    41  	"go.chromium.org/luci/common/clock"
    42  	"go.chromium.org/luci/common/errors"
    43  	"go.chromium.org/luci/common/logging"
    44  	"go.chromium.org/luci/common/logging/gologger"
    45  	"go.chromium.org/luci/common/system/signals"
    46  	"go.chromium.org/luci/gae/impl/cloud"
    47  	"go.chromium.org/luci/gae/service/datastore"
    48  	"go.chromium.org/luci/hardcoded/chromeinfra"
    49  
    50  	"go.chromium.org/luci/server/dsmapper/dsmapperlite"
    51  )
    52  
// Command-line flags. The pointers hold their parsed values only after
// flag.Parse has run, which main does first.
//
//   - cloudProject: the Cloud project whose Datastore is operated on; main
//     refuses to start when it is empty.
//   - kind: the Datastore kind whose entities are visited; main refuses to
//     start when it is empty.
//   - delete: false (the default) means entities are only visited and counted;
//     true means each visited key is also deleted. Within this package the name
//     shadows the delete builtin.
//   - workers: how many goroutines process visited keys; run also derives the
//     gRPC connection pool size from it (workers/16).
var (
	cloudProject = flag.String("cloud-project", "", "Cloud Datastore cloud project")
	kind         = flag.String("kind", "", "Datastore Kind to delete all entries for")
	delete       = flag.Bool("delete", false, "If set, actually delete the data")
	workers      = flag.Int("workers", 256, "Number of goroutines doing deletions")
)
    59  
    60  func main() {
    61  	flag.Parse()
    62  	if *cloudProject == "" {
    63  		fmt.Fprintf(os.Stderr, "-cloud-project is required\n")
    64  		os.Exit(2)
    65  	}
    66  
    67  	if *kind == "" {
    68  		fmt.Fprintf(os.Stderr, "-kind is required\n")
    69  		os.Exit(2)
    70  	}
    71  
    72  	ctx := gologger.StdConfig.Use(context.Background())
    73  	ctx, cancel := context.WithCancel(ctx)
    74  	signals.HandleInterrupt(cancel)
    75  
    76  	if err := run(ctx); err != nil {
    77  		errors.Log(ctx, err)
    78  		os.Exit(1)
    79  	}
    80  }
    81  
    82  func run(ctx context.Context) error {
    83  	scopes := []string{
    84  		"https://www.googleapis.com/auth/cloud-platform",
    85  		"https://www.googleapis.com/auth/userinfo.email",
    86  	}
    87  
    88  	ts, err := auth.NewAuthenticator(ctx, auth.SilentLogin, chromeinfra.SetDefaultAuthOptions(auth.Options{
    89  		Scopes: scopes,
    90  	})).TokenSource()
    91  	switch {
    92  	case err == auth.ErrLoginRequired:
    93  		return errors.Reason("Need to login. Run `luci-auth login -scopes \"%s\"`", strings.Join(scopes, " ")).Err()
    94  	case err != nil:
    95  		return errors.Annotate(err, "failed to get token source").Err()
    96  	}
    97  
    98  	client, err := cloudds.NewClient(ctx, *cloudProject,
    99  		option.WithTokenSource(ts),
   100  		option.WithGRPCConnectionPool(*workers/16),
   101  	)
   102  	if err != nil {
   103  		return errors.Annotate(err, "failed to instantiate the datastore client").Err()
   104  	}
   105  
   106  	ctx = (&cloud.ConfigLite{
   107  		ProjectID: *cloudProject,
   108  		DS:        client,
   109  	}).Use(ctx)
   110  
   111  	return reallyRun(ctx)
   112  }
   113  
   114  func reallyRun(ctx context.Context) error {
   115  	keys := make(chan *datastore.Key, 50000)
   116  	visitor := visitor{
   117  		now:        clock.Now(ctx).UTC(),
   118  		delete:     *delete,
   119  		nextReport: clock.Now(ctx).Add(time.Second),
   120  	}
   121  
   122  	// A goroutine pool to process visited entities.
   123  	gr, gctx := errgroup.WithContext(ctx)
   124  	for i := 0; i < *workers; i++ {
   125  		gr.Go(func() error {
   126  			for s := range keys {
   127  				visitor.process(gctx, s)
   128  				visitor.reportMaybe(gctx)
   129  			}
   130  			return nil
   131  		})
   132  	}
   133  
   134  	// A mapper that feeds entities to the visitor goroutine pool.
   135  	logging.Infof(ctx, "Visiting %s entities...", *kind)
   136  	mapErr := dsmapperlite.Map(ctx, datastore.NewQuery(*kind).KeysOnly(true), 32, 1000,
   137  		func(ctx context.Context, _ int, key *datastore.Key) error {
   138  			visitor.visit(ctx, key)
   139  			keys <- key
   140  			visitor.reportMaybe(ctx)
   141  			return nil
   142  		},
   143  	)
   144  	close(keys)
   145  	visitor.visitedAll(ctx)
   146  	grErr := gr.Wait()
   147  
   148  	visitor.report(ctx, true)
   149  
   150  	if grErr != nil {
   151  		return errors.Annotate(grErr, "when processing %s", *kind).Err()
   152  	}
   153  	if mapErr != nil {
   154  		return errors.Annotate(mapErr, "when visiting %s", *kind).Err()
   155  	}
   156  	return nil
   157  }
   158  
   159  type visitor struct {
   160  	now    time.Time
   161  	delete bool
   162  
   163  	m sync.Mutex
   164  
   165  	visited int // total number of entities visited
   166  
   167  	pendingDelete int // entities queued for delete
   168  
   169  	deleted int // total number of successfully deleted entities
   170  	errors  int // total number of deletion errors
   171  
   172  	reportM      sync.Mutex
   173  	nextReport   time.Time // when to print the next progress report
   174  	doneVisiting bool      // true if done visiting, but still processing
   175  }
   176  
   177  // visit returns true if a key needs to be processed.
   178  func (v *visitor) visit(ctx context.Context, s *datastore.Key) {
   179  	v.m.Lock()
   180  	defer v.m.Unlock()
   181  
   182  	v.visited++
   183  }
   184  
   185  // process deletes a key.
   186  func (v *visitor) process(ctx context.Context, s *datastore.Key) {
   187  	var err error
   188  	if v.delete {
   189  		if err = deleteKey(ctx, s); err != nil {
   190  			logging.Errorf(ctx, "%s: %s", s, err)
   191  		}
   192  	}
   193  
   194  	v.m.Lock()
   195  	defer v.m.Unlock()
   196  
   197  	v.pendingDelete--
   198  	if v.delete {
   199  		if err != nil {
   200  			v.errors++
   201  		} else {
   202  			v.deleted++
   203  		}
   204  	}
   205  }
   206  
   207  // visitedAll is called when all keys are visited.
   208  func (v *visitor) visitedAll(ctx context.Context) {
   209  	v.reportM.Lock()
   210  	v.doneVisiting = true
   211  	v.reportM.Unlock()
   212  	v.report(ctx, true)
   213  }
   214  
   215  // reportMaybe prints a progress report if it is time.
   216  func (v *visitor) reportMaybe(ctx context.Context) {
   217  	now := clock.Now(ctx)
   218  
   219  	v.reportM.Lock()
   220  	needReport := now.After(v.nextReport)
   221  	if needReport {
   222  		v.nextReport = now.Add(time.Second)
   223  	}
   224  	doneVisiting := v.doneVisiting
   225  	v.reportM.Unlock()
   226  
   227  	if needReport {
   228  		v.report(ctx, doneVisiting)
   229  	}
   230  }
   231  
   232  // report prints a progress report.
   233  func (v *visitor) report(ctx context.Context, doneVisiting bool) {
   234  	v.m.Lock()
   235  	defer v.m.Unlock()
   236  
   237  	logging.Infof(ctx, "-------------------------------------------")
   238  	if doneVisiting {
   239  		logging.Infof(ctx, "All visited entities:                     %d", v.visited)
   240  	} else {
   241  		logging.Infof(ctx, "Entities visited so far:                  %d", v.visited)
   242  	}
   243  	logging.Infof(ctx, "Entities pending delete by the tool:      %d", v.pendingDelete)
   244  	logging.Infof(ctx, "Successfully deleted entities:            %d", v.deleted)
   245  	logging.Infof(ctx, "Update errors:                            %d", v.errors)
   246  	logging.Infof(ctx, "-------------------------------------------")
   247  }
   248  
   249  func deleteKey(ctx context.Context, key *datastore.Key) error {
   250  	return datastore.Delete(ctx, key)
   251  }