github.com/derat/nup@v0.0.0-20230418113745-15592ba7c620/cmd/nup/storage/command.go (about)

     1  // Copyright 2021 Daniel Erat.
     2  // All rights reserved.
     3  
     4  package storage
     5  
     6  import (
     7  	"context"
     8  	"encoding/json"
     9  	"flag"
    10  	"fmt"
    11  	"io"
    12  	"log"
    13  	"os"
    14  
    15  	"cloud.google.com/go/storage"
    16  
    17  	"github.com/derat/nup/cmd/nup/client"
    18  	"github.com/derat/nup/server/db"
    19  	"github.com/google/subcommands"
    20  
    21  	"golang.org/x/oauth2/google"
    22  
    23  	"google.golang.org/api/iterator"
    24  	"google.golang.org/api/option"
    25  )
    26  
    27  type storageClass string
    28  
    29  const (
    30  	standard storageClass = "STANDARD"
    31  	nearline              = "NEARLINE"
    32  	coldline              = "COLDLINE"
    33  	archive               = "ARCHIVE"
    34  )
    35  
// Command implements the "storage" subcommand, which updates the storage
// classes of song files in Google Cloud Storage based on song ratings
// read from stdin.
type Command struct {
	Cfg *client.Config // client configuration (supplied by the caller)

	bucketName   string // GCS bucket name
	class        string // storage class for low-rated files
	maxUpdates   int    // files to update
	numWorkers   int    // concurrent GCS updates
	ratingCutoff int    // min rating for standard storage class
}
    45  
// Name returns the subcommand's name, per the subcommands.Command interface.
func (*Command) Name() string     { return "storage" }

// Synopsis returns a one-line description of the subcommand.
func (*Command) Synopsis() string { return "update song storage classes" }

// Usage returns multi-line help text for the subcommand.
func (*Command) Usage() string {
	return `storage <flags>:
	Update song files' storage classes in Google Cloud Storage based on
	ratings in dumped songs read from stdin.

`
}
    55  
// SetFlags registers the subcommand's command-line flags with f,
// per the subcommands.Command interface.
func (cmd *Command) SetFlags(f *flag.FlagSet) {
	f.StringVar(&cmd.bucketName, "bucket", "", "Google Cloud Storage bucket containing songs")
	f.StringVar(&cmd.class, "class", string(coldline), "Storage class for infrequently-accessed files")
	f.IntVar(&cmd.maxUpdates, "max-updates", -1, "Maximum number of files to update")
	f.IntVar(&cmd.numWorkers, "workers", 10, "Maximum concurrent Google Cloud Storage updates")
	f.IntVar(&cmd.ratingCutoff, "rating-cutoff", 4, "Minimum song rating for standard storage class")
}
    63  
// Execute reads JSON-dumped db.Song objects from stdin and updates the
// corresponding objects' storage classes in the GCS bucket named by -bucket.
// Songs rated at or above -rating-cutoff (and unrated songs) get STANDARD
// storage; lower-rated songs get the class named by -class. Updates run on
// a pool of -workers goroutines, and -max-updates caps how many objects are
// changed. Returns a subcommands exit status.
func (cmd *Command) Execute(ctx context.Context, _ *flag.FlagSet, _ ...interface{}) subcommands.ExitStatus {
	if cmd.bucketName == "" {
		fmt.Fprintln(os.Stderr, "Must supply bucket name with -bucket")
		return subcommands.ExitUsageError
	}
	// -class picks the class for infrequently-accessed (low-rated) files,
	// so STANDARD is deliberately not accepted here.
	class := storageClass(cmd.class)
	if class != nearline && class != coldline && class != archive {
		fmt.Fprintf(os.Stderr, "Invalid -class %q (valid: %v %v %v)\n", class, nearline, coldline, archive)
		return subcommands.ExitUsageError
	}

	// Rewriting objects' storage classes requires write access to the bucket.
	creds, err := google.FindDefaultCredentials(ctx,
		"https://www.googleapis.com/auth/devstorage.read_write",
	)
	if err != nil {
		fmt.Fprintln(os.Stderr, "Failed finding credentials:", err)
		return subcommands.ExitFailure
	}
	client, err := storage.NewClient(ctx, option.WithCredentials(creds))
	if err != nil {
		fmt.Fprintln(os.Stderr, "Failed creating client:", err)
		return subcommands.ExitFailure
	}
	defer client.Close()

	// Read songs from stdin and determine the proper storage class for each,
	// keyed by the song's filename (which is matched against GCS object
	// names when listing the bucket below).
	songClasses := make(map[string]storageClass)
	d := json.NewDecoder(os.Stdin)
	for {
		var s db.Song
		if err := d.Decode(&s); err == io.EOF {
			break
		} else if err != nil {
			fmt.Fprintln(os.Stderr, "Failed to read song:", err)
			return subcommands.ExitFailure
		}
		// Unrated songs (Rating <= 0) keep standard storage; only songs
		// rated below the cutoff are demoted to the cheaper class.
		cls := standard
		if s.Rating > 0 && s.Rating < cmd.ratingCutoff {
			cls = class
		}
		songClasses[s.Filename] = cls
	}

	// List the objects synchronously so we know how many jobs we'll have.
	var jobs []job
	bucket := client.Bucket(cmd.bucketName)
	it := bucket.Objects(ctx, &storage.Query{Prefix: ""})
	for {
		attrs, err := it.Next()
		if err == iterator.Done {
			break
		} else if err != nil {
			fmt.Fprintf(os.Stderr, "Failed listing objects in %v: %v\n", cmd.bucketName, err)
			return subcommands.ExitFailure
		}
		// Only objects that correspond to a dumped song and whose current
		// storage class differs from the desired one need updating.
		class, ok := songClasses[attrs.Name]
		if ok && attrs.StorageClass != string(class) {
			jobs = append(jobs, job{*attrs, class})
			if cmd.maxUpdates > 0 && len(jobs) >= cmd.maxUpdates {
				break
			}
		}
	}

	// See https://gobyexample.com/worker-pools.
	// Both channels are buffered large enough to hold every job/result,
	// so neither the sends below nor the workers' result sends can block.
	jobChan := make(chan job, len(jobs))
	resChan := make(chan result, len(jobs))

	// Start the workers.
	for i := 0; i < cmd.numWorkers; i++ {
		go worker(ctx, bucket, jobChan, resChan)
	}

	// Submit the jobs. Closing the channel lets workers exit once drained.
	for _, j := range jobs {
		jobChan <- j
	}
	close(jobChan)

	// Wait for all the jobs to finish; exactly one result arrives per job.
	var numErrs int
	for i := 0; i < len(jobs); i++ {
		res := <-resChan
		msg := fmt.Sprintf("[%d/%d] %q: %v -> %v", i+1, len(jobs),
			res.attrs.Name, res.attrs.StorageClass, res.class)
		if res.err == nil {
			log.Print(msg)
		} else {
			numErrs++
			log.Printf("%s failed: %v", msg, res.err)
		}
	}
	if numErrs > 0 {
		fmt.Fprintf(os.Stderr, "Failed updating %v object(s)\n", numErrs)
		return subcommands.ExitFailure
	}
	return subcommands.ExitSuccess
}
   162  
// job describes a single storage-class update to perform on a GCS object.
type job struct {
	attrs storage.ObjectAttrs // original attributes
	class storageClass        // new storage class
}
   167  
// result reports the outcome of a single job; err is nil on success.
type result struct {
	job
	err error
}
   172  
   173  func worker(ctx context.Context, bucket *storage.BucketHandle, jobs <-chan job, results chan<- result) {
   174  	for j := range jobs {
   175  		obj := bucket.Object(j.attrs.Name)
   176  		copier := obj.CopierFrom(obj)
   177  		copier.StorageClass = string(j.class)
   178  
   179  		// Preserve a bunch of random junk.
   180  		copier.ContentType = j.attrs.ContentType
   181  		copier.ContentLanguage = j.attrs.ContentLanguage
   182  		copier.CacheControl = j.attrs.CacheControl
   183  		copier.ACL = j.attrs.ACL
   184  		copier.PredefinedACL = j.attrs.PredefinedACL
   185  		copier.ContentEncoding = j.attrs.ContentEncoding
   186  		copier.ContentDisposition = j.attrs.ContentDisposition
   187  		copier.Metadata = j.attrs.Metadata
   188  
   189  		_, err := copier.Run(ctx)
   190  		results <- result{j, err}
   191  	}
   192  }