github.com/derat/nup@v0.0.0-20230418113745-15592ba7c620/cmd/nup/storage/command.go

// Copyright 2021 Daniel Erat.
// All rights reserved.

package storage

import (
	"context"
	"encoding/json"
	"flag"
	"fmt"
	"io"
	"log"
	"os"

	"cloud.google.com/go/storage"

	"github.com/derat/nup/cmd/nup/client"
	"github.com/derat/nup/server/db"
	"github.com/google/subcommands"

	"golang.org/x/oauth2/google"

	"google.golang.org/api/iterator"
	"google.golang.org/api/option"
)

type storageClass string

const (
	standard storageClass = "STANDARD"
	nearline storageClass = "NEARLINE"
	coldline storageClass = "COLDLINE"
	archive  storageClass = "ARCHIVE"
)

// Command implements the "storage" subcommand, which updates song files'
// storage classes in Google Cloud Storage based on their ratings.
type Command struct {
	Cfg *client.Config

	bucketName   string // GCS bucket name
	class        string // storage class for low-rated files
	maxUpdates   int    // files to update
	numWorkers   int    // concurrent GCS updates
	ratingCutoff int    // min rating for standard storage class
}

func (*Command) Name() string     { return "storage" }
func (*Command) Synopsis() string { return "update song storage classes" }
func (*Command) Usage() string {
	return `storage <flags>:
	Update song files' storage classes in Google Cloud Storage based on
	ratings in dumped songs read from stdin.

`
}

func (cmd *Command) SetFlags(f *flag.FlagSet) {
	f.StringVar(&cmd.bucketName, "bucket", "", "Google Cloud Storage bucket containing songs")
	f.StringVar(&cmd.class, "class", string(coldline), "Storage class for infrequently-accessed files")
	f.IntVar(&cmd.maxUpdates, "max-updates", -1, "Maximum number of files to update")
	f.IntVar(&cmd.numWorkers, "workers", 10, "Maximum concurrent Google Cloud Storage updates")
	f.IntVar(&cmd.ratingCutoff, "rating-cutoff", 4, "Minimum song rating for standard storage class")
}

func (cmd *Command) Execute(ctx context.Context, _ *flag.FlagSet, _ ...interface{}) subcommands.ExitStatus {
	if cmd.bucketName == "" {
		fmt.Fprintln(os.Stderr, "Must supply bucket name with -bucket")
		return subcommands.ExitUsageError
	}
	class := storageClass(cmd.class)
	if class != nearline && class != coldline && class != archive {
		fmt.Fprintf(os.Stderr, "Invalid -class %q (valid: %v %v %v)\n", class, nearline, coldline, archive)
		return subcommands.ExitUsageError
	}

	creds, err := google.FindDefaultCredentials(ctx,
		"https://www.googleapis.com/auth/devstorage.read_write",
	)
	if err != nil {
		fmt.Fprintln(os.Stderr, "Failed finding credentials:", err)
		return subcommands.ExitFailure
	}
	client, err := storage.NewClient(ctx, option.WithCredentials(creds))
	if err != nil {
		fmt.Fprintln(os.Stderr, "Failed creating client:", err)
		return subcommands.ExitFailure
	}
	defer client.Close()

	// Read songs from stdin and determine the proper storage class for each.
	songClasses := make(map[string]storageClass)
	d := json.NewDecoder(os.Stdin)
	for {
		var s db.Song
		if err := d.Decode(&s); err == io.EOF {
			break
		} else if err != nil {
			fmt.Fprintln(os.Stderr, "Failed to read song:", err)
			return subcommands.ExitFailure
		}
		cls := standard
		if s.Rating > 0 && s.Rating < cmd.ratingCutoff {
			cls = class
		}
		songClasses[s.Filename] = cls
	}

	// List the objects synchronously so we know how many jobs we'll have.
	var jobs []job
	bucket := client.Bucket(cmd.bucketName)
	it := bucket.Objects(ctx, &storage.Query{Prefix: ""})
	for {
		attrs, err := it.Next()
		if err == iterator.Done {
			break
		} else if err != nil {
			fmt.Fprintf(os.Stderr, "Failed listing objects in %v: %v\n", cmd.bucketName, err)
			return subcommands.ExitFailure
		}
		class, ok := songClasses[attrs.Name]
		if ok && attrs.StorageClass != string(class) {
			jobs = append(jobs, job{*attrs, class})
			if cmd.maxUpdates > 0 && len(jobs) >= cmd.maxUpdates {
				break
			}
		}
	}

	// See https://gobyexample.com/worker-pools.
	jobChan := make(chan job, len(jobs))
	resChan := make(chan result, len(jobs))

	// Start the workers.
	for i := 0; i < cmd.numWorkers; i++ {
		go worker(ctx, bucket, jobChan, resChan)
	}

	// Submit the jobs.
	for _, j := range jobs {
		jobChan <- j
	}
	close(jobChan)

	// Wait for all the jobs to finish.
	var numErrs int
	for i := 0; i < len(jobs); i++ {
		res := <-resChan
		msg := fmt.Sprintf("[%d/%d] %q: %v -> %v", i+1, len(jobs),
			res.attrs.Name, res.attrs.StorageClass, res.class)
		if res.err == nil {
			log.Print(msg)
		} else {
			numErrs++
			log.Printf("%s failed: %v", msg, res.err)
		}
	}
	if numErrs > 0 {
		fmt.Fprintf(os.Stderr, "Failed updating %v object(s)\n", numErrs)
		return subcommands.ExitFailure
	}
	return subcommands.ExitSuccess
}

// job describes a single object whose storage class should be changed.
type job struct {
	attrs storage.ObjectAttrs // original attributes
	class storageClass        // new storage class
}

// result describes the outcome of processing a job.
type result struct {
	job
	err error
}

// worker reads jobs from jobs and rewrites each object in place with the
// requested storage class, sending an outcome for each job to results.
func worker(ctx context.Context, bucket *storage.BucketHandle, jobs <-chan job, results chan<- result) {
	for j := range jobs {
		obj := bucket.Object(j.attrs.Name)
		copier := obj.CopierFrom(obj)
		copier.StorageClass = string(j.class)

		// Carry over the original attributes so they aren't lost by the copy.
		copier.ContentType = j.attrs.ContentType
		copier.ContentLanguage = j.attrs.ContentLanguage
		copier.CacheControl = j.attrs.CacheControl
		copier.ACL = j.attrs.ACL
		copier.PredefinedACL = j.attrs.PredefinedACL
		copier.ContentEncoding = j.attrs.ContentEncoding
		copier.ContentDisposition = j.attrs.ContentDisposition
		copier.Metadata = j.attrs.Metadata

		_, err := copier.Run(ctx)
		results <- result{j, err}
	}
}