github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/ee/backup/s3_handler.go (about) 1 // +build !oss 2 3 /* 4 * Copyright 2018 Dgraph Labs, Inc. and Contributors 5 * 6 * Licensed under the Dgraph Community License (the "License"); you 7 * may not use this file except in compliance with the License. You 8 * may obtain a copy of the License at 9 * 10 * https://github.com/dgraph-io/dgraph/blob/master/licenses/DCL.txt 11 */ 12 13 package backup 14 15 import ( 16 "encoding/json" 17 "fmt" 18 "io" 19 "net/url" 20 "os" 21 "path/filepath" 22 "sort" 23 "strings" 24 "time" 25 26 "github.com/dgraph-io/dgraph/protos/pb" 27 "github.com/dgraph-io/dgraph/x" 28 29 "github.com/golang/glog" 30 minio "github.com/minio/minio-go" 31 "github.com/minio/minio-go/pkg/credentials" 32 "github.com/minio/minio-go/pkg/s3utils" 33 "github.com/pkg/errors" 34 ) 35 36 const ( 37 // Shown in transfer logs 38 appName = "Dgraph" 39 40 // defaultEndpointS3 is used with s3 scheme when no host is provided 41 defaultEndpointS3 = "s3.amazonaws.com" 42 43 // s3AccelerateSubstr S3 acceleration is enabled if the S3 host is contains this substring. 44 // See http://docs.aws.amazon.com/AmazonS3/latest/dev/transfer-acceleration.html 45 s3AccelerateSubstr = "s3-accelerate" 46 ) 47 48 // s3Handler is used for 's3:' and 'minio:' URI schemes. 49 type s3Handler struct { 50 bucketName, objectPrefix string 51 pwriter *io.PipeWriter 52 preader *io.PipeReader 53 cerr chan error 54 req *pb.BackupRequest 55 uri *url.URL 56 } 57 58 func (h *s3Handler) credentialsInRequest() bool { 59 return h.req.GetAccessKey() != "" && h.req.GetSecretKey() != "" 60 } 61 62 // setup creates a new session, checks valid bucket at uri.Path, and configures a minio client. 63 // setup also fills in values used by the handler in subsequent calls. 64 // Returns a new S3 minio client, otherwise a nil client with an error. 65 func (h *s3Handler) setup(uri *url.URL) (*minio.Client, error) { 66 if len(uri.Path) < 1 { 67 return nil, errors.Errorf("Invalid bucket: %q", uri.Path) 68 } 69 70 glog.V(2).Infof("Backup using host: %s, path: %s", uri.Host, uri.Path) 71 72 var creds credentials.Value 73 if h.req.GetAnonymous() { 74 // No need to setup credentials. 75 } else if !h.credentialsInRequest() { 76 var provider credentials.Provider 77 switch uri.Scheme { 78 case "s3": 79 // s3:///bucket/folder 80 if !strings.Contains(uri.Host, ".") { 81 uri.Host = defaultEndpointS3 82 } 83 if !s3utils.IsAmazonEndpoint(*uri) { 84 return nil, errors.Errorf("Invalid S3 endpoint %q", uri.Host) 85 } 86 // Access Key ID: AWS_ACCESS_KEY_ID or AWS_ACCESS_KEY. 87 // Secret Access Key: AWS_SECRET_ACCESS_KEY or AWS_SECRET_KEY. 88 // Secret Token: AWS_SESSION_TOKEN. 89 provider = &credentials.EnvAWS{} 90 91 default: // minio 92 if uri.Host == "" { 93 return nil, errors.Errorf("Minio handler requires a host") 94 } 95 // Access Key ID: MINIO_ACCESS_KEY. 96 // Secret Access Key: MINIO_SECRET_KEY. 97 provider = &credentials.EnvMinio{} 98 } 99 100 // If no credentials can be retrieved, an attempt to access the destination 101 // with no credentials will be made. 102 creds, _ = provider.Retrieve() // error is always nil 103 } else { 104 creds.AccessKeyID = h.req.GetAccessKey() 105 creds.SecretAccessKey = h.req.GetSecretKey() 106 creds.SessionToken = h.req.GetSessionToken() 107 } 108 109 secure := uri.Query().Get("secure") != "false" // secure by default 110 111 mc, err := minio.New(uri.Host, creds.AccessKeyID, creds.SecretAccessKey, secure) 112 if err != nil { 113 return nil, err 114 } 115 116 // Set client app name "Dgraph/v1.0.x" 117 mc.SetAppInfo(appName, x.Version()) 118 119 // S3 transfer acceleration support. 120 if uri.Scheme == "s3" && strings.Contains(uri.Host, s3AccelerateSubstr) { 121 mc.SetS3TransferAccelerate(uri.Host) 122 } 123 124 // enable HTTP tracing 125 if uri.Query().Get("trace") == "true" { 126 mc.TraceOn(os.Stderr) 127 } 128 129 // split path into bucketName and blobPrefix 130 parts := strings.Split(uri.Path[1:], "/") 131 h.bucketName = parts[0] // bucket 132 133 // verify the requested bucket exists. 134 found, err := mc.BucketExists(h.bucketName) 135 if err != nil { 136 return nil, errors.Wrapf(err, "while looking for bucket %s at host %s", 137 h.bucketName, uri.Host) 138 } 139 if !found { 140 return nil, errors.Errorf("Bucket was not found: %s", h.bucketName) 141 } 142 if len(parts) > 1 { 143 h.objectPrefix = filepath.Join(parts[1:]...) 144 } 145 146 return mc, err 147 } 148 149 func (h *s3Handler) createObject(uri *url.URL, req *pb.BackupRequest, mc *minio.Client, 150 objectName string) { 151 152 // The backup object is: folder1...folderN/dgraph.20181106.0113/r110001-g1.backup 153 object := filepath.Join(h.objectPrefix, fmt.Sprintf(backupPathFmt, req.UnixTs), 154 objectName) 155 glog.V(2).Infof("Sending data to %s blob %q ...", uri.Scheme, object) 156 157 h.cerr = make(chan error, 1) 158 h.preader, h.pwriter = io.Pipe() 159 go func() { 160 h.cerr <- h.upload(mc, object) 161 }() 162 } 163 164 // GetLatestManifest reads the manifests at the given URL and returns the 165 // latest manifest. 166 func (h *s3Handler) GetLatestManifest(uri *url.URL) (*Manifest, error) { 167 mc, err := h.setup(uri) 168 if err != nil { 169 return nil, err 170 } 171 172 // Find the max Since value from the latest backup. 173 var lastManifest string 174 done := make(chan struct{}) 175 defer close(done) 176 suffix := "/" + backupManifest 177 for object := range mc.ListObjects(h.bucketName, h.objectPrefix, true, done) { 178 if strings.HasSuffix(object.Key, suffix) && object.Key > lastManifest { 179 lastManifest = object.Key 180 } 181 } 182 183 var m Manifest 184 if lastManifest == "" { 185 return &m, nil 186 } 187 188 if err := h.readManifest(mc, lastManifest, &m); err != nil { 189 return nil, err 190 } 191 return &m, nil 192 } 193 194 // CreateBackupFile creates a new session and prepares the data stream for the backup. 195 // URI formats: 196 // minio://<host>/bucket/folder1.../folderN?secure=true|false 197 // minio://<host:port>/bucket/folder1.../folderN?secure=true|false 198 // s3://<s3 region endpoint>/bucket/folder1.../folderN?secure=true|false 199 // s3:///bucket/folder1.../folderN?secure=true|false (use default S3 endpoint) 200 func (h *s3Handler) CreateBackupFile(uri *url.URL, req *pb.BackupRequest) error { 201 glog.V(2).Infof("S3Handler got uri: %+v. Host: %s. Path: %s\n", uri, uri.Host, uri.Path) 202 203 h.req = req 204 mc, err := h.setup(uri) 205 if err != nil { 206 return err 207 } 208 209 objectName := backupName(req.ReadTs, req.GroupId) 210 h.createObject(uri, req, mc, objectName) 211 return nil 212 } 213 214 // CreateManifest finishes a backup by creating an object to store the manifest. 215 func (h *s3Handler) CreateManifest(uri *url.URL, req *pb.BackupRequest) error { 216 glog.V(2).Infof("S3Handler got uri: %+v. Host: %s. Path: %s\n", uri, uri.Host, uri.Path) 217 218 h.req = req 219 mc, err := h.setup(uri) 220 if err != nil { 221 return err 222 } 223 224 h.createObject(uri, req, mc, backupManifest) 225 return nil 226 } 227 228 // readManifest reads a manifest file at path using the handler. 229 // Returns nil on success, otherwise an error. 230 func (h *s3Handler) readManifest(mc *minio.Client, object string, m *Manifest) error { 231 reader, err := mc.GetObject(h.bucketName, object, minio.GetObjectOptions{}) 232 if err != nil { 233 return err 234 } 235 defer reader.Close() 236 return json.NewDecoder(reader).Decode(m) 237 } 238 239 // Load creates a new session, scans for backup objects in a bucket, then tries to 240 // load any backup objects found. 241 // Returns nil and the maximum Since value on success, error otherwise. 242 func (h *s3Handler) Load(uri *url.URL, backupId string, fn loadFn) (uint64, error) { 243 mc, err := h.setup(uri) 244 if err != nil { 245 return 0, err 246 } 247 248 var paths []string 249 250 doneCh := make(chan struct{}) 251 defer close(doneCh) 252 253 suffix := "/" + backupManifest 254 for object := range mc.ListObjects(h.bucketName, h.objectPrefix, true, doneCh) { 255 if strings.HasSuffix(object.Key, suffix) { 256 paths = append(paths, object.Key) 257 } 258 } 259 if len(paths) == 0 { 260 return 0, errors.Errorf("No manifests found at: %s", uri.String()) 261 } 262 sort.Strings(paths) 263 if glog.V(3) { 264 fmt.Printf("Found backup manifest(s) %s: %v\n", uri.Scheme, paths) 265 } 266 267 // since is returned with the max manifest Since value found. 268 var since uint64 269 270 // Read and filter the manifests to get the list of manifests to consider 271 // for this restore operation. 272 var manifests []*Manifest 273 for _, path := range paths { 274 var m Manifest 275 if err := h.readManifest(mc, path, &m); err != nil { 276 return 0, errors.Wrapf(err, "While reading %q", path) 277 } 278 m.Path = path 279 manifests = append(manifests, &m) 280 } 281 manifests, err = filterManifests(manifests, backupId) 282 if err != nil { 283 return 0, err 284 } 285 286 // Process each manifest, first check that they are valid and then confirm the 287 // backup manifests for each group exist. Each group in manifest must have a backup file, 288 // otherwise this is a failure and the user must remedy. 289 for i, manifest := range manifests { 290 if manifest.Since == 0 || len(manifest.Groups) == 0 { 291 if glog.V(2) { 292 fmt.Printf("Restore: skip backup: %#v\n", manifest) 293 } 294 continue 295 } 296 297 path := filepath.Dir(manifests[i].Path) 298 for gid := range manifest.Groups { 299 object := filepath.Join(path, backupName(manifest.Since, gid)) 300 reader, err := mc.GetObject(h.bucketName, object, minio.GetObjectOptions{}) 301 if err != nil { 302 return 0, errors.Wrapf(err, "Failed to get %q", object) 303 } 304 defer reader.Close() 305 306 st, err := reader.Stat() 307 if err != nil { 308 return 0, errors.Wrapf(err, "Stat failed %q", object) 309 } 310 if st.Size <= 0 { 311 return 0, errors.Errorf("Remote object is empty or inaccessible: %s", object) 312 } 313 fmt.Printf("Downloading %q, %d bytes\n", object, st.Size) 314 315 // Only restore the predicates that were assigned to this group at the time 316 // of the last backup. 317 predSet := manifests[len(manifests)-1].getPredsInGroup(gid) 318 if err = fn(reader, int(gid), predSet); err != nil { 319 return 0, err 320 } 321 } 322 since = manifest.Since 323 } 324 return since, nil 325 } 326 327 // ListManifests loads the manifests in the locations and returns them. 328 func (h *s3Handler) ListManifests(uri *url.URL) ([]string, error) { 329 mc, err := h.setup(uri) 330 if err != nil { 331 return nil, err 332 } 333 h.uri = uri 334 335 var manifests []string 336 doneCh := make(chan struct{}) 337 defer close(doneCh) 338 339 suffix := "/" + backupManifest 340 for object := range mc.ListObjects(h.bucketName, h.objectPrefix, true, doneCh) { 341 if strings.HasSuffix(object.Key, suffix) { 342 manifests = append(manifests, object.Key) 343 } 344 } 345 if len(manifests) == 0 { 346 return nil, errors.Errorf("No manifests found at: %s", uri.String()) 347 } 348 sort.Strings(manifests) 349 if glog.V(3) { 350 fmt.Printf("Found backup manifest(s) %s: %v\n", uri.Scheme, manifests) 351 } 352 return manifests, nil 353 } 354 355 func (h *s3Handler) ReadManifest(path string, m *Manifest) error { 356 mc, err := h.setup(h.uri) 357 if err != nil { 358 return err 359 } 360 361 return h.readManifest(mc, path, m) 362 } 363 364 // upload will block until it's done or an error occurs. 365 func (h *s3Handler) upload(mc *minio.Client, object string) error { 366 start := time.Now() 367 368 // We don't need to have a progress object, because we're using a Pipe. A write to Pipe would 369 // block until it can be fully read. So, the rate of the writes here would be equal to the rate 370 // of upload. We're already tracking progress of the writes in stream.Lists, so no need to track 371 // the progress of read. By definition, it must be the same. 372 n, err := mc.PutObject(h.bucketName, object, h.preader, -1, minio.PutObjectOptions{}) 373 glog.V(2).Infof("Backup sent %d bytes. Time elapsed: %s", 374 n, time.Since(start).Round(time.Second)) 375 376 if err != nil { 377 // This should cause Write to fail as well. 378 glog.Errorf("Backup: Closing RW pipe due to error: %v", err) 379 h.pwriter.Close() 380 h.preader.Close() 381 } 382 return err 383 } 384 385 func (h *s3Handler) Close() error { 386 // Done buffering, send EOF. 387 if err := h.pwriter.CloseWithError(nil); err != nil && err != io.EOF { 388 glog.Errorf("Unexpected error when closing pipe: %v", err) 389 } 390 glog.V(2).Infof("Backup waiting for upload to complete.") 391 return <-h.cerr 392 } 393 394 func (h *s3Handler) Write(b []byte) (int, error) { 395 return h.pwriter.Write(b) 396 }