github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/ee/backup/s3_handler.go

// +build !oss

/*
 * Copyright 2018 Dgraph Labs, Inc. and Contributors
 *
 * Licensed under the Dgraph Community License (the "License"); you
 * may not use this file except in compliance with the License. You
 * may obtain a copy of the License at
 *
 *     https://github.com/dgraph-io/dgraph/blob/master/licenses/DCL.txt
 */

package backup

import (
	"encoding/json"
	"fmt"
	"io"
	"net/url"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"time"

	"github.com/dgraph-io/dgraph/protos/pb"
	"github.com/dgraph-io/dgraph/x"

	"github.com/golang/glog"
	minio "github.com/minio/minio-go"
	"github.com/minio/minio-go/pkg/credentials"
	"github.com/minio/minio-go/pkg/s3utils"
	"github.com/pkg/errors"
)

const (
	// Shown in transfer logs
	appName = "Dgraph"

	// defaultEndpointS3 is used with the s3 scheme when no host is provided.
	defaultEndpointS3 = "s3.amazonaws.com"

	// s3AccelerateSubstr: S3 transfer acceleration is enabled if the S3 host contains this
	// substring. See http://docs.aws.amazon.com/AmazonS3/latest/dev/transfer-acceleration.html
	s3AccelerateSubstr = "s3-accelerate"
)

// s3Handler is used for 's3:' and 'minio:' URI schemes.
type s3Handler struct {
	bucketName, objectPrefix string            // parsed from the URI path by setup
	pwriter                  *io.PipeWriter    // backup data written via Write goes here...
	preader                  *io.PipeReader    // ...and is streamed to PutObject by upload
	cerr                     chan error        // reports the result of the upload goroutine
	req                      *pb.BackupRequest // the request being processed
	uri                      *url.URL          // set by ListManifests, reused by ReadManifest
}

func (h *s3Handler) credentialsInRequest() bool {
	return h.req.GetAccessKey() != "" && h.req.GetSecretKey() != ""
}

// setup creates a new session, checks that the bucket at uri.Path is valid, and configures
// a minio client. It also fills in values used by the handler in subsequent calls.
// Returns a new minio client on success, otherwise a nil client and an error.
func (h *s3Handler) setup(uri *url.URL) (*minio.Client, error) {
	if len(uri.Path) < 1 {
		return nil, errors.Errorf("Invalid bucket: %q", uri.Path)
	}

	glog.V(2).Infof("Backup using host: %s, path: %s", uri.Host, uri.Path)

	var creds credentials.Value
	if h.req.GetAnonymous() {
		// No need to set up credentials.
	} else if !h.credentialsInRequest() {
		var provider credentials.Provider
		switch uri.Scheme {
		case "s3":
			// s3:///bucket/folder
			if !strings.Contains(uri.Host, ".") {
				uri.Host = defaultEndpointS3
			}
			if !s3utils.IsAmazonEndpoint(*uri) {
				return nil, errors.Errorf("Invalid S3 endpoint %q", uri.Host)
			}
			// Access Key ID:     AWS_ACCESS_KEY_ID or AWS_ACCESS_KEY.
			// Secret Access Key: AWS_SECRET_ACCESS_KEY or AWS_SECRET_KEY.
			// Session Token:     AWS_SESSION_TOKEN.
			provider = &credentials.EnvAWS{}

		default: // minio
			if uri.Host == "" {
				return nil, errors.Errorf("Minio handler requires a host")
			}
			// Access Key ID:     MINIO_ACCESS_KEY.
			// Secret Access Key: MINIO_SECRET_KEY.
			provider = &credentials.EnvMinio{}
		}
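
		// For illustration only (hypothetical values): with the minio scheme, the
		// process performing the backup would typically have something like
		//   MINIO_ACCESS_KEY=backupuser
		//   MINIO_SECRET_KEY=backupsecret
		// in its environment, while the s3 scheme picks up AWS_ACCESS_KEY_ID,
		// AWS_SECRET_ACCESS_KEY and (optionally) AWS_SESSION_TOKEN instead.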

		// If no credentials can be retrieved, an attempt to access the destination
		// with no credentials will be made.
		creds, _ = provider.Retrieve() // error is always nil
	} else {
		creds.AccessKeyID = h.req.GetAccessKey()
		creds.SecretAccessKey = h.req.GetSecretKey()
		creds.SessionToken = h.req.GetSessionToken()
	}

	secure := uri.Query().Get("secure") != "false" // secure by default

	mc, err := minio.New(uri.Host, creds.AccessKeyID, creds.SecretAccessKey, secure)
	if err != nil {
		return nil, err
	}

	// Set client app name "Dgraph/v1.0.x"
	mc.SetAppInfo(appName, x.Version())

	// S3 transfer acceleration support.
	if uri.Scheme == "s3" && strings.Contains(uri.Host, s3AccelerateSubstr) {
		mc.SetS3TransferAccelerate(uri.Host)
	}

	// Enable HTTP tracing.
	if uri.Query().Get("trace") == "true" {
		mc.TraceOn(os.Stderr)
	}

	// Split the path into the bucket name and the object prefix. For example
	// (hypothetical names), a path of /dgraph-backups/nightly/2020 yields
	// bucketName "dgraph-backups" and objectPrefix "nightly/2020".
	parts := strings.Split(uri.Path[1:], "/")
	h.bucketName = parts[0] // bucket

	// Verify the requested bucket exists.
	found, err := mc.BucketExists(h.bucketName)
	if err != nil {
		return nil, errors.Wrapf(err, "while looking for bucket %s at host %s",
			h.bucketName, uri.Host)
	}
	if !found {
		return nil, errors.Errorf("Bucket was not found: %s", h.bucketName)
	}
	if len(parts) > 1 {
		h.objectPrefix = filepath.Join(parts[1:]...)
	}

	return mc, err
}

func (h *s3Handler) createObject(uri *url.URL, req *pb.BackupRequest, mc *minio.Client,
	objectName string) {

	// The backup object is: folder1...folderN/dgraph.20181106.0113/r110001-g1.backup
	object := filepath.Join(h.objectPrefix, fmt.Sprintf(backupPathFmt, req.UnixTs),
		objectName)
	glog.V(2).Infof("Sending data to %s blob %q ...", uri.Scheme, object)

	h.cerr = make(chan error, 1)
	h.preader, h.pwriter = io.Pipe()
	go func() {
		h.cerr <- h.upload(mc, object)
	}()
}
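
// For orientation, a sketch of how the pieces above and below fit together (the actual
// caller lives elsewhere in the backup package; "chunk" is a placeholder for backup data):
//
//	h.CreateBackupFile(uri, req) // starts the upload goroutine via createObject
//	h.Write(chunk)               // repeated; each write blocks until PutObject has read it
//	h.Close()                    // closes the pipe, then waits on h.cerr for the upload result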

// GetLatestManifest reads the manifests at the given URL and returns the
// latest manifest.
func (h *s3Handler) GetLatestManifest(uri *url.URL) (*Manifest, error) {
	mc, err := h.setup(uri)
	if err != nil {
		return nil, err
	}

	// Find the latest manifest. Object keys embed the backup timestamp
	// (e.g. dgraph.20181106.0113), so the lexicographically largest key that
	// ends with the manifest suffix belongs to the most recent backup.
	var lastManifest string
	done := make(chan struct{})
	defer close(done)
	suffix := "/" + backupManifest
	for object := range mc.ListObjects(h.bucketName, h.objectPrefix, true, done) {
		if strings.HasSuffix(object.Key, suffix) && object.Key > lastManifest {
			lastManifest = object.Key
		}
	}

	var m Manifest
	if lastManifest == "" {
		return &m, nil
	}

	if err := h.readManifest(mc, lastManifest, &m); err != nil {
		return nil, err
	}
	return &m, nil
}

// CreateBackupFile creates a new session and prepares the data stream for the backup.
// URI formats:
//   minio://<host>/bucket/folder1.../folderN?secure=true|false
//   minio://<host:port>/bucket/folder1.../folderN?secure=true|false
//   s3://<s3 region endpoint>/bucket/folder1.../folderN?secure=true|false
//   s3:///bucket/folder1.../folderN?secure=true|false (use default S3 endpoint)
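//
// For illustration only (hypothetical hosts and bucket names), concrete destinations could
// look like:
//   minio://localhost:9000/dgraph-backups/nightly?secure=false
//   s3://s3.us-west-2.amazonaws.com/dgraph-backups/nightly
//   s3:///dgraph-backups/nightly (uses the default endpoint s3.amazonaws.com)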
func (h *s3Handler) CreateBackupFile(uri *url.URL, req *pb.BackupRequest) error {
	glog.V(2).Infof("S3Handler got uri: %+v. Host: %s. Path: %s\n", uri, uri.Host, uri.Path)

	h.req = req
	mc, err := h.setup(uri)
	if err != nil {
		return err
	}

	objectName := backupName(req.ReadTs, req.GroupId)
	h.createObject(uri, req, mc, objectName)
	return nil
}

// CreateManifest finishes a backup by creating an object to store the manifest.
func (h *s3Handler) CreateManifest(uri *url.URL, req *pb.BackupRequest) error {
	glog.V(2).Infof("S3Handler got uri: %+v. Host: %s. Path: %s\n", uri, uri.Host, uri.Path)

	h.req = req
	mc, err := h.setup(uri)
	if err != nil {
		return err
	}

	h.createObject(uri, req, mc, backupManifest)
	return nil
}

// readManifest reads a manifest file at path using the handler.
// Returns nil on success, otherwise an error.
func (h *s3Handler) readManifest(mc *minio.Client, object string, m *Manifest) error {
	reader, err := mc.GetObject(h.bucketName, object, minio.GetObjectOptions{})
	if err != nil {
		return err
	}
	defer reader.Close()
	return json.NewDecoder(reader).Decode(m)
}

// Load creates a new session, scans for backup objects in a bucket, and then tries to
// load any backup objects found.
// Returns the maximum Since value on success, otherwise an error.
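//
// For example (following the object layout described in createObject, and assuming the
// manifest object name resolves to manifest.json), a manifest stored at
//   folder1/dgraph.20181106.0113/manifest.json
// with Since 110001 and groups 1 and 2 results in
//   folder1/dgraph.20181106.0113/r110001-g1.backup
//   folder1/dgraph.20181106.0113/r110001-g2.backup
// being opened and streamed to fn.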
func (h *s3Handler) Load(uri *url.URL, backupId string, fn loadFn) (uint64, error) {
	mc, err := h.setup(uri)
	if err != nil {
		return 0, err
	}

	var paths []string

	doneCh := make(chan struct{})
	defer close(doneCh)

	suffix := "/" + backupManifest
	for object := range mc.ListObjects(h.bucketName, h.objectPrefix, true, doneCh) {
		if strings.HasSuffix(object.Key, suffix) {
			paths = append(paths, object.Key)
		}
	}
	if len(paths) == 0 {
		return 0, errors.Errorf("No manifests found at: %s", uri.String())
	}
	sort.Strings(paths)
	if glog.V(3) {
		fmt.Printf("Found backup manifest(s) %s: %v\n", uri.Scheme, paths)
	}

	// since is returned with the max manifest Since value found.
	var since uint64

	// Read and filter the manifests to get the list of manifests to consider
	// for this restore operation.
	var manifests []*Manifest
	for _, path := range paths {
		var m Manifest
		if err := h.readManifest(mc, path, &m); err != nil {
			return 0, errors.Wrapf(err, "While reading %q", path)
		}
		m.Path = path
		manifests = append(manifests, &m)
	}
	manifests, err = filterManifests(manifests, backupId)
	if err != nil {
		return 0, err
	}

	// Process each manifest: first check that it is valid, then confirm that a backup
	// file exists for each of its groups. Every group listed in a manifest must have a
	// backup file, otherwise this is a failure and the user must remedy it.
	for i, manifest := range manifests {
		if manifest.Since == 0 || len(manifest.Groups) == 0 {
			if glog.V(2) {
				fmt.Printf("Restore: skip backup: %#v\n", manifest)
			}
			continue
		}

		path := filepath.Dir(manifests[i].Path)
		for gid := range manifest.Groups {
			object := filepath.Join(path, backupName(manifest.Since, gid))
			reader, err := mc.GetObject(h.bucketName, object, minio.GetObjectOptions{})
			if err != nil {
				return 0, errors.Wrapf(err, "Failed to get %q", object)
			}
			defer reader.Close()

			st, err := reader.Stat()
			if err != nil {
				return 0, errors.Wrapf(err, "Stat failed %q", object)
			}
			if st.Size <= 0 {
				return 0, errors.Errorf("Remote object is empty or inaccessible: %s", object)
			}
			fmt.Printf("Downloading %q, %d bytes\n", object, st.Size)

			// Only restore the predicates that were assigned to this group at the time
			// of the last backup.
			predSet := manifests[len(manifests)-1].getPredsInGroup(gid)
			if err = fn(reader, int(gid), predSet); err != nil {
				return 0, err
			}
		}
		since = manifest.Since
	}
	return since, nil
}

// ListManifests returns the paths of all manifest objects found at the given location.
func (h *s3Handler) ListManifests(uri *url.URL) ([]string, error) {
	mc, err := h.setup(uri)
	if err != nil {
		return nil, err
	}
	// Remember the URI so that ReadManifest can create its own client later.
	h.uri = uri

	var manifests []string
	doneCh := make(chan struct{})
	defer close(doneCh)

	suffix := "/" + backupManifest
	for object := range mc.ListObjects(h.bucketName, h.objectPrefix, true, doneCh) {
		if strings.HasSuffix(object.Key, suffix) {
			manifests = append(manifests, object.Key)
		}
	}
	if len(manifests) == 0 {
		return nil, errors.Errorf("No manifests found at: %s", uri.String())
	}
	sort.Strings(manifests)
	if glog.V(3) {
		fmt.Printf("Found backup manifest(s) %s: %v\n", uri.Scheme, manifests)
	}
	return manifests, nil
}

// ReadManifest reads the manifest at the given path, using the URI remembered from the
// last ListManifests call.
func (h *s3Handler) ReadManifest(path string, m *Manifest) error {
	mc, err := h.setup(h.uri)
	if err != nil {
		return err
	}

	return h.readManifest(mc, path, m)
}

// upload will block until it's done or an error occurs.
func (h *s3Handler) upload(mc *minio.Client, object string) error {
	start := time.Now()

	// We don't need a progress object because we're using a pipe: a write to the pipe
	// blocks until it has been fully read, so the rate of the writes here equals the rate
	// of the upload. Write progress is already tracked in stream.Lists, so there is no
	// need to track read progress separately; by definition it must be the same.
	n, err := mc.PutObject(h.bucketName, object, h.preader, -1, minio.PutObjectOptions{})
	glog.V(2).Infof("Backup sent %d bytes. Time elapsed: %s",
		n, time.Since(start).Round(time.Second))

	if err != nil {
		// This should cause Write to fail as well.
		glog.Errorf("Backup: Closing RW pipe due to error: %v", err)
		h.pwriter.Close()
		h.preader.Close()
	}
	return err
}

// Close signals the end of the backup stream by closing the pipe, then waits for the
// upload goroutine to finish and returns its result.
func (h *s3Handler) Close() error {
	// Done buffering, send EOF.
	if err := h.pwriter.CloseWithError(nil); err != nil && err != io.EOF {
		glog.Errorf("Unexpected error when closing pipe: %v", err)
	}
	glog.V(2).Infof("Backup waiting for upload to complete.")
	return <-h.cerr
}

// Write implements io.Writer by writing backup data into the pipe that feeds the upload.
func (h *s3Handler) Write(b []byte) (int, error) {
	return h.pwriter.Write(b)
}