github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/ee/backup/handler.go (about)

     1  // +build !oss
     2  
     3  /*
     4   * Copyright 2018 Dgraph Labs, Inc. and Contributors
     5   *
     6   * Licensed under the Dgraph Community License (the "License"); you
     7   * may not use this file except in compliance with the License. You
     8   * may obtain a copy of the License at
     9   *
    10   *     https://github.com/dgraph-io/dgraph/blob/master/licenses/DCL.txt
    11   */
    12  
    13  package backup
    14  
    15  import (
    16  	"fmt"
    17  	"io"
    18  	"net/url"
    19  
    20  	"github.com/dgraph-io/dgraph/protos/pb"
    21  
    22  	"github.com/pkg/errors"
    23  )
    24  
    25  const (
    26  	// backupPathFmt defines the path to store or index backup objects.
    27  	// The expected parameter is a date in string format.
    28  	backupPathFmt = `dgraph.%s`
    29  
    30  	// backupNameFmt defines the name of backups files or objects (remote).
    31  	// The first parameter is the read timestamp at the time of backup. This is used for
    32  	// incremental backups and partial restore.
    33  	// The second parameter is the group ID when backup happened. This is used for partitioning
    34  	// the posting directories 'p' during restore.
    35  	backupNameFmt = `r%d-g%d.backup`
    36  
    37  	// backupManifest is the name of backup manifests. This a JSON file that contains the
    38  	// details of the backup. A backup dir without a manifest is ignored.
    39  	//
    40  	// Example manifest:
    41  	// {
    42  	//   "since": 2280,
    43  	//   "groups": [ 1, 2, 3 ],
    44  	// }
    45  	//
    46  	// "since" is the read timestamp used at the backup request. This value is called "since"
    47  	// because it used by subsequent incremental backups.
    48  	// "groups" are the group IDs that participated.
    49  	backupManifest = `manifest.json`
    50  )
    51  
    52  // UriHandler interface is implemented by URI scheme handlers.
    53  // When adding new scheme handles, for example 'azure://', an object will implement
    54  // this interface to supply Dgraph with a way to create or load backup files into DB.
    55  // For all methods below, the URL object is parsed as described in `newHandler' and
    56  // the Processor object has the DB, estimated tablets size, and backup parameters.
    57  type UriHandler interface {
    58  	// Handlers must know how to Write to their URI location.
    59  	// These function calls are used by both Create and Load.
    60  	io.WriteCloser
    61  
    62  	// GetLatestManifest reads the manifests at the given URL and returns the
    63  	// latest manifest.
    64  	GetLatestManifest(*url.URL) (*Manifest, error)
    65  
    66  	// CreateBackupFile prepares the object or file to save the backup file.
    67  	CreateBackupFile(*url.URL, *pb.BackupRequest) error
    68  
    69  	// CreateManifest prepares the manifest for writing.
    70  	CreateManifest(*url.URL, *pb.BackupRequest) error
    71  
    72  	// Load will scan location URI for backup files, then load them via loadFn.
    73  	// It optionally takes the name of the last directory to consider. Any backup directories
    74  	// created after will be ignored.
    75  	// Objects implementing this function will be used for retrieving (dowload) backup files
    76  	// and loading the data into a DB. The restore CLI command uses this call.
    77  	Load(*url.URL, string, loadFn) (uint64, error)
    78  
    79  	// ListManifests will scan the provided URI and return the paths to the manifests stored
    80  	// in that location.
    81  	ListManifests(*url.URL) ([]string, error)
    82  
    83  	// ReadManifest will read the manifest at the given location and load it into the given
    84  	// Manifest object.
    85  	ReadManifest(string, *Manifest) error
    86  }
    87  
    88  // getHandler returns a UriHandler for the URI scheme.
    89  func getHandler(scheme string) UriHandler {
    90  	switch scheme {
    91  	case "file", "":
    92  		return &fileHandler{}
    93  	case "minio", "s3":
    94  		return &s3Handler{}
    95  	}
    96  	return nil
    97  }
    98  
    99  // NewUriHandler parses the requested URI and finds the corresponding UriHandler.
   100  // Target URI formats:
   101  //   [scheme]://[host]/[path]?[args]
   102  //   [scheme]:///[path]?[args]
   103  //   /[path]?[args] (only for local or NFS)
   104  //
   105  // Target URI parts:
   106  //   scheme - service handler, one of: "file", "s3", "minio"
   107  //     host - remote address. ex: "dgraph.s3.amazonaws.com"
   108  //     path - directory, bucket or container at target. ex: "/dgraph/backups/"
   109  //     args - specific arguments that are ok to appear in logs.
   110  //
   111  // Global args (if supported by the handler):
   112  //     secure - true|false turn on/off TLS.
   113  //      trace - true|false turn on/off HTTP tracing.
   114  //   compress - true|false turn on/off data compression.
   115  //    encrypt - true|false turn on/off data encryption.
   116  //
   117  // Examples:
   118  //   s3://dgraph.s3.amazonaws.com/dgraph/backups?secure=true
   119  //   minio://localhost:9000/dgraph?secure=true
   120  //   file:///tmp/dgraph/backups
   121  //   /tmp/dgraph/backups?compress=gzip
   122  func NewUriHandler(uri *url.URL) (UriHandler, error) {
   123  	h := getHandler(uri.Scheme)
   124  	if h == nil {
   125  		return nil, errors.Errorf("Unable to handle url: %s", uri)
   126  	}
   127  
   128  	return h, nil
   129  }
   130  
   131  // predicateSet is a map whose keys are predicates. It is meant to be used as a set.
   132  type predicateSet map[string]struct{}
   133  
   134  // loadFn is a function that will receive the current file being read.
   135  // A reader, the backup groupId, and a map whose keys are the predicates to restore
   136  // are passed as arguments.
   137  type loadFn func(reader io.Reader, groupId int, preds predicateSet) error
   138  
   139  // Load will scan location l for backup files in the given backup series and load them
   140  // sequentially. Returns the maximum Since value on success, otherwise an error.
   141  func Load(location, backupId string, fn loadFn) (since uint64, err error) {
   142  	uri, err := url.Parse(location)
   143  	if err != nil {
   144  		return 0, err
   145  	}
   146  
   147  	h := getHandler(uri.Scheme)
   148  	if h == nil {
   149  		return 0, errors.Errorf("Unsupported URI: %v", uri)
   150  	}
   151  
   152  	return h.Load(uri, backupId, fn)
   153  }
   154  
   155  // ListManifests scans location l for backup files and returns the list of manifests.
   156  func ListManifests(l string) (map[string]*Manifest, error) {
   157  	uri, err := url.Parse(l)
   158  	if err != nil {
   159  		return nil, err
   160  	}
   161  
   162  	h := getHandler(uri.Scheme)
   163  	if h == nil {
   164  		return nil, errors.Errorf("Unsupported URI: %v", uri)
   165  	}
   166  
   167  	paths, err := h.ListManifests(uri)
   168  	if err != nil {
   169  		return nil, err
   170  	}
   171  
   172  	listedManifests := make(map[string]*Manifest)
   173  	for _, path := range paths {
   174  		var m Manifest
   175  		if err := h.ReadManifest(path, &m); err != nil {
   176  			return nil, errors.Wrapf(err, "While reading %q", path)
   177  		}
   178  		listedManifests[path] = &m
   179  	}
   180  
   181  	return listedManifests, nil
   182  }
   183  
   184  // filterManifests takes a list of manifests and returns the list of manifests
   185  // that should be considered during a restore.
   186  func filterManifests(manifests []*Manifest, backupId string) ([]*Manifest, error) {
   187  	// Go through the files in reverse order and stop when the latest full backup is found.
   188  	var filteredManifests []*Manifest
   189  	for i := len(manifests) - 1; i >= 0; i-- {
   190  		// If backupId is not empty, skip all the manifests that do not match the given
   191  		// backupId. If it's empty, do not skip any manifests as the default behavior is
   192  		// to restore the latest series of backups.
   193  		if len(backupId) > 0 && manifests[i].BackupId != backupId {
   194  			fmt.Printf("Restore: skip manifest %s as it's not part of the series with uid %s.\n",
   195  				manifests[i].Path, backupId)
   196  			continue
   197  		}
   198  
   199  		filteredManifests = append(filteredManifests, manifests[i])
   200  		if manifests[i].Type == "full" {
   201  			break
   202  		}
   203  	}
   204  
   205  	// Reverse the filtered lists since the original iteration happened in reverse.
   206  	for i := len(filteredManifests)/2 - 1; i >= 0; i-- {
   207  		opp := len(filteredManifests) - 1 - i
   208  		filteredManifests[i], filteredManifests[opp] = filteredManifests[opp], filteredManifests[i]
   209  	}
   210  
   211  	if err := verifyManifests(filteredManifests); err != nil {
   212  		return nil, err
   213  	}
   214  
   215  	return filteredManifests, nil
   216  }
   217  
   218  func verifyManifests(manifests []*Manifest) error {
   219  	if len(manifests) == 0 {
   220  		return nil
   221  	}
   222  
   223  	if manifests[0].BackupNum != 1 {
   224  		return errors.Errorf("expected a BackupNum value of 1 for first manifest but got %d",
   225  			manifests[0].BackupNum)
   226  	}
   227  
   228  	backupId := manifests[0].BackupId
   229  	var backupNum uint64
   230  	for _, manifest := range manifests {
   231  		if manifest.BackupId != backupId {
   232  			return errors.Errorf("found a manifest with backup ID %s but expected %s",
   233  				manifest.BackupId, backupId)
   234  		}
   235  
   236  		backupNum++
   237  		if manifest.BackupNum != backupNum {
   238  			return errors.Errorf("found a manifest with backup number %d but expected %d",
   239  				manifest.BackupNum, backupNum)
   240  		}
   241  	}
   242  
   243  	return nil
   244  }
   245  
   246  func backupName(since uint64, groupId uint32) string {
   247  	return fmt.Sprintf(backupNameFmt, since, groupId)
   248  }