github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/ee/backup/backup.go (about)

     1  // +build !oss
     2  
     3  /*
     4   * Copyright 2018 Dgraph Labs, Inc. and Contributors
     5   *
     6   * Licensed under the Dgraph Community License (the "License"); you
     7   * may not use this file except in compliance with the License. You
     8   * may obtain a copy of the License at
     9   *
    10   *     https://github.com/dgraph-io/dgraph/blob/master/licenses/DCL.txt
    11   */
    12  
    13  package backup
    14  
    15  import (
    16  	"compress/gzip"
    17  	"context"
    18  	"encoding/binary"
    19  	"encoding/hex"
    20  	"encoding/json"
    21  	"fmt"
    22  	"io"
    23  	"net/url"
    24  	"sync"
    25  
    26  	"github.com/dgraph-io/badger"
    27  	bpb "github.com/dgraph-io/badger/pb"
    28  	"github.com/golang/glog"
    29  	"github.com/pkg/errors"
    30  
    31  	"github.com/dgraph-io/dgraph/posting"
    32  	"github.com/dgraph-io/dgraph/protos/pb"
    33  	"github.com/dgraph-io/dgraph/x"
    34  )
    35  
// Processor handles the different stages of the backup process. Its methods
// stream the data of a backup to the request's destination (WriteBackup) and
// then write the manifest that finalizes it (CompleteBackup).
type Processor struct {
	// DB is the Badger pstore managed by this node. Backup data is streamed
	// out of it at the request's read timestamp.
	DB *badger.DB
	// Request stores the backup request containing the parameters for this backup
	// (destination, predicates, group ID, read and since timestamps).
	Request *pb.BackupRequest
}
    43  
// Manifest records backup details; these are the values used during restore.
// It is serialized as JSON when the backup is completed.
type Manifest struct {
	// NOTE(review): the embedded mutex presumably guards concurrent updates to
	// the manifest while it is being built — confirm against callers.
	sync.Mutex
	// Type is the type of backup, either full or incremental.
	Type string `json:"type"`
	// Since is the timestamp at which this backup was taken. It's called Since
	// because it will become the timestamp from which to backup in the next
	// incremental backup.
	Since uint64 `json:"since"`
	// Groups is the map of valid groups to predicates at the time the backup was created.
	// Keys are group IDs, values are the predicate names in that group.
	Groups map[uint32][]string `json:"groups"`
	// BackupId is a unique ID assigned to all the backups in the same series
	// (from the first full backup to the last incremental backup).
	BackupId string `json:"backup_id"`
	// BackupNum is a monotonically increasing number assigned to each backup in
	// a series. The full backup has BackupNum equal to one and each incremental
	// backup gets assigned the next available number. Used to verify the integrity
	// of the data during a restore.
	BackupNum uint64 `json:"backup_num"`
	// Path is the path to the manifest file. This field is only used during
	// processing and is not written to disk (it is excluded from the JSON output).
	Path string `json:"-"`
}
    70  
    71  func (m *Manifest) getPredsInGroup(gid uint32) predicateSet {
    72  	preds, ok := m.Groups[gid]
    73  	if !ok {
    74  		return nil
    75  	}
    76  
    77  	predSet := make(predicateSet)
    78  	for _, pred := range preds {
    79  		predSet[pred] = struct{}{}
    80  	}
    81  	return predSet
    82  }
    83  
    84  // WriteBackup uses the request values to create a stream writer then hand off the data
    85  // retrieval to stream.Orchestrate. The writer will create all the fd's needed to
    86  // collect the data and later move to the target.
    87  // Returns errors on failure, nil on success.
    88  func (pr *Processor) WriteBackup(ctx context.Context) (*pb.Status, error) {
    89  	var emptyRes pb.Status
    90  
    91  	if err := ctx.Err(); err != nil {
    92  		return nil, err
    93  	}
    94  
    95  	uri, err := url.Parse(pr.Request.Destination)
    96  	if err != nil {
    97  		return &emptyRes, err
    98  	}
    99  
   100  	handler, err := NewUriHandler(uri)
   101  	if err != nil {
   102  		return &emptyRes, err
   103  	}
   104  
   105  	if err := handler.CreateBackupFile(uri, pr.Request); err != nil {
   106  		return &emptyRes, err
   107  	}
   108  
   109  	glog.V(3).Infof("Backup manifest version: %d", pr.Request.SinceTs)
   110  
   111  	predMap := make(map[string]struct{})
   112  	for _, pred := range pr.Request.Predicates {
   113  		predMap[pred] = struct{}{}
   114  	}
   115  
   116  	var maxVersion uint64
   117  	gzWriter := gzip.NewWriter(handler)
   118  	stream := pr.DB.NewStreamAt(pr.Request.ReadTs)
   119  	stream.LogPrefix = "Dgraph.Backup"
   120  	stream.KeyToList = pr.toBackupList
   121  	stream.ChooseKey = func(item *badger.Item) bool {
   122  		parsedKey, err := x.Parse(item.Key())
   123  		if err != nil {
   124  			return false
   125  		}
   126  		_, ok := predMap[parsedKey.Attr]
   127  		return ok
   128  	}
   129  	stream.Send = func(list *bpb.KVList) error {
   130  		for _, kv := range list.Kv {
   131  			if maxVersion < kv.Version {
   132  				maxVersion = kv.Version
   133  			}
   134  		}
   135  		return writeKVList(list, gzWriter)
   136  	}
   137  
   138  	if err := stream.Orchestrate(context.Background()); err != nil {
   139  		glog.Errorf("While taking backup: %v", err)
   140  		return &emptyRes, err
   141  	}
   142  
   143  	if maxVersion > pr.Request.ReadTs {
   144  		glog.Errorf("Max timestamp seen during backup (%d) is greater than readTs (%d)",
   145  			maxVersion, pr.Request.ReadTs)
   146  	}
   147  
   148  	glog.V(2).Infof("Backup group %d version: %d", pr.Request.GroupId, pr.Request.ReadTs)
   149  	if err = gzWriter.Close(); err != nil {
   150  		glog.Errorf("While closing gzipped writer: %v", err)
   151  		return &emptyRes, err
   152  	}
   153  	if err = handler.Close(); err != nil {
   154  		glog.Errorf("While closing handler: %v", err)
   155  		return &emptyRes, err
   156  	}
   157  	glog.Infof("Backup complete: group %d at %d", pr.Request.GroupId, pr.Request.ReadTs)
   158  	return &emptyRes, nil
   159  }
   160  
   161  // CompleteBackup will finalize a backup by writing the manifest at the backup destination.
   162  func (pr *Processor) CompleteBackup(ctx context.Context, manifest *Manifest) error {
   163  	if err := ctx.Err(); err != nil {
   164  		return err
   165  	}
   166  
   167  	uri, err := url.Parse(pr.Request.Destination)
   168  	if err != nil {
   169  		return err
   170  	}
   171  
   172  	handler, err := NewUriHandler(uri)
   173  	if err != nil {
   174  		return err
   175  	}
   176  
   177  	if err := handler.CreateManifest(uri, pr.Request); err != nil {
   178  		return err
   179  	}
   180  
   181  	if err = json.NewEncoder(handler).Encode(manifest); err != nil {
   182  		return err
   183  	}
   184  
   185  	if err = handler.Close(); err != nil {
   186  		return err
   187  	}
   188  	glog.Infof("Backup completed OK.")
   189  	return nil
   190  }
   191  
   192  // GoString implements the GoStringer interface for Manifest.
   193  func (m *Manifest) GoString() string {
   194  	return fmt.Sprintf(`Manifest{Since: %d, Groups: %v}`, m.Since, m.Groups)
   195  }
   196  
   197  func (pr *Processor) toBackupList(key []byte, itr *badger.Iterator) (*bpb.KVList, error) {
   198  	list := &bpb.KVList{}
   199  
   200  	item := itr.Item()
   201  	if item.Version() < pr.Request.SinceTs || item.IsDeletedOrExpired() {
   202  		// Ignore versions less than given timestamp, or skip older versions of
   203  		// the given key by returning an empty list.
   204  		return list, nil
   205  	}
   206  
   207  	switch item.UserMeta() {
   208  	case posting.BitEmptyPosting, posting.BitCompletePosting, posting.BitDeltaPosting:
   209  		l, err := posting.ReadPostingList(key, itr)
   210  		if err != nil {
   211  			return nil, errors.Wrapf(err, "while reading posting list")
   212  		}
   213  		kvs, err := l.Rollup()
   214  		if err != nil {
   215  			return nil, errors.Wrapf(err, "while rolling up list")
   216  		}
   217  
   218  		for _, kv := range kvs {
   219  			backupKey, err := toBackupKey(kv.Key)
   220  			if err != nil {
   221  				return nil, err
   222  			}
   223  			kv.Key = backupKey
   224  
   225  			backupPl, err := toBackupPostingList(kv.Value)
   226  			if err != nil {
   227  				return nil, err
   228  			}
   229  			kv.Value = backupPl
   230  		}
   231  		list.Kv = append(list.Kv, kvs...)
   232  	case posting.BitSchemaPosting:
   233  		valCopy, err := item.ValueCopy(nil)
   234  		if err != nil {
   235  			return nil, errors.Wrapf(err, "while copying value")
   236  		}
   237  
   238  		backupKey, err := toBackupKey(key)
   239  		if err != nil {
   240  			return nil, err
   241  		}
   242  
   243  		kv := &bpb.KV{
   244  			Key:       backupKey,
   245  			Value:     valCopy,
   246  			UserMeta:  []byte{item.UserMeta()},
   247  			Version:   item.Version(),
   248  			ExpiresAt: item.ExpiresAt(),
   249  		}
   250  		list.Kv = append(list.Kv, kv)
   251  	default:
   252  		return nil, errors.Errorf(
   253  			"Unexpected meta: %d for key: %s", item.UserMeta(), hex.Dump(key))
   254  	}
   255  	return list, nil
   256  }
   257  
   258  func toBackupKey(key []byte) ([]byte, error) {
   259  	parsedKey, err := x.Parse(key)
   260  	if err != nil {
   261  		return nil, errors.Wrapf(err, "could not parse key %s", hex.Dump(key))
   262  	}
   263  	backupKey, err := parsedKey.ToBackupKey().Marshal()
   264  	if err != nil {
   265  		return nil, errors.Wrapf(err, "while converting key for backup")
   266  	}
   267  	return backupKey, nil
   268  }
   269  
   270  func toBackupPostingList(val []byte) ([]byte, error) {
   271  	pl := &pb.PostingList{}
   272  	if err := pl.Unmarshal(val); err != nil {
   273  		return nil, errors.Wrapf(err, "while reading posting list")
   274  	}
   275  	backupVal, err := posting.ToBackupPostingList(pl).Marshal()
   276  	if err != nil {
   277  		return nil, errors.Wrapf(err, "while converting posting list for backup")
   278  	}
   279  	return backupVal, nil
   280  }
   281  
   282  func writeKVList(list *bpb.KVList, w io.Writer) error {
   283  	if err := binary.Write(w, binary.LittleEndian, uint64(list.Size())); err != nil {
   284  		return err
   285  	}
   286  	buf, err := list.Marshal()
   287  	if err != nil {
   288  		return err
   289  	}
   290  	_, err = w.Write(buf)
   291  	return err
   292  }