github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/ee/backup/backup.go (about) 1 // +build !oss 2 3 /* 4 * Copyright 2018 Dgraph Labs, Inc. and Contributors 5 * 6 * Licensed under the Dgraph Community License (the "License"); you 7 * may not use this file except in compliance with the License. You 8 * may obtain a copy of the License at 9 * 10 * https://github.com/dgraph-io/dgraph/blob/master/licenses/DCL.txt 11 */ 12 13 package backup 14 15 import ( 16 "compress/gzip" 17 "context" 18 "encoding/binary" 19 "encoding/hex" 20 "encoding/json" 21 "fmt" 22 "io" 23 "net/url" 24 "sync" 25 26 "github.com/dgraph-io/badger" 27 bpb "github.com/dgraph-io/badger/pb" 28 "github.com/golang/glog" 29 "github.com/pkg/errors" 30 31 "github.com/dgraph-io/dgraph/posting" 32 "github.com/dgraph-io/dgraph/protos/pb" 33 "github.com/dgraph-io/dgraph/x" 34 ) 35 36 // Processor handles the different stages of the backup process. 37 type Processor struct { 38 // DB is the Badger pstore managed by this node. 39 DB *badger.DB 40 // Request stores the backup request containing the parameters for this backup. 41 Request *pb.BackupRequest 42 } 43 44 // Manifest records backup details, these are values used during restore. 45 // Since is the timestamp from which the next incremental backup should start (it's set 46 // to the readTs of the current backup). 47 // Groups are the IDs of the groups involved. 48 type Manifest struct { 49 sync.Mutex 50 //Type is the type of backup, either full or incremental. 51 Type string `json:"type"` 52 // Since is the timestamp at which this backup was taken. It's called Since 53 // because it will become the timestamp from which to backup in the next 54 // incremental backup. 55 Since uint64 `json:"since"` 56 // Groups is the map of valid groups to predicates at the time the backup was created. 57 Groups map[uint32][]string `json:"groups"` 58 // BackupId is a unique ID assigned to all the backups in the same series 59 // (from the first full backup to the last incremental backup). 60 BackupId string `json:"backup_id"` 61 // BackupNum is a monotonically increasing number assigned to each backup in 62 // a series. The full backup as BackupNum equal to one and each incremental 63 // backup gets assigned the next available number. Used to verify the integrity 64 // of the data during a restore. 65 BackupNum uint64 `json:"backup_num"` 66 // Path is the path to the manifest file. This field is only used during 67 // processing and is not written to disk. 68 Path string `json:"-"` 69 } 70 71 func (m *Manifest) getPredsInGroup(gid uint32) predicateSet { 72 preds, ok := m.Groups[gid] 73 if !ok { 74 return nil 75 } 76 77 predSet := make(predicateSet) 78 for _, pred := range preds { 79 predSet[pred] = struct{}{} 80 } 81 return predSet 82 } 83 84 // WriteBackup uses the request values to create a stream writer then hand off the data 85 // retrieval to stream.Orchestrate. The writer will create all the fd's needed to 86 // collect the data and later move to the target. 87 // Returns errors on failure, nil on success. 88 func (pr *Processor) WriteBackup(ctx context.Context) (*pb.Status, error) { 89 var emptyRes pb.Status 90 91 if err := ctx.Err(); err != nil { 92 return nil, err 93 } 94 95 uri, err := url.Parse(pr.Request.Destination) 96 if err != nil { 97 return &emptyRes, err 98 } 99 100 handler, err := NewUriHandler(uri) 101 if err != nil { 102 return &emptyRes, err 103 } 104 105 if err := handler.CreateBackupFile(uri, pr.Request); err != nil { 106 return &emptyRes, err 107 } 108 109 glog.V(3).Infof("Backup manifest version: %d", pr.Request.SinceTs) 110 111 predMap := make(map[string]struct{}) 112 for _, pred := range pr.Request.Predicates { 113 predMap[pred] = struct{}{} 114 } 115 116 var maxVersion uint64 117 gzWriter := gzip.NewWriter(handler) 118 stream := pr.DB.NewStreamAt(pr.Request.ReadTs) 119 stream.LogPrefix = "Dgraph.Backup" 120 stream.KeyToList = pr.toBackupList 121 stream.ChooseKey = func(item *badger.Item) bool { 122 parsedKey, err := x.Parse(item.Key()) 123 if err != nil { 124 return false 125 } 126 _, ok := predMap[parsedKey.Attr] 127 return ok 128 } 129 stream.Send = func(list *bpb.KVList) error { 130 for _, kv := range list.Kv { 131 if maxVersion < kv.Version { 132 maxVersion = kv.Version 133 } 134 } 135 return writeKVList(list, gzWriter) 136 } 137 138 if err := stream.Orchestrate(context.Background()); err != nil { 139 glog.Errorf("While taking backup: %v", err) 140 return &emptyRes, err 141 } 142 143 if maxVersion > pr.Request.ReadTs { 144 glog.Errorf("Max timestamp seen during backup (%d) is greater than readTs (%d)", 145 maxVersion, pr.Request.ReadTs) 146 } 147 148 glog.V(2).Infof("Backup group %d version: %d", pr.Request.GroupId, pr.Request.ReadTs) 149 if err = gzWriter.Close(); err != nil { 150 glog.Errorf("While closing gzipped writer: %v", err) 151 return &emptyRes, err 152 } 153 if err = handler.Close(); err != nil { 154 glog.Errorf("While closing handler: %v", err) 155 return &emptyRes, err 156 } 157 glog.Infof("Backup complete: group %d at %d", pr.Request.GroupId, pr.Request.ReadTs) 158 return &emptyRes, nil 159 } 160 161 // CompleteBackup will finalize a backup by writing the manifest at the backup destination. 162 func (pr *Processor) CompleteBackup(ctx context.Context, manifest *Manifest) error { 163 if err := ctx.Err(); err != nil { 164 return err 165 } 166 167 uri, err := url.Parse(pr.Request.Destination) 168 if err != nil { 169 return err 170 } 171 172 handler, err := NewUriHandler(uri) 173 if err != nil { 174 return err 175 } 176 177 if err := handler.CreateManifest(uri, pr.Request); err != nil { 178 return err 179 } 180 181 if err = json.NewEncoder(handler).Encode(manifest); err != nil { 182 return err 183 } 184 185 if err = handler.Close(); err != nil { 186 return err 187 } 188 glog.Infof("Backup completed OK.") 189 return nil 190 } 191 192 // GoString implements the GoStringer interface for Manifest. 193 func (m *Manifest) GoString() string { 194 return fmt.Sprintf(`Manifest{Since: %d, Groups: %v}`, m.Since, m.Groups) 195 } 196 197 func (pr *Processor) toBackupList(key []byte, itr *badger.Iterator) (*bpb.KVList, error) { 198 list := &bpb.KVList{} 199 200 item := itr.Item() 201 if item.Version() < pr.Request.SinceTs || item.IsDeletedOrExpired() { 202 // Ignore versions less than given timestamp, or skip older versions of 203 // the given key by returning an empty list. 204 return list, nil 205 } 206 207 switch item.UserMeta() { 208 case posting.BitEmptyPosting, posting.BitCompletePosting, posting.BitDeltaPosting: 209 l, err := posting.ReadPostingList(key, itr) 210 if err != nil { 211 return nil, errors.Wrapf(err, "while reading posting list") 212 } 213 kvs, err := l.Rollup() 214 if err != nil { 215 return nil, errors.Wrapf(err, "while rolling up list") 216 } 217 218 for _, kv := range kvs { 219 backupKey, err := toBackupKey(kv.Key) 220 if err != nil { 221 return nil, err 222 } 223 kv.Key = backupKey 224 225 backupPl, err := toBackupPostingList(kv.Value) 226 if err != nil { 227 return nil, err 228 } 229 kv.Value = backupPl 230 } 231 list.Kv = append(list.Kv, kvs...) 232 case posting.BitSchemaPosting: 233 valCopy, err := item.ValueCopy(nil) 234 if err != nil { 235 return nil, errors.Wrapf(err, "while copying value") 236 } 237 238 backupKey, err := toBackupKey(key) 239 if err != nil { 240 return nil, err 241 } 242 243 kv := &bpb.KV{ 244 Key: backupKey, 245 Value: valCopy, 246 UserMeta: []byte{item.UserMeta()}, 247 Version: item.Version(), 248 ExpiresAt: item.ExpiresAt(), 249 } 250 list.Kv = append(list.Kv, kv) 251 default: 252 return nil, errors.Errorf( 253 "Unexpected meta: %d for key: %s", item.UserMeta(), hex.Dump(key)) 254 } 255 return list, nil 256 } 257 258 func toBackupKey(key []byte) ([]byte, error) { 259 parsedKey, err := x.Parse(key) 260 if err != nil { 261 return nil, errors.Wrapf(err, "could not parse key %s", hex.Dump(key)) 262 } 263 backupKey, err := parsedKey.ToBackupKey().Marshal() 264 if err != nil { 265 return nil, errors.Wrapf(err, "while converting key for backup") 266 } 267 return backupKey, nil 268 } 269 270 func toBackupPostingList(val []byte) ([]byte, error) { 271 pl := &pb.PostingList{} 272 if err := pl.Unmarshal(val); err != nil { 273 return nil, errors.Wrapf(err, "while reading posting list") 274 } 275 backupVal, err := posting.ToBackupPostingList(pl).Marshal() 276 if err != nil { 277 return nil, errors.Wrapf(err, "while converting posting list for backup") 278 } 279 return backupVal, nil 280 } 281 282 func writeKVList(list *bpb.KVList, w io.Writer) error { 283 if err := binary.Write(w, binary.LittleEndian, uint64(list.Size())); err != nil { 284 return err 285 } 286 buf, err := list.Marshal() 287 if err != nil { 288 return err 289 } 290 _, err = w.Write(buf) 291 return err 292 }