github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/ee/backup/handler.go (about) 1 // +build !oss 2 3 /* 4 * Copyright 2018 Dgraph Labs, Inc. and Contributors 5 * 6 * Licensed under the Dgraph Community License (the "License"); you 7 * may not use this file except in compliance with the License. You 8 * may obtain a copy of the License at 9 * 10 * https://github.com/dgraph-io/dgraph/blob/master/licenses/DCL.txt 11 */ 12 13 package backup 14 15 import ( 16 "fmt" 17 "io" 18 "net/url" 19 20 "github.com/dgraph-io/dgraph/protos/pb" 21 22 "github.com/pkg/errors" 23 ) 24 25 const ( 26 // backupPathFmt defines the path to store or index backup objects. 27 // The expected parameter is a date in string format. 28 backupPathFmt = `dgraph.%s` 29 30 // backupNameFmt defines the name of backups files or objects (remote). 31 // The first parameter is the read timestamp at the time of backup. This is used for 32 // incremental backups and partial restore. 33 // The second parameter is the group ID when backup happened. This is used for partitioning 34 // the posting directories 'p' during restore. 35 backupNameFmt = `r%d-g%d.backup` 36 37 // backupManifest is the name of backup manifests. This a JSON file that contains the 38 // details of the backup. A backup dir without a manifest is ignored. 39 // 40 // Example manifest: 41 // { 42 // "since": 2280, 43 // "groups": [ 1, 2, 3 ], 44 // } 45 // 46 // "since" is the read timestamp used at the backup request. This value is called "since" 47 // because it used by subsequent incremental backups. 48 // "groups" are the group IDs that participated. 49 backupManifest = `manifest.json` 50 ) 51 52 // UriHandler interface is implemented by URI scheme handlers. 53 // When adding new scheme handles, for example 'azure://', an object will implement 54 // this interface to supply Dgraph with a way to create or load backup files into DB. 55 // For all methods below, the URL object is parsed as described in `newHandler' and 56 // the Processor object has the DB, estimated tablets size, and backup parameters. 57 type UriHandler interface { 58 // Handlers must know how to Write to their URI location. 59 // These function calls are used by both Create and Load. 60 io.WriteCloser 61 62 // GetLatestManifest reads the manifests at the given URL and returns the 63 // latest manifest. 64 GetLatestManifest(*url.URL) (*Manifest, error) 65 66 // CreateBackupFile prepares the object or file to save the backup file. 67 CreateBackupFile(*url.URL, *pb.BackupRequest) error 68 69 // CreateManifest prepares the manifest for writing. 70 CreateManifest(*url.URL, *pb.BackupRequest) error 71 72 // Load will scan location URI for backup files, then load them via loadFn. 73 // It optionally takes the name of the last directory to consider. Any backup directories 74 // created after will be ignored. 75 // Objects implementing this function will be used for retrieving (dowload) backup files 76 // and loading the data into a DB. The restore CLI command uses this call. 77 Load(*url.URL, string, loadFn) (uint64, error) 78 79 // ListManifests will scan the provided URI and return the paths to the manifests stored 80 // in that location. 81 ListManifests(*url.URL) ([]string, error) 82 83 // ReadManifest will read the manifest at the given location and load it into the given 84 // Manifest object. 85 ReadManifest(string, *Manifest) error 86 } 87 88 // getHandler returns a UriHandler for the URI scheme. 89 func getHandler(scheme string) UriHandler { 90 switch scheme { 91 case "file", "": 92 return &fileHandler{} 93 case "minio", "s3": 94 return &s3Handler{} 95 } 96 return nil 97 } 98 99 // NewUriHandler parses the requested URI and finds the corresponding UriHandler. 100 // Target URI formats: 101 // [scheme]://[host]/[path]?[args] 102 // [scheme]:///[path]?[args] 103 // /[path]?[args] (only for local or NFS) 104 // 105 // Target URI parts: 106 // scheme - service handler, one of: "file", "s3", "minio" 107 // host - remote address. ex: "dgraph.s3.amazonaws.com" 108 // path - directory, bucket or container at target. ex: "/dgraph/backups/" 109 // args - specific arguments that are ok to appear in logs. 110 // 111 // Global args (if supported by the handler): 112 // secure - true|false turn on/off TLS. 113 // trace - true|false turn on/off HTTP tracing. 114 // compress - true|false turn on/off data compression. 115 // encrypt - true|false turn on/off data encryption. 116 // 117 // Examples: 118 // s3://dgraph.s3.amazonaws.com/dgraph/backups?secure=true 119 // minio://localhost:9000/dgraph?secure=true 120 // file:///tmp/dgraph/backups 121 // /tmp/dgraph/backups?compress=gzip 122 func NewUriHandler(uri *url.URL) (UriHandler, error) { 123 h := getHandler(uri.Scheme) 124 if h == nil { 125 return nil, errors.Errorf("Unable to handle url: %s", uri) 126 } 127 128 return h, nil 129 } 130 131 // predicateSet is a map whose keys are predicates. It is meant to be used as a set. 132 type predicateSet map[string]struct{} 133 134 // loadFn is a function that will receive the current file being read. 135 // A reader, the backup groupId, and a map whose keys are the predicates to restore 136 // are passed as arguments. 137 type loadFn func(reader io.Reader, groupId int, preds predicateSet) error 138 139 // Load will scan location l for backup files in the given backup series and load them 140 // sequentially. Returns the maximum Since value on success, otherwise an error. 141 func Load(location, backupId string, fn loadFn) (since uint64, err error) { 142 uri, err := url.Parse(location) 143 if err != nil { 144 return 0, err 145 } 146 147 h := getHandler(uri.Scheme) 148 if h == nil { 149 return 0, errors.Errorf("Unsupported URI: %v", uri) 150 } 151 152 return h.Load(uri, backupId, fn) 153 } 154 155 // ListManifests scans location l for backup files and returns the list of manifests. 156 func ListManifests(l string) (map[string]*Manifest, error) { 157 uri, err := url.Parse(l) 158 if err != nil { 159 return nil, err 160 } 161 162 h := getHandler(uri.Scheme) 163 if h == nil { 164 return nil, errors.Errorf("Unsupported URI: %v", uri) 165 } 166 167 paths, err := h.ListManifests(uri) 168 if err != nil { 169 return nil, err 170 } 171 172 listedManifests := make(map[string]*Manifest) 173 for _, path := range paths { 174 var m Manifest 175 if err := h.ReadManifest(path, &m); err != nil { 176 return nil, errors.Wrapf(err, "While reading %q", path) 177 } 178 listedManifests[path] = &m 179 } 180 181 return listedManifests, nil 182 } 183 184 // filterManifests takes a list of manifests and returns the list of manifests 185 // that should be considered during a restore. 186 func filterManifests(manifests []*Manifest, backupId string) ([]*Manifest, error) { 187 // Go through the files in reverse order and stop when the latest full backup is found. 188 var filteredManifests []*Manifest 189 for i := len(manifests) - 1; i >= 0; i-- { 190 // If backupId is not empty, skip all the manifests that do not match the given 191 // backupId. If it's empty, do not skip any manifests as the default behavior is 192 // to restore the latest series of backups. 193 if len(backupId) > 0 && manifests[i].BackupId != backupId { 194 fmt.Printf("Restore: skip manifest %s as it's not part of the series with uid %s.\n", 195 manifests[i].Path, backupId) 196 continue 197 } 198 199 filteredManifests = append(filteredManifests, manifests[i]) 200 if manifests[i].Type == "full" { 201 break 202 } 203 } 204 205 // Reverse the filtered lists since the original iteration happened in reverse. 206 for i := len(filteredManifests)/2 - 1; i >= 0; i-- { 207 opp := len(filteredManifests) - 1 - i 208 filteredManifests[i], filteredManifests[opp] = filteredManifests[opp], filteredManifests[i] 209 } 210 211 if err := verifyManifests(filteredManifests); err != nil { 212 return nil, err 213 } 214 215 return filteredManifests, nil 216 } 217 218 func verifyManifests(manifests []*Manifest) error { 219 if len(manifests) == 0 { 220 return nil 221 } 222 223 if manifests[0].BackupNum != 1 { 224 return errors.Errorf("expected a BackupNum value of 1 for first manifest but got %d", 225 manifests[0].BackupNum) 226 } 227 228 backupId := manifests[0].BackupId 229 var backupNum uint64 230 for _, manifest := range manifests { 231 if manifest.BackupId != backupId { 232 return errors.Errorf("found a manifest with backup ID %s but expected %s", 233 manifest.BackupId, backupId) 234 } 235 236 backupNum++ 237 if manifest.BackupNum != backupNum { 238 return errors.Errorf("found a manifest with backup number %d but expected %d", 239 manifest.BackupNum, backupNum) 240 } 241 } 242 243 return nil 244 } 245 246 func backupName(since uint64, groupId uint32) string { 247 return fmt.Sprintf(backupNameFmt, since, groupId) 248 }