// Source: github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/spec/spec.go
//
// Copyright 2019 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file incorporates work covered by the following copyright and
// permission notice:
//
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

// Package spec provides builders and parsers for spelling Noms databases,
// datasets and values.
24 package spec 25 26 import ( 27 "context" 28 "errors" 29 "fmt" 30 "net/url" 31 "os" 32 "os/user" 33 "path/filepath" 34 "strings" 35 36 "cloud.google.com/go/storage" 37 "github.com/aws/aws-sdk-go/aws" 38 "github.com/aws/aws-sdk-go/aws/credentials" 39 "github.com/aws/aws-sdk-go/aws/session" 40 "github.com/aws/aws-sdk-go/service/dynamodb" 41 "github.com/aws/aws-sdk-go/service/s3" 42 43 "github.com/dolthub/dolt/go/store/chunks" 44 "github.com/dolthub/dolt/go/store/d" 45 "github.com/dolthub/dolt/go/store/datas" 46 "github.com/dolthub/dolt/go/store/nbs" 47 "github.com/dolthub/dolt/go/store/types" 48 ) 49 50 const ( 51 Separator = "::" 52 DefaultAWSRegion = "us-west-2" 53 DefaultAWSCredsProfile = "default" 54 ) 55 56 type ProtocolImpl interface { 57 NewChunkStore(sp Spec) (chunks.ChunkStore, error) 58 NewDatabase(sp Spec) (datas.Database, error) 59 } 60 61 var ExternalProtocols = map[string]ProtocolImpl{} 62 63 type AWSCredentialSource int 64 65 const ( 66 InvalidCS AWSCredentialSource = iota - 1 67 68 // Auto will try env first and fall back to role (This is the default) 69 AutoCS 70 71 // Role Uses the AWS IAM role of the instance for auth 72 RoleCS 73 74 // Env uses the credentials stored in the environment variables AWS_ACCESS_KEY_ID, and AWS_SECRET_ACCESS_KEY 75 EnvCS 76 77 // Uses credentials stored in a file 78 FileCS 79 ) 80 81 func (ct AWSCredentialSource) String() string { 82 switch ct { 83 case RoleCS: 84 return "role" 85 case EnvCS: 86 return "env" 87 case AutoCS: 88 return "auto" 89 case FileCS: 90 return "file" 91 default: 92 return "invalid" 93 } 94 } 95 96 func AWSCredentialSourceFromStr(str string) AWSCredentialSource { 97 strlwr := strings.TrimSpace(strings.ToLower(str)) 98 switch strlwr { 99 case "", "auto": 100 return AutoCS 101 case "role": 102 return RoleCS 103 case "env": 104 return EnvCS 105 case "file": 106 return FileCS 107 default: 108 return InvalidCS 109 } 110 } 111 112 // SpecOptions customize Spec behavior. 
113 type SpecOptions struct { 114 // Authorization token for requests. For example, if the database is HTTP 115 // this will used for an `Authorization: Bearer ${authorization}` header. 116 Authorization string 117 118 // Region that should be used when creating the aws session 119 AWSRegion string 120 121 // The type of credentials that should be used when creating the aws session 122 AWSCredSource AWSCredentialSource 123 124 // Credential file to use when using auto or file credentials 125 AWSCredFile string 126 } 127 128 func (so *SpecOptions) AwsRegionOrDefault() string { 129 if so.AWSRegion == "" { 130 return DefaultAWSRegion 131 } 132 133 return so.AWSRegion 134 } 135 136 func (so *SpecOptions) AwsCredFileOrDefault() string { 137 if so.AWSCredFile == "" { 138 usr, err := user.Current() 139 if err != nil { 140 return "" 141 } 142 143 return filepath.Join(usr.HomeDir, ".aws", "credentials") 144 } 145 146 return so.AWSCredFile 147 } 148 149 // Spec locates a Noms database, dataset, or value globally. Spec caches 150 // its database instance so it therefore does not reflect new commits in 151 // the db, by (legacy) design. 152 type Spec struct { 153 // Protocol is one of "mem", "aws", "gs", "nbs" 154 Protocol string 155 156 // DatabaseName is the name of the Spec's database, which is the string after 157 // "protocol:". specs include their leading "//" characters. 158 DatabaseName string 159 160 // Options are the SpecOptions that the Spec was constructed with. 161 Options SpecOptions 162 163 // Path is nil unless the spec was created with ForPath. 164 Path AbsolutePath 165 166 // db is lazily created, so it needs to be a pointer to a Database. 
167 db *datas.Database 168 } 169 170 func newSpec(dbSpec string, opts SpecOptions) (Spec, error) { 171 protocol, dbName, err := parseDatabaseSpec(dbSpec) 172 if err != nil { 173 return Spec{}, err 174 } 175 176 return Spec{ 177 Protocol: protocol, 178 DatabaseName: dbName, 179 Options: opts, 180 db: new(datas.Database), 181 }, nil 182 } 183 184 // ForDatabase parses a spec for a Database. 185 func ForDatabase(spec string) (Spec, error) { 186 return ForDatabaseOpts(spec, SpecOptions{}) 187 } 188 189 // ForDatabaseOpts parses a spec for a Database. 190 func ForDatabaseOpts(spec string, opts SpecOptions) (Spec, error) { 191 return newSpec(spec, opts) 192 } 193 194 // ForDataset parses a spec for a Dataset. 195 func ForDataset(spec string) (Spec, error) { 196 return ForDatasetOpts(spec, SpecOptions{}) 197 } 198 199 // ForDatasetOpts parses a spec for a Dataset. 200 func ForDatasetOpts(spec string, opts SpecOptions) (Spec, error) { 201 dbSpec, pathStr, err := splitDatabaseSpec(spec) 202 if err != nil { 203 return Spec{}, err 204 } 205 206 sp, err := newSpec(dbSpec, opts) 207 if err != nil { 208 return Spec{}, err 209 } 210 211 path, err := NewAbsolutePath(pathStr) 212 if err != nil { 213 return Spec{}, err 214 } 215 216 if path.Dataset == "" { 217 return Spec{}, errors.New("dataset name required for dataset spec") 218 } 219 220 if !path.Path.IsEmpty() { 221 return Spec{}, errors.New("path is not allowed for dataset spec") 222 } 223 224 sp.Path = path 225 return sp, nil 226 } 227 228 // ForPath parses a spec for a path to a Value. 229 func ForPath(spec string) (Spec, error) { 230 return ForPathOpts(spec, SpecOptions{}) 231 } 232 233 // ForPathOpts parses a spec for a path to a Value. 
234 func ForPathOpts(spec string, opts SpecOptions) (Spec, error) { 235 dbSpec, pathStr, err := splitDatabaseSpec(spec) 236 if err != nil { 237 return Spec{}, err 238 } 239 240 var path AbsolutePath 241 if pathStr != "" { 242 path, err = NewAbsolutePath(pathStr) 243 if err != nil { 244 return Spec{}, err 245 } 246 } 247 248 sp, err := newSpec(dbSpec, opts) 249 if err != nil { 250 return Spec{}, err 251 } 252 253 sp.Path = path 254 return sp, nil 255 } 256 257 func (sp Spec) String() string { 258 s := sp.Protocol 259 if s != "mem" { 260 s += ":" + sp.DatabaseName 261 } 262 p := sp.Path.String() 263 if p != "" { 264 s += Separator + p 265 } 266 return s 267 } 268 269 // GetDatabase returns the Database instance that this Spec's DatabaseName 270 // describes. The same Database instance is returned every time, unless Close 271 // is called. If the Spec is closed, it is re-opened with a new Database. 272 func (sp Spec) GetDatabase(ctx context.Context) datas.Database { 273 if *sp.db == nil { 274 *sp.db = sp.createDatabase(ctx) 275 } 276 return *sp.db 277 } 278 279 // NewChunkStore returns a new ChunkStore instance that this Spec's 280 // DatabaseName describes. It's unusual to call this method, GetDatabase is 281 // more useful. Unlike GetDatabase, a new ChunkStore instance is returned every 282 // time. If there is no ChunkStore, for example remote databases, returns nil. 
283 func (sp Spec) NewChunkStore(ctx context.Context) chunks.ChunkStore { 284 switch sp.Protocol { 285 case "http", "https": 286 return nil 287 case "aws": 288 return parseAWSSpec(ctx, sp.Href(), sp.Options) 289 case "gs": 290 return parseGCSSpec(ctx, sp.Href(), sp.Options) 291 case "nbs": 292 cs, err := nbs.NewLocalStore(ctx, types.Format_Default.VersionString(), sp.DatabaseName, 1<<28) 293 d.PanicIfError(err) 294 return cs 295 case "mem": 296 storage := &chunks.MemoryStorage{} 297 return storage.NewView() 298 default: 299 impl, ok := ExternalProtocols[sp.Protocol] 300 if !ok { 301 d.PanicIfError(fmt.Errorf("unknown protocol: %s", sp.Protocol)) 302 } 303 r, err := impl.NewChunkStore(sp) 304 d.PanicIfError(err) 305 return r 306 } 307 } 308 309 func parseAWSSpec(ctx context.Context, awsURL string, options SpecOptions) chunks.ChunkStore { 310 fmt.Println(awsURL, options) 311 312 u, _ := url.Parse(awsURL) 313 parts := strings.SplitN(u.Hostname(), ":", 2) // [table] [, bucket]? 314 d.PanicIfFalse(len(parts) == 2) 315 316 awsConfig := aws.NewConfig().WithRegion(options.AwsRegionOrDefault()) 317 318 switch options.AWSCredSource { 319 case RoleCS: 320 case EnvCS: 321 awsConfig = awsConfig.WithCredentials(credentials.NewEnvCredentials()) 322 case FileCS: 323 filePath := options.AwsCredFileOrDefault() 324 creds := credentials.NewSharedCredentials(filePath, DefaultAWSCredsProfile) 325 awsConfig = awsConfig.WithCredentials(creds) 326 case AutoCS: 327 envCreds := credentials.NewEnvCredentials() 328 if _, err := envCreds.Get(); err == nil { 329 awsConfig = awsConfig.WithCredentials(envCreds) 330 } 331 332 filePath := options.AwsCredFileOrDefault() 333 if _, err := os.Stat(filePath); err == nil { 334 creds := credentials.NewSharedCredentials(filePath, DefaultAWSCredsProfile) 335 awsConfig = awsConfig.WithCredentials(creds) 336 } 337 default: 338 panic("unsupported credential type") 339 } 340 341 sess := session.Must(session.NewSession(awsConfig)) 342 cs, err := 
nbs.NewAWSStore(ctx, types.Format_Default.VersionString(), parts[0], u.Path, parts[1], s3.New(sess), dynamodb.New(sess), 1<<28) 343 344 d.PanicIfError(err) 345 346 return cs 347 } 348 349 func parseGCSSpec(ctx context.Context, gcsURL string, options SpecOptions) chunks.ChunkStore { 350 u, err := url.Parse(gcsURL) 351 d.PanicIfError(err) 352 353 fmt.Println(u) 354 355 bucket := u.Host 356 path := u.Path 357 358 gcs, err := storage.NewClient(ctx) 359 360 if err != nil { 361 panic("Could not create GCSBlobstore") 362 } 363 364 cs, err := nbs.NewGCSStore(ctx, types.Format_Default.VersionString(), bucket, path, gcs, 1<<28) 365 366 d.PanicIfError(err) 367 368 return cs 369 } 370 371 // GetDataset returns the current Dataset instance for this Spec's Database. 372 // GetDataset is live, so if Commit is called on this Spec's Database later, a 373 // new up-to-date Dataset will returned on the next call to GetDataset. If 374 // this is not a Dataset spec, returns nil. 375 func (sp Spec) GetDataset(ctx context.Context) (ds datas.Dataset) { 376 if sp.Path.Dataset != "" { 377 var err error 378 ds, err = sp.GetDatabase(ctx).GetDataset(ctx, sp.Path.Dataset) 379 d.PanicIfError(err) 380 } 381 return 382 } 383 384 // GetValue returns the Value at this Spec's Path within its Database, or nil 385 // if this isn't a Path Spec or if that path isn't found. 386 func (sp Spec) GetValue(ctx context.Context) (val types.Value) { 387 if !sp.Path.IsEmpty() { 388 val = sp.Path.Resolve(ctx, sp.GetDatabase(ctx)) 389 } 390 return 391 } 392 393 // Href treats the Protocol and DatabaseName as a URL, and returns its href. 394 // For example, the spec http://example.com/path::ds returns 395 // "http://example.com/path". If the Protocol is not "http" or "http", returns 396 // an empty string. 
397 func (sp Spec) Href() string { 398 switch proto := sp.Protocol; proto { 399 case "http", "https", "aws", "gs": 400 return proto + ":" + sp.DatabaseName 401 default: 402 return "" 403 } 404 } 405 406 // Pin returns a Spec in which the dataset component, if any, has been replaced 407 // with the hash of the HEAD of that dataset. This "pins" the path to the state 408 // of the database at the current moment in time. Returns itself if the 409 // PathSpec is already "pinned". 410 func (sp Spec) Pin(ctx context.Context) (Spec, bool) { 411 var ds datas.Dataset 412 413 if !sp.Path.IsEmpty() { 414 if !sp.Path.Hash.IsEmpty() { 415 // Spec is already pinned. 416 return sp, true 417 } 418 419 var err error 420 ds, err = sp.GetDatabase(ctx).GetDataset(ctx, sp.Path.Dataset) 421 d.PanicIfError(err) 422 } else { 423 ds = sp.GetDataset(ctx) 424 } 425 426 commit, ok := ds.MaybeHead() 427 if !ok { 428 return Spec{}, false 429 } 430 431 nbf := sp.GetDatabase(ctx).Format() 432 r := sp 433 434 var err error 435 r.Path.Dataset = "" 436 r.Path.Hash, err = commit.Hash(nbf) 437 d.PanicIfError(err) 438 439 return r, true 440 } 441 442 func (sp Spec) Close() error { 443 db := *sp.db 444 if db == nil { 445 return nil 446 } 447 448 *sp.db = nil 449 return db.Close() 450 } 451 452 func (sp Spec) createDatabase(ctx context.Context) datas.Database { 453 switch sp.Protocol { 454 case "aws": 455 return datas.NewDatabase(parseAWSSpec(ctx, sp.Href(), sp.Options)) 456 case "gs": 457 return datas.NewDatabase(parseGCSSpec(ctx, sp.Href(), sp.Options)) 458 case "nbs": 459 os.Mkdir(sp.DatabaseName, 0777) 460 cs, err := nbs.NewLocalStore(ctx, types.Format_Default.VersionString(), sp.DatabaseName, 1<<28) 461 d.PanicIfError(err) 462 return datas.NewDatabase(cs) 463 case "mem": 464 storage := &chunks.MemoryStorage{} 465 return datas.NewDatabase(storage.NewViewWithDefaultFormat()) 466 default: 467 impl, ok := ExternalProtocols[sp.Protocol] 468 if !ok { 469 d.PanicIfError(fmt.Errorf("unknown protocol: %s", 
sp.Protocol)) 470 } 471 r, err := impl.NewDatabase(sp) 472 d.PanicIfError(err) 473 return r 474 } 475 } 476 477 func parseDatabaseSpec(spec string) (protocol, name string, err error) { 478 if len(spec) == 0 { 479 err = fmt.Errorf("empty spec") 480 return 481 } 482 483 parts := strings.SplitN(spec, ":", 2) // [protocol] [, path]? 484 485 // If there was no ":" then this is either a mem spec, or a filesystem path. 486 // This is ambiguous if the file system path is "mem" but that just means the 487 // path needs to be explicitly "nbs:mem". 488 if len(parts) == 1 { 489 if spec == "mem" { 490 protocol = "mem" 491 } else { 492 protocol, name = "nbs", spec 493 } 494 return 495 } else if len(parts) == 2 && len(parts[0]) == 1 && parts[0][0] >= 'A' && parts[0][0] <= 'Z' { //check for Windows drive letter, ala C:\Users\Public 496 if _, err := os.Stat(parts[0] + `:\`); !os.IsNotExist(err) { 497 parts = []string{"nbs", spec} 498 } 499 } 500 501 if _, ok := ExternalProtocols[parts[0]]; ok { 502 protocol, name = parts[0], parts[1] 503 return 504 } 505 506 switch parts[0] { 507 case "nbs": 508 protocol, name = parts[0], parts[1] 509 510 case "aws", "gs": 511 u, perr := url.Parse(spec) 512 if perr != nil { 513 err = perr 514 } else if u.Host == "" { 515 err = fmt.Errorf("%s has empty host", spec) 516 } else if parts[0] == "aws" && u.Path == "" { 517 err = fmt.Errorf("%s does not specify a database ID", spec) 518 } else { 519 protocol, name = parts[0], parts[1] 520 } 521 522 case "mem": 523 err = fmt.Errorf(`in-memory database must be specified as "mem", not "mem:"`) 524 525 default: 526 err = fmt.Errorf("invalid database protocol %s in %s", protocol, spec) 527 } 528 return 529 } 530 531 func splitDatabaseSpec(spec string) (string, string, error) { 532 lastIdx := strings.LastIndex(spec, Separator) 533 if lastIdx == -1 { 534 return "", "", fmt.Errorf("missing %s after database in %s", Separator, spec) 535 } 536 537 return spec[:lastIdx], spec[lastIdx+len(Separator):], nil 538 }