github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/spec/spec.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // This file incorporates work covered by the following copyright and 16 // permission notice: 17 // 18 // Copyright 2016 Attic Labs, Inc. All rights reserved. 19 // Licensed under the Apache License, version 2.0: 20 // http://www.apache.org/licenses/LICENSE-2.0 21 22 // Package spec provides builders and parsers for spelling Noms databases, 23 // datasets and values. 24 package spec 25 26 import ( 27 "context" 28 "errors" 29 "fmt" 30 "net/url" 31 "os" 32 "os/user" 33 "path/filepath" 34 "strings" 35 36 "cloud.google.com/go/storage" 37 "github.com/aws/aws-sdk-go/aws" 38 "github.com/aws/aws-sdk-go/aws/credentials" 39 "github.com/aws/aws-sdk-go/aws/session" 40 "github.com/aws/aws-sdk-go/service/dynamodb" 41 "github.com/aws/aws-sdk-go/service/s3" 42 "github.com/oracle/oci-go-sdk/v65/common" 43 "github.com/oracle/oci-go-sdk/v65/objectstorage" 44 45 "github.com/dolthub/dolt/go/libraries/utils/filesys" 46 "github.com/dolthub/dolt/go/store/chunks" 47 "github.com/dolthub/dolt/go/store/d" 48 "github.com/dolthub/dolt/go/store/datas" 49 "github.com/dolthub/dolt/go/store/nbs" 50 "github.com/dolthub/dolt/go/store/prolly/tree" 51 "github.com/dolthub/dolt/go/store/types" 52 ) 53 54 const ( 55 Separator = "::" 56 DefaultAWSRegion = "us-west-2" 57 DefaultAWSCredsProfile = "default" 58 ) 59 60 type ProtocolImpl interface { 61 NewChunkStore(sp Spec) (chunks.ChunkStore, error) 62 } 63 64 var ExternalProtocols = map[string]ProtocolImpl{} 65 66 type AWSCredentialSource int 67 68 const ( 69 InvalidCS AWSCredentialSource = iota - 1 70 71 // Auto will try env first and fall back to role (This is the default) 72 AutoCS 73 74 // Role Uses the AWS IAM role of the instance for auth 75 RoleCS 76 77 // Env uses the credentials stored in the environment variables AWS_ACCESS_KEY_ID, and AWS_SECRET_ACCESS_KEY 78 EnvCS 79 80 // Uses credentials stored in a file 81 FileCS 82 ) 83 84 func (ct AWSCredentialSource) String() string { 85 switch ct { 86 case RoleCS: 87 return "role" 88 case EnvCS: 89 return "env" 90 case AutoCS: 91 return "auto" 92 case FileCS: 93 return "file" 94 default: 95 return "invalid" 96 } 97 } 98 99 func AWSCredentialSourceFromStr(str string) AWSCredentialSource { 100 strlwr := strings.TrimSpace(strings.ToLower(str)) 101 switch strlwr { 102 case "", "auto": 103 return AutoCS 104 case "role": 105 return RoleCS 106 case "env": 107 return EnvCS 108 case "file": 109 return FileCS 110 default: 111 return InvalidCS 112 } 113 } 114 115 // SpecOptions customize Spec behavior. 116 type SpecOptions struct { 117 // Authorization token for requests. For example, if the database is HTTP 118 // this will used for an `Authorization: Bearer ${authorization}` header. 119 Authorization string 120 121 // Region that should be used when creating the aws session 122 AWSRegion string 123 124 // The type of credentials that should be used when creating the aws session 125 AWSCredSource AWSCredentialSource 126 127 // Credential file to use when using auto or file credentials 128 AWSCredFile string 129 } 130 131 func (so *SpecOptions) AwsRegionOrDefault() string { 132 if so.AWSRegion == "" { 133 return DefaultAWSRegion 134 } 135 136 return so.AWSRegion 137 } 138 139 func (so *SpecOptions) AwsCredFileOrDefault() string { 140 if so.AWSCredFile == "" { 141 usr, err := user.Current() 142 if err != nil { 143 return "" 144 } 145 146 return filepath.Join(usr.HomeDir, ".aws", "credentials") 147 } 148 149 return so.AWSCredFile 150 } 151 152 // Spec locates a Noms database, dataset, or value globally. Spec caches 153 // its database instance so it therefore does not reflect new commits in 154 // the db, by (legacy) design. 155 type Spec struct { 156 // Protocol is one of "mem", "aws", "gs", "nbs" 157 Protocol string 158 159 // DatabaseName is the name of the Spec's database, which is the string after 160 // "protocol:". specs include their leading "//" characters. 161 DatabaseName string 162 163 // Options are the SpecOptions that the Spec was constructed with. 164 Options SpecOptions 165 166 // Path is nil unless the spec was created with ForPath. 167 Path AbsolutePath 168 169 // db is lazily created, so it needs to be a pointer to a Database. 170 db *datas.Database 171 vrw *types.ValueReadWriter 172 ns *tree.NodeStore 173 } 174 175 func newSpec(dbSpec string, opts SpecOptions) (Spec, error) { 176 protocol, dbName, err := parseDatabaseSpec(dbSpec) 177 if err != nil { 178 return Spec{}, err 179 } 180 181 return Spec{ 182 Protocol: protocol, 183 DatabaseName: dbName, 184 Options: opts, 185 db: new(datas.Database), 186 vrw: new(types.ValueReadWriter), 187 ns: new(tree.NodeStore), 188 }, nil 189 } 190 191 // ForDatabase parses a spec for a Database. 192 func ForDatabase(spec string) (Spec, error) { 193 return ForDatabaseOpts(spec, SpecOptions{}) 194 } 195 196 // ForDatabaseOpts parses a spec for a Database. 197 func ForDatabaseOpts(spec string, opts SpecOptions) (Spec, error) { 198 return newSpec(spec, opts) 199 } 200 201 // ForDataset parses a spec for a Dataset. 202 func ForDataset(spec string) (Spec, error) { 203 return ForDatasetOpts(spec, SpecOptions{}) 204 } 205 206 // ForDatasetOpts parses a spec for a Dataset. 207 func ForDatasetOpts(spec string, opts SpecOptions) (Spec, error) { 208 dbSpec, pathStr, err := splitDatabaseSpec(spec) 209 if err != nil { 210 return Spec{}, err 211 } 212 213 sp, err := newSpec(dbSpec, opts) 214 if err != nil { 215 return Spec{}, err 216 } 217 218 path, err := NewAbsolutePath(pathStr) 219 if err != nil { 220 return Spec{}, err 221 } 222 223 if path.Dataset == "" { 224 return Spec{}, errors.New("dataset name required for dataset spec") 225 } 226 227 sp.Path = path 228 return sp, nil 229 } 230 231 // ForPath parses a spec for a path to a Value. 232 func ForPath(spec string) (Spec, error) { 233 return ForPathOpts(spec, SpecOptions{}) 234 } 235 236 // ForPathOpts parses a spec for a path to a Value. 237 func ForPathOpts(spec string, opts SpecOptions) (Spec, error) { 238 dbSpec, pathStr, err := splitDatabaseSpec(spec) 239 if err != nil { 240 return Spec{}, err 241 } 242 243 var path AbsolutePath 244 if pathStr != "" { 245 path, err = NewAbsolutePath(pathStr) 246 if err != nil { 247 return Spec{}, err 248 } 249 } 250 251 sp, err := newSpec(dbSpec, opts) 252 if err != nil { 253 return Spec{}, err 254 } 255 256 sp.Path = path 257 return sp, nil 258 } 259 260 func (sp Spec) String() string { 261 s := sp.Protocol 262 if s != "mem" { 263 s += ":" + sp.DatabaseName 264 } 265 p := sp.Path.String() 266 if p != "" { 267 s += Separator + p 268 } 269 return s 270 } 271 272 // GetDatabase returns the Database instance that this Spec's DatabaseName 273 // describes. The same Database instance is returned every time, unless Close 274 // is called. If the Spec is closed, it is re-opened with a new Database. 275 func (sp Spec) GetDatabase(ctx context.Context) datas.Database { 276 if *sp.db == nil { 277 db, vrw, ns := sp.createDatabase(ctx) 278 *sp.db = db 279 *sp.vrw = vrw 280 *sp.ns = ns 281 } 282 return *sp.db 283 } 284 285 func (sp Spec) GetNodeStore(ctx context.Context) tree.NodeStore { 286 if *sp.db == nil { 287 db, vrw, ns := sp.createDatabase(ctx) 288 *sp.db = db 289 *sp.vrw = vrw 290 *sp.ns = ns 291 } 292 return *sp.ns 293 } 294 295 func (sp Spec) GetVRW(ctx context.Context) types.ValueReadWriter { 296 if *sp.db == nil { 297 db, vrw, ns := sp.createDatabase(ctx) 298 *sp.db = db 299 *sp.vrw = vrw 300 *sp.ns = ns 301 } 302 return *sp.vrw 303 } 304 305 // NewChunkStore returns a new ChunkStore instance that this Spec's 306 // DatabaseName describes. It's unusual to call this method, GetDatabase is 307 // more useful. Unlike GetDatabase, a new ChunkStore instance is returned every 308 // time. If there is no ChunkStore, for example remote databases, returns nil. 309 func (sp Spec) NewChunkStore(ctx context.Context) chunks.ChunkStore { 310 switch sp.Protocol { 311 case "http", "https": 312 return nil 313 case "aws": 314 return parseAWSSpec(ctx, sp.Href(), sp.Options) 315 case "gs": 316 return parseGCSSpec(ctx, sp.Href(), sp.Options) 317 case "oci": 318 return parseOCISpec(ctx, sp.Href(), sp.Options) 319 case "nbs": 320 cs, err := nbs.NewLocalStore(ctx, types.Format_Default.VersionString(), sp.DatabaseName, 1<<28, nbs.NewUnlimitedMemQuotaProvider()) 321 d.PanicIfError(err) 322 return cs 323 case "mem": 324 storage := &chunks.MemoryStorage{} 325 return storage.NewView() 326 default: 327 impl, ok := ExternalProtocols[sp.Protocol] 328 if !ok { 329 d.PanicIfError(fmt.Errorf("unknown protocol: %s", sp.Protocol)) 330 } 331 r, err := impl.NewChunkStore(sp) 332 d.PanicIfError(err) 333 return r 334 } 335 } 336 337 func parseAWSSpec(ctx context.Context, awsURL string, options SpecOptions) chunks.ChunkStore { 338 fmt.Println(awsURL, options) 339 340 u, _ := url.Parse(awsURL) 341 parts := strings.SplitN(u.Hostname(), ":", 2) // [table] [, bucket]? 342 d.PanicIfFalse(len(parts) == 2) 343 344 awsConfig := aws.NewConfig().WithRegion(options.AwsRegionOrDefault()) 345 346 switch options.AWSCredSource { 347 case RoleCS: 348 case EnvCS: 349 awsConfig = awsConfig.WithCredentials(credentials.NewEnvCredentials()) 350 case FileCS: 351 filePath := options.AwsCredFileOrDefault() 352 creds := credentials.NewSharedCredentials(filePath, DefaultAWSCredsProfile) 353 awsConfig = awsConfig.WithCredentials(creds) 354 case AutoCS: 355 envCreds := credentials.NewEnvCredentials() 356 if _, err := envCreds.Get(); err == nil { 357 awsConfig = awsConfig.WithCredentials(envCreds) 358 } 359 360 filePath := options.AwsCredFileOrDefault() 361 if _, err := os.Stat(filePath); err == nil { 362 creds := credentials.NewSharedCredentials(filePath, DefaultAWSCredsProfile) 363 awsConfig = awsConfig.WithCredentials(creds) 364 } 365 default: 366 panic("unsupported credential type") 367 } 368 369 sess := session.Must(session.NewSession(awsConfig)) 370 cs, err := nbs.NewAWSStore(ctx, types.Format_Default.VersionString(), parts[0], u.Path, parts[1], s3.New(sess), dynamodb.New(sess), 1<<28, nbs.NewUnlimitedMemQuotaProvider()) 371 372 d.PanicIfError(err) 373 374 return cs 375 } 376 377 func parseGCSSpec(ctx context.Context, gcsURL string, options SpecOptions) chunks.ChunkStore { 378 u, err := url.Parse(gcsURL) 379 d.PanicIfError(err) 380 381 fmt.Println(u) 382 383 bucket := u.Host 384 path := u.Path 385 386 gcs, err := storage.NewClient(ctx) 387 388 if err != nil { 389 panic("Could not create GCSBlobstore") 390 } 391 392 cs, err := nbs.NewGCSStore(ctx, types.Format_Default.VersionString(), bucket, path, gcs, 1<<28, nbs.NewUnlimitedMemQuotaProvider()) 393 394 d.PanicIfError(err) 395 396 return cs 397 } 398 399 func parseOCISpec(ctx context.Context, ociURL string, options SpecOptions) chunks.ChunkStore { 400 u, err := url.Parse(ociURL) 401 d.PanicIfError(err) 402 403 fmt.Println(u) 404 405 bucket := u.Host 406 path := u.Path 407 408 provider := common.DefaultConfigProvider() 409 410 client, err := objectstorage.NewObjectStorageClientWithConfigurationProvider(provider) 411 if err != nil { 412 panic("Could not create OCIBlobstore") 413 } 414 415 cs, err := nbs.NewOCISStore(ctx, types.Format_Default.VersionString(), bucket, path, provider, client, 1<<28, nbs.NewUnlimitedMemQuotaProvider()) 416 d.PanicIfError(err) 417 418 return cs 419 } 420 421 // GetDataset returns the current Dataset instance for this Spec's Database. 422 // GetDataset is live, so if Commit is called on this Spec's Database later, a 423 // new up-to-date Dataset will returned on the next call to GetDataset. If 424 // this is not a Dataset spec, returns nil. 425 func (sp Spec) GetDataset(ctx context.Context) (ds datas.Dataset) { 426 if sp.Path.Dataset != "" { 427 var err error 428 ds, err = sp.GetDatabase(ctx).GetDataset(ctx, sp.Path.Dataset) 429 d.PanicIfError(err) 430 } 431 return 432 } 433 434 // GetValue returns the Value at this Spec's Path within its Database, or nil 435 // if this isn't a Path Spec or if that path isn't found. 436 func (sp Spec) GetValue(ctx context.Context) (val types.Value, err error) { 437 if !sp.Path.IsEmpty() { 438 val, err = sp.Path.Resolve(ctx, sp.GetDatabase(ctx), sp.GetVRW(ctx)) 439 if err != nil { 440 return nil, err 441 } 442 } 443 return 444 } 445 446 // Href treats the Protocol and DatabaseName as a URL, and returns its href. 447 // For example, the spec http://example.com/path::ds returns 448 // "http://example.com/path". If the Protocol is not "http" or "http", returns 449 // an empty string. 450 func (sp Spec) Href() string { 451 switch proto := sp.Protocol; proto { 452 case "http", "https", "aws", "gs", "oci": 453 return proto + ":" + sp.DatabaseName 454 default: 455 return "" 456 } 457 } 458 459 func (sp Spec) Close() error { 460 db := *sp.db 461 if db == nil { 462 return nil 463 } 464 465 *sp.db = nil 466 return db.Close() 467 } 468 469 func (sp Spec) createDatabase(ctx context.Context) (datas.Database, types.ValueReadWriter, tree.NodeStore) { 470 switch sp.Protocol { 471 case "aws": 472 cs := parseAWSSpec(ctx, sp.Href(), sp.Options) 473 ns := tree.NewNodeStore(cs) 474 vrw := types.NewValueStore(cs) 475 return datas.NewTypesDatabase(vrw, ns), vrw, ns 476 case "gs": 477 cs := parseGCSSpec(ctx, sp.Href(), sp.Options) 478 ns := tree.NewNodeStore(cs) 479 vrw := types.NewValueStore(cs) 480 return datas.NewTypesDatabase(vrw, ns), vrw, ns 481 case "oci": 482 cs := parseOCISpec(ctx, sp.Href(), sp.Options) 483 ns := tree.NewNodeStore(cs) 484 vrw := types.NewValueStore(cs) 485 return datas.NewTypesDatabase(vrw, ns), vrw, ns 486 case "nbs": 487 // If the database is the oldgen database return a standard NBS store. 488 if strings.Contains(sp.DatabaseName, "oldgen") { 489 return getStandardLocalStore(ctx, sp.DatabaseName) 490 } 491 492 oldgenDb := filepath.Join(sp.DatabaseName, "oldgen") 493 494 err := validateDir(oldgenDb) 495 // If we can't validate that an oldgen db exists just use a standard local store. 496 if err != nil { 497 return getStandardLocalStore(ctx, sp.DatabaseName) 498 } 499 500 newGenSt, err := nbs.NewLocalJournalingStore(ctx, types.Format_Default.VersionString(), sp.DatabaseName, nbs.NewUnlimitedMemQuotaProvider()) 501 502 // If the journaling store can't be created, fall back to a standard local store 503 if err != nil { 504 var localErr error 505 newGenSt, localErr = nbs.NewLocalStore(ctx, types.Format_Default.VersionString(), sp.DatabaseName, 1<<28, nbs.NewUnlimitedMemQuotaProvider()) 506 if localErr != nil { 507 d.PanicIfError(err) 508 } 509 } 510 511 oldGenSt, err := nbs.NewLocalStore(ctx, newGenSt.Version(), oldgenDb, 1<<28, nbs.NewUnlimitedMemQuotaProvider()) 512 d.PanicIfError(err) 513 514 cs := nbs.NewGenerationalCS(oldGenSt, newGenSt, nil) 515 516 ns := tree.NewNodeStore(cs) 517 vrw := types.NewValueStore(cs) 518 return datas.NewTypesDatabase(vrw, ns), vrw, ns 519 case "mem": 520 storage := &chunks.MemoryStorage{} 521 cs := storage.NewViewWithDefaultFormat() 522 ns := tree.NewNodeStore(cs) 523 vrw := types.NewValueStore(cs) 524 return datas.NewTypesDatabase(vrw, ns), vrw, ns 525 default: 526 impl, ok := ExternalProtocols[sp.Protocol] 527 if !ok { 528 d.PanicIfError(fmt.Errorf("unknown protocol: %s", sp.Protocol)) 529 } 530 cs, err := impl.NewChunkStore(sp) 531 d.PanicIfError(err) 532 vrw := types.NewValueStore(cs) 533 ns := tree.NewNodeStore(cs) 534 return datas.NewTypesDatabase(vrw, ns), vrw, ns 535 } 536 } 537 538 func getStandardLocalStore(ctx context.Context, dbName string) (datas.Database, types.ValueReadWriter, tree.NodeStore) { 539 os.Mkdir(dbName, 0777) 540 541 cs, err := nbs.NewLocalStore(ctx, types.Format_Default.VersionString(), dbName, 1<<28, nbs.NewUnlimitedMemQuotaProvider()) 542 d.PanicIfError(err) 543 544 vrw := types.NewValueStore(cs) 545 ns := tree.NewNodeStore(cs) 546 return datas.NewTypesDatabase(vrw, ns), vrw, ns 547 } 548 549 func validateDir(path string) error { 550 info, err := os.Stat(path) 551 552 if err != nil { 553 return err 554 } else if !info.IsDir() { 555 return filesys.ErrIsFile 556 } 557 558 return nil 559 } 560 561 func parseDatabaseSpec(spec string) (protocol, name string, err error) { 562 if len(spec) == 0 { 563 err = fmt.Errorf("empty spec") 564 return 565 } 566 567 parts := strings.SplitN(spec, ":", 2) // [protocol] [, path]? 568 569 // If there was no ":" then this is either a mem spec, or a filesystem path. 570 // This is ambiguous if the file system path is "mem" but that just means the 571 // path needs to be explicitly "nbs:mem". 572 if len(parts) == 1 { 573 if spec == "mem" { 574 protocol = "mem" 575 } else { 576 protocol, name = "nbs", spec 577 } 578 return 579 } else if len(parts) == 2 && len(parts[0]) == 1 && parts[0][0] >= 'A' && parts[0][0] <= 'Z' { //check for Windows drive letter, ala C:\Users\Public 580 if _, err := os.Stat(parts[0] + `:\`); !os.IsNotExist(err) { 581 parts = []string{"nbs", spec} 582 } 583 } 584 585 if _, ok := ExternalProtocols[parts[0]]; ok { 586 protocol, name = parts[0], parts[1] 587 return 588 } 589 590 switch parts[0] { 591 case "nbs": 592 protocol, name = parts[0], parts[1] 593 594 case "aws", "gs", "oci": 595 u, perr := url.Parse(spec) 596 if perr != nil { 597 err = perr 598 } else if u.Host == "" { 599 err = fmt.Errorf("%s has empty host", spec) 600 } else if parts[0] == "aws" && u.Path == "" { 601 err = fmt.Errorf("%s does not specify a database ID", spec) 602 } else { 603 protocol, name = parts[0], parts[1] 604 } 605 606 case "mem": 607 err = fmt.Errorf(`in-memory database must be specified as "mem", not "mem:"`) 608 609 default: 610 err = fmt.Errorf("invalid database protocol %s in %s", protocol, spec) 611 } 612 return 613 } 614 615 func splitDatabaseSpec(spec string) (string, string, error) { 616 lastIdx := strings.LastIndex(spec, Separator) 617 if lastIdx == -1 { 618 return "", "", fmt.Errorf("missing %s after database in %s", Separator, spec) 619 } 620 621 return spec[:lastIdx], spec[lastIdx+len(Separator):], nil 622 }