github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/spec/spec.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // This file incorporates work covered by the following copyright and
    16  // permission notice:
    17  //
    18  // Copyright 2016 Attic Labs, Inc. All rights reserved.
    19  // Licensed under the Apache License, version 2.0:
    20  // http://www.apache.org/licenses/LICENSE-2.0
    21  
    22  // Package spec provides builders and parsers for spelling Noms databases,
    23  // datasets and values.
    24  package spec
    25  
    26  import (
    27  	"context"
    28  	"errors"
    29  	"fmt"
    30  	"net/url"
    31  	"os"
    32  	"os/user"
    33  	"path/filepath"
    34  	"strings"
    35  
    36  	"cloud.google.com/go/storage"
    37  	"github.com/aws/aws-sdk-go/aws"
    38  	"github.com/aws/aws-sdk-go/aws/credentials"
    39  	"github.com/aws/aws-sdk-go/aws/session"
    40  	"github.com/aws/aws-sdk-go/service/dynamodb"
    41  	"github.com/aws/aws-sdk-go/service/s3"
    42  	"github.com/oracle/oci-go-sdk/v65/common"
    43  	"github.com/oracle/oci-go-sdk/v65/objectstorage"
    44  
    45  	"github.com/dolthub/dolt/go/libraries/utils/filesys"
    46  	"github.com/dolthub/dolt/go/store/chunks"
    47  	"github.com/dolthub/dolt/go/store/d"
    48  	"github.com/dolthub/dolt/go/store/datas"
    49  	"github.com/dolthub/dolt/go/store/nbs"
    50  	"github.com/dolthub/dolt/go/store/prolly/tree"
    51  	"github.com/dolthub/dolt/go/store/types"
    52  )
    53  
    54  const (
    55  	Separator              = "::"
    56  	DefaultAWSRegion       = "us-west-2"
    57  	DefaultAWSCredsProfile = "default"
    58  )
    59  
    60  type ProtocolImpl interface {
    61  	NewChunkStore(sp Spec) (chunks.ChunkStore, error)
    62  }
    63  
    64  var ExternalProtocols = map[string]ProtocolImpl{}
    65  
    66  type AWSCredentialSource int
    67  
    68  const (
    69  	InvalidCS AWSCredentialSource = iota - 1
    70  
    71  	// Auto will try env first and fall back to role (This is the default)
    72  	AutoCS
    73  
    74  	// Role Uses the AWS IAM role of the instance for auth
    75  	RoleCS
    76  
    77  	// Env uses the credentials stored in the environment variables AWS_ACCESS_KEY_ID, and AWS_SECRET_ACCESS_KEY
    78  	EnvCS
    79  
    80  	// Uses credentials stored in a file
    81  	FileCS
    82  )
    83  
    84  func (ct AWSCredentialSource) String() string {
    85  	switch ct {
    86  	case RoleCS:
    87  		return "role"
    88  	case EnvCS:
    89  		return "env"
    90  	case AutoCS:
    91  		return "auto"
    92  	case FileCS:
    93  		return "file"
    94  	default:
    95  		return "invalid"
    96  	}
    97  }
    98  
    99  func AWSCredentialSourceFromStr(str string) AWSCredentialSource {
   100  	strlwr := strings.TrimSpace(strings.ToLower(str))
   101  	switch strlwr {
   102  	case "", "auto":
   103  		return AutoCS
   104  	case "role":
   105  		return RoleCS
   106  	case "env":
   107  		return EnvCS
   108  	case "file":
   109  		return FileCS
   110  	default:
   111  		return InvalidCS
   112  	}
   113  }
   114  
   115  // SpecOptions customize Spec behavior.
   116  type SpecOptions struct {
   117  	// Authorization token for requests. For example, if the database is HTTP
   118  	// this will used for an `Authorization: Bearer ${authorization}` header.
   119  	Authorization string
   120  
   121  	// Region that should be used when creating the aws session
   122  	AWSRegion string
   123  
   124  	// The type of credentials that should be used when creating the aws session
   125  	AWSCredSource AWSCredentialSource
   126  
   127  	// Credential file to use when using auto or file credentials
   128  	AWSCredFile string
   129  }
   130  
   131  func (so *SpecOptions) AwsRegionOrDefault() string {
   132  	if so.AWSRegion == "" {
   133  		return DefaultAWSRegion
   134  	}
   135  
   136  	return so.AWSRegion
   137  }
   138  
   139  func (so *SpecOptions) AwsCredFileOrDefault() string {
   140  	if so.AWSCredFile == "" {
   141  		usr, err := user.Current()
   142  		if err != nil {
   143  			return ""
   144  		}
   145  
   146  		return filepath.Join(usr.HomeDir, ".aws", "credentials")
   147  	}
   148  
   149  	return so.AWSCredFile
   150  }
   151  
   152  // Spec locates a Noms database, dataset, or value globally. Spec caches
   153  // its database instance so it therefore does not reflect new commits in
   154  // the db, by (legacy) design.
   155  type Spec struct {
   156  	// Protocol is one of "mem", "aws", "gs", "nbs"
   157  	Protocol string
   158  
   159  	// DatabaseName is the name of the Spec's database, which is the string after
   160  	// "protocol:". specs include their leading "//" characters.
   161  	DatabaseName string
   162  
   163  	// Options are the SpecOptions that the Spec was constructed with.
   164  	Options SpecOptions
   165  
   166  	// Path is nil unless the spec was created with ForPath.
   167  	Path AbsolutePath
   168  
   169  	// db is lazily created, so it needs to be a pointer to a Database.
   170  	db  *datas.Database
   171  	vrw *types.ValueReadWriter
   172  	ns  *tree.NodeStore
   173  }
   174  
   175  func newSpec(dbSpec string, opts SpecOptions) (Spec, error) {
   176  	protocol, dbName, err := parseDatabaseSpec(dbSpec)
   177  	if err != nil {
   178  		return Spec{}, err
   179  	}
   180  
   181  	return Spec{
   182  		Protocol:     protocol,
   183  		DatabaseName: dbName,
   184  		Options:      opts,
   185  		db:           new(datas.Database),
   186  		vrw:          new(types.ValueReadWriter),
   187  		ns:           new(tree.NodeStore),
   188  	}, nil
   189  }
   190  
   191  // ForDatabase parses a spec for a Database.
   192  func ForDatabase(spec string) (Spec, error) {
   193  	return ForDatabaseOpts(spec, SpecOptions{})
   194  }
   195  
   196  // ForDatabaseOpts parses a spec for a Database.
   197  func ForDatabaseOpts(spec string, opts SpecOptions) (Spec, error) {
   198  	return newSpec(spec, opts)
   199  }
   200  
   201  // ForDataset parses a spec for a Dataset.
   202  func ForDataset(spec string) (Spec, error) {
   203  	return ForDatasetOpts(spec, SpecOptions{})
   204  }
   205  
   206  // ForDatasetOpts parses a spec for a Dataset.
   207  func ForDatasetOpts(spec string, opts SpecOptions) (Spec, error) {
   208  	dbSpec, pathStr, err := splitDatabaseSpec(spec)
   209  	if err != nil {
   210  		return Spec{}, err
   211  	}
   212  
   213  	sp, err := newSpec(dbSpec, opts)
   214  	if err != nil {
   215  		return Spec{}, err
   216  	}
   217  
   218  	path, err := NewAbsolutePath(pathStr)
   219  	if err != nil {
   220  		return Spec{}, err
   221  	}
   222  
   223  	if path.Dataset == "" {
   224  		return Spec{}, errors.New("dataset name required for dataset spec")
   225  	}
   226  
   227  	sp.Path = path
   228  	return sp, nil
   229  }
   230  
   231  // ForPath parses a spec for a path to a Value.
   232  func ForPath(spec string) (Spec, error) {
   233  	return ForPathOpts(spec, SpecOptions{})
   234  }
   235  
   236  // ForPathOpts parses a spec for a path to a Value.
   237  func ForPathOpts(spec string, opts SpecOptions) (Spec, error) {
   238  	dbSpec, pathStr, err := splitDatabaseSpec(spec)
   239  	if err != nil {
   240  		return Spec{}, err
   241  	}
   242  
   243  	var path AbsolutePath
   244  	if pathStr != "" {
   245  		path, err = NewAbsolutePath(pathStr)
   246  		if err != nil {
   247  			return Spec{}, err
   248  		}
   249  	}
   250  
   251  	sp, err := newSpec(dbSpec, opts)
   252  	if err != nil {
   253  		return Spec{}, err
   254  	}
   255  
   256  	sp.Path = path
   257  	return sp, nil
   258  }
   259  
   260  func (sp Spec) String() string {
   261  	s := sp.Protocol
   262  	if s != "mem" {
   263  		s += ":" + sp.DatabaseName
   264  	}
   265  	p := sp.Path.String()
   266  	if p != "" {
   267  		s += Separator + p
   268  	}
   269  	return s
   270  }
   271  
   272  // GetDatabase returns the Database instance that this Spec's DatabaseName
   273  // describes. The same Database instance is returned every time, unless Close
   274  // is called. If the Spec is closed, it is re-opened with a new Database.
   275  func (sp Spec) GetDatabase(ctx context.Context) datas.Database {
   276  	if *sp.db == nil {
   277  		db, vrw, ns := sp.createDatabase(ctx)
   278  		*sp.db = db
   279  		*sp.vrw = vrw
   280  		*sp.ns = ns
   281  	}
   282  	return *sp.db
   283  }
   284  
   285  func (sp Spec) GetNodeStore(ctx context.Context) tree.NodeStore {
   286  	if *sp.db == nil {
   287  		db, vrw, ns := sp.createDatabase(ctx)
   288  		*sp.db = db
   289  		*sp.vrw = vrw
   290  		*sp.ns = ns
   291  	}
   292  	return *sp.ns
   293  }
   294  
   295  func (sp Spec) GetVRW(ctx context.Context) types.ValueReadWriter {
   296  	if *sp.db == nil {
   297  		db, vrw, ns := sp.createDatabase(ctx)
   298  		*sp.db = db
   299  		*sp.vrw = vrw
   300  		*sp.ns = ns
   301  	}
   302  	return *sp.vrw
   303  }
   304  
   305  // NewChunkStore returns a new ChunkStore instance that this Spec's
   306  // DatabaseName describes. It's unusual to call this method, GetDatabase is
   307  // more useful. Unlike GetDatabase, a new ChunkStore instance is returned every
   308  // time. If there is no ChunkStore, for example remote databases, returns nil.
   309  func (sp Spec) NewChunkStore(ctx context.Context) chunks.ChunkStore {
   310  	switch sp.Protocol {
   311  	case "http", "https":
   312  		return nil
   313  	case "aws":
   314  		return parseAWSSpec(ctx, sp.Href(), sp.Options)
   315  	case "gs":
   316  		return parseGCSSpec(ctx, sp.Href(), sp.Options)
   317  	case "oci":
   318  		return parseOCISpec(ctx, sp.Href(), sp.Options)
   319  	case "nbs":
   320  		cs, err := nbs.NewLocalStore(ctx, types.Format_Default.VersionString(), sp.DatabaseName, 1<<28, nbs.NewUnlimitedMemQuotaProvider())
   321  		d.PanicIfError(err)
   322  		return cs
   323  	case "mem":
   324  		storage := &chunks.MemoryStorage{}
   325  		return storage.NewView()
   326  	default:
   327  		impl, ok := ExternalProtocols[sp.Protocol]
   328  		if !ok {
   329  			d.PanicIfError(fmt.Errorf("unknown protocol: %s", sp.Protocol))
   330  		}
   331  		r, err := impl.NewChunkStore(sp)
   332  		d.PanicIfError(err)
   333  		return r
   334  	}
   335  }
   336  
   337  func parseAWSSpec(ctx context.Context, awsURL string, options SpecOptions) chunks.ChunkStore {
   338  	fmt.Println(awsURL, options)
   339  
   340  	u, _ := url.Parse(awsURL)
   341  	parts := strings.SplitN(u.Hostname(), ":", 2) // [table] [, bucket]?
   342  	d.PanicIfFalse(len(parts) == 2)
   343  
   344  	awsConfig := aws.NewConfig().WithRegion(options.AwsRegionOrDefault())
   345  
   346  	switch options.AWSCredSource {
   347  	case RoleCS:
   348  	case EnvCS:
   349  		awsConfig = awsConfig.WithCredentials(credentials.NewEnvCredentials())
   350  	case FileCS:
   351  		filePath := options.AwsCredFileOrDefault()
   352  		creds := credentials.NewSharedCredentials(filePath, DefaultAWSCredsProfile)
   353  		awsConfig = awsConfig.WithCredentials(creds)
   354  	case AutoCS:
   355  		envCreds := credentials.NewEnvCredentials()
   356  		if _, err := envCreds.Get(); err == nil {
   357  			awsConfig = awsConfig.WithCredentials(envCreds)
   358  		}
   359  
   360  		filePath := options.AwsCredFileOrDefault()
   361  		if _, err := os.Stat(filePath); err == nil {
   362  			creds := credentials.NewSharedCredentials(filePath, DefaultAWSCredsProfile)
   363  			awsConfig = awsConfig.WithCredentials(creds)
   364  		}
   365  	default:
   366  		panic("unsupported credential type")
   367  	}
   368  
   369  	sess := session.Must(session.NewSession(awsConfig))
   370  	cs, err := nbs.NewAWSStore(ctx, types.Format_Default.VersionString(), parts[0], u.Path, parts[1], s3.New(sess), dynamodb.New(sess), 1<<28, nbs.NewUnlimitedMemQuotaProvider())
   371  
   372  	d.PanicIfError(err)
   373  
   374  	return cs
   375  }
   376  
   377  func parseGCSSpec(ctx context.Context, gcsURL string, options SpecOptions) chunks.ChunkStore {
   378  	u, err := url.Parse(gcsURL)
   379  	d.PanicIfError(err)
   380  
   381  	fmt.Println(u)
   382  
   383  	bucket := u.Host
   384  	path := u.Path
   385  
   386  	gcs, err := storage.NewClient(ctx)
   387  
   388  	if err != nil {
   389  		panic("Could not create GCSBlobstore")
   390  	}
   391  
   392  	cs, err := nbs.NewGCSStore(ctx, types.Format_Default.VersionString(), bucket, path, gcs, 1<<28, nbs.NewUnlimitedMemQuotaProvider())
   393  
   394  	d.PanicIfError(err)
   395  
   396  	return cs
   397  }
   398  
   399  func parseOCISpec(ctx context.Context, ociURL string, options SpecOptions) chunks.ChunkStore {
   400  	u, err := url.Parse(ociURL)
   401  	d.PanicIfError(err)
   402  
   403  	fmt.Println(u)
   404  
   405  	bucket := u.Host
   406  	path := u.Path
   407  
   408  	provider := common.DefaultConfigProvider()
   409  
   410  	client, err := objectstorage.NewObjectStorageClientWithConfigurationProvider(provider)
   411  	if err != nil {
   412  		panic("Could not create OCIBlobstore")
   413  	}
   414  
   415  	cs, err := nbs.NewOCISStore(ctx, types.Format_Default.VersionString(), bucket, path, provider, client, 1<<28, nbs.NewUnlimitedMemQuotaProvider())
   416  	d.PanicIfError(err)
   417  
   418  	return cs
   419  }
   420  
   421  // GetDataset returns the current Dataset instance for this Spec's Database.
   422  // GetDataset is live, so if Commit is called on this Spec's Database later, a
   423  // new up-to-date Dataset will returned on the next call to GetDataset.  If
   424  // this is not a Dataset spec, returns nil.
   425  func (sp Spec) GetDataset(ctx context.Context) (ds datas.Dataset) {
   426  	if sp.Path.Dataset != "" {
   427  		var err error
   428  		ds, err = sp.GetDatabase(ctx).GetDataset(ctx, sp.Path.Dataset)
   429  		d.PanicIfError(err)
   430  	}
   431  	return
   432  }
   433  
   434  // GetValue returns the Value at this Spec's Path within its Database, or nil
   435  // if this isn't a Path Spec or if that path isn't found.
   436  func (sp Spec) GetValue(ctx context.Context) (val types.Value, err error) {
   437  	if !sp.Path.IsEmpty() {
   438  		val, err = sp.Path.Resolve(ctx, sp.GetDatabase(ctx), sp.GetVRW(ctx))
   439  		if err != nil {
   440  			return nil, err
   441  		}
   442  	}
   443  	return
   444  }
   445  
   446  // Href treats the Protocol and DatabaseName as a URL, and returns its href.
   447  // For example, the spec http://example.com/path::ds returns
   448  // "http://example.com/path". If the Protocol is not "http" or "http", returns
   449  // an empty string.
   450  func (sp Spec) Href() string {
   451  	switch proto := sp.Protocol; proto {
   452  	case "http", "https", "aws", "gs", "oci":
   453  		return proto + ":" + sp.DatabaseName
   454  	default:
   455  		return ""
   456  	}
   457  }
   458  
   459  func (sp Spec) Close() error {
   460  	db := *sp.db
   461  	if db == nil {
   462  		return nil
   463  	}
   464  
   465  	*sp.db = nil
   466  	return db.Close()
   467  }
   468  
   469  func (sp Spec) createDatabase(ctx context.Context) (datas.Database, types.ValueReadWriter, tree.NodeStore) {
   470  	switch sp.Protocol {
   471  	case "aws":
   472  		cs := parseAWSSpec(ctx, sp.Href(), sp.Options)
   473  		ns := tree.NewNodeStore(cs)
   474  		vrw := types.NewValueStore(cs)
   475  		return datas.NewTypesDatabase(vrw, ns), vrw, ns
   476  	case "gs":
   477  		cs := parseGCSSpec(ctx, sp.Href(), sp.Options)
   478  		ns := tree.NewNodeStore(cs)
   479  		vrw := types.NewValueStore(cs)
   480  		return datas.NewTypesDatabase(vrw, ns), vrw, ns
   481  	case "oci":
   482  		cs := parseOCISpec(ctx, sp.Href(), sp.Options)
   483  		ns := tree.NewNodeStore(cs)
   484  		vrw := types.NewValueStore(cs)
   485  		return datas.NewTypesDatabase(vrw, ns), vrw, ns
   486  	case "nbs":
   487  		// If the database is the oldgen database return a standard NBS store.
   488  		if strings.Contains(sp.DatabaseName, "oldgen") {
   489  			return getStandardLocalStore(ctx, sp.DatabaseName)
   490  		}
   491  
   492  		oldgenDb := filepath.Join(sp.DatabaseName, "oldgen")
   493  
   494  		err := validateDir(oldgenDb)
   495  		// If we can't validate that an oldgen db exists just use a standard local store.
   496  		if err != nil {
   497  			return getStandardLocalStore(ctx, sp.DatabaseName)
   498  		}
   499  
   500  		newGenSt, err := nbs.NewLocalJournalingStore(ctx, types.Format_Default.VersionString(), sp.DatabaseName, nbs.NewUnlimitedMemQuotaProvider())
   501  
   502  		// If the journaling store can't be created, fall back to a standard local store
   503  		if err != nil {
   504  			var localErr error
   505  			newGenSt, localErr = nbs.NewLocalStore(ctx, types.Format_Default.VersionString(), sp.DatabaseName, 1<<28, nbs.NewUnlimitedMemQuotaProvider())
   506  			if localErr != nil {
   507  				d.PanicIfError(err)
   508  			}
   509  		}
   510  
   511  		oldGenSt, err := nbs.NewLocalStore(ctx, newGenSt.Version(), oldgenDb, 1<<28, nbs.NewUnlimitedMemQuotaProvider())
   512  		d.PanicIfError(err)
   513  
   514  		cs := nbs.NewGenerationalCS(oldGenSt, newGenSt, nil)
   515  
   516  		ns := tree.NewNodeStore(cs)
   517  		vrw := types.NewValueStore(cs)
   518  		return datas.NewTypesDatabase(vrw, ns), vrw, ns
   519  	case "mem":
   520  		storage := &chunks.MemoryStorage{}
   521  		cs := storage.NewViewWithDefaultFormat()
   522  		ns := tree.NewNodeStore(cs)
   523  		vrw := types.NewValueStore(cs)
   524  		return datas.NewTypesDatabase(vrw, ns), vrw, ns
   525  	default:
   526  		impl, ok := ExternalProtocols[sp.Protocol]
   527  		if !ok {
   528  			d.PanicIfError(fmt.Errorf("unknown protocol: %s", sp.Protocol))
   529  		}
   530  		cs, err := impl.NewChunkStore(sp)
   531  		d.PanicIfError(err)
   532  		vrw := types.NewValueStore(cs)
   533  		ns := tree.NewNodeStore(cs)
   534  		return datas.NewTypesDatabase(vrw, ns), vrw, ns
   535  	}
   536  }
   537  
   538  func getStandardLocalStore(ctx context.Context, dbName string) (datas.Database, types.ValueReadWriter, tree.NodeStore) {
   539  	os.Mkdir(dbName, 0777)
   540  
   541  	cs, err := nbs.NewLocalStore(ctx, types.Format_Default.VersionString(), dbName, 1<<28, nbs.NewUnlimitedMemQuotaProvider())
   542  	d.PanicIfError(err)
   543  
   544  	vrw := types.NewValueStore(cs)
   545  	ns := tree.NewNodeStore(cs)
   546  	return datas.NewTypesDatabase(vrw, ns), vrw, ns
   547  }
   548  
   549  func validateDir(path string) error {
   550  	info, err := os.Stat(path)
   551  
   552  	if err != nil {
   553  		return err
   554  	} else if !info.IsDir() {
   555  		return filesys.ErrIsFile
   556  	}
   557  
   558  	return nil
   559  }
   560  
   561  func parseDatabaseSpec(spec string) (protocol, name string, err error) {
   562  	if len(spec) == 0 {
   563  		err = fmt.Errorf("empty spec")
   564  		return
   565  	}
   566  
   567  	parts := strings.SplitN(spec, ":", 2) // [protocol] [, path]?
   568  
   569  	// If there was no ":" then this is either a mem spec, or a filesystem path.
   570  	// This is ambiguous if the file system path is "mem" but that just means the
   571  	// path needs to be explicitly "nbs:mem".
   572  	if len(parts) == 1 {
   573  		if spec == "mem" {
   574  			protocol = "mem"
   575  		} else {
   576  			protocol, name = "nbs", spec
   577  		}
   578  		return
   579  	} else if len(parts) == 2 && len(parts[0]) == 1 && parts[0][0] >= 'A' && parts[0][0] <= 'Z' { //check for Windows drive letter, ala C:\Users\Public
   580  		if _, err := os.Stat(parts[0] + `:\`); !os.IsNotExist(err) {
   581  			parts = []string{"nbs", spec}
   582  		}
   583  	}
   584  
   585  	if _, ok := ExternalProtocols[parts[0]]; ok {
   586  		protocol, name = parts[0], parts[1]
   587  		return
   588  	}
   589  
   590  	switch parts[0] {
   591  	case "nbs":
   592  		protocol, name = parts[0], parts[1]
   593  
   594  	case "aws", "gs", "oci":
   595  		u, perr := url.Parse(spec)
   596  		if perr != nil {
   597  			err = perr
   598  		} else if u.Host == "" {
   599  			err = fmt.Errorf("%s has empty host", spec)
   600  		} else if parts[0] == "aws" && u.Path == "" {
   601  			err = fmt.Errorf("%s does not specify a database ID", spec)
   602  		} else {
   603  			protocol, name = parts[0], parts[1]
   604  		}
   605  
   606  	case "mem":
   607  		err = fmt.Errorf(`in-memory database must be specified as "mem", not "mem:"`)
   608  
   609  	default:
   610  		err = fmt.Errorf("invalid database protocol %s in %s", protocol, spec)
   611  	}
   612  	return
   613  }
   614  
   615  func splitDatabaseSpec(spec string) (string, string, error) {
   616  	lastIdx := strings.LastIndex(spec, Separator)
   617  	if lastIdx == -1 {
   618  		return "", "", fmt.Errorf("missing %s after database in %s", Separator, spec)
   619  	}
   620  
   621  	return spec[:lastIdx], spec[lastIdx+len(Separator):], nil
   622  }