github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/spec/spec.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // This file incorporates work covered by the following copyright and
    16  // permission notice:
    17  //
    18  // Copyright 2016 Attic Labs, Inc. All rights reserved.
    19  // Licensed under the Apache License, version 2.0:
    20  // http://www.apache.org/licenses/LICENSE-2.0
    21  
    22  // Package spec provides builders and parsers for spelling Noms databases,
    23  // datasets and values.
    24  package spec
    25  
    26  import (
    27  	"context"
    28  	"errors"
    29  	"fmt"
    30  	"net/url"
    31  	"os"
    32  	"os/user"
    33  	"path/filepath"
    34  	"strings"
    35  
    36  	"cloud.google.com/go/storage"
    37  	"github.com/aws/aws-sdk-go/aws"
    38  	"github.com/aws/aws-sdk-go/aws/credentials"
    39  	"github.com/aws/aws-sdk-go/aws/session"
    40  	"github.com/aws/aws-sdk-go/service/dynamodb"
    41  	"github.com/aws/aws-sdk-go/service/s3"
    42  
    43  	"github.com/dolthub/dolt/go/store/chunks"
    44  	"github.com/dolthub/dolt/go/store/d"
    45  	"github.com/dolthub/dolt/go/store/datas"
    46  	"github.com/dolthub/dolt/go/store/nbs"
    47  	"github.com/dolthub/dolt/go/store/types"
    48  )
    49  
// Package-wide constants used when parsing specs and configuring AWS access.
const (
	Separator              = "::"        // Separator splits the database part of a spec from its path part.
	DefaultAWSRegion       = "us-west-2" // DefaultAWSRegion is used when SpecOptions.AWSRegion is empty.
	DefaultAWSCredsProfile = "default"   // DefaultAWSCredsProfile is the profile read from shared credentials files.
)
    55  
// ProtocolImpl is implemented by handlers for additional protocols. Handlers
// are registered by protocol name in ExternalProtocols.
type ProtocolImpl interface {
	// NewChunkStore returns a ChunkStore for the database described by sp.
	NewChunkStore(sp Spec) (chunks.ChunkStore, error)
	// NewDatabase returns a Database for the database described by sp.
	NewDatabase(sp Spec) (datas.Database, error)
}
    60  
// ExternalProtocols maps a protocol name to the ProtocolImpl that handles it.
// parseDatabaseSpec accepts any protocol registered here, and NewChunkStore
// and createDatabase consult it for protocols they do not handle themselves.
var ExternalProtocols = map[string]ProtocolImpl{}
    62  
    63  type AWSCredentialSource int
    64  
    65  const (
    66  	InvalidCS AWSCredentialSource = iota - 1
    67  
    68  	// Auto will try env first and fall back to role (This is the default)
    69  	AutoCS
    70  
    71  	// Role Uses the AWS IAM role of the instance for auth
    72  	RoleCS
    73  
    74  	// Env uses the credentials stored in the environment variables AWS_ACCESS_KEY_ID, and AWS_SECRET_ACCESS_KEY
    75  	EnvCS
    76  
    77  	// Uses credentials stored in a file
    78  	FileCS
    79  )
    80  
    81  func (ct AWSCredentialSource) String() string {
    82  	switch ct {
    83  	case RoleCS:
    84  		return "role"
    85  	case EnvCS:
    86  		return "env"
    87  	case AutoCS:
    88  		return "auto"
    89  	case FileCS:
    90  		return "file"
    91  	default:
    92  		return "invalid"
    93  	}
    94  }
    95  
    96  func AWSCredentialSourceFromStr(str string) AWSCredentialSource {
    97  	strlwr := strings.TrimSpace(strings.ToLower(str))
    98  	switch strlwr {
    99  	case "", "auto":
   100  		return AutoCS
   101  	case "role":
   102  		return RoleCS
   103  	case "env":
   104  		return EnvCS
   105  	case "file":
   106  		return FileCS
   107  	default:
   108  		return InvalidCS
   109  	}
   110  }
   111  
// SpecOptions customize Spec behavior.
type SpecOptions struct {
	// Authorization token for requests. For example, if the database is HTTP
	// this will be used for an `Authorization: Bearer ${authorization}` header.
	Authorization string

	// Region that should be used when creating the aws session. When empty,
	// AwsRegionOrDefault falls back to DefaultAWSRegion.
	AWSRegion string

	// The type of credentials that should be used when creating the aws session.
	// The zero value is AutoCS.
	AWSCredSource AWSCredentialSource

	// Credential file to use when using auto or file credentials. When empty,
	// AwsCredFileOrDefault falls back to ".aws/credentials" under the current
	// user's home directory.
	AWSCredFile string
}
   127  
   128  func (so *SpecOptions) AwsRegionOrDefault() string {
   129  	if so.AWSRegion == "" {
   130  		return DefaultAWSRegion
   131  	}
   132  
   133  	return so.AWSRegion
   134  }
   135  
   136  func (so *SpecOptions) AwsCredFileOrDefault() string {
   137  	if so.AWSCredFile == "" {
   138  		usr, err := user.Current()
   139  		if err != nil {
   140  			return ""
   141  		}
   142  
   143  		return filepath.Join(usr.HomeDir, ".aws", "credentials")
   144  	}
   145  
   146  	return so.AWSCredFile
   147  }
   148  
// Spec locates a Noms database, dataset, or value globally. Spec caches
// its database instance so it therefore does not reflect new commits in
// the db, by (legacy) design.
type Spec struct {
	// Protocol is one of "mem", "aws", "gs", "nbs", or a name registered in
	// ExternalProtocols.
	Protocol string

	// DatabaseName is the name of the Spec's database, which is the string after
	// "protocol:". URL-style names include their leading "//" characters.
	DatabaseName string

	// Options are the SpecOptions that the Spec was constructed with.
	Options SpecOptions

	// Path is the zero AbsolutePath unless the spec was created with one of the
	// ForDataset or ForPath constructors and a path was supplied.
	Path AbsolutePath

	// db is lazily created, so it needs to be a pointer to a Database. Spec
	// methods use value receivers, so the pointer is also what lets copies of
	// a Spec share the same cached Database.
	db *datas.Database
}
   169  
   170  func newSpec(dbSpec string, opts SpecOptions) (Spec, error) {
   171  	protocol, dbName, err := parseDatabaseSpec(dbSpec)
   172  	if err != nil {
   173  		return Spec{}, err
   174  	}
   175  
   176  	return Spec{
   177  		Protocol:     protocol,
   178  		DatabaseName: dbName,
   179  		Options:      opts,
   180  		db:           new(datas.Database),
   181  	}, nil
   182  }
   183  
   184  // ForDatabase parses a spec for a Database.
   185  func ForDatabase(spec string) (Spec, error) {
   186  	return ForDatabaseOpts(spec, SpecOptions{})
   187  }
   188  
   189  // ForDatabaseOpts parses a spec for a Database.
   190  func ForDatabaseOpts(spec string, opts SpecOptions) (Spec, error) {
   191  	return newSpec(spec, opts)
   192  }
   193  
   194  // ForDataset parses a spec for a Dataset.
   195  func ForDataset(spec string) (Spec, error) {
   196  	return ForDatasetOpts(spec, SpecOptions{})
   197  }
   198  
   199  // ForDatasetOpts parses a spec for a Dataset.
   200  func ForDatasetOpts(spec string, opts SpecOptions) (Spec, error) {
   201  	dbSpec, pathStr, err := splitDatabaseSpec(spec)
   202  	if err != nil {
   203  		return Spec{}, err
   204  	}
   205  
   206  	sp, err := newSpec(dbSpec, opts)
   207  	if err != nil {
   208  		return Spec{}, err
   209  	}
   210  
   211  	path, err := NewAbsolutePath(pathStr)
   212  	if err != nil {
   213  		return Spec{}, err
   214  	}
   215  
   216  	if path.Dataset == "" {
   217  		return Spec{}, errors.New("dataset name required for dataset spec")
   218  	}
   219  
   220  	if !path.Path.IsEmpty() {
   221  		return Spec{}, errors.New("path is not allowed for dataset spec")
   222  	}
   223  
   224  	sp.Path = path
   225  	return sp, nil
   226  }
   227  
   228  // ForPath parses a spec for a path to a Value.
   229  func ForPath(spec string) (Spec, error) {
   230  	return ForPathOpts(spec, SpecOptions{})
   231  }
   232  
   233  // ForPathOpts parses a spec for a path to a Value.
   234  func ForPathOpts(spec string, opts SpecOptions) (Spec, error) {
   235  	dbSpec, pathStr, err := splitDatabaseSpec(spec)
   236  	if err != nil {
   237  		return Spec{}, err
   238  	}
   239  
   240  	var path AbsolutePath
   241  	if pathStr != "" {
   242  		path, err = NewAbsolutePath(pathStr)
   243  		if err != nil {
   244  			return Spec{}, err
   245  		}
   246  	}
   247  
   248  	sp, err := newSpec(dbSpec, opts)
   249  	if err != nil {
   250  		return Spec{}, err
   251  	}
   252  
   253  	sp.Path = path
   254  	return sp, nil
   255  }
   256  
   257  func (sp Spec) String() string {
   258  	s := sp.Protocol
   259  	if s != "mem" {
   260  		s += ":" + sp.DatabaseName
   261  	}
   262  	p := sp.Path.String()
   263  	if p != "" {
   264  		s += Separator + p
   265  	}
   266  	return s
   267  }
   268  
   269  // GetDatabase returns the Database instance that this Spec's DatabaseName
   270  // describes. The same Database instance is returned every time, unless Close
   271  // is called. If the Spec is closed, it is re-opened with a new Database.
   272  func (sp Spec) GetDatabase(ctx context.Context) datas.Database {
   273  	if *sp.db == nil {
   274  		*sp.db = sp.createDatabase(ctx)
   275  	}
   276  	return *sp.db
   277  }
   278  
   279  // NewChunkStore returns a new ChunkStore instance that this Spec's
   280  // DatabaseName describes. It's unusual to call this method, GetDatabase is
   281  // more useful. Unlike GetDatabase, a new ChunkStore instance is returned every
   282  // time. If there is no ChunkStore, for example remote databases, returns nil.
   283  func (sp Spec) NewChunkStore(ctx context.Context) chunks.ChunkStore {
   284  	switch sp.Protocol {
   285  	case "http", "https":
   286  		return nil
   287  	case "aws":
   288  		return parseAWSSpec(ctx, sp.Href(), sp.Options)
   289  	case "gs":
   290  		return parseGCSSpec(ctx, sp.Href(), sp.Options)
   291  	case "nbs":
   292  		cs, err := nbs.NewLocalStore(ctx, types.Format_Default.VersionString(), sp.DatabaseName, 1<<28)
   293  		d.PanicIfError(err)
   294  		return cs
   295  	case "mem":
   296  		storage := &chunks.MemoryStorage{}
   297  		return storage.NewView()
   298  	default:
   299  		impl, ok := ExternalProtocols[sp.Protocol]
   300  		if !ok {
   301  			d.PanicIfError(fmt.Errorf("unknown protocol: %s", sp.Protocol))
   302  		}
   303  		r, err := impl.NewChunkStore(sp)
   304  		d.PanicIfError(err)
   305  		return r
   306  	}
   307  }
   308  
   309  func parseAWSSpec(ctx context.Context, awsURL string, options SpecOptions) chunks.ChunkStore {
   310  	fmt.Println(awsURL, options)
   311  
   312  	u, _ := url.Parse(awsURL)
   313  	parts := strings.SplitN(u.Hostname(), ":", 2) // [table] [, bucket]?
   314  	d.PanicIfFalse(len(parts) == 2)
   315  
   316  	awsConfig := aws.NewConfig().WithRegion(options.AwsRegionOrDefault())
   317  
   318  	switch options.AWSCredSource {
   319  	case RoleCS:
   320  	case EnvCS:
   321  		awsConfig = awsConfig.WithCredentials(credentials.NewEnvCredentials())
   322  	case FileCS:
   323  		filePath := options.AwsCredFileOrDefault()
   324  		creds := credentials.NewSharedCredentials(filePath, DefaultAWSCredsProfile)
   325  		awsConfig = awsConfig.WithCredentials(creds)
   326  	case AutoCS:
   327  		envCreds := credentials.NewEnvCredentials()
   328  		if _, err := envCreds.Get(); err == nil {
   329  			awsConfig = awsConfig.WithCredentials(envCreds)
   330  		}
   331  
   332  		filePath := options.AwsCredFileOrDefault()
   333  		if _, err := os.Stat(filePath); err == nil {
   334  			creds := credentials.NewSharedCredentials(filePath, DefaultAWSCredsProfile)
   335  			awsConfig = awsConfig.WithCredentials(creds)
   336  		}
   337  	default:
   338  		panic("unsupported credential type")
   339  	}
   340  
   341  	sess := session.Must(session.NewSession(awsConfig))
   342  	cs, err := nbs.NewAWSStore(ctx, types.Format_Default.VersionString(), parts[0], u.Path, parts[1], s3.New(sess), dynamodb.New(sess), 1<<28)
   343  
   344  	d.PanicIfError(err)
   345  
   346  	return cs
   347  }
   348  
   349  func parseGCSSpec(ctx context.Context, gcsURL string, options SpecOptions) chunks.ChunkStore {
   350  	u, err := url.Parse(gcsURL)
   351  	d.PanicIfError(err)
   352  
   353  	fmt.Println(u)
   354  
   355  	bucket := u.Host
   356  	path := u.Path
   357  
   358  	gcs, err := storage.NewClient(ctx)
   359  
   360  	if err != nil {
   361  		panic("Could not create GCSBlobstore")
   362  	}
   363  
   364  	cs, err := nbs.NewGCSStore(ctx, types.Format_Default.VersionString(), bucket, path, gcs, 1<<28)
   365  
   366  	d.PanicIfError(err)
   367  
   368  	return cs
   369  }
   370  
   371  // GetDataset returns the current Dataset instance for this Spec's Database.
   372  // GetDataset is live, so if Commit is called on this Spec's Database later, a
   373  // new up-to-date Dataset will returned on the next call to GetDataset.  If
   374  // this is not a Dataset spec, returns nil.
   375  func (sp Spec) GetDataset(ctx context.Context) (ds datas.Dataset) {
   376  	if sp.Path.Dataset != "" {
   377  		var err error
   378  		ds, err = sp.GetDatabase(ctx).GetDataset(ctx, sp.Path.Dataset)
   379  		d.PanicIfError(err)
   380  	}
   381  	return
   382  }
   383  
   384  // GetValue returns the Value at this Spec's Path within its Database, or nil
   385  // if this isn't a Path Spec or if that path isn't found.
   386  func (sp Spec) GetValue(ctx context.Context) (val types.Value) {
   387  	if !sp.Path.IsEmpty() {
   388  		val = sp.Path.Resolve(ctx, sp.GetDatabase(ctx))
   389  	}
   390  	return
   391  }
   392  
   393  // Href treats the Protocol and DatabaseName as a URL, and returns its href.
   394  // For example, the spec http://example.com/path::ds returns
   395  // "http://example.com/path". If the Protocol is not "http" or "http", returns
   396  // an empty string.
   397  func (sp Spec) Href() string {
   398  	switch proto := sp.Protocol; proto {
   399  	case "http", "https", "aws", "gs":
   400  		return proto + ":" + sp.DatabaseName
   401  	default:
   402  		return ""
   403  	}
   404  }
   405  
   406  // Pin returns a Spec in which the dataset component, if any, has been replaced
   407  // with the hash of the HEAD of that dataset. This "pins" the path to the state
   408  // of the database at the current moment in time.  Returns itself if the
   409  // PathSpec is already "pinned".
   410  func (sp Spec) Pin(ctx context.Context) (Spec, bool) {
   411  	var ds datas.Dataset
   412  
   413  	if !sp.Path.IsEmpty() {
   414  		if !sp.Path.Hash.IsEmpty() {
   415  			// Spec is already pinned.
   416  			return sp, true
   417  		}
   418  
   419  		var err error
   420  		ds, err = sp.GetDatabase(ctx).GetDataset(ctx, sp.Path.Dataset)
   421  		d.PanicIfError(err)
   422  	} else {
   423  		ds = sp.GetDataset(ctx)
   424  	}
   425  
   426  	commit, ok := ds.MaybeHead()
   427  	if !ok {
   428  		return Spec{}, false
   429  	}
   430  
   431  	nbf := sp.GetDatabase(ctx).Format()
   432  	r := sp
   433  
   434  	var err error
   435  	r.Path.Dataset = ""
   436  	r.Path.Hash, err = commit.Hash(nbf)
   437  	d.PanicIfError(err)
   438  
   439  	return r, true
   440  }
   441  
   442  func (sp Spec) Close() error {
   443  	db := *sp.db
   444  	if db == nil {
   445  		return nil
   446  	}
   447  
   448  	*sp.db = nil
   449  	return db.Close()
   450  }
   451  
   452  func (sp Spec) createDatabase(ctx context.Context) datas.Database {
   453  	switch sp.Protocol {
   454  	case "aws":
   455  		return datas.NewDatabase(parseAWSSpec(ctx, sp.Href(), sp.Options))
   456  	case "gs":
   457  		return datas.NewDatabase(parseGCSSpec(ctx, sp.Href(), sp.Options))
   458  	case "nbs":
   459  		os.Mkdir(sp.DatabaseName, 0777)
   460  		cs, err := nbs.NewLocalStore(ctx, types.Format_Default.VersionString(), sp.DatabaseName, 1<<28)
   461  		d.PanicIfError(err)
   462  		return datas.NewDatabase(cs)
   463  	case "mem":
   464  		storage := &chunks.MemoryStorage{}
   465  		return datas.NewDatabase(storage.NewViewWithDefaultFormat())
   466  	default:
   467  		impl, ok := ExternalProtocols[sp.Protocol]
   468  		if !ok {
   469  			d.PanicIfError(fmt.Errorf("unknown protocol: %s", sp.Protocol))
   470  		}
   471  		r, err := impl.NewDatabase(sp)
   472  		d.PanicIfError(err)
   473  		return r
   474  	}
   475  }
   476  
   477  func parseDatabaseSpec(spec string) (protocol, name string, err error) {
   478  	if len(spec) == 0 {
   479  		err = fmt.Errorf("empty spec")
   480  		return
   481  	}
   482  
   483  	parts := strings.SplitN(spec, ":", 2) // [protocol] [, path]?
   484  
   485  	// If there was no ":" then this is either a mem spec, or a filesystem path.
   486  	// This is ambiguous if the file system path is "mem" but that just means the
   487  	// path needs to be explicitly "nbs:mem".
   488  	if len(parts) == 1 {
   489  		if spec == "mem" {
   490  			protocol = "mem"
   491  		} else {
   492  			protocol, name = "nbs", spec
   493  		}
   494  		return
   495  	} else if len(parts) == 2 && len(parts[0]) == 1 && parts[0][0] >= 'A' && parts[0][0] <= 'Z' { //check for Windows drive letter, ala C:\Users\Public
   496  		if _, err := os.Stat(parts[0] + `:\`); !os.IsNotExist(err) {
   497  			parts = []string{"nbs", spec}
   498  		}
   499  	}
   500  
   501  	if _, ok := ExternalProtocols[parts[0]]; ok {
   502  		protocol, name = parts[0], parts[1]
   503  		return
   504  	}
   505  
   506  	switch parts[0] {
   507  	case "nbs":
   508  		protocol, name = parts[0], parts[1]
   509  
   510  	case "aws", "gs":
   511  		u, perr := url.Parse(spec)
   512  		if perr != nil {
   513  			err = perr
   514  		} else if u.Host == "" {
   515  			err = fmt.Errorf("%s has empty host", spec)
   516  		} else if parts[0] == "aws" && u.Path == "" {
   517  			err = fmt.Errorf("%s does not specify a database ID", spec)
   518  		} else {
   519  			protocol, name = parts[0], parts[1]
   520  		}
   521  
   522  	case "mem":
   523  		err = fmt.Errorf(`in-memory database must be specified as "mem", not "mem:"`)
   524  
   525  	default:
   526  		err = fmt.Errorf("invalid database protocol %s in %s", protocol, spec)
   527  	}
   528  	return
   529  }
   530  
   531  func splitDatabaseSpec(spec string) (string, string, error) {
   532  	lastIdx := strings.LastIndex(spec, Separator)
   533  	if lastIdx == -1 {
   534  		return "", "", fmt.Errorf("missing %s after database in %s", Separator, spec)
   535  	}
   536  
   537  	return spec[:lastIdx], spec[lastIdx+len(Separator):], nil
   538  }