github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/libraries/doltcore/mvdata/data_loc.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package mvdata
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"path/filepath"
    21  	"strings"
    22  
    23  	"github.com/dolthub/dolt/go/cmd/dolt/cli"
    24  	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
    25  	"github.com/dolthub/dolt/go/libraries/doltcore/env"
    26  	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
    27  	"github.com/dolthub/dolt/go/libraries/doltcore/table"
    28  	"github.com/dolthub/dolt/go/libraries/doltcore/table/typed/noms"
    29  	"github.com/dolthub/dolt/go/libraries/utils/filesys"
    30  )
    31  
    32  // DataFormat is an enumeration of the valid data formats
    33  type DataFormat string
    34  
    35  const (
    36  	// InvalidDataFormat is the format of a data lotacion that isn't valid
    37  	InvalidDataFormat DataFormat = "invalid"
    38  
    39  	// DoltDB is the format of a data location for a dolt table
    40  	DoltDB DataFormat = "doltdb"
    41  
    42  	// CsvFile is the format of a data location that is a .csv file
    43  	CsvFile DataFormat = ".csv"
    44  
    45  	// PsvFile is the format of a data location that is a .psv file
    46  	PsvFile DataFormat = ".psv"
    47  
    48  	// XlsxFile is the format of a data location that is a .xlsx file
    49  	XlsxFile DataFormat = ".xlsx"
    50  
    51  	// JsonFile is the format of a data location that is a json file
    52  	JsonFile DataFormat = ".json"
    53  
    54  	// SqlFile is the format of a data location that is a .sql file
    55  	SqlFile DataFormat = ".sql"
    56  )
    57  
    58  // ReadableStr returns a human readable string for a DataFormat
    59  func (df DataFormat) ReadableStr() string {
    60  	switch df {
    61  	case DoltDB:
    62  		return "dolt table"
    63  	case CsvFile:
    64  		return "csv file"
    65  	case PsvFile:
    66  		return "psv file"
    67  	case XlsxFile:
    68  		return "xlsx file"
    69  	case JsonFile:
    70  		return "json file"
    71  	case SqlFile:
    72  		return "sql file"
    73  	default:
    74  		return "invalid"
    75  	}
    76  }
    77  
// DataLocation is an interface that can be used to read or write from the source or the destination of a move operation.
// Every DataLocation is also a fmt.Stringer, so it can be printed directly in messages.
type DataLocation interface {
	fmt.Stringer

	// Exists returns true if the DataLocation already exists. The root value and filesystem are supplied by the
	// caller; which of the two is consulted is up to the implementation.
	Exists(ctx context.Context, root *doltdb.RootValue, fs filesys.ReadableFS) (bool, error)

	// NewReader creates a TableReadCloser for the DataLocation. In addition to the reader and an error it returns
	// a sorted flag.
	// NOTE(review): sorted presumably reports whether the reader yields rows in sorted order, and opts is
	// presumably a format-specific options value — confirm against the implementations.
	NewReader(ctx context.Context, root *doltdb.RootValue, fs filesys.ReadableFS, opts interface{}) (rdCl table.TableReadCloser, sorted bool, err error)

	// NewCreatingWriter will create a TableWriteCloser for a DataLocation that will create a new table, or overwrite
	// an existing table.
	NewCreatingWriter(ctx context.Context, mvOpts DataMoverOptions, dEnv *env.DoltEnv, root *doltdb.RootValue, sortedInput bool, outSch schema.Schema, statsCB noms.StatsCB, useGC bool) (table.TableWriteCloser, error)

	// NewUpdatingWriter will create a TableWriteCloser for a DataLocation that will update and append rows based on
	// their primary key.
	NewUpdatingWriter(ctx context.Context, mvOpts DataMoverOptions, dEnv *env.DoltEnv, root *doltdb.RootValue, srcIsSorted bool, outSch schema.Schema, statsCB noms.StatsCB, useGC bool) (table.TableWriteCloser, error)

	// NewReplacingWriter will create a TableWriteCloser for a DataLocation that will overwrite an existing table if it has the same schema.
	NewReplacingWriter(ctx context.Context, mvOpts DataMoverOptions, dEnv *env.DoltEnv, root *doltdb.RootValue, srcIsSorted bool, outSch schema.Schema, statsCB noms.StatsCB, useGC bool) (table.TableWriteCloser, error)
}
    99  
   100  // NewDataLocation creates a DataLocation object from a path and a format string.  If the path is the name of a table
   101  // then a TableDataLocation will be returned.  If the path is empty a StreamDataLocation is returned.  Otherwise a
   102  // FileDataLocation is returned.  For FileDataLocations and StreamDataLocations, if a file format is provided explicitly
   103  // then it is used as the format, otherwise, when it can be, it is inferred from the path for files.  Inference is based
   104  // on the file's extension.
   105  func NewDataLocation(path, fileFmtStr string) DataLocation {
   106  	dataFmt := DFFromString(fileFmtStr)
   107  
   108  	if len(path) == 0 {
   109  		return StreamDataLocation{Format: dataFmt, Reader: cli.InStream, Writer: cli.OutStream}
   110  	} else if fileFmtStr == "" {
   111  		if doltdb.IsValidTableName(path) {
   112  			return TableDataLocation{path}
   113  		} else {
   114  			switch strings.ToLower(filepath.Ext(path)) {
   115  			case string(CsvFile):
   116  				dataFmt = CsvFile
   117  			case string(PsvFile):
   118  				dataFmt = PsvFile
   119  			case string(XlsxFile):
   120  				dataFmt = XlsxFile
   121  			case string(JsonFile):
   122  				dataFmt = JsonFile
   123  			case string(SqlFile):
   124  				dataFmt = SqlFile
   125  			}
   126  		}
   127  	}
   128  
   129  	return FileDataLocation{path, dataFmt}
   130  }