github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/mvdata/data_loc.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package mvdata
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"io"
    21  	"path/filepath"
    22  	"strings"
    23  
    24  	"github.com/dolthub/dolt/go/cmd/dolt/cli"
    25  	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
    26  	"github.com/dolthub/dolt/go/libraries/doltcore/env"
    27  	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
    28  	"github.com/dolthub/dolt/go/libraries/doltcore/table"
    29  	"github.com/dolthub/dolt/go/libraries/doltcore/table/editor"
    30  	"github.com/dolthub/dolt/go/libraries/utils/filesys"
    31  )
    32  
    33  // DataFormat is an enumeration of the valid data formats
    34  type DataFormat string
    35  
    36  const (
    37  	// InvalidDataFormat is the format of a data lotacion that isn't valid
    38  	InvalidDataFormat DataFormat = "invalid"
    39  
    40  	// DoltDB is the format of a data location for a dolt table
    41  	DoltDB DataFormat = "doltdb"
    42  
    43  	// CsvFile is the format of a data location that is a .csv file
    44  	CsvFile DataFormat = ".csv"
    45  
    46  	// PsvFile is the format of a data location that is a .psv file
    47  	PsvFile DataFormat = ".psv"
    48  
    49  	// XlsxFile is the format of a data location that is a .xlsx file
    50  	XlsxFile DataFormat = ".xlsx"
    51  
    52  	// JsonFile is the format of a data location that is a json file
    53  	JsonFile DataFormat = ".json"
    54  
    55  	// SqlFile is the format of a data location that is a .sql file
    56  	SqlFile DataFormat = ".sql"
    57  
    58  	// ParquetFile is the format of a data location that is a .paquet file
    59  	ParquetFile DataFormat = ".parquet"
    60  )
    61  
    62  // ReadableStr returns a human readable string for a DataFormat
    63  func (df DataFormat) ReadableStr() string {
    64  	switch df {
    65  	case DoltDB:
    66  		return "dolt table"
    67  	case CsvFile:
    68  		return "csv file"
    69  	case PsvFile:
    70  		return "psv file"
    71  	case XlsxFile:
    72  		return "xlsx file"
    73  	case JsonFile:
    74  		return "json file"
    75  	case SqlFile:
    76  		return "sql file"
    77  	case ParquetFile:
    78  		return "parquet file"
    79  	default:
    80  		return "invalid"
    81  	}
    82  }
    83  
    84  // DataLocation is an interface that can be used to read or write from the source or the destination of a move operation.
    85  type DataLocation interface {
    86  	fmt.Stringer
    87  
    88  	// Exists returns true if the DataLocation already exists
    89  	Exists(ctx context.Context, root doltdb.RootValue, fs filesys.ReadableFS) (bool, error)
    90  
    91  	// NewReader creates a TableReadCloser for the DataLocation
    92  	NewReader(ctx context.Context, dEnv *env.DoltEnv, opts interface{}) (rdCl table.SqlRowReader, sorted bool, err error)
    93  
    94  	// NewCreatingWriter will create a TableWriteCloser for a DataLocation that will create a new table, or overwrite
    95  	// an existing table.
    96  	NewCreatingWriter(ctx context.Context, mvOpts DataMoverOptions, root doltdb.RootValue, outSch schema.Schema, opts editor.Options, wr io.WriteCloser) (table.SqlRowWriter, error)
    97  }
    98  
    99  // NewDataLocation creates a DataLocation object from a path and a format string.  If the path is the name of a table
   100  // then a TableDataLocation will be returned.  If the path is empty a StreamDataLocation is returned.  Otherwise a
   101  // FileDataLocation is returned.  For FileDataLocations and StreamDataLocations, if a file format is provided explicitly
   102  // then it is used as the format, otherwise, when it can be, it is inferred from the path for files.  Inference is based
   103  // on the file's extension.
   104  func NewDataLocation(path, fileFmtStr string) DataLocation {
   105  	dataFmt := DFFromString(fileFmtStr)
   106  
   107  	if len(path) == 0 {
   108  		return StreamDataLocation{Format: dataFmt, Reader: cli.InStream, Writer: cli.OutStream}
   109  	} else if fileFmtStr == "" {
   110  		switch strings.ToLower(filepath.Ext(path)) {
   111  		case string(CsvFile):
   112  			dataFmt = CsvFile
   113  		case string(PsvFile):
   114  			dataFmt = PsvFile
   115  		case string(XlsxFile):
   116  			dataFmt = XlsxFile
   117  		case string(JsonFile):
   118  			dataFmt = JsonFile
   119  		case string(SqlFile):
   120  			dataFmt = SqlFile
   121  		case string(ParquetFile):
   122  			dataFmt = ParquetFile
   123  		}
   124  	}
   125  
   126  	return FileDataLocation{path, dataFmt}
   127  }