github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/libraries/doltcore/mvdata/file_data_loc.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package mvdata
    16  
    17  import (
    18  	"context"
    19  	"errors"
    20  	"fmt"
    21  	"os"
    22  	"strings"
    23  
    24  	"github.com/dolthub/dolt/go/libraries/doltcore/env"
    25  
    26  	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
    27  	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
    28  	"github.com/dolthub/dolt/go/libraries/doltcore/table"
    29  	"github.com/hasnat/dolt/go/libraries/doltcore/table/typed/json"
    30  	"github.com/dolthub/dolt/go/libraries/doltcore/table/typed/noms"
    31  	"github.com/dolthub/dolt/go/libraries/doltcore/table/untyped/csv"
    32  	"github.com/dolthub/dolt/go/libraries/doltcore/table/untyped/sqlexport"
    33  	"github.com/dolthub/dolt/go/libraries/doltcore/table/untyped/xlsx"
    34  	"github.com/dolthub/dolt/go/libraries/utils/filesys"
    35  )
    36  
    37  // DFFromString returns a data object from a string.
    38  func DFFromString(dfStr string) DataFormat {
    39  	switch strings.ToLower(dfStr) {
    40  	case "csv", ".csv":
    41  		return CsvFile
    42  	case "psv", ".psv":
    43  		return PsvFile
    44  	case "xlsx", ".xlsx":
    45  		return XlsxFile
    46  	case "json", ".json":
    47  		return JsonFile
    48  	case "sql", ".sql":
    49  		return SqlFile
    50  	default:
    51  		return InvalidDataFormat
    52  	}
    53  }
    54  
// FileDataLocation is a file that can be imported from or exported to.
type FileDataLocation struct {
	// Path is the path of the file on the filesystem
	Path string

	// Format is the DataFormat of the file
	Format DataFormat
}
    63  
    64  // String returns a string representation of the data location.
    65  func (dl FileDataLocation) String() string {
    66  	return dl.Format.ReadableStr() + ":" + dl.Path
    67  }
    68  
    69  // Exists returns true if the DataLocation already exists
    70  func (dl FileDataLocation) Exists(ctx context.Context, root *doltdb.RootValue, fs filesys.ReadableFS) (bool, error) {
    71  	exists, _ := fs.Exists(dl.Path)
    72  	return exists, nil
    73  }
    74  
    75  // NewReader creates a TableReadCloser for the DataLocation
    76  func (dl FileDataLocation) NewReader(ctx context.Context, root *doltdb.RootValue, fs filesys.ReadableFS, opts interface{}) (rdCl table.TableReadCloser, sorted bool, err error) {
    77  	exists, isDir := fs.Exists(dl.Path)
    78  
    79  	if !exists {
    80  		return nil, false, os.ErrNotExist
    81  	} else if isDir {
    82  		return nil, false, filesys.ErrIsDir
    83  	}
    84  
    85  	switch dl.Format {
    86  	case CsvFile:
    87  		delim := ","
    88  
    89  		if opts != nil {
    90  			csvOpts, _ := opts.(CsvOptions)
    91  
    92  			if len(csvOpts.Delim) != 0 {
    93  				delim = csvOpts.Delim
    94  			}
    95  		}
    96  
    97  		rd, err := csv.OpenCSVReader(root.VRW().Format(), dl.Path, fs, csv.NewCSVInfo().SetDelim(delim))
    98  
    99  		return rd, false, err
   100  
   101  	case PsvFile:
   102  		rd, err := csv.OpenCSVReader(root.VRW().Format(), dl.Path, fs, csv.NewCSVInfo().SetDelim("|"))
   103  		return rd, false, err
   104  
   105  	case XlsxFile:
   106  		xlsxOpts := opts.(XlsxOptions)
   107  		rd, err := xlsx.OpenXLSXReader(ctx, root.VRW(), dl.Path, fs, &xlsx.XLSXFileInfo{SheetName: xlsxOpts.SheetName})
   108  		return rd, false, err
   109  
   110  	case JsonFile:
   111  		var sch schema.Schema
   112  		jsonOpts, _ := opts.(JSONOptions)
   113  		if jsonOpts.SchFile != "" {
   114  			tn, s, err := SchAndTableNameFromFile(ctx, jsonOpts.SchFile, fs, root)
   115  			if err != nil {
   116  				return nil, false, err
   117  			}
   118  			if tn != jsonOpts.TableName {
   119  				return nil, false, fmt.Errorf("table name '%s' from schema file %s does not match table arg '%s'", tn, jsonOpts.SchFile, jsonOpts.TableName)
   120  			}
   121  			sch = s
   122  		} else {
   123  			if opts == nil {
   124  				return nil, false, errors.New("Unable to determine table name on JSON import")
   125  			}
   126  			tbl, exists, err := root.GetTable(context.TODO(), jsonOpts.TableName)
   127  			if !exists {
   128  				return nil, false, errors.New(fmt.Sprintf("The following table could not be found:\n%v", jsonOpts.TableName))
   129  			}
   130  			if err != nil {
   131  				return nil, false, errors.New(fmt.Sprintf("An error occurred attempting to read the table:\n%v", err.Error()))
   132  			}
   133  			sch, err = tbl.GetSchema(context.TODO())
   134  			if err != nil {
   135  				return nil, false, errors.New(fmt.Sprintf("An error occurred attempting to read the table schema:\n%v", err.Error()))
   136  			}
   137  		}
   138  
   139  		rd, err := json.OpenJSONReader(root.VRW(), dl.Path, fs, sch)
   140  		return rd, false, err
   141  	}
   142  
   143  	return nil, false, errors.New("unsupported format")
   144  }
   145  
   146  // NewCreatingWriter will create a TableWriteCloser for a DataLocation that will create a new table, or overwrite
   147  // an existing table.
   148  func (dl FileDataLocation) NewCreatingWriter(ctx context.Context, mvOpts DataMoverOptions, dEnv *env.DoltEnv, root *doltdb.RootValue, _ bool, outSch schema.Schema, _ noms.StatsCB, _ bool) (table.TableWriteCloser, error) {
   149  	switch dl.Format {
   150  	case CsvFile:
   151  		return csv.OpenCSVWriter(dl.Path, dEnv.FS, outSch, csv.NewCSVInfo())
   152  	case PsvFile:
   153  		return csv.OpenCSVWriter(dl.Path, dEnv.FS, outSch, csv.NewCSVInfo().SetDelim("|"))
   154  	case XlsxFile:
   155  		panic("writing to xlsx files is not supported yet")
   156  	case JsonFile:
   157  		return json.OpenJSONWriter(dl.Path, dEnv.FS, outSch)
   158  	case SqlFile:
   159  		return sqlexport.OpenSQLExportWriter(ctx, dl.Path, dEnv.FS, root, mvOpts.SrcName(), outSch)
   160  	}
   161  
   162  	panic("Invalid Data Format." + string(dl.Format))
   163  }
   164  
   165  // NewUpdatingWriter will create a TableWriteCloser for a DataLocation that will update and append rows based on
   166  // their primary key.
   167  func (dl FileDataLocation) NewUpdatingWriter(_ context.Context, _ DataMoverOptions, _ *env.DoltEnv, _ *doltdb.RootValue, _ bool, _ schema.Schema, _ noms.StatsCB, _ bool) (table.TableWriteCloser, error) {
   168  	panic("Updating of files is not supported")
   169  }
   170  
   171  // NewReplacingWriter will create a TableWriteCloser for a DataLocation that will overwrite an existing table while
   172  // preserving schema
   173  func (dl FileDataLocation) NewReplacingWriter(_ context.Context, _ DataMoverOptions, _ *env.DoltEnv, _ *doltdb.RootValue, _ bool, _ schema.Schema, _ noms.StatsCB, _ bool) (table.TableWriteCloser, error) {
   174  	panic("Replacing files is not supported")
   175  }