github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/libraries/doltcore/mvdata/file_data_loc.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package mvdata 16 17 import ( 18 "context" 19 "errors" 20 "fmt" 21 "os" 22 "strings" 23 24 "github.com/dolthub/dolt/go/libraries/doltcore/env" 25 26 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" 27 "github.com/dolthub/dolt/go/libraries/doltcore/schema" 28 "github.com/dolthub/dolt/go/libraries/doltcore/table" 29 "github.com/hasnat/dolt/go/libraries/doltcore/table/typed/json" 30 "github.com/dolthub/dolt/go/libraries/doltcore/table/typed/noms" 31 "github.com/dolthub/dolt/go/libraries/doltcore/table/untyped/csv" 32 "github.com/dolthub/dolt/go/libraries/doltcore/table/untyped/sqlexport" 33 "github.com/dolthub/dolt/go/libraries/doltcore/table/untyped/xlsx" 34 "github.com/dolthub/dolt/go/libraries/utils/filesys" 35 ) 36 37 // DFFromString returns a data object from a string. 38 func DFFromString(dfStr string) DataFormat { 39 switch strings.ToLower(dfStr) { 40 case "csv", ".csv": 41 return CsvFile 42 case "psv", ".psv": 43 return PsvFile 44 case "xlsx", ".xlsx": 45 return XlsxFile 46 case "json", ".json": 47 return JsonFile 48 case "sql", ".sql": 49 return SqlFile 50 default: 51 return InvalidDataFormat 52 } 53 } 54 55 // FileDataLocation is a file that that can be imported from or exported to. 56 type FileDataLocation struct { 57 // Path is the path of the file on the filesystem 58 Path string 59 60 // Format is the DataFormat of the file 61 Format DataFormat 62 } 63 64 // String returns a string representation of the data location. 65 func (dl FileDataLocation) String() string { 66 return dl.Format.ReadableStr() + ":" + dl.Path 67 } 68 69 // Exists returns true if the DataLocation already exists 70 func (dl FileDataLocation) Exists(ctx context.Context, root *doltdb.RootValue, fs filesys.ReadableFS) (bool, error) { 71 exists, _ := fs.Exists(dl.Path) 72 return exists, nil 73 } 74 75 // NewReader creates a TableReadCloser for the DataLocation 76 func (dl FileDataLocation) NewReader(ctx context.Context, root *doltdb.RootValue, fs filesys.ReadableFS, opts interface{}) (rdCl table.TableReadCloser, sorted bool, err error) { 77 exists, isDir := fs.Exists(dl.Path) 78 79 if !exists { 80 return nil, false, os.ErrNotExist 81 } else if isDir { 82 return nil, false, filesys.ErrIsDir 83 } 84 85 switch dl.Format { 86 case CsvFile: 87 delim := "," 88 89 if opts != nil { 90 csvOpts, _ := opts.(CsvOptions) 91 92 if len(csvOpts.Delim) != 0 { 93 delim = csvOpts.Delim 94 } 95 } 96 97 rd, err := csv.OpenCSVReader(root.VRW().Format(), dl.Path, fs, csv.NewCSVInfo().SetDelim(delim)) 98 99 return rd, false, err 100 101 case PsvFile: 102 rd, err := csv.OpenCSVReader(root.VRW().Format(), dl.Path, fs, csv.NewCSVInfo().SetDelim("|")) 103 return rd, false, err 104 105 case XlsxFile: 106 xlsxOpts := opts.(XlsxOptions) 107 rd, err := xlsx.OpenXLSXReader(ctx, root.VRW(), dl.Path, fs, &xlsx.XLSXFileInfo{SheetName: xlsxOpts.SheetName}) 108 return rd, false, err 109 110 case JsonFile: 111 var sch schema.Schema 112 jsonOpts, _ := opts.(JSONOptions) 113 if jsonOpts.SchFile != "" { 114 tn, s, err := SchAndTableNameFromFile(ctx, jsonOpts.SchFile, fs, root) 115 if err != nil { 116 return nil, false, err 117 } 118 if tn != jsonOpts.TableName { 119 return nil, false, fmt.Errorf("table name '%s' from schema file %s does not match table arg '%s'", tn, jsonOpts.SchFile, jsonOpts.TableName) 120 } 121 sch = s 122 } else { 123 if opts == nil { 124 return nil, false, errors.New("Unable to determine table name on JSON import") 125 } 126 tbl, exists, err := root.GetTable(context.TODO(), jsonOpts.TableName) 127 if !exists { 128 return nil, false, errors.New(fmt.Sprintf("The following table could not be found:\n%v", jsonOpts.TableName)) 129 } 130 if err != nil { 131 return nil, false, errors.New(fmt.Sprintf("An error occurred attempting to read the table:\n%v", err.Error())) 132 } 133 sch, err = tbl.GetSchema(context.TODO()) 134 if err != nil { 135 return nil, false, errors.New(fmt.Sprintf("An error occurred attempting to read the table schema:\n%v", err.Error())) 136 } 137 } 138 139 rd, err := json.OpenJSONReader(root.VRW(), dl.Path, fs, sch) 140 return rd, false, err 141 } 142 143 return nil, false, errors.New("unsupported format") 144 } 145 146 // NewCreatingWriter will create a TableWriteCloser for a DataLocation that will create a new table, or overwrite 147 // an existing table. 148 func (dl FileDataLocation) NewCreatingWriter(ctx context.Context, mvOpts DataMoverOptions, dEnv *env.DoltEnv, root *doltdb.RootValue, _ bool, outSch schema.Schema, _ noms.StatsCB, _ bool) (table.TableWriteCloser, error) { 149 switch dl.Format { 150 case CsvFile: 151 return csv.OpenCSVWriter(dl.Path, dEnv.FS, outSch, csv.NewCSVInfo()) 152 case PsvFile: 153 return csv.OpenCSVWriter(dl.Path, dEnv.FS, outSch, csv.NewCSVInfo().SetDelim("|")) 154 case XlsxFile: 155 panic("writing to xlsx files is not supported yet") 156 case JsonFile: 157 return json.OpenJSONWriter(dl.Path, dEnv.FS, outSch) 158 case SqlFile: 159 return sqlexport.OpenSQLExportWriter(ctx, dl.Path, dEnv.FS, root, mvOpts.SrcName(), outSch) 160 } 161 162 panic("Invalid Data Format." + string(dl.Format)) 163 } 164 165 // NewUpdatingWriter will create a TableWriteCloser for a DataLocation that will update and append rows based on 166 // their primary key. 167 func (dl FileDataLocation) NewUpdatingWriter(_ context.Context, _ DataMoverOptions, _ *env.DoltEnv, _ *doltdb.RootValue, _ bool, _ schema.Schema, _ noms.StatsCB, _ bool) (table.TableWriteCloser, error) { 168 panic("Updating of files is not supported") 169 } 170 171 // NewReplacingWriter will create a TableWriteCloser for a DataLocation that will overwrite an existing table while 172 // preserving schema 173 func (dl FileDataLocation) NewReplacingWriter(_ context.Context, _ DataMoverOptions, _ *env.DoltEnv, _ *doltdb.RootValue, _ bool, _ schema.Schema, _ noms.StatsCB, _ bool) (table.TableWriteCloser, error) { 174 panic("Replacing files is not supported") 175 }