github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/libraries/doltcore/mvdata/data_mover.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package mvdata 16 17 import ( 18 "bufio" 19 "bytes" 20 "context" 21 "errors" 22 "fmt" 23 "sync/atomic" 24 25 "github.com/dolthub/dolt/go/cmd/dolt/cli" 26 "github.com/dolthub/dolt/go/libraries/doltcore/table/untyped/csv" 27 28 "github.com/dolthub/dolt/go/cmd/dolt/errhand" 29 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" 30 "github.com/dolthub/dolt/go/libraries/doltcore/env" 31 "github.com/dolthub/dolt/go/libraries/doltcore/env/actions" 32 "github.com/dolthub/dolt/go/libraries/doltcore/row" 33 "github.com/dolthub/dolt/go/libraries/doltcore/rowconv" 34 "github.com/dolthub/dolt/go/libraries/doltcore/schema" 35 "github.com/dolthub/dolt/go/libraries/doltcore/sqle/sqlutil" 36 "github.com/dolthub/dolt/go/libraries/doltcore/table" 37 "github.com/dolthub/dolt/go/libraries/doltcore/table/pipeline" 38 "github.com/dolthub/dolt/go/libraries/utils/filesys" 39 "github.com/dolthub/dolt/go/libraries/utils/set" 40 "github.com/dolthub/dolt/go/store/types" 41 ) 42 43 type CsvOptions struct { 44 Delim string 45 } 46 47 type XlsxOptions struct { 48 SheetName string 49 } 50 51 type JSONOptions struct { 52 TableName string 53 SchFile string 54 } 55 56 type DataMoverOptions interface { 57 WritesToTable() bool 58 SrcName() string 59 DestName() string 60 } 61 62 type DataMoverCloser interface { 63 table.TableWriteCloser 64 Flush(context.Context) (*doltdb.RootValue, error) 65 } 66 67 type DataMover struct { 68 Rd table.TableReadCloser 69 Transforms *pipeline.TransformCollection 70 Wr table.TableWriteCloser 71 ContOnErr bool 72 } 73 74 type DataMoverCreationErrType string 75 76 const ( 77 CreateReaderErr DataMoverCreationErrType = "Create reader error" 78 NomsKindSchemaErr DataMoverCreationErrType = "Invalid schema error" 79 SchemaErr DataMoverCreationErrType = "Schema error" 80 MappingErr DataMoverCreationErrType = "Mapping error" 81 ReplacingErr DataMoverCreationErrType = "Replacing error" 82 CreateMapperErr DataMoverCreationErrType = "Mapper creation error" 83 CreateWriterErr DataMoverCreationErrType = "Create writer error" 84 CreateSorterErr DataMoverCreationErrType = "Create sorter error" 85 ) 86 87 var ErrProvidedPkNotFound = errors.New("provided primary key not found") 88 89 type DataMoverCreationError struct { 90 ErrType DataMoverCreationErrType 91 Cause error 92 } 93 94 func (dmce *DataMoverCreationError) String() string { 95 return string(dmce.ErrType) + ": " + dmce.Cause.Error() 96 } 97 98 type GCTableWriteCloser interface { 99 table.TableWriteCloser 100 GC(ctx context.Context) error 101 } 102 103 // Move is the method that executes the pipeline which will move data from the pipeline's source DataLocation to it's 104 // dest DataLocation. It returns the number of bad rows encountered during import, and an error. 105 func (imp *DataMover) Move(ctx context.Context, sch schema.Schema) (badRowCount int64, err error) { 106 defer imp.Rd.Close(ctx) 107 defer func() { 108 closeErr := imp.Wr.Close(ctx) 109 if err == nil { 110 err = closeErr 111 } 112 113 if err == nil { 114 if gcTWC, ok := imp.Wr.(GCTableWriteCloser); ok { 115 err = gcTWC.GC(ctx) 116 } 117 } 118 }() 119 120 var badCount int64 121 var rowErr error 122 var printStarted bool 123 var b bytes.Buffer 124 badRowCB := func(trf *pipeline.TransformRowFailure) (quit bool) { 125 if !imp.ContOnErr { 126 rowErr = trf 127 return true 128 } 129 130 if !printStarted { 131 cli.PrintErrln("The following rows were skipped:") 132 printStarted = true 133 } 134 135 r := pipeline.GetTransFailureRow(trf) 136 137 if r != nil { 138 err = writeBadRowToCli(ctx, r, sch, &b) 139 if err != nil { 140 return true 141 } 142 } 143 144 atomic.AddInt64(&badCount, 1) 145 return false 146 } 147 148 p := pipeline.NewAsyncPipeline( 149 pipeline.ProcFuncForReader(ctx, imp.Rd), 150 pipeline.ProcFuncForWriter(ctx, imp.Wr), 151 imp.Transforms, 152 badRowCB) 153 p.Start() 154 155 err = p.Wait() 156 if err != nil { 157 return 0, err 158 } 159 160 if rowErr != nil { 161 return 0, rowErr 162 } 163 164 return badCount, nil 165 } 166 167 // writeBadRowToCli prints a bad row in a csv form to STDERR. 168 func writeBadRowToCli(ctx context.Context, r row.Row, sch schema.Schema, b *bytes.Buffer) error { 169 sqlRow, err := sqlutil.DoltRowToSqlRow(r, sch) 170 if err != nil { 171 return err 172 } 173 174 wr := bufio.NewWriter(b) 175 176 colValStrs := make([]*string, len(sqlRow)) 177 178 for colNum, col := range sqlRow { 179 if col != nil { 180 str := sqlutil.SqlColToStr(ctx, col) 181 colValStrs[colNum] = &str 182 } else { 183 colValStrs[colNum] = nil 184 } 185 } 186 187 err = csv.WriteCSVRow(wr, colValStrs, ",", false) 188 if err != nil { 189 return err 190 } 191 192 err = wr.Flush() 193 if err != nil { 194 return err 195 } 196 197 str := b.String() 198 cli.PrintErr(str) 199 200 return nil 201 } 202 203 func MoveDataToRoot(ctx context.Context, mover *DataMover, mvOpts DataMoverOptions, root *doltdb.RootValue, updateRoot func(c context.Context, r *doltdb.RootValue) error) (*doltdb.RootValue, int64, errhand.VerboseError) { 204 var badCount int64 205 var err error 206 newRoot := &doltdb.RootValue{} 207 208 badCount, err = mover.Move(ctx, mover.Wr.GetSchema()) 209 210 if err != nil { 211 if pipeline.IsTransformFailure(err) { 212 bdr := errhand.BuildDError("\nA bad row was encountered while moving data.") 213 214 r := pipeline.GetTransFailureRow(err) 215 if r != nil { 216 bdr.AddDetails("Bad Row: " + row.Fmt(ctx, r, mover.Wr.GetSchema())) 217 } 218 219 details := pipeline.GetTransFailureDetails(err) 220 221 bdr.AddDetails(details) 222 bdr.AddDetails("These can be ignored using the '--continue'") 223 224 return nil, badCount, bdr.Build() 225 } 226 return nil, badCount, errhand.BuildDError("An error occurred moving data:\n").AddCause(err).Build() 227 } 228 229 if mvOpts.WritesToTable() { 230 wr := mover.Wr.(DataMoverCloser) 231 newRoot, err = wr.Flush(ctx) 232 if err != nil { 233 return nil, badCount, errhand.BuildDError("Failed to apply changes to the table.").AddCause(err).Build() 234 } 235 236 rootHash, err := root.HashOf() 237 if err != nil { 238 return nil, badCount, errhand.BuildDError("Failed to hash the working value.").AddCause(err).Build() 239 } 240 241 newRootHash, err := newRoot.HashOf() 242 if rootHash != newRootHash { 243 err = updateRoot(ctx, newRoot) 244 if err != nil { 245 return nil, badCount, errhand.BuildDError("Failed to update the working value.").AddCause(err).Build() 246 } 247 } 248 } 249 250 return newRoot, badCount, nil 251 } 252 253 func MoveData(ctx context.Context, dEnv *env.DoltEnv, mover *DataMover, mvOpts DataMoverOptions) (int64, errhand.VerboseError) { 254 root, err := dEnv.WorkingRoot(ctx) 255 if err != nil { 256 return 0, errhand.BuildDError("Failed to fetch the working value.").AddCause(err).Build() 257 } 258 _, badCount, moveErr := MoveDataToRoot(ctx, mover, mvOpts, root, dEnv.UpdateWorkingRoot) 259 if moveErr != nil { 260 return badCount, moveErr 261 } 262 return badCount, nil 263 } 264 265 // NameMapTransform creates a pipeline transform that converts rows from inSch to outSch based on a name mapping. 266 func NameMapTransform(ctx context.Context, vrw types.ValueReadWriter, inSch schema.Schema, outSch schema.Schema, mapper rowconv.NameMapper) (*pipeline.TransformCollection, error) { 267 mapping, err := rowconv.NameMapping(inSch, outSch, mapper) 268 269 if err != nil { 270 return nil, err 271 } 272 273 rconv, err := rowconv.NewImportRowConverter(ctx, vrw, mapping) 274 275 if err != nil { 276 return nil, err 277 } 278 279 transforms := pipeline.NewTransformCollection() 280 if !rconv.IdentityConverter { 281 nt := pipeline.NewNamedTransform("Mapping transform", pipeline.GetRowConvTransformFunc(rconv)) 282 transforms.AppendTransforms(nt) 283 } 284 285 return transforms, nil 286 } 287 288 // SchAndTableNameFromFile reads a SQL schema file and creates a Dolt schema from it. 289 func SchAndTableNameFromFile(ctx context.Context, path string, fs filesys.ReadableFS, root *doltdb.RootValue) (string, schema.Schema, error) { 290 if path != "" { 291 data, err := fs.ReadFile(path) 292 293 if err != nil { 294 return "", nil, err 295 } 296 297 tn, sch, err := sqlutil.ParseCreateTableStatement(ctx, root, string(data)) 298 299 if err != nil { 300 return "", nil, fmt.Errorf("%s in schema file %s", err.Error(), path) 301 } 302 303 return tn, sch, nil 304 } else { 305 return "", nil, errors.New("no schema file to parse") 306 } 307 } 308 309 func InferSchema(ctx context.Context, root *doltdb.RootValue, rd table.TableReadCloser, tableName string, pks []string, args actions.InferenceArgs) (schema.Schema, error) { 310 var err error 311 312 infCols, err := actions.InferColumnTypesFromTableReader(ctx, root, rd, args) 313 if err != nil { 314 return nil, err 315 } 316 317 pkSet := set.NewStrSet(pks) 318 newCols := schema.MapColCollection(infCols, func(col schema.Column) schema.Column { 319 col.IsPartOfPK = pkSet.Contains(col.Name) 320 if col.IsPartOfPK { 321 hasNotNull := false 322 for _, constraint := range col.Constraints { 323 if _, ok := constraint.(schema.NotNullConstraint); ok { 324 hasNotNull = true 325 break 326 } 327 } 328 if !hasNotNull { 329 col.Constraints = append(col.Constraints, schema.NotNullConstraint{}) 330 } 331 } 332 return col 333 }) 334 335 // check that all provided primary keys are being used 336 for _, pk := range pks { 337 col, ok := newCols.GetByName(pk) 338 if !col.IsPartOfPK || !ok { 339 return nil, ErrProvidedPkNotFound 340 } 341 } 342 343 newCols, err = root.GenerateTagsForNewColColl(ctx, tableName, newCols) 344 if err != nil { 345 return nil, errhand.BuildDError("failed to generate new schema").AddCause(err).Build() 346 } 347 348 err = schema.ValidateForInsert(newCols) 349 if err != nil { 350 return nil, errhand.BuildDError("invalid schema").AddCause(err).Build() 351 } 352 353 return schema.SchemaFromCols(newCols) 354 }