github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/mvdata/data_mover.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package mvdata
    16  
    17  import (
    18  	"context"
    19  	"errors"
    20  	"fmt"
    21  
    22  	"github.com/dolthub/dolt/go/cmd/dolt/commands/engine"
    23  	"github.com/dolthub/dolt/go/cmd/dolt/errhand"
    24  	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
    25  	"github.com/dolthub/dolt/go/libraries/doltcore/env"
    26  	"github.com/dolthub/dolt/go/libraries/doltcore/env/actions"
    27  	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
    28  	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/sqlutil"
    29  	"github.com/dolthub/dolt/go/libraries/doltcore/table"
    30  	"github.com/dolthub/dolt/go/libraries/utils/set"
    31  )
    32  
    33  type CsvOptions struct {
    34  	Delim string
    35  }
    36  
    37  type XlsxOptions struct {
    38  	SheetName string
    39  }
    40  
    41  type JSONOptions struct {
    42  	TableName string
    43  	SchFile   string
    44  }
    45  
    46  type ParquetOptions struct {
    47  	TableName string
    48  	SchFile   string
    49  }
    50  
    51  type MoverOptions struct {
    52  	ContinueOnErr  bool
    53  	Force          bool
    54  	TableToWriteTo string
    55  	Operation      TableImportOp
    56  	DisableFks     bool
    57  }
    58  
    59  type DataMoverOptions interface {
    60  	IsAutocommitOff() bool
    61  	IsBatched() bool
    62  	WritesToTable() bool
    63  	SrcName() string
    64  	DestName() string
    65  }
    66  
    67  type DataMoverCreationErrType string
    68  
    69  const (
    70  	CreateReaderErr   DataMoverCreationErrType = "Create reader error"
    71  	NomsKindSchemaErr DataMoverCreationErrType = "Invalid schema error"
    72  	SchemaErr         DataMoverCreationErrType = "Schema error"
    73  	MappingErr        DataMoverCreationErrType = "Mapping error"
    74  	ReplacingErr      DataMoverCreationErrType = "Replacing error"
    75  	CreateMapperErr   DataMoverCreationErrType = "Mapper creation error"
    76  	CreateWriterErr   DataMoverCreationErrType = "Create writer error"
    77  	CreateSorterErr   DataMoverCreationErrType = "Create sorter error"
    78  )
    79  
    80  var ErrProvidedPkNotFound = errors.New("provided primary key not found")
    81  
    82  type DataMoverCreationError struct {
    83  	ErrType DataMoverCreationErrType
    84  	Cause   error
    85  }
    86  
    87  func (dmce *DataMoverCreationError) String() string {
    88  	return string(dmce.ErrType) + ": " + dmce.Cause.Error()
    89  }
    90  
    91  // SchAndTableNameFromFile reads a SQL schema file and creates a Dolt schema from it.
    92  func SchAndTableNameFromFile(ctx context.Context, path string, dEnv *env.DoltEnv) (string, schema.Schema, error) {
    93  	root, err := dEnv.WorkingRoot(ctx)
    94  	if err != nil {
    95  		return "", nil, err
    96  	}
    97  	fs := dEnv.FS
    98  
    99  	if path != "" {
   100  		data, err := fs.ReadFile(path)
   101  		if err != nil {
   102  			return "", nil, err
   103  		}
   104  
   105  		eng, dbName, err := engine.NewSqlEngineForEnv(ctx, dEnv)
   106  		if err != nil {
   107  			return "", nil, err
   108  		}
   109  
   110  		sqlCtx, err := eng.NewDefaultContext(ctx)
   111  		if err != nil {
   112  			return "", nil, err
   113  		}
   114  		sqlCtx.SetCurrentDatabase(dbName)
   115  		tn, sch, err := sqlutil.ParseCreateTableStatement(sqlCtx, root, eng.GetUnderlyingEngine(), string(data))
   116  
   117  		if err != nil {
   118  			return "", nil, fmt.Errorf("%s in schema file %s", err.Error(), path)
   119  		}
   120  
   121  		return tn, sch, nil
   122  	} else {
   123  		return "", nil, errors.New("no schema file to parse")
   124  	}
   125  }
   126  
   127  func InferSchema(ctx context.Context, root doltdb.RootValue, rd table.ReadCloser, tableName string, pks []string, args actions.InferenceArgs) (schema.Schema, error) {
   128  	var err error
   129  
   130  	infCols, err := actions.InferColumnTypesFromTableReader(ctx, rd, args)
   131  	if err != nil {
   132  		return nil, err
   133  	}
   134  
   135  	pkSet := set.NewStrSet(pks)
   136  	newCols := schema.MapColCollection(infCols, func(col schema.Column) schema.Column {
   137  		col.IsPartOfPK = pkSet.Contains(col.Name)
   138  		if col.IsPartOfPK {
   139  			hasNotNull := false
   140  			for _, constraint := range col.Constraints {
   141  				if _, ok := constraint.(schema.NotNullConstraint); ok {
   142  					hasNotNull = true
   143  					break
   144  				}
   145  			}
   146  			if !hasNotNull {
   147  				col.Constraints = append(col.Constraints, schema.NotNullConstraint{})
   148  			}
   149  		}
   150  		return col
   151  	})
   152  
   153  	// check that all provided primary keys are being used
   154  	for _, pk := range pks {
   155  		col, ok := newCols.GetByName(pk)
   156  		if !col.IsPartOfPK || !ok {
   157  			return nil, ErrProvidedPkNotFound
   158  		}
   159  	}
   160  
   161  	newCols, err = doltdb.GenerateTagsForNewColColl(ctx, root, tableName, newCols)
   162  	if err != nil {
   163  		return nil, errhand.BuildDError("failed to generate new schema").AddCause(err).Build()
   164  	}
   165  
   166  	err = schema.ValidateForInsert(newCols)
   167  	if err != nil {
   168  		return nil, errhand.BuildDError("invalid schema").AddCause(err).Build()
   169  	}
   170  
   171  	return schema.SchemaFromCols(newCols)
   172  }
   173  
   174  type TableImportOp string
   175  
   176  const (
   177  	CreateOp  TableImportOp = "overwrite"
   178  	ReplaceOp TableImportOp = "replace"
   179  	UpdateOp  TableImportOp = "update"
   180  	AppendOp  TableImportOp = "append"
   181  )