github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/libraries/doltcore/rowconv/field_mapping.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package rowconv
    16  
    17  import (
    18  	"errors"
    19  	"fmt"
    20  	"strconv"
    21  
    22  	"github.com/dolthub/dolt/go/cmd/dolt/errhand"
    23  	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
    24  	"github.com/dolthub/dolt/go/libraries/doltcore/table/untyped"
    25  	"github.com/dolthub/dolt/go/libraries/utils/filesys"
    26  	"github.com/dolthub/dolt/go/libraries/utils/set"
    27  )
    28  
    29  // ErrMappingFileRead is an error returned when a mapping file cannot be read
    30  var ErrMappingFileRead = errors.New("error reading mapping file")
    31  
    32  // ErrUnmarshallingMapping is an error used when a mapping file cannot be converted from json
    33  var ErrUnmarshallingMapping = errors.New("error unmarshalling mapping")
    34  
    35  // ErrEmptyMapping is an error returned when the mapping is empty (No src columns, no destination columns)
    36  var ErrEmptyMapping = errors.New("empty mapping error")
    37  
    38  // BadMappingErr is a struct which implements the error interface and is used when there is an error with a mapping.
    39  type BadMappingErr struct {
    40  	srcField  string
    41  	destField string
    42  }
    43  
    44  // String representing the BadMappingError
    45  func (err *BadMappingErr) Error() string {
    46  	return fmt.Sprintf("Mapping file attempted to map %s to %s, but one or both of those fields are unknown.", err.srcField, err.destField)
    47  }
    48  
    49  // IsBadMappingErr returns true if the error is a BadMappingErr
    50  func IsBadMappingErr(err error) bool {
    51  	_, ok := err.(*BadMappingErr)
    52  	return ok
    53  }
    54  
    55  // NameMapper is a simple interface for mapping a string to another string
    56  type NameMapper map[string]string
    57  
    58  // Map maps a string to another string.  If a string is not in the mapping ok will be false, otherwise it is true.
    59  func (nm NameMapper) Map(str string) string {
    60  	v, ok := nm[str]
    61  	if ok {
    62  		return v
    63  	}
    64  	return str
    65  }
    66  
    67  // PreImage searches the NameMapper for the string that maps to str, returns str otherwise
    68  func (nm NameMapper) PreImage(str string) string {
    69  	for pre, post := range nm {
    70  		if post == str {
    71  			return pre
    72  		}
    73  	}
    74  	return str
    75  }
    76  
    77  // FieldMapping defines a mapping from columns in a source schema to columns in a dest schema.
    78  type FieldMapping struct {
    79  	// SrcSch is the source schema being mapped from.
    80  	SrcSch schema.Schema
    81  
    82  	// DestSch is the destination schema being mapped to.
    83  	DestSch schema.Schema
    84  
    85  	// SrcToDest is a map from a tag in the source schema to a tag in the dest schema.
    86  	SrcToDest map[uint64]uint64
    87  }
    88  
    89  // MapsAllDestPKs checks that each PK column in DestSch has a corresponding column in SrcSch
    90  func (fm *FieldMapping) MapsAllDestPKs() bool {
    91  	ds := set.NewUint64Set(nil)
    92  	for _, v := range fm.SrcToDest {
    93  		ds.Add(v)
    94  	}
    95  	for _, tag := range fm.DestSch.GetPKCols().Tags {
    96  		if !ds.Contains(tag) {
    97  			return false
    98  		}
    99  	}
   100  	return true
   101  }
   102  
   103  func InvertMapping(fm *FieldMapping) *FieldMapping {
   104  	invertedMap := make(map[uint64]uint64)
   105  
   106  	for k, v := range fm.SrcToDest {
   107  		invertedMap[v] = k
   108  	}
   109  
   110  	return &FieldMapping{
   111  		SrcSch:    fm.DestSch,
   112  		DestSch:   fm.SrcSch,
   113  		SrcToDest: invertedMap,
   114  	}
   115  }
   116  
   117  // NewFieldMapping creates a FieldMapping from a source schema, a destination schema, and a map from tags in the source
   118  // schema to tags in the dest schema.
   119  func NewFieldMapping(srcSch, destSch schema.Schema, srcTagToDestTag map[uint64]uint64) (*FieldMapping, error) {
   120  	destCols := destSch.GetAllCols()
   121  
   122  	for srcTag, destTag := range srcTagToDestTag {
   123  		_, destOk := destCols.GetByTag(destTag)
   124  
   125  		if !destOk {
   126  			return nil, &BadMappingErr{"src tag:" + strconv.FormatUint(srcTag, 10), "dest tag:" + strconv.FormatUint(destTag, 10)}
   127  		}
   128  	}
   129  
   130  	if len(srcTagToDestTag) == 0 {
   131  		return nil, ErrEmptyMapping
   132  	}
   133  
   134  	return &FieldMapping{srcSch, destSch, srcTagToDestTag}, nil
   135  }
   136  
   137  // Returns the identity mapping for the schema given.
   138  func IdentityMapping(sch schema.Schema) *FieldMapping {
   139  	fieldMapping, err := TagMapping(sch, sch)
   140  	if err != nil {
   141  		panic("Error creating identity mapping")
   142  	}
   143  	return fieldMapping
   144  }
   145  
   146  // TagMapping takes a source schema and a destination schema and maps all columns which have a matching tag in the
   147  // source and destination schemas.
   148  func TagMapping(srcSch, destSch schema.Schema) (*FieldMapping, error) {
   149  	successes := 0
   150  	srcCols := srcSch.GetAllCols()
   151  	destCols := destSch.GetAllCols()
   152  
   153  	srcToDest := make(map[uint64]uint64, destCols.Size())
   154  	err := destCols.Iter(func(destTag uint64, col schema.Column) (stop bool, err error) {
   155  		srcCol, ok := srcCols.GetByTag(destTag)
   156  
   157  		if ok {
   158  			srcToDest[srcCol.Tag] = destTag
   159  			successes++
   160  		}
   161  
   162  		return false, nil
   163  	})
   164  
   165  	if err != nil {
   166  		return nil, err
   167  	}
   168  
   169  	if successes == 0 {
   170  		return nil, ErrEmptyMapping
   171  	}
   172  
   173  	return NewFieldMapping(srcSch, destSch, srcToDest)
   174  }
   175  
   176  // NameMapping takes a source schema and a destination schema and maps all columns which have a matching name in the
   177  // source and destination schemas.
   178  func NameMapping(srcSch, destSch schema.Schema, nameMapper NameMapper) (*FieldMapping, error) {
   179  	successes := 0
   180  	srcCols := srcSch.GetAllCols()
   181  	destCols := destSch.GetAllCols()
   182  
   183  	srcToDest := make(map[uint64]uint64, destCols.Size())
   184  	err := srcCols.Iter(func(tag uint64, col schema.Column) (stop bool, err error) {
   185  		mn := nameMapper.Map(col.Name)
   186  		outCol, ok := destCols.GetByName(mn)
   187  
   188  		if ok {
   189  			srcToDest[tag] = outCol.Tag
   190  			successes++
   191  		}
   192  
   193  		return false, nil
   194  	})
   195  
   196  	if err != nil {
   197  		return nil, err
   198  	}
   199  
   200  	if successes == 0 {
   201  		return nil, ErrEmptyMapping
   202  	}
   203  
   204  	return NewFieldMapping(srcSch, destSch, srcToDest)
   205  }
   206  
   207  // NameMapperFromFile reads a JSON file containing a name mapping and returns a NameMapper.
   208  func NameMapperFromFile(mappingFile string, FS filesys.ReadableFS) (NameMapper, error) {
   209  	var nm NameMapper
   210  
   211  	if mappingFile == "" {
   212  		// identity mapper
   213  		return make(NameMapper), nil
   214  	}
   215  
   216  	if fileExists, _ := FS.Exists(mappingFile); !fileExists {
   217  		return nil, errhand.BuildDError("error: '%s' does not exist.", mappingFile).Build()
   218  	}
   219  
   220  	err := filesys.UnmarshalJSONFile(FS, mappingFile, &nm)
   221  
   222  	if err != nil {
   223  		return nil, errhand.BuildDError(ErrMappingFileRead.Error()).AddCause(err).Build()
   224  	}
   225  
   226  	return nm, nil
   227  }
   228  
   229  // TypedToUntypedMapping takes a schema and creates a mapping to an untyped schema with all the same columns.
   230  func TypedToUntypedMapping(sch schema.Schema) (*FieldMapping, error) {
   231  	untypedSch, err := untyped.UntypeSchema(sch)
   232  	if err != nil {
   233  		return nil, err
   234  	}
   235  
   236  	identityMap := make(map[uint64]uint64)
   237  	err = sch.GetAllCols().Iter(func(tag uint64, col schema.Column) (stop bool, err error) {
   238  		identityMap[tag] = tag
   239  		return false, nil
   240  	})
   241  
   242  	if err != nil {
   243  		return nil, err
   244  	}
   245  
   246  	mapping, err := NewFieldMapping(sch, untypedSch, identityMap)
   247  
   248  	if err != nil {
   249  		panic(err)
   250  	}
   251  
   252  	return mapping, nil
   253  }