github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/rowconv/field_mapping.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package rowconv
    16  
    17  import (
    18  	"errors"
    19  	"fmt"
    20  	"strconv"
    21  
    22  	"github.com/dolthub/dolt/go/cmd/dolt/errhand"
    23  	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
    24  	"github.com/dolthub/dolt/go/libraries/utils/filesys"
    25  )
    26  
    27  // ErrMappingFileRead is an error returned when a mapping file cannot be read
    28  var ErrMappingFileRead = errors.New("error reading mapping file")
    29  
    30  // ErrEmptyMapping is an error returned when the mapping is empty (No src columns, no destination columns)
    31  var ErrEmptyMapping = errors.New("empty mapping error")
    32  
    33  // BadMappingErr is a struct which implements the error interface and is used when there is an error with a mapping.
    34  type BadMappingErr struct {
    35  	srcField  string
    36  	destField string
    37  }
    38  
    39  // String representing the BadMappingError
    40  func (err *BadMappingErr) Error() string {
    41  	return fmt.Sprintf("Mapping file attempted to map %s to %s, but one or both of those fields are unknown.", err.srcField, err.destField)
    42  }
    43  
    44  // NameMapper is a simple interface for mapping a string to another string
    45  type NameMapper map[string]string
    46  
    47  // Map maps a string to another string.  If a string is not in the mapping ok will be false, otherwise it is true.
    48  func (nm NameMapper) Map(str string) string {
    49  	v, ok := nm[str]
    50  	if ok {
    51  		return v
    52  	}
    53  	return str
    54  }
    55  
    56  // PreImage searches the NameMapper for the string that maps to str, returns str otherwise
    57  func (nm NameMapper) PreImage(str string) string {
    58  	for pre, post := range nm {
    59  		if post == str {
    60  			return pre
    61  		}
    62  	}
    63  	return str
    64  }
    65  
    66  // FieldMapping defines a mapping from columns in a source schema to columns in a dest schema.
    67  type FieldMapping struct {
    68  	// SrcSch is the source schema being mapped from.
    69  	SrcSch schema.Schema
    70  
    71  	// DestSch is the destination schema being mapped to.
    72  	DestSch schema.Schema
    73  
    74  	// SrcToDest is a map from a tag in the source schema to a tag in the dest schema.
    75  	SrcToDest map[uint64]uint64
    76  }
    77  
    78  // NewFieldMapping creates a FieldMapping from a source schema, a destination schema, and a map from tags in the source
    79  // schema to tags in the dest schema.
    80  func NewFieldMapping(srcSch, destSch schema.Schema, srcTagToDestTag map[uint64]uint64) (*FieldMapping, error) {
    81  	destCols := destSch.GetAllCols()
    82  
    83  	for srcTag, destTag := range srcTagToDestTag {
    84  		_, destOk := destCols.GetByTag(destTag)
    85  
    86  		if !destOk {
    87  			return nil, &BadMappingErr{"src tag:" + strconv.FormatUint(srcTag, 10), "dest tag:" + strconv.FormatUint(destTag, 10)}
    88  		}
    89  	}
    90  
    91  	if len(srcTagToDestTag) == 0 {
    92  		return nil, ErrEmptyMapping
    93  	}
    94  
    95  	return &FieldMapping{srcSch, destSch, srcTagToDestTag}, nil
    96  }
    97  
    98  // TagMapping takes a source schema and a destination schema and maps all columns which have a matching tag in the
    99  // source and destination schemas.
   100  func TagMapping(srcSch, destSch schema.Schema) (*FieldMapping, error) {
   101  	successes := 0
   102  	srcCols := srcSch.GetAllCols()
   103  	destCols := destSch.GetAllCols()
   104  
   105  	srcToDest := make(map[uint64]uint64, destCols.Size())
   106  	err := destCols.Iter(func(destTag uint64, col schema.Column) (stop bool, err error) {
   107  		srcCol, ok := srcCols.GetByTag(destTag)
   108  
   109  		if ok {
   110  			srcToDest[srcCol.Tag] = destTag
   111  			successes++
   112  		}
   113  
   114  		return false, nil
   115  	})
   116  
   117  	if err != nil {
   118  		return nil, err
   119  	}
   120  
   121  	if successes == 0 {
   122  		return nil, ErrEmptyMapping
   123  	}
   124  
   125  	return NewFieldMapping(srcSch, destSch, srcToDest)
   126  }
   127  
   128  // NameMapping takes a source schema and a destination schema and maps all columns which have a matching name in the
   129  // source and destination schemas.
   130  func NameMapping(srcSch, destSch schema.Schema, nameMapper NameMapper) (*FieldMapping, error) {
   131  	successes := 0
   132  	srcCols := srcSch.GetAllCols()
   133  	destCols := destSch.GetAllCols()
   134  
   135  	srcToDest := make(map[uint64]uint64, destCols.Size())
   136  	err := srcCols.Iter(func(tag uint64, col schema.Column) (stop bool, err error) {
   137  		mn := nameMapper.Map(col.Name)
   138  		outCol, ok := destCols.GetByName(mn)
   139  
   140  		if ok {
   141  			srcToDest[tag] = outCol.Tag
   142  			successes++
   143  		}
   144  
   145  		return false, nil
   146  	})
   147  
   148  	if err != nil {
   149  		return nil, err
   150  	}
   151  
   152  	if successes == 0 {
   153  		return nil, ErrEmptyMapping
   154  	}
   155  
   156  	return NewFieldMapping(srcSch, destSch, srcToDest)
   157  }
   158  
   159  // NameMapperFromFile reads a JSON file containing a name mapping and returns a NameMapper.
   160  func NameMapperFromFile(mappingFile string, FS filesys.ReadableFS) (NameMapper, error) {
   161  	var nm NameMapper
   162  
   163  	if mappingFile == "" {
   164  		// identity mapper
   165  		return make(NameMapper), nil
   166  	}
   167  
   168  	if fileExists, _ := FS.Exists(mappingFile); !fileExists {
   169  		return nil, errhand.BuildDError("error: '%s' does not exist.", mappingFile).Build()
   170  	}
   171  
   172  	err := filesys.UnmarshalJSONFile(FS, mappingFile, &nm)
   173  
   174  	if err != nil {
   175  		return nil, errhand.BuildDError(ErrMappingFileRead.Error()).AddCause(err).Build()
   176  	}
   177  
   178  	return nm, nil
   179  }
   180  
   181  // TagMappingByTagAndName takes a source schema and a destination schema and maps
   182  // pks by tag and non-pks by name.
   183  func TagMappingByTagAndName(srcSch, destSch schema.Schema) (*FieldMapping, error) {
   184  	srcToDest := make(map[uint64]uint64, destSch.GetAllCols().Size())
   185  
   186  	keyMap, valMap, err := schema.MapSchemaBasedOnTagAndName(srcSch, destSch)
   187  	if err != nil {
   188  		return nil, err
   189  	}
   190  
   191  	var successes int
   192  	for i, j := range keyMap {
   193  		if j == -1 {
   194  			continue
   195  		}
   196  		srcTag := srcSch.GetPKCols().GetByIndex(i).Tag
   197  		dstTag := destSch.GetPKCols().GetByIndex(j).Tag
   198  		srcToDest[srcTag] = dstTag
   199  		successes++
   200  	}
   201  	for i, j := range valMap {
   202  		if j == -1 {
   203  			continue
   204  		}
   205  		srcTag := srcSch.GetNonPKCols().GetByIndex(i).Tag
   206  		dstTag := destSch.GetNonPKCols().GetByIndex(j).Tag
   207  		srcToDest[srcTag] = dstTag
   208  		successes++
   209  	}
   210  
   211  	if successes == 0 {
   212  		return nil, ErrEmptyMapping
   213  	}
   214  
   215  	return NewFieldMapping(srcSch, destSch, srcToDest)
   216  }