github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/types/struct.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // This file incorporates work covered by the following copyright and
    16  // permission notice:
    17  //
    18  // Copyright 2016 Attic Labs, Inc. All rights reserved.
    19  // Licensed under the Apache License, version 2.0:
    20  // http://www.apache.org/licenses/LICENSE-2.0
    21  
    22  package types
    23  
    24  import (
    25  	"bytes"
    26  	"context"
    27  	"fmt"
    28  	"regexp"
    29  	"sort"
    30  	"strings"
    31  
    32  	"github.com/dolthub/dolt/go/store/d"
    33  )
    34  
// EmptyStructType is the struct type produced by MakeStructType for an empty
// name and no field arguments. The error returned by MakeStructType is
// discarded.
var EmptyStructType, _ = MakeStructType("")
    36  
    37  func EmptyStruct(nbf *NomsBinFormat) Struct {
    38  	es, _ := newStruct(nbf, "", nil, nil)
    39  	return es
    40  }
    41  
// StructData maps field names to field values. NewStruct accepts one to
// describe the fields of the struct it builds.
type StructData map[string]Value
    43  
// Struct is a named collection of (field name, value) pairs. All of its state
// lives in the embedded valueImpl; the methods in this file decode the name,
// the field count and the fields from that encoded buffer on demand.
type Struct struct {
	valueImpl
}
    47  
    48  // readStruct reads the data provided by a decoder and moves the decoder forward.
    49  func readStruct(nbf *NomsBinFormat, dec *valueDecoder) (Struct, error) {
    50  	start := dec.pos()
    51  	err := skipStruct(nbf, dec)
    52  
    53  	if err != nil {
    54  		return EmptyStruct(nbf), err
    55  	}
    56  
    57  	end := dec.pos()
    58  	return Struct{valueImpl{dec.vrw, nbf, dec.byteSlice(start, end), nil}}, nil
    59  }
    60  
    61  func skipStruct(nbf *NomsBinFormat, dec *valueDecoder) error {
    62  	dec.skipKind()
    63  	dec.skipString() // name
    64  	count := dec.readCount()
    65  	for i := uint64(0); i < count; i++ {
    66  		dec.skipString()
    67  		err := dec.SkipValue(nbf)
    68  
    69  		if err != nil {
    70  			return err
    71  		}
    72  	}
    73  
    74  	return nil
    75  }
    76  
    77  func isStructSameTypeForSure(nbf *NomsBinFormat, dec *valueDecoder, t *Type) (bool, error) {
    78  	desc := t.Desc.(StructDesc)
    79  	dec.skipKind()
    80  	if !dec.isStringSame(desc.Name) {
    81  		return false, nil
    82  	}
    83  	count := dec.readCount()
    84  	if count != uint64(len(desc.fields)) {
    85  		return false, nil
    86  	}
    87  	for i := uint64(0); i < count; i++ {
    88  		if desc.fields[i].Optional {
    89  			return false, nil
    90  		}
    91  		if !dec.isStringSame(desc.fields[i].Name) {
    92  			return false, nil
    93  		}
    94  
    95  		isSame, err := dec.isValueSameTypeForSure(nbf, desc.fields[i].Type)
    96  
    97  		if err != nil {
    98  			return false, err
    99  		}
   100  
   101  		if !isSame {
   102  			return false, nil
   103  		}
   104  	}
   105  	return true, nil
   106  }
   107  
   108  func walkStruct(nbf *NomsBinFormat, r *refWalker, cb RefCallback) error {
   109  	r.skipKind()
   110  	r.skipString() // name
   111  	count := r.readCount()
   112  	for i := uint64(0); i < count; i++ {
   113  		r.skipString()
   114  		err := r.walkValue(nbf, cb)
   115  
   116  		if err != nil {
   117  			return err
   118  		}
   119  	}
   120  
   121  	return nil
   122  }
   123  
   124  func newStruct(nbf *NomsBinFormat, name string, fieldNames []string, values []Value) (Struct, error) {
   125  	var vrw ValueReadWriter
   126  	w := newBinaryNomsWriter()
   127  	err := StructKind.writeTo(&w, nbf)
   128  
   129  	if err != nil {
   130  		return EmptyStruct(nbf), err
   131  	}
   132  
   133  	w.writeString(name)
   134  	w.writeCount(uint64(len(fieldNames)))
   135  	for i := 0; i < len(fieldNames); i++ {
   136  		w.writeString(fieldNames[i])
   137  		if vrw == nil {
   138  			vrw = values[i].(valueReadWriter).valueReadWriter()
   139  		}
   140  		err := values[i].writeTo(&w, nbf)
   141  
   142  		if err != nil {
   143  			return EmptyStruct(nbf), err
   144  		}
   145  	}
   146  	return Struct{valueImpl{vrw, nbf, w.data(), nil}}, nil
   147  }
   148  
   149  func NewStruct(nbf *NomsBinFormat, name string, data StructData) (Struct, error) {
   150  	verifyStructName(name)
   151  	fieldNames := make([]string, len(data))
   152  	values := make([]Value, len(data))
   153  
   154  	i := 0
   155  	for name := range data {
   156  		verifyFieldName(name)
   157  		fieldNames[i] = name
   158  		i++
   159  	}
   160  
   161  	sort.Strings(fieldNames)
   162  	for i = 0; i < len(fieldNames); i++ {
   163  		values[i] = data[fieldNames[i]]
   164  	}
   165  
   166  	return newStruct(nbf, name, fieldNames, values)
   167  }
   168  
// Format returns the NomsBinFormat reported by the struct's format method.
func (s Struct) Format() *NomsBinFormat {
	return s.format()
}
   172  
// StructTemplate allows creating a template for structs with a known shape
// (name and fields). If a lot of structs of the same shape are being created
// then using a StructTemplate makes that slightly more efficient.
type StructTemplate struct {
	// name is the struct name used for every struct built from the template.
	name string
	// fieldNames are the field names, in the order values must be supplied
	// to NewStruct.
	fieldNames []string
}
   180  
   181  // MakeStructTemplate creates a new StructTemplate or panics if the name and
   182  // fields are not valid.
   183  func MakeStructTemplate(name string, fieldNames []string) (t StructTemplate) {
   184  	t = StructTemplate{name, fieldNames}
   185  
   186  	verifyStructName(name)
   187  	if len(fieldNames) == 0 {
   188  		return
   189  	}
   190  	verifyFieldName(fieldNames[0])
   191  	for i := 1; i < len(fieldNames); i++ {
   192  		verifyFieldName(fieldNames[i])
   193  		d.PanicIfFalse(fieldNames[i] > fieldNames[i-1])
   194  	}
   195  	return
   196  }
   197  
   198  // NewStruct creates a new Struct from the StructTemplate. The order of the
   199  // values must match the order of the field names of the StructTemplate.
   200  func (st StructTemplate) NewStruct(nbf *NomsBinFormat, values []Value) (Struct, error) {
   201  	d.PanicIfFalse(len(st.fieldNames) == len(values))
   202  	return newStruct(nbf, st.name, st.fieldNames, values)
   203  }
   204  
   205  func (s Struct) Empty() bool {
   206  	return s.Len() == 0
   207  }
   208  
// Value interface

// isPrimitive always reports false for a Struct.
func (s Struct) isPrimitive() bool {
	return false
}
   213  
// Value returns the struct itself and a nil error. ctx is not used.
func (s Struct) Value(ctx context.Context) (Value, error) {
	return s, nil
}
   217  
   218  func (s Struct) WalkValues(ctx context.Context, cb ValueCallback) error {
   219  	dec, count := s.decoderSkipToFields()
   220  	for i := uint64(0); i < count; i++ {
   221  		dec.skipString()
   222  		v, err := dec.readValue(s.format())
   223  
   224  		if err != nil {
   225  			return err
   226  		}
   227  
   228  		err = cb(v)
   229  
   230  		if err != nil {
   231  			return err
   232  		}
   233  	}
   234  
   235  	return nil
   236  }
   237  
   238  func (s Struct) typeOf() (*Type, error) {
   239  	dec := s.decoder()
   240  	return readStructTypeOfValue(s.format(), &dec)
   241  }
   242  
   243  func readStructTypeOfValue(nbf *NomsBinFormat, dec *valueDecoder) (*Type, error) {
   244  	dec.skipKind()
   245  	name := dec.ReadString()
   246  	count := dec.readCount()
   247  	typeFields := make(structTypeFields, count)
   248  	for i := uint64(0); i < count; i++ {
   249  		fname := dec.ReadString()
   250  		t, err := dec.readTypeOfValue(nbf)
   251  
   252  		if err != nil {
   253  			return nil, err
   254  		}
   255  
   256  		typeFields[i] = StructField{
   257  			Name:     fname,
   258  			Optional: false,
   259  			Type:     t,
   260  		}
   261  	}
   262  
   263  	return makeStructTypeQuickly(name, typeFields)
   264  }
   265  
   266  func (s Struct) decoderSkipToFields() (valueDecoder, uint64) {
   267  	dec := s.decoder()
   268  	dec.skipKind()
   269  	dec.skipString()
   270  	count := dec.readCount()
   271  	return dec, count
   272  }
   273  
   274  // Len is the number of fields in the struct.
   275  func (s Struct) Len() int {
   276  	_, count := s.decoderSkipToFields()
   277  	return int(count)
   278  }
   279  
   280  // Name is the name of the struct.
   281  func (s Struct) Name() string {
   282  	dec := s.decoder()
   283  	dec.skipKind()
   284  	return dec.ReadString()
   285  }
   286  
   287  // IterFields iterates over the fields, calling cb for every field in the
   288  // struct.
   289  func (s Struct) IterFields(cb func(name string, value Value) error) error {
   290  	dec, count := s.decoderSkipToFields()
   291  	for i := uint64(0); i < count; i++ {
   292  		fldName := dec.ReadString()
   293  		val, err := dec.readValue(s.format())
   294  
   295  		if err != nil {
   296  			return err
   297  		}
   298  
   299  		err = cb(fldName, val)
   300  
   301  		if err != nil {
   302  			return err
   303  		}
   304  	}
   305  
   306  	return nil
   307  }
   308  
// structPartCallbacks receives the pieces of a struct as iterParts decodes
// them. iterParts calls name once, then count once, then fieldName followed
// by fieldValue for each field, and finally end when every field was decoded
// without error.
type structPartCallbacks interface {
	// name receives the struct name.
	name(ctx context.Context, n string)
	// count receives the number of fields.
	count(c uint64)
	// fieldName receives the name of the next field.
	fieldName(n string)
	// fieldValue receives the value of the field just named; a non-nil error
	// stops the iteration.
	fieldValue(ctx context.Context, v Value) error
	// end is called after the last field.
	end()
}
   316  
   317  func (s Struct) iterParts(ctx context.Context, cbs structPartCallbacks) error {
   318  	dec := s.decoder()
   319  	dec.skipKind()
   320  	cbs.name(ctx, dec.ReadString())
   321  	count := dec.readCount()
   322  	cbs.count(count)
   323  	for i := uint64(0); i < count; i++ {
   324  		cbs.fieldName(dec.ReadString())
   325  		val, err := dec.readValue(s.format())
   326  
   327  		if err != nil {
   328  			return err
   329  		}
   330  
   331  		err = cbs.fieldValue(ctx, val)
   332  
   333  		if err != nil {
   334  			return err
   335  		}
   336  	}
   337  	cbs.end()
   338  	return nil
   339  }
   340  
   341  // MaybeGet returns the value of a field in the struct. If the struct does not a have a field with
   342  // the name name then this returns (nil, false).
   343  func (s Struct) MaybeGet(n string) (v Value, found bool, err error) {
   344  	dec, count := s.decoderSkipToFields()
   345  	for i := uint64(0); i < count; i++ {
   346  		name := dec.ReadString()
   347  		if name == n {
   348  			found = true
   349  			v, err = dec.readValue(s.format())
   350  
   351  			if err != nil {
   352  				return nil, false, err
   353  			}
   354  
   355  			return v, found, nil
   356  		}
   357  
   358  		if name > n {
   359  			return
   360  		}
   361  
   362  		err = dec.SkipValue(s.format())
   363  
   364  		if err != nil {
   365  			return nil, false, err
   366  		}
   367  	}
   368  
   369  	return v, found, nil
   370  }
   371  
   372  // Set returns a new struct where the field name has been set to value. If name is not an
   373  // existing field in the struct or the type of value is different from the old value of the
   374  // struct field a new struct type is created.
   375  func (s Struct) Set(n string, v Value) (Struct, error) {
   376  	verifyFieldName(n)
   377  
   378  	prolog, head, tail, count, found, err := s.splitFieldsAt(n)
   379  
   380  	if err != nil {
   381  		return EmptyStruct(s.Format()), err
   382  	}
   383  
   384  	w := binaryNomsWriter{make([]byte, len(s.buff)), 0}
   385  	w.writeRaw(prolog)
   386  
   387  	if !found {
   388  		count++
   389  	}
   390  	w.writeCount(count)
   391  	w.writeRaw(head)
   392  	w.writeString(n)
   393  	err = v.writeTo(&w, s.format())
   394  
   395  	if err != nil {
   396  		return EmptyStruct(s.Format()), err
   397  	}
   398  
   399  	w.writeRaw(tail)
   400  
   401  	return Struct{valueImpl{s.vrw, s.format(), w.data(), nil}}, nil
   402  }
   403  
   404  // splitFieldsAt splits the buffer into two parts. The fields coming before the field we are looking for
   405  // and the fields coming after it.
   406  func (s Struct) splitFieldsAt(name string) (prolog, head, tail []byte, count uint64, found bool, err error) {
   407  	dec := s.decoder()
   408  	dec.skipKind()
   409  	dec.skipString()
   410  	prolog = dec.buff[:dec.offset]
   411  	count = dec.readCount()
   412  	fieldsOffset := dec.offset
   413  
   414  	for i := uint64(0); i < count; i++ {
   415  		beforeCurrent := dec.offset
   416  		fn := dec.ReadString()
   417  		err = dec.SkipValue(s.format())
   418  
   419  		if err != nil {
   420  			return nil, nil, nil, 0, false, err
   421  		}
   422  
   423  		if fn == name {
   424  			found = true
   425  			head = dec.buff[fieldsOffset:beforeCurrent]
   426  			tail = dec.buff[dec.offset:len(dec.buff)]
   427  			break
   428  		}
   429  
   430  		if name < fn {
   431  			head = dec.buff[fieldsOffset:beforeCurrent]
   432  			tail = dec.buff[beforeCurrent:len(dec.buff)]
   433  			break
   434  		}
   435  	}
   436  
   437  	if head == nil && tail == nil {
   438  		head = dec.buff[fieldsOffset:dec.offset]
   439  	}
   440  
   441  	return prolog, head, tail, count, found, nil
   442  }
   443  
   444  // Delete returns a new struct where the field name has been removed.
   445  // If name is not an existing field in the struct then the current struct is returned.
   446  func (s Struct) Delete(n string) (Struct, error) {
   447  	prolog, head, tail, count, found, err := s.splitFieldsAt(n)
   448  
   449  	if err != nil {
   450  		return EmptyStruct(s.nbf), err
   451  	}
   452  
   453  	if !found {
   454  		return s, nil
   455  	}
   456  
   457  	w := binaryNomsWriter{make([]byte, len(s.buff)), 0}
   458  	w.writeRaw(prolog)
   459  	w.writeCount(count - 1)
   460  	w.writeRaw(head)
   461  	w.writeRaw(tail)
   462  
   463  	return Struct{valueImpl{s.vrw, s.format(), w.data(), nil}}, nil
   464  }
   465  
   466  func (s Struct) Diff(ctx context.Context, last Struct, changes chan<- ValueChanged) error {
   467  	if s.Equals(last) {
   468  		return nil
   469  	}
   470  	dec1, dec2 := s.decoder(), last.decoder()
   471  	dec1.skipKind()
   472  	dec2.skipKind()
   473  	dec1.skipString() // Ignore names
   474  	dec2.skipString()
   475  	count1, count2 := dec1.readCount(), dec2.readCount()
   476  	i1, i2 := uint64(0), uint64(0)
   477  	var fn1, fn2 string
   478  
   479  	for i1 < count1 && i2 < count2 {
   480  		if fn1 == "" {
   481  			fn1 = dec1.ReadString()
   482  		}
   483  		if fn2 == "" {
   484  			fn2 = dec2.ReadString()
   485  		}
   486  		var change ValueChanged
   487  		if fn1 == fn2 {
   488  			v1, err := dec1.readValue(s.format())
   489  
   490  			if err != nil {
   491  				return err
   492  			}
   493  
   494  			v2, err := dec2.readValue(s.format())
   495  
   496  			if err != nil {
   497  				return err
   498  			}
   499  
   500  			if !v1.Equals(v2) {
   501  				change = ValueChanged{DiffChangeModified, String(fn1), v2, v1}
   502  			}
   503  			i1++
   504  			i2++
   505  			fn1, fn2 = "", ""
   506  		} else if fn1 < fn2 {
   507  			v1, err := dec1.readValue(s.format())
   508  
   509  			if err != nil {
   510  				return err
   511  			}
   512  
   513  			change = ValueChanged{DiffChangeAdded, String(fn1), nil, v1}
   514  			i1++
   515  			fn1 = ""
   516  		} else {
   517  			v2, err := dec2.readValue(s.format())
   518  
   519  			if err != nil {
   520  				return err
   521  			}
   522  
   523  			change = ValueChanged{DiffChangeRemoved, String(fn2), v2, nil}
   524  			i2++
   525  			fn2 = ""
   526  		}
   527  
   528  		if change != (ValueChanged{}) {
   529  			if err := sendChange(ctx, changes, change); err != nil {
   530  				return err
   531  			}
   532  		}
   533  	}
   534  
   535  	for ; i1 < count1; i1++ {
   536  		if fn1 == "" {
   537  			fn1 = dec1.ReadString()
   538  			fmt.Println(fn1)
   539  		}
   540  		v1, err := dec1.readValue(s.format())
   541  
   542  		if err != nil {
   543  			return err
   544  		}
   545  
   546  		if err := sendChange(ctx, changes, ValueChanged{DiffChangeAdded, String(fn1), nil, v1}); err != nil {
   547  			return err
   548  		}
   549  	}
   550  
   551  	for ; i2 < count2; i2++ {
   552  		if fn2 == "" {
   553  			fn2 = dec2.ReadString()
   554  		}
   555  
   556  		v2, err := dec2.readValue(s.format())
   557  
   558  		if err != nil {
   559  			return err
   560  		}
   561  
   562  		if err := sendChange(ctx, changes, ValueChanged{DiffChangeRemoved, String(fn2), v2, nil}); err != nil {
   563  			return err
   564  		}
   565  	}
   566  
   567  	return nil
   568  }
   569  
   570  var escapeChar = "Q"
   571  var headFieldNamePattern = regexp.MustCompile("[a-zA-Z]")
   572  var tailFieldNamePattern = regexp.MustCompile("[a-zA-Z0-9_]")
   573  var escapeRegex = regexp.MustCompile(escapeChar)
   574  
   575  var fieldNameComponentRe = regexp.MustCompile("^" + headFieldNamePattern.String() + tailFieldNamePattern.String() + "*")
   576  
   577  type encodingFunc func(string, *regexp.Regexp) string
   578  
   579  func escapeField(input string, encode encodingFunc) string {
   580  	output := ""
   581  	pattern := headFieldNamePattern
   582  	for _, ch := range input {
   583  		output += encode(string([]rune{ch}), pattern)
   584  		pattern = tailFieldNamePattern
   585  	}
   586  	return output
   587  }
   588  
   589  // EscapeStructField escapes names for use as noms structs with regards to non CSV imported data.
   590  // Disallowed characters are encoded as 'Q<hex-encoded-utf8-bytes>'.
   591  // Note that Q itself is also escaped since it is the escape character.
   592  func EscapeStructField(input string) string {
   593  	if !escapeRegex.MatchString(input) && IsValidStructFieldName(input) {
   594  		return input
   595  	}
   596  	encode := func(s1 string, p *regexp.Regexp) string {
   597  		if p.MatchString(s1) && s1 != escapeChar {
   598  			return s1
   599  		}
   600  
   601  		var hs = fmt.Sprintf("%X", s1)
   602  		var buf bytes.Buffer
   603  		buf.WriteString(escapeChar)
   604  		if len(hs) == 1 {
   605  			buf.WriteString("0")
   606  		}
   607  		buf.WriteString(hs)
   608  		return buf.String()
   609  	}
   610  	return escapeField(input, encode)
   611  }
   612  
   613  // IsValidStructFieldName returns whether the name is valid as a field name in a struct.
   614  // Valid names must start with `a-zA-Z` and after that `a-zA-Z0-9_`.
   615  func IsValidStructFieldName(name string) bool {
   616  	for i, c := range name {
   617  		if i == 0 {
   618  			if !isAlpha(c) {
   619  				return false
   620  			}
   621  		} else if !isAlphaNumOrUnderscore(c) {
   622  			return false
   623  		}
   624  	}
   625  	return len(name) != 0
   626  }
   627  
   628  func isAlpha(c rune) bool {
   629  	return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z'
   630  }
   631  
   632  func isAlphaNumOrUnderscore(c rune) bool {
   633  	return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' || c == '_'
   634  }
   635  
   636  func verifyFields(fs structTypeFields) {
   637  	for i, f := range fs {
   638  		verifyFieldName(f.Name)
   639  		if i > 0 && strings.Compare(fs[i-1].Name, f.Name) >= 0 {
   640  			d.Chk.Fail("Field names must be unique and ordered alphabetically")
   641  		}
   642  	}
   643  }
   644  
   645  func verifyName(name, kind string) {
   646  	if !IsValidStructFieldName(name) {
   647  		d.Panic(`Invalid struct%s name: "%s"`, kind, name)
   648  	}
   649  }
   650  
// verifyFieldName panics (through verifyName) when name is not a valid struct
// field name.
func verifyFieldName(name string) {
	verifyName(name, " field")
}
   654  
   655  func verifyStructName(name string) {
   656  	if name != "" {
   657  		verifyName(name, "")
   658  	}
   659  }
   660  
// readFrom always panics with "unreachable".
// NOTE(review): presumably structs are decoded through readStruct instead and
// this method only exists to satisfy an interface — confirm.
func (s Struct) readFrom(nbf *NomsBinFormat, b *binaryNomsReader) (Value, error) {
	panic("unreachable")
}
   664  
// skip always panics with "unreachable".
// NOTE(review): presumably structs are skipped through skipStruct instead and
// this method only exists to satisfy an interface — confirm.
func (s Struct) skip(nbf *NomsBinFormat, b *binaryNomsReader) {
	panic("unreachable")
}
   668  
// String always panics with "unreachable"; it must not be called, which
// includes formatting a Struct through verbs that invoke String.
func (s Struct) String() string {
	panic("unreachable")
}
   672  
// HumanReadableString always panics with "unreachable"; it must not be
// called.
func (s Struct) HumanReadableString() string {
	panic("unreachable")
}