github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/types/struct.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // This file incorporates work covered by the following copyright and 16 // permission notice: 17 // 18 // Copyright 2016 Attic Labs, Inc. All rights reserved. 19 // Licensed under the Apache License, version 2.0: 20 // http://www.apache.org/licenses/LICENSE-2.0 21 22 package types 23 24 import ( 25 "bytes" 26 "context" 27 "fmt" 28 "regexp" 29 "sort" 30 "strings" 31 32 "github.com/dolthub/dolt/go/store/d" 33 ) 34 35 var EmptyStructType, _ = MakeStructType("") 36 37 func EmptyStruct(nbf *NomsBinFormat) Struct { 38 es, _ := newStruct(nbf, "", nil, nil) 39 return es 40 } 41 42 type StructData map[string]Value 43 44 type Struct struct { 45 valueImpl 46 } 47 48 // readStruct reads the data provided by a decoder and moves the decoder forward. 
49 func readStruct(nbf *NomsBinFormat, dec *valueDecoder) (Struct, error) { 50 start := dec.pos() 51 err := skipStruct(nbf, dec) 52 53 if err != nil { 54 return EmptyStruct(nbf), err 55 } 56 57 end := dec.pos() 58 return Struct{valueImpl{dec.vrw, nbf, dec.byteSlice(start, end), nil}}, nil 59 } 60 61 func skipStruct(nbf *NomsBinFormat, dec *valueDecoder) error { 62 dec.skipKind() 63 dec.skipString() // name 64 count := dec.readCount() 65 for i := uint64(0); i < count; i++ { 66 dec.skipString() 67 err := dec.SkipValue(nbf) 68 69 if err != nil { 70 return err 71 } 72 } 73 74 return nil 75 } 76 77 func isStructSameTypeForSure(nbf *NomsBinFormat, dec *valueDecoder, t *Type) (bool, error) { 78 desc := t.Desc.(StructDesc) 79 dec.skipKind() 80 if !dec.isStringSame(desc.Name) { 81 return false, nil 82 } 83 count := dec.readCount() 84 if count != uint64(len(desc.fields)) { 85 return false, nil 86 } 87 for i := uint64(0); i < count; i++ { 88 if desc.fields[i].Optional { 89 return false, nil 90 } 91 if !dec.isStringSame(desc.fields[i].Name) { 92 return false, nil 93 } 94 95 isSame, err := dec.isValueSameTypeForSure(nbf, desc.fields[i].Type) 96 97 if err != nil { 98 return false, err 99 } 100 101 if !isSame { 102 return false, nil 103 } 104 } 105 return true, nil 106 } 107 108 func walkStruct(nbf *NomsBinFormat, r *refWalker, cb RefCallback) error { 109 r.skipKind() 110 r.skipString() // name 111 count := r.readCount() 112 for i := uint64(0); i < count; i++ { 113 r.skipString() 114 err := r.walkValue(nbf, cb) 115 116 if err != nil { 117 return err 118 } 119 } 120 121 return nil 122 } 123 124 func newStruct(nbf *NomsBinFormat, name string, fieldNames []string, values []Value) (Struct, error) { 125 var vrw ValueReadWriter 126 w := newBinaryNomsWriter() 127 err := StructKind.writeTo(&w, nbf) 128 129 if err != nil { 130 return EmptyStruct(nbf), err 131 } 132 133 w.writeString(name) 134 w.writeCount(uint64(len(fieldNames))) 135 for i := 0; i < len(fieldNames); i++ { 136 
w.writeString(fieldNames[i]) 137 if vrw == nil { 138 vrw = values[i].(valueReadWriter).valueReadWriter() 139 } 140 err := values[i].writeTo(&w, nbf) 141 142 if err != nil { 143 return EmptyStruct(nbf), err 144 } 145 } 146 return Struct{valueImpl{vrw, nbf, w.data(), nil}}, nil 147 } 148 149 func NewStruct(nbf *NomsBinFormat, name string, data StructData) (Struct, error) { 150 verifyStructName(name) 151 fieldNames := make([]string, len(data)) 152 values := make([]Value, len(data)) 153 154 i := 0 155 for name := range data { 156 verifyFieldName(name) 157 fieldNames[i] = name 158 i++ 159 } 160 161 sort.Strings(fieldNames) 162 for i = 0; i < len(fieldNames); i++ { 163 values[i] = data[fieldNames[i]] 164 } 165 166 return newStruct(nbf, name, fieldNames, values) 167 } 168 169 func (s Struct) Format() *NomsBinFormat { 170 return s.format() 171 } 172 173 // StructTemplate allows creating a template for structs with a known shape 174 // (name and fields). If a lot of structs of the same shape are being created 175 // then using a StructTemplate makes that slightly more efficient. 176 type StructTemplate struct { 177 name string 178 fieldNames []string 179 } 180 181 // MakeStructTemplate creates a new StructTemplate or panics if the name and 182 // fields are not valid. 183 func MakeStructTemplate(name string, fieldNames []string) (t StructTemplate) { 184 t = StructTemplate{name, fieldNames} 185 186 verifyStructName(name) 187 if len(fieldNames) == 0 { 188 return 189 } 190 verifyFieldName(fieldNames[0]) 191 for i := 1; i < len(fieldNames); i++ { 192 verifyFieldName(fieldNames[i]) 193 d.PanicIfFalse(fieldNames[i] > fieldNames[i-1]) 194 } 195 return 196 } 197 198 // NewStruct creates a new Struct from the StructTemplate. The order of the 199 // values must match the order of the field names of the StructTemplate. 
200 func (st StructTemplate) NewStruct(nbf *NomsBinFormat, values []Value) (Struct, error) { 201 d.PanicIfFalse(len(st.fieldNames) == len(values)) 202 return newStruct(nbf, st.name, st.fieldNames, values) 203 } 204 205 func (s Struct) Empty() bool { 206 return s.Len() == 0 207 } 208 209 // Value interface 210 func (s Struct) isPrimitive() bool { 211 return false 212 } 213 214 func (s Struct) Value(ctx context.Context) (Value, error) { 215 return s, nil 216 } 217 218 func (s Struct) WalkValues(ctx context.Context, cb ValueCallback) error { 219 dec, count := s.decoderSkipToFields() 220 for i := uint64(0); i < count; i++ { 221 dec.skipString() 222 v, err := dec.readValue(s.format()) 223 224 if err != nil { 225 return err 226 } 227 228 err = cb(v) 229 230 if err != nil { 231 return err 232 } 233 } 234 235 return nil 236 } 237 238 func (s Struct) typeOf() (*Type, error) { 239 dec := s.decoder() 240 return readStructTypeOfValue(s.format(), &dec) 241 } 242 243 func readStructTypeOfValue(nbf *NomsBinFormat, dec *valueDecoder) (*Type, error) { 244 dec.skipKind() 245 name := dec.ReadString() 246 count := dec.readCount() 247 typeFields := make(structTypeFields, count) 248 for i := uint64(0); i < count; i++ { 249 fname := dec.ReadString() 250 t, err := dec.readTypeOfValue(nbf) 251 252 if err != nil { 253 return nil, err 254 } 255 256 typeFields[i] = StructField{ 257 Name: fname, 258 Optional: false, 259 Type: t, 260 } 261 } 262 263 return makeStructTypeQuickly(name, typeFields) 264 } 265 266 func (s Struct) decoderSkipToFields() (valueDecoder, uint64) { 267 dec := s.decoder() 268 dec.skipKind() 269 dec.skipString() 270 count := dec.readCount() 271 return dec, count 272 } 273 274 // Len is the number of fields in the struct. 275 func (s Struct) Len() int { 276 _, count := s.decoderSkipToFields() 277 return int(count) 278 } 279 280 // Name is the name of the struct. 
281 func (s Struct) Name() string { 282 dec := s.decoder() 283 dec.skipKind() 284 return dec.ReadString() 285 } 286 287 // IterFields iterates over the fields, calling cb for every field in the 288 // struct. 289 func (s Struct) IterFields(cb func(name string, value Value) error) error { 290 dec, count := s.decoderSkipToFields() 291 for i := uint64(0); i < count; i++ { 292 fldName := dec.ReadString() 293 val, err := dec.readValue(s.format()) 294 295 if err != nil { 296 return err 297 } 298 299 err = cb(fldName, val) 300 301 if err != nil { 302 return err 303 } 304 } 305 306 return nil 307 } 308 309 type structPartCallbacks interface { 310 name(ctx context.Context, n string) 311 count(c uint64) 312 fieldName(n string) 313 fieldValue(ctx context.Context, v Value) error 314 end() 315 } 316 317 func (s Struct) iterParts(ctx context.Context, cbs structPartCallbacks) error { 318 dec := s.decoder() 319 dec.skipKind() 320 cbs.name(ctx, dec.ReadString()) 321 count := dec.readCount() 322 cbs.count(count) 323 for i := uint64(0); i < count; i++ { 324 cbs.fieldName(dec.ReadString()) 325 val, err := dec.readValue(s.format()) 326 327 if err != nil { 328 return err 329 } 330 331 err = cbs.fieldValue(ctx, val) 332 333 if err != nil { 334 return err 335 } 336 } 337 cbs.end() 338 return nil 339 } 340 341 // MaybeGet returns the value of a field in the struct. If the struct does not a have a field with 342 // the name name then this returns (nil, false). 
343 func (s Struct) MaybeGet(n string) (v Value, found bool, err error) { 344 dec, count := s.decoderSkipToFields() 345 for i := uint64(0); i < count; i++ { 346 name := dec.ReadString() 347 if name == n { 348 found = true 349 v, err = dec.readValue(s.format()) 350 351 if err != nil { 352 return nil, false, err 353 } 354 355 return v, found, nil 356 } 357 358 if name > n { 359 return 360 } 361 362 err = dec.SkipValue(s.format()) 363 364 if err != nil { 365 return nil, false, err 366 } 367 } 368 369 return v, found, nil 370 } 371 372 // Set returns a new struct where the field name has been set to value. If name is not an 373 // existing field in the struct or the type of value is different from the old value of the 374 // struct field a new struct type is created. 375 func (s Struct) Set(n string, v Value) (Struct, error) { 376 verifyFieldName(n) 377 378 prolog, head, tail, count, found, err := s.splitFieldsAt(n) 379 380 if err != nil { 381 return EmptyStruct(s.Format()), err 382 } 383 384 w := binaryNomsWriter{make([]byte, len(s.buff)), 0} 385 w.writeRaw(prolog) 386 387 if !found { 388 count++ 389 } 390 w.writeCount(count) 391 w.writeRaw(head) 392 w.writeString(n) 393 err = v.writeTo(&w, s.format()) 394 395 if err != nil { 396 return EmptyStruct(s.Format()), err 397 } 398 399 w.writeRaw(tail) 400 401 return Struct{valueImpl{s.vrw, s.format(), w.data(), nil}}, nil 402 } 403 404 // splitFieldsAt splits the buffer into two parts. The fields coming before the field we are looking for 405 // and the fields coming after it. 
406 func (s Struct) splitFieldsAt(name string) (prolog, head, tail []byte, count uint64, found bool, err error) { 407 dec := s.decoder() 408 dec.skipKind() 409 dec.skipString() 410 prolog = dec.buff[:dec.offset] 411 count = dec.readCount() 412 fieldsOffset := dec.offset 413 414 for i := uint64(0); i < count; i++ { 415 beforeCurrent := dec.offset 416 fn := dec.ReadString() 417 err = dec.SkipValue(s.format()) 418 419 if err != nil { 420 return nil, nil, nil, 0, false, err 421 } 422 423 if fn == name { 424 found = true 425 head = dec.buff[fieldsOffset:beforeCurrent] 426 tail = dec.buff[dec.offset:len(dec.buff)] 427 break 428 } 429 430 if name < fn { 431 head = dec.buff[fieldsOffset:beforeCurrent] 432 tail = dec.buff[beforeCurrent:len(dec.buff)] 433 break 434 } 435 } 436 437 if head == nil && tail == nil { 438 head = dec.buff[fieldsOffset:dec.offset] 439 } 440 441 return prolog, head, tail, count, found, nil 442 } 443 444 // Delete returns a new struct where the field name has been removed. 445 // If name is not an existing field in the struct then the current struct is returned. 
446 func (s Struct) Delete(n string) (Struct, error) { 447 prolog, head, tail, count, found, err := s.splitFieldsAt(n) 448 449 if err != nil { 450 return EmptyStruct(s.nbf), err 451 } 452 453 if !found { 454 return s, nil 455 } 456 457 w := binaryNomsWriter{make([]byte, len(s.buff)), 0} 458 w.writeRaw(prolog) 459 w.writeCount(count - 1) 460 w.writeRaw(head) 461 w.writeRaw(tail) 462 463 return Struct{valueImpl{s.vrw, s.format(), w.data(), nil}}, nil 464 } 465 466 func (s Struct) Diff(ctx context.Context, last Struct, changes chan<- ValueChanged) error { 467 if s.Equals(last) { 468 return nil 469 } 470 dec1, dec2 := s.decoder(), last.decoder() 471 dec1.skipKind() 472 dec2.skipKind() 473 dec1.skipString() // Ignore names 474 dec2.skipString() 475 count1, count2 := dec1.readCount(), dec2.readCount() 476 i1, i2 := uint64(0), uint64(0) 477 var fn1, fn2 string 478 479 for i1 < count1 && i2 < count2 { 480 if fn1 == "" { 481 fn1 = dec1.ReadString() 482 } 483 if fn2 == "" { 484 fn2 = dec2.ReadString() 485 } 486 var change ValueChanged 487 if fn1 == fn2 { 488 v1, err := dec1.readValue(s.format()) 489 490 if err != nil { 491 return err 492 } 493 494 v2, err := dec2.readValue(s.format()) 495 496 if err != nil { 497 return err 498 } 499 500 if !v1.Equals(v2) { 501 change = ValueChanged{DiffChangeModified, String(fn1), v2, v1} 502 } 503 i1++ 504 i2++ 505 fn1, fn2 = "", "" 506 } else if fn1 < fn2 { 507 v1, err := dec1.readValue(s.format()) 508 509 if err != nil { 510 return err 511 } 512 513 change = ValueChanged{DiffChangeAdded, String(fn1), nil, v1} 514 i1++ 515 fn1 = "" 516 } else { 517 v2, err := dec2.readValue(s.format()) 518 519 if err != nil { 520 return err 521 } 522 523 change = ValueChanged{DiffChangeRemoved, String(fn2), v2, nil} 524 i2++ 525 fn2 = "" 526 } 527 528 if change != (ValueChanged{}) { 529 if err := sendChange(ctx, changes, change); err != nil { 530 return err 531 } 532 } 533 } 534 535 for ; i1 < count1; i1++ { 536 if fn1 == "" { 537 fn1 = dec1.ReadString() 
538 fmt.Println(fn1) 539 } 540 v1, err := dec1.readValue(s.format()) 541 542 if err != nil { 543 return err 544 } 545 546 if err := sendChange(ctx, changes, ValueChanged{DiffChangeAdded, String(fn1), nil, v1}); err != nil { 547 return err 548 } 549 } 550 551 for ; i2 < count2; i2++ { 552 if fn2 == "" { 553 fn2 = dec2.ReadString() 554 } 555 556 v2, err := dec2.readValue(s.format()) 557 558 if err != nil { 559 return err 560 } 561 562 if err := sendChange(ctx, changes, ValueChanged{DiffChangeRemoved, String(fn2), v2, nil}); err != nil { 563 return err 564 } 565 } 566 567 return nil 568 } 569 570 var escapeChar = "Q" 571 var headFieldNamePattern = regexp.MustCompile("[a-zA-Z]") 572 var tailFieldNamePattern = regexp.MustCompile("[a-zA-Z0-9_]") 573 var escapeRegex = regexp.MustCompile(escapeChar) 574 575 var fieldNameComponentRe = regexp.MustCompile("^" + headFieldNamePattern.String() + tailFieldNamePattern.String() + "*") 576 577 type encodingFunc func(string, *regexp.Regexp) string 578 579 func escapeField(input string, encode encodingFunc) string { 580 output := "" 581 pattern := headFieldNamePattern 582 for _, ch := range input { 583 output += encode(string([]rune{ch}), pattern) 584 pattern = tailFieldNamePattern 585 } 586 return output 587 } 588 589 // EscapeStructField escapes names for use as noms structs with regards to non CSV imported data. 590 // Disallowed characters are encoded as 'Q<hex-encoded-utf8-bytes>'. 591 // Note that Q itself is also escaped since it is the escape character. 
592 func EscapeStructField(input string) string { 593 if !escapeRegex.MatchString(input) && IsValidStructFieldName(input) { 594 return input 595 } 596 encode := func(s1 string, p *regexp.Regexp) string { 597 if p.MatchString(s1) && s1 != escapeChar { 598 return s1 599 } 600 601 var hs = fmt.Sprintf("%X", s1) 602 var buf bytes.Buffer 603 buf.WriteString(escapeChar) 604 if len(hs) == 1 { 605 buf.WriteString("0") 606 } 607 buf.WriteString(hs) 608 return buf.String() 609 } 610 return escapeField(input, encode) 611 } 612 613 // IsValidStructFieldName returns whether the name is valid as a field name in a struct. 614 // Valid names must start with `a-zA-Z` and after that `a-zA-Z0-9_`. 615 func IsValidStructFieldName(name string) bool { 616 for i, c := range name { 617 if i == 0 { 618 if !isAlpha(c) { 619 return false 620 } 621 } else if !isAlphaNumOrUnderscore(c) { 622 return false 623 } 624 } 625 return len(name) != 0 626 } 627 628 func isAlpha(c rune) bool { 629 return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' 630 } 631 632 func isAlphaNumOrUnderscore(c rune) bool { 633 return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' || c == '_' 634 } 635 636 func verifyFields(fs structTypeFields) { 637 for i, f := range fs { 638 verifyFieldName(f.Name) 639 if i > 0 && strings.Compare(fs[i-1].Name, f.Name) >= 0 { 640 d.Chk.Fail("Field names must be unique and ordered alphabetically") 641 } 642 } 643 } 644 645 func verifyName(name, kind string) { 646 if !IsValidStructFieldName(name) { 647 d.Panic(`Invalid struct%s name: "%s"`, kind, name) 648 } 649 } 650 651 func verifyFieldName(name string) { 652 verifyName(name, " field") 653 } 654 655 func verifyStructName(name string) { 656 if name != "" { 657 verifyName(name, "") 658 } 659 } 660 661 func (s Struct) readFrom(nbf *NomsBinFormat, b *binaryNomsReader) (Value, error) { 662 panic("unreachable") 663 } 664 665 func (s Struct) skip(nbf *NomsBinFormat, b *binaryNomsReader) { 666 panic("unreachable") 667 } 668 
// String is not implemented for Struct; calling it panics with "unreachable".
// NOTE(review): presumably present only to satisfy an interface declared
// elsewhere in the package — confirm.
func (s Struct) String() string {
	panic("unreachable")
}

// HumanReadableString is not implemented for Struct; calling it panics with
// "unreachable".
func (s Struct) HumanReadableString() string {
	panic("unreachable")
}