github.com/solo-io/cue@v0.4.7/internal/third_party/yaml/scannerc.go (about) 1 package yaml 2 3 import ( 4 "bytes" 5 "fmt" 6 ) 7 8 // Introduction 9 // ************ 10 // 11 // The following notes assume that you are familiar with the YAML specification 12 // (http://yaml.org/spec/1.2/spec.html). We mostly follow it, although in 13 // some cases we are less restrictive than it requires. 14 // 15 // The process of transforming a YAML stream into a sequence of events is 16 // divided into two steps: Scanning and Parsing. 17 // 18 // The Scanner transforms the input stream into a sequence of tokens, while the 19 // Parser transforms the sequence of tokens produced by the Scanner into a 20 // sequence of parsing events. 21 // 22 // The Scanner is rather clever and complicated. The Parser, on the contrary, 23 // is a straightforward implementation of a recursive-descent parser (or, 24 // LL(1) parser, as it is usually called). 25 // 26 // Actually there are two issues of Scanning that might be called "clever", the 27 // rest is quite straightforward. The issues are "block collection start" and 28 // "simple keys". Both issues are explained below in detail. 29 // 30 // Here the Scanning step is explained and implemented. We start with the list 31 // of all the tokens produced by the Scanner together with short descriptions. 32 // 33 // Now, tokens: 34 // 35 // STREAM-START(encoding) # The stream start. 36 // STREAM-END # The stream end. 37 // VERSION-DIRECTIVE(major,minor) # The '%YAML' directive. 38 // TAG-DIRECTIVE(handle,prefix) # The '%TAG' directive. 39 // DOCUMENT-START # '---' 40 // DOCUMENT-END # '...' 41 // BLOCK-SEQUENCE-START # Indentation increase denoting a block 42 // BLOCK-MAPPING-START # sequence or a block mapping. 43 // BLOCK-END # Indentation decrease. 44 // FLOW-SEQUENCE-START # '[' 45 // FLOW-SEQUENCE-END # ']' 46 // FLOW-MAPPING-START # '{' 47 // FLOW-MAPPING-END # '}' 48 // BLOCK-ENTRY # '-' 49 // FLOW-ENTRY # ',' 50 // KEY # '?' 
or nothing (simple keys). 51 // VALUE # ':' 52 // ALIAS(anchor) # '*anchor' 53 // ANCHOR(anchor) # '&anchor' 54 // TAG(handle,suffix) # '!handle!suffix' 55 // SCALAR(value,style) # A scalar. 56 // 57 // The following two tokens are "virtual" tokens denoting the beginning and the 58 // end of the stream: 59 // 60 // STREAM-START(encoding) 61 // STREAM-END 62 // 63 // We pass the information about the input stream encoding with the 64 // STREAM-START token. 65 // 66 // The next two tokens are responsible for directives: 67 // 68 // VERSION-DIRECTIVE(major,minor) 69 // TAG-DIRECTIVE(handle,prefix) 70 // 71 // Example: 72 // 73 // %YAML 1.1 74 // %TAG ! !foo 75 // %TAG !yaml! tag:yaml.org,2002: 76 // --- 77 // 78 // The corresponding sequence of tokens: 79 // 80 // STREAM-START(utf-8) 81 // VERSION-DIRECTIVE(1,1) 82 // TAG-DIRECTIVE("!","!foo") 83 // TAG-DIRECTIVE("!yaml!","tag:yaml.org,2002:") 84 // DOCUMENT-START 85 // STREAM-END 86 // 87 // Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole 88 // line. 89 // 90 // The document start and end indicators are represented by: 91 // 92 // DOCUMENT-START 93 // DOCUMENT-END 94 // 95 // Note that if a YAML stream contains an implicit document (without '---' 96 // and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be 97 // produced. 98 // 99 // In the following examples, we present whole documents together with the 100 // produced tokens. 101 // 102 // 1. An implicit document: 103 // 104 // 'a scalar' 105 // 106 // Tokens: 107 // 108 // STREAM-START(utf-8) 109 // SCALAR("a scalar",single-quoted) 110 // STREAM-END 111 // 112 // 2. An explicit document: 113 // 114 // --- 115 // 'a scalar' 116 // ... 117 // 118 // Tokens: 119 // 120 // STREAM-START(utf-8) 121 // DOCUMENT-START 122 // SCALAR("a scalar",single-quoted) 123 // DOCUMENT-END 124 // STREAM-END 125 // 126 // 3. 
Several documents in a stream: 127 // 128 // 'a scalar' 129 // --- 130 // 'another scalar' 131 // --- 132 // 'yet another scalar' 133 // 134 // Tokens: 135 // 136 // STREAM-START(utf-8) 137 // SCALAR("a scalar",single-quoted) 138 // DOCUMENT-START 139 // SCALAR("another scalar",single-quoted) 140 // DOCUMENT-START 141 // SCALAR("yet another scalar",single-quoted) 142 // STREAM-END 143 // 144 // We have already introduced the SCALAR token above. The following tokens are 145 // used to describe aliases, anchors, tag, and scalars: 146 // 147 // ALIAS(anchor) 148 // ANCHOR(anchor) 149 // TAG(handle,suffix) 150 // SCALAR(value,style) 151 // 152 // The following series of examples illustrate the usage of these tokens: 153 // 154 // 1. A recursive sequence: 155 // 156 // &A [ *A ] 157 // 158 // Tokens: 159 // 160 // STREAM-START(utf-8) 161 // ANCHOR("A") 162 // FLOW-SEQUENCE-START 163 // ALIAS("A") 164 // FLOW-SEQUENCE-END 165 // STREAM-END 166 // 167 // 2. A tagged scalar: 168 // 169 // !!float "3.14" # A good approximation. 170 // 171 // Tokens: 172 // 173 // STREAM-START(utf-8) 174 // TAG("!!","float") 175 // SCALAR("3.14",double-quoted) 176 // STREAM-END 177 // 178 // 3. Various scalar styles: 179 // 180 // --- # Implicit empty plain scalars do not produce tokens. 181 // --- a plain scalar 182 // --- 'a single-quoted scalar' 183 // --- "a double-quoted scalar" 184 // --- |- 185 // a literal scalar 186 // --- >- 187 // a folded 188 // scalar 189 // 190 // Tokens: 191 // 192 // STREAM-START(utf-8) 193 // DOCUMENT-START 194 // DOCUMENT-START 195 // SCALAR("a plain scalar",plain) 196 // DOCUMENT-START 197 // SCALAR("a single-quoted scalar",single-quoted) 198 // DOCUMENT-START 199 // SCALAR("a double-quoted scalar",double-quoted) 200 // DOCUMENT-START 201 // SCALAR("a literal scalar",literal) 202 // DOCUMENT-START 203 // SCALAR("a folded scalar",folded) 204 // STREAM-END 205 // 206 // Now it's time to review collection-related tokens. 
We will start with 207 // flow collections: 208 // 209 // FLOW-SEQUENCE-START 210 // FLOW-SEQUENCE-END 211 // FLOW-MAPPING-START 212 // FLOW-MAPPING-END 213 // FLOW-ENTRY 214 // KEY 215 // VALUE 216 // 217 // The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and 218 // FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}' 219 // correspondingly. FLOW-ENTRY represents the ',' indicator. Finally the 220 // indicators '?' and ':', which are used for denoting mapping keys and values, 221 // are represented by the KEY and VALUE tokens. 222 // 223 // The following examples show flow collections: 224 // 225 // 1. A flow sequence: 226 // 227 // [item 1, item 2, item 3] 228 // 229 // Tokens: 230 // 231 // STREAM-START(utf-8) 232 // FLOW-SEQUENCE-START 233 // SCALAR("item 1",plain) 234 // FLOW-ENTRY 235 // SCALAR("item 2",plain) 236 // FLOW-ENTRY 237 // SCALAR("item 3",plain) 238 // FLOW-SEQUENCE-END 239 // STREAM-END 240 // 241 // 2. A flow mapping: 242 // 243 // { 244 // a simple key: a value, # Note that the KEY token is produced. 245 // ? a complex key: another value, 246 // } 247 // 248 // Tokens: 249 // 250 // STREAM-START(utf-8) 251 // FLOW-MAPPING-START 252 // KEY 253 // SCALAR("a simple key",plain) 254 // VALUE 255 // SCALAR("a value",plain) 256 // FLOW-ENTRY 257 // KEY 258 // SCALAR("a complex key",plain) 259 // VALUE 260 // SCALAR("another value",plain) 261 // FLOW-ENTRY 262 // FLOW-MAPPING-END 263 // STREAM-END 264 // 265 // A simple key is a key which is not denoted by the '?' indicator. Note that 266 // the Scanner still produces the KEY token whenever it encounters a simple key. 
267 // 268 // For scanning block collections, the following tokens are used (note that we 269 // repeat KEY and VALUE here): 270 // 271 // BLOCK-SEQUENCE-START 272 // BLOCK-MAPPING-START 273 // BLOCK-END 274 // BLOCK-ENTRY 275 // KEY 276 // VALUE 277 // 278 // The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation 279 // increase that precedes a block collection (cf. the INDENT token in Python). 280 // The token BLOCK-END denotes indentation decrease that ends a block collection 281 // (cf. the DEDENT token in Python). However YAML has some syntax peculiarities 282 // that make detection of these tokens more complex. 283 // 284 // The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators 285 // '-', '?', and ':' correspondingly. 286 // 287 // The following examples show how the tokens BLOCK-SEQUENCE-START, 288 // BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner: 289 // 290 // 1. Block sequences: 291 // 292 // - item 1 293 // - item 2 294 // - 295 // - item 3.1 296 // - item 3.2 297 // - 298 // key 1: value 1 299 // key 2: value 2 300 // 301 // Tokens: 302 // 303 // STREAM-START(utf-8) 304 // BLOCK-SEQUENCE-START 305 // BLOCK-ENTRY 306 // SCALAR("item 1",plain) 307 // BLOCK-ENTRY 308 // SCALAR("item 2",plain) 309 // BLOCK-ENTRY 310 // BLOCK-SEQUENCE-START 311 // BLOCK-ENTRY 312 // SCALAR("item 3.1",plain) 313 // BLOCK-ENTRY 314 // SCALAR("item 3.2",plain) 315 // BLOCK-END 316 // BLOCK-ENTRY 317 // BLOCK-MAPPING-START 318 // KEY 319 // SCALAR("key 1",plain) 320 // VALUE 321 // SCALAR("value 1",plain) 322 // KEY 323 // SCALAR("key 2",plain) 324 // VALUE 325 // SCALAR("value 2",plain) 326 // BLOCK-END 327 // BLOCK-END 328 // STREAM-END 329 // 330 // 2. Block mappings: 331 // 332 // a simple key: a value # The KEY token is produced here. 333 // ? 
a complex key 334 // : another value 335 // a mapping: 336 // key 1: value 1 337 // key 2: value 2 338 // a sequence: 339 // - item 1 340 // - item 2 341 // 342 // Tokens: 343 // 344 // STREAM-START(utf-8) 345 // BLOCK-MAPPING-START 346 // KEY 347 // SCALAR("a simple key",plain) 348 // VALUE 349 // SCALAR("a value",plain) 350 // KEY 351 // SCALAR("a complex key",plain) 352 // VALUE 353 // SCALAR("another value",plain) 354 // KEY 355 // SCALAR("a mapping",plain) // VALUE 356 // BLOCK-MAPPING-START 357 // KEY 358 // SCALAR("key 1",plain) 359 // VALUE 360 // SCALAR("value 1",plain) 361 // KEY 362 // SCALAR("key 2",plain) 363 // VALUE 364 // SCALAR("value 2",plain) 365 // BLOCK-END 366 // KEY 367 // SCALAR("a sequence",plain) 368 // VALUE 369 // BLOCK-SEQUENCE-START 370 // BLOCK-ENTRY 371 // SCALAR("item 1",plain) 372 // BLOCK-ENTRY 373 // SCALAR("item 2",plain) 374 // BLOCK-END 375 // BLOCK-END 376 // STREAM-END 377 // 378 // YAML does not always require a new block collection to start on a new 379 // line. If the current line contains only '-', '?', and ':' indicators, a new 380 // block collection may start at the current line. The following examples 381 // illustrate this case: 382 // 383 // 1. Collections in a sequence: 384 // 385 // - - item 1 386 // - item 2 387 // - key 1: value 1 388 // key 2: value 2 389 // - ? 
complex key 390 // : complex value 391 // 392 // Tokens: 393 // 394 // STREAM-START(utf-8) 395 // BLOCK-SEQUENCE-START 396 // BLOCK-ENTRY 397 // BLOCK-SEQUENCE-START 398 // BLOCK-ENTRY 399 // SCALAR("item 1",plain) 400 // BLOCK-ENTRY 401 // SCALAR("item 2",plain) 402 // BLOCK-END 403 // BLOCK-ENTRY 404 // BLOCK-MAPPING-START 405 // KEY 406 // SCALAR("key 1",plain) 407 // VALUE 408 // SCALAR("value 1",plain) 409 // KEY 410 // SCALAR("key 2",plain) 411 // VALUE 412 // SCALAR("value 2",plain) 413 // BLOCK-END 414 // BLOCK-ENTRY 415 // BLOCK-MAPPING-START 416 // KEY 417 // SCALAR("complex key") 418 // VALUE 419 // SCALAR("complex value") 420 // BLOCK-END 421 // BLOCK-END 422 // STREAM-END 423 // 424 // 2. Collections in a mapping: 425 // 426 // ? a sequence 427 // : - item 1 428 // - item 2 429 // ? a mapping 430 // : key 1: value 1 431 // key 2: value 2 432 // 433 // Tokens: 434 // 435 // STREAM-START(utf-8) 436 // BLOCK-MAPPING-START 437 // KEY 438 // SCALAR("a sequence",plain) 439 // VALUE 440 // BLOCK-SEQUENCE-START 441 // BLOCK-ENTRY 442 // SCALAR("item 1",plain) 443 // BLOCK-ENTRY 444 // SCALAR("item 2",plain) 445 // BLOCK-END 446 // KEY 447 // SCALAR("a mapping",plain) 448 // VALUE 449 // BLOCK-MAPPING-START 450 // KEY 451 // SCALAR("key 1",plain) 452 // VALUE 453 // SCALAR("value 1",plain) 454 // KEY 455 // SCALAR("key 2",plain) 456 // VALUE 457 // SCALAR("value 2",plain) 458 // BLOCK-END 459 // BLOCK-END 460 // STREAM-END 461 // 462 // YAML also permits non-indented sequences if they are included into a block 463 // mapping. In this case, the token BLOCK-SEQUENCE-START is not produced: 464 // 465 // key: 466 // - item 1 # BLOCK-SEQUENCE-START is NOT produced here. 
467 // - item 2 468 // 469 // Tokens: 470 // 471 // STREAM-START(utf-8) 472 // BLOCK-MAPPING-START 473 // KEY 474 // SCALAR("key",plain) 475 // VALUE 476 // BLOCK-ENTRY 477 // SCALAR("item 1",plain) 478 // BLOCK-ENTRY 479 // SCALAR("item 2",plain) 480 // BLOCK-END 481 // 482 483 // Ensure that the buffer contains the required number of characters. 484 // Return true on success, false on failure (reader error or memory error). 485 func cache(parser *yaml_parser_t, length int) bool { 486 // [Go] This was inlined: !cache(A, B) -> unread < B && !update(A, B) 487 return parser.unread >= length || yaml_parser_update_buffer(parser, length) 488 } 489 490 // Advance the buffer pointer. 491 func skip(parser *yaml_parser_t) { 492 parser.mark.index++ 493 parser.mark.column++ 494 parser.unread-- 495 parser.buffer_pos += width(parser.buffer[parser.buffer_pos]) 496 } 497 498 func skip_line(parser *yaml_parser_t) { 499 if is_crlf(parser.buffer, parser.buffer_pos) { 500 parser.mark.index += 2 501 parser.mark.column = 0 502 parser.mark.line++ 503 parser.unread -= 2 504 parser.buffer_pos += 2 505 } else if is_break(parser.buffer, parser.buffer_pos) { 506 parser.mark.index++ 507 parser.mark.column = 0 508 parser.mark.line++ 509 parser.unread-- 510 parser.buffer_pos += width(parser.buffer[parser.buffer_pos]) 511 } 512 } 513 514 // Copy a character to a string buffer and advance pointers. 515 func read(parser *yaml_parser_t, s []byte) []byte { 516 w := width(parser.buffer[parser.buffer_pos]) 517 if w == 0 { 518 panic("invalid character sequence") 519 } 520 if len(s) == 0 { 521 s = make([]byte, 0, 32) 522 } 523 if w == 1 && len(s)+w <= cap(s) { 524 s = s[:len(s)+1] 525 s[len(s)-1] = parser.buffer[parser.buffer_pos] 526 parser.buffer_pos++ 527 } else { 528 s = append(s, parser.buffer[parser.buffer_pos:parser.buffer_pos+w]...) 
529 parser.buffer_pos += w 530 } 531 parser.mark.index++ 532 parser.mark.column++ 533 parser.unread-- 534 return s 535 } 536 537 // Copy a line break character to a string buffer and advance pointers. 538 func read_line(parser *yaml_parser_t, s []byte) []byte { 539 buf := parser.buffer 540 pos := parser.buffer_pos 541 switch { 542 case buf[pos] == '\r' && buf[pos+1] == '\n': 543 // CR LF . LF 544 s = append(s, '\n') 545 parser.buffer_pos += 2 546 parser.mark.index++ 547 parser.unread-- 548 case buf[pos] == '\r' || buf[pos] == '\n': 549 // CR|LF . LF 550 s = append(s, '\n') 551 parser.buffer_pos += 1 552 case buf[pos] == '\xC2' && buf[pos+1] == '\x85': 553 // NEL . LF 554 s = append(s, '\n') 555 parser.buffer_pos += 2 556 case buf[pos] == '\xE2' && buf[pos+1] == '\x80' && (buf[pos+2] == '\xA8' || buf[pos+2] == '\xA9'): 557 // LS|PS . LS|PS 558 s = append(s, buf[parser.buffer_pos:pos+3]...) 559 parser.buffer_pos += 3 560 default: 561 return s 562 } 563 parser.mark.index++ 564 parser.mark.column = 0 565 parser.mark.line++ 566 parser.unread-- 567 return s 568 } 569 570 // Get the next token. 571 func yaml_parser_scan(parser *yaml_parser_t, token *yaml_token_t) bool { 572 // Erase the token object. 573 *token = yaml_token_t{} // [Go] Is this necessary? 574 575 // No tokens after STREAM-END or error. 576 if parser.stream_end_produced || parser.error != yaml_NO_ERROR { 577 return true 578 } 579 580 // Ensure that the tokens queue contains enough tokens. 581 if !parser.token_available { 582 if !yaml_parser_fetch_more_tokens(parser) { 583 return false 584 } 585 } 586 587 // Fetch the next token from the queue. 588 *token = parser.tokens[parser.tokens_head] 589 parser.tokens_head++ 590 parser.tokens_parsed++ 591 parser.token_available = false 592 593 if token.typ == yaml_STREAM_END_TOKEN { 594 parser.stream_end_produced = true 595 } 596 return true 597 } 598 599 // Set the scanner error and return false. 
600 func yaml_parser_set_scanner_error(parser *yaml_parser_t, context string, context_mark yaml_mark_t, problem string) bool { 601 parser.error = yaml_SCANNER_ERROR 602 parser.context = context 603 parser.context_mark = context_mark 604 parser.problem = problem 605 parser.problem_mark = parser.mark 606 return false 607 } 608 609 func yaml_parser_set_scanner_tag_error(parser *yaml_parser_t, directive bool, context_mark yaml_mark_t, problem string) bool { 610 context := "while parsing a tag" 611 if directive { 612 context = "while parsing a %TAG directive" 613 } 614 return yaml_parser_set_scanner_error(parser, context, context_mark, problem) 615 } 616 617 func trace(args ...interface{}) func() { 618 pargs := append([]interface{}{"+++"}, args...) 619 fmt.Println(pargs...) 620 pargs = append([]interface{}{"---"}, args...) 621 return func() { fmt.Println(pargs...) } 622 } 623 624 // Ensure that the tokens queue contains at least one token which can be 625 // returned to the Parser. 626 func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool { 627 // While we need more tokens to fetch, do it. 628 for { 629 // Check if we really need to fetch more tokens. 630 need_more_tokens := false 631 632 if parser.tokens_head == len(parser.tokens) { 633 // Queue is empty. 634 need_more_tokens = true 635 } else { 636 // Check if any potential simple key may occupy the head position. 637 if !yaml_parser_stale_simple_keys(parser) { 638 return false 639 } 640 641 for i := range parser.simple_keys { 642 simple_key := &parser.simple_keys[i] 643 if simple_key.possible && simple_key.token_number == parser.tokens_parsed { 644 need_more_tokens = true 645 break 646 } 647 } 648 } 649 650 // We are finished. 651 if !need_more_tokens { 652 break 653 } 654 // Fetch the next token. 655 if !yaml_parser_fetch_next_token(parser) { 656 return false 657 } 658 } 659 660 parser.token_available = true 661 return true 662 } 663 664 // The dispatcher for token fetchers. 
665 func yaml_parser_fetch_next_token(parser *yaml_parser_t) bool { 666 // Ensure that the buffer is initialized. 667 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 668 return false 669 } 670 671 // Check if we just started scanning. Fetch STREAM-START then. 672 if !parser.stream_start_produced { 673 return yaml_parser_fetch_stream_start(parser) 674 } 675 676 // Eat whitespaces and comments until we reach the next token. 677 if !yaml_parser_scan_to_next_token(parser) { 678 return false 679 } 680 681 // Remove obsolete potential simple keys. 682 if !yaml_parser_stale_simple_keys(parser) { 683 return false 684 } 685 686 // Check the indentation level against the current column. 687 if !yaml_parser_unroll_indent(parser, parser.mark.column) { 688 return false 689 } 690 691 // Ensure that the buffer contains at least 4 characters. 4 is the length 692 // of the longest indicators ('--- ' and '... '). 693 if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) { 694 return false 695 } 696 697 // Is it the end of the stream? 698 if is_z(parser.buffer, parser.buffer_pos) { 699 return yaml_parser_fetch_stream_end(parser) 700 } 701 702 // Is it a directive? 703 if parser.mark.column == 0 && parser.buffer[parser.buffer_pos] == '%' { 704 return yaml_parser_fetch_directive(parser) 705 } 706 707 buf := parser.buffer 708 pos := parser.buffer_pos 709 710 // Is it the document start indicator? 711 if parser.mark.column == 0 && buf[pos] == '-' && buf[pos+1] == '-' && buf[pos+2] == '-' && is_blankz(buf, pos+3) { 712 return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_START_TOKEN) 713 } 714 715 // Is it the document end indicator? 716 if parser.mark.column == 0 && buf[pos] == '.' && buf[pos+1] == '.' && buf[pos+2] == '.' && is_blankz(buf, pos+3) { 717 return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_END_TOKEN) 718 } 719 720 // Is it the flow sequence start indicator? 
721 if buf[pos] == '[' { 722 return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_SEQUENCE_START_TOKEN) 723 } 724 725 // Is it the flow mapping start indicator? 726 if parser.buffer[parser.buffer_pos] == '{' { 727 return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_MAPPING_START_TOKEN) 728 } 729 730 // Is it the flow sequence end indicator? 731 if parser.buffer[parser.buffer_pos] == ']' { 732 return yaml_parser_fetch_flow_collection_end(parser, 733 yaml_FLOW_SEQUENCE_END_TOKEN) 734 } 735 736 // Is it the flow mapping end indicator? 737 if parser.buffer[parser.buffer_pos] == '}' { 738 return yaml_parser_fetch_flow_collection_end(parser, 739 yaml_FLOW_MAPPING_END_TOKEN) 740 } 741 742 // Is it the flow entry indicator? 743 if parser.buffer[parser.buffer_pos] == ',' { 744 return yaml_parser_fetch_flow_entry(parser) 745 } 746 747 // Is it the block entry indicator? 748 if parser.buffer[parser.buffer_pos] == '-' && is_blankz(parser.buffer, parser.buffer_pos+1) { 749 return yaml_parser_fetch_block_entry(parser) 750 } 751 752 // Is it the key indicator? 753 if parser.buffer[parser.buffer_pos] == '?' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) { 754 return yaml_parser_fetch_key(parser) 755 } 756 757 // Is it the value indicator? 758 if parser.buffer[parser.buffer_pos] == ':' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) { 759 return yaml_parser_fetch_value(parser) 760 } 761 762 // Is it an alias? 763 if parser.buffer[parser.buffer_pos] == '*' { 764 return yaml_parser_fetch_anchor(parser, yaml_ALIAS_TOKEN) 765 } 766 767 // Is it an anchor? 768 if parser.buffer[parser.buffer_pos] == '&' { 769 return yaml_parser_fetch_anchor(parser, yaml_ANCHOR_TOKEN) 770 } 771 772 // Is it a tag? 773 if parser.buffer[parser.buffer_pos] == '!' { 774 return yaml_parser_fetch_tag(parser) 775 } 776 777 // Is it a literal scalar? 
778 if parser.buffer[parser.buffer_pos] == '|' && parser.flow_level == 0 { 779 return yaml_parser_fetch_block_scalar(parser, true) 780 } 781 782 // Is it a folded scalar? 783 if parser.buffer[parser.buffer_pos] == '>' && parser.flow_level == 0 { 784 return yaml_parser_fetch_block_scalar(parser, false) 785 } 786 787 // Is it a single-quoted scalar? 788 if parser.buffer[parser.buffer_pos] == '\'' { 789 return yaml_parser_fetch_flow_scalar(parser, true) 790 } 791 792 // Is it a double-quoted scalar? 793 if parser.buffer[parser.buffer_pos] == '"' { 794 return yaml_parser_fetch_flow_scalar(parser, false) 795 } 796 797 // Is it a plain scalar? 798 // 799 // A plain scalar may start with any non-blank characters except 800 // 801 // '-', '?', ':', ',', '[', ']', '{', '}', 802 // '#', '&', '*', '!', '|', '>', '\'', '\"', 803 // '%', '@', '`'. 804 // 805 // In the block context (and, for the '-' indicator, in the flow context 806 // too), it may also start with the characters 807 // 808 // '-', '?', ':' 809 // 810 // if it is followed by a non-space character. 811 // 812 // The last rule is more restrictive than the specification requires. 813 // [Go] Make this logic more reasonable. 814 //switch parser.buffer[parser.buffer_pos] { 815 //case '-', '?', ':', ',', '?', '-', ',', ':', ']', '[', '}', '{', '&', '#', '!', '*', '>', '|', '"', '\'', '@', '%', '-', '`': 816 //} 817 if !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '-' || 818 parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':' || 819 parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '[' || 820 parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' || 821 parser.buffer[parser.buffer_pos] == '}' || parser.buffer[parser.buffer_pos] == '#' || 822 parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '*' || 823 parser.buffer[parser.buffer_pos] == '!' 
|| parser.buffer[parser.buffer_pos] == '|' || 824 parser.buffer[parser.buffer_pos] == '>' || parser.buffer[parser.buffer_pos] == '\'' || 825 parser.buffer[parser.buffer_pos] == '"' || parser.buffer[parser.buffer_pos] == '%' || 826 parser.buffer[parser.buffer_pos] == '@' || parser.buffer[parser.buffer_pos] == '`') || 827 (parser.buffer[parser.buffer_pos] == '-' && !is_blank(parser.buffer, parser.buffer_pos+1)) || 828 (parser.flow_level == 0 && 829 (parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':') && 830 !is_blankz(parser.buffer, parser.buffer_pos+1)) { 831 return yaml_parser_fetch_plain_scalar(parser) 832 } 833 834 // If we don't determine the token type so far, it is an error. 835 return yaml_parser_set_scanner_error(parser, 836 "while scanning for the next token", parser.mark, 837 "found character that cannot start any token") 838 } 839 840 // Check the list of potential simple keys and remove the positions that 841 // cannot contain simple keys anymore. 842 func yaml_parser_stale_simple_keys(parser *yaml_parser_t) bool { 843 // Check for a potential simple key for each flow level. 844 for i := range parser.simple_keys { 845 simple_key := &parser.simple_keys[i] 846 847 // The specification requires that a simple key 848 // 849 // - is limited to a single line, 850 // - is shorter than 1024 characters. 851 if simple_key.possible && (simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index) { 852 853 // Check if the potential simple key to be removed is required. 854 if simple_key.required { 855 return yaml_parser_set_scanner_error(parser, 856 "while scanning a simple key", simple_key.mark, 857 "could not find expected ':'") 858 } 859 simple_key.possible = false 860 } 861 } 862 return true 863 } 864 865 // Check if a simple key may start at the current position and add it if 866 // needed. 
867 func yaml_parser_save_simple_key(parser *yaml_parser_t) bool { 868 // A simple key is required at the current position if the scanner is in 869 // the block context and the current column coincides with the indentation 870 // level. 871 872 required := parser.flow_level == 0 && parser.indent == parser.mark.column 873 874 // 875 // If the current position may start a simple key, save it. 876 // 877 if parser.simple_key_allowed { 878 simple_key := yaml_simple_key_t{ 879 possible: true, 880 required: required, 881 token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head), 882 } 883 simple_key.mark = parser.mark 884 885 if !yaml_parser_remove_simple_key(parser) { 886 return false 887 } 888 parser.simple_keys[len(parser.simple_keys)-1] = simple_key 889 } 890 return true 891 } 892 893 // Remove a potential simple key at the current flow level. 894 func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool { 895 i := len(parser.simple_keys) - 1 896 if parser.simple_keys[i].possible { 897 // If the key is required, it is an error. 898 if parser.simple_keys[i].required { 899 return yaml_parser_set_scanner_error(parser, 900 "while scanning a simple key", parser.simple_keys[i].mark, 901 "could not find expected ':'") 902 } 903 } 904 // Remove the key from the stack. 905 parser.simple_keys[i].possible = false 906 return true 907 } 908 909 // Increase the flow level and resize the simple key list if needed. 910 func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool { 911 // Reset the simple key on the next level. 912 parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{}) 913 914 // Increase the flow level. 915 parser.flow_level++ 916 return true 917 } 918 919 // Decrease the flow level. 
920 func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool { 921 if parser.flow_level > 0 { 922 parser.flow_level-- 923 parser.simple_keys = parser.simple_keys[:len(parser.simple_keys)-1] 924 } 925 return true 926 } 927 928 // Push the current indentation level to the stack and set the new level 929 // the current column is greater than the indentation level. In this case, 930 // append or insert the specified token into the token queue. 931 func yaml_parser_roll_indent(parser *yaml_parser_t, column, number int, typ yaml_token_type_t, mark yaml_mark_t) bool { 932 // In the flow context, do nothing. 933 if parser.flow_level > 0 { 934 return true 935 } 936 937 if parser.indent < column { 938 // Push the current indentation level to the stack and set the new 939 // indentation level. 940 parser.indents = append(parser.indents, parser.indent) 941 parser.indent = column 942 943 // Create a token and insert it into the queue. 944 token := yaml_token_t{ 945 typ: typ, 946 start_mark: mark, 947 end_mark: mark, 948 } 949 if number > -1 { 950 number -= parser.tokens_parsed 951 } 952 yaml_insert_token(parser, number, &token) 953 } 954 return true 955 } 956 957 // Pop indentation levels from the indents stack until the current level 958 // becomes less or equal to the column. For each indentation level, append 959 // the BLOCK-END token. 960 func yaml_parser_unroll_indent(parser *yaml_parser_t, column int) bool { 961 // In the flow context, do nothing. 962 if parser.flow_level > 0 { 963 return true 964 } 965 966 // Loop through the indentation levels in the stack. 967 for parser.indent > column { 968 // Create a token and append it to the queue. 969 token := yaml_token_t{ 970 typ: yaml_BLOCK_END_TOKEN, 971 start_mark: parser.mark, 972 end_mark: parser.mark, 973 } 974 yaml_insert_token(parser, -1, &token) 975 976 // Pop the indentation level. 
// Restore the most recently saved indentation level and drop it from the stack.
		parser.indent = parser.indents[len(parser.indents)-1]
		parser.indents = parser.indents[:len(parser.indents)-1]
	}
	return true
}

// Initialize the scanner and produce the STREAM-START token.
//
// Resets the indentation to -1, pushes an empty entry onto the simple key
// stack, allows simple keys, records that the stream has started, and queues
// a STREAM-START token carrying parser.encoding. Always returns true.
func yaml_parser_fetch_stream_start(parser *yaml_parser_t) bool {

	// Set the initial indentation.
	parser.indent = -1

	// Initialize the simple key stack.
	parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{})

	// A simple key is allowed at the beginning of the stream.
	parser.simple_key_allowed = true

	// We have started.
	parser.stream_start_produced = true

	// Create the STREAM-START token and append it to the queue.
	// The token is zero-width: both marks are the current position.
	token := yaml_token_t{
		typ:        yaml_STREAM_START_TOKEN,
		start_mark: parser.mark,
		end_mark:   parser.mark,
		encoding:   parser.encoding,
	}
	yaml_insert_token(parser, -1, &token)
	return true
}

// Produce the STREAM-END token and shut down the scanner.
//
// Returns false if unrolling the indentation or removing the pending simple
// key fails.
func yaml_parser_fetch_stream_end(parser *yaml_parser_t) bool {

	// Force new line: if the stream ends mid-line, move the mark to the start
	// of the following line.
	if parser.mark.column != 0 {
		parser.mark.column = 0
		parser.mark.line++
	}

	// Reset the indentation level.
	if !yaml_parser_unroll_indent(parser, -1) {
		return false
	}

	// Reset simple keys.
	if !yaml_parser_remove_simple_key(parser) {
		return false
	}

	parser.simple_key_allowed = false

	// Create the STREAM-END token and append it to the queue.
	token := yaml_token_t{
		typ:        yaml_STREAM_END_TOKEN,
		start_mark: parser.mark,
		end_mark:   parser.mark,
	}
	yaml_insert_token(parser, -1, &token)
	return true
}

// Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token.
//
// Returns false if resetting the scanner state or scanning the directive
// fails.
func yaml_parser_fetch_directive(parser *yaml_parser_t) bool {
	// Reset the indentation level.
	if !yaml_parser_unroll_indent(parser, -1) {
		return false
	}

	// Reset simple keys.
	if !yaml_parser_remove_simple_key(parser) {
		return false
	}

	parser.simple_key_allowed = false

	// Create the VERSION-DIRECTIVE or TAG-DIRECTIVE token.
	token := yaml_token_t{}
	if !yaml_parser_scan_directive(parser, &token) {
		return false
	}
	// Append the token to the queue.
	yaml_insert_token(parser, -1, &token)
	return true
}

// Produce the DOCUMENT-START or DOCUMENT-END token.
//
// typ is the token type to emit; the indicator itself is consumed as three
// characters.
func yaml_parser_fetch_document_indicator(parser *yaml_parser_t, typ yaml_token_type_t) bool {
	// Reset the indentation level.
	if !yaml_parser_unroll_indent(parser, -1) {
		return false
	}

	// Reset simple keys.
	if !yaml_parser_remove_simple_key(parser) {
		return false
	}

	parser.simple_key_allowed = false

	// Consume the token.
	start_mark := parser.mark

	skip(parser)
	skip(parser)
	skip(parser)

	end_mark := parser.mark

	// Create the DOCUMENT-START or DOCUMENT-END token.
	token := yaml_token_t{
		typ:        typ,
		start_mark: start_mark,
		end_mark:   end_mark,
	}
	// Append the token to the queue.
	yaml_insert_token(parser, -1, &token)
	return true
}

// Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
//
// typ is the token type to emit for the single indicator character.
func yaml_parser_fetch_flow_collection_start(parser *yaml_parser_t, typ yaml_token_type_t) bool {
	// The indicators '[' and '{' may start a simple key.
	if !yaml_parser_save_simple_key(parser) {
		return false
	}

	// Increase the flow level.
	if !yaml_parser_increase_flow_level(parser) {
		return false
	}

	// A simple key may follow the indicators '[' and '{'.
	parser.simple_key_allowed = true

	// Consume the token.
	start_mark := parser.mark
	skip(parser)
	end_mark := parser.mark

	// Create the FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
	token := yaml_token_t{
		typ:        typ,
		start_mark: start_mark,
		end_mark:   end_mark,
	}
	// Append the token to the queue.
	yaml_insert_token(parser, -1, &token)
	return true
}

// Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
//
// typ is the token type to emit for the single indicator character.
func yaml_parser_fetch_flow_collection_end(parser *yaml_parser_t, typ yaml_token_type_t) bool {
	// Reset any potential simple key on the current flow level.
	if !yaml_parser_remove_simple_key(parser) {
		return false
	}

	// Decrease the flow level.
	if !yaml_parser_decrease_flow_level(parser) {
		return false
	}

	// No simple keys after the indicators ']' and '}'.
	parser.simple_key_allowed = false

	// Consume the token.

	start_mark := parser.mark
	skip(parser)
	end_mark := parser.mark

	// Create the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
	token := yaml_token_t{
		typ:        typ,
		start_mark: start_mark,
		end_mark:   end_mark,
	}
	// Append the token to the queue.
	yaml_insert_token(parser, -1, &token)
	return true
}

// Produce the FLOW-ENTRY token.
func yaml_parser_fetch_flow_entry(parser *yaml_parser_t) bool {
	// Reset any potential simple keys on the current flow level.
	if !yaml_parser_remove_simple_key(parser) {
		return false
	}

	// Simple keys are allowed after ','.
	parser.simple_key_allowed = true

	// Consume the token.
	start_mark := parser.mark
	skip(parser)
	end_mark := parser.mark

	// Create the FLOW-ENTRY token and append it to the queue.
	token := yaml_token_t{
		typ:        yaml_FLOW_ENTRY_TOKEN,
		start_mark: start_mark,
		end_mark:   end_mark,
	}
	yaml_insert_token(parser, -1, &token)
	return true
}

// Produce the BLOCK-ENTRY token.
1187 func yaml_parser_fetch_block_entry(parser *yaml_parser_t) bool { 1188 // Check if the scanner is in the block context. 1189 if parser.flow_level == 0 { 1190 // Check if we are allowed to start a new entry. 1191 if !parser.simple_key_allowed { 1192 return yaml_parser_set_scanner_error(parser, "", parser.mark, 1193 "block sequence entries are not allowed in this context") 1194 } 1195 // Add the BLOCK-SEQUENCE-START token if needed. 1196 if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_SEQUENCE_START_TOKEN, parser.mark) { 1197 return false 1198 } 1199 } else { 1200 // It is an error for the '-' indicator to occur in the flow context, 1201 // but we let the Parser detect and report about it because the Parser 1202 // is able to point to the context. 1203 } 1204 1205 // Reset any potential simple keys on the current flow level. 1206 if !yaml_parser_remove_simple_key(parser) { 1207 return false 1208 } 1209 1210 // Simple keys are allowed after '-'. 1211 parser.simple_key_allowed = true 1212 1213 // Consume the token. 1214 start_mark := parser.mark 1215 skip(parser) 1216 end_mark := parser.mark 1217 1218 // Create the BLOCK-ENTRY token and append it to the queue. 1219 token := yaml_token_t{ 1220 typ: yaml_BLOCK_ENTRY_TOKEN, 1221 start_mark: start_mark, 1222 end_mark: end_mark, 1223 } 1224 yaml_insert_token(parser, -1, &token) 1225 return true 1226 } 1227 1228 // Produce the KEY token. 1229 func yaml_parser_fetch_key(parser *yaml_parser_t) bool { 1230 1231 // In the block context, additional checks are required. 1232 if parser.flow_level == 0 { 1233 // Check if we are allowed to start a new key (not nessesary simple). 1234 if !parser.simple_key_allowed { 1235 return yaml_parser_set_scanner_error(parser, "", parser.mark, 1236 "mapping keys are not allowed in this context") 1237 } 1238 // Add the BLOCK-MAPPING-START token if needed. 
1239 if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) { 1240 return false 1241 } 1242 } 1243 1244 // Reset any potential simple keys on the current flow level. 1245 if !yaml_parser_remove_simple_key(parser) { 1246 return false 1247 } 1248 1249 // Simple keys are allowed after '?' in the block context. 1250 parser.simple_key_allowed = parser.flow_level == 0 1251 1252 // Consume the token. 1253 start_mark := parser.mark 1254 skip(parser) 1255 end_mark := parser.mark 1256 1257 // Create the KEY token and append it to the queue. 1258 token := yaml_token_t{ 1259 typ: yaml_KEY_TOKEN, 1260 start_mark: start_mark, 1261 end_mark: end_mark, 1262 } 1263 yaml_insert_token(parser, -1, &token) 1264 return true 1265 } 1266 1267 // Produce the VALUE token. 1268 func yaml_parser_fetch_value(parser *yaml_parser_t) bool { 1269 1270 simple_key := &parser.simple_keys[len(parser.simple_keys)-1] 1271 1272 // Have we found a simple key? 1273 if simple_key.possible { 1274 // Create the KEY token and insert it into the queue. 1275 token := yaml_token_t{ 1276 typ: yaml_KEY_TOKEN, 1277 start_mark: simple_key.mark, 1278 end_mark: simple_key.mark, 1279 } 1280 yaml_insert_token(parser, simple_key.token_number-parser.tokens_parsed, &token) 1281 1282 // In the block context, we may need to add the BLOCK-MAPPING-START token. 1283 if !yaml_parser_roll_indent(parser, simple_key.mark.column, 1284 simple_key.token_number, 1285 yaml_BLOCK_MAPPING_START_TOKEN, simple_key.mark) { 1286 return false 1287 } 1288 1289 // Remove the simple key. 1290 simple_key.possible = false 1291 1292 // A simple key cannot follow another simple key. 1293 parser.simple_key_allowed = false 1294 1295 } else { 1296 // The ':' indicator follows a complex key. 1297 1298 // In the block context, extra checks are required. 1299 if parser.flow_level == 0 { 1300 1301 // Check if we are allowed to start a complex value. 
1302 if !parser.simple_key_allowed { 1303 return yaml_parser_set_scanner_error(parser, "", parser.mark, 1304 "mapping values are not allowed in this context") 1305 } 1306 1307 // Add the BLOCK-MAPPING-START token if needed. 1308 if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) { 1309 return false 1310 } 1311 } 1312 1313 // Simple keys after ':' are allowed in the block context. 1314 parser.simple_key_allowed = parser.flow_level == 0 1315 } 1316 1317 // Consume the token. 1318 start_mark := parser.mark 1319 skip(parser) 1320 end_mark := parser.mark 1321 1322 // Create the VALUE token and append it to the queue. 1323 token := yaml_token_t{ 1324 typ: yaml_VALUE_TOKEN, 1325 start_mark: start_mark, 1326 end_mark: end_mark, 1327 } 1328 yaml_insert_token(parser, -1, &token) 1329 return true 1330 } 1331 1332 // Produce the ALIAS or ANCHOR token. 1333 func yaml_parser_fetch_anchor(parser *yaml_parser_t, typ yaml_token_type_t) bool { 1334 // An anchor or an alias could be a simple key. 1335 if !yaml_parser_save_simple_key(parser) { 1336 return false 1337 } 1338 1339 // A simple key cannot follow an anchor or an alias. 1340 parser.simple_key_allowed = false 1341 1342 // Create the ALIAS or ANCHOR token and append it to the queue. 1343 var token yaml_token_t 1344 if !yaml_parser_scan_anchor(parser, &token, typ) { 1345 return false 1346 } 1347 yaml_insert_token(parser, -1, &token) 1348 return true 1349 } 1350 1351 // Produce the TAG token. 1352 func yaml_parser_fetch_tag(parser *yaml_parser_t) bool { 1353 // A tag could be a simple key. 1354 if !yaml_parser_save_simple_key(parser) { 1355 return false 1356 } 1357 1358 // A simple key cannot follow a tag. 1359 parser.simple_key_allowed = false 1360 1361 // Create the TAG token and append it to the queue. 
1362 var token yaml_token_t 1363 if !yaml_parser_scan_tag(parser, &token) { 1364 return false 1365 } 1366 yaml_insert_token(parser, -1, &token) 1367 return true 1368 } 1369 1370 // Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens. 1371 func yaml_parser_fetch_block_scalar(parser *yaml_parser_t, literal bool) bool { 1372 // Remove any potential simple keys. 1373 if !yaml_parser_remove_simple_key(parser) { 1374 return false 1375 } 1376 1377 // A simple key may follow a block scalar. 1378 parser.simple_key_allowed = true 1379 1380 // Create the SCALAR token and append it to the queue. 1381 var token yaml_token_t 1382 if !yaml_parser_scan_block_scalar(parser, &token, literal) { 1383 return false 1384 } 1385 yaml_insert_token(parser, -1, &token) 1386 return true 1387 } 1388 1389 // Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens. 1390 func yaml_parser_fetch_flow_scalar(parser *yaml_parser_t, single bool) bool { 1391 // A plain scalar could be a simple key. 1392 if !yaml_parser_save_simple_key(parser) { 1393 return false 1394 } 1395 1396 // A simple key cannot follow a flow scalar. 1397 parser.simple_key_allowed = false 1398 1399 // Create the SCALAR token and append it to the queue. 1400 var token yaml_token_t 1401 if !yaml_parser_scan_flow_scalar(parser, &token, single) { 1402 return false 1403 } 1404 yaml_insert_token(parser, -1, &token) 1405 return true 1406 } 1407 1408 // Produce the SCALAR(...,plain) token. 1409 func yaml_parser_fetch_plain_scalar(parser *yaml_parser_t) bool { 1410 // A plain scalar could be a simple key. 1411 if !yaml_parser_save_simple_key(parser) { 1412 return false 1413 } 1414 1415 // A simple key cannot follow a flow scalar. 1416 parser.simple_key_allowed = false 1417 1418 // Create the SCALAR token and append it to the queue. 
1419 var token yaml_token_t 1420 if !yaml_parser_scan_plain_scalar(parser, &token) { 1421 return false 1422 } 1423 yaml_insert_token(parser, -1, &token) 1424 return true 1425 } 1426 1427 // Eat whitespaces and comments until the next token is found. 1428 func yaml_parser_scan_to_next_token(parser *yaml_parser_t) bool { 1429 1430 parser.linesSinceLast = 0 1431 parser.spacesSinceLast = 0 1432 1433 // Until the next token is not found. 1434 for { 1435 // Allow the BOM mark to start a line. 1436 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1437 return false 1438 } 1439 if parser.mark.column == 0 && is_bom(parser.buffer, parser.buffer_pos) { 1440 skip(parser) 1441 } 1442 1443 // Eat whitespaces. 1444 // Tabs are allowed: 1445 // - in the flow context 1446 // - in the block context, but not at the beginning of the line or 1447 // after '-', '?', or ':' (complex value). 1448 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1449 return false 1450 } 1451 1452 for parser.buffer[parser.buffer_pos] == ' ' || ((parser.flow_level > 0 || !parser.simple_key_allowed) && parser.buffer[parser.buffer_pos] == '\t') { 1453 skip(parser) 1454 parser.spacesSinceLast++ 1455 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1456 return false 1457 } 1458 } 1459 1460 // Eat a comment until a line break. 1461 if parser.buffer[parser.buffer_pos] == '#' { 1462 m := parser.mark 1463 parser.comment_buffer = parser.comment_buffer[:0] 1464 for !is_breakz(parser.buffer, parser.buffer_pos) { 1465 p := parser.buffer_pos 1466 skip(parser) 1467 parser.comment_buffer = append(parser.comment_buffer, 1468 parser.buffer[p:parser.buffer_pos]...) 1469 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1470 return false 1471 } 1472 } 1473 add_comment(parser, m, string(parser.comment_buffer)) 1474 } 1475 1476 // If it is a line break, eat it. 
1477 if is_break(parser.buffer, parser.buffer_pos) { 1478 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 1479 return false 1480 } 1481 skip_line(parser) 1482 parser.linesSinceLast++ 1483 1484 // In the block context, a new line may start a simple key. 1485 if parser.flow_level == 0 { 1486 parser.simple_key_allowed = true 1487 } 1488 } else { 1489 break // We have found a token. 1490 } 1491 } 1492 1493 return true 1494 } 1495 1496 // Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token. 1497 // 1498 // Scope: 1499 // %YAML 1.1 # a comment \n 1500 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 1501 // %TAG !yaml! tag:yaml.org,2002: \n 1502 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 1503 // 1504 func yaml_parser_scan_directive(parser *yaml_parser_t, token *yaml_token_t) bool { 1505 // Eat '%'. 1506 start_mark := parser.mark 1507 skip(parser) 1508 1509 // Scan the directive name. 1510 var name []byte 1511 if !yaml_parser_scan_directive_name(parser, start_mark, &name) { 1512 return false 1513 } 1514 1515 // Is it a YAML directive? 1516 if bytes.Equal(name, []byte("YAML")) { 1517 // Scan the VERSION directive value. 1518 var major, minor int8 1519 if !yaml_parser_scan_version_directive_value(parser, start_mark, &major, &minor) { 1520 return false 1521 } 1522 end_mark := parser.mark 1523 1524 // Create a VERSION-DIRECTIVE token. 1525 *token = yaml_token_t{ 1526 typ: yaml_VERSION_DIRECTIVE_TOKEN, 1527 start_mark: start_mark, 1528 end_mark: end_mark, 1529 major: major, 1530 minor: minor, 1531 } 1532 1533 // Is it a TAG directive? 1534 } else if bytes.Equal(name, []byte("TAG")) { 1535 // Scan the TAG directive value. 1536 var handle, prefix []byte 1537 if !yaml_parser_scan_tag_directive_value(parser, start_mark, &handle, &prefix) { 1538 return false 1539 } 1540 end_mark := parser.mark 1541 1542 // Create a TAG-DIRECTIVE token. 
1543 *token = yaml_token_t{ 1544 typ: yaml_TAG_DIRECTIVE_TOKEN, 1545 start_mark: start_mark, 1546 end_mark: end_mark, 1547 value: handle, 1548 prefix: prefix, 1549 } 1550 1551 // Unknown directive. 1552 } else { 1553 yaml_parser_set_scanner_error(parser, "while scanning a directive", 1554 start_mark, "found unknown directive name") 1555 return false 1556 } 1557 1558 // Eat the rest of the line including any comments. 1559 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1560 return false 1561 } 1562 1563 for is_blank(parser.buffer, parser.buffer_pos) { 1564 skip(parser) 1565 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1566 return false 1567 } 1568 } 1569 1570 if parser.buffer[parser.buffer_pos] == '#' { 1571 m := parser.mark 1572 parser.comment_buffer = parser.comment_buffer[:0] 1573 for !is_breakz(parser.buffer, parser.buffer_pos) { 1574 p := parser.buffer_pos 1575 skip(parser) 1576 parser.comment_buffer = append(parser.comment_buffer, 1577 parser.buffer[p:parser.buffer_pos]...) 1578 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1579 return false 1580 } 1581 } 1582 add_comment(parser, m, string(parser.comment_buffer)) 1583 } 1584 1585 // Check if we are at the end of the line. 1586 if !is_breakz(parser.buffer, parser.buffer_pos) { 1587 yaml_parser_set_scanner_error(parser, "while scanning a directive", 1588 start_mark, "did not find expected comment or line break") 1589 return false 1590 } 1591 1592 // Eat a line break. 1593 if is_break(parser.buffer, parser.buffer_pos) { 1594 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 1595 return false 1596 } 1597 skip_line(parser) 1598 } 1599 1600 return true 1601 } 1602 1603 // Scan the directive name. 1604 // 1605 // Scope: 1606 // %YAML 1.1 # a comment \n 1607 // ^^^^ 1608 // %TAG !yaml! 
tag:yaml.org,2002: \n 1609 // ^^^ 1610 // 1611 func yaml_parser_scan_directive_name(parser *yaml_parser_t, start_mark yaml_mark_t, name *[]byte) bool { 1612 // Consume the directive name. 1613 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1614 return false 1615 } 1616 1617 var s []byte 1618 for is_alpha(parser.buffer, parser.buffer_pos) { 1619 s = read(parser, s) 1620 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1621 return false 1622 } 1623 } 1624 1625 // Check if the name is empty. 1626 if len(s) == 0 { 1627 yaml_parser_set_scanner_error(parser, "while scanning a directive", 1628 start_mark, "could not find expected directive name") 1629 return false 1630 } 1631 1632 // Check for an blank character after the name. 1633 if !is_blankz(parser.buffer, parser.buffer_pos) { 1634 yaml_parser_set_scanner_error(parser, "while scanning a directive", 1635 start_mark, "found unexpected non-alphabetical character") 1636 return false 1637 } 1638 *name = s 1639 return true 1640 } 1641 1642 // Scan the value of VERSION-DIRECTIVE. 1643 // 1644 // Scope: 1645 // %YAML 1.1 # a comment \n 1646 // ^^^^^^ 1647 func yaml_parser_scan_version_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, major, minor *int8) bool { 1648 // Eat whitespaces. 1649 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1650 return false 1651 } 1652 for is_blank(parser.buffer, parser.buffer_pos) { 1653 skip(parser) 1654 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1655 return false 1656 } 1657 } 1658 1659 // Consume the major version number. 1660 if !yaml_parser_scan_version_directive_number(parser, start_mark, major) { 1661 return false 1662 } 1663 1664 // Eat '.'. 1665 if parser.buffer[parser.buffer_pos] != '.' { 1666 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", 1667 start_mark, "did not find expected digit or '.' 
character") 1668 } 1669 1670 skip(parser) 1671 1672 // Consume the minor version number. 1673 if !yaml_parser_scan_version_directive_number(parser, start_mark, minor) { 1674 return false 1675 } 1676 return true 1677 } 1678 1679 const max_number_length = 2 1680 1681 // Scan the version number of VERSION-DIRECTIVE. 1682 // 1683 // Scope: 1684 // %YAML 1.1 # a comment \n 1685 // ^ 1686 // %YAML 1.1 # a comment \n 1687 // ^ 1688 func yaml_parser_scan_version_directive_number(parser *yaml_parser_t, start_mark yaml_mark_t, number *int8) bool { 1689 1690 // Repeat while the next character is digit. 1691 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1692 return false 1693 } 1694 var value, length int8 1695 for is_digit(parser.buffer, parser.buffer_pos) { 1696 // Check if the number is too long. 1697 length++ 1698 if length > max_number_length { 1699 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", 1700 start_mark, "found extremely long version number") 1701 } 1702 value = value*10 + int8(as_digit(parser.buffer, parser.buffer_pos)) 1703 skip(parser) 1704 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1705 return false 1706 } 1707 } 1708 1709 // Check if the number was present. 1710 if length == 0 { 1711 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", 1712 start_mark, "did not find expected version number") 1713 } 1714 *number = value 1715 return true 1716 } 1717 1718 // Scan the value of a TAG-DIRECTIVE token. 1719 // 1720 // Scope: 1721 // %TAG !yaml! tag:yaml.org,2002: \n 1722 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 1723 // 1724 func yaml_parser_scan_tag_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, handle, prefix *[]byte) bool { 1725 var handle_value, prefix_value []byte 1726 1727 // Eat whitespaces. 
1728 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1729 return false 1730 } 1731 1732 for is_blank(parser.buffer, parser.buffer_pos) { 1733 skip(parser) 1734 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1735 return false 1736 } 1737 } 1738 1739 // Scan a handle. 1740 if !yaml_parser_scan_tag_handle(parser, true, start_mark, &handle_value) { 1741 return false 1742 } 1743 1744 // Expect a whitespace. 1745 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1746 return false 1747 } 1748 if !is_blank(parser.buffer, parser.buffer_pos) { 1749 yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", 1750 start_mark, "did not find expected whitespace") 1751 return false 1752 } 1753 1754 // Eat whitespaces. 1755 for is_blank(parser.buffer, parser.buffer_pos) { 1756 skip(parser) 1757 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1758 return false 1759 } 1760 } 1761 1762 // Scan a prefix. 1763 if !yaml_parser_scan_tag_uri(parser, true, nil, start_mark, &prefix_value) { 1764 return false 1765 } 1766 1767 // Expect a whitespace or line break. 1768 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1769 return false 1770 } 1771 if !is_blankz(parser.buffer, parser.buffer_pos) { 1772 yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", 1773 start_mark, "did not find expected whitespace or line break") 1774 return false 1775 } 1776 1777 *handle = handle_value 1778 *prefix = prefix_value 1779 return true 1780 } 1781 1782 func yaml_parser_scan_anchor(parser *yaml_parser_t, token *yaml_token_t, typ yaml_token_type_t) bool { 1783 var s []byte 1784 1785 // Eat the indicator character. 1786 start_mark := parser.mark 1787 skip(parser) 1788 1789 // Consume the value. 
1790 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1791 return false 1792 } 1793 1794 for is_alpha(parser.buffer, parser.buffer_pos) { 1795 s = read(parser, s) 1796 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1797 return false 1798 } 1799 } 1800 1801 end_mark := parser.mark 1802 1803 /* 1804 * Check if length of the anchor is greater than 0 and it is followed by 1805 * a whitespace character or one of the indicators: 1806 * 1807 * '?', ':', ',', ']', '}', '%', '@', '`'. 1808 */ 1809 1810 if len(s) == 0 || 1811 !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '?' || 1812 parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == ',' || 1813 parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '}' || 1814 parser.buffer[parser.buffer_pos] == '%' || parser.buffer[parser.buffer_pos] == '@' || 1815 parser.buffer[parser.buffer_pos] == '`') { 1816 context := "while scanning an alias" 1817 if typ == yaml_ANCHOR_TOKEN { 1818 context = "while scanning an anchor" 1819 } 1820 yaml_parser_set_scanner_error(parser, context, start_mark, 1821 "did not find expected alphabetic or numeric character") 1822 return false 1823 } 1824 1825 // Create a token. 1826 *token = yaml_token_t{ 1827 typ: typ, 1828 start_mark: start_mark, 1829 end_mark: end_mark, 1830 value: s, 1831 } 1832 1833 return true 1834 } 1835 1836 /* 1837 * Scan a TAG token. 1838 */ 1839 1840 func yaml_parser_scan_tag(parser *yaml_parser_t, token *yaml_token_t) bool { 1841 var handle, suffix []byte 1842 1843 start_mark := parser.mark 1844 1845 // Check if the tag is in the canonical form. 1846 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 1847 return false 1848 } 1849 1850 if parser.buffer[parser.buffer_pos+1] == '<' { 1851 // Keep the handle as '' 1852 1853 // Eat '!<' 1854 skip(parser) 1855 skip(parser) 1856 1857 // Consume the tag value. 
1858 if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) { 1859 return false 1860 } 1861 1862 // Check for '>' and eat it. 1863 if parser.buffer[parser.buffer_pos] != '>' { 1864 yaml_parser_set_scanner_error(parser, "while scanning a tag", 1865 start_mark, "did not find the expected '>'") 1866 return false 1867 } 1868 1869 skip(parser) 1870 } else { 1871 // The tag has either the '!suffix' or the '!handle!suffix' form. 1872 1873 // First, try to scan a handle. 1874 if !yaml_parser_scan_tag_handle(parser, false, start_mark, &handle) { 1875 return false 1876 } 1877 1878 // Check if it is, indeed, handle. 1879 if handle[0] == '!' && len(handle) > 1 && handle[len(handle)-1] == '!' { 1880 // Scan the suffix now. 1881 if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) { 1882 return false 1883 } 1884 } else { 1885 // It wasn't a handle after all. Scan the rest of the tag. 1886 if !yaml_parser_scan_tag_uri(parser, false, handle, start_mark, &suffix) { 1887 return false 1888 } 1889 1890 // Set the handle to '!'. 1891 handle = []byte{'!'} 1892 1893 // A special case: the '!' tag. Set the handle to '' and the 1894 // suffix to '!'. 1895 if len(suffix) == 0 { 1896 handle, suffix = suffix, handle 1897 } 1898 } 1899 } 1900 1901 // Check the character which ends the tag. 1902 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1903 return false 1904 } 1905 if !is_blankz(parser.buffer, parser.buffer_pos) { 1906 yaml_parser_set_scanner_error(parser, "while scanning a tag", 1907 start_mark, "did not find expected whitespace or line break") 1908 return false 1909 } 1910 1911 end_mark := parser.mark 1912 1913 // Create a token. 1914 *token = yaml_token_t{ 1915 typ: yaml_TAG_TOKEN, 1916 start_mark: start_mark, 1917 end_mark: end_mark, 1918 value: handle, 1919 suffix: suffix, 1920 } 1921 return true 1922 } 1923 1924 // Scan a tag handle. 
1925 func yaml_parser_scan_tag_handle(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, handle *[]byte) bool { 1926 // Check the initial '!' character. 1927 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1928 return false 1929 } 1930 if parser.buffer[parser.buffer_pos] != '!' { 1931 yaml_parser_set_scanner_tag_error(parser, directive, 1932 start_mark, "did not find expected '!'") 1933 return false 1934 } 1935 1936 var s []byte 1937 1938 // Copy the '!' character. 1939 s = read(parser, s) 1940 1941 // Copy all subsequent alphabetical and numerical characters. 1942 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1943 return false 1944 } 1945 for is_alpha(parser.buffer, parser.buffer_pos) { 1946 s = read(parser, s) 1947 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1948 return false 1949 } 1950 } 1951 1952 // Check if the trailing character is '!' and copy it. 1953 if parser.buffer[parser.buffer_pos] == '!' { 1954 s = read(parser, s) 1955 } else { 1956 // It's either the '!' tag or not really a tag handle. If it's a %TAG 1957 // directive, it's an error. If it's a tag token, it must be a part of URI. 1958 if directive && string(s) != "!" { 1959 yaml_parser_set_scanner_tag_error(parser, directive, 1960 start_mark, "did not find expected '!'") 1961 return false 1962 } 1963 } 1964 1965 *handle = s 1966 return true 1967 } 1968 1969 // Scan a tag. 1970 func yaml_parser_scan_tag_uri(parser *yaml_parser_t, directive bool, head []byte, start_mark yaml_mark_t, uri *[]byte) bool { 1971 //size_t length = head ? strlen((char *)head) : 0 1972 var s []byte 1973 hasTag := len(head) > 0 1974 1975 // Copy the head if needed. 1976 // 1977 // Note that we don't copy the leading '!' character. 1978 if len(head) > 1 { 1979 s = append(s, head[1:]...) 1980 } 1981 1982 // Scan the tag. 
1983 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1984 return false 1985 } 1986 1987 // The set of characters that may appear in URI is as follows: 1988 // 1989 // '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&', 1990 // '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']', 1991 // '%'. 1992 // [Go] Convert this into more reasonable logic. 1993 for is_alpha(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == ';' || 1994 parser.buffer[parser.buffer_pos] == '/' || parser.buffer[parser.buffer_pos] == '?' || 1995 parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == '@' || 1996 parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '=' || 1997 parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '$' || 1998 parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '.' || 1999 parser.buffer[parser.buffer_pos] == '!' || parser.buffer[parser.buffer_pos] == '~' || 2000 parser.buffer[parser.buffer_pos] == '*' || parser.buffer[parser.buffer_pos] == '\'' || 2001 parser.buffer[parser.buffer_pos] == '(' || parser.buffer[parser.buffer_pos] == ')' || 2002 parser.buffer[parser.buffer_pos] == '[' || parser.buffer[parser.buffer_pos] == ']' || 2003 parser.buffer[parser.buffer_pos] == '%' { 2004 // Check if it is a URI-escape sequence. 2005 if parser.buffer[parser.buffer_pos] == '%' { 2006 if !yaml_parser_scan_uri_escapes(parser, directive, start_mark, &s) { 2007 return false 2008 } 2009 } else { 2010 s = read(parser, s) 2011 } 2012 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2013 return false 2014 } 2015 hasTag = true 2016 } 2017 2018 if !hasTag { 2019 yaml_parser_set_scanner_tag_error(parser, directive, 2020 start_mark, "did not find expected tag URI") 2021 return false 2022 } 2023 *uri = s 2024 return true 2025 } 2026 2027 // Decode an URI-escape sequence corresponding to a single UTF-8 character. 
func yaml_parser_scan_uri_escapes(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, s *[]byte) bool {

	// Decode the required number of characters.
	// w counts the octets still expected; 1024 is a sentinel meaning "the
	// leading octet has not been seen yet".
	w := 1024
	for w > 0 {
		// Check for a URI-escaped octet ('%' followed by two hex digits).
		if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) {
			return false
		}

		if !(parser.buffer[parser.buffer_pos] == '%' &&
			is_hex(parser.buffer, parser.buffer_pos+1) &&
			is_hex(parser.buffer, parser.buffer_pos+2)) {
			return yaml_parser_set_scanner_tag_error(parser, directive,
				start_mark, "did not find URI escaped octet")
		}

		// Get the octet.
		octet := byte((as_hex(parser.buffer, parser.buffer_pos+1) << 4) + as_hex(parser.buffer, parser.buffer_pos+2))

		// If it is the leading octet, determine the length of the UTF-8 sequence.
		if w == 1024 {
			w = width(octet)
			if w == 0 {
				return yaml_parser_set_scanner_tag_error(parser, directive,
					start_mark, "found an incorrect leading UTF-8 octet")
			}
		} else {
			// Check if the trailing octet is correct (bit pattern 10xxxxxx).
			if octet&0xC0 != 0x80 {
				return yaml_parser_set_scanner_tag_error(parser, directive,
					start_mark, "found an incorrect trailing UTF-8 octet")
			}
		}

		// Copy the octet and move the pointers past the three characters
		// of the escape.
		*s = append(*s, octet)
		skip(parser)
		skip(parser)
		skip(parser)
		w--
	}
	return true
}

// Scan a block scalar.
//
// literal selects the style recorded on the token (literal vs. folded) and
// disables line folding. The header may carry a chomping indicator ('+' or
// '-') and a non-zero indentation digit, in either order, followed by an
// optional comment. On success *token is a SCALAR token holding the content.
func yaml_parser_scan_block_scalar(parser *yaml_parser_t, token *yaml_token_t, literal bool) bool {
	// Eat the indicator '|' or '>'.
	start_mark := parser.mark
	skip(parser)

	// Scan the additional block scalar indicators.
	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
		return false
	}

	// Check for a chomping indicator.
	// chomping is +1 for '+', -1 for '-', and 0 when absent; increment is the
	// explicit indentation digit, 0 when absent.
	var chomping, increment int
	if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' {
		// Set the chomping method and eat the indicator.
		if parser.buffer[parser.buffer_pos] == '+' {
			chomping = +1
		} else {
			chomping = -1
		}
		skip(parser)

		// Check for an indentation indicator.
		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
			return false
		}
		if is_digit(parser.buffer, parser.buffer_pos) {
			// Check that the indentation is greater than 0.
			if parser.buffer[parser.buffer_pos] == '0' {
				yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
					start_mark, "found an indentation indicator equal to 0")
				return false
			}

			// Get the indentation level and eat the indicator.
			increment = as_digit(parser.buffer, parser.buffer_pos)
			skip(parser)
		}

	} else if is_digit(parser.buffer, parser.buffer_pos) {
		// Do the same as above, but in the opposite order.

		if parser.buffer[parser.buffer_pos] == '0' {
			yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
				start_mark, "found an indentation indicator equal to 0")
			return false
		}
		increment = as_digit(parser.buffer, parser.buffer_pos)
		skip(parser)

		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
			return false
		}
		if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' {
			if parser.buffer[parser.buffer_pos] == '+' {
				chomping = +1
			} else {
				chomping = -1
			}
			skip(parser)
		}
	}

	// Eat whitespaces and comments to the end of the line.
	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
		return false
	}
	for is_blank(parser.buffer, parser.buffer_pos) {
		skip(parser)
		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
			return false
		}
	}
	if parser.buffer[parser.buffer_pos] == '#' {
		// Collect the comment text (up to the line break) and hand it to
		// add_comment together with the position where it started.
		m := parser.mark
		parser.comment_buffer = parser.comment_buffer[:0]
		for !is_breakz(parser.buffer, parser.buffer_pos) {
			p := parser.buffer_pos
			skip(parser)
			parser.comment_buffer = append(parser.comment_buffer,
				parser.buffer[p:parser.buffer_pos]...)
			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
				return false
			}
		}
		add_comment(parser, m, string(parser.comment_buffer))
	}

	// Check if we are at the end of the line.
	if !is_breakz(parser.buffer, parser.buffer_pos) {
		yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
			start_mark, "did not find expected comment or line break")
		return false
	}

	// Eat a line break.
	if is_break(parser.buffer, parser.buffer_pos) {
		if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
			return false
		}
		skip_line(parser)
	}

	end_mark := parser.mark

	// Set the indentation level if it was specified: the digit is relative to
	// the current indentation when that is non-negative. indent stays 0 when
	// no digit was given.
	var indent int
	if increment > 0 {
		if parser.indent >= 0 {
			indent = parser.indent + increment
		} else {
			indent = increment
		}
	}

	// Scan the leading line breaks and determine the indentation level if needed.
	var s, leading_break, trailing_breaks []byte
	if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) {
		return false
	}

	// Scan the block scalar content.
	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
		return false
	}
	var leading_blank, trailing_blank bool
	for parser.mark.column == indent && !is_z(parser.buffer, parser.buffer_pos) {
		// We are at the beginning of a non-empty line.

		// Is it a trailing whitespace?
		trailing_blank = is_blank(parser.buffer, parser.buffer_pos)

		// Check if we need to fold the leading line break. Folding applies
		// only to folded scalars, only to a '\n' break, and only when neither
		// the previous line nor this one starts with a blank.
		if !literal && !leading_blank && !trailing_blank && len(leading_break) > 0 && leading_break[0] == '\n' {
			// Do we need to join the lines by space?
			if len(trailing_breaks) == 0 {
				s = append(s, ' ')
			}
		} else {
			s = append(s, leading_break...)
		}
		leading_break = leading_break[:0]

		// Append the remaining line breaks.
		s = append(s, trailing_breaks...)
		trailing_breaks = trailing_breaks[:0]

		// Is it a leading whitespace?
		leading_blank = is_blank(parser.buffer, parser.buffer_pos)

		// Consume the current line.
		for !is_breakz(parser.buffer, parser.buffer_pos) {
			s = read(parser, s)
			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
				return false
			}
		}

		// Consume the line break.
		if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
			return false
		}

		leading_break = read_line(parser, leading_break)

		// Eat the following indentation spaces and line breaks.
		if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) {
			return false
		}
	}

	// Chomp the tail: the final line break is kept unless chomping is -1
	// ('-'); the trailing breaks are kept only when chomping is +1 ('+').
	if chomping != -1 {
		s = append(s, leading_break...)
	}
	if chomping == 1 {
		s = append(s, trailing_breaks...)
	}

	// Create a token.
	*token = yaml_token_t{
		typ:        yaml_SCALAR_TOKEN,
		start_mark: start_mark,
		end_mark:   end_mark,
		value:      s,
		style:      yaml_LITERAL_SCALAR_STYLE,
	}
	if !literal {
		token.style = yaml_FOLDED_SCALAR_STYLE
	}
	return true
}

// Scan indentation spaces and line breaks for a block scalar.  Determine the
// indentation level if needed.
func yaml_parser_scan_block_scalar_breaks(parser *yaml_parser_t, indent *int, breaks *[]byte, start_mark yaml_mark_t, end_mark *yaml_mark_t) bool {
	*end_mark = parser.mark

	// Eat the indentation spaces and line breaks.
	// max_indent records the widest column reached on any line scanned here.
	max_indent := 0
	for {
		// Eat the indentation spaces. With *indent == 0 (not yet known) all
		// leading spaces are eaten; otherwise only up to column *indent.
		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
			return false
		}
		for (*indent == 0 || parser.mark.column < *indent) && is_space(parser.buffer, parser.buffer_pos) {
			skip(parser)
			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
				return false
			}
		}
		if parser.mark.column > max_indent {
			max_indent = parser.mark.column
		}

		// Check for a tab character messing the indentation.
		if (*indent == 0 || parser.mark.column < *indent) && is_tab(parser.buffer, parser.buffer_pos) {
			return yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
				start_mark, "found a tab character where an indentation space is expected")
		}

		// Have we found a non-empty line?
		if !is_break(parser.buffer, parser.buffer_pos) {
			break
		}

		// Consume the line break.
		if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
			return false
		}
		// [Go] Should really be returning breaks instead.
		*breaks = read_line(parser, *breaks)
		*end_mark = parser.mark
	}

	// Determine the indentation level if needed.
2309 if *indent == 0 { 2310 *indent = max_indent 2311 if *indent < parser.indent+1 { 2312 *indent = parser.indent + 1 2313 } 2314 if *indent < 1 { 2315 *indent = 1 2316 } 2317 } 2318 return true 2319 } 2320 2321 // Scan a quoted scalar. 2322 func yaml_parser_scan_flow_scalar(parser *yaml_parser_t, token *yaml_token_t, single bool) bool { 2323 // Eat the left quote. 2324 start_mark := parser.mark 2325 skip(parser) 2326 2327 // Consume the content of the quoted scalar. 2328 var s, leading_break, trailing_breaks, whitespaces []byte 2329 for { 2330 // Check that there are no document indicators at the beginning of the line. 2331 if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) { 2332 return false 2333 } 2334 2335 if parser.mark.column == 0 && 2336 ((parser.buffer[parser.buffer_pos+0] == '-' && 2337 parser.buffer[parser.buffer_pos+1] == '-' && 2338 parser.buffer[parser.buffer_pos+2] == '-') || 2339 (parser.buffer[parser.buffer_pos+0] == '.' && 2340 parser.buffer[parser.buffer_pos+1] == '.' && 2341 parser.buffer[parser.buffer_pos+2] == '.')) && 2342 is_blankz(parser.buffer, parser.buffer_pos+3) { 2343 yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", 2344 start_mark, "found unexpected document indicator") 2345 return false 2346 } 2347 2348 // Check for EOF. 2349 if is_z(parser.buffer, parser.buffer_pos) { 2350 yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", 2351 start_mark, "found unexpected end of stream") 2352 return false 2353 } 2354 2355 // Consume non-blank characters. 2356 leading_blanks := false 2357 for !is_blankz(parser.buffer, parser.buffer_pos) { 2358 if single && parser.buffer[parser.buffer_pos] == '\'' && parser.buffer[parser.buffer_pos+1] == '\'' { 2359 // Is is an escaped single quote. 2360 s = append(s, '\'') 2361 skip(parser) 2362 skip(parser) 2363 2364 } else if single && parser.buffer[parser.buffer_pos] == '\'' { 2365 // It is a right single quote. 
2366 break 2367 } else if !single && parser.buffer[parser.buffer_pos] == '"' { 2368 // It is a right double quote. 2369 break 2370 2371 } else if !single && parser.buffer[parser.buffer_pos] == '\\' && is_break(parser.buffer, parser.buffer_pos+1) { 2372 // It is an escaped line break. 2373 if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) { 2374 return false 2375 } 2376 skip(parser) 2377 skip_line(parser) 2378 leading_blanks = true 2379 break 2380 2381 } else if !single && parser.buffer[parser.buffer_pos] == '\\' { 2382 // It is an escape sequence. 2383 code_length := 0 2384 2385 // Check the escape character. 2386 switch parser.buffer[parser.buffer_pos+1] { 2387 case '0': 2388 s = append(s, 0) 2389 case 'a': 2390 s = append(s, '\x07') 2391 case 'b': 2392 s = append(s, '\x08') 2393 case 't', '\t': 2394 s = append(s, '\x09') 2395 case 'n': 2396 s = append(s, '\x0A') 2397 case 'v': 2398 s = append(s, '\x0B') 2399 case 'f': 2400 s = append(s, '\x0C') 2401 case 'r': 2402 s = append(s, '\x0D') 2403 case 'e': 2404 s = append(s, '\x1B') 2405 case ' ': 2406 s = append(s, '\x20') 2407 case '"': 2408 s = append(s, '"') 2409 case '\'': 2410 s = append(s, '\'') 2411 case '\\': 2412 s = append(s, '\\') 2413 case 'N': // NEL (#x85) 2414 s = append(s, '\xC2') 2415 s = append(s, '\x85') 2416 case '_': // #xA0 2417 s = append(s, '\xC2') 2418 s = append(s, '\xA0') 2419 case 'L': // LS (#x2028) 2420 s = append(s, '\xE2') 2421 s = append(s, '\x80') 2422 s = append(s, '\xA8') 2423 case 'P': // PS (#x2029) 2424 s = append(s, '\xE2') 2425 s = append(s, '\x80') 2426 s = append(s, '\xA9') 2427 case 'x': 2428 code_length = 2 2429 case 'u': 2430 code_length = 4 2431 case 'U': 2432 code_length = 8 2433 default: 2434 yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", 2435 start_mark, "found unknown escape character") 2436 return false 2437 } 2438 2439 skip(parser) 2440 skip(parser) 2441 2442 // Consume an arbitrary escape code. 
2443 if code_length > 0 { 2444 var value int 2445 2446 // Scan the character value. 2447 if parser.unread < code_length && !yaml_parser_update_buffer(parser, code_length) { 2448 return false 2449 } 2450 for k := 0; k < code_length; k++ { 2451 if !is_hex(parser.buffer, parser.buffer_pos+k) { 2452 yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", 2453 start_mark, "did not find expected hexdecimal number") 2454 return false 2455 } 2456 value = (value << 4) + as_hex(parser.buffer, parser.buffer_pos+k) 2457 } 2458 2459 // Check the value and write the character. 2460 if (value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF { 2461 yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", 2462 start_mark, "found invalid Unicode character escape code") 2463 return false 2464 } 2465 if value <= 0x7F { 2466 s = append(s, byte(value)) 2467 } else if value <= 0x7FF { 2468 s = append(s, byte(0xC0+(value>>6))) 2469 s = append(s, byte(0x80+(value&0x3F))) 2470 } else if value <= 0xFFFF { 2471 s = append(s, byte(0xE0+(value>>12))) 2472 s = append(s, byte(0x80+((value>>6)&0x3F))) 2473 s = append(s, byte(0x80+(value&0x3F))) 2474 } else { 2475 s = append(s, byte(0xF0+(value>>18))) 2476 s = append(s, byte(0x80+((value>>12)&0x3F))) 2477 s = append(s, byte(0x80+((value>>6)&0x3F))) 2478 s = append(s, byte(0x80+(value&0x3F))) 2479 } 2480 2481 // Advance the pointer. 2482 for k := 0; k < code_length; k++ { 2483 skip(parser) 2484 } 2485 } 2486 } else { 2487 // It is a non-escaped non-blank character. 2488 s = read(parser, s) 2489 } 2490 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 2491 return false 2492 } 2493 } 2494 2495 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2496 return false 2497 } 2498 2499 // Check if we are at the end of the scalar. 
2500 if single { 2501 if parser.buffer[parser.buffer_pos] == '\'' { 2502 break 2503 } 2504 } else { 2505 if parser.buffer[parser.buffer_pos] == '"' { 2506 break 2507 } 2508 } 2509 2510 // Consume blank characters. 2511 for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) { 2512 if is_blank(parser.buffer, parser.buffer_pos) { 2513 // Consume a space or a tab character. 2514 if !leading_blanks { 2515 whitespaces = read(parser, whitespaces) 2516 } else { 2517 skip(parser) 2518 } 2519 } else { 2520 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 2521 return false 2522 } 2523 2524 // Check if it is a first line break. 2525 if !leading_blanks { 2526 whitespaces = whitespaces[:0] 2527 leading_break = read_line(parser, leading_break) 2528 leading_blanks = true 2529 } else { 2530 trailing_breaks = read_line(parser, trailing_breaks) 2531 } 2532 } 2533 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2534 return false 2535 } 2536 } 2537 2538 // Join the whitespaces or fold line breaks. 2539 if leading_blanks { 2540 // Do we need to fold line breaks? 2541 if len(leading_break) > 0 && leading_break[0] == '\n' { 2542 if len(trailing_breaks) == 0 { 2543 s = append(s, ' ') 2544 } else { 2545 s = append(s, trailing_breaks...) 2546 } 2547 } else { 2548 s = append(s, leading_break...) 2549 s = append(s, trailing_breaks...) 2550 } 2551 trailing_breaks = trailing_breaks[:0] 2552 leading_break = leading_break[:0] 2553 } else { 2554 s = append(s, whitespaces...) 2555 whitespaces = whitespaces[:0] 2556 } 2557 } 2558 2559 // Eat the right quote. 2560 skip(parser) 2561 end_mark := parser.mark 2562 2563 // Create a token. 
2564 *token = yaml_token_t{ 2565 typ: yaml_SCALAR_TOKEN, 2566 start_mark: start_mark, 2567 end_mark: end_mark, 2568 value: s, 2569 style: yaml_SINGLE_QUOTED_SCALAR_STYLE, 2570 } 2571 if !single { 2572 token.style = yaml_DOUBLE_QUOTED_SCALAR_STYLE 2573 } 2574 return true 2575 } 2576 2577 // Scan a plain scalar. 2578 func yaml_parser_scan_plain_scalar(parser *yaml_parser_t, token *yaml_token_t) bool { 2579 2580 var s, leading_break, trailing_breaks, whitespaces []byte 2581 var leading_blanks bool 2582 var indent = parser.indent + 1 2583 2584 start_mark := parser.mark 2585 end_mark := parser.mark 2586 2587 // Consume the content of the plain scalar. 2588 for { 2589 // Check for a document indicator. 2590 if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) { 2591 return false 2592 } 2593 if parser.mark.column == 0 && 2594 ((parser.buffer[parser.buffer_pos+0] == '-' && 2595 parser.buffer[parser.buffer_pos+1] == '-' && 2596 parser.buffer[parser.buffer_pos+2] == '-') || 2597 (parser.buffer[parser.buffer_pos+0] == '.' && 2598 parser.buffer[parser.buffer_pos+1] == '.' && 2599 parser.buffer[parser.buffer_pos+2] == '.')) && 2600 is_blankz(parser.buffer, parser.buffer_pos+3) { 2601 break 2602 } 2603 2604 // Check for a comment. 2605 if parser.buffer[parser.buffer_pos] == '#' { 2606 break 2607 } 2608 2609 // Consume non-blank characters. 2610 for !is_blankz(parser.buffer, parser.buffer_pos) { 2611 2612 // Check for indicators that may end a plain scalar. 2613 if (parser.buffer[parser.buffer_pos] == ':' && is_blankz(parser.buffer, parser.buffer_pos+1)) || 2614 (parser.flow_level > 0 && 2615 (parser.buffer[parser.buffer_pos] == ',' || 2616 parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == '[' || 2617 parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' || 2618 parser.buffer[parser.buffer_pos] == '}')) { 2619 break 2620 } 2621 2622 // Check if we need to join whitespaces and breaks. 
2623 if leading_blanks || len(whitespaces) > 0 { 2624 if leading_blanks { 2625 // Do we need to fold line breaks? 2626 if leading_break[0] == '\n' { 2627 if len(trailing_breaks) == 0 { 2628 s = append(s, ' ') 2629 } else { 2630 s = append(s, trailing_breaks...) 2631 } 2632 } else { 2633 s = append(s, leading_break...) 2634 s = append(s, trailing_breaks...) 2635 } 2636 trailing_breaks = trailing_breaks[:0] 2637 leading_break = leading_break[:0] 2638 leading_blanks = false 2639 } else { 2640 s = append(s, whitespaces...) 2641 whitespaces = whitespaces[:0] 2642 } 2643 } 2644 2645 // Copy the character. 2646 s = read(parser, s) 2647 2648 end_mark = parser.mark 2649 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 2650 return false 2651 } 2652 } 2653 2654 // Is it the end? 2655 if !(is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos)) { 2656 break 2657 } 2658 2659 // Consume blank characters. 2660 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2661 return false 2662 } 2663 2664 for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) { 2665 if is_blank(parser.buffer, parser.buffer_pos) { 2666 2667 // Check for tab characters that abuse indentation. 2668 if leading_blanks && parser.mark.column < indent && is_tab(parser.buffer, parser.buffer_pos) { 2669 yaml_parser_set_scanner_error(parser, "while scanning a plain scalar", 2670 start_mark, "found a tab character that violates indentation") 2671 return false 2672 } 2673 2674 // Consume a space or a tab character. 2675 if !leading_blanks { 2676 whitespaces = read(parser, whitespaces) 2677 } else { 2678 skip(parser) 2679 } 2680 } else { 2681 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 2682 return false 2683 } 2684 2685 // Check if it is a first line break. 
2686 if !leading_blanks { 2687 whitespaces = whitespaces[:0] 2688 leading_break = read_line(parser, leading_break) 2689 leading_blanks = true 2690 } else { 2691 trailing_breaks = read_line(parser, trailing_breaks) 2692 } 2693 } 2694 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2695 return false 2696 } 2697 } 2698 2699 // Check indentation level. 2700 if parser.flow_level == 0 && parser.mark.column < indent { 2701 break 2702 } 2703 } 2704 2705 // Create a token. 2706 *token = yaml_token_t{ 2707 typ: yaml_SCALAR_TOKEN, 2708 start_mark: start_mark, 2709 end_mark: end_mark, 2710 value: s, 2711 style: yaml_PLAIN_SCALAR_STYLE, 2712 } 2713 2714 // Note that we change the 'simple_key_allowed' flag. 2715 if leading_blanks { 2716 parser.simple_key_allowed = true 2717 } 2718 return true 2719 }