cuelang.org/go@v0.10.1/internal/third_party/yaml/scannerc.go (about) 1 package yaml 2 3 import ( 4 "bytes" 5 "fmt" 6 ) 7 8 // Introduction 9 // ************ 10 // 11 // The following notes assume that you are familiar with the YAML specification 12 // (http://yaml.org/spec/1.2/spec.html). We mostly follow it, although in 13 // some cases we are less restrictive than it requires. 14 // 15 // The process of transforming a YAML stream into a sequence of events is 16 // divided into two steps: Scanning and Parsing. 17 // 18 // The Scanner transforms the input stream into a sequence of tokens, while the 19 // Parser transforms the sequence of tokens produced by the Scanner into a 20 // sequence of parsing events. 21 // 22 // The Scanner is rather clever and complicated. The Parser, on the contrary, 23 // is a straightforward implementation of a recursive-descent parser (or, 24 // LL(1) parser, as it is usually called). 25 // 26 // Actually there are two issues of Scanning that might be called "clever", the 27 // rest is quite straightforward. The issues are "block collection start" and 28 // "simple keys". Both issues are explained below in detail. 29 // 30 // Here the Scanning step is explained and implemented. We start with the list 31 // of all the tokens produced by the Scanner together with short descriptions. 32 // 33 // Now, tokens: 34 // 35 // STREAM-START(encoding) # The stream start. 36 // STREAM-END # The stream end. 37 // VERSION-DIRECTIVE(major,minor) # The '%YAML' directive. 38 // TAG-DIRECTIVE(handle,prefix) # The '%TAG' directive. 39 // DOCUMENT-START # '---' 40 // DOCUMENT-END # '...' 41 // BLOCK-SEQUENCE-START # Indentation increase denoting a block 42 // BLOCK-MAPPING-START # sequence or a block mapping. 43 // BLOCK-END # Indentation decrease. 44 // FLOW-SEQUENCE-START # '[' 45 // FLOW-SEQUENCE-END # ']' 46 // FLOW-MAPPING-START # '{' 47 // FLOW-MAPPING-END # '}' 48 // BLOCK-ENTRY # '-' 49 // FLOW-ENTRY # ',' 50 // KEY # '?' 
or nothing (simple keys). 51 // VALUE # ':' 52 // ALIAS(anchor) # '*anchor' 53 // ANCHOR(anchor) # '&anchor' 54 // TAG(handle,suffix) # '!handle!suffix' 55 // SCALAR(value,style) # A scalar. 56 // 57 // The following two tokens are "virtual" tokens denoting the beginning and the 58 // end of the stream: 59 // 60 // STREAM-START(encoding) 61 // STREAM-END 62 // 63 // We pass the information about the input stream encoding with the 64 // STREAM-START token. 65 // 66 // The next two tokens are responsible for directives: 67 // 68 // VERSION-DIRECTIVE(major,minor) 69 // TAG-DIRECTIVE(handle,prefix) 70 // 71 // Example: 72 // 73 // %YAML 1.1 74 // %TAG ! !foo 75 // %TAG !yaml! tag:yaml.org,2002: 76 // --- 77 // 78 // The corresponding sequence of tokens: 79 // 80 // STREAM-START(utf-8) 81 // VERSION-DIRECTIVE(1,1) 82 // TAG-DIRECTIVE("!","!foo") 83 // TAG-DIRECTIVE("!yaml","tag:yaml.org,2002:") 84 // DOCUMENT-START 85 // STREAM-END 86 // 87 // Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole 88 // line. 89 // 90 // The document start and end indicators are represented by: 91 // 92 // DOCUMENT-START 93 // DOCUMENT-END 94 // 95 // Note that if a YAML stream contains an implicit document (without '---' 96 // and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be 97 // produced. 98 // 99 // In the following examples, we present whole documents together with the 100 // produced tokens. 101 // 102 // 1. An implicit document: 103 // 104 // 'a scalar' 105 // 106 // Tokens: 107 // 108 // STREAM-START(utf-8) 109 // SCALAR("a scalar",single-quoted) 110 // STREAM-END 111 // 112 // 2. An explicit document: 113 // 114 // --- 115 // 'a scalar' 116 // ... 117 // 118 // Tokens: 119 // 120 // STREAM-START(utf-8) 121 // DOCUMENT-START 122 // SCALAR("a scalar",single-quoted) 123 // DOCUMENT-END 124 // STREAM-END 125 // 126 // 3. 
Several documents in a stream: 127 // 128 // 'a scalar' 129 // --- 130 // 'another scalar' 131 // --- 132 // 'yet another scalar' 133 // 134 // Tokens: 135 // 136 // STREAM-START(utf-8) 137 // SCALAR("a scalar",single-quoted) 138 // DOCUMENT-START 139 // SCALAR("another scalar",single-quoted) 140 // DOCUMENT-START 141 // SCALAR("yet another scalar",single-quoted) 142 // STREAM-END 143 // 144 // We have already introduced the SCALAR token above. The following tokens are 145 // used to describe aliases, anchors, tag, and scalars: 146 // 147 // ALIAS(anchor) 148 // ANCHOR(anchor) 149 // TAG(handle,suffix) 150 // SCALAR(value,style) 151 // 152 // The following series of examples illustrate the usage of these tokens: 153 // 154 // 1. A recursive sequence: 155 // 156 // &A [ *A ] 157 // 158 // Tokens: 159 // 160 // STREAM-START(utf-8) 161 // ANCHOR("A") 162 // FLOW-SEQUENCE-START 163 // ALIAS("A") 164 // FLOW-SEQUENCE-END 165 // STREAM-END 166 // 167 // 2. A tagged scalar: 168 // 169 // !!float "3.14" # A good approximation. 170 // 171 // Tokens: 172 // 173 // STREAM-START(utf-8) 174 // TAG("!!","float") 175 // SCALAR("3.14",double-quoted) 176 // STREAM-END 177 // 178 // 3. Various scalar styles: 179 // 180 // --- # Implicit empty plain scalars do not produce tokens. 181 // --- a plain scalar 182 // --- 'a single-quoted scalar' 183 // --- "a double-quoted scalar" 184 // --- |- 185 // a literal scalar 186 // --- >- 187 // a folded 188 // scalar 189 // 190 // Tokens: 191 // 192 // STREAM-START(utf-8) 193 // DOCUMENT-START 194 // DOCUMENT-START 195 // SCALAR("a plain scalar",plain) 196 // DOCUMENT-START 197 // SCALAR("a single-quoted scalar",single-quoted) 198 // DOCUMENT-START 199 // SCALAR("a double-quoted scalar",double-quoted) 200 // DOCUMENT-START 201 // SCALAR("a literal scalar",literal) 202 // DOCUMENT-START 203 // SCALAR("a folded scalar",folded) 204 // STREAM-END 205 // 206 // Now it's time to review collection-related tokens. 
We will start with 207 // flow collections: 208 // 209 // FLOW-SEQUENCE-START 210 // FLOW-SEQUENCE-END 211 // FLOW-MAPPING-START 212 // FLOW-MAPPING-END 213 // FLOW-ENTRY 214 // KEY 215 // VALUE 216 // 217 // The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and 218 // FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}' 219 // respectively. FLOW-ENTRY represents the ',' indicator. Finally the 220 // indicators '?' and ':', which are used for denoting mapping keys and values, 221 // are represented by the KEY and VALUE tokens. 222 // 223 // The following examples show flow collections: 224 // 225 // 1. A flow sequence: 226 // 227 // [item 1, item 2, item 3] 228 // 229 // Tokens: 230 // 231 // STREAM-START(utf-8) 232 // FLOW-SEQUENCE-START 233 // SCALAR("item 1",plain) 234 // FLOW-ENTRY 235 // SCALAR("item 2",plain) 236 // FLOW-ENTRY 237 // SCALAR("item 3",plain) 238 // FLOW-SEQUENCE-END 239 // STREAM-END 240 // 241 // 2. A flow mapping: 242 // 243 // { 244 // a simple key: a value, # Note that the KEY token is produced. 245 // ? a complex key: another value, 246 // } 247 // 248 // Tokens: 249 // 250 // STREAM-START(utf-8) 251 // FLOW-MAPPING-START 252 // KEY 253 // SCALAR("a simple key",plain) 254 // VALUE 255 // SCALAR("a value",plain) 256 // FLOW-ENTRY 257 // KEY 258 // SCALAR("a complex key",plain) 259 // VALUE 260 // SCALAR("another value",plain) 261 // FLOW-ENTRY 262 // FLOW-MAPPING-END 263 // STREAM-END 264 // 265 // A simple key is a key which is not denoted by the '?' indicator. Note that 266 // the Scanner still produces the KEY token whenever it encounters a simple key. 
267 // 268 // For scanning block collections, the following tokens are used (note that we 269 // repeat KEY and VALUE here): 270 // 271 // BLOCK-SEQUENCE-START 272 // BLOCK-MAPPING-START 273 // BLOCK-END 274 // BLOCK-ENTRY 275 // KEY 276 // VALUE 277 // 278 // The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation 279 // increase that precedes a block collection (cf. the INDENT token in Python). 280 // The token BLOCK-END denotes indentation decrease that ends a block collection 281 // (cf. the DEDENT token in Python). However YAML has some syntax peculiarities 282 // that make detection of these tokens more complex. 283 // 284 // The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators 285 // '-', '?', and ':' respectively. 286 // 287 // The following examples show how the tokens BLOCK-SEQUENCE-START, 288 // BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner: 289 // 290 // 1. Block sequences: 291 // 292 // - item 1 293 // - item 2 294 // - 295 // - item 3.1 296 // - item 3.2 297 // - 298 // key 1: value 1 299 // key 2: value 2 300 // 301 // Tokens: 302 // 303 // STREAM-START(utf-8) 304 // BLOCK-SEQUENCE-START 305 // BLOCK-ENTRY 306 // SCALAR("item 1",plain) 307 // BLOCK-ENTRY 308 // SCALAR("item 2",plain) 309 // BLOCK-ENTRY 310 // BLOCK-SEQUENCE-START 311 // BLOCK-ENTRY 312 // SCALAR("item 3.1",plain) 313 // BLOCK-ENTRY 314 // SCALAR("item 3.2",plain) 315 // BLOCK-END 316 // BLOCK-ENTRY 317 // BLOCK-MAPPING-START 318 // KEY 319 // SCALAR("key 1",plain) 320 // VALUE 321 // SCALAR("value 1",plain) 322 // KEY 323 // SCALAR("key 2",plain) 324 // VALUE 325 // SCALAR("value 2",plain) 326 // BLOCK-END 327 // BLOCK-END 328 // STREAM-END 329 // 330 // 2. Block mappings: 331 // 332 // a simple key: a value # The KEY token is produced here. 333 // ? 
a complex key 334 // : another value 335 // a mapping: 336 // key 1: value 1 337 // key 2: value 2 338 // a sequence: 339 // - item 1 340 // - item 2 341 // 342 // Tokens: 343 // 344 // STREAM-START(utf-8) 345 // BLOCK-MAPPING-START 346 // KEY 347 // SCALAR("a simple key",plain) 348 // VALUE 349 // SCALAR("a value",plain) 350 // KEY 351 // SCALAR("a complex key",plain) 352 // VALUE 353 // SCALAR("another value",plain) 354 // KEY 355 // SCALAR("a mapping",plain) // VALUE 356 // BLOCK-MAPPING-START 357 // KEY 358 // SCALAR("key 1",plain) 359 // VALUE 360 // SCALAR("value 1",plain) 361 // KEY 362 // SCALAR("key 2",plain) 363 // VALUE 364 // SCALAR("value 2",plain) 365 // BLOCK-END 366 // KEY 367 // SCALAR("a sequence",plain) 368 // VALUE 369 // BLOCK-SEQUENCE-START 370 // BLOCK-ENTRY 371 // SCALAR("item 1",plain) 372 // BLOCK-ENTRY 373 // SCALAR("item 2",plain) 374 // BLOCK-END 375 // BLOCK-END 376 // STREAM-END 377 // 378 // YAML does not always require a new block collection to start on a new 379 // line. If the current line contains only '-', '?', and ':' indicators, a new 380 // block collection may start at the current line. The following examples 381 // illustrate this case: 382 // 383 // 1. Collections in a sequence: 384 // 385 // - - item 1 386 // - item 2 387 // - key 1: value 1 388 // key 2: value 2 389 // - ? 
complex key 390 // : complex value 391 // 392 // Tokens: 393 // 394 // STREAM-START(utf-8) 395 // BLOCK-SEQUENCE-START 396 // BLOCK-ENTRY 397 // BLOCK-SEQUENCE-START 398 // BLOCK-ENTRY 399 // SCALAR("item 1",plain) 400 // BLOCK-ENTRY 401 // SCALAR("item 2",plain) 402 // BLOCK-END 403 // BLOCK-ENTRY 404 // BLOCK-MAPPING-START 405 // KEY 406 // SCALAR("key 1",plain) 407 // VALUE 408 // SCALAR("value 1",plain) 409 // KEY 410 // SCALAR("key 2",plain) 411 // VALUE 412 // SCALAR("value 2",plain) 413 // BLOCK-END 414 // BLOCK-ENTRY 415 // BLOCK-MAPPING-START 416 // KEY 417 // SCALAR("complex key") 418 // VALUE 419 // SCALAR("complex value") 420 // BLOCK-END 421 // BLOCK-END 422 // STREAM-END 423 // 424 // 2. Collections in a mapping: 425 // 426 // ? a sequence 427 // : - item 1 428 // - item 2 429 // ? a mapping 430 // : key 1: value 1 431 // key 2: value 2 432 // 433 // Tokens: 434 // 435 // STREAM-START(utf-8) 436 // BLOCK-MAPPING-START 437 // KEY 438 // SCALAR("a sequence",plain) 439 // VALUE 440 // BLOCK-SEQUENCE-START 441 // BLOCK-ENTRY 442 // SCALAR("item 1",plain) 443 // BLOCK-ENTRY 444 // SCALAR("item 2",plain) 445 // BLOCK-END 446 // KEY 447 // SCALAR("a mapping",plain) 448 // VALUE 449 // BLOCK-MAPPING-START 450 // KEY 451 // SCALAR("key 1",plain) 452 // VALUE 453 // SCALAR("value 1",plain) 454 // KEY 455 // SCALAR("key 2",plain) 456 // VALUE 457 // SCALAR("value 2",plain) 458 // BLOCK-END 459 // BLOCK-END 460 // STREAM-END 461 // 462 // YAML also permits non-indented sequences if they are included into a block 463 // mapping. In this case, the token BLOCK-SEQUENCE-START is not produced: 464 // 465 // key: 466 // - item 1 # BLOCK-SEQUENCE-START is NOT produced here. 
467 // - item 2 468 // 469 // Tokens: 470 // 471 // STREAM-START(utf-8) 472 // BLOCK-MAPPING-START 473 // KEY 474 // SCALAR("key",plain) 475 // VALUE 476 // BLOCK-ENTRY 477 // SCALAR("item 1",plain) 478 // BLOCK-ENTRY 479 // SCALAR("item 2",plain) 480 // BLOCK-END 481 // 482 483 // Ensure that the buffer contains the required number of characters. 484 // Return true on success, false on failure (reader error or memory error). 485 func cache(parser *yaml_parser_t, length int) bool { 486 // [Go] This was inlined: !cache(A, B) -> unread < B && !update(A, B) 487 return parser.unread >= length || yaml_parser_update_buffer(parser, length) 488 } 489 490 // Advance the buffer pointer. 491 func skip(parser *yaml_parser_t) { 492 w := width(parser.buffer[parser.buffer_pos]) 493 parser.mark.index += w 494 parser.mark.column++ 495 parser.unread-- 496 parser.buffer_pos += w 497 } 498 499 func skip_line(parser *yaml_parser_t) { 500 if is_crlf(parser.buffer, parser.buffer_pos) { 501 parser.mark.index += 2 502 parser.mark.column = 0 503 parser.mark.line++ 504 parser.unread -= 2 505 parser.buffer_pos += 2 506 } else if is_break(parser.buffer, parser.buffer_pos) { 507 w := width(parser.buffer[parser.buffer_pos]) 508 parser.mark.index += w 509 parser.mark.column = 0 510 parser.mark.line++ 511 parser.unread-- 512 parser.buffer_pos += w 513 } 514 } 515 516 // Copy a character to a string buffer and advance pointers. 517 func read(parser *yaml_parser_t, s []byte) []byte { 518 w := width(parser.buffer[parser.buffer_pos]) 519 if w == 0 { 520 panic("invalid character sequence") 521 } 522 if len(s) == 0 { 523 s = make([]byte, 0, 32) 524 } 525 if w == 1 && len(s)+w <= cap(s) { 526 s = s[:len(s)+1] 527 s[len(s)-1] = parser.buffer[parser.buffer_pos] 528 parser.buffer_pos++ 529 } else { 530 s = append(s, parser.buffer[parser.buffer_pos:parser.buffer_pos+w]...) 
531 parser.buffer_pos += w 532 } 533 parser.mark.index += w 534 parser.mark.column++ 535 parser.unread-- 536 return s 537 } 538 539 // Copy a line break character to a string buffer and advance pointers. 540 func read_line(parser *yaml_parser_t, s []byte) []byte { 541 buf := parser.buffer 542 pos := parser.buffer_pos 543 switch { 544 case buf[pos] == '\r' && buf[pos+1] == '\n': 545 // CR LF . LF 546 s = append(s, '\n') 547 parser.buffer_pos += 2 548 parser.mark.index++ 549 parser.unread-- 550 case buf[pos] == '\r' || buf[pos] == '\n': 551 // CR|LF . LF 552 s = append(s, '\n') 553 parser.buffer_pos += 1 554 case buf[pos] == '\xC2' && buf[pos+1] == '\x85': 555 // NEL . LF 556 s = append(s, '\n') 557 parser.buffer_pos += 2 558 case buf[pos] == '\xE2' && buf[pos+1] == '\x80' && (buf[pos+2] == '\xA8' || buf[pos+2] == '\xA9'): 559 // LS|PS . LS|PS 560 s = append(s, buf[parser.buffer_pos:pos+3]...) 561 parser.buffer_pos += 3 562 default: 563 return s 564 } 565 parser.mark.index++ 566 parser.mark.column = 0 567 parser.mark.line++ 568 parser.unread-- 569 return s 570 } 571 572 // Get the next token. 573 func yaml_parser_scan(parser *yaml_parser_t, token *yaml_token_t) bool { 574 // Erase the token object. 575 *token = yaml_token_t{} // [Go] Is this necessary? 576 577 // No tokens after STREAM-END or error. 578 if parser.stream_end_produced || parser.error != yaml_NO_ERROR { 579 return true 580 } 581 582 // Ensure that the tokens queue contains enough tokens. 583 if !parser.token_available { 584 if !yaml_parser_fetch_more_tokens(parser) { 585 return false 586 } 587 } 588 589 // Fetch the next token from the queue. 590 *token = parser.tokens[parser.tokens_head] 591 parser.tokens_head++ 592 parser.tokens_parsed++ 593 parser.token_available = false 594 595 if token.typ == yaml_STREAM_END_TOKEN { 596 parser.stream_end_produced = true 597 } 598 return true 599 } 600 601 // Set the scanner error and return false. 
602 func yaml_parser_set_scanner_error(parser *yaml_parser_t, context string, context_mark yaml_mark_t, problem string) bool { 603 parser.error = yaml_SCANNER_ERROR 604 parser.context = context 605 parser.context_mark = context_mark 606 parser.problem = problem 607 parser.problem_mark = parser.mark 608 return false 609 } 610 611 func yaml_parser_set_scanner_tag_error(parser *yaml_parser_t, directive bool, context_mark yaml_mark_t, problem string) bool { 612 context := "while parsing a tag" 613 if directive { 614 context = "while parsing a %TAG directive" 615 } 616 return yaml_parser_set_scanner_error(parser, context, context_mark, problem) 617 } 618 619 func trace(args ...interface{}) func() { 620 pargs := append([]interface{}{"+++"}, args...) 621 fmt.Println(pargs...) 622 pargs = append([]interface{}{"---"}, args...) 623 return func() { fmt.Println(pargs...) } 624 } 625 626 // Ensure that the tokens queue contains at least one token which can be 627 // returned to the Parser. 628 func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool { 629 // While we need more tokens to fetch, do it. 630 for { 631 // Check if we really need to fetch more tokens. 632 need_more_tokens := false 633 634 if parser.tokens_head == len(parser.tokens) { 635 // Queue is empty. 636 need_more_tokens = true 637 } else { 638 // Check if any potential simple key may occupy the head position. 639 if !yaml_parser_stale_simple_keys(parser) { 640 return false 641 } 642 643 for i := range parser.simple_keys { 644 simple_key := &parser.simple_keys[i] 645 if simple_key.possible && simple_key.token_number == parser.tokens_parsed { 646 need_more_tokens = true 647 break 648 } 649 } 650 } 651 652 // We are finished. 653 if !need_more_tokens { 654 break 655 } 656 // Fetch the next token. 657 if !yaml_parser_fetch_next_token(parser) { 658 return false 659 } 660 } 661 662 parser.token_available = true 663 return true 664 } 665 666 // The dispatcher for token fetchers. 
667 func yaml_parser_fetch_next_token(parser *yaml_parser_t) bool { 668 // Ensure that the buffer is initialized. 669 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 670 return false 671 } 672 673 // Check if we just started scanning. Fetch STREAM-START then. 674 if !parser.stream_start_produced { 675 return yaml_parser_fetch_stream_start(parser) 676 } 677 678 // Eat whitespaces and comments until we reach the next token. 679 if !yaml_parser_scan_to_next_token(parser) { 680 return false 681 } 682 683 // Remove obsolete potential simple keys. 684 if !yaml_parser_stale_simple_keys(parser) { 685 return false 686 } 687 688 // Check the indentation level against the current column. 689 if !yaml_parser_unroll_indent(parser, parser.mark.column) { 690 return false 691 } 692 693 // Ensure that the buffer contains at least 4 characters. 4 is the length 694 // of the longest indicators ('--- ' and '... '). 695 if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) { 696 return false 697 } 698 699 // Is it the end of the stream? 700 if is_z(parser.buffer, parser.buffer_pos) { 701 return yaml_parser_fetch_stream_end(parser) 702 } 703 704 // Is it a directive? 705 if parser.mark.column == 0 && parser.buffer[parser.buffer_pos] == '%' { 706 return yaml_parser_fetch_directive(parser) 707 } 708 709 buf := parser.buffer 710 pos := parser.buffer_pos 711 712 // Is it the document start indicator? 713 if parser.mark.column == 0 && buf[pos] == '-' && buf[pos+1] == '-' && buf[pos+2] == '-' && is_blankz(buf, pos+3) { 714 return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_START_TOKEN) 715 } 716 717 // Is it the document end indicator? 718 if parser.mark.column == 0 && buf[pos] == '.' && buf[pos+1] == '.' && buf[pos+2] == '.' && is_blankz(buf, pos+3) { 719 return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_END_TOKEN) 720 } 721 722 // Is it the flow sequence start indicator? 
723 if buf[pos] == '[' { 724 return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_SEQUENCE_START_TOKEN) 725 } 726 727 // Is it the flow mapping start indicator? 728 if parser.buffer[parser.buffer_pos] == '{' { 729 return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_MAPPING_START_TOKEN) 730 } 731 732 // Is it the flow sequence end indicator? 733 if parser.buffer[parser.buffer_pos] == ']' { 734 return yaml_parser_fetch_flow_collection_end(parser, 735 yaml_FLOW_SEQUENCE_END_TOKEN) 736 } 737 738 // Is it the flow mapping end indicator? 739 if parser.buffer[parser.buffer_pos] == '}' { 740 return yaml_parser_fetch_flow_collection_end(parser, 741 yaml_FLOW_MAPPING_END_TOKEN) 742 } 743 744 // Is it the flow entry indicator? 745 if parser.buffer[parser.buffer_pos] == ',' { 746 return yaml_parser_fetch_flow_entry(parser) 747 } 748 749 // Is it the block entry indicator? 750 if parser.buffer[parser.buffer_pos] == '-' && is_blankz(parser.buffer, parser.buffer_pos+1) { 751 return yaml_parser_fetch_block_entry(parser) 752 } 753 754 // Is it the key indicator? 755 if parser.buffer[parser.buffer_pos] == '?' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) { 756 return yaml_parser_fetch_key(parser) 757 } 758 759 // Is it the value indicator? 760 if parser.buffer[parser.buffer_pos] == ':' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) { 761 return yaml_parser_fetch_value(parser) 762 } 763 764 // Is it an alias? 765 if parser.buffer[parser.buffer_pos] == '*' { 766 return yaml_parser_fetch_anchor(parser, yaml_ALIAS_TOKEN) 767 } 768 769 // Is it an anchor? 770 if parser.buffer[parser.buffer_pos] == '&' { 771 return yaml_parser_fetch_anchor(parser, yaml_ANCHOR_TOKEN) 772 } 773 774 // Is it a tag? 775 if parser.buffer[parser.buffer_pos] == '!' { 776 return yaml_parser_fetch_tag(parser) 777 } 778 779 // Is it a literal scalar? 
780 if parser.buffer[parser.buffer_pos] == '|' && parser.flow_level == 0 { 781 return yaml_parser_fetch_block_scalar(parser, true) 782 } 783 784 // Is it a folded scalar? 785 if parser.buffer[parser.buffer_pos] == '>' && parser.flow_level == 0 { 786 return yaml_parser_fetch_block_scalar(parser, false) 787 } 788 789 // Is it a single-quoted scalar? 790 if parser.buffer[parser.buffer_pos] == '\'' { 791 return yaml_parser_fetch_flow_scalar(parser, true) 792 } 793 794 // Is it a double-quoted scalar? 795 if parser.buffer[parser.buffer_pos] == '"' { 796 return yaml_parser_fetch_flow_scalar(parser, false) 797 } 798 799 // Is it a plain scalar? 800 // 801 // A plain scalar may start with any non-blank characters except 802 // 803 // '-', '?', ':', ',', '[', ']', '{', '}', 804 // '#', '&', '*', '!', '|', '>', '\'', '\"', 805 // '%', '@', '`'. 806 // 807 // In the block context (and, for the '-' indicator, in the flow context 808 // too), it may also start with the characters 809 // 810 // '-', '?', ':' 811 // 812 // if it is followed by a non-space character. 813 // 814 // The last rule is more restrictive than the specification requires. 815 // [Go] Make this logic more reasonable. 816 //switch parser.buffer[parser.buffer_pos] { 817 //case '-', '?', ':', ',', '?', '-', ',', ':', ']', '[', '}', '{', '&', '#', '!', '*', '>', '|', '"', '\'', '@', '%', '-', '`': 818 //} 819 if !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '-' || 820 parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':' || 821 parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '[' || 822 parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' || 823 parser.buffer[parser.buffer_pos] == '}' || parser.buffer[parser.buffer_pos] == '#' || 824 parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '*' || 825 parser.buffer[parser.buffer_pos] == '!' 
|| parser.buffer[parser.buffer_pos] == '|' || 826 parser.buffer[parser.buffer_pos] == '>' || parser.buffer[parser.buffer_pos] == '\'' || 827 parser.buffer[parser.buffer_pos] == '"' || parser.buffer[parser.buffer_pos] == '%' || 828 parser.buffer[parser.buffer_pos] == '@' || parser.buffer[parser.buffer_pos] == '`') || 829 (parser.buffer[parser.buffer_pos] == '-' && !is_blank(parser.buffer, parser.buffer_pos+1)) || 830 (parser.flow_level == 0 && 831 (parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':') && 832 !is_blankz(parser.buffer, parser.buffer_pos+1)) { 833 return yaml_parser_fetch_plain_scalar(parser) 834 } 835 836 // If we don't determine the token type so far, it is an error. 837 return yaml_parser_set_scanner_error(parser, 838 "while scanning for the next token", parser.mark, 839 "found character that cannot start any token") 840 } 841 842 // Check the list of potential simple keys and remove the positions that 843 // cannot contain simple keys anymore. 844 func yaml_parser_stale_simple_keys(parser *yaml_parser_t) bool { 845 // Check for a potential simple key for each flow level. 846 for i := range parser.simple_keys { 847 simple_key := &parser.simple_keys[i] 848 849 // The specification requires that a simple key 850 // 851 // - is limited to a single line, 852 // - is shorter than 1024 characters. 853 if simple_key.possible && (simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index) { 854 855 // Check if the potential simple key to be removed is required. 856 if simple_key.required { 857 return yaml_parser_set_scanner_error(parser, 858 "while scanning a simple key", simple_key.mark, 859 "could not find expected ':'") 860 } 861 simple_key.possible = false 862 } 863 } 864 return true 865 } 866 867 // Check if a simple key may start at the current position and add it if 868 // needed. 
869 func yaml_parser_save_simple_key(parser *yaml_parser_t) bool { 870 // A simple key is required at the current position if the scanner is in 871 // the block context and the current column coincides with the indentation 872 // level. 873 874 required := parser.flow_level == 0 && parser.indent == parser.mark.column 875 876 // 877 // If the current position may start a simple key, save it. 878 // 879 if parser.simple_key_allowed { 880 simple_key := yaml_simple_key_t{ 881 possible: true, 882 required: required, 883 token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head), 884 } 885 simple_key.mark = parser.mark 886 887 if !yaml_parser_remove_simple_key(parser) { 888 return false 889 } 890 parser.simple_keys[len(parser.simple_keys)-1] = simple_key 891 } 892 return true 893 } 894 895 // Remove a potential simple key at the current flow level. 896 func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool { 897 i := len(parser.simple_keys) - 1 898 if parser.simple_keys[i].possible { 899 // If the key is required, it is an error. 900 if parser.simple_keys[i].required { 901 return yaml_parser_set_scanner_error(parser, 902 "while scanning a simple key", parser.simple_keys[i].mark, 903 "could not find expected ':'") 904 } 905 } 906 // Remove the key from the stack. 907 parser.simple_keys[i].possible = false 908 return true 909 } 910 911 // Increase the flow level and resize the simple key list if needed. 912 func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool { 913 // Reset the simple key on the next level. 914 parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{}) 915 916 // Increase the flow level. 917 parser.flow_level++ 918 return true 919 } 920 921 // Decrease the flow level. 
922 func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool { 923 if parser.flow_level > 0 { 924 parser.flow_level-- 925 parser.simple_keys = parser.simple_keys[:len(parser.simple_keys)-1] 926 } 927 return true 928 } 929 930 // Push the current indentation level to the stack and set the new level 931 // the current column is greater than the indentation level. In this case, 932 // append or insert the specified token into the token queue. 933 func yaml_parser_roll_indent(parser *yaml_parser_t, column, number int, typ yaml_token_type_t, mark yaml_mark_t) bool { 934 // In the flow context, do nothing. 935 if parser.flow_level > 0 { 936 return true 937 } 938 939 if parser.indent < column { 940 // Push the current indentation level to the stack and set the new 941 // indentation level. 942 parser.indents = append(parser.indents, parser.indent) 943 parser.indent = column 944 945 // Create a token and insert it into the queue. 946 token := yaml_token_t{ 947 typ: typ, 948 start_mark: mark, 949 end_mark: mark, 950 } 951 if number > -1 { 952 number -= parser.tokens_parsed 953 } 954 yaml_insert_token(parser, number, &token) 955 } 956 return true 957 } 958 959 // Pop indentation levels from the indents stack until the current level 960 // becomes less or equal to the column. For each indentation level, append 961 // the BLOCK-END token. 962 func yaml_parser_unroll_indent(parser *yaml_parser_t, column int) bool { 963 // In the flow context, do nothing. 964 if parser.flow_level > 0 { 965 return true 966 } 967 968 // Loop through the indentation levels in the stack. 969 for parser.indent > column { 970 // Create a token and append it to the queue. 971 token := yaml_token_t{ 972 typ: yaml_BLOCK_END_TOKEN, 973 start_mark: parser.mark, 974 end_mark: parser.mark, 975 } 976 yaml_insert_token(parser, -1, &token) 977 978 // Pop the indentation level. 
979 parser.indent = parser.indents[len(parser.indents)-1] 980 parser.indents = parser.indents[:len(parser.indents)-1] 981 } 982 return true 983 } 984 985 // Initialize the scanner and produce the STREAM-START token. 986 func yaml_parser_fetch_stream_start(parser *yaml_parser_t) bool { 987 988 // Set the initial indentation. 989 parser.indent = -1 990 991 // Initialize the simple key stack. 992 parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{}) 993 994 // A simple key is allowed at the beginning of the stream. 995 parser.simple_key_allowed = true 996 997 // We have started. 998 parser.stream_start_produced = true 999 1000 // Create the STREAM-START token and append it to the queue. 1001 token := yaml_token_t{ 1002 typ: yaml_STREAM_START_TOKEN, 1003 start_mark: parser.mark, 1004 end_mark: parser.mark, 1005 encoding: parser.encoding, 1006 } 1007 yaml_insert_token(parser, -1, &token) 1008 return true 1009 } 1010 1011 // Produce the STREAM-END token and shut down the scanner. 1012 func yaml_parser_fetch_stream_end(parser *yaml_parser_t) bool { 1013 1014 // Force new line. 1015 if parser.mark.column != 0 { 1016 parser.mark.column = 0 1017 parser.mark.line++ 1018 } 1019 1020 // Reset the indentation level. 1021 if !yaml_parser_unroll_indent(parser, -1) { 1022 return false 1023 } 1024 1025 // Reset simple keys. 1026 if !yaml_parser_remove_simple_key(parser) { 1027 return false 1028 } 1029 1030 parser.simple_key_allowed = false 1031 1032 // Create the STREAM-END token and append it to the queue. 1033 token := yaml_token_t{ 1034 typ: yaml_STREAM_END_TOKEN, 1035 start_mark: parser.mark, 1036 end_mark: parser.mark, 1037 } 1038 yaml_insert_token(parser, -1, &token) 1039 return true 1040 } 1041 1042 // Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token. 1043 func yaml_parser_fetch_directive(parser *yaml_parser_t) bool { 1044 // Reset the indentation level. 1045 if !yaml_parser_unroll_indent(parser, -1) { 1046 return false 1047 } 1048 1049 // Reset simple keys. 
1050 if !yaml_parser_remove_simple_key(parser) { 1051 return false 1052 } 1053 1054 parser.simple_key_allowed = false 1055 1056 // Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. 1057 token := yaml_token_t{} 1058 if !yaml_parser_scan_directive(parser, &token) { 1059 return false 1060 } 1061 // Append the token to the queue. 1062 yaml_insert_token(parser, -1, &token) 1063 return true 1064 } 1065 1066 // Produce the DOCUMENT-START or DOCUMENT-END token. 1067 func yaml_parser_fetch_document_indicator(parser *yaml_parser_t, typ yaml_token_type_t) bool { 1068 // Reset the indentation level. 1069 if !yaml_parser_unroll_indent(parser, -1) { 1070 return false 1071 } 1072 1073 // Reset simple keys. 1074 if !yaml_parser_remove_simple_key(parser) { 1075 return false 1076 } 1077 1078 parser.simple_key_allowed = false 1079 1080 // Consume the token. 1081 start_mark := parser.mark 1082 1083 skip(parser) 1084 skip(parser) 1085 skip(parser) 1086 1087 end_mark := parser.mark 1088 1089 // Create the DOCUMENT-START or DOCUMENT-END token. 1090 token := yaml_token_t{ 1091 typ: typ, 1092 start_mark: start_mark, 1093 end_mark: end_mark, 1094 } 1095 // Append the token to the queue. 1096 yaml_insert_token(parser, -1, &token) 1097 return true 1098 } 1099 1100 // Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token. 1101 func yaml_parser_fetch_flow_collection_start(parser *yaml_parser_t, typ yaml_token_type_t) bool { 1102 // The indicators '[' and '{' may start a simple key. 1103 if !yaml_parser_save_simple_key(parser) { 1104 return false 1105 } 1106 1107 // Increase the flow level. 1108 if !yaml_parser_increase_flow_level(parser) { 1109 return false 1110 } 1111 1112 // A simple key may follow the indicators '[' and '{'. 1113 parser.simple_key_allowed = true 1114 1115 // Consume the token. 1116 start_mark := parser.mark 1117 skip(parser) 1118 end_mark := parser.mark 1119 1120 // Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token. 
1121 token := yaml_token_t{ 1122 typ: typ, 1123 start_mark: start_mark, 1124 end_mark: end_mark, 1125 } 1126 // Append the token to the queue. 1127 yaml_insert_token(parser, -1, &token) 1128 return true 1129 } 1130 1131 // Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token. 1132 func yaml_parser_fetch_flow_collection_end(parser *yaml_parser_t, typ yaml_token_type_t) bool { 1133 // Reset any potential simple key on the current flow level. 1134 if !yaml_parser_remove_simple_key(parser) { 1135 return false 1136 } 1137 1138 // Decrease the flow level. 1139 if !yaml_parser_decrease_flow_level(parser) { 1140 return false 1141 } 1142 1143 // No simple keys after the indicators ']' and '}'. 1144 parser.simple_key_allowed = false 1145 1146 // Consume the token. 1147 1148 start_mark := parser.mark 1149 skip(parser) 1150 end_mark := parser.mark 1151 1152 // Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token. 1153 token := yaml_token_t{ 1154 typ: typ, 1155 start_mark: start_mark, 1156 end_mark: end_mark, 1157 } 1158 // Append the token to the queue. 1159 yaml_insert_token(parser, -1, &token) 1160 return true 1161 } 1162 1163 // Produce the FLOW-ENTRY token. 1164 func yaml_parser_fetch_flow_entry(parser *yaml_parser_t) bool { 1165 // Reset any potential simple keys on the current flow level. 1166 if !yaml_parser_remove_simple_key(parser) { 1167 return false 1168 } 1169 1170 // Simple keys are allowed after ','. 1171 parser.simple_key_allowed = true 1172 1173 // Consume the token. 1174 start_mark := parser.mark 1175 skip(parser) 1176 end_mark := parser.mark 1177 1178 // Create the FLOW-ENTRY token and append it to the queue. 1179 token := yaml_token_t{ 1180 typ: yaml_FLOW_ENTRY_TOKEN, 1181 start_mark: start_mark, 1182 end_mark: end_mark, 1183 } 1184 yaml_insert_token(parser, -1, &token) 1185 return true 1186 } 1187 1188 // Produce the BLOCK-ENTRY token. 
1189 func yaml_parser_fetch_block_entry(parser *yaml_parser_t) bool { 1190 // Check if the scanner is in the block context. 1191 if parser.flow_level == 0 { 1192 // Check if we are allowed to start a new entry. 1193 if !parser.simple_key_allowed { 1194 return yaml_parser_set_scanner_error(parser, "", parser.mark, 1195 "block sequence entries are not allowed in this context") 1196 } 1197 // Add the BLOCK-SEQUENCE-START token if needed. 1198 if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_SEQUENCE_START_TOKEN, parser.mark) { 1199 return false 1200 } 1201 } else { 1202 // It is an error for the '-' indicator to occur in the flow context, 1203 // but we let the Parser detect and report about it because the Parser 1204 // is able to point to the context. 1205 } 1206 1207 // Reset any potential simple keys on the current flow level. 1208 if !yaml_parser_remove_simple_key(parser) { 1209 return false 1210 } 1211 1212 // Simple keys are allowed after '-'. 1213 parser.simple_key_allowed = true 1214 1215 // Consume the token. 1216 start_mark := parser.mark 1217 skip(parser) 1218 end_mark := parser.mark 1219 1220 // Create the BLOCK-ENTRY token and append it to the queue. 1221 token := yaml_token_t{ 1222 typ: yaml_BLOCK_ENTRY_TOKEN, 1223 start_mark: start_mark, 1224 end_mark: end_mark, 1225 } 1226 yaml_insert_token(parser, -1, &token) 1227 return true 1228 } 1229 1230 // Produce the KEY token. 1231 func yaml_parser_fetch_key(parser *yaml_parser_t) bool { 1232 1233 // In the block context, additional checks are required. 1234 if parser.flow_level == 0 { 1235 // Check if we are allowed to start a new key (not nessesary simple). 1236 if !parser.simple_key_allowed { 1237 return yaml_parser_set_scanner_error(parser, "", parser.mark, 1238 "mapping keys are not allowed in this context") 1239 } 1240 // Add the BLOCK-MAPPING-START token if needed. 
1241 if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) { 1242 return false 1243 } 1244 } 1245 1246 // Reset any potential simple keys on the current flow level. 1247 if !yaml_parser_remove_simple_key(parser) { 1248 return false 1249 } 1250 1251 // Simple keys are allowed after '?' in the block context. 1252 parser.simple_key_allowed = parser.flow_level == 0 1253 1254 // Consume the token. 1255 start_mark := parser.mark 1256 skip(parser) 1257 end_mark := parser.mark 1258 1259 // Create the KEY token and append it to the queue. 1260 token := yaml_token_t{ 1261 typ: yaml_KEY_TOKEN, 1262 start_mark: start_mark, 1263 end_mark: end_mark, 1264 } 1265 yaml_insert_token(parser, -1, &token) 1266 return true 1267 } 1268 1269 // Produce the VALUE token. 1270 func yaml_parser_fetch_value(parser *yaml_parser_t) bool { 1271 1272 simple_key := &parser.simple_keys[len(parser.simple_keys)-1] 1273 1274 // Have we found a simple key? 1275 if simple_key.possible { 1276 // Create the KEY token and insert it into the queue. 1277 token := yaml_token_t{ 1278 typ: yaml_KEY_TOKEN, 1279 start_mark: simple_key.mark, 1280 end_mark: simple_key.mark, 1281 } 1282 yaml_insert_token(parser, simple_key.token_number-parser.tokens_parsed, &token) 1283 1284 // In the block context, we may need to add the BLOCK-MAPPING-START token. 1285 if !yaml_parser_roll_indent(parser, simple_key.mark.column, 1286 simple_key.token_number, 1287 yaml_BLOCK_MAPPING_START_TOKEN, simple_key.mark) { 1288 return false 1289 } 1290 1291 // Remove the simple key. 1292 simple_key.possible = false 1293 1294 // A simple key cannot follow another simple key. 1295 parser.simple_key_allowed = false 1296 1297 } else { 1298 // The ':' indicator follows a complex key. 1299 1300 // In the block context, extra checks are required. 1301 if parser.flow_level == 0 { 1302 1303 // Check if we are allowed to start a complex value. 
1304 if !parser.simple_key_allowed { 1305 return yaml_parser_set_scanner_error(parser, "", parser.mark, 1306 "mapping values are not allowed in this context") 1307 } 1308 1309 // Add the BLOCK-MAPPING-START token if needed. 1310 if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) { 1311 return false 1312 } 1313 } 1314 1315 // Simple keys after ':' are allowed in the block context. 1316 parser.simple_key_allowed = parser.flow_level == 0 1317 } 1318 1319 // Consume the token. 1320 start_mark := parser.mark 1321 skip(parser) 1322 end_mark := parser.mark 1323 1324 // Create the VALUE token and append it to the queue. 1325 token := yaml_token_t{ 1326 typ: yaml_VALUE_TOKEN, 1327 start_mark: start_mark, 1328 end_mark: end_mark, 1329 } 1330 yaml_insert_token(parser, -1, &token) 1331 return true 1332 } 1333 1334 // Produce the ALIAS or ANCHOR token. 1335 func yaml_parser_fetch_anchor(parser *yaml_parser_t, typ yaml_token_type_t) bool { 1336 // An anchor or an alias could be a simple key. 1337 if !yaml_parser_save_simple_key(parser) { 1338 return false 1339 } 1340 1341 // A simple key cannot follow an anchor or an alias. 1342 parser.simple_key_allowed = false 1343 1344 // Create the ALIAS or ANCHOR token and append it to the queue. 1345 var token yaml_token_t 1346 if !yaml_parser_scan_anchor(parser, &token, typ) { 1347 return false 1348 } 1349 yaml_insert_token(parser, -1, &token) 1350 return true 1351 } 1352 1353 // Produce the TAG token. 1354 func yaml_parser_fetch_tag(parser *yaml_parser_t) bool { 1355 // A tag could be a simple key. 1356 if !yaml_parser_save_simple_key(parser) { 1357 return false 1358 } 1359 1360 // A simple key cannot follow a tag. 1361 parser.simple_key_allowed = false 1362 1363 // Create the TAG token and append it to the queue. 
1364 var token yaml_token_t 1365 if !yaml_parser_scan_tag(parser, &token) { 1366 return false 1367 } 1368 yaml_insert_token(parser, -1, &token) 1369 return true 1370 } 1371 1372 // Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens. 1373 func yaml_parser_fetch_block_scalar(parser *yaml_parser_t, literal bool) bool { 1374 // Remove any potential simple keys. 1375 if !yaml_parser_remove_simple_key(parser) { 1376 return false 1377 } 1378 1379 // A simple key may follow a block scalar. 1380 parser.simple_key_allowed = true 1381 1382 // Create the SCALAR token and append it to the queue. 1383 var token yaml_token_t 1384 if !yaml_parser_scan_block_scalar(parser, &token, literal) { 1385 return false 1386 } 1387 yaml_insert_token(parser, -1, &token) 1388 return true 1389 } 1390 1391 // Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens. 1392 func yaml_parser_fetch_flow_scalar(parser *yaml_parser_t, single bool) bool { 1393 // A plain scalar could be a simple key. 1394 if !yaml_parser_save_simple_key(parser) { 1395 return false 1396 } 1397 1398 // A simple key cannot follow a flow scalar. 1399 parser.simple_key_allowed = false 1400 1401 // Create the SCALAR token and append it to the queue. 1402 var token yaml_token_t 1403 if !yaml_parser_scan_flow_scalar(parser, &token, single) { 1404 return false 1405 } 1406 yaml_insert_token(parser, -1, &token) 1407 return true 1408 } 1409 1410 // Produce the SCALAR(...,plain) token. 1411 func yaml_parser_fetch_plain_scalar(parser *yaml_parser_t) bool { 1412 // A plain scalar could be a simple key. 1413 if !yaml_parser_save_simple_key(parser) { 1414 return false 1415 } 1416 1417 // A simple key cannot follow a flow scalar. 1418 parser.simple_key_allowed = false 1419 1420 // Create the SCALAR token and append it to the queue. 
1421 var token yaml_token_t 1422 if !yaml_parser_scan_plain_scalar(parser, &token) { 1423 return false 1424 } 1425 yaml_insert_token(parser, -1, &token) 1426 return true 1427 } 1428 1429 // Eat whitespaces and comments until the next token is found. 1430 func yaml_parser_scan_to_next_token(parser *yaml_parser_t) bool { 1431 1432 parser.linesSinceLast = 0 1433 parser.spacesSinceLast = 0 1434 1435 // Until the next token is not found. 1436 for { 1437 // Allow the BOM mark to start a line. 1438 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1439 return false 1440 } 1441 if parser.mark.column == 0 && is_bom(parser.buffer, parser.buffer_pos) { 1442 skip(parser) 1443 } 1444 1445 // Eat whitespaces. 1446 // Tabs are allowed: 1447 // - in the flow context 1448 // - in the block context, but not at the beginning of the line or 1449 // after '-', '?', or ':' (complex value). 1450 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1451 return false 1452 } 1453 1454 for parser.buffer[parser.buffer_pos] == ' ' || ((parser.flow_level > 0 || !parser.simple_key_allowed) && parser.buffer[parser.buffer_pos] == '\t') { 1455 skip(parser) 1456 parser.spacesSinceLast++ 1457 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1458 return false 1459 } 1460 } 1461 1462 // Eat a comment until a line break. 1463 if parser.buffer[parser.buffer_pos] == '#' { 1464 m := parser.mark 1465 parser.comment_buffer = parser.comment_buffer[:0] 1466 for !is_breakz(parser.buffer, parser.buffer_pos) { 1467 p := parser.buffer_pos 1468 skip(parser) 1469 parser.comment_buffer = append(parser.comment_buffer, 1470 parser.buffer[p:parser.buffer_pos]...) 1471 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1472 return false 1473 } 1474 } 1475 add_comment(parser, m, string(parser.comment_buffer)) 1476 } 1477 1478 // If it is a line break, eat it. 
1479 if is_break(parser.buffer, parser.buffer_pos) { 1480 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 1481 return false 1482 } 1483 skip_line(parser) 1484 parser.linesSinceLast++ 1485 1486 // In the block context, a new line may start a simple key. 1487 if parser.flow_level == 0 { 1488 parser.simple_key_allowed = true 1489 } 1490 } else { 1491 break // We have found a token. 1492 } 1493 } 1494 1495 return true 1496 } 1497 1498 // Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token. 1499 // 1500 // Scope: 1501 // 1502 // %YAML 1.1 # a comment \n 1503 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 1504 // %TAG !yaml! tag:yaml.org,2002: \n 1505 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 1506 func yaml_parser_scan_directive(parser *yaml_parser_t, token *yaml_token_t) bool { 1507 // Eat '%'. 1508 start_mark := parser.mark 1509 skip(parser) 1510 1511 // Scan the directive name. 1512 var name []byte 1513 if !yaml_parser_scan_directive_name(parser, start_mark, &name) { 1514 return false 1515 } 1516 1517 // Is it a YAML directive? 1518 if bytes.Equal(name, []byte("YAML")) { 1519 // Scan the VERSION directive value. 1520 var major, minor int8 1521 if !yaml_parser_scan_version_directive_value(parser, start_mark, &major, &minor) { 1522 return false 1523 } 1524 end_mark := parser.mark 1525 1526 // Create a VERSION-DIRECTIVE token. 1527 *token = yaml_token_t{ 1528 typ: yaml_VERSION_DIRECTIVE_TOKEN, 1529 start_mark: start_mark, 1530 end_mark: end_mark, 1531 major: major, 1532 minor: minor, 1533 } 1534 1535 // Is it a TAG directive? 1536 } else if bytes.Equal(name, []byte("TAG")) { 1537 // Scan the TAG directive value. 1538 var handle, prefix []byte 1539 if !yaml_parser_scan_tag_directive_value(parser, start_mark, &handle, &prefix) { 1540 return false 1541 } 1542 end_mark := parser.mark 1543 1544 // Create a TAG-DIRECTIVE token. 
1545 *token = yaml_token_t{ 1546 typ: yaml_TAG_DIRECTIVE_TOKEN, 1547 start_mark: start_mark, 1548 end_mark: end_mark, 1549 value: handle, 1550 prefix: prefix, 1551 } 1552 1553 // Unknown directive. 1554 } else { 1555 yaml_parser_set_scanner_error(parser, "while scanning a directive", 1556 start_mark, "found unknown directive name") 1557 return false 1558 } 1559 1560 // Eat the rest of the line including any comments. 1561 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1562 return false 1563 } 1564 1565 for is_blank(parser.buffer, parser.buffer_pos) { 1566 skip(parser) 1567 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1568 return false 1569 } 1570 } 1571 1572 if parser.buffer[parser.buffer_pos] == '#' { 1573 m := parser.mark 1574 parser.comment_buffer = parser.comment_buffer[:0] 1575 for !is_breakz(parser.buffer, parser.buffer_pos) { 1576 p := parser.buffer_pos 1577 skip(parser) 1578 parser.comment_buffer = append(parser.comment_buffer, 1579 parser.buffer[p:parser.buffer_pos]...) 1580 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1581 return false 1582 } 1583 } 1584 add_comment(parser, m, string(parser.comment_buffer)) 1585 } 1586 1587 // Check if we are at the end of the line. 1588 if !is_breakz(parser.buffer, parser.buffer_pos) { 1589 yaml_parser_set_scanner_error(parser, "while scanning a directive", 1590 start_mark, "did not find expected comment or line break") 1591 return false 1592 } 1593 1594 // Eat a line break. 1595 if is_break(parser.buffer, parser.buffer_pos) { 1596 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 1597 return false 1598 } 1599 skip_line(parser) 1600 } 1601 1602 return true 1603 } 1604 1605 // Scan the directive name. 1606 // 1607 // Scope: 1608 // 1609 // %YAML 1.1 # a comment \n 1610 // ^^^^ 1611 // %TAG !yaml! 
tag:yaml.org,2002: \n 1612 // ^^^ 1613 func yaml_parser_scan_directive_name(parser *yaml_parser_t, start_mark yaml_mark_t, name *[]byte) bool { 1614 // Consume the directive name. 1615 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1616 return false 1617 } 1618 1619 var s []byte 1620 for is_alpha(parser.buffer, parser.buffer_pos) { 1621 s = read(parser, s) 1622 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1623 return false 1624 } 1625 } 1626 1627 // Check if the name is empty. 1628 if len(s) == 0 { 1629 yaml_parser_set_scanner_error(parser, "while scanning a directive", 1630 start_mark, "could not find expected directive name") 1631 return false 1632 } 1633 1634 // Check for an blank character after the name. 1635 if !is_blankz(parser.buffer, parser.buffer_pos) { 1636 yaml_parser_set_scanner_error(parser, "while scanning a directive", 1637 start_mark, "found unexpected non-alphabetical character") 1638 return false 1639 } 1640 *name = s 1641 return true 1642 } 1643 1644 // Scan the value of VERSION-DIRECTIVE. 1645 // 1646 // Scope: 1647 // 1648 // %YAML 1.1 # a comment \n 1649 // ^^^^^^ 1650 func yaml_parser_scan_version_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, major, minor *int8) bool { 1651 // Eat whitespaces. 1652 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1653 return false 1654 } 1655 for is_blank(parser.buffer, parser.buffer_pos) { 1656 skip(parser) 1657 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1658 return false 1659 } 1660 } 1661 1662 // Consume the major version number. 1663 if !yaml_parser_scan_version_directive_number(parser, start_mark, major) { 1664 return false 1665 } 1666 1667 // Eat '.'. 1668 if parser.buffer[parser.buffer_pos] != '.' { 1669 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", 1670 start_mark, "did not find expected digit or '.' 
character") 1671 } 1672 1673 skip(parser) 1674 1675 // Consume the minor version number. 1676 if !yaml_parser_scan_version_directive_number(parser, start_mark, minor) { 1677 return false 1678 } 1679 return true 1680 } 1681 1682 const max_number_length = 2 1683 1684 // Scan the version number of VERSION-DIRECTIVE. 1685 // 1686 // Scope: 1687 // 1688 // %YAML 1.1 # a comment \n 1689 // ^ 1690 // %YAML 1.1 # a comment \n 1691 // ^ 1692 func yaml_parser_scan_version_directive_number(parser *yaml_parser_t, start_mark yaml_mark_t, number *int8) bool { 1693 1694 // Repeat while the next character is digit. 1695 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1696 return false 1697 } 1698 var value, length int8 1699 for is_digit(parser.buffer, parser.buffer_pos) { 1700 // Check if the number is too long. 1701 length++ 1702 if length > max_number_length { 1703 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", 1704 start_mark, "found extremely long version number") 1705 } 1706 value = value*10 + int8(as_digit(parser.buffer, parser.buffer_pos)) 1707 skip(parser) 1708 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1709 return false 1710 } 1711 } 1712 1713 // Check if the number was present. 1714 if length == 0 { 1715 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", 1716 start_mark, "did not find expected version number") 1717 } 1718 *number = value 1719 return true 1720 } 1721 1722 // Scan the value of a TAG-DIRECTIVE token. 1723 // 1724 // Scope: 1725 // 1726 // %TAG !yaml! tag:yaml.org,2002: \n 1727 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 1728 func yaml_parser_scan_tag_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, handle, prefix *[]byte) bool { 1729 var handle_value, prefix_value []byte 1730 1731 // Eat whitespaces. 
1732 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1733 return false 1734 } 1735 1736 for is_blank(parser.buffer, parser.buffer_pos) { 1737 skip(parser) 1738 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1739 return false 1740 } 1741 } 1742 1743 // Scan a handle. 1744 if !yaml_parser_scan_tag_handle(parser, true, start_mark, &handle_value) { 1745 return false 1746 } 1747 1748 // Expect a whitespace. 1749 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1750 return false 1751 } 1752 if !is_blank(parser.buffer, parser.buffer_pos) { 1753 yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", 1754 start_mark, "did not find expected whitespace") 1755 return false 1756 } 1757 1758 // Eat whitespaces. 1759 for is_blank(parser.buffer, parser.buffer_pos) { 1760 skip(parser) 1761 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1762 return false 1763 } 1764 } 1765 1766 // Scan a prefix. 1767 if !yaml_parser_scan_tag_uri(parser, true, nil, start_mark, &prefix_value) { 1768 return false 1769 } 1770 1771 // Expect a whitespace or line break. 1772 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1773 return false 1774 } 1775 if !is_blankz(parser.buffer, parser.buffer_pos) { 1776 yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", 1777 start_mark, "did not find expected whitespace or line break") 1778 return false 1779 } 1780 1781 *handle = handle_value 1782 *prefix = prefix_value 1783 return true 1784 } 1785 1786 func yaml_parser_scan_anchor(parser *yaml_parser_t, token *yaml_token_t, typ yaml_token_type_t) bool { 1787 var s []byte 1788 1789 // Eat the indicator character. 1790 start_mark := parser.mark 1791 skip(parser) 1792 1793 // Consume the value. 
1794 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1795 return false 1796 } 1797 1798 for is_alpha(parser.buffer, parser.buffer_pos) { 1799 s = read(parser, s) 1800 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1801 return false 1802 } 1803 } 1804 1805 end_mark := parser.mark 1806 1807 /* 1808 * Check if length of the anchor is greater than 0 and it is followed by 1809 * a whitespace character or one of the indicators: 1810 * 1811 * '?', ':', ',', ']', '}', '%', '@', '`'. 1812 */ 1813 1814 if len(s) == 0 || 1815 !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '?' || 1816 parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == ',' || 1817 parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '}' || 1818 parser.buffer[parser.buffer_pos] == '%' || parser.buffer[parser.buffer_pos] == '@' || 1819 parser.buffer[parser.buffer_pos] == '`') { 1820 context := "while scanning an alias" 1821 if typ == yaml_ANCHOR_TOKEN { 1822 context = "while scanning an anchor" 1823 } 1824 yaml_parser_set_scanner_error(parser, context, start_mark, 1825 "did not find expected alphabetic or numeric character") 1826 return false 1827 } 1828 1829 // Create a token. 1830 *token = yaml_token_t{ 1831 typ: typ, 1832 start_mark: start_mark, 1833 end_mark: end_mark, 1834 value: s, 1835 } 1836 1837 return true 1838 } 1839 1840 /* 1841 * Scan a TAG token. 1842 */ 1843 1844 func yaml_parser_scan_tag(parser *yaml_parser_t, token *yaml_token_t) bool { 1845 var handle, suffix []byte 1846 1847 start_mark := parser.mark 1848 1849 // Check if the tag is in the canonical form. 1850 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 1851 return false 1852 } 1853 1854 if parser.buffer[parser.buffer_pos+1] == '<' { 1855 // Keep the handle as '' 1856 1857 // Eat '!<' 1858 skip(parser) 1859 skip(parser) 1860 1861 // Consume the tag value. 
1862 if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) { 1863 return false 1864 } 1865 1866 // Check for '>' and eat it. 1867 if parser.buffer[parser.buffer_pos] != '>' { 1868 yaml_parser_set_scanner_error(parser, "while scanning a tag", 1869 start_mark, "did not find the expected '>'") 1870 return false 1871 } 1872 1873 skip(parser) 1874 } else { 1875 // The tag has either the '!suffix' or the '!handle!suffix' form. 1876 1877 // First, try to scan a handle. 1878 if !yaml_parser_scan_tag_handle(parser, false, start_mark, &handle) { 1879 return false 1880 } 1881 1882 // Check if it is, indeed, handle. 1883 if handle[0] == '!' && len(handle) > 1 && handle[len(handle)-1] == '!' { 1884 // Scan the suffix now. 1885 if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) { 1886 return false 1887 } 1888 } else { 1889 // It wasn't a handle after all. Scan the rest of the tag. 1890 if !yaml_parser_scan_tag_uri(parser, false, handle, start_mark, &suffix) { 1891 return false 1892 } 1893 1894 // Set the handle to '!'. 1895 handle = []byte{'!'} 1896 1897 // A special case: the '!' tag. Set the handle to '' and the 1898 // suffix to '!'. 1899 if len(suffix) == 0 { 1900 handle, suffix = suffix, handle 1901 } 1902 } 1903 } 1904 1905 // Check the character which ends the tag. 1906 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1907 return false 1908 } 1909 if !is_blankz(parser.buffer, parser.buffer_pos) { 1910 yaml_parser_set_scanner_error(parser, "while scanning a tag", 1911 start_mark, "did not find expected whitespace or line break") 1912 return false 1913 } 1914 1915 end_mark := parser.mark 1916 1917 // Create a token. 1918 *token = yaml_token_t{ 1919 typ: yaml_TAG_TOKEN, 1920 start_mark: start_mark, 1921 end_mark: end_mark, 1922 value: handle, 1923 suffix: suffix, 1924 } 1925 return true 1926 } 1927 1928 // Scan a tag handle. 
1929 func yaml_parser_scan_tag_handle(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, handle *[]byte) bool { 1930 // Check the initial '!' character. 1931 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1932 return false 1933 } 1934 if parser.buffer[parser.buffer_pos] != '!' { 1935 yaml_parser_set_scanner_tag_error(parser, directive, 1936 start_mark, "did not find expected '!'") 1937 return false 1938 } 1939 1940 var s []byte 1941 1942 // Copy the '!' character. 1943 s = read(parser, s) 1944 1945 // Copy all subsequent alphabetical and numerical characters. 1946 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1947 return false 1948 } 1949 for is_alpha(parser.buffer, parser.buffer_pos) { 1950 s = read(parser, s) 1951 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1952 return false 1953 } 1954 } 1955 1956 // Check if the trailing character is '!' and copy it. 1957 if parser.buffer[parser.buffer_pos] == '!' { 1958 s = read(parser, s) 1959 } else { 1960 // It's either the '!' tag or not really a tag handle. If it's a %TAG 1961 // directive, it's an error. If it's a tag token, it must be a part of URI. 1962 if directive && string(s) != "!" { 1963 yaml_parser_set_scanner_tag_error(parser, directive, 1964 start_mark, "did not find expected '!'") 1965 return false 1966 } 1967 } 1968 1969 *handle = s 1970 return true 1971 } 1972 1973 // Scan a tag. 1974 func yaml_parser_scan_tag_uri(parser *yaml_parser_t, directive bool, head []byte, start_mark yaml_mark_t, uri *[]byte) bool { 1975 //size_t length = head ? strlen((char *)head) : 0 1976 var s []byte 1977 hasTag := len(head) > 0 1978 1979 // Copy the head if needed. 1980 // 1981 // Note that we don't copy the leading '!' character. 1982 if len(head) > 1 { 1983 s = append(s, head[1:]...) 1984 } 1985 1986 // Scan the tag. 
1987 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1988 return false 1989 } 1990 1991 // The set of characters that may appear in URI is as follows: 1992 // 1993 // '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&', 1994 // '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']', 1995 // '%'. 1996 // [Go] Convert this into more reasonable logic. 1997 for is_alpha(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == ';' || 1998 parser.buffer[parser.buffer_pos] == '/' || parser.buffer[parser.buffer_pos] == '?' || 1999 parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == '@' || 2000 parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '=' || 2001 parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '$' || 2002 parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '.' || 2003 parser.buffer[parser.buffer_pos] == '!' || parser.buffer[parser.buffer_pos] == '~' || 2004 parser.buffer[parser.buffer_pos] == '*' || parser.buffer[parser.buffer_pos] == '\'' || 2005 parser.buffer[parser.buffer_pos] == '(' || parser.buffer[parser.buffer_pos] == ')' || 2006 parser.buffer[parser.buffer_pos] == '[' || parser.buffer[parser.buffer_pos] == ']' || 2007 parser.buffer[parser.buffer_pos] == '%' { 2008 // Check if it is a URI-escape sequence. 2009 if parser.buffer[parser.buffer_pos] == '%' { 2010 if !yaml_parser_scan_uri_escapes(parser, directive, start_mark, &s) { 2011 return false 2012 } 2013 } else { 2014 s = read(parser, s) 2015 } 2016 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2017 return false 2018 } 2019 hasTag = true 2020 } 2021 2022 if !hasTag { 2023 yaml_parser_set_scanner_tag_error(parser, directive, 2024 start_mark, "did not find expected tag URI") 2025 return false 2026 } 2027 *uri = s 2028 return true 2029 } 2030 2031 // Decode an URI-escape sequence corresponding to a single UTF-8 character. 
//
// The sequence is a run of '%XX' octets. The decoded octets are appended to
// *s. directive is passed on to yaml_parser_set_scanner_tag_error when an
// error is reported.
func yaml_parser_scan_uri_escapes(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, s *[]byte) bool {

	// Decode the required number of characters.
	//
	// w is the number of octets still to decode. The initial value 1024 is a
	// marker meaning that the leading octet has not been seen yet; it is
	// replaced by the width reported for the leading octet.
	w := 1024
	for w > 0 {
		// Check for a URI-escaped octet.
		if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) {
			return false
		}

		if !(parser.buffer[parser.buffer_pos] == '%' &&
			is_hex(parser.buffer, parser.buffer_pos+1) &&
			is_hex(parser.buffer, parser.buffer_pos+2)) {
			return yaml_parser_set_scanner_tag_error(parser, directive,
				start_mark, "did not find URI escaped octet")
		}

		// Get the octet: the first hex digit is the high nibble, the second
		// one the low nibble.
		octet := byte((as_hex(parser.buffer, parser.buffer_pos+1) << 4) + as_hex(parser.buffer, parser.buffer_pos+2))

		// If it is the leading octet, determine the length of the UTF-8 sequence.
		if w == 1024 {
			w = width(octet)
			if w == 0 {
				return yaml_parser_set_scanner_tag_error(parser, directive,
					start_mark, "found an incorrect leading UTF-8 octet")
			}
		} else {
			// Check if the trailing octet is correct, that is, whether it
			// has the bit pattern 10xxxxxx.
			if octet&0xC0 != 0x80 {
				return yaml_parser_set_scanner_tag_error(parser, directive,
					start_mark, "found an incorrect trailing UTF-8 octet")
			}
		}

		// Copy the octet and move the pointers past the three characters of
		// the '%XX' escape.
		*s = append(*s, octet)
		skip(parser)
		skip(parser)
		skip(parser)
		w--
	}
	return true
}

// Scan a block scalar.
//
// literal selects the style recorded in the token (literal when true, folded
// otherwise) and disables line folding. On success *token is set to a SCALAR
// token holding the scanned value.
func yaml_parser_scan_block_scalar(parser *yaml_parser_t, token *yaml_token_t, literal bool) bool {
	// Eat the indicator '|' or '>'.
	start_mark := parser.mark
	skip(parser)

	// Scan the additional block scalar indicators.
	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
		return false
	}

	// Check for a chomping indicator.
	//
	// chomping is +1 for '+', -1 for '-', and 0 if no indicator is given.
	// increment is the explicit indentation indicator (1-9), or 0 if none is
	// given.
	var chomping, increment int
	if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' {
		// Set the chomping method and eat the indicator.
		if parser.buffer[parser.buffer_pos] == '+' {
			chomping = +1
		} else {
			chomping = -1
		}
		skip(parser)

		// Check for an indentation indicator.
		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
			return false
		}
		if is_digit(parser.buffer, parser.buffer_pos) {
			// Check that the indentation is greater than 0.
			if parser.buffer[parser.buffer_pos] == '0' {
				yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
					start_mark, "found an indentation indicator equal to 0")
				return false
			}

			// Get the indentation level and eat the indicator.
			increment = as_digit(parser.buffer, parser.buffer_pos)
			skip(parser)
		}

	} else if is_digit(parser.buffer, parser.buffer_pos) {
		// Do the same as above, but in the opposite order.

		if parser.buffer[parser.buffer_pos] == '0' {
			yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
				start_mark, "found an indentation indicator equal to 0")
			return false
		}
		increment = as_digit(parser.buffer, parser.buffer_pos)
		skip(parser)

		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
			return false
		}
		if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' {
			if parser.buffer[parser.buffer_pos] == '+' {
				chomping = +1
			} else {
				chomping = -1
			}
			skip(parser)
		}
	}

	// Eat whitespaces and comments to the end of the line.
	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
		return false
	}
	for is_blank(parser.buffer, parser.buffer_pos) {
		skip(parser)
		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
			return false
		}
	}
	if parser.buffer[parser.buffer_pos] == '#' {
		// Collect the comment text up to the line break and hand it to
		// add_comment together with the mark at which it started.
		m := parser.mark
		parser.comment_buffer = parser.comment_buffer[:0]
		for !is_breakz(parser.buffer, parser.buffer_pos) {
			// Copy exactly the bytes that skip consumed for this character.
			p := parser.buffer_pos
			skip(parser)
			parser.comment_buffer = append(parser.comment_buffer,
				parser.buffer[p:parser.buffer_pos]...)
			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
				return false
			}
		}
		add_comment(parser, m, string(parser.comment_buffer))
	}

	// Check if we are at the end of the line.
	if !is_breakz(parser.buffer, parser.buffer_pos) {
		yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
			start_mark, "did not find expected comment or line break")
		return false
	}

	// Eat a line break.
	if is_break(parser.buffer, parser.buffer_pos) {
		if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
			return false
		}
		skip_line(parser)
	}

	end_mark := parser.mark

	// Set the indentation level if it was specified. The explicit indicator
	// is relative to the current indentation when that is not negative.
	// Otherwise indent stays 0 and is passed as such to
	// yaml_parser_scan_block_scalar_breaks.
	var indent int
	if increment > 0 {
		if parser.indent >= 0 {
			indent = parser.indent + increment
		} else {
			indent = increment
		}
	}

	// Scan the leading line breaks and determine the indentation level if needed.
	//
	// s is the scalar value built so far. leading_break is the line break
	// that ended the last content line; trailing_breaks are the breaks of the
	// empty lines that followed it.
	var s, leading_break, trailing_breaks []byte
	if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) {
		return false
	}

	// Scan the block scalar content.
	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
		return false
	}
	// leading_blank records whether the previous content line started with a
	// blank character; trailing_blank does the same for the current line.
	var leading_blank, trailing_blank bool
	for parser.mark.column == indent && !is_z(parser.buffer, parser.buffer_pos) {
		// We are at the beginning of a non-empty line.

		// Is it a trailing whitespace?
		trailing_blank = is_blank(parser.buffer, parser.buffer_pos)

		// Check if we need to fold the leading line break. Folding applies
		// only in the folded style, only when neither the previous nor the
		// current line starts with a blank, and only to a '\n' break.
		if !literal && !leading_blank && !trailing_blank && len(leading_break) > 0 && leading_break[0] == '\n' {
			// Do we need to join the lines by space?
			if len(trailing_breaks) == 0 {
				s = append(s, ' ')
			}
		} else {
			s = append(s, leading_break...)
		}
		leading_break = leading_break[:0]

		// Append the remaining line breaks.
		s = append(s, trailing_breaks...)
		trailing_breaks = trailing_breaks[:0]

		// Is it a leading whitespace?
		leading_blank = is_blank(parser.buffer, parser.buffer_pos)

		// Consume the current line.
		for !is_breakz(parser.buffer, parser.buffer_pos) {
			s = read(parser, s)
			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
				return false
			}
		}

		// Consume the line break.
		if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
			return false
		}

		leading_break = read_line(parser, leading_break)

		// Eat the following indentation spaces and line breaks.
		if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) {
			return false
		}
	}

	// Chomp the tail: the final line break is kept unless the '-' indicator
	// was given, and the trailing empty lines are kept only with '+'.
	if chomping != -1 {
		s = append(s, leading_break...)
	}
	if chomping == 1 {
		s = append(s, trailing_breaks...)
	}

	// Create a token. The style defaults to literal and is switched to
	// folded below when literal is false.
	*token = yaml_token_t{
		typ:        yaml_SCALAR_TOKEN,
		start_mark: start_mark,
		end_mark:   end_mark,
		value:      s,
		style:      yaml_LITERAL_SCALAR_STYLE,
	}
	if !literal {
		token.style = yaml_FOLDED_SCALAR_STYLE
	}
	return true
}

// Scan indentation spaces and line breaks for a block scalar.  Determine the
// indentation level if needed.
func yaml_parser_scan_block_scalar_breaks(parser *yaml_parser_t, indent *int, breaks *[]byte, start_mark yaml_mark_t, end_mark *yaml_mark_t) bool {
	*end_mark = parser.mark

	// Eat the indentation spaces and line breaks.
	max_indent := 0
	for {
		// Eat the indentation spaces.
		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
			return false
		}
		for (*indent == 0 || parser.mark.column < *indent) && is_space(parser.buffer, parser.buffer_pos) {
			skip(parser)
			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
				return false
			}
		}
		if parser.mark.column > max_indent {
			max_indent = parser.mark.column
		}

		// Check for a tab character messing the indentation.
		if (*indent == 0 || parser.mark.column < *indent) && is_tab(parser.buffer, parser.buffer_pos) {
			return yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
				start_mark, "found a tab character where an indentation space is expected")
		}

		// Have we found a non-empty line?
		if !is_break(parser.buffer, parser.buffer_pos) {
			break
		}

		// Consume the line break.
		if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
			return false
		}
		// [Go] Should really be returning breaks instead.
		*breaks = read_line(parser, *breaks)
		*end_mark = parser.mark
	}

	// Determine the indentation level if needed.
2313 if *indent == 0 { 2314 *indent = max_indent 2315 if *indent < parser.indent+1 { 2316 *indent = parser.indent + 1 2317 } 2318 if *indent < 1 { 2319 *indent = 1 2320 } 2321 } 2322 return true 2323 } 2324 2325 // Scan a quoted scalar. 2326 func yaml_parser_scan_flow_scalar(parser *yaml_parser_t, token *yaml_token_t, single bool) bool { 2327 // Eat the left quote. 2328 start_mark := parser.mark 2329 skip(parser) 2330 2331 // Consume the content of the quoted scalar. 2332 var s, leading_break, trailing_breaks, whitespaces []byte 2333 for { 2334 // Check that there are no document indicators at the beginning of the line. 2335 if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) { 2336 return false 2337 } 2338 2339 if parser.mark.column == 0 && 2340 ((parser.buffer[parser.buffer_pos+0] == '-' && 2341 parser.buffer[parser.buffer_pos+1] == '-' && 2342 parser.buffer[parser.buffer_pos+2] == '-') || 2343 (parser.buffer[parser.buffer_pos+0] == '.' && 2344 parser.buffer[parser.buffer_pos+1] == '.' && 2345 parser.buffer[parser.buffer_pos+2] == '.')) && 2346 is_blankz(parser.buffer, parser.buffer_pos+3) { 2347 yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", 2348 start_mark, "found unexpected document indicator") 2349 return false 2350 } 2351 2352 // Check for EOF. 2353 if is_z(parser.buffer, parser.buffer_pos) { 2354 yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", 2355 start_mark, "found unexpected end of stream") 2356 return false 2357 } 2358 2359 // Consume non-blank characters. 2360 leading_blanks := false 2361 for !is_blankz(parser.buffer, parser.buffer_pos) { 2362 if single && parser.buffer[parser.buffer_pos] == '\'' && parser.buffer[parser.buffer_pos+1] == '\'' { 2363 // Is is an escaped single quote. 2364 s = append(s, '\'') 2365 skip(parser) 2366 skip(parser) 2367 2368 } else if single && parser.buffer[parser.buffer_pos] == '\'' { 2369 // It is a right single quote. 
2370 break 2371 } else if !single && parser.buffer[parser.buffer_pos] == '"' { 2372 // It is a right double quote. 2373 break 2374 2375 } else if !single && parser.buffer[parser.buffer_pos] == '\\' && is_break(parser.buffer, parser.buffer_pos+1) { 2376 // It is an escaped line break. 2377 if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) { 2378 return false 2379 } 2380 skip(parser) 2381 skip_line(parser) 2382 leading_blanks = true 2383 break 2384 2385 } else if !single && parser.buffer[parser.buffer_pos] == '\\' { 2386 // It is an escape sequence. 2387 code_length := 0 2388 2389 // Check the escape character. 2390 switch parser.buffer[parser.buffer_pos+1] { 2391 case '0': 2392 s = append(s, 0) 2393 case 'a': 2394 s = append(s, '\x07') 2395 case 'b': 2396 s = append(s, '\x08') 2397 case 't', '\t': 2398 s = append(s, '\x09') 2399 case 'n': 2400 s = append(s, '\x0A') 2401 case 'v': 2402 s = append(s, '\x0B') 2403 case 'f': 2404 s = append(s, '\x0C') 2405 case 'r': 2406 s = append(s, '\x0D') 2407 case 'e': 2408 s = append(s, '\x1B') 2409 case ' ': 2410 s = append(s, '\x20') 2411 case '"': 2412 s = append(s, '"') 2413 case '\'': 2414 s = append(s, '\'') 2415 case '\\': 2416 s = append(s, '\\') 2417 case 'N': // NEL (#x85) 2418 s = append(s, '\xC2') 2419 s = append(s, '\x85') 2420 case '_': // #xA0 2421 s = append(s, '\xC2') 2422 s = append(s, '\xA0') 2423 case 'L': // LS (#x2028) 2424 s = append(s, '\xE2') 2425 s = append(s, '\x80') 2426 s = append(s, '\xA8') 2427 case 'P': // PS (#x2029) 2428 s = append(s, '\xE2') 2429 s = append(s, '\x80') 2430 s = append(s, '\xA9') 2431 case 'x': 2432 code_length = 2 2433 case 'u': 2434 code_length = 4 2435 case 'U': 2436 code_length = 8 2437 default: 2438 yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", 2439 start_mark, "found unknown escape character") 2440 return false 2441 } 2442 2443 skip(parser) 2444 skip(parser) 2445 2446 // Consume an arbitrary escape code. 
2447 if code_length > 0 { 2448 var value int 2449 2450 // Scan the character value. 2451 if parser.unread < code_length && !yaml_parser_update_buffer(parser, code_length) { 2452 return false 2453 } 2454 for k := 0; k < code_length; k++ { 2455 if !is_hex(parser.buffer, parser.buffer_pos+k) { 2456 yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", 2457 start_mark, "did not find expected hexdecimal number") 2458 return false 2459 } 2460 value = (value << 4) + as_hex(parser.buffer, parser.buffer_pos+k) 2461 } 2462 2463 // Check the value and write the character. 2464 if (value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF { 2465 yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", 2466 start_mark, "found invalid Unicode character escape code") 2467 return false 2468 } 2469 if value <= 0x7F { 2470 s = append(s, byte(value)) 2471 } else if value <= 0x7FF { 2472 s = append(s, byte(0xC0+(value>>6))) 2473 s = append(s, byte(0x80+(value&0x3F))) 2474 } else if value <= 0xFFFF { 2475 s = append(s, byte(0xE0+(value>>12))) 2476 s = append(s, byte(0x80+((value>>6)&0x3F))) 2477 s = append(s, byte(0x80+(value&0x3F))) 2478 } else { 2479 s = append(s, byte(0xF0+(value>>18))) 2480 s = append(s, byte(0x80+((value>>12)&0x3F))) 2481 s = append(s, byte(0x80+((value>>6)&0x3F))) 2482 s = append(s, byte(0x80+(value&0x3F))) 2483 } 2484 2485 // Advance the pointer. 2486 for k := 0; k < code_length; k++ { 2487 skip(parser) 2488 } 2489 } 2490 } else { 2491 // It is a non-escaped non-blank character. 2492 s = read(parser, s) 2493 } 2494 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 2495 return false 2496 } 2497 } 2498 2499 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2500 return false 2501 } 2502 2503 // Check if we are at the end of the scalar. 
2504 if single { 2505 if parser.buffer[parser.buffer_pos] == '\'' { 2506 break 2507 } 2508 } else { 2509 if parser.buffer[parser.buffer_pos] == '"' { 2510 break 2511 } 2512 } 2513 2514 // Consume blank characters. 2515 for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) { 2516 if is_blank(parser.buffer, parser.buffer_pos) { 2517 // Consume a space or a tab character. 2518 if !leading_blanks { 2519 whitespaces = read(parser, whitespaces) 2520 } else { 2521 skip(parser) 2522 } 2523 } else { 2524 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 2525 return false 2526 } 2527 2528 // Check if it is a first line break. 2529 if !leading_blanks { 2530 whitespaces = whitespaces[:0] 2531 leading_break = read_line(parser, leading_break) 2532 leading_blanks = true 2533 } else { 2534 trailing_breaks = read_line(parser, trailing_breaks) 2535 } 2536 } 2537 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2538 return false 2539 } 2540 } 2541 2542 // Join the whitespaces or fold line breaks. 2543 if leading_blanks { 2544 // Do we need to fold line breaks? 2545 if len(leading_break) > 0 && leading_break[0] == '\n' { 2546 if len(trailing_breaks) == 0 { 2547 s = append(s, ' ') 2548 } else { 2549 s = append(s, trailing_breaks...) 2550 } 2551 } else { 2552 s = append(s, leading_break...) 2553 s = append(s, trailing_breaks...) 2554 } 2555 trailing_breaks = trailing_breaks[:0] 2556 leading_break = leading_break[:0] 2557 } else { 2558 s = append(s, whitespaces...) 2559 whitespaces = whitespaces[:0] 2560 } 2561 } 2562 2563 // Eat the right quote. 2564 skip(parser) 2565 end_mark := parser.mark 2566 2567 // Create a token. 
2568 *token = yaml_token_t{ 2569 typ: yaml_SCALAR_TOKEN, 2570 start_mark: start_mark, 2571 end_mark: end_mark, 2572 value: s, 2573 style: yaml_SINGLE_QUOTED_SCALAR_STYLE, 2574 } 2575 if !single { 2576 token.style = yaml_DOUBLE_QUOTED_SCALAR_STYLE 2577 } 2578 return true 2579 } 2580 2581 // Scan a plain scalar. 2582 func yaml_parser_scan_plain_scalar(parser *yaml_parser_t, token *yaml_token_t) bool { 2583 2584 var s, leading_break, trailing_breaks, whitespaces []byte 2585 var leading_blanks bool 2586 var indent = parser.indent + 1 2587 2588 start_mark := parser.mark 2589 end_mark := parser.mark 2590 2591 // Consume the content of the plain scalar. 2592 for { 2593 // Check for a document indicator. 2594 if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) { 2595 return false 2596 } 2597 if parser.mark.column == 0 && 2598 ((parser.buffer[parser.buffer_pos+0] == '-' && 2599 parser.buffer[parser.buffer_pos+1] == '-' && 2600 parser.buffer[parser.buffer_pos+2] == '-') || 2601 (parser.buffer[parser.buffer_pos+0] == '.' && 2602 parser.buffer[parser.buffer_pos+1] == '.' && 2603 parser.buffer[parser.buffer_pos+2] == '.')) && 2604 is_blankz(parser.buffer, parser.buffer_pos+3) { 2605 break 2606 } 2607 2608 // Check for a comment. 2609 if parser.buffer[parser.buffer_pos] == '#' { 2610 break 2611 } 2612 2613 // Consume non-blank characters. 2614 for !is_blankz(parser.buffer, parser.buffer_pos) { 2615 2616 // Check for indicators that may end a plain scalar. 2617 if (parser.buffer[parser.buffer_pos] == ':' && is_blankz(parser.buffer, parser.buffer_pos+1)) || 2618 (parser.flow_level > 0 && 2619 (parser.buffer[parser.buffer_pos] == ',' || 2620 parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == '[' || 2621 parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' || 2622 parser.buffer[parser.buffer_pos] == '}')) { 2623 break 2624 } 2625 2626 // Check if we need to join whitespaces and breaks. 
2627 if leading_blanks || len(whitespaces) > 0 { 2628 if leading_blanks { 2629 // Do we need to fold line breaks? 2630 if leading_break[0] == '\n' { 2631 if len(trailing_breaks) == 0 { 2632 s = append(s, ' ') 2633 } else { 2634 s = append(s, trailing_breaks...) 2635 } 2636 } else { 2637 s = append(s, leading_break...) 2638 s = append(s, trailing_breaks...) 2639 } 2640 trailing_breaks = trailing_breaks[:0] 2641 leading_break = leading_break[:0] 2642 leading_blanks = false 2643 } else { 2644 s = append(s, whitespaces...) 2645 whitespaces = whitespaces[:0] 2646 } 2647 } 2648 2649 // Copy the character. 2650 s = read(parser, s) 2651 2652 end_mark = parser.mark 2653 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 2654 return false 2655 } 2656 } 2657 2658 // Is it the end? 2659 if !(is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos)) { 2660 break 2661 } 2662 2663 // Consume blank characters. 2664 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2665 return false 2666 } 2667 2668 for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) { 2669 if is_blank(parser.buffer, parser.buffer_pos) { 2670 2671 // Check for tab characters that abuse indentation. 2672 if leading_blanks && parser.mark.column < indent && is_tab(parser.buffer, parser.buffer_pos) { 2673 yaml_parser_set_scanner_error(parser, "while scanning a plain scalar", 2674 start_mark, "found a tab character that violates indentation") 2675 return false 2676 } 2677 2678 // Consume a space or a tab character. 2679 if !leading_blanks { 2680 whitespaces = read(parser, whitespaces) 2681 } else { 2682 skip(parser) 2683 } 2684 } else { 2685 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 2686 return false 2687 } 2688 2689 // Check if it is a first line break. 
2690 if !leading_blanks { 2691 whitespaces = whitespaces[:0] 2692 leading_break = read_line(parser, leading_break) 2693 leading_blanks = true 2694 } else { 2695 trailing_breaks = read_line(parser, trailing_breaks) 2696 } 2697 } 2698 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2699 return false 2700 } 2701 } 2702 2703 // Check indentation level. 2704 if parser.flow_level == 0 && parser.mark.column < indent { 2705 break 2706 } 2707 } 2708 2709 // Create a token. 2710 *token = yaml_token_t{ 2711 typ: yaml_SCALAR_TOKEN, 2712 start_mark: start_mark, 2713 end_mark: end_mark, 2714 value: s, 2715 style: yaml_PLAIN_SCALAR_STYLE, 2716 } 2717 2718 // Note that we change the 'simple_key_allowed' flag. 2719 if leading_blanks { 2720 parser.simple_key_allowed = true 2721 } 2722 return true 2723 }