modernc.org/knuth@v0.0.4/web/tangle.go (about) 1 // Copyright 2023 The Knuth Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package web deals with .web files. 6 package web // modernc.org/knuth/web 7 8 import ( 9 "bytes" 10 "fmt" 11 "go/token" 12 "io" 13 "os" 14 "path/filepath" 15 "runtime/debug" 16 "sort" 17 "strconv" 18 "strings" 19 20 "modernc.org/knuth" 21 "modernc.org/sortutil" 22 ) 23 24 var ( 25 oTrcw bool // testing 26 ) 27 28 const ( 29 blankSet = " \t\n\r" 30 incompleteNameTag = "..." 31 poolSumInit = 271828 // tangle.pdf, p.138 32 stackLimit = 250 // min 124 33 ) 34 35 const ( 36 eof = -iota - 1 37 ccAt // "@@" 38 ccBeginMetaComment // "@{" 39 ccBeginPascal // "@p" 40 ccBigLineBreak // "@#" 41 ccCheckSum // "@$" 42 ccDefinition // "@d" 43 ccEnd // "@>" 44 ccEndMetaComment // "@}" 45 ccForceLine // "@\\" 46 ccFormat // "@f" 47 ccHex // "@\"" 48 ccJoin // "@&" 49 ccLineBreak // "@/" 50 ccMathBreak // "@|" 51 ccModuleName // "@<" 52 ccNewModule // "@ " 53 ccNewStarredModule // "@*" 54 ccNoLineBreak // "@+" 55 ccNoUnderline // "@?" 56 ccOctal // "@'" 57 ccPopMacroArg // "@Z" internal 58 ccPseudoSemi // "@;" 59 ccTeXString // "@t" 60 ccThinSpace // "@," 61 ccUnderline // "@!" 62 ccVerbatim // "@=" 63 ccXrefRoman // "@^" 64 ccXrefTypewriter // "@." 65 ccXrefWildcard // "@:" 66 ) 67 68 type abort error 69 70 type webScanner struct { 71 controlCodePos token.Position 72 src knuth.RuneSource 73 stack []knuth.RuneSource 74 75 c2 rune 76 controlCode rune 77 78 controlCodeValid bool 79 } 80 81 func newWebScanner(src knuth.RuneSource) *webScanner { return &webScanner{src: src} } 82 83 func (s *webScanner) srcStack() (r []string) { 84 for i, v := range s.stack { 85 p := v.Position() 86 p.Filename = filepath.Base(p.Filename) 87 r = append(r, fmt.Sprintf("%3d: %p %v:", i, v, p)) 88 } 89 if v := s.src; v != nil { 90 p := v.Position() 91 p.Filename = filepath.Base(p.Filename) 92 r = append(r, fmt.Sprintf("TOS: %p %v:", v, p)) 93 } else { 94 r = append(r, fmt.Sprintf("TOS: <nil>")) 95 } 96 return r 97 } 98 99 func (s *webScanner) c() (r rune) { 100 if s.controlCodeValid { 101 return s.controlCode 102 } 103 104 pop: 105 if s.src == nil { 106 n := len(s.stack) 107 if n == 0 { 108 return eof 109 } 110 111 s.src = s.stack[n-1] 112 s.stack = s.stack[:n-1] 113 } 114 c, err := s.src.C() 115 switch { 116 case err == io.EOF: 117 s.src = nil 118 if len(s.stack) != 0 { 119 goto pop 120 } 121 122 return eof 123 case err != nil: 124 panic(abort(fmt.Errorf("%v: %v", s.src.Position(), err))) 125 case c == '@': 126 s.controlCodePos = s.position() 127 s.consume() 128 // The letters L, T , P , M , C, and/or S following each code indicate whether 129 // or not that code is allowable in limbo, in TEX text, in Pascal text, in 130 // module names, in comments, and/or in strings. 131 if s.c2, err = s.src.C(); err != nil { 132 panic(abort(fmt.Errorf("%v: %v", s.src.Position(), err))) 133 } 134 135 switch s.c2 { 136 case '*': 137 // @* [!L, !P , !T] This denotes the beginning of a new starred module, i.e., a 138 // module that begins a new major group. The title of the new group should 139 // appear after the @*, followed by a period. As explained above, TEX control 140 // sequences should be avoided in such titles unless they are quite simple. 141 // When WEAVE and TANGLE read a @*, they print an asterisk on the terminal 142 // followed by the current module number, so that the user can see some 143 // indication of progress. The very first module should be starred. 144 s.controlCode = ccNewStarredModule 145 case 'd', 'D': 146 // @d [!P , !T] Macro definitions begin with @d (or @D), followed by the Pascal 147 // text for one of the three kinds of macros, as explained earlier. 148 s.controlCode = ccDefinition 149 case ' ', '\t', '\n': 150 // @ [!L, !P , !T] This denotes the beginning of a new (unstarred) module. A 151 // tab mark or end-of-line (carriage return) is equivalent to a space when it 152 // follows an @ sign. 153 s.controlCode = ccNewModule 154 case 'p', 'P': 155 // @p [!P , !T] The Pascal part of an unnamed module begins with @p (or @P). 156 // This causes TANGLE to append the following Pascal code to the initial 157 // program text T 0 as explained above. The WEAVE processor does not cause a 158 // ‘@p’ to appear explicitly in the TEX output, so if you are creating a WEB 159 // file based on a TEX-printed WEB documentation you have to remember to insert 160 // @p in the appropriate places of the unnamed modules. 161 s.controlCode = ccBeginPascal 162 case '<': 163 // @< [P, !T] A module name begins with @< followed by TEX text followed by @>; 164 // the TEX text should not contain any WEB control codes except @@, unless 165 // these control codes appear in Pascal text that is delimited by |...|. The 166 // module name may be abbreviated, after its first appearance in a WEB file, by 167 // giving any unique prefix followed by ..., where the three dots immediately 168 // precede the closing @>. No module name should be a prefix of another. Module 169 // names may not appear in Pascal text that is enclosed in |...|, nor may they 170 // appear in the definition part of a module (since the appearance of a module 171 // name ends the definition part and begins the Pascal part). 172 s.controlCode = ccModuleName 173 case 'f', 'F': 174 // @f [!P , !T] Format definitions begin with @f (or @F); they cause WEAVE to 175 // treat identifiers in a special way when they appear in Pascal text. The 176 // general form of a format definition is ‘@f l == r’, followed by an optional 177 // comment enclosed in braces, where l and r are identifiers; WEAVE will 178 // subsequently treat identifier l as it currently treats r. This feature 179 // allows a WEB programmer to invent new reserved words and/or to unreserve 180 // some of Pascal’s reserved identifiers. The definition part of each module 181 // consists of any number of macro definitions (beginning with @d) and format 182 // definitions (beginning with @f), intermixed in any order. 183 s.controlCode = ccFormat 184 case '^': 185 // @^ [P, T] The “control text” that follows, up to the next ‘@>’, will be 186 // entered into the index together with the identifiers of the Pascal program; 187 // this text will appear in roman type. For example, to put the phrase “system 188 // dependencies” into the index, you can type ‘@^system dependencies@>’ in each 189 // module that you want to index as system dependent. A control text, like a 190 // string, must end on the same line of the WEB file as it began. Furthermore, 191 // no WEB control codes are allowed in a control text, not even @@. (If you 192 // need an @ sign you can get around this restriction by typing ‘\AT!’.) 193 s.controlCode = ccXrefRoman 194 case '>': 195 s.controlCode = ccEnd 196 case 'Z': 197 s.controlCode = ccPopMacroArg 198 case '/': 199 // @/ [P] This control code causes a line break to occur within a Pascal 200 // program formatted by WEAVE; it is ignored by TANGLE. Line breaks are chosen 201 // automatically by TEX according to a scheme that works 99% of the time, but 202 // sometimes you will prefer to force a line break so that the program is 203 // segmented according to logical rather than visual criteria. Caution: ‘@/’ 204 // should be used only after statements or clauses, not in the middle of an 205 // expression; use @| in the middle of expressions, in order to keep WEAVE’s 206 // parser happy. 207 s.controlCode = ccLineBreak 208 case 't', 'T': 209 // @t [P] The “control text” that follows, up to the next ‘@>’, will be put 210 // into a TEX \hbox and formatted along with the neighboring Pascal program. 211 // This text is ignored by TANGLE, but it can be used for various purposes 212 // within WEAVE. For example, you can make comments that mix Pascal and 213 // classical mathematics, as in ‘size < 2 15 ’, by typing ‘|size < 214 // @t$2^{15}$@>|’. A control text must end on the same line of the WEB file as 215 // it began, and it may not contain any WEB control codes. 216 s.controlCode = ccTeXString 217 case '\'': 218 // @ ́ [P, T] This denotes an octal constant, to be formed from the succeeding 219 // digits. For example, if the WEB file contains ‘@ ́100’, the TANGLE processor 220 // will treat this an equivalent to ‘64’; the constant will be formatted as 221 // “ ́100 ” in the TEX output produced via WEAVE. You should use octal notation 222 // only for positive constants; don’t try to get, e.g., −1 by saying 223 // ‘@ ́777777777777’. 224 s.controlCode = ccOctal 225 case '.': 226 // @. [P, T] The “control text” that follows will be entered into the index in 227 // typewriter type; see the rules for ‘@^’, which is analogous. 228 s.controlCode = ccXrefTypewriter 229 case '@': 230 // @@ [C, L, M, P, S, T] A double @ denotes the single character ‘@’. This is 231 // the only control code that is legal in limbo, in comments, and in strings. 232 s.controlCode = ccAt 233 case '#': 234 // @# [P] This control code forces a line break, like @/ does, and it also 235 // causes a little extra white space to appear between the lines at this break. 236 // You might use it, for example, between procedure definitions or between 237 // groups of macro definitions that are logically separate but within the same 238 // module. 239 s.controlCode = ccBigLineBreak 240 case ',': 241 // @, [P] This control code inserts a thin space in WEAVE’s output; it is 242 // ignored by TANGLE. Sometimes you need this extra space if you are using 243 // macros in an unusual way, e.g., if two identifiers are adjacent. 244 s.controlCode = ccThinSpace 245 case ':': 246 // @: [P, T] The “control text” that follows will be entered into the index in 247 // a format controlled by the TEX macro ‘\9’, which the user should define as 248 // desired; see the rules for ‘@^’, which is analogous. 249 s.controlCode = ccXrefWildcard 250 case '&': 251 // @& [P] The @& operation causes whatever is on its left to be adjacent to 252 // whatever is on its right, in the Pascal output. No spaces or line breaks 253 // will separate these two items. However, the thing on the left should not be 254 // a semicolon, since a line break might occur after a semicolon. 255 s.controlCode = ccJoin 256 case '{': 257 // @{ [P] The beginning of a “meta comment,” i.e., a comment that is supposed 258 // to appear in the Pascal code, is indicated by @{ in the WEB file. Such 259 // delimiters can be used as isolated symbols in macros or modules, but they 260 // should be properly nested in the final Pascal program. The TANGLE processor 261 // will convert ‘@{’ into ‘{’ in the Pascal output file, unless the output is 262 // already part of a meta-comment; in the latter case ‘@{’ is converted into 263 // ‘[’, since Pascal does not allow nested comments. The WEAVE processor 264 // outputs ‘@{’. Incidentally, module numbers are automatically inserted as 265 // meta-comments into the Pascal program, in order to help correlate the 266 // outputs of WEAVE and TANGLE (see Appendix C) Meta-comments can be used to 267 // put conditional text into a Pascal program; this helps to overcome one of 268 // the limitations of WEB, since the simple macro processing routines of TANGLE 269 // do not include the dynamic evaluation of boolean expressions. 270 s.controlCode = ccBeginMetaComment 271 case '}': 272 // @} [P] The end of a “meta comment” is indicated by ‘@}’; this is converted 273 // either into ‘}’ or ‘]’ in the Pascal output, according to the conventions 274 // explained for @{ above. The WEAVE processor outputs ‘@}’. 275 s.controlCode = ccEndMetaComment 276 case '$': 277 // @$ [P] This denotes the string pool check sum. 278 s.controlCode = ccCheckSum 279 case '?': 280 // @? [P, T] This cancels an implicit (or explicit) ‘@!’, so that the next 281 // index entry will not be underlined. 282 s.controlCode = ccNoUnderline 283 case '=': 284 // @= [P] The “control text” that follows, up to the next ‘@>’, will be passed 285 // verbatim to the Pascal program. 286 s.controlCode = ccVerbatim 287 case '"': 288 // @" [P, T] A hexadecimal constant; ‘@"D0D0’ tangles to 53456 and weaves to 289 // ‘ ̋D0D0’. 290 s.controlCode = ccHex 291 case '\\': 292 // @\ [P] Force end-of-line here in the Pascal program file. 293 s.controlCode = ccForceLine 294 case '!': 295 // @! [P, T] The module number in an index entry will be underlined if ‘@!’ 296 // immediately precedes the identifier or control text being indexed. This 297 // convention is used to distinguish the modules where an identifier is 298 // defined, or where it is explained in some special way, from the modules 299 // where it is used. A reserved word or an identifier of length one will not be 300 // indexed except for underlined entries. An ‘@!’ is implicitly inserted by 301 // WEAVE just after the reserved words function, procedure, program, and var, 302 // and just after @d and @f. But you should insert your own ‘@!’ before the 303 // definitions of types, constants, variables, parameters, and components of 304 // records and enumerated types that are not covered by this implicit 305 // convention, if you want to improve the quality of the index that you get. 306 s.controlCode = ccUnderline 307 case '+': 308 // @+ [P] This control code cancels a line break that might otherwise be 309 // inserted by WEAVE, e.g., before the word ‘else’, if you want to put a short 310 // if-then-else construction on a single line. It is ignored by TANGLE. 311 s.controlCode = ccNoLineBreak 312 case ';': 313 // @; [P] This control code is treated like a semicolon, for formatting 314 // purposes, except that it is invisible. You can use it, for example, after a 315 // module name when the Pascal text represented by that module name ends with a 316 // semicolon. 317 s.controlCode = ccPseudoSemi 318 case '|': 319 // @| [P] This control code specifies an optional line break in the midst of an 320 // expression. For example, if you have a long condition between if and then, 321 // or a long expression on the right-hand side of an assignment statement, you 322 // can use ‘@|’ to specify breakpoints more logical than the ones that TEX 323 // might choose on visual grounds. 324 s.controlCode = ccMathBreak 325 default: 326 panic(todo("%v: %#U", s.controlCodePos, s.c2)) 327 } 328 329 s.controlCodeValid = true 330 return s.controlCode 331 default: 332 return c 333 } 334 } 335 336 func (s *webScanner) consume() { 337 s.controlCodeValid = false 338 s.src.Consume() 339 } 340 341 func (s *webScanner) position() (r token.Position) { 342 if s.controlCodeValid { 343 return s.controlCodePos 344 } 345 346 if s.src != nil { 347 return s.src.Position() 348 } 349 350 return r 351 } 352 353 // Tangle processes 'src' and outputs the resulting Pascal code to 'pascal' and 354 // a string pool to 'pool'. To apply a change file, pass knuth.NewChanger(src, 355 // changes) as 'src'. 356 // 357 // The result is similar, but not compatible, to what the original TANGLE 358 // outputs. It's also not compatible with the ISO Pascal Standard. 359 func Tangle(pascal, pool io.Writer, src knuth.RuneSource) (err error) { 360 if !doPanic { 361 defer func() { 362 e := recover() 363 switch x := e.(type) { 364 case nil: 365 return 366 case abort: 367 err = error(x) 368 return 369 } 370 371 err = fmt.Errorf("PANIC %T: %[1]s, %s\n%s", e, err, debug.Stack()) 372 }() 373 } 374 375 t := newTangle(pascal, pool, src) 376 if err := t.scan(); err != nil { 377 return err 378 } 379 380 // Sanitize code names. 381 t.codeNames = t.codeNames[:sortutil.Dedupe(sort.StringSlice(t.codeNames))] 382 for i, v := range t.codeNames { 383 if i < len(t.codeNames)-1 && strings.HasPrefix(t.codeNames[i+1], v) { 384 panic(todo("", i)) 385 } 386 } 387 for _, m := range t.modules { 388 for _, c := range m.codes { 389 if nm := c.name; nm != "" { 390 nm = t.completeName(nm) 391 c.name = nm 392 t.codesByName[nm] = append(t.codesByName[nm], c) 393 } 394 } 395 } 396 t.src = nil 397 for _, m := range t.modules { 398 m.render(t) 399 } 400 t.post() 401 return nil 402 } 403 404 type tangle struct { 405 *webScanner 406 codeNames []string 407 codes []*code 408 codesByName map[string][]*code 409 definitions map[string]*definition // @d 410 formats map[string]*format // @f 411 macroArgs []func() knuth.RuneSource 412 modules []*module 413 pascal io.Writer 414 pascal0 bytes.Buffer 415 pool io.Writer 416 poolSum int 417 strings map[string]int 418 419 constCount int 420 constInjectState int 421 metaCommentLevel int 422 } 423 424 func newTangle(pascal, pool io.Writer, src knuth.RuneSource) *tangle { 425 return &tangle{ 426 codesByName: map[string][]*code{}, 427 definitions: map[string]*definition{}, 428 formats: map[string]*format{}, 429 pascal: pascal, 430 pool: pool, 431 poolSum: poolSumInit, 432 strings: map[string]int{}, 433 webScanner: newWebScanner(src), 434 } 435 } 436 437 func (t *tangle) post() { 438 tagJoin := []byte("@&") 439 tagCheckSum := []byte("@$") 440 nl3 := []byte("\n\n\n") 441 b := t.pascal0.Bytes() 442 for len(b) != 0 { 443 x := bytes.Index(b, tagJoin) 444 if x < 0 { 445 break 446 } 447 448 c := b[:x] 449 b = b[x+len(tagJoin):] 450 c = bytes.TrimRight(c, blankSet) 451 if _, err := t.pascal.Write(c); err != nil { 452 panic(todo("", err)) 453 } 454 455 b = bytes.TrimLeft(b, blankSet) 456 } 457 for bytes.Index(b, nl3) >= 0 { 458 b = bytes.ReplaceAll(b, nl3, nl3[:2]) 459 } 460 for bytes.Index(b, tagCheckSum) >= 0 { 461 b = bytes.ReplaceAll(b, tagCheckSum, []byte(fmt.Sprintf(" %d ", t.poolSum))) 462 } 463 if _, err := t.pascal.Write(b); err != nil { 464 panic(todo("", err)) 465 } 466 if _, err := fmt.Fprintf(t.pool, "*%09d\n", t.poolSum); err != nil { 467 panic(todo("", err)) 468 } 469 } 470 471 func (t *tangle) w(s string, args ...interface{}) { 472 b := []byte(fmt.Sprintf(s, args...)) 473 if oTrcw { 474 os.Stderr.Write(b) 475 } 476 var w []byte 477 for i := 0; i < len(b); { 478 c := b[i] 479 switch c { 480 case '@': 481 if i+1 == len(b) { 482 break 483 } 484 485 switch b[i+1] { 486 case '{': 487 t.metaCommentLevel++ 488 if t.metaCommentLevel == 1 { 489 w = append(w, '{') 490 } else { 491 w = append(w, '[') 492 } 493 i += 2 494 continue 495 case '}': 496 if t.metaCommentLevel == 0 { 497 panic(todo("%v:", t.position())) 498 } 499 500 t.metaCommentLevel-- 501 if t.metaCommentLevel == 0 { 502 w = append(w, '}') 503 } else { 504 w = append(w, ']') 505 } 506 i += 2 507 continue 508 case '@': 509 i++ 510 case '&': 511 w = append(w, "@&"...) 512 i += 2 513 continue 514 } 515 case '{': 516 if t.metaCommentLevel != 0 { 517 c = '[' 518 } 519 case '}': 520 if t.metaCommentLevel != 0 { 521 c = ']' 522 } 523 } 524 i++ 525 w = append(w, c) 526 } 527 b = w 528 // fmt.Printf("%s", b) 529 if _, err := t.pascal0.Write(b); err != nil { 530 panic(abort(fmt.Errorf("%v: writing tangle result: %v", t.src.Position(), err))) 531 } 532 } 533 534 func (t *tangle) push(src knuth.RuneSource) { 535 if len(t.stack) == stackLimit { 536 panic(todo("", t.srcStack())) 537 } 538 539 if t.src != nil { 540 t.stack = append(t.stack, t.src) 541 } 542 t.src = src 543 } 544 545 func (t *tangle) pushCode(c *code) { 546 src := knuth.NewRuneSource(c.pos.Filename, []byte(c.pascal), knuth.Unicode) 547 src.AddLineColumnInfo(0, c.pos.Filename, c.pos.Line, c.pos.Column) 548 t.push(src) 549 } 550 551 func (t *tangle) scan() error { 552 t.scanLimbo() 553 for { 554 switch c := t.c(); c { 555 case eof: 556 return nil 557 case ccNewStarredModule: 558 t.consume() // "@*" 559 t.addModule(t.scanModule(t.scanModuleNameDot())) 560 case ccNewModule: 561 t.consume() // "@ " 562 t.addModule(t.scanModule("")) 563 default: 564 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 565 } 566 } 567 } 568 569 func (t *tangle) addModule(m *module) { 570 t.modules = append(t.modules, m) 571 m.number = len(t.modules) 572 } 573 574 func (t *tangle) scanModuleNameDot() string { 575 var b strings.Builder 576 for { 577 switch c := t.c(); c { 578 case '.': 579 t.consume() 580 return strings.TrimSpace(b.String()) 581 default: 582 if c >= 0 { 583 b.WriteRune(c) 584 t.consume() 585 continue 586 } 587 588 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 589 } 590 } 591 } 592 593 func (t *tangle) scanModule(nm string) *module { 594 m := &module{name: nm} 595 m.pos, m.tex = t.scanTeX() 596 for { 597 switch c := t.c(); c { 598 case ccDefinition: 599 t.addDefinition(t.scanDefinition()) 600 case ccNewStarredModule, ccNewModule, eof: 601 return m 602 case ccBeginPascal: 603 t.consume() 604 pos, s := t.scanPascal(false, true) 605 c := &code{pos: pos, pascal: s, inModule: m} 606 m.codes = append(m.codes, c) 607 case ccModuleName: 608 nm := t.scanModuleName() 609 t.addCodeName(nm) 610 t.scanBlank() 611 if t.c() == '+' { 612 t.consume() // handle "+=" the same as "=". 613 } 614 switch c := t.c(); c { 615 case '=': 616 t.consume() 617 pos, s := t.scanPascal(false, true) 618 c := &code{name: nm, pos: pos, pascal: s, inModule: m} 619 m.codes = append(m.codes, c) 620 default: 621 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 622 } 623 case ccFormat: 624 t.addFormat(t.scanFormat()) 625 default: 626 if c >= 0 { 627 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 628 } 629 630 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 631 } 632 t.scanSeparator() 633 } 634 } 635 636 func (t *tangle) completeName(nm string) string { 637 if strings.HasSuffix(nm, incompleteNameTag) { 638 nm0 := nm 639 nm = nm[:len(nm)-len(incompleteNameTag)] 640 x := sort.SearchStrings(t.codeNames, nm) 641 if x == len(t.codeNames) { 642 panic(todo("%q %q", nm, t.codeNames)) 643 } 644 645 nm1 := nm 646 nm = t.codeNames[x] 647 if !strings.HasPrefix(nm, nm1) { 648 for i, v := range t.codeNames { 649 trc("%3d: %q", i, v) 650 } 651 panic(todo("%q -> %q -> %q, x %d", nm0, nm1, nm, x)) 652 } 653 } 654 return nm 655 } 656 657 func (t *tangle) findCode(nm string) (r []*code) { 658 nm = t.completeName(nm) 659 r = t.codesByName[nm] 660 if r == nil { 661 panic(todo("%q", nm)) 662 } 663 664 return r 665 } 666 667 func (t *tangle) addCodeName(nm string) { 668 if strings.HasSuffix(nm, incompleteNameTag) { 669 return 670 } 671 672 t.codeNames = append(t.codeNames, nm) 673 } 674 675 func (t *tangle) addFormat(f *format) { 676 if ex, ok := t.formats[f.l]; ok { 677 panic(todo("%v: %q redefined, previous at %v:", f.pos, f.l, ex.pos)) 678 } 679 680 t.formats[f.l] = f 681 } 682 683 func (t *tangle) addDefinition(d *definition) { 684 if ex, ok := t.definitions[d.name]; ok { 685 panic(todo("%v: %q redefined, previous at %v:", d.pos, d.name, ex.pos)) 686 } 687 688 d.ord = len(t.definitions) 689 t.definitions[d.name] = d 690 if d.kind == "=" { 691 t.constCount++ 692 } 693 } 694 695 func (t *tangle) scanFormat() *format { 696 t.consume() 697 f := &format{pos: t.position()} 698 _, _, f.l = t.scanIdentifier() 699 t.scanBlank() 700 switch c := t.c(); c { 701 case '=': 702 t.consume() 703 switch c := t.c(); c { 704 case '=': 705 t.consume() 706 _, _, f.r = t.scanIdentifier() 707 f.postSep = t.scanSeparator() 708 default: 709 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 710 } 711 default: 712 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 713 } 714 return f 715 } 716 717 func (t *tangle) scanDefinition() (r *definition) { 718 t.consume() 719 d := &definition{pos: t.position()} 720 721 defer func() { 722 if r != nil { 723 r.replacement = strings.TrimRight(r.replacement, blankSet) + " " 724 } 725 }() 726 727 _, _, d.name = t.scanIdentifier() 728 t.scanBlank() 729 switch c := t.c(); c { 730 case '=': 731 t.consume() 732 switch c := t.c(); c { 733 case '=': 734 t.consume() 735 d.kind = "==" 736 d.replPos = t.position() 737 _, d.replacement = t.scanPascal(true, false) 738 default: 739 d.kind = "=" 740 d.replPos = t.position() 741 _, d.replacement = t.scanPascal(true, true) 742 } 743 case '(': 744 t.consume() 745 t.scanBlank() 746 switch c := t.c(); c { 747 case '#': 748 t.consume() 749 t.scanBlank() 750 switch c := t.c(); c { 751 case ')': 752 t.consume() 753 t.scanBlank() 754 switch c := t.c(); c { 755 case '=': 756 t.consume() 757 switch c := t.c(); c { 758 case '=': 759 t.consume() 760 d.kind = "(#)" 761 d.replPos = t.position() 762 _, d.replacement = t.scanPascal(true, false) 763 default: 764 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 765 } 766 default: 767 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 768 } 769 default: 770 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 771 } 772 default: 773 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 774 } 775 default: 776 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 777 } 778 return d 779 } 780 781 func (t *tangle) scanPascal(def, sep bool) (pos token.Position, s string) { 782 pos = t.position() 783 var b strings.Builder 784 for { 785 s := t.scanSeparator() 786 if s != "" { 787 switch { 788 case sep: 789 b.WriteString(s) 790 default: 791 b.WriteByte(' ') 792 } 793 } 794 switch c := t.c(); c { 795 case '\'': 796 _, s := t.scanPascalStringLiteral() 797 b.WriteString(s) 798 case '"': 799 _, s := t.scanQuotedStringLiteral() 800 b.WriteString(s) 801 case ccNewStarredModule, ccDefinition, ccNewModule, ccBeginPascal, ccFormat, eof: 802 return pos, b.String() 803 case ccModuleName: 804 if def { 805 return pos, b.String() 806 } 807 808 nm := t.scanModuleName() 809 t.addCodeName(nm) 810 fmt.Fprintf(&b, "@<%s@>", nm) 811 case ccLineBreak: 812 t.consume() 813 b.WriteRune('\n') 814 case ccOctal: 815 _, sep, s := t.scanOctal() 816 n, err := strconv.ParseUint(s, 8, 64) 817 if err != nil { 818 panic(todo("", err)) 819 } 820 821 fmt.Fprintf(&b, "%s{0%o=}%[2]d", sep, n) 822 case ccHex: 823 _, sep, s := t.scanHex() 824 n, err := strconv.ParseUint(s, 16, 64) 825 if err != nil { 826 panic(todo("", err)) 827 } 828 829 fmt.Fprintf(&b, "%s{0x%x=}%[2]d", sep, n) 830 case ccXrefRoman, ccXrefTypewriter, ccXrefWildcard: 831 _, s := t.scanXref() 832 fmt.Fprintf(&b, "{ %s }", commentSafe(s)) 833 case ccTeXString: 834 fmt.Fprintf(&b, "{ %s }", commentSafe(t.scanTeXString())) 835 case ccJoin: 836 t.consume() 837 b.WriteString("@&") 838 case ccCheckSum: 839 t.consume() 840 b.WriteString("@$") 841 case ccVerbatim: 842 b.WriteString(t.scanVerbatim()) 843 case ccBeginMetaComment: 844 t.consume() 845 b.WriteString("@{") 846 case ccEndMetaComment: 847 t.consume() 848 b.WriteString("@}") 849 default: 850 if c >= 0 { 851 switch { 852 case t.isIdentFirst(c): 853 _, _, s := t.scanIdentifier() 854 b.WriteString(s) 855 case t.isDigit(c): 856 _, _, s := t.scanDecimal() 857 b.WriteString(s) 858 default: 859 switch c { 860 case 861 '(', '#', ')', ',', ';', ':', '=', 862 '+', '-', '[', ']', '>', '.', '*', 863 '<', '/', '^', '$': 864 t.consume() 865 b.WriteRune(c) 866 default: 867 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 868 } 869 } 870 break 871 } 872 873 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 874 } 875 } 876 } 877 878 func (t *tangle) scanVerbatim() string { 879 t.consume() 880 var b strings.Builder 881 for { 882 switch c := t.c(); c { 883 case ccEnd: 884 t.consume() 885 return b.String() 886 case ccAt: 887 t.consume() 888 b.WriteString("@@") 889 default: 890 if c >= 0 { 891 b.WriteRune(c) 892 t.consume() 893 continue 894 } 895 896 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 897 } 898 } 899 } 900 901 func (t *tangle) scanHex() (pos token.Position, sep, n string) { 902 t.consume() // "@\"" 903 sep = t.scanSeparator() 904 pos = t.position() 905 var b strings.Builder 906 for { 907 switch c := t.c(); c { 908 default: 909 if c >= 0 { 910 if t.isDigit(c) || c >= 'a' && c <= 'f' || c >= 'A' && c <= 'F' { 911 b.WriteRune(c) 912 t.consume() 913 continue 914 } 915 916 return pos, sep, b.String() 917 } 918 919 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 920 } 921 } 922 } 923 924 func (t *tangle) scanOctal() (pos token.Position, sep, n string) { 925 t.consume() // "@'" 926 sep = t.scanSeparator() 927 pos = t.position() 928 var b strings.Builder 929 for { 930 switch c := t.c(); c { 931 default: 932 if c >= 0 { 933 if c >= '0' && c <= '7' { 934 b.WriteRune(c) 935 t.consume() 936 continue 937 } 938 939 return pos, sep, b.String() 940 } 941 942 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 943 } 944 } 945 } 946 947 func (t *tangle) scanNumeric() (pos token.Position, sep, n string) { 948 sep = t.scanSeparator() 949 pos = t.position() 950 var b strings.Builder 951 hex := false 952 for { 953 switch c := t.c(); c { 954 case ccNoLineBreak: 955 return pos, sep, b.String() 956 default: 957 if c >= 0 { 958 if c >= '0' && c <= '9' || hex && (c >= 'a' && c < 'f') { 959 b.WriteRune(c) 960 t.consume() 961 continue 962 } 963 964 if b.String() == "0" && c == 'x' { 965 hex = true 966 b.WriteRune(c) 967 t.consume() 968 continue 969 } 970 971 return pos, sep, b.String() 972 } 973 974 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 975 } 976 } 977 } 978 979 func (t *tangle) scanDecimal() (pos token.Position, sep, n string) { 980 sep = t.scanSeparator() 981 pos = t.position() 982 var b strings.Builder 983 for { 984 switch c := t.c(); c { 985 case ccNoLineBreak: 986 return pos, sep, b.String() 987 default: 988 if c >= 0 { 989 if c >= '0' && c <= '9' { 990 b.WriteRune(c) 991 t.consume() 992 continue 993 } 994 995 return pos, sep, b.String() 996 } 997 998 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 999 } 1000 } 1001 } 1002 1003 func (t *tangle) scanModuleName() (r string) { 1004 t.consume() // "@<" 1005 var b strings.Builder 1006 var last rune 1007 for { 1008 c := t.c() 1009 switch c { 1010 case ' ', '\t', '\n', '\r': 1011 c = ' ' 1012 if last == ' ' { 1013 t.consume() 1014 continue 1015 } 1016 } 1017 1018 last = c 1019 switch c { 1020 case ccEnd: 1021 t.consume() 1022 r = strings.TrimSpace(b.String()) 1023 return r 1024 case ccThinSpace: 1025 t.consume() 1026 b.WriteString("@,") 1027 case ccOctal: 1028 t.consume() 1029 b.WriteString("@'") 1030 case ccAt: 1031 t.consume() 1032 b.WriteString("@@") 1033 default: 1034 if c >= 0 { 1035 b.WriteRune(c) 1036 t.consume() 1037 continue 1038 } 1039 1040 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 1041 } 1042 } 1043 } 1044 1045 func (t *tangle) scanTeXInComment() (r string) { 1046 var b strings.Builder 1047 // pos := t.position() 1048 t.consume() // "$" 1049 b.WriteByte('$') 1050 for { 1051 switch c := t.c(); c { 1052 case '$': 1053 t.consume() 1054 b.WriteRune(c) 1055 r = commentSafe(b.String()) 1056 return r 1057 case ccAt: 1058 t.consume() 1059 b.WriteString("@@") 1060 case ccHex: 1061 t.consume() 1062 b.WriteString("@\"") 1063 default: 1064 if c >= 0 { 1065 t.consume() 1066 b.WriteRune(c) 1067 break 1068 } 1069 1070 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 1071 } 1072 } 1073 } 1074 1075 func (t *tangle) scanPascalBracedComment() (r string) { 1076 // pos := t.position() 1077 var b strings.Builder 1078 lvl := 0 1079 for { 1080 switch c := t.c(); c { 1081 case '{': 1082 t.consume() 1083 lvl++ 1084 b.WriteString("{") 1085 case '}': 1086 t.consume() 1087 lvl-- 1088 b.WriteString("}") 1089 if lvl == 0 { 1090 s := b.String() 1091 return "{" + commentSafe(s[1:len(s)-1]) + "}" 1092 } 1093 case '$': 1094 b.WriteString(t.scanTeXInComment()) 1095 case '\\': 1096 t.consume() 1097 b.WriteRune(c) 1098 switch c := t.c(); c { 1099 default: 1100 if c >= 0 { 1101 t.consume() 1102 b.WriteRune(c) 1103 break 1104 } 1105 1106 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 1107 } 1108 case ccTeXString: 1109 b.WriteString(t.scanTeXString()) 1110 case ccOctal: 1111 t.consume() 1112 b.WriteString("@'") 1113 case ccAt: 1114 t.consume() 1115 b.WriteString("@@") 1116 case ccUnderline: 1117 t.consume() 1118 b.WriteString("@!") 1119 case ccHex: 1120 t.consume() 1121 b.WriteString("@\"") 1122 case ccBeginMetaComment: 1123 t.consume() 1124 b.WriteString(" ") 1125 case ccEndMetaComment: 1126 t.consume() 1127 b.WriteString(" ") 1128 default: 1129 if c >= 0 { 1130 t.consume() 1131 b.WriteRune(c) 1132 break 1133 } 1134 1135 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 1136 } 1137 } 1138 } 1139 1140 func (t *tangle) scanQuotedStringLiteral() (pos token.Position, s string) { 1141 var b strings.Builder 1142 pos = t.position() 1143 t.consume() // leading "\"" 1144 b.WriteRune('"') 1145 out: 1146 for { 1147 switch c := t.c(); c { 1148 case '"': 1149 t.consume() 1150 b.WriteRune(c) 1151 if t.c() != '"' { 1152 break out 1153 } 1154 1155 t.consume() 1156 b.WriteRune(c) 1157 case ccAt: 1158 t.consume() 1159 b.WriteString("@@") 1160 default: 1161 if c >= 0 { 1162 t.consume() 1163 b.WriteRune(c) 1164 break 1165 } 1166 1167 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 1168 } 1169 } 1170 s0 := b.String() 1171 s = s0[1 : len(s0)-1] 1172 s = strings.ReplaceAll(s, `""`, `"`) 1173 s = strings.ReplaceAll(s, `@@`, `@`) 1174 if a := []rune(s); len(a) == 1 { 1175 return pos, fmt.Sprintf("{%s=}%d", commentSafe(s0), a[0]) 1176 } 1177 1178 id := t.strings[s] 1179 if id == 0 { 1180 id = 256 + len(t.strings) 1181 t.strings[s] = id 1182 if _, err := fmt.Fprintf(t.pool, "%02d%s\n", len(s), s); err != nil { 1183 panic(todo("", err)) 1184 } 1185 const prime = 03777777667 1186 t.poolSum += t.poolSum + len(s) 1187 for t.poolSum > prime { 1188 t.poolSum -= prime 1189 } 1190 for i := 0; i < len(s); i++ { 1191 t.poolSum += t.poolSum + int(s[i]) 1192 for t.poolSum > prime { 1193 t.poolSum -= prime 1194 } 1195 } 1196 } 1197 return pos, fmt.Sprintf("{%s=}%d", commentSafe(s0), id) 1198 } 1199 1200 func (t *tangle) scanPascalStringLiteral() (pos token.Position, s string) { 1201 var b strings.Builder 1202 t.consume() // leading "'" 1203 b.WriteRune('\'') 1204 for { 1205 switch c := t.c(); c { 1206 case '\'': 1207 t.consume() 1208 b.WriteRune(c) 1209 if t.c() != '\'' { 1210 return pos, b.String() 1211 } 1212 1213 t.consume() 1214 b.WriteRune(c) 1215 case ccAt: 1216 t.consume() 1217 b.WriteString("@@") 1218 default: 1219 if c >= 0 { 1220 t.consume() 1221 b.WriteRune(c) 1222 break 1223 } 1224 1225 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 1226 } 1227 } 1228 } 1229 1230 func (t *tangle) isIdentFirst(c rune) bool { 1231 return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c == '_' 1232 } 1233 1234 func (t *tangle) isIdentNext(c rune) bool { 1235 return t.isIdentFirst(c) || t.isDigit(c) 1236 } 1237 1238 func (t *tangle) isDigit(c rune) bool { return c >= '0' && c <= '9' } 1239 1240 func (t *tangle) scanIdentifier() (pos token.Position, sep, id string) { 1241 sep = t.scanSeparator() 1242 pos = t.position() 1243 var b strings.Builder 1244 first := true 1245 for { 1246 switch c := t.c(); c { 1247 default: 1248 if first && t.isIdentFirst(c) || t.isIdentNext(c) { 1249 first = false 1250 b.WriteRune(c) 1251 t.consume() 1252 continue 1253 } 1254 1255 return pos, sep, b.String() 1256 } 1257 } 1258 } 1259 1260 func (t *tangle) scanBlank() string { 1261 var b strings.Builder 1262 for { 1263 switch c := t.c(); c { 1264 case ' ', '\t', '\r': 1265 b.WriteByte(' ') 1266 t.consume() 1267 case '\n': 1268 b.WriteByte('\n') 1269 t.consume() 1270 default: 1271 return b.String() 1272 } 1273 } 1274 } 1275 1276 func (t *tangle) scanSeparator() string { 1277 var b strings.Builder 1278 for { 1279 switch c := t.c(); c { 1280 case ' ', '\t', '\r': 1281 b.WriteByte(' ') 1282 t.consume() 1283 case '\n': 1284 b.WriteByte('\n') 1285 t.consume() 1286 case '{': 1287 b.WriteString(t.scanPascalBracedComment()) 1288 case 1289 ccNewStarredModule, ccDefinition, ccNewModule, ccBeginPascal, 1290 ccFormat, eof, ccModuleName, ccOctal, ccXrefRoman, 1291 ccXrefTypewriter, ccXrefWildcard, ccTeXString, 1292 ccJoin, ccCheckSum, ccVerbatim, ccHex, ccBeginMetaComment, ccEndMetaComment: 1293 1294 return b.String() 1295 case ccBigLineBreak, ccForceLine, ccLineBreak: 1296 t.consume() 1297 b.WriteString("\n") 1298 case ccThinSpace, ccNoUnderline, ccNoLineBreak, ccMathBreak, ccPseudoSemi, ccUnderline: 1299 t.consume() 1300 b.WriteByte(' ') 1301 default: 1302 if c >= 0 || c == eof { 1303 return b.String() 1304 } 1305 1306 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 1307 } 1308 } 1309 } 1310 1311 func (t *tangle) scanSeparator2() string { 1312 var b strings.Builder 1313 outer: 1314 for { 1315 switch c := t.c(); c { 1316 case ' ', '\t', '\r': 1317 b.WriteByte(' ') 1318 t.consume() 1319 case '\n': 1320 b.WriteByte('\n') 1321 t.consume() 1322 case '{': 1323 t.consume() 1324 b.WriteRune(c) 1325 for { 1326 switch c := t.c(); c { 1327 case '}': 1328 t.consume() 1329 b.WriteRune(c) 1330 continue outer 1331 case eof: 1332 return b.String() 1333 case ccForceLine: 1334 t.consume() 1335 b.WriteByte('\n') 1336 case ccAt: 1337 t.consume() 1338 b.WriteString("@@") 1339 case ccOctal: 1340 t.consume() 1341 b.WriteString("@'") 1342 case ccUnderline: 1343 t.consume() 1344 b.WriteString("@!") 1345 case ccHex: 1346 t.consume() 1347 b.WriteString("@\"") 1348 default: 1349 if c >= 0 { 1350 t.consume() 1351 b.WriteRune(c) 1352 continue 1353 } 1354 1355 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 1356 } 1357 } 1358 case ccModuleName, ccPopMacroArg, ccBeginMetaComment, ccEndMetaComment: 1359 return b.String() 1360 default: 1361 if c >= 0 || c == eof { 1362 return b.String() 1363 } 1364 1365 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 1366 } 1367 } 1368 } 1369 1370 func (t *tangle) scanTeX() (pos token.Position, r string) { 1371 defer func() { r = strings.TrimRight(r, blankSet) }() 1372 1373 pos = t.position() 1374 var b strings.Builder 1375 for { 1376 switch c := t.c(); c { 1377 case eof: 1378 return pos, b.String() 1379 case ccXrefRoman, ccXrefTypewriter, ccXrefWildcard: 1380 _, s := t.scanXref() 1381 b.WriteString(s) 1382 case ccNewStarredModule, ccDefinition, ccNewModule, ccBeginPascal, ccModuleName, ccFormat: 1383 return pos, b.String() 1384 case ccTeXString: 1385 b.WriteString(t.scanTeXString()) 1386 case ccOctal: 1387 t.consume() 1388 b.WriteString("@'") 1389 case ccHex: 1390 t.consume() 1391 b.WriteString("@\"") 1392 case ccThinSpace, ccUnderline: 1393 t.consume() 1394 b.WriteString(" ") 1395 case ccAt: 1396 t.consume() 1397 b.WriteString("@@") 1398 default: 1399 if c >= 0 { 1400 b.WriteRune(c) 1401 t.consume() 1402 continue 1403 } 1404 1405 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 1406 } 1407 } 1408 } 1409 1410 func (t *tangle) scanTeXString() string { 1411 t.consume() 1412 var b strings.Builder 1413 for { 1414 switch c := t.c(); c { 1415 case ccEnd: 1416 t.consume() 1417 return b.String() 1418 default: 1419 if c >= 0 { 1420 b.WriteRune(c) 1421 t.consume() 1422 continue 1423 } 1424 1425 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 1426 } 1427 } 1428 } 1429 1430 func (t *tangle) scanXref() (pos token.Position, s string) { 1431 pos = t.position() 1432 t.consume() 1433 var b strings.Builder 1434 for { 1435 switch c := t.c(); c { 1436 case ccEnd: 1437 t.consume() 1438 return pos, fmt.Sprintf("\\xref[%s]", b.String()) 1439 default: 1440 if c >= 0 { 1441 b.WriteRune(c) 1442 t.consume() 1443 continue 1444 } 1445 1446 panic(todo("%v: %#U %#U", t.position(), c, t.c2)) 1447 } 1448 } 1449 } 1450 1451 func (t *tangle) scanLimbo() { 1452 var b strings.Builder 1453 for { 1454 switch c := t.c(); c { 1455 case ccNewStarredModule, ccNewModule, eof: 1456 if b.Len() == 0 { 1457 return 1458 } 1459 1460 t.w("{ %s }", commentSafe(b.String())) 1461 return 1462 case ccAt: 1463 t.consume() 1464 b.WriteString("@@") 1465 default: 1466 if c >= 0 { 1467 b.WriteRune(c) 1468 t.consume() 1469 continue 1470 } 1471 1472 panic(todo("%v: %#U", t.position(), t.c2)) 1473 } 1474 } 1475 } 1476 1477 func (t *tangle) injectConstants(appending bool) { 1478 if !appending { 1479 t.w("\n\nconst") 1480 } 1481 var a []*definition 1482 for _, d := range t.definitions { 1483 if d.kind == "=" { 1484 a = append(a, d) 1485 } 1486 } 1487 sort.Slice(a, func(i, j int) bool { return a[i].ord < a[j].ord }) 1488 for _, d := range a { 1489 t.w("\n %s = %s;", d.name, d.replacement) 1490 } 1491 t.w("\n") 1492 } 1493 1494 type module struct { 1495 name string // Non blank for @* modules only. 1496 codes []*code 1497 pos token.Position 1498 tex string 1499 1500 number int 1501 1502 teXRendered bool 1503 } 1504 1505 func (m *module) render(t *tangle) { 1506 if len(m.codes) == 0 { 1507 m.renderTeX(t) 1508 return 1509 } 1510 1511 for _, c := range m.codes { 1512 if c.name == "" { 1513 c.render(t) 1514 } 1515 } 1516 } 1517 1518 func (m *module) renderTeX(t *tangle) { 1519 if m.teXRendered { 1520 return 1521 } 1522 1523 t.w("\n") 1524 switch s := strings.TrimSpace(m.name); { 1525 case s != "": 1526 t.w("\n{ %d. %s }", m.number, commentSafe(s)) 1527 default: 1528 t.w("\n{ %d. }", m.number) 1529 } 1530 if s := strings.TrimSpace(m.tex); s != "" { 1531 pos := m.pos 1532 pos.Line-- 1533 t.w("\n\n{tangle:pos %v: }", m.pos) 1534 t.w("\n\n{ %s }", commentSafe(s)) 1535 } 1536 m.teXRendered = true 1537 } 1538 1539 type format struct { 1540 l, r string // l == r 1541 pos token.Position 1542 postSep string 1543 } 1544 1545 type code struct { 1546 inModule *module 1547 name string 1548 pos token.Position 1549 pascal string 1550 } 1551 1552 func (c *code) render(t *tangle) { 1553 c.inModule.renderTeX(t) 1554 t.pushCode(c) 1555 c.scan(t) 1556 } 1557 1558 func (c *code) scan(t *tangle) { 1559 const ( 1560 injZero = iota 1561 injSeenProgram 1562 injDone 1563 ) 1564 for { 1565 switch ch := t.c(); ch { 1566 case ' ', '\n', '\t', '\r', '{': 1567 s := t.scanSeparator2() 1568 t.w("%s", s) 1569 case 1570 '(', ',', ')', ';', '=', '.', ':', '[', ']', '+', '-', '>', 1571 '*', '<', '/', '$', '^': 1572 t.consume() 1573 t.w("%c", ch) 1574 case '\'': 1575 _, s := t.scanPascalStringLiteral() 1576 t.w("%s", s) 1577 case '"': 1578 _, s := t.scanQuotedStringLiteral() 1579 t.w("%s", s) 1580 case ccModuleName: 1581 nm := t.scanModuleName() 1582 codes := t.findCode(nm) 1583 t.w("\n{ %s }", commentSafe(nm)) 1584 for i := len(codes) - 1; i >= 0; i-- { 1585 t.pushCode(codes[i]) 1586 } 1587 case '#': 1588 if len(t.macroArgs) == 0 { 1589 panic(todo("%v: %#U %#U", t.position(), ch, t.c2)) 1590 } 1591 1592 t.consume() 1593 t.push(t.macroArgs[len(t.macroArgs)-1]()) 1594 case eof: 1595 return 1596 case ccPopMacroArg: 1597 t.consume() 1598 t.macroArgs = t.macroArgs[:len(t.macroArgs)-1] 1599 case ccBeginMetaComment: 1600 t.consume() 1601 t.w("@{") 1602 case ccEndMetaComment: 1603 t.consume() 1604 t.w("@}") 1605 case ccJoin: 1606 t.consume() 1607 t.w("@&") 1608 case ccCheckSum: 1609 t.consume() 1610 t.w("@$") 1611 default: 1612 if ch >= 0 { 1613 switch { 1614 case t.isIdentFirst(ch): 1615 _, _, id := t.scanIdentifier() 1616 switch d := t.definitions[id]; { 1617 case d != nil && d.kind != "=": 1618 c.expand(t, d) 1619 default: 1620 if t.constCount != 0 { 1621 switch t.constInjectState { 1622 case injZero: 1623 if id == "program" { 1624 t.constInjectState = injSeenProgram 1625 } 1626 case injSeenProgram: 1627 switch id { 1628 case "const": 1629 t.w("\nconst\n") 1630 t.injectConstants(true) 1631 t.constInjectState = injDone 1632 continue 1633 case "type", "var", "procedure", "function", "begin": 1634 t.injectConstants(false) 1635 t.constInjectState = injDone 1636 t.w("\n") 1637 } 1638 } 1639 } 1640 t.w("%s", id) 1641 } 1642 case t.isDigit(ch): 1643 _, _, s := t.scanDecimal() 1644 t.w("%s", s) 1645 default: 1646 panic(todo("%v: %#U %#U", t.position(), ch, t.c2)) 1647 } 1648 continue 1649 } 1650 1651 panic(todo("%v: %#U %#U", t.position(), ch, t.c2)) 1652 } 1653 } 1654 } 1655 1656 func (c *code) expand(t *tangle, d *definition) { 1657 switch d.kind { 1658 case "=", "==": 1659 repl := d.replacement 1660 replSrc := knuth.NewRuneSource(d.pos.Filename, []byte(repl), knuth.Unicode) 1661 p := d.replPos 1662 replSrc.AddLineColumnInfo(0, p.Filename, p.Line, p.Column) 1663 t.push(replSrc) 1664 case "(#)": 1665 out: 1666 for { 1667 switch ch := t.c(); ch { 1668 case '(': 1669 break out 1670 case '\n', ' ': 1671 t.scanSeparator2() 1672 case ccPopMacroArg: 1673 t.consume() 1674 t.macroArgs = t.macroArgs[:len(t.macroArgs)-1] 1675 default: 1676 panic(todo("%v: %#U %#U", t.position(), ch, t.c2)) 1677 } 1678 } 1679 1680 p := t.position() 1681 arg := c.scanMacroArg(t) 1682 t.macroArgs = append(t.macroArgs, func() knuth.RuneSource { 1683 argSrc := knuth.NewRuneSource(p.Filename, []byte(arg), knuth.Unicode) 1684 argSrc.AddLineColumnInfo(0, p.Filename, p.Line, p.Column) 1685 return argSrc 1686 }) 1687 replSrc := knuth.NewRuneSource(d.pos.Filename, []byte(d.replacement+"@Z"), knuth.Unicode) 1688 p = d.replPos 1689 replSrc.AddLineColumnInfo(0, p.Filename, p.Line, p.Column) 1690 t.push(replSrc) 1691 default: 1692 panic(todo("%v: %q %q", d.pos, d.name, d.kind)) 1693 } 1694 } 1695 1696 func (c *code) scanMacroArg(t *tangle) string { 1697 var b strings.Builder 1698 lvl := 0 1699 for { 1700 switch ch := t.c(); ch { 1701 case '(': 1702 t.consume() 1703 if lvl != 0 { 1704 b.WriteRune(ch) 1705 } 1706 lvl++ 1707 case ')': 1708 t.consume() 1709 lvl-- 1710 if lvl == 0 { 1711 return b.String() 1712 } 1713 1714 b.WriteRune(ch) 1715 case ccPopMacroArg: 1716 t.consume() 1717 b.WriteString("@Z") 1718 case '#': 1719 t.consume() 1720 t.push(t.macroArgs[len(t.macroArgs)-1]()) 1721 case ' ', '\n', '{': 1722 s := t.scanSeparator2() 1723 b.WriteString(s) 1724 case 1725 ',', ';', '=', '.', ':', '[', ']', '+', '-', '>', '*', '<', 1726 '/', '^': 1727 t.consume() 1728 b.WriteRune(ch) 1729 case '\'': 1730 _, s := t.scanPascalStringLiteral() 1731 b.WriteString(s) 1732 case '"': 1733 _, s := t.scanQuotedStringLiteral() 1734 b.WriteString(s) 1735 case ccCheckSum: 1736 t.consume() 1737 b.WriteString("@$") 1738 default: 1739 if ch >= 0 { 1740 switch { 1741 case t.isIdentFirst(ch): 1742 _, _, id := t.scanIdentifier() 1743 b.WriteByte(' ') 1744 b.WriteString(id) 1745 case t.isDigit(ch): 1746 _, _, s := t.scanNumeric() 1747 b.WriteByte(' ') 1748 b.WriteString(s) 1749 default: 1750 panic(todo("%v: %#U %#U", t.position(), ch, t.c2)) 1751 } 1752 continue 1753 } 1754 1755 panic(todo("%v: %#U %#U", t.position(), ch, t.c2)) 1756 } 1757 } 1758 } 1759 1760 type definition struct { 1761 kind string // "=", "==", "(#)" 1762 name string 1763 ord int 1764 pos token.Position 1765 replPos token.Position 1766 replacement string 1767 }