github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/docgen/extract/extract.go (about) 1 // Copyright 2017 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package extract 12 13 import ( 14 "bufio" 15 "bytes" 16 "context" 17 "fmt" 18 "io" 19 "io/ioutil" 20 "net/url" 21 "os/exec" 22 "regexp" 23 "strings" 24 "unicode" 25 26 "github.com/cockroachdb/cockroach/pkg/internal/rsg/yacc" 27 "github.com/cockroachdb/cockroach/pkg/util/httputil" 28 "github.com/cockroachdb/cockroach/pkg/util/syncutil" 29 ) 30 31 const ( 32 rrAddr = "http://bottlecaps.de/rr/ui" 33 ) 34 35 var ( 36 reIsExpr = regexp.MustCompile("^[a-z_0-9]+$") 37 reIsIdent = regexp.MustCompile("^[A-Z_0-9]+$") 38 rrLock syncutil.Mutex 39 ) 40 41 // GenerateRRJar generates via the Railroad jar. 42 func GenerateRRJar(jar string, bnf []byte) ([]byte, error) { 43 // Note: the RR generator is already multithreaded. The 44 // -max-workers setting at the toplevel is probably already 45 // optimally set to 1. 46 47 // JAR generation is enabled by placing Railroad.jar (ask mjibson for a link) 48 // in the generate directory. 49 cmd := exec.Command( 50 "java", 51 "-jar", jar, 52 "-suppressebnf", 53 "-color:#ffffff", 54 "-width:760", 55 "-") 56 cmd.Stdin = bytes.NewReader(bnf) 57 58 out, err := cmd.CombinedOutput() 59 if err != nil { 60 return nil, fmt.Errorf("%s: %s", err, out) 61 } 62 return out, nil 63 } 64 65 // GenerateRRNet generates the RR XHTML from a EBNF file. 66 func GenerateRRNet(bnf []byte) ([]byte, error) { 67 rrLock.Lock() 68 defer rrLock.Unlock() 69 70 v := url.Values{} 71 v.Add("color", "#ffffff") 72 v.Add("frame", "diagram") 73 //v.Add("options", "suppressebnf") 74 v.Add("text", string(bnf)) 75 v.Add("width", "760") 76 v.Add("options", "eliminaterecursion") 77 v.Add("options", "factoring") 78 v.Add("options", "inline") 79 80 resp, err := httputil.Post(context.TODO(), rrAddr, "application/x-www-form-urlencoded", strings.NewReader(v.Encode())) 81 if err != nil { 82 return nil, err 83 } 84 body, err := ioutil.ReadAll(resp.Body) 85 if err != nil { 86 return nil, err 87 } 88 resp.Body.Close() 89 if resp.StatusCode != 200 { 90 return nil, fmt.Errorf("%s: %s", resp.Status, string(body)) 91 } 92 return body, nil 93 } 94 95 // GenerateBNF Opens or downloads the .y file at addr and returns at as an EBNF 96 // file. Unimplemented branches are removed. Resulting empty nodes and their 97 // uses are further removed. Empty nodes are elided. 98 func GenerateBNF(addr string) (ebnf []byte, err error) { 99 var b []byte 100 if strings.HasPrefix(addr, "http") { 101 resp, err := httputil.Get(context.TODO(), addr) 102 if err != nil { 103 return nil, err 104 } 105 b, err = ioutil.ReadAll(resp.Body) 106 if err != nil { 107 return nil, err 108 } 109 resp.Body.Close() 110 } else { 111 body, err := ioutil.ReadFile(addr) 112 if err != nil { 113 return nil, err 114 } 115 b = body 116 } 117 t, err := yacc.Parse(addr, string(b)) 118 if err != nil { 119 return nil, err 120 } 121 buf := new(bytes.Buffer) 122 123 // Remove unimplemented branches. 124 prods := make(map[string][][]yacc.Item) 125 for _, p := range t.Productions { 126 var impl [][]yacc.Item 127 for _, e := range p.Expressions { 128 if strings.Contains(e.Command, "unimplemented") && !strings.Contains(e.Command, "FORCE DOC") { 129 continue 130 } 131 if strings.Contains(e.Command, "SKIP DOC") { 132 continue 133 } 134 impl = append(impl, e.Items) 135 } 136 prods[p.Name] = impl 137 } 138 // Cascade removal of empty nodes. That is, for any node that has no branches, 139 // remove it and anything it refers to. 140 for { 141 changed := false 142 for name, exprs := range prods { 143 var next [][]yacc.Item 144 for _, expr := range exprs { 145 add := true 146 var items []yacc.Item 147 for _, item := range expr { 148 p := prods[item.Value] 149 if item.Typ == yacc.TypToken && !isUpper(item.Value) && len(p) == 0 { 150 add = false 151 changed = true 152 break 153 } 154 // Remove items that have one branch which accepts nothing. 155 if len(p) == 1 && len(p[0]) == 0 { 156 changed = true 157 continue 158 } 159 items = append(items, item) 160 } 161 if add { 162 next = append(next, items) 163 } 164 } 165 prods[name] = next 166 } 167 if !changed { 168 break 169 } 170 } 171 172 start := true 173 for _, prod := range t.Productions { 174 p := prods[prod.Name] 175 if len(p) == 0 { 176 continue 177 } 178 if start { 179 start = false 180 } else { 181 buf.WriteString("\n") 182 } 183 fmt.Fprintf(buf, "%s ::=\n", prod.Name) 184 for i, items := range p { 185 buf.WriteString("\t") 186 if i > 0 { 187 buf.WriteString("| ") 188 } 189 for j, item := range items { 190 if j > 0 { 191 buf.WriteString(" ") 192 } 193 buf.WriteString(item.Value) 194 } 195 buf.WriteString("\n") 196 } 197 } 198 return buf.Bytes(), nil 199 } 200 201 func isUpper(s string) bool { 202 return s == strings.ToUpper(s) 203 } 204 205 // ParseGrammar parses the grammar from b. 206 func ParseGrammar(r io.Reader) (Grammar, error) { 207 g := make(Grammar) 208 209 var name string 210 var prods productions 211 scan := bufio.NewScanner(r) 212 i := 0 213 for scan.Scan() { 214 s := scan.Text() 215 i++ 216 f := strings.Fields(s) 217 if len(f) == 0 { 218 if len(prods) > 0 { 219 g[name] = prods 220 } 221 continue 222 } 223 if !unicode.IsSpace(rune(s[0])) { 224 if len(f) != 2 { 225 return nil, fmt.Errorf("bad line: %v: %s", i, s) 226 } 227 name = f[0] 228 prods = nil 229 continue 230 } 231 if f[0] == "|" { 232 f = f[1:] 233 } 234 var seq sequence 235 for _, v := range f { 236 if reIsIdent.MatchString(v) { 237 seq = append(seq, literal(v)) 238 } else if reIsExpr.MatchString(v) { 239 seq = append(seq, token(v)) 240 } else if strings.HasPrefix(v, `'`) && strings.HasSuffix(v, `'`) { 241 seq = append(seq, literal(v[1:len(v)-1])) 242 } else if strings.HasPrefix(v, `/*`) && strings.HasSuffix(v, `*/`) { 243 seq = append(seq, comment(v)) 244 } else { 245 panic(v) 246 } 247 } 248 prods = append(prods, seq) 249 } 250 if err := scan.Err(); err != nil { 251 return nil, err 252 } 253 if len(prods) > 0 { 254 g[name] = prods 255 } 256 g.simplify() 257 return g, nil 258 } 259 260 // Grammar represents a parsed grammar. 261 type Grammar map[string]productions 262 263 // ExtractProduction extracts the named statement and all its dependencies, 264 // in order, into a BNF file. If descend is false, only the named statement 265 // is extracted. 266 func (g Grammar) ExtractProduction( 267 name string, descend, nosplit bool, match, exclude []*regexp.Regexp, 268 ) ([]byte, error) { 269 names := []token{token(name)} 270 b := new(bytes.Buffer) 271 done := map[token]bool{token(name): true} 272 for i := 0; i < len(names); i++ { 273 if i > 0 { 274 b.WriteString("\n") 275 } 276 n := names[i] 277 prods := g[string(n)] 278 if len(prods) == 0 { 279 return nil, fmt.Errorf("couldn't find %s", n) 280 } 281 walkToken(prods, func(t token) { 282 if !done[t] && descend { 283 names = append(names, t) 284 done[t] = true 285 } 286 }) 287 fmt.Fprintf(b, "%s ::=\n", n) 288 b.WriteString(prods.Match(nosplit, match, exclude)) 289 } 290 return b.Bytes(), nil 291 } 292 293 // Inline inlines names. 294 func (g Grammar) Inline(names ...string) error { 295 for _, name := range names { 296 p, ok := g[name] 297 if !ok { 298 return fmt.Errorf("unknown name: %s", name) 299 } 300 grp := group(p) 301 for _, prods := range g { 302 replaceToken(prods, func(t token) expression { 303 if string(t) == name { 304 return grp 305 } 306 return nil 307 }) 308 } 309 } 310 return nil 311 } 312 313 func (g Grammar) simplify() { 314 for name, prods := range g { 315 p := simplify(name, prods) 316 if p != nil { 317 g[name] = p 318 } 319 } 320 } 321 322 func simplify(name string, prods productions) productions { 323 funcs := []func(string, productions) productions{ 324 simplifySelfRefList, 325 } 326 for _, f := range funcs { 327 if e := f(name, prods); e != nil { 328 return e 329 } 330 } 331 return nil 332 } 333 334 func simplifySelfRefList(name string, prods productions) productions { 335 // First check we have sequences everywhere, and that the production 336 // is a prefix of at least one of them. 337 // Split the sequences in leaf and recursive groups: 338 // X := A | B | X C | X D 339 // group 1: A | B 340 // group 2: C | D 341 // Final: (A | B) (C | D)* 342 var group1, group2 group 343 for _, p := range prods { 344 s, ok := p.(sequence) 345 if !ok { 346 return nil 347 } 348 if len(s) > 0 && s[0] == token(name) { 349 group2 = append(group2, s[1:]) 350 } else { 351 group1 = append(group1, s) 352 } 353 } 354 if len(group2) == 0 { 355 // Not a recursive rule; do nothing. 356 return nil 357 } 358 return productions{ 359 sequence{group1, repeat{group2}}, 360 } 361 } 362 363 func replaceToken(p productions, f func(token) expression) { 364 replacetoken(p, f) 365 } 366 367 func replacetoken(e expression, f func(token) expression) expression { 368 switch e := e.(type) { 369 case sequence: 370 for i, v := range e { 371 n := replacetoken(v, f) 372 if n != nil { 373 e[i] = n 374 } 375 } 376 case token: 377 return f(e) 378 case group: 379 for i, v := range e { 380 n := replacetoken(v, f) 381 if n != nil { 382 e[i] = n 383 } 384 } 385 case productions: 386 for i, v := range e { 387 n := replacetoken(v, f) 388 if n != nil { 389 e[i] = n 390 } 391 } 392 case repeat: 393 return replacetoken(e.expression, f) 394 case literal, comment: 395 // ignore 396 default: 397 panic(fmt.Errorf("unknown type: %T", e)) 398 } 399 return nil 400 } 401 402 func walkToken(e expression, f func(token)) { 403 switch e := e.(type) { 404 case sequence: 405 for _, v := range e { 406 walkToken(v, f) 407 } 408 case token: 409 f(e) 410 case group: 411 for _, v := range e { 412 walkToken(v, f) 413 } 414 case repeat: 415 walkToken(e.expression, f) 416 case productions: 417 for _, v := range e { 418 walkToken(v, f) 419 } 420 case literal, comment: 421 // ignore 422 default: 423 panic(fmt.Errorf("unknown type: %T", e)) 424 } 425 } 426 427 type productions []expression 428 429 func (p productions) Match(nosplit bool, match, exclude []*regexp.Regexp) string { 430 b := new(bytes.Buffer) 431 first := true 432 for _, e := range p { 433 if nosplit { 434 b.WriteString("\t") 435 if !first { 436 b.WriteString("| ") 437 } else { 438 first = false 439 } 440 b.WriteString(e.String()) 441 b.WriteString("\n") 442 continue 443 } 444 Loop: 445 for _, s := range split(e) { 446 for _, ex := range exclude { 447 if ex.MatchString(s) { 448 continue Loop 449 } 450 } 451 for _, ma := range match { 452 if !ma.MatchString(s) { 453 continue Loop 454 } 455 } 456 b.WriteString("\t") 457 if !first { 458 b.WriteString("| ") 459 } else { 460 first = false 461 } 462 b.WriteString(s) 463 b.WriteString("\n") 464 } 465 } 466 return b.String() 467 } 468 469 func (p productions) String() string { 470 b := new(bytes.Buffer) 471 for i, e := range p { 472 b.WriteString("\t") 473 if i > 0 { 474 b.WriteString("| ") 475 } 476 b.WriteString(e.String()) 477 b.WriteString("\n") 478 } 479 return b.String() 480 } 481 482 type expression interface { 483 String() string 484 } 485 486 type sequence []expression 487 488 func (s sequence) String() string { 489 b := new(bytes.Buffer) 490 for i, e := range s { 491 if i > 0 { 492 b.WriteString(" ") 493 } 494 b.WriteString(e.String()) 495 } 496 return b.String() 497 } 498 499 type token string 500 501 func (t token) String() string { 502 return string(t) 503 } 504 505 type literal string 506 507 func (l literal) String() string { 508 return fmt.Sprintf("'%s'", string(l)) 509 } 510 511 type group []expression 512 513 func (g group) String() string { 514 b := new(bytes.Buffer) 515 b.WriteString("( ") 516 for i, e := range g { 517 if i > 0 { 518 b.WriteString(" | ") 519 } 520 b.WriteString(e.String()) 521 } 522 b.WriteString(" )") 523 return b.String() 524 } 525 526 type repeat struct { 527 expression 528 } 529 530 func (r repeat) String() string { 531 return fmt.Sprintf("( %s )*", r.expression) 532 } 533 534 type comment string 535 536 func (c comment) String() string { 537 return string(c) 538 } 539 540 func split(e expression) []string { 541 appendRet := func(cur, add []string) []string { 542 if len(cur) == 0 { 543 if len(add) == 0 { 544 return []string{""} 545 } 546 return add 547 } 548 var next []string 549 for _, r := range cur { 550 for _, s := range add { 551 next = append(next, r+" "+s) 552 } 553 } 554 return next 555 } 556 var ret []string 557 switch e := e.(type) { 558 case sequence: 559 for _, v := range e { 560 ret = appendRet(ret, split(v)) 561 } 562 case group: 563 var next []string 564 for _, v := range e { 565 next = append(next, appendRet(ret, split(v))...) 566 } 567 ret = next 568 case literal, comment, repeat, token: 569 ret = append(ret, e.String()) 570 default: 571 panic(fmt.Errorf("unknown type: %T", e)) 572 } 573 return ret 574 }