github.com/pankona/gometalinter@v2.0.11+incompatible/_linters/src/golang.org/x/text/width/gen.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // +build ignore 6 7 // This program generates the trie for width operations. The generated table 8 // includes width category information as well as the normalization mappings. 9 package main 10 11 import ( 12 "bytes" 13 "fmt" 14 "io" 15 "log" 16 "math" 17 "unicode/utf8" 18 19 "golang.org/x/text/internal/gen" 20 "golang.org/x/text/internal/triegen" 21 ) 22 23 // See gen_common.go for flags. 24 25 func main() { 26 gen.Init() 27 genTables() 28 genTests() 29 gen.Repackage("gen_trieval.go", "trieval.go", "width") 30 gen.Repackage("gen_common.go", "common_test.go", "width") 31 } 32 33 func genTables() { 34 t := triegen.NewTrie("width") 35 // fold and inverse mappings. See mapComment for a description of the format 36 // of each entry. Add dummy value to make an index of 0 mean no mapping. 37 inverse := [][4]byte{{}} 38 mapping := map[[4]byte]int{[4]byte{}: 0} 39 40 getWidthData(func(r rune, tag elem, alt rune) { 41 idx := 0 42 if alt != 0 { 43 var buf [4]byte 44 buf[0] = byte(utf8.EncodeRune(buf[1:], alt)) 45 s := string(r) 46 buf[buf[0]] ^= s[len(s)-1] 47 var ok bool 48 if idx, ok = mapping[buf]; !ok { 49 idx = len(mapping) 50 if idx > math.MaxUint8 { 51 log.Fatalf("Index %d does not fit in a byte.", idx) 52 } 53 mapping[buf] = idx 54 inverse = append(inverse, buf) 55 } 56 } 57 t.Insert(r, uint64(tag|elem(idx))) 58 }) 59 60 w := &bytes.Buffer{} 61 gen.WriteUnicodeVersion(w) 62 63 sz, err := t.Gen(w) 64 if err != nil { 65 log.Fatal(err) 66 } 67 68 sz += writeMappings(w, inverse) 69 70 fmt.Fprintf(w, "// Total table size %d bytes (%dKiB)\n", sz, sz/1024) 71 72 gen.WriteGoFile(*outputFile, "width", w.Bytes()) 73 } 74 75 const inverseDataComment = ` 76 // inverseData contains 4-byte entries of the following format: 77 // <length> <modified UTF-8-encoded rune> <0 padding> 78 // The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the 79 // UTF-8 encoding of the original rune. Mappings often have the following 80 // pattern: 81 // A -> A (U+FF21 -> U+0041) 82 // B -> B (U+FF22 -> U+0042) 83 // ... 84 // By xor-ing the last byte the same entry can be shared by many mappings. This 85 // reduces the total number of distinct entries by about two thirds. 86 // The resulting entry for the aforementioned mappings is 87 // { 0x01, 0xE0, 0x00, 0x00 } 88 // Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get 89 // E0 ^ A1 = 41. 90 // Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get 91 // E0 ^ A2 = 42. 92 // Note that because of the xor-ing, the byte sequence stored in the entry is 93 // not valid UTF-8.` 94 95 func writeMappings(w io.Writer, data [][4]byte) int { 96 fmt.Fprintln(w, inverseDataComment) 97 fmt.Fprintf(w, "var inverseData = [%d][4]byte{\n", len(data)) 98 for _, x := range data { 99 fmt.Fprintf(w, "{ 0x%02x, 0x%02x, 0x%02x, 0x%02x },\n", x[0], x[1], x[2], x[3]) 100 } 101 fmt.Fprintln(w, "}") 102 return len(data) * 4 103 } 104 105 func genTests() { 106 w := &bytes.Buffer{} 107 fmt.Fprintf(w, "\nvar mapRunes = map[rune]struct{r rune; e elem}{\n") 108 getWidthData(func(r rune, tag elem, alt rune) { 109 if alt != 0 { 110 fmt.Fprintf(w, "\t0x%X: {0x%X, 0x%X},\n", r, alt, tag) 111 } 112 }) 113 fmt.Fprintln(w, "}") 114 gen.WriteGoFile("runes_test.go", "width", w.Bytes()) 115 }