github.com/xzntrc/go-enry/v2@v2.0.0-20230215091818-766cc1d65498/internal/code-generator/generator/vendor.go (about) 1 package generator 2 3 import ( 4 "bytes" 5 "fmt" 6 "io" 7 "io/ioutil" 8 "sort" 9 "strings" 10 "text/template" 11 12 "gopkg.in/yaml.v2" 13 ) 14 15 // Vendor generates regex matchers in Go for vendoring files/dirs. 16 // It is of generator.File type. 17 func Vendor(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error { 18 data, err := ioutil.ReadFile(fileToParse) 19 if err != nil { 20 return err 21 } 22 23 var regexps []string 24 if err := yaml.Unmarshal(data, ®exps); err != nil { 25 return fmt.Errorf("failed to parse YAML %s, %q", fileToParse, err) 26 } 27 28 buf := &bytes.Buffer{} 29 if err := executeVendorTemplate(buf, regexps, tmplPath, tmplName, commit); err != nil { 30 return err 31 } 32 33 return formatedWrite(outPath, buf.Bytes()) 34 } 35 36 func executeVendorTemplate(out io.Writer, regexps []string, tmplPath, tmplName, commit string) error { 37 funcs := template.FuncMap{"optimize": collateAllMatchers} 38 return executeTemplate(out, tmplName, tmplPath, commit, funcs, regexps) 39 } 40 41 func collateAllMatchers(regexps []string) string { 42 // We now collate all regexps from VendorMatchers to a single large regexp 43 // which is at least twice as fast to test than simply iterating & matching. 44 // 45 // --- 46 // 47 // We could test each matcher from VendorMatchers in turn i.e. 48 // 49 // func IsVendor(filename string) bool { 50 // for _, matcher := range data.VendorMatchers { 51 // if matcher.MatchString(filename) { 52 // return true 53 // } 54 // } 55 // return false 56 // } 57 // 58 // Or naïvely concatentate all these regexps using groups i.e. 59 // 60 // `(regexp1)|(regexp2)|(regexp3)|...` 61 // 62 // However, both of these are relatively slow and don't take advantage 63 // of the inherent structure within our regexps. 64 // 65 // Imperical observation: by looking at the regexps, we only have 3 types. 66 // 1. Those that start with `^` 67 // 2. Those that start with `(^|/)` 68 // 3. All the rest 69 // 70 // If we collate our regexps into these 3 groups - that will significantly 71 // reduce the likelihood of backtracking within the regexp trie matcher. 72 // 73 // A further improvement is to use non-capturing groups (?:) as otherwise 74 // the regexp parser, whilst matching, will have to allocate slices for 75 // matching positions. (A future improvement left out could be to 76 // enforce non-capturing groups within the sub-regexps.) 77 const ( 78 caret = "^" 79 caretOrSlash = "(^|/)" 80 ) 81 82 sort.Strings(regexps) 83 84 var caretPrefixed, caretOrSlashPrefixed, theRest []string 85 // Check prefix, add to the respective group slices 86 for _, re := range regexps { 87 if strings.HasPrefix(re, caret) { 88 caretPrefixed = append(caretPrefixed, re[len(caret):]) 89 } else if strings.HasPrefix(re, caretOrSlash) { 90 caretOrSlashPrefixed = append(caretOrSlashPrefixed, re[len(caretOrSlash):]) 91 } else { 92 theRest = append(theRest, re) 93 } 94 } 95 var sb strings.Builder 96 appendGroupWithCommonPrefix(&sb, "^", caretPrefixed) 97 sb.WriteString("|") 98 99 appendGroupWithCommonPrefix(&sb, "(?:^|/)", caretOrSlashPrefixed) 100 sb.WriteString("|") 101 102 appendGroupWithCommonPrefix(&sb, "", theRest) 103 return sb.String() 104 } 105 106 func appendGroupWithCommonPrefix(sb *strings.Builder, commonPrefix string, res []string) { 107 sb.WriteString("(?:") 108 if commonPrefix != "" { 109 sb.WriteString(fmt.Sprintf("%s(?:(?:", commonPrefix)) 110 } 111 sb.WriteString(strings.Join(res, ")|(?:")) 112 if commonPrefix != "" { 113 sb.WriteString("))") 114 } 115 sb.WriteString(")") 116 }