github.com/xzntrc/go-enry/v2@v2.0.0-20230215091818-766cc1d65498/internal/code-generator/generator/vendor.go (about)

     1  package generator
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"io"
     7  	"io/ioutil"
     8  	"sort"
     9  	"strings"
    10  	"text/template"
    11  
    12  	"gopkg.in/yaml.v2"
    13  )
    14  
    15  // Vendor generates regex matchers in Go for vendoring files/dirs.
    16  // It is of generator.File type.
    17  func Vendor(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error {
    18  	data, err := ioutil.ReadFile(fileToParse)
    19  	if err != nil {
    20  		return err
    21  	}
    22  
    23  	var regexps []string
    24  	if err := yaml.Unmarshal(data, &regexps); err != nil {
    25  		return fmt.Errorf("failed to parse YAML %s, %q", fileToParse, err)
    26  	}
    27  
    28  	buf := &bytes.Buffer{}
    29  	if err := executeVendorTemplate(buf, regexps, tmplPath, tmplName, commit); err != nil {
    30  		return err
    31  	}
    32  
    33  	return formatedWrite(outPath, buf.Bytes())
    34  }
    35  
    36  func executeVendorTemplate(out io.Writer, regexps []string, tmplPath, tmplName, commit string) error {
    37  	funcs := template.FuncMap{"optimize": collateAllMatchers}
    38  	return executeTemplate(out, tmplName, tmplPath, commit, funcs, regexps)
    39  }
    40  
    41  func collateAllMatchers(regexps []string) string {
    42  	// We now collate all regexps from VendorMatchers to a single large regexp
    43  	// which is at least twice as fast to test than simply iterating & matching.
    44  	//
    45  	// ---
    46  	//
    47  	// We could test each matcher from VendorMatchers in turn i.e.
    48  	//
    49  	//  	func IsVendor(filename string) bool {
    50  	// 			for _, matcher := range data.VendorMatchers {
    51  	// 				if matcher.MatchString(filename) {
    52  	//					return true
    53  	//				}
    54  	//			}
    55  	//			return false
    56  	//		}
    57  	//
    58  	// Or naïvely concatentate all these regexps using groups i.e.
    59  	//
    60  	//		`(regexp1)|(regexp2)|(regexp3)|...`
    61  	//
    62  	// However, both of these are relatively slow and don't take advantage
    63  	// of the inherent structure within our regexps.
    64  	//
    65  	// Imperical observation: by looking at the regexps, we only have 3 types.
    66  	//  1. Those that start with `^`
    67  	//  2. Those that start with `(^|/)`
    68  	//  3. All the rest
    69  	//
    70  	// If we collate our regexps into these 3 groups - that will significantly
    71  	// reduce the likelihood of backtracking within the regexp trie matcher.
    72  	//
    73  	// A further improvement is to use non-capturing groups (?:) as otherwise
    74  	// the regexp parser, whilst matching, will have to allocate slices for
    75  	// matching positions. (A future improvement left out could be to
    76  	// enforce non-capturing groups within the sub-regexps.)
    77  	const (
    78  		caret        = "^"
    79  		caretOrSlash = "(^|/)"
    80  	)
    81  
    82  	sort.Strings(regexps)
    83  
    84  	var caretPrefixed, caretOrSlashPrefixed, theRest []string
    85  	// Check prefix, add to the respective group slices
    86  	for _, re := range regexps {
    87  		if strings.HasPrefix(re, caret) {
    88  			caretPrefixed = append(caretPrefixed, re[len(caret):])
    89  		} else if strings.HasPrefix(re, caretOrSlash) {
    90  			caretOrSlashPrefixed = append(caretOrSlashPrefixed, re[len(caretOrSlash):])
    91  		} else {
    92  			theRest = append(theRest, re)
    93  		}
    94  	}
    95  	var sb strings.Builder
    96  	appendGroupWithCommonPrefix(&sb, "^", caretPrefixed)
    97  	sb.WriteString("|")
    98  
    99  	appendGroupWithCommonPrefix(&sb, "(?:^|/)", caretOrSlashPrefixed)
   100  	sb.WriteString("|")
   101  
   102  	appendGroupWithCommonPrefix(&sb, "", theRest)
   103  	return sb.String()
   104  }
   105  
   106  func appendGroupWithCommonPrefix(sb *strings.Builder, commonPrefix string, res []string) {
   107  	sb.WriteString("(?:")
   108  	if commonPrefix != "" {
   109  		sb.WriteString(fmt.Sprintf("%s(?:(?:", commonPrefix))
   110  	}
   111  	sb.WriteString(strings.Join(res, ")|(?:"))
   112  	if commonPrefix != "" {
   113  		sb.WriteString("))")
   114  	}
   115  	sb.WriteString(")")
   116  }