github.com/bcampbell/scrapeomat@v0.0.0-20220820232205-23e64141c89e/discover/pat.go (about)

     1  package discover
     2  
     3  import (
     4  	"regexp"
     5  	"strings"
     6  )
     7  
     8  // simple patterns for simplified url matching
     9  
    10  var patReplacer *strings.Replacer = strings.NewReplacer(
    11  	"ID", `([0-9]{4,})`,
    12  	"SLUG", `([^/]+-[^/]+)`,
    13  	"YYYY", `(\d\d\d\d)`,
    14  	"MM", `([01]?[0-9])`,
    15  	"DD", `([0123]?[0-9])`,
    16  )
    17  
    18  // turn a simplified pattern into a regexp
    19  func patToRegexp(in string) (*regexp.Regexp, error) {
    20  	suffix := ""
    21  	// don't want to escape a trailing '$' if it's there....
    22  	if strings.HasSuffix(in, "$") {
    23  		in = in[0 : len(in)-1] // assumes single-byte rune...
    24  		suffix = "$"
    25  	}
    26  
    27  	in = regexp.QuoteMeta(in)
    28  	in = in + suffix
    29  	in = patReplacer.Replace(in)
    30  
    31  	return regexp.Compile(in)
    32  }