github.com/cilki/sh@v2.6.4+incompatible/syntax/pattern.go (about)

     1  // Copyright (c) 2017, Daniel Martí <mvdan@mvdan.cc>
     2  // See LICENSE for licensing information
     3  
     4  package syntax
     5  
     6  import (
     7  	"bytes"
     8  	"fmt"
     9  	"regexp"
    10  	"strings"
    11  )
    12  
    13  func charClass(s string) (string, error) {
    14  	if strings.HasPrefix(s, "[[.") || strings.HasPrefix(s, "[[=") {
    15  		return "", fmt.Errorf("collating features not available")
    16  	}
    17  	if !strings.HasPrefix(s, "[[:") {
    18  		return "", nil
    19  	}
    20  	name := s[3:]
    21  	end := strings.Index(name, ":]]")
    22  	if end < 0 {
    23  		return "", fmt.Errorf("[[: was not matched with a closing :]]")
    24  	}
    25  	name = name[:end]
    26  	switch name {
    27  	case "alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",
    28  		"lower", "print", "punct", "space", "upper", "word", "xdigit":
    29  	default:
    30  		return "", fmt.Errorf("invalid character class: %q", name)
    31  	}
    32  	return s[:len(name)+6], nil
    33  }
    34  
    35  // TranslatePattern turns a shell wildcard pattern into a regular expression
    36  // that can be used with regexp.Compile. It will return an error if the input
    37  // pattern was incorrect. Otherwise, the returned expression can be passed to
    38  // regexp.MustCompile.
    39  //
    40  // For example, TranslatePattern(`foo*bar?`, true) returns `foo.*bar.`.
    41  //
    42  // Note that this function (and QuotePattern) should not be directly used with
    43  // file paths if Windows is supported, as the path separator on that platform is
    44  // the same character as the escaping character for shell patterns.
    45  func TranslatePattern(pattern string, greedy bool) (string, error) {
    46  	any := false
    47  loop:
    48  	for _, r := range pattern {
    49  		switch r {
    50  		// including those that need escaping since they are
    51  		// special chars in regexes
    52  		case '*', '?', '[', '\\', '.', '+', '(', ')', '|',
    53  			']', '{', '}', '^', '$':
    54  			any = true
    55  			break loop
    56  		}
    57  	}
    58  	if !any { // short-cut without a string copy
    59  		return pattern, nil
    60  	}
    61  	var buf bytes.Buffer
    62  	for i := 0; i < len(pattern); i++ {
    63  		switch c := pattern[i]; c {
    64  		case '*':
    65  			buf.WriteString(".*")
    66  			if !greedy {
    67  				buf.WriteByte('?')
    68  			}
    69  		case '?':
    70  			buf.WriteString(".")
    71  		case '\\':
    72  			if i++; i >= len(pattern) {
    73  				return "", fmt.Errorf(`\ at end of pattern`)
    74  			}
    75  			buf.WriteString(regexp.QuoteMeta(string(pattern[i])))
    76  		case '[':
    77  			name, err := charClass(pattern[i:])
    78  			if err != nil {
    79  				return "", err
    80  			}
    81  			if name != "" {
    82  				buf.WriteString(name)
    83  				i += len(name) - 1
    84  				break
    85  			}
    86  			buf.WriteByte(c)
    87  			if i++; i >= len(pattern) {
    88  				return "", fmt.Errorf("[ was not matched with a closing ]")
    89  			}
    90  			switch c = pattern[i]; c {
    91  			case '!', '^':
    92  				buf.WriteByte('^')
    93  				i++
    94  				c = pattern[i]
    95  			}
    96  			buf.WriteByte(c)
    97  			last := c
    98  			rangeStart := byte(0)
    99  			for {
   100  				if i++; i >= len(pattern) {
   101  					return "", fmt.Errorf("[ was not matched with a closing ]")
   102  				}
   103  				last, c = c, pattern[i]
   104  				buf.WriteByte(c)
   105  				if c == ']' {
   106  					break
   107  				}
   108  				if rangeStart != 0 && rangeStart > c {
   109  					return "", fmt.Errorf("invalid range: %c-%c", rangeStart, c)
   110  				}
   111  				if c == '-' {
   112  					rangeStart = last
   113  				} else {
   114  					rangeStart = 0
   115  				}
   116  			}
   117  		default:
   118  			buf.WriteString(regexp.QuoteMeta(string(c)))
   119  		}
   120  	}
   121  	return buf.String(), nil
   122  }
   123  
   124  // HasPattern returns whether a string contains any unescaped wildcard
   125  // characters: '*', '?', or '['. When the function returns false, the given
   126  // pattern can only match at most one string.
   127  //
   128  // For example, HasPattern(`foo\*bar`) returns false, but HasPattern(`foo*bar`)
   129  // returns true.
   130  //
   131  // This can be useful to avoid extra work, like TranslatePattern. Note that this
   132  // function cannot be used to avoid QuotePattern, as backslashes are quoted by
   133  // that function but ignored here.
   134  func HasPattern(pattern string) bool {
   135  	for i := 0; i < len(pattern); i++ {
   136  		switch pattern[i] {
   137  		case '\\':
   138  			i++
   139  		case '*', '?', '[':
   140  			return true
   141  		}
   142  	}
   143  	return false
   144  }
   145  
   146  // QuotePattern returns a string that quotes all special characters in the given
   147  // wildcard pattern. The returned string is a pattern that matches the literal
   148  // string.
   149  //
   150  // For example, QuotePattern(`foo*bar?`) returns `foo\*bar\?`.
   151  func QuotePattern(pattern string) string {
   152  	any := false
   153  loop:
   154  	for _, r := range pattern {
   155  		switch r {
   156  		case '*', '?', '[', '\\':
   157  			any = true
   158  			break loop
   159  		}
   160  	}
   161  	if !any { // short-cut without a string copy
   162  		return pattern
   163  	}
   164  	var buf bytes.Buffer
   165  	for _, r := range pattern {
   166  		switch r {
   167  		case '*', '?', '[', '\\':
   168  			buf.WriteByte('\\')
   169  		}
   170  		buf.WriteRune(r)
   171  	}
   172  	return buf.String()
   173  }