github.com/containerd/containerd@v22.0.0-20200918172823-438c87b8e050+incompatible/filters/quote.go (about)

     1  /*
     2     Copyright The containerd Authors.
     3  
     4     Licensed under the Apache License, Version 2.0 (the "License");
     5     you may not use this file except in compliance with the License.
     6     You may obtain a copy of the License at
     7  
     8         http://www.apache.org/licenses/LICENSE-2.0
     9  
    10     Unless required by applicable law or agreed to in writing, software
    11     distributed under the License is distributed on an "AS IS" BASIS,
    12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13     See the License for the specific language governing permissions and
    14     limitations under the License.
    15  */
    16  
    17  package filters
    18  
    19  import (
    20  	"unicode/utf8"
    21  
    22  	"github.com/pkg/errors"
    23  )
    24  
// NOTE(stevvooe): Most of the code in this file is copied from the stdlib
// strconv package and modified to handle quoting with `/` and `|` as
// delimiters. The copyright is held by the Go authors.
    28  
    29  var errQuoteSyntax = errors.New("quote syntax error")
    30  
    31  // UnquoteChar decodes the first character or byte in the escaped string
    32  // or character literal represented by the string s.
    33  // It returns four values:
    34  //
    35  //	1) value, the decoded Unicode code point or byte value;
    36  //	2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
    37  //	3) tail, the remainder of the string after the character; and
    38  //	4) an error that will be nil if the character is syntactically valid.
    39  //
    40  // The second argument, quote, specifies the type of literal being parsed
    41  // and therefore which escaped quote character is permitted.
    42  // If set to a single quote, it permits the sequence \' and disallows unescaped '.
    43  // If set to a double quote, it permits \" and disallows unescaped ".
    44  // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
    45  //
    46  // This is from Go strconv package, modified to support `|` and `/` as double
    47  // quotes for use with regular expressions.
    48  func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) {
    49  	// easy cases
    50  	switch c := s[0]; {
    51  	case c == quote && (quote == '\'' || quote == '"' || quote == '/' || quote == '|'):
    52  		err = errQuoteSyntax
    53  		return
    54  	case c >= utf8.RuneSelf:
    55  		r, size := utf8.DecodeRuneInString(s)
    56  		return r, true, s[size:], nil
    57  	case c != '\\':
    58  		return rune(s[0]), false, s[1:], nil
    59  	}
    60  
    61  	// hard case: c is backslash
    62  	if len(s) <= 1 {
    63  		err = errQuoteSyntax
    64  		return
    65  	}
    66  	c := s[1]
    67  	s = s[2:]
    68  
    69  	switch c {
    70  	case 'a':
    71  		value = '\a'
    72  	case 'b':
    73  		value = '\b'
    74  	case 'f':
    75  		value = '\f'
    76  	case 'n':
    77  		value = '\n'
    78  	case 'r':
    79  		value = '\r'
    80  	case 't':
    81  		value = '\t'
    82  	case 'v':
    83  		value = '\v'
    84  	case 'x', 'u', 'U':
    85  		n := 0
    86  		switch c {
    87  		case 'x':
    88  			n = 2
    89  		case 'u':
    90  			n = 4
    91  		case 'U':
    92  			n = 8
    93  		}
    94  		var v rune
    95  		if len(s) < n {
    96  			err = errQuoteSyntax
    97  			return
    98  		}
    99  		for j := 0; j < n; j++ {
   100  			x, ok := unhex(s[j])
   101  			if !ok {
   102  				err = errQuoteSyntax
   103  				return
   104  			}
   105  			v = v<<4 | x
   106  		}
   107  		s = s[n:]
   108  		if c == 'x' {
   109  			// single-byte string, possibly not UTF-8
   110  			value = v
   111  			break
   112  		}
   113  		if v > utf8.MaxRune {
   114  			err = errQuoteSyntax
   115  			return
   116  		}
   117  		value = v
   118  		multibyte = true
   119  	case '0', '1', '2', '3', '4', '5', '6', '7':
   120  		v := rune(c) - '0'
   121  		if len(s) < 2 {
   122  			err = errQuoteSyntax
   123  			return
   124  		}
   125  		for j := 0; j < 2; j++ { // one digit already; two more
   126  			x := rune(s[j]) - '0'
   127  			if x < 0 || x > 7 {
   128  				err = errQuoteSyntax
   129  				return
   130  			}
   131  			v = (v << 3) | x
   132  		}
   133  		s = s[2:]
   134  		if v > 255 {
   135  			err = errQuoteSyntax
   136  			return
   137  		}
   138  		value = v
   139  	case '\\':
   140  		value = '\\'
   141  	case '\'', '"', '|', '/':
   142  		if c != quote {
   143  			err = errQuoteSyntax
   144  			return
   145  		}
   146  		value = rune(c)
   147  	default:
   148  		err = errQuoteSyntax
   149  		return
   150  	}
   151  	tail = s
   152  	return
   153  }
   154  
   155  // unquote interprets s as a single-quoted, double-quoted,
   156  // or backquoted Go string literal, returning the string value
   157  // that s quotes.  (If s is single-quoted, it would be a Go
   158  // character literal; Unquote returns the corresponding
   159  // one-character string.)
   160  //
   161  // This is modified from the standard library to support `|` and `/` as quote
   162  // characters for use with regular expressions.
   163  func unquote(s string) (string, error) {
   164  	n := len(s)
   165  	if n < 2 {
   166  		return "", errQuoteSyntax
   167  	}
   168  	quote := s[0]
   169  	if quote != s[n-1] {
   170  		return "", errQuoteSyntax
   171  	}
   172  	s = s[1 : n-1]
   173  
   174  	if quote == '`' {
   175  		if contains(s, '`') {
   176  			return "", errQuoteSyntax
   177  		}
   178  		if contains(s, '\r') {
   179  			// -1 because we know there is at least one \r to remove.
   180  			buf := make([]byte, 0, len(s)-1)
   181  			for i := 0; i < len(s); i++ {
   182  				if s[i] != '\r' {
   183  					buf = append(buf, s[i])
   184  				}
   185  			}
   186  			return string(buf), nil
   187  		}
   188  		return s, nil
   189  	}
   190  	if quote != '"' && quote != '\'' && quote != '|' && quote != '/' {
   191  		return "", errQuoteSyntax
   192  	}
   193  	if contains(s, '\n') {
   194  		return "", errQuoteSyntax
   195  	}
   196  
   197  	// Is it trivial?  Avoid allocation.
   198  	if !contains(s, '\\') && !contains(s, quote) {
   199  		switch quote {
   200  		case '"', '/', '|': // pipe and slash are treated like double quote
   201  			return s, nil
   202  		case '\'':
   203  			r, size := utf8.DecodeRuneInString(s)
   204  			if size == len(s) && (r != utf8.RuneError || size != 1) {
   205  				return s, nil
   206  			}
   207  		}
   208  	}
   209  
   210  	var runeTmp [utf8.UTFMax]byte
   211  	buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
   212  	for len(s) > 0 {
   213  		c, multibyte, ss, err := unquoteChar(s, quote)
   214  		if err != nil {
   215  			return "", err
   216  		}
   217  		s = ss
   218  		if c < utf8.RuneSelf || !multibyte {
   219  			buf = append(buf, byte(c))
   220  		} else {
   221  			n := utf8.EncodeRune(runeTmp[:], c)
   222  			buf = append(buf, runeTmp[:n]...)
   223  		}
   224  		if quote == '\'' && len(s) != 0 {
   225  			// single-quoted must be single character
   226  			return "", errQuoteSyntax
   227  		}
   228  	}
   229  	return string(buf), nil
   230  }
   231  
   232  // contains reports whether the string contains the byte c.
   233  func contains(s string, c byte) bool {
   234  	for i := 0; i < len(s); i++ {
   235  		if s[i] == c {
   236  			return true
   237  		}
   238  	}
   239  	return false
   240  }
   241  
   242  func unhex(b byte) (v rune, ok bool) {
   243  	c := rune(b)
   244  	switch {
   245  	case '0' <= c && c <= '9':
   246  		return c - '0', true
   247  	case 'a' <= c && c <= 'f':
   248  		return c - 'a' + 10, true
   249  	case 'A' <= c && c <= 'F':
   250  		return c - 'A' + 10, true
   251  	}
   252  	return
   253  }