github.com/lab47/exprcore@v0.0.0-20210525052339-fb7d6bd9331e/syntax/quote.go (about)

     1  // Copyright 2017 The Bazel Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package syntax
     6  
     7  // exprcore quoted string utilities.
     8  
     9  import (
    10  	"fmt"
    11  	"strconv"
    12  	"strings"
    13  )
    14  
    15  // unesc maps single-letter chars following \ to their actual values.
    16  var unesc = [256]byte{
    17  	'a':  '\a',
    18  	'b':  '\b',
    19  	'f':  '\f',
    20  	'n':  '\n',
    21  	'r':  '\r',
    22  	't':  '\t',
    23  	'v':  '\v',
    24  	'\\': '\\',
    25  	'\'': '\'',
    26  	'"':  '"',
    27  }
    28  
    29  // esc maps escape-worthy bytes to the char that should follow \.
    30  var esc = [256]byte{
    31  	'\a': 'a',
    32  	'\b': 'b',
    33  	'\f': 'f',
    34  	'\n': 'n',
    35  	'\r': 'r',
    36  	'\t': 't',
    37  	'\v': 'v',
    38  	'\\': '\\',
    39  	'\'': '\'',
    40  	'"':  '"',
    41  }
    42  
    43  // unquote unquotes the quoted string, returning the actual
    44  // string value, whether the original was triple-quoted, and
    45  // an error describing invalid input.
    46  func unquote(quoted string) (s string, triple bool, err error) {
    47  	// Check for raw prefix: means don't interpret the inner \.
    48  	raw := false
    49  	if strings.HasPrefix(quoted, "r") {
    50  		raw = true
    51  		quoted = quoted[1:]
    52  	}
    53  
    54  	if len(quoted) < 2 {
    55  		err = fmt.Errorf("string literal too short")
    56  		return
    57  	}
    58  
    59  	if quoted[0] != '"' && quoted[0] != '\'' || quoted[0] != quoted[len(quoted)-1] {
    60  		err = fmt.Errorf("string literal has invalid quotes")
    61  		return
    62  	}
    63  
    64  	// Check for triple quoted string.
    65  	quote := quoted[0]
    66  	if len(quoted) >= 6 && quoted[1] == quote && quoted[2] == quote && quoted[:3] == quoted[len(quoted)-3:] {
    67  		triple = true
    68  		quoted = quoted[3 : len(quoted)-3]
    69  	} else {
    70  		quoted = quoted[1 : len(quoted)-1]
    71  	}
    72  
    73  	// Now quoted is the quoted data, but no quotes.
    74  	// If we're in raw mode or there are no escapes or
    75  	// carriage returns, we're done.
    76  	var unquoteChars string
    77  	if raw {
    78  		unquoteChars = "\r"
    79  	} else {
    80  		unquoteChars = "\\\r"
    81  	}
    82  	if !strings.ContainsAny(quoted, unquoteChars) {
    83  		s = quoted
    84  		return
    85  	}
    86  
    87  	// Otherwise process quoted string.
    88  	// Each iteration processes one escape sequence along with the
    89  	// plain text leading up to it.
    90  	buf := new(strings.Builder)
    91  	for {
    92  		// Remove prefix before escape sequence.
    93  		i := strings.IndexAny(quoted, unquoteChars)
    94  		if i < 0 {
    95  			i = len(quoted)
    96  		}
    97  		buf.WriteString(quoted[:i])
    98  		quoted = quoted[i:]
    99  
   100  		if len(quoted) == 0 {
   101  			break
   102  		}
   103  
   104  		// Process carriage return.
   105  		if quoted[0] == '\r' {
   106  			buf.WriteByte('\n')
   107  			if len(quoted) > 1 && quoted[1] == '\n' {
   108  				quoted = quoted[2:]
   109  			} else {
   110  				quoted = quoted[1:]
   111  			}
   112  			continue
   113  		}
   114  
   115  		// Process escape sequence.
   116  		if len(quoted) == 1 {
   117  			err = fmt.Errorf(`truncated escape sequence \`)
   118  			return
   119  		}
   120  
   121  		switch quoted[1] {
   122  		default:
   123  			// In exprcore, like Go, a backslash must escape something.
   124  			// (Python still treats unnecessary backslashes literally,
   125  			// but since 3.6 has emitted a deprecation warning.)
   126  			err = fmt.Errorf("invalid escape sequence \\%c", quoted[1])
   127  			return
   128  
   129  		case '\n':
   130  			// Ignore the escape and the line break.
   131  			quoted = quoted[2:]
   132  
   133  		case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
   134  			// One-char escape.
   135  			// We escape only the kind of quotation mark in use.
   136  			buf.WriteByte(unesc[quoted[1]])
   137  			quoted = quoted[2:]
   138  
   139  		case '0', '1', '2', '3', '4', '5', '6', '7':
   140  			// Octal escape, up to 3 digits.
   141  			n := int(quoted[1] - '0')
   142  			quoted = quoted[2:]
   143  			for i := 1; i < 3; i++ {
   144  				if len(quoted) == 0 || quoted[0] < '0' || '7' < quoted[0] {
   145  					break
   146  				}
   147  				n = n*8 + int(quoted[0]-'0')
   148  				quoted = quoted[1:]
   149  			}
   150  			if n >= 256 {
   151  				// NOTE: Python silently discards the high bit,
   152  				// so that '\541' == '\141' == 'a'.
   153  				// Let's see if we can avoid doing that in BUILD files.
   154  				err = fmt.Errorf(`invalid escape sequence \%03o`, n)
   155  				return
   156  			}
   157  			buf.WriteByte(byte(n))
   158  
   159  		case 'x':
   160  			// Hexadecimal escape, exactly 2 digits.
   161  			if len(quoted) < 4 {
   162  				err = fmt.Errorf(`truncated escape sequence %s`, quoted)
   163  				return
   164  			}
   165  			n, err1 := strconv.ParseUint(quoted[2:4], 16, 0)
   166  			if err1 != nil {
   167  				err = fmt.Errorf(`invalid escape sequence %s`, quoted[:4])
   168  				return
   169  			}
   170  			buf.WriteByte(byte(n))
   171  			quoted = quoted[4:]
   172  		}
   173  	}
   174  
   175  	s = buf.String()
   176  	return
   177  }
   178  
   179  // indexByte returns the index of the first instance of b in s, or else -1.
   180  func indexByte(s string, b byte) int {
   181  	for i := 0; i < len(s); i++ {
   182  		if s[i] == b {
   183  			return i
   184  		}
   185  	}
   186  	return -1
   187  }
   188  
   189  // hex is a list of the hexadecimal digits, for use in quoting.
   190  // We always print lower-case hexadecimal.
   191  const hex = "0123456789abcdef"
   192  
   193  // quote returns the quoted form of the string value "x".
   194  // If triple is true, quote uses the triple-quoted form """x""".
   195  func quote(unquoted string, triple bool) string {
   196  	q := `"`
   197  	if triple {
   198  		q = `"""`
   199  	}
   200  
   201  	buf := new(strings.Builder)
   202  	buf.WriteString(q)
   203  
   204  	for i := 0; i < len(unquoted); i++ {
   205  		c := unquoted[i]
   206  		if c == '"' && triple && (i+1 < len(unquoted) && unquoted[i+1] != '"' || i+2 < len(unquoted) && unquoted[i+2] != '"') {
   207  			// Can pass up to two quotes through, because they are followed by a non-quote byte.
   208  			buf.WriteByte(c)
   209  			if i+1 < len(unquoted) && unquoted[i+1] == '"' {
   210  				buf.WriteByte(c)
   211  				i++
   212  			}
   213  			continue
   214  		}
   215  		if triple && c == '\n' {
   216  			// Can allow newline in triple-quoted string.
   217  			buf.WriteByte(c)
   218  			continue
   219  		}
   220  		if c == '\'' {
   221  			// Can allow ' since we always use ".
   222  			buf.WriteByte(c)
   223  			continue
   224  		}
   225  		if esc[c] != 0 {
   226  			buf.WriteByte('\\')
   227  			buf.WriteByte(esc[c])
   228  			continue
   229  		}
   230  		if c < 0x20 || c >= 0x80 {
   231  			// BUILD files are supposed to be Latin-1, so escape all control and high bytes.
   232  			// I'd prefer to use \x here, but Blaze does not implement
   233  			// \x in quoted strings (b/7272572).
   234  			buf.WriteByte('\\')
   235  			buf.WriteByte(hex[c>>6]) // actually octal but reusing hex digits 0-7.
   236  			buf.WriteByte(hex[(c>>3)&7])
   237  			buf.WriteByte(hex[c&7])
   238  			/*
   239  				buf.WriteByte('\\')
   240  				buf.WriteByte('x')
   241  				buf.WriteByte(hex[c>>4])
   242  				buf.WriteByte(hex[c&0xF])
   243  			*/
   244  			continue
   245  		}
   246  		buf.WriteByte(c)
   247  		continue
   248  	}
   249  
   250  	buf.WriteString(q)
   251  	return buf.String()
   252  }