github.com/thiagoyeds/go-cloud@v0.26.0/internal/escape/escape.go (about)

     1  // Copyright 2019 The Go Cloud Development Kit Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     https://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package escape includes helpers for escaping and unescaping strings.
    16  package escape
    17  
    18  import (
    19  	"fmt"
    20  	"net/url"
    21  	"strconv"
    22  	"strings"
    23  )
    24  
// NonUTF8String is a string for which utf8.ValidString returns false.
// It consists of the two bytes 0xBD and 0xB2, both of which are UTF-8
// continuation bytes that are not preceded by a leading byte.
const NonUTF8String = "\xbd\xb2"
    27  
    28  // IsASCIIAlphanumeric returns true iff r is alphanumeric: a-z, A-Z, 0-9.
    29  func IsASCIIAlphanumeric(r rune) bool {
    30  	switch {
    31  	case 'A' <= r && r <= 'Z':
    32  		return true
    33  	case 'a' <= r && r <= 'z':
    34  		return true
    35  	case '0' <= r && r <= '9':
    36  		return true
    37  	}
    38  	return false
    39  }
    40  
    41  // HexEscape returns s, with all runes for which shouldEscape returns true
    42  // escaped to "__0xXXX__", where XXX is the hex representation of the rune
    43  // value. For example, " " would escape to "__0x20__".
    44  //
    45  // Non-UTF-8 strings will have their non-UTF-8 characters escaped to
    46  // unicode.ReplacementChar; the original value is lost. Please file an
    47  // issue if you need non-UTF8 support.
    48  //
    49  // Note: shouldEscape takes the whole string as a slice of runes and an
    50  // index. Passing it a single byte or a single rune doesn't provide
    51  // enough context for some escape decisions; for example, the caller might
    52  // want to escape the second "/" in "//" but not the first one.
    53  // We pass a slice of runes instead of the string or a slice of bytes
    54  // because some decisions will be made on a rune basis (e.g., encode
    55  // all non-ASCII runes).
    56  func HexEscape(s string, shouldEscape func(s []rune, i int) bool) string {
    57  	// Do a first pass to see which runes (if any) need escaping.
    58  	runes := []rune(s)
    59  	var toEscape []int
    60  	for i := range runes {
    61  		if shouldEscape(runes, i) {
    62  			toEscape = append(toEscape, i)
    63  		}
    64  	}
    65  	if len(toEscape) == 0 {
    66  		return s
    67  	}
    68  	// Each escaped rune turns into at most 14 runes ("__0x7fffffff__"),
    69  	// so allocate an extra 13 for each. We'll reslice at the end
    70  	// if we didn't end up using them.
    71  	escaped := make([]rune, len(runes)+13*len(toEscape))
    72  	n := 0 // current index into toEscape
    73  	j := 0 // current index into escaped
    74  	for i, r := range runes {
    75  		if n < len(toEscape) && i == toEscape[n] {
    76  			// We were asked to escape this rune.
    77  			for _, x := range fmt.Sprintf("__%#x__", r) {
    78  				escaped[j] = x
    79  				j++
    80  			}
    81  			n++
    82  		} else {
    83  			escaped[j] = r
    84  			j++
    85  		}
    86  	}
    87  	return string(escaped[0:j])
    88  }
    89  
    90  // unescape tries to unescape starting at r[i].
    91  // It returns a boolean indicating whether the unescaping was successful,
    92  // and (if true) the unescaped rune and the last index of r that was used
    93  // during unescaping.
    94  func unescape(r []rune, i int) (bool, rune, int) {
    95  	// Look for "__0x".
    96  	if r[i] != '_' {
    97  		return false, 0, 0
    98  	}
    99  	i++
   100  	if i >= len(r) || r[i] != '_' {
   101  		return false, 0, 0
   102  	}
   103  	i++
   104  	if i >= len(r) || r[i] != '0' {
   105  		return false, 0, 0
   106  	}
   107  	i++
   108  	if i >= len(r) || r[i] != 'x' {
   109  		return false, 0, 0
   110  	}
   111  	i++
   112  	// Capture the digits until the next "_" (if any).
   113  	var hexdigits []rune
   114  	for ; i < len(r) && r[i] != '_'; i++ {
   115  		hexdigits = append(hexdigits, r[i])
   116  	}
   117  	// Look for the trailing "__".
   118  	if i >= len(r) || r[i] != '_' {
   119  		return false, 0, 0
   120  	}
   121  	i++
   122  	if i >= len(r) || r[i] != '_' {
   123  		return false, 0, 0
   124  	}
   125  	// Parse the hex digits into an int32.
   126  	retval, err := strconv.ParseInt(string(hexdigits), 16, 32)
   127  	if err != nil {
   128  		return false, 0, 0
   129  	}
   130  	return true, rune(retval), i
   131  }
   132  
   133  // HexUnescape reverses HexEscape.
   134  func HexUnescape(s string) string {
   135  	var unescaped []rune
   136  	runes := []rune(s)
   137  	for i := 0; i < len(runes); i++ {
   138  		if ok, newR, newI := unescape(runes, i); ok {
   139  			// We unescaped some runes starting at i, resulting in the
   140  			// unescaped rune newR. The last rune used was newI.
   141  			if unescaped == nil {
   142  				// This is the first rune we've encountered that
   143  				// needed unescaping. Allocate a buffer and copy any
   144  				// previous runes.
   145  				unescaped = make([]rune, i)
   146  				copy(unescaped, runes)
   147  			}
   148  			unescaped = append(unescaped, newR)
   149  			i = newI
   150  		} else if unescaped != nil {
   151  			unescaped = append(unescaped, runes[i])
   152  		}
   153  	}
   154  	if unescaped == nil {
   155  		return s
   156  	}
   157  	return string(unescaped)
   158  }
   159  
   160  // URLEscape uses url.PathEscape to escape s.
   161  func URLEscape(s string) string {
   162  	return url.PathEscape(s)
   163  }
   164  
   165  // URLUnescape reverses URLEscape using url.PathUnescape. If the unescape
   166  // returns an error, it returns s.
   167  func URLUnescape(s string) string {
   168  	if u, err := url.PathUnescape(s); err == nil {
   169  		return u
   170  	}
   171  	return s
   172  }
   173  
   174  func makeASCIIString(start, end int) string {
   175  	var s []byte
   176  	for i := start; i < end; i++ {
   177  		if i >= 'a' && i <= 'z' {
   178  			continue
   179  		}
   180  		if i >= 'A' && i <= 'Z' {
   181  			continue
   182  		}
   183  		if i >= '0' && i <= '9' {
   184  			continue
   185  		}
   186  		s = append(s, byte(i))
   187  	}
   188  	return string(s)
   189  }
   190  
// WeirdStrings are unusual/weird strings for use in testing escaping.
// The keys are descriptive strings, the values are the weird strings.
var WeirdStrings = map[string]string{
	"fwdslashes":          "foo/bar/baz",
	"repeatedfwdslashes":  "foo//bar///baz",
	"dotdotslash":         "../foo/../bar/../../baz../",
	"backslashes":         "foo\\bar\\baz",
	"repeatedbackslashes": "..\\foo\\\\bar\\\\\\baz",
	"dotdotbackslash":     "..\\foo\\..\\bar\\..\\..\\baz..\\",
	"quote":               "foo\"bar\"baz",
	"spaces":              "foo bar baz",
	"startwithdigit":      "12345",
	"unicode":             strings.Repeat("☺", 3),
	// The ASCII characters 0-127, minus the letters and digits that
	// makeASCIIString leaves out, in chunks of 16 code points each to keep
	// the possibly-escaped versions from getting too long.
	"ascii-1": makeASCIIString(0, 16),
	"ascii-2": makeASCIIString(16, 32),
	"ascii-3": makeASCIIString(32, 48),
	"ascii-4": makeASCIIString(48, 64),
	"ascii-5": makeASCIIString(64, 80),
	"ascii-6": makeASCIIString(80, 96),
	"ascii-7": makeASCIIString(96, 112),
	"ascii-8": makeASCIIString(112, 128),
}