github.com/xhghs/rclone@v1.51.1-0.20200430155106-e186a28cced8/lib/encoder/encoder.go (about)

     1  /*
     2  Translate file names for usage on restrictive storage systems
     3  
     4  The restricted set of characters are mapped to a unicode equivalent version
     5  (most to their FULLWIDTH variant) to increase compatibility with other
     6  storage systems.
     7  See: http://unicode-search.net/unicode-namesearch.pl?term=FULLWIDTH
     8  
     9  Encoders will also quote reserved characters to differentiate between
    10  the raw and encoded forms.
    11  */
    12  
    13  package encoder
    14  
    15  import (
    16  	"bytes"
    17  	"fmt"
    18  	"io"
    19  	"sort"
    20  	"strconv"
    21  	"strings"
    22  	"unicode/utf8"
    23  )
    24  
// Offsets and runes used to build the replacement symbols.
const (
	// adding this to any printable ASCII character turns it into the
	// FULLWIDTH variant
	fullOffset = 0xFEE0
	// the first rune of the SYMBOL FOR block for control characters;
	// adding a control character's code to it gives that character's
	// SYMBOL FOR rune
	symbolOffset = '␀' // SYMBOL FOR NULL
	// QuoteRune is the rune used for quoting reserved characters
	QuoteRune = '‛' // SINGLE HIGH-REVERSED-9 QUOTATION MARK
)
    34  
    35  // NB keep the tests in fstests/fstests/fstests.go FsEncoding up to date with this
    36  // NB keep the aliases up to date below also
    37  
// Possible flags for the MultiEncoder
//
// EncodeZero has the value 0, so mask.Has(EncodeZero) is always false and
// EncodeZero is the name of the empty mask rather than a selectable bit.
//
// iota is already 1 on the EncodeSlash line, so EncodeSlash is 1<<1 and
// the lowest bit (value 1) is not assigned to any flag. Each following
// line repeats the 1 << iota expression, so the order of the lines
// determines the numeric value of every flag.
const (
	EncodeZero          MultiEncoder = 0         // NUL(0x00)
	EncodeSlash         MultiEncoder = 1 << iota // /
	EncodeLtGt                                   // <>
	EncodeDoubleQuote                            // "
	EncodeSingleQuote                            // '
	EncodeBackQuote                              // `
	EncodeDollar                                 // $
	EncodeColon                                  // :
	EncodeQuestion                               // ?
	EncodeAsterisk                               // *
	EncodePipe                                   // |
	EncodeHash                                   // #
	EncodePercent                                // %
	EncodeBackSlash                              // \
	EncodeCrLf                                   // CR(0x0D), LF(0x0A)
	EncodeDel                                    // DEL(0x7F)
	EncodeCtl                                    // CTRL(0x01-0x1F)
	EncodeLeftSpace                              // Leading SPACE
	EncodeLeftPeriod                             // Leading .
	EncodeLeftTilde                              // Leading ~
	EncodeLeftCrLfHtVt                           // Leading CR LF HT VT
	EncodeRightSpace                             // Trailing SPACE
	EncodeRightPeriod                            // Trailing .
	EncodeRightCrLfHtVt                          // Trailing CR LF HT VT
	EncodeInvalidUtf8                            // Invalid UTF-8 bytes
	EncodeDot                                    // . and .. names

	// Synthetic
	//
	// These are unions of the single bit flags above.
	EncodeWin         = EncodeColon | EncodeQuestion | EncodeDoubleQuote | EncodeAsterisk | EncodeLtGt | EncodePipe // :?"*<>|
	EncodeHashPercent = EncodeHash | EncodePercent                                                                  // #%
)
    71  
    72  // Has returns true if flag is contained in mask
    73  func (mask MultiEncoder) Has(flag MultiEncoder) bool {
    74  	return mask&flag != 0
    75  }
    76  
// Encoder can transform names to and from the original and translated version.
//
// Encode and Decode convert between a raw name and this encoding. The
// FromStandard*/ToStandard* methods convert between the Standard
// encoding and this encoding, for either a single name or a / separated
// path.
type Encoder interface {
	// Encode takes a raw name and substitutes any reserved characters and
	// patterns in it
	Encode(string) string
	// Decode takes a name and undoes any substitutions made by Encode
	Decode(string) string

	// FromStandardPath takes a / separated path in Standard encoding
	// and converts it to a / separated path in this encoding.
	FromStandardPath(string) string
	// FromStandardName takes name in Standard encoding and converts
	// it in this encoding.
	FromStandardName(string) string
	// ToStandardPath takes a / separated path in this encoding
	// and converts it to a / separated path in Standard encoding.
	ToStandardPath(string) string
	// ToStandardName takes name in this encoding and converts
	// it in Standard encoding.
	ToStandardName(string) string
}
    98  
// MultiEncoder is a configurable Encoder. The Encode* constants in this
// package can be combined using bitwise or (|) to enable handling of multiple
// character classes
//
// The value is a bit mask; use Has to test whether a flag is enabled.
type MultiEncoder uint
   103  
// Aliases maps encodings to names and vice versa
//
// encodingToName is consulted by MultiEncoder.String() and
// nameToEncoding by MultiEncoder.Set(); both are filled through alias().
var (
	encodingToName = map[MultiEncoder]string{}
	nameToEncoding = map[string]MultiEncoder{}
)
   109  
   110  // alias adds an alias for MultiEncoder.String() and MultiEncoder.Set()
   111  func alias(name string, mask MultiEncoder) {
   112  	nameToEncoding[name] = mask
   113  	// don't overwrite existing reverse translations
   114  	if _, ok := encodingToName[mask]; !ok {
   115  		encodingToName[mask] = name
   116  	}
   117  }
   118  
   119  func init() {
   120  	alias("None", EncodeZero)
   121  	alias("Slash", EncodeSlash)
   122  	alias("LtGt", EncodeLtGt)
   123  	alias("DoubleQuote", EncodeDoubleQuote)
   124  	alias("SingleQuote", EncodeSingleQuote)
   125  	alias("BackQuote", EncodeBackQuote)
   126  	alias("Dollar", EncodeDollar)
   127  	alias("Colon", EncodeColon)
   128  	alias("Question", EncodeQuestion)
   129  	alias("Asterisk", EncodeAsterisk)
   130  	alias("Pipe", EncodePipe)
   131  	alias("Hash", EncodeHash)
   132  	alias("Percent", EncodePercent)
   133  	alias("BackSlash", EncodeBackSlash)
   134  	alias("CrLf", EncodeCrLf)
   135  	alias("Del", EncodeDel)
   136  	alias("Ctl", EncodeCtl)
   137  	alias("LeftSpace", EncodeLeftSpace)
   138  	alias("LeftPeriod", EncodeLeftPeriod)
   139  	alias("LeftTilde", EncodeLeftTilde)
   140  	alias("LeftCrLfHtVt", EncodeLeftCrLfHtVt)
   141  	alias("RightSpace", EncodeRightSpace)
   142  	alias("RightPeriod", EncodeRightPeriod)
   143  	alias("RightCrLfHtVt", EncodeRightCrLfHtVt)
   144  	alias("InvalidUtf8", EncodeInvalidUtf8)
   145  	alias("Dot", EncodeDot)
   146  }
   147  
   148  // validStrings returns all the valid MultiEncoder strings
   149  func validStrings() string {
   150  	var out []string
   151  	for k := range nameToEncoding {
   152  		out = append(out, k)
   153  	}
   154  	sort.Strings(out)
   155  	return strings.Join(out, ", ")
   156  }
   157  
   158  // String converts the MultiEncoder into text
   159  func (mask MultiEncoder) String() string {
   160  	// See if there is an exact translation - if so return that
   161  	if name, ok := encodingToName[mask]; ok {
   162  		return name
   163  	}
   164  	var out []string
   165  	// Otherwise decompose bit by bit
   166  	for bit := MultiEncoder(1); bit != 0; bit *= 2 {
   167  		if (mask & bit) != 0 {
   168  			if name, ok := encodingToName[bit]; ok {
   169  				out = append(out, name)
   170  			} else {
   171  				out = append(out, fmt.Sprintf("0x%X", uint(bit)))
   172  			}
   173  		}
   174  	}
   175  	return strings.Join(out, ",")
   176  }
   177  
   178  // Set converts a string into a MultiEncoder
   179  func (mask *MultiEncoder) Set(in string) error {
   180  	var out MultiEncoder
   181  	parts := strings.Split(in, ",")
   182  	for _, part := range parts {
   183  		part = strings.TrimSpace(part)
   184  		if bits, ok := nameToEncoding[part]; ok {
   185  			out |= bits
   186  		} else {
   187  			i, err := strconv.ParseInt(part, 0, 64)
   188  			if err != nil {
   189  				return fmt.Errorf("bad encoding %q: possible values are: %s", part, validStrings())
   190  			}
   191  			out |= MultiEncoder(i)
   192  		}
   193  	}
   194  	*mask = out
   195  	return nil
   196  }
   197  
// Type returns a textual type of the MultiEncoder to satisfy the pflag.Value interface
func (mask MultiEncoder) Type() string {
	return "Encoding"
}
   202  
   203  // Scan implements the fmt.Scanner interface
   204  func (mask *MultiEncoder) Scan(s fmt.ScanState, ch rune) error {
   205  	token, err := s.Token(true, nil)
   206  	if err != nil {
   207  		return err
   208  	}
   209  	return mask.Set(string(token))
   210  }
   211  
   212  // Encode takes a raw name and substitutes any reserved characters and
   213  // patterns in it
   214  func (mask MultiEncoder) Encode(in string) string {
   215  	if in == "" {
   216  		return ""
   217  	}
   218  
   219  	if mask.Has(EncodeDot) {
   220  		switch in {
   221  		case ".":
   222  			return "."
   223  		case "..":
   224  			return ".."
   225  		case ".":
   226  			return string(QuoteRune) + "."
   227  		case "..":
   228  			return string(QuoteRune) + "." + string(QuoteRune) + "."
   229  		}
   230  	}
   231  
   232  	// handle prefix only replacements
   233  	prefix := ""
   234  	if mask.Has(EncodeLeftSpace) { // Leading SPACE
   235  		if in[0] == ' ' {
   236  			prefix, in = "␠", in[1:] // SYMBOL FOR SPACE
   237  		} else if r, l := utf8.DecodeRuneInString(in); r == '␠' { // SYMBOL FOR SPACE
   238  			prefix, in = string(QuoteRune)+"␠", in[l:] // SYMBOL FOR SPACE
   239  		}
   240  	}
   241  	if mask.Has(EncodeLeftPeriod) && prefix == "" { // Leading PERIOD
   242  		if in[0] == '.' {
   243  			prefix, in = ".", in[1:] // FULLWIDTH FULL STOP
   244  		} else if r, l := utf8.DecodeRuneInString(in); r == '.' { // FULLWIDTH FULL STOP
   245  			prefix, in = string(QuoteRune)+".", in[l:] //  FULLWIDTH FULL STOP
   246  		}
   247  	}
   248  	if mask.Has(EncodeLeftTilde) && prefix == "" { // Leading ~
   249  		if in[0] == '~' {
   250  			prefix, in = string('~'+fullOffset), in[1:] // FULLWIDTH TILDE
   251  		} else if r, l := utf8.DecodeRuneInString(in); r == '~'+fullOffset {
   252  			prefix, in = string(QuoteRune)+string('~'+fullOffset), in[l:] // FULLWIDTH TILDE
   253  		}
   254  	}
   255  	if mask.Has(EncodeLeftCrLfHtVt) && prefix == "" { // Leading CR LF HT VT
   256  		switch c := in[0]; c {
   257  		case '\t', '\n', '\v', '\r':
   258  			prefix, in = string('␀'+rune(c)), in[1:] // SYMBOL FOR NULL
   259  		default:
   260  			switch r, l := utf8.DecodeRuneInString(in); r {
   261  			case '␀' + '\t', '␀' + '\n', '␀' + '\v', '␀' + '\r':
   262  				prefix, in = string(QuoteRune)+string(r), in[l:]
   263  			}
   264  		}
   265  	}
   266  	// handle suffix only replacements
   267  	suffix := ""
   268  	if in != "" {
   269  		if mask.Has(EncodeRightSpace) { // Trailing SPACE
   270  			if in[len(in)-1] == ' ' {
   271  				suffix, in = "␠", in[:len(in)-1] // SYMBOL FOR SPACE
   272  			} else if r, l := utf8.DecodeLastRuneInString(in); r == '␠' {
   273  				suffix, in = string(QuoteRune)+"␠", in[:len(in)-l] // SYMBOL FOR SPACE
   274  			}
   275  		}
   276  		if mask.Has(EncodeRightPeriod) && suffix == "" { // Trailing .
   277  			if in[len(in)-1] == '.' {
   278  				suffix, in = ".", in[:len(in)-1] // FULLWIDTH FULL STOP
   279  			} else if r, l := utf8.DecodeLastRuneInString(in); r == '.' {
   280  				suffix, in = string(QuoteRune)+".", in[:len(in)-l] // FULLWIDTH FULL STOP
   281  			}
   282  		}
   283  		if mask.Has(EncodeRightCrLfHtVt) && suffix == "" { // Trailing .
   284  			switch c := in[len(in)-1]; c {
   285  			case '\t', '\n', '\v', '\r':
   286  				suffix, in = string('␀'+rune(c)), in[:len(in)-1] // FULLWIDTH FULL STOP
   287  			default:
   288  				switch r, l := utf8.DecodeLastRuneInString(in); r {
   289  				case '␀' + '\t', '␀' + '\n', '␀' + '\v', '␀' + '\r':
   290  					suffix, in = string(QuoteRune)+string(r), in[:len(in)-l]
   291  				}
   292  			}
   293  		}
   294  	}
   295  
   296  	index := 0
   297  	if prefix == "" && suffix == "" {
   298  		// find the first rune which (most likely) needs to be replaced
   299  		index = strings.IndexFunc(in, func(r rune) bool {
   300  			switch r {
   301  			case 0, '␀', QuoteRune, utf8.RuneError:
   302  				return true
   303  			}
   304  			if mask.Has(EncodeAsterisk) { // *
   305  				switch r {
   306  				case '*',
   307  					'*':
   308  					return true
   309  				}
   310  			}
   311  			if mask.Has(EncodeLtGt) { // <>
   312  				switch r {
   313  				case '<', '>',
   314  					'<', '>':
   315  					return true
   316  				}
   317  			}
   318  			if mask.Has(EncodeQuestion) { // ?
   319  				switch r {
   320  				case '?',
   321  					'?':
   322  					return true
   323  				}
   324  			}
   325  			if mask.Has(EncodeColon) { // :
   326  				switch r {
   327  				case ':',
   328  					':':
   329  					return true
   330  				}
   331  			}
   332  			if mask.Has(EncodePipe) { // |
   333  				switch r {
   334  				case '|',
   335  					'|':
   336  					return true
   337  				}
   338  			}
   339  			if mask.Has(EncodeDoubleQuote) { // "
   340  				switch r {
   341  				case '"',
   342  					'"':
   343  					return true
   344  				}
   345  			}
   346  			if mask.Has(EncodeSingleQuote) { // '
   347  				switch r {
   348  				case '\'',
   349  					''':
   350  					return true
   351  				}
   352  			}
   353  			if mask.Has(EncodeBackQuote) { // `
   354  				switch r {
   355  				case '`',
   356  					'`':
   357  					return true
   358  				}
   359  			}
   360  			if mask.Has(EncodeDollar) { // $
   361  				switch r {
   362  				case '$',
   363  					'$':
   364  					return true
   365  				}
   366  			}
   367  			if mask.Has(EncodeSlash) { // /
   368  				switch r {
   369  				case '/',
   370  					'/':
   371  					return true
   372  				}
   373  			}
   374  			if mask.Has(EncodeBackSlash) { // \
   375  				switch r {
   376  				case '\\',
   377  					'\':
   378  					return true
   379  				}
   380  			}
   381  			if mask.Has(EncodeCrLf) { // CR LF
   382  				switch r {
   383  				case rune(0x0D), rune(0x0A),
   384  					'␍', '␊':
   385  					return true
   386  				}
   387  			}
   388  			if mask.Has(EncodeHash) { // #
   389  				switch r {
   390  				case '#',
   391  					'#':
   392  					return true
   393  				}
   394  			}
   395  			if mask.Has(EncodePercent) { // %
   396  				switch r {
   397  				case '%',
   398  					'%':
   399  					return true
   400  				}
   401  			}
   402  			if mask.Has(EncodeDel) { // DEL(0x7F)
   403  				switch r {
   404  				case rune(0x7F), '␡':
   405  					return true
   406  				}
   407  			}
   408  			if mask.Has(EncodeCtl) { // CTRL(0x01-0x1F)
   409  				if r >= 1 && r <= 0x1F {
   410  					return true
   411  				} else if r > symbolOffset && r <= symbolOffset+0x1F {
   412  					return true
   413  				}
   414  			}
   415  			return false
   416  		})
   417  	}
   418  	// nothing to replace, return input
   419  	if index == -1 {
   420  		return in
   421  	}
   422  
   423  	var out bytes.Buffer
   424  	out.Grow(len(in) + len(prefix) + len(suffix))
   425  	out.WriteString(prefix)
   426  	// copy the clean part of the input and skip it
   427  	out.WriteString(in[:index])
   428  	in = in[index:]
   429  
   430  	for i, r := range in {
   431  		switch r {
   432  		case 0:
   433  			out.WriteRune(symbolOffset)
   434  			continue
   435  		case '␀', QuoteRune:
   436  			out.WriteRune(QuoteRune)
   437  			out.WriteRune(r)
   438  			continue
   439  		case utf8.RuneError:
   440  			if mask.Has(EncodeInvalidUtf8) {
   441  				// only encode invalid sequences and not utf8.RuneError
   442  				if i+3 > len(in) || in[i:i+3] != string(utf8.RuneError) {
   443  					_, l := utf8.DecodeRuneInString(in[i:])
   444  					appendQuotedBytes(&out, in[i:i+l])
   445  					continue
   446  				}
   447  			} else {
   448  				// append the real bytes instead of utf8.RuneError
   449  				_, l := utf8.DecodeRuneInString(in[i:])
   450  				out.WriteString(in[i : i+l])
   451  				continue
   452  			}
   453  		}
   454  		if mask.Has(EncodeAsterisk) { // *
   455  			switch r {
   456  			case '*':
   457  				out.WriteRune(r + fullOffset)
   458  				continue
   459  			case '*':
   460  				out.WriteRune(QuoteRune)
   461  				out.WriteRune(r)
   462  				continue
   463  			}
   464  		}
   465  		if mask.Has(EncodeLtGt) { // <>
   466  			switch r {
   467  			case '<', '>':
   468  				out.WriteRune(r + fullOffset)
   469  				continue
   470  			case '<', '>':
   471  				out.WriteRune(QuoteRune)
   472  				out.WriteRune(r)
   473  				continue
   474  			}
   475  		}
   476  		if mask.Has(EncodeQuestion) { // ?
   477  			switch r {
   478  			case '?':
   479  				out.WriteRune(r + fullOffset)
   480  				continue
   481  			case '?':
   482  				out.WriteRune(QuoteRune)
   483  				out.WriteRune(r)
   484  				continue
   485  			}
   486  		}
   487  		if mask.Has(EncodeColon) { // :
   488  			switch r {
   489  			case ':':
   490  				out.WriteRune(r + fullOffset)
   491  				continue
   492  			case ':':
   493  				out.WriteRune(QuoteRune)
   494  				out.WriteRune(r)
   495  				continue
   496  			}
   497  		}
   498  		if mask.Has(EncodePipe) { // |
   499  			switch r {
   500  			case '|':
   501  				out.WriteRune(r + fullOffset)
   502  				continue
   503  			case '|':
   504  				out.WriteRune(QuoteRune)
   505  				out.WriteRune(r)
   506  				continue
   507  			}
   508  		}
   509  		if mask.Has(EncodeDoubleQuote) { // "
   510  			switch r {
   511  			case '"':
   512  				out.WriteRune(r + fullOffset)
   513  				continue
   514  			case '"':
   515  				out.WriteRune(QuoteRune)
   516  				out.WriteRune(r)
   517  				continue
   518  			}
   519  		}
   520  		if mask.Has(EncodeSingleQuote) { // '
   521  			switch r {
   522  			case '\'':
   523  				out.WriteRune(r + fullOffset)
   524  				continue
   525  			case ''':
   526  				out.WriteRune(QuoteRune)
   527  				out.WriteRune(r)
   528  				continue
   529  			}
   530  		}
   531  		if mask.Has(EncodeBackQuote) { // `
   532  			switch r {
   533  			case '`':
   534  				out.WriteRune(r + fullOffset)
   535  				continue
   536  			case '`':
   537  				out.WriteRune(QuoteRune)
   538  				out.WriteRune(r)
   539  				continue
   540  			}
   541  		}
   542  		if mask.Has(EncodeDollar) { // $
   543  			switch r {
   544  			case '$':
   545  				out.WriteRune(r + fullOffset)
   546  				continue
   547  			case '$':
   548  				out.WriteRune(QuoteRune)
   549  				out.WriteRune(r)
   550  				continue
   551  			}
   552  		}
   553  		if mask.Has(EncodeSlash) { // /
   554  			switch r {
   555  			case '/':
   556  				out.WriteRune(r + fullOffset)
   557  				continue
   558  			case '/':
   559  				out.WriteRune(QuoteRune)
   560  				out.WriteRune(r)
   561  				continue
   562  			}
   563  		}
   564  		if mask.Has(EncodeBackSlash) { // \
   565  			switch r {
   566  			case '\\':
   567  				out.WriteRune(r + fullOffset)
   568  				continue
   569  			case '\':
   570  				out.WriteRune(QuoteRune)
   571  				out.WriteRune(r)
   572  				continue
   573  			}
   574  		}
   575  		if mask.Has(EncodeCrLf) { // CR LF
   576  			switch r {
   577  			case rune(0x0D), rune(0x0A):
   578  				out.WriteRune(r + symbolOffset)
   579  				continue
   580  			case '␍', '␊':
   581  				out.WriteRune(QuoteRune)
   582  				out.WriteRune(r)
   583  				continue
   584  			}
   585  		}
   586  		if mask.Has(EncodeHash) { // #
   587  			switch r {
   588  			case '#':
   589  				out.WriteRune(r + fullOffset)
   590  				continue
   591  			case '#':
   592  				out.WriteRune(QuoteRune)
   593  				out.WriteRune(r)
   594  				continue
   595  			}
   596  		}
   597  		if mask.Has(EncodePercent) { // %
   598  			switch r {
   599  			case '%':
   600  				out.WriteRune(r + fullOffset)
   601  				continue
   602  			case '%':
   603  				out.WriteRune(QuoteRune)
   604  				out.WriteRune(r)
   605  				continue
   606  			}
   607  		}
   608  		if mask.Has(EncodeDel) { // DEL(0x7F)
   609  			switch r {
   610  			case rune(0x7F):
   611  				out.WriteRune('␡') // SYMBOL FOR DELETE
   612  				continue
   613  			case '␡':
   614  				out.WriteRune(QuoteRune)
   615  				out.WriteRune(r)
   616  				continue
   617  			}
   618  		}
   619  		if mask.Has(EncodeCtl) { // CTRL(0x01-0x1F)
   620  			if r >= 1 && r <= 0x1F {
   621  				out.WriteRune('␀' + r) // SYMBOL FOR NULL
   622  				continue
   623  			} else if r > symbolOffset && r <= symbolOffset+0x1F {
   624  				out.WriteRune(QuoteRune)
   625  				out.WriteRune(r)
   626  				continue
   627  			}
   628  		}
   629  		out.WriteRune(r)
   630  	}
   631  	out.WriteString(suffix)
   632  	return out.String()
   633  }
   634  
   635  // Decode takes a name and undoes any substitutions made by Encode
   636  func (mask MultiEncoder) Decode(in string) string {
   637  	if mask.Has(EncodeDot) {
   638  		switch in {
   639  		case ".":
   640  			return "."
   641  		case "..":
   642  			return ".."
   643  		case string(QuoteRune) + ".":
   644  			return "."
   645  		case string(QuoteRune) + "." + string(QuoteRune) + ".":
   646  			return ".."
   647  		}
   648  	}
   649  
   650  	// handle prefix only replacements
   651  	prefix := ""
   652  	if r, l1 := utf8.DecodeRuneInString(in); mask.Has(EncodeLeftSpace) && r == '␠' { // SYMBOL FOR SPACE
   653  		prefix, in = " ", in[l1:]
   654  	} else if mask.Has(EncodeLeftPeriod) && r == '.' { // FULLWIDTH FULL STOP
   655  		prefix, in = ".", in[l1:]
   656  	} else if mask.Has(EncodeLeftTilde) && r == '~' { // FULLWIDTH TILDE
   657  		prefix, in = "~", in[l1:]
   658  	} else if mask.Has(EncodeLeftCrLfHtVt) && (r == '␀'+'\t' || r == '␀'+'\n' || r == '␀'+'\v' || r == '␀'+'\r') {
   659  		prefix, in = string(r-'␀'), in[l1:]
   660  	} else if r == QuoteRune {
   661  		if r, l2 := utf8.DecodeRuneInString(in[l1:]); mask.Has(EncodeLeftSpace) && r == '␠' { // SYMBOL FOR SPACE
   662  			prefix, in = "␠", in[l1+l2:]
   663  		} else if mask.Has(EncodeLeftPeriod) && r == '.' { // FULLWIDTH FULL STOP
   664  			prefix, in = ".", in[l1+l2:]
   665  		} else if mask.Has(EncodeLeftTilde) && r == '~' { // FULLWIDTH TILDE
   666  			prefix, in = "~", in[l1+l2:]
   667  		} else if mask.Has(EncodeLeftCrLfHtVt) && (r == '␀'+'\t' || r == '␀'+'\n' || r == '␀'+'\v' || r == '␀'+'\r') {
   668  			prefix, in = string(r), in[l1+l2:]
   669  		}
   670  	}
   671  
   672  	// handle suffix only replacements
   673  	suffix := ""
   674  	if r, l := utf8.DecodeLastRuneInString(in); mask.Has(EncodeRightSpace) && r == '␠' { // SYMBOL FOR SPACE
   675  		in = in[:len(in)-l]
   676  		if q, l2 := utf8.DecodeLastRuneInString(in); q == QuoteRune {
   677  			suffix, in = "␠", in[:len(in)-l2]
   678  		} else {
   679  			suffix = " "
   680  		}
   681  	} else if mask.Has(EncodeRightPeriod) && r == '.' { // FULLWIDTH FULL STOP
   682  		in = in[:len(in)-l]
   683  		if q, l2 := utf8.DecodeLastRuneInString(in); q == QuoteRune {
   684  			suffix, in = ".", in[:len(in)-l2]
   685  		} else {
   686  			suffix = "."
   687  		}
   688  	} else if mask.Has(EncodeRightCrLfHtVt) && (r == '␀'+'\t' || r == '␀'+'\n' || r == '␀'+'\v' || r == '␀'+'\r') {
   689  		in = in[:len(in)-l]
   690  		if q, l2 := utf8.DecodeLastRuneInString(in); q == QuoteRune {
   691  			suffix, in = string(r), in[:len(in)-l2]
   692  		} else {
   693  			suffix = string(r - '␀')
   694  		}
   695  	}
   696  	index := 0
   697  	if prefix == "" && suffix == "" {
   698  		// find the first rune which (most likely) needs to be replaced
   699  		index = strings.IndexFunc(in, func(r rune) bool {
   700  			switch r {
   701  			case '␀', QuoteRune:
   702  				return true
   703  			}
   704  			if mask.Has(EncodeAsterisk) { // *
   705  				switch r {
   706  				case '*':
   707  					return true
   708  				}
   709  			}
   710  			if mask.Has(EncodeLtGt) { // <>
   711  				switch r {
   712  				case '<', '>':
   713  					return true
   714  				}
   715  			}
   716  			if mask.Has(EncodeQuestion) { // ?
   717  				switch r {
   718  				case '?':
   719  					return true
   720  				}
   721  			}
   722  			if mask.Has(EncodeColon) { // :
   723  				switch r {
   724  				case ':':
   725  					return true
   726  				}
   727  			}
   728  			if mask.Has(EncodePipe) { // |
   729  				switch r {
   730  				case '|':
   731  					return true
   732  				}
   733  			}
   734  			if mask.Has(EncodeDoubleQuote) { // "
   735  				switch r {
   736  				case '"':
   737  					return true
   738  				}
   739  			}
   740  			if mask.Has(EncodeSingleQuote) { // '
   741  				switch r {
   742  				case ''':
   743  					return true
   744  				}
   745  			}
   746  			if mask.Has(EncodeBackQuote) { // `
   747  				switch r {
   748  				case '`':
   749  					return true
   750  				}
   751  			}
   752  			if mask.Has(EncodeDollar) { // $
   753  				switch r {
   754  				case '$':
   755  					return true
   756  				}
   757  			}
   758  			if mask.Has(EncodeSlash) { // /
   759  				switch r {
   760  				case '/':
   761  					return true
   762  				}
   763  			}
   764  			if mask.Has(EncodeBackSlash) { // \
   765  				switch r {
   766  				case '\':
   767  					return true
   768  				}
   769  			}
   770  			if mask.Has(EncodeCrLf) { // CR LF
   771  				switch r {
   772  				case '␍', '␊':
   773  					return true
   774  				}
   775  			}
   776  			if mask.Has(EncodeHash) { // #
   777  				switch r {
   778  				case '#':
   779  					return true
   780  				}
   781  			}
   782  			if mask.Has(EncodePercent) { // %
   783  				switch r {
   784  				case '%':
   785  					return true
   786  				}
   787  			}
   788  			if mask.Has(EncodeDel) { // DEL(0x7F)
   789  				switch r {
   790  				case '␡':
   791  					return true
   792  				}
   793  			}
   794  			if mask.Has(EncodeCtl) { // CTRL(0x01-0x1F)
   795  				if r > symbolOffset && r <= symbolOffset+0x1F {
   796  					return true
   797  				}
   798  			}
   799  
   800  			return false
   801  		})
   802  	}
   803  	// nothing to replace, return input
   804  	if index == -1 {
   805  		return in
   806  	}
   807  
   808  	var out bytes.Buffer
   809  	out.Grow(len(in))
   810  	out.WriteString(prefix)
   811  	// copy the clean part of the input and skip it
   812  	out.WriteString(in[:index])
   813  	in = in[index:]
   814  	var unquote, unquoteNext, skipNext bool
   815  
   816  	for i, r := range in {
   817  		if skipNext {
   818  			skipNext = false
   819  			continue
   820  		}
   821  		unquote, unquoteNext = unquoteNext, false
   822  		switch r {
   823  		case '␀': // SYMBOL FOR NULL
   824  			if unquote {
   825  				out.WriteRune(r)
   826  			} else {
   827  				out.WriteRune(0)
   828  			}
   829  			continue
   830  		case QuoteRune:
   831  			if unquote {
   832  				out.WriteRune(r)
   833  			} else {
   834  				unquoteNext = true
   835  			}
   836  			continue
   837  		}
   838  		if mask.Has(EncodeAsterisk) { // *
   839  			switch r {
   840  			case '*':
   841  				if unquote {
   842  					out.WriteRune(r)
   843  				} else {
   844  					out.WriteRune(r - fullOffset)
   845  				}
   846  				continue
   847  			}
   848  		}
   849  		if mask.Has(EncodeLtGt) { // <>
   850  			switch r {
   851  			case '<', '>':
   852  				if unquote {
   853  					out.WriteRune(r)
   854  				} else {
   855  					out.WriteRune(r - fullOffset)
   856  				}
   857  				continue
   858  			}
   859  		}
   860  		if mask.Has(EncodeQuestion) { // ?
   861  			switch r {
   862  			case '?':
   863  				if unquote {
   864  					out.WriteRune(r)
   865  				} else {
   866  					out.WriteRune(r - fullOffset)
   867  				}
   868  				continue
   869  			}
   870  		}
   871  		if mask.Has(EncodeColon) { // :
   872  			switch r {
   873  			case ':':
   874  				if unquote {
   875  					out.WriteRune(r)
   876  				} else {
   877  					out.WriteRune(r - fullOffset)
   878  				}
   879  				continue
   880  			}
   881  		}
   882  		if mask.Has(EncodePipe) { // |
   883  			switch r {
   884  			case '|':
   885  				if unquote {
   886  					out.WriteRune(r)
   887  				} else {
   888  					out.WriteRune(r - fullOffset)
   889  				}
   890  				continue
   891  			}
   892  		}
   893  		if mask.Has(EncodeDoubleQuote) { // "
   894  			switch r {
   895  			case '"':
   896  				if unquote {
   897  					out.WriteRune(r)
   898  				} else {
   899  					out.WriteRune(r - fullOffset)
   900  				}
   901  				continue
   902  			}
   903  		}
   904  		if mask.Has(EncodeSingleQuote) { // '
   905  			switch r {
   906  			case ''':
   907  				if unquote {
   908  					out.WriteRune(r)
   909  				} else {
   910  					out.WriteRune(r - fullOffset)
   911  				}
   912  				continue
   913  			}
   914  		}
   915  		if mask.Has(EncodeBackQuote) { // `
   916  			switch r {
   917  			case '`':
   918  				if unquote {
   919  					out.WriteRune(r)
   920  				} else {
   921  					out.WriteRune(r - fullOffset)
   922  				}
   923  				continue
   924  			}
   925  		}
   926  		if mask.Has(EncodeDollar) { // $
   927  			switch r {
   928  			case '$':
   929  				if unquote {
   930  					out.WriteRune(r)
   931  				} else {
   932  					out.WriteRune(r - fullOffset)
   933  				}
   934  				continue
   935  			}
   936  		}
   937  		if mask.Has(EncodeSlash) { // /
   938  			switch r {
   939  			case '/': // FULLWIDTH SOLIDUS
   940  				if unquote {
   941  					out.WriteRune(r)
   942  				} else {
   943  					out.WriteRune(r - fullOffset)
   944  				}
   945  				continue
   946  			}
   947  		}
   948  		if mask.Has(EncodeBackSlash) { // \
   949  			switch r {
   950  			case '\': // FULLWIDTH REVERSE SOLIDUS
   951  				if unquote {
   952  					out.WriteRune(r)
   953  				} else {
   954  					out.WriteRune(r - fullOffset)
   955  				}
   956  				continue
   957  			}
   958  		}
   959  		if mask.Has(EncodeCrLf) { // CR LF
   960  			switch r {
   961  			case '␍', '␊':
   962  				if unquote {
   963  					out.WriteRune(r)
   964  				} else {
   965  					out.WriteRune(r - symbolOffset)
   966  				}
   967  				continue
   968  			}
   969  		}
   970  		if mask.Has(EncodeHash) { // %
   971  			switch r {
   972  			case '#':
   973  				if unquote {
   974  					out.WriteRune(r)
   975  				} else {
   976  					out.WriteRune(r - fullOffset)
   977  				}
   978  				continue
   979  			}
   980  		}
   981  		if mask.Has(EncodePercent) { // %
   982  			switch r {
   983  			case '%':
   984  				if unquote {
   985  					out.WriteRune(r)
   986  				} else {
   987  					out.WriteRune(r - fullOffset)
   988  				}
   989  				continue
   990  			}
   991  		}
   992  		if mask.Has(EncodeDel) { // DEL(0x7F)
   993  			switch r {
   994  			case '␡': // SYMBOL FOR DELETE
   995  				if unquote {
   996  					out.WriteRune(r)
   997  				} else {
   998  					out.WriteRune(0x7F)
   999  				}
  1000  				continue
  1001  			}
  1002  		}
  1003  		if mask.Has(EncodeCtl) { // CTRL(0x01-0x1F)
  1004  			if r > symbolOffset && r <= symbolOffset+0x1F {
  1005  				if unquote {
  1006  					out.WriteRune(r)
  1007  				} else {
  1008  					out.WriteRune(r - symbolOffset)
  1009  				}
  1010  				continue
  1011  			}
  1012  		}
  1013  		if unquote {
  1014  			if mask.Has(EncodeInvalidUtf8) {
  1015  				skipNext = appendUnquotedByte(&out, in[i:])
  1016  				if skipNext {
  1017  					continue
  1018  				}
  1019  			}
  1020  			out.WriteRune(QuoteRune)
  1021  		}
  1022  		switch r {
  1023  		case utf8.RuneError:
  1024  			// append the real bytes instead of utf8.RuneError
  1025  			_, l := utf8.DecodeRuneInString(in[i:])
  1026  			out.WriteString(in[i : i+l])
  1027  			continue
  1028  		}
  1029  
  1030  		out.WriteRune(r)
  1031  	}
  1032  	if unquoteNext {
  1033  		out.WriteRune(QuoteRune)
  1034  	}
  1035  	out.WriteString(suffix)
  1036  	return out.String()
  1037  }
  1038  
  1039  // FromStandardPath takes a / separated path in Standard encoding
  1040  // and converts it to a / separated path in this encoding.
  1041  func (mask MultiEncoder) FromStandardPath(s string) string {
  1042  	return FromStandardPath(mask, s)
  1043  }
  1044  
  1045  // FromStandardName takes name in Standard encoding and converts
  1046  // it in this encoding.
  1047  func (mask MultiEncoder) FromStandardName(s string) string {
  1048  	return FromStandardName(mask, s)
  1049  }
  1050  
  1051  // ToStandardPath takes a / separated path in this encoding
  1052  // and converts it to a / separated path in Standard encoding.
  1053  func (mask MultiEncoder) ToStandardPath(s string) string {
  1054  	return ToStandardPath(mask, s)
  1055  }
  1056  
  1057  // ToStandardName takes name in this encoding and converts
  1058  // it in Standard encoding.
  1059  func (mask MultiEncoder) ToStandardName(s string) string {
  1060  	return ToStandardName(mask, s)
  1061  }
  1062  
  1063  func appendQuotedBytes(w io.Writer, s string) {
  1064  	for _, b := range []byte(s) {
  1065  		_, _ = fmt.Fprintf(w, string(QuoteRune)+"%02X", b)
  1066  	}
  1067  }
  1068  func appendUnquotedByte(w io.Writer, s string) bool {
  1069  	if len(s) < 2 {
  1070  		return false
  1071  	}
  1072  	u, err := strconv.ParseUint(s[:2], 16, 8)
  1073  	if err != nil {
  1074  		return false
  1075  	}
  1076  	n, _ := w.Write([]byte{byte(u)})
  1077  	return n == 1
  1078  }
  1079  
  1080  type identity struct{}
  1081  
  1082  func (identity) Encode(in string) string { return in }
  1083  func (identity) Decode(in string) string { return in }
  1084  
  1085  func (i identity) FromStandardPath(s string) string {
  1086  	return FromStandardPath(i, s)
  1087  }
  1088  func (i identity) FromStandardName(s string) string {
  1089  	return FromStandardName(i, s)
  1090  }
  1091  func (i identity) ToStandardPath(s string) string {
  1092  	return ToStandardPath(i, s)
  1093  }
  1094  func (i identity) ToStandardName(s string) string {
  1095  	return ToStandardName(i, s)
  1096  }
  1097  
  1098  // Identity returns a Encoder that always returns the input value
  1099  func Identity() Encoder {
  1100  	return identity{}
  1101  }
  1102  
  1103  // FromStandardPath takes a / separated path in Standard encoding
  1104  // and converts it to a / separated path in the given encoding.
  1105  func FromStandardPath(e Encoder, s string) string {
  1106  	if e == Standard {
  1107  		return s
  1108  	}
  1109  	parts := strings.Split(s, "/")
  1110  	encoded := make([]string, len(parts))
  1111  	changed := false
  1112  	for i, p := range parts {
  1113  		enc := FromStandardName(e, p)
  1114  		changed = changed || enc != p
  1115  		encoded[i] = enc
  1116  	}
  1117  	if !changed {
  1118  		return s
  1119  	}
  1120  	return strings.Join(encoded, "/")
  1121  }
  1122  
  1123  // FromStandardName takes name in Standard encoding and converts
  1124  // it in the given encoding.
  1125  func FromStandardName(e Encoder, s string) string {
  1126  	if e == Standard {
  1127  		return s
  1128  	}
  1129  	return e.Encode(Standard.Decode(s))
  1130  }
  1131  
  1132  // ToStandardPath takes a / separated path in the given encoding
  1133  // and converts it to a / separated path in Standard encoding.
  1134  func ToStandardPath(e Encoder, s string) string {
  1135  	if e == Standard {
  1136  		return s
  1137  	}
  1138  	parts := strings.Split(s, "/")
  1139  	encoded := make([]string, len(parts))
  1140  	changed := false
  1141  	for i, p := range parts {
  1142  		dec := ToStandardName(e, p)
  1143  		changed = changed || dec != p
  1144  		encoded[i] = dec
  1145  	}
  1146  	if !changed {
  1147  		return s
  1148  	}
  1149  	return strings.Join(encoded, "/")
  1150  }
  1151  
  1152  // ToStandardName takes name in the given encoding and converts
  1153  // it in Standard encoding.
  1154  func ToStandardName(e Encoder, s string) string {
  1155  	if e == Standard {
  1156  		return s
  1157  	}
  1158  	return Standard.Encode(e.Decode(s))
  1159  }