github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/archive/tar/strconv.go

github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/archive/tar/strconv.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package tar
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"strconv"
    11  	"strings"
    12  	"time"
    13  )
    14  
    15  // hasNUL reports whether the NUL character exists within s.
    16  func hasNUL(s string) bool {
    17  	return strings.Contains(s, "\x00")
    18  }
    19  
    20  // isASCII reports whether the input is an ASCII C-style string.
    21  func isASCII(s string) bool {
    22  	for _, c := range s {
    23  		if c >= 0x80 || c == 0x00 {
    24  			return false
    25  		}
    26  	}
    27  	return true
    28  }
    29  
    30  // toASCII converts the input to an ASCII C-style string.
    31  // This is a best effort conversion, so invalid characters are dropped.
    32  func toASCII(s string) string {
    33  	if isASCII(s) {
    34  		return s
    35  	}
    36  	b := make([]byte, 0, len(s))
    37  	for _, c := range s {
    38  		if c < 0x80 && c != 0x00 {
    39  			b = append(b, byte(c))
    40  		}
    41  	}
    42  	return string(b)
    43  }
    44  
// parser carries error state for the parse helpers in this file.
// Methods such as parseNumeric and parseOctal report failure by assigning
// err rather than returning an error, so callers inspect err afterwards.
type parser struct {
	err error // Last error seen; set to ErrHeader by the parse methods on failure
}
    48  
// formatter carries error state for the format helpers in this file.
// Methods such as formatString, formatNumeric and formatOctal report failure
// by assigning err rather than returning an error, so callers inspect err
// afterwards.
type formatter struct {
	err error // Last error seen; set to ErrFieldTooLong when a value does not fit
}
    52  
    53  // parseString parses bytes as a NUL-terminated C-style string.
    54  // If a NUL byte is not found then the whole slice is returned as a string.
    55  func (*parser) parseString(b []byte) string {
    56  	if i := bytes.IndexByte(b, 0); i >= 0 {
    57  		return string(b[:i])
    58  	}
    59  	return string(b)
    60  }
    61  
    62  // formatString copies s into b, NUL-terminating if possible.
    63  func (f *formatter) formatString(b []byte, s string) {
    64  	if len(s) > len(b) {
    65  		f.err = ErrFieldTooLong
    66  	}
    67  	copy(b, s)
    68  	if len(s) < len(b) {
    69  		b[len(s)] = 0
    70  	}
    71  
    72  	// Some buggy readers treat regular files with a trailing slash
    73  	// in the V7 path field as a directory even though the full path
    74  	// recorded elsewhere (e.g., via PAX record) contains no trailing slash.
    75  	if len(s) > len(b) && b[len(b)-1] == '/' {
    76  		n := len(strings.TrimRight(s[:len(b)], "/"))
    77  		b[n] = 0 // Replace trailing slash with NUL terminator
    78  	}
    79  }
    80  
    81  // fitsInBase256 reports whether x can be encoded into n bytes using base-256
    82  // encoding. Unlike octal encoding, base-256 encoding does not require that the
    83  // string ends with a NUL character. Thus, all n bytes are available for output.
    84  //
    85  // If operating in binary mode, this assumes strict GNU binary mode; which means
    86  // that the first byte can only be either 0x80 or 0xff. Thus, the first byte is
    87  // equivalent to the sign bit in two's complement form.
    88  func fitsInBase256(n int, x int64) bool {
    89  	binBits := uint(n-1) * 8
    90  	return n >= 9 || (x >= -1<<binBits && x < 1<<binBits)
    91  }
    92  
    93  // parseNumeric parses the input as being encoded in either base-256 or octal.
    94  // This function may return negative numbers.
    95  // If parsing fails or an integer overflow occurs, err will be set.
    96  func (p *parser) parseNumeric(b []byte) int64 {
    97  	// Check for base-256 (binary) format first.
    98  	// If the first bit is set, then all following bits constitute a two's
    99  	// complement encoded number in big-endian byte order.
   100  	if len(b) > 0 && b[0]&0x80 != 0 {
   101  		// Handling negative numbers relies on the following identity:
   102  		//	-a-1 == ^a
   103  		//
   104  		// If the number is negative, we use an inversion mask to invert the
   105  		// data bytes and treat the value as an unsigned number.
   106  		var inv byte // 0x00 if positive or zero, 0xff if negative
   107  		if b[0]&0x40 != 0 {
   108  			inv = 0xff
   109  		}
   110  
   111  		var x uint64
   112  		for i, c := range b {
   113  			c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing
   114  			if i == 0 {
   115  				c &= 0x7f // Ignore signal bit in first byte
   116  			}
   117  			if (x >> 56) > 0 {
   118  				p.err = ErrHeader // Integer overflow
   119  				return 0
   120  			}
   121  			x = x<<8 | uint64(c)
   122  		}
   123  		if (x >> 63) > 0 {
   124  			p.err = ErrHeader // Integer overflow
   125  			return 0
   126  		}
   127  		if inv == 0xff {
   128  			return ^int64(x)
   129  		}
   130  		return int64(x)
   131  	}
   132  
   133  	// Normal case is base-8 (octal) format.
   134  	return p.parseOctal(b)
   135  }
   136  
   137  // formatNumeric encodes x into b using base-8 (octal) encoding if possible.
   138  // Otherwise it will attempt to use base-256 (binary) encoding.
   139  func (f *formatter) formatNumeric(b []byte, x int64) {
   140  	if fitsInOctal(len(b), x) {
   141  		f.formatOctal(b, x)
   142  		return
   143  	}
   144  
   145  	if fitsInBase256(len(b), x) {
   146  		for i := len(b) - 1; i >= 0; i-- {
   147  			b[i] = byte(x)
   148  			x >>= 8
   149  		}
   150  		b[0] |= 0x80 // Highest bit indicates binary format
   151  		return
   152  	}
   153  
   154  	f.formatOctal(b, 0) // Last resort, just write zero
   155  	f.err = ErrFieldTooLong
   156  }
   157  
   158  func (p *parser) parseOctal(b []byte) int64 {
   159  	// Because unused fields are filled with NULs, we need
   160  	// to skip leading NULs. Fields may also be padded with
   161  	// spaces or NULs.
   162  	// So we remove leading and trailing NULs and spaces to
   163  	// be sure.
   164  	b = bytes.Trim(b, " \x00")
   165  
   166  	if len(b) == 0 {
   167  		return 0
   168  	}
   169  	x, perr := strconv.ParseUint(p.parseString(b), 8, 64)
   170  	if perr != nil {
   171  		p.err = ErrHeader
   172  	}
   173  	return int64(x)
   174  }
   175  
   176  func (f *formatter) formatOctal(b []byte, x int64) {
   177  	if !fitsInOctal(len(b), x) {
   178  		x = 0 // Last resort, just write zero
   179  		f.err = ErrFieldTooLong
   180  	}
   181  
   182  	s := strconv.FormatInt(x, 8)
   183  	// Add leading zeros, but leave room for a NUL.
   184  	if n := len(b) - len(s) - 1; n > 0 {
   185  		s = strings.Repeat("0", n) + s
   186  	}
   187  	f.formatString(b, s)
   188  }
   189  
   190  // fitsInOctal reports whether the integer x fits in a field n-bytes long
   191  // using octal encoding with the appropriate NUL terminator.
   192  func fitsInOctal(n int, x int64) bool {
   193  	octBits := uint(n-1) * 3
   194  	return x >= 0 && (n >= 22 || x < 1<<octBits)
   195  }
   196  
   197  // parsePAXTime takes a string of the form %d.%d as described in the PAX
   198  // specification. Note that this implementation allows for negative timestamps,
   199  // which is allowed for by the PAX specification, but not always portable.
   200  func parsePAXTime(s string) (time.Time, error) {
   201  	const maxNanoSecondDigits = 9
   202  
   203  	// Split string into seconds and sub-seconds parts.
   204  	ss, sn, _ := strings.Cut(s, ".")
   205  
   206  	// Parse the seconds.
   207  	secs, err := strconv.ParseInt(ss, 10, 64)
   208  	if err != nil {
   209  		return time.Time{}, ErrHeader
   210  	}
   211  	if len(sn) == 0 {
   212  		return time.Unix(secs, 0), nil // No sub-second values
   213  	}
   214  
   215  	// Parse the nanoseconds.
   216  	if strings.Trim(sn, "0123456789") != "" {
   217  		return time.Time{}, ErrHeader
   218  	}
   219  	if len(sn) < maxNanoSecondDigits {
   220  		sn += strings.Repeat("0", maxNanoSecondDigits-len(sn)) // Right pad
   221  	} else {
   222  		sn = sn[:maxNanoSecondDigits] // Right truncate
   223  	}
   224  	nsecs, _ := strconv.ParseInt(sn, 10, 64) // Must succeed
   225  	if len(ss) > 0 && ss[0] == '-' {
   226  		return time.Unix(secs, -1*nsecs), nil // Negative correction
   227  	}
   228  	return time.Unix(secs, nsecs), nil
   229  }
   230  
   231  // formatPAXTime converts ts into a time of the form %d.%d as described in the
   232  // PAX specification. This function is capable of negative timestamps.
   233  func formatPAXTime(ts time.Time) (s string) {
   234  	secs, nsecs := ts.Unix(), ts.Nanosecond()
   235  	if nsecs == 0 {
   236  		return strconv.FormatInt(secs, 10)
   237  	}
   238  
   239  	// If seconds is negative, then perform correction.
   240  	sign := ""
   241  	if secs < 0 {
   242  		sign = "-"             // Remember sign
   243  		secs = -(secs + 1)     // Add a second to secs
   244  		nsecs = -(nsecs - 1e9) // Take that second away from nsecs
   245  	}
   246  	return strings.TrimRight(fmt.Sprintf("%s%d.%09d", sign, secs, nsecs), "0")
   247  }
   248  
   249  // parsePAXRecord parses the input PAX record string into a key-value pair.
   250  // If parsing is successful, it will slice off the currently read record and
   251  // return the remainder as r.
   252  func parsePAXRecord(s string) (k, v, r string, err error) {
   253  	// The size field ends at the first space.
   254  	nStr, rest, ok := strings.Cut(s, " ")
   255  	if !ok {
   256  		return "", "", s, ErrHeader
   257  	}
   258  
   259  	// Parse the first token as a decimal integer.
   260  	n, perr := strconv.ParseInt(nStr, 10, 0) // Intentionally parse as native int
   261  	if perr != nil || n < 5 || n > int64(len(s)) {
   262  		return "", "", s, ErrHeader
   263  	}
   264  	n -= int64(len(nStr) + 1) // convert from index in s to index in rest
   265  	if n <= 0 {
   266  		return "", "", s, ErrHeader
   267  	}
   268  
   269  	// Extract everything between the space and the final newline.
   270  	rec, nl, rem := rest[:n-1], rest[n-1:n], rest[n:]
   271  	if nl != "\n" {
   272  		return "", "", s, ErrHeader
   273  	}
   274  
   275  	// The first equals separates the key from the value.
   276  	k, v, ok = strings.Cut(rec, "=")
   277  	if !ok {
   278  		return "", "", s, ErrHeader
   279  	}
   280  
   281  	if !validPAXRecord(k, v) {
   282  		return "", "", s, ErrHeader
   283  	}
   284  	return k, v, rem, nil
   285  }
   286  
   287  // formatPAXRecord formats a single PAX record, prefixing it with the
   288  // appropriate length.
   289  func formatPAXRecord(k, v string) (string, error) {
   290  	if !validPAXRecord(k, v) {
   291  		return "", ErrHeader
   292  	}
   293  
   294  	const padding = 3 // Extra padding for ' ', '=', and '\n'
   295  	size := len(k) + len(v) + padding
   296  	size += len(strconv.Itoa(size))
   297  	record := strconv.Itoa(size) + " " + k + "=" + v + "\n"
   298  
   299  	// Final adjustment if adding size field increased the record size.
   300  	if len(record) != size {
   301  		size = len(record)
   302  		record = strconv.Itoa(size) + " " + k + "=" + v + "\n"
   303  	}
   304  	return record, nil
   305  }
   306  
   307  // validPAXRecord reports whether the key-value pair is valid where each
   308  // record is formatted as:
   309  //
   310  //	"%d %s=%s\n" % (size, key, value)
   311  //
   312  // Keys and values should be UTF-8, but the number of bad writers out there
   313  // forces us to be a more liberal.
   314  // Thus, we only reject all keys with NUL, and only reject NULs in values
   315  // for the PAX version of the USTAR string fields.
   316  // The key must not contain an '=' character.
   317  func validPAXRecord(k, v string) bool {
   318  	if k == "" || strings.Contains(k, "=") {
   319  		return false
   320  	}
   321  	switch k {
   322  	case paxPath, paxLinkpath, paxUname, paxGname:
   323  		return !hasNUL(v)
   324  	default:
   325  		return !hasNUL(k)
   326  	}
   327  }