github.com/eun/go@v0.0.0-20170811110501-92cfd07a6cfd/src/archive/tar/strconv.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package tar
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"strconv"
    11  	"strings"
    12  	"time"
    13  )
    14  
    15  // isASCII reports whether the input is an ASCII C-style string.
    16  func isASCII(s string) bool {
    17  	for _, c := range s {
    18  		if c >= 0x80 || c == 0x00 {
    19  			return false
    20  		}
    21  	}
    22  	return true
    23  }
    24  
    25  // toASCII converts the input to an ASCII C-style string.
    26  // This a best effort conversion, so invalid characters are dropped.
    27  func toASCII(s string) string {
    28  	if isASCII(s) {
    29  		return s
    30  	}
    31  	var buf bytes.Buffer
    32  	for _, c := range s {
    33  		if c < 0x80 && c != 0x00 {
    34  			buf.WriteByte(byte(c))
    35  		}
    36  	}
    37  	return buf.String()
    38  }
    39  
// parser groups the header-field decoding helpers in this file.
// Its methods do not return errors; a failed numeric parse stores
// ErrHeader in err instead, for the caller to inspect afterwards.
type parser struct {
	err error // Last error seen
}
    43  
// formatter groups the header-field encoding helpers in this file.
// Its methods do not return errors; a value that cannot be encoded into
// the destination field stores ErrFieldTooLong in err instead, for the
// caller to inspect afterwards.
type formatter struct {
	err error // Last error seen
}
    47  
    48  // parseString parses bytes as a NUL-terminated C-style string.
    49  // If a NUL byte is not found then the whole slice is returned as a string.
    50  func (*parser) parseString(b []byte) string {
    51  	n := 0
    52  	for n < len(b) && b[n] != 0 {
    53  		n++
    54  	}
    55  	return string(b[0:n])
    56  }
    57  
    58  // Write s into b, terminating it with a NUL if there is room.
    59  func (f *formatter) formatString(b []byte, s string) {
    60  	if len(s) > len(b) {
    61  		f.err = ErrFieldTooLong
    62  		return
    63  	}
    64  	ascii := toASCII(s)
    65  	copy(b, ascii)
    66  	if len(ascii) < len(b) {
    67  		b[len(ascii)] = 0
    68  	}
    69  }
    70  
    71  // fitsInBase256 reports whether x can be encoded into n bytes using base-256
    72  // encoding. Unlike octal encoding, base-256 encoding does not require that the
    73  // string ends with a NUL character. Thus, all n bytes are available for output.
    74  //
    75  // If operating in binary mode, this assumes strict GNU binary mode; which means
    76  // that the first byte can only be either 0x80 or 0xff. Thus, the first byte is
    77  // equivalent to the sign bit in two's complement form.
    78  func fitsInBase256(n int, x int64) bool {
    79  	var binBits = uint(n-1) * 8
    80  	return n >= 9 || (x >= -1<<binBits && x < 1<<binBits)
    81  }
    82  
    83  // parseNumeric parses the input as being encoded in either base-256 or octal.
    84  // This function may return negative numbers.
    85  // If parsing fails or an integer overflow occurs, err will be set.
    86  func (p *parser) parseNumeric(b []byte) int64 {
    87  	// Check for base-256 (binary) format first.
    88  	// If the first bit is set, then all following bits constitute a two's
    89  	// complement encoded number in big-endian byte order.
    90  	if len(b) > 0 && b[0]&0x80 != 0 {
    91  		// Handling negative numbers relies on the following identity:
    92  		//	-a-1 == ^a
    93  		//
    94  		// If the number is negative, we use an inversion mask to invert the
    95  		// data bytes and treat the value as an unsigned number.
    96  		var inv byte // 0x00 if positive or zero, 0xff if negative
    97  		if b[0]&0x40 != 0 {
    98  			inv = 0xff
    99  		}
   100  
   101  		var x uint64
   102  		for i, c := range b {
   103  			c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing
   104  			if i == 0 {
   105  				c &= 0x7f // Ignore signal bit in first byte
   106  			}
   107  			if (x >> 56) > 0 {
   108  				p.err = ErrHeader // Integer overflow
   109  				return 0
   110  			}
   111  			x = x<<8 | uint64(c)
   112  		}
   113  		if (x >> 63) > 0 {
   114  			p.err = ErrHeader // Integer overflow
   115  			return 0
   116  		}
   117  		if inv == 0xff {
   118  			return ^int64(x)
   119  		}
   120  		return int64(x)
   121  	}
   122  
   123  	// Normal case is base-8 (octal) format.
   124  	return p.parseOctal(b)
   125  }
   126  
   127  // formatNumeric encodes x into b using base-8 (octal) encoding if possible.
   128  // Otherwise it will attempt to use base-256 (binary) encoding.
   129  func (f *formatter) formatNumeric(b []byte, x int64) {
   130  	if fitsInOctal(len(b), x) {
   131  		f.formatOctal(b, x)
   132  		return
   133  	}
   134  
   135  	if fitsInBase256(len(b), x) {
   136  		for i := len(b) - 1; i >= 0; i-- {
   137  			b[i] = byte(x)
   138  			x >>= 8
   139  		}
   140  		b[0] |= 0x80 // Highest bit indicates binary format
   141  		return
   142  	}
   143  
   144  	f.formatOctal(b, 0) // Last resort, just write zero
   145  	f.err = ErrFieldTooLong
   146  }
   147  
   148  func (p *parser) parseOctal(b []byte) int64 {
   149  	// Because unused fields are filled with NULs, we need
   150  	// to skip leading NULs. Fields may also be padded with
   151  	// spaces or NULs.
   152  	// So we remove leading and trailing NULs and spaces to
   153  	// be sure.
   154  	b = bytes.Trim(b, " \x00")
   155  
   156  	if len(b) == 0 {
   157  		return 0
   158  	}
   159  	x, perr := strconv.ParseUint(p.parseString(b), 8, 64)
   160  	if perr != nil {
   161  		p.err = ErrHeader
   162  	}
   163  	return int64(x)
   164  }
   165  
   166  func (f *formatter) formatOctal(b []byte, x int64) {
   167  	s := strconv.FormatInt(x, 8)
   168  	// Add leading zeros, but leave room for a NUL.
   169  	if n := len(b) - len(s) - 1; n > 0 {
   170  		s = strings.Repeat("0", n) + s
   171  	}
   172  	f.formatString(b, s)
   173  }
   174  
   175  // fitsInOctal reports whether the integer x fits in a field n-bytes long
   176  // using octal encoding with the appropriate NUL terminator.
   177  func fitsInOctal(n int, x int64) bool {
   178  	octBits := uint(n-1) * 3
   179  	return x >= 0 && (n >= 22 || x < 1<<octBits)
   180  }
   181  
   182  // parsePAXTime takes a string of the form %d.%d as described in the PAX
   183  // specification. Note that this implementation allows for negative timestamps,
   184  // which is allowed for by the PAX specification, but not always portable.
   185  func parsePAXTime(s string) (time.Time, error) {
   186  	const maxNanoSecondDigits = 9
   187  
   188  	// Split string into seconds and sub-seconds parts.
   189  	ss, sn := s, ""
   190  	if pos := strings.IndexByte(s, '.'); pos >= 0 {
   191  		ss, sn = s[:pos], s[pos+1:]
   192  	}
   193  
   194  	// Parse the seconds.
   195  	secs, err := strconv.ParseInt(ss, 10, 64)
   196  	if err != nil {
   197  		return time.Time{}, ErrHeader
   198  	}
   199  	if len(sn) == 0 {
   200  		return time.Unix(secs, 0), nil // No sub-second values
   201  	}
   202  
   203  	// Parse the nanoseconds.
   204  	if strings.Trim(sn, "0123456789") != "" {
   205  		return time.Time{}, ErrHeader
   206  	}
   207  	if len(sn) < maxNanoSecondDigits {
   208  		sn += strings.Repeat("0", maxNanoSecondDigits-len(sn)) // Right pad
   209  	} else {
   210  		sn = sn[:maxNanoSecondDigits] // Right truncate
   211  	}
   212  	nsecs, _ := strconv.ParseInt(sn, 10, 64) // Must succeed
   213  	if len(ss) > 0 && ss[0] == '-' {
   214  		return time.Unix(secs, -1*int64(nsecs)), nil // Negative correction
   215  	}
   216  	return time.Unix(secs, int64(nsecs)), nil
   217  }
   218  
   219  // TODO(dsnet): Implement formatPAXTime.
   220  
   221  // parsePAXRecord parses the input PAX record string into a key-value pair.
   222  // If parsing is successful, it will slice off the currently read record and
   223  // return the remainder as r.
   224  //
   225  // A PAX record is of the following form:
   226  //	"%d %s=%s\n" % (size, key, value)
   227  func parsePAXRecord(s string) (k, v, r string, err error) {
   228  	// The size field ends at the first space.
   229  	sp := strings.IndexByte(s, ' ')
   230  	if sp == -1 {
   231  		return "", "", s, ErrHeader
   232  	}
   233  
   234  	// Parse the first token as a decimal integer.
   235  	n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int
   236  	if perr != nil || n < 5 || int64(len(s)) < n {
   237  		return "", "", s, ErrHeader
   238  	}
   239  
   240  	// Extract everything between the space and the final newline.
   241  	rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:]
   242  	if nl != "\n" {
   243  		return "", "", s, ErrHeader
   244  	}
   245  
   246  	// The first equals separates the key from the value.
   247  	eq := strings.IndexByte(rec, '=')
   248  	if eq == -1 {
   249  		return "", "", s, ErrHeader
   250  	}
   251  	k, v = rec[:eq], rec[eq+1:]
   252  
   253  	if !validPAXRecord(k, v) {
   254  		return "", "", s, ErrHeader
   255  	}
   256  	return k, v, rem, nil
   257  }
   258  
   259  // formatPAXRecord formats a single PAX record, prefixing it with the
   260  // appropriate length.
   261  func formatPAXRecord(k, v string) (string, error) {
   262  	if !validPAXRecord(k, v) {
   263  		return "", ErrHeader
   264  	}
   265  
   266  	const padding = 3 // Extra padding for ' ', '=', and '\n'
   267  	size := len(k) + len(v) + padding
   268  	size += len(strconv.Itoa(size))
   269  	record := fmt.Sprintf("%d %s=%s\n", size, k, v)
   270  
   271  	// Final adjustment if adding size field increased the record size.
   272  	if len(record) != size {
   273  		size = len(record)
   274  		record = fmt.Sprintf("%d %s=%s\n", size, k, v)
   275  	}
   276  	return record, nil
   277  }
   278  
   279  // validPAXRecord reports whether the key-value pair is valid.
   280  // Keys and values should be UTF-8, but the number of bad writers out there
   281  // forces us to be a more liberal.
   282  // Thus, we only reject all keys with NUL, and only reject NULs in values
   283  // for the PAX version of the USTAR string fields.
   284  func validPAXRecord(k, v string) bool {
   285  	switch k {
   286  	case paxPath, paxLinkpath, paxUname, paxGname:
   287  		return strings.IndexByte(v, 0) < 0
   288  	default:
   289  		return strings.IndexByte(k, 0) < 0
   290  	}
   291  }