github.com/hlts2/go@v0.0.0-20170904000733-812b34efaed8/src/archive/tar/strconv.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package tar 6 7 import ( 8 "bytes" 9 "fmt" 10 "strconv" 11 "strings" 12 "time" 13 ) 14 15 // hasNUL reports whether the NUL character exists within s. 16 func hasNUL(s string) bool { 17 return strings.IndexByte(s, 0) >= 0 18 } 19 20 // isASCII reports whether the input is an ASCII C-style string. 21 func isASCII(s string) bool { 22 for _, c := range s { 23 if c >= 0x80 || c == 0x00 { 24 return false 25 } 26 } 27 return true 28 } 29 30 // toASCII converts the input to an ASCII C-style string. 31 // This a best effort conversion, so invalid characters are dropped. 32 func toASCII(s string) string { 33 if isASCII(s) { 34 return s 35 } 36 b := make([]byte, 0, len(s)) 37 for _, c := range s { 38 if c < 0x80 && c != 0x00 { 39 b = append(b, byte(c)) 40 } 41 } 42 return string(b) 43 } 44 45 type parser struct { 46 err error // Last error seen 47 } 48 49 type formatter struct { 50 err error // Last error seen 51 } 52 53 // parseString parses bytes as a NUL-terminated C-style string. 54 // If a NUL byte is not found then the whole slice is returned as a string. 55 func (*parser) parseString(b []byte) string { 56 if i := bytes.IndexByte(b, 0); i >= 0 { 57 return string(b[:i]) 58 } 59 return string(b) 60 } 61 62 // formatString copies s into b, NUL-terminating if possible. 63 func (f *formatter) formatString(b []byte, s string) { 64 if len(s) > len(b) { 65 f.err = ErrFieldTooLong 66 } 67 copy(b, s) 68 if len(s) < len(b) { 69 b[len(s)] = 0 70 } 71 } 72 73 // fitsInBase256 reports whether x can be encoded into n bytes using base-256 74 // encoding. Unlike octal encoding, base-256 encoding does not require that the 75 // string ends with a NUL character. Thus, all n bytes are available for output. 76 // 77 // If operating in binary mode, this assumes strict GNU binary mode; which means 78 // that the first byte can only be either 0x80 or 0xff. Thus, the first byte is 79 // equivalent to the sign bit in two's complement form. 80 func fitsInBase256(n int, x int64) bool { 81 binBits := uint(n-1) * 8 82 return n >= 9 || (x >= -1<<binBits && x < 1<<binBits) 83 } 84 85 // parseNumeric parses the input as being encoded in either base-256 or octal. 86 // This function may return negative numbers. 87 // If parsing fails or an integer overflow occurs, err will be set. 88 func (p *parser) parseNumeric(b []byte) int64 { 89 // Check for base-256 (binary) format first. 90 // If the first bit is set, then all following bits constitute a two's 91 // complement encoded number in big-endian byte order. 92 if len(b) > 0 && b[0]&0x80 != 0 { 93 // Handling negative numbers relies on the following identity: 94 // -a-1 == ^a 95 // 96 // If the number is negative, we use an inversion mask to invert the 97 // data bytes and treat the value as an unsigned number. 98 var inv byte // 0x00 if positive or zero, 0xff if negative 99 if b[0]&0x40 != 0 { 100 inv = 0xff 101 } 102 103 var x uint64 104 for i, c := range b { 105 c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing 106 if i == 0 { 107 c &= 0x7f // Ignore signal bit in first byte 108 } 109 if (x >> 56) > 0 { 110 p.err = ErrHeader // Integer overflow 111 return 0 112 } 113 x = x<<8 | uint64(c) 114 } 115 if (x >> 63) > 0 { 116 p.err = ErrHeader // Integer overflow 117 return 0 118 } 119 if inv == 0xff { 120 return ^int64(x) 121 } 122 return int64(x) 123 } 124 125 // Normal case is base-8 (octal) format. 126 return p.parseOctal(b) 127 } 128 129 // formatNumeric encodes x into b using base-8 (octal) encoding if possible. 130 // Otherwise it will attempt to use base-256 (binary) encoding. 131 func (f *formatter) formatNumeric(b []byte, x int64) { 132 if fitsInOctal(len(b), x) { 133 f.formatOctal(b, x) 134 return 135 } 136 137 if fitsInBase256(len(b), x) { 138 for i := len(b) - 1; i >= 0; i-- { 139 b[i] = byte(x) 140 x >>= 8 141 } 142 b[0] |= 0x80 // Highest bit indicates binary format 143 return 144 } 145 146 f.formatOctal(b, 0) // Last resort, just write zero 147 f.err = ErrFieldTooLong 148 } 149 150 func (p *parser) parseOctal(b []byte) int64 { 151 // Because unused fields are filled with NULs, we need 152 // to skip leading NULs. Fields may also be padded with 153 // spaces or NULs. 154 // So we remove leading and trailing NULs and spaces to 155 // be sure. 156 b = bytes.Trim(b, " \x00") 157 158 if len(b) == 0 { 159 return 0 160 } 161 x, perr := strconv.ParseUint(p.parseString(b), 8, 64) 162 if perr != nil { 163 p.err = ErrHeader 164 } 165 return int64(x) 166 } 167 168 func (f *formatter) formatOctal(b []byte, x int64) { 169 if !fitsInOctal(len(b), x) { 170 x = 0 // Last resort, just write zero 171 f.err = ErrFieldTooLong 172 } 173 174 s := strconv.FormatInt(x, 8) 175 // Add leading zeros, but leave room for a NUL. 176 if n := len(b) - len(s) - 1; n > 0 { 177 s = strings.Repeat("0", n) + s 178 } 179 f.formatString(b, s) 180 } 181 182 // fitsInOctal reports whether the integer x fits in a field n-bytes long 183 // using octal encoding with the appropriate NUL terminator. 184 func fitsInOctal(n int, x int64) bool { 185 octBits := uint(n-1) * 3 186 return x >= 0 && (n >= 22 || x < 1<<octBits) 187 } 188 189 // parsePAXTime takes a string of the form %d.%d as described in the PAX 190 // specification. Note that this implementation allows for negative timestamps, 191 // which is allowed for by the PAX specification, but not always portable. 192 func parsePAXTime(s string) (time.Time, error) { 193 const maxNanoSecondDigits = 9 194 195 // Split string into seconds and sub-seconds parts. 196 ss, sn := s, "" 197 if pos := strings.IndexByte(s, '.'); pos >= 0 { 198 ss, sn = s[:pos], s[pos+1:] 199 } 200 201 // Parse the seconds. 202 secs, err := strconv.ParseInt(ss, 10, 64) 203 if err != nil { 204 return time.Time{}, ErrHeader 205 } 206 if len(sn) == 0 { 207 return time.Unix(secs, 0), nil // No sub-second values 208 } 209 210 // Parse the nanoseconds. 211 if strings.Trim(sn, "0123456789") != "" { 212 return time.Time{}, ErrHeader 213 } 214 if len(sn) < maxNanoSecondDigits { 215 sn += strings.Repeat("0", maxNanoSecondDigits-len(sn)) // Right pad 216 } else { 217 sn = sn[:maxNanoSecondDigits] // Right truncate 218 } 219 nsecs, _ := strconv.ParseInt(sn, 10, 64) // Must succeed 220 if len(ss) > 0 && ss[0] == '-' { 221 return time.Unix(secs, -1*int64(nsecs)), nil // Negative correction 222 } 223 return time.Unix(secs, int64(nsecs)), nil 224 } 225 226 // formatPAXTime converts ts into a time of the form %d.%d as described in the 227 // PAX specification. This function is capable of negative timestamps. 228 func formatPAXTime(ts time.Time) (s string) { 229 secs, nsecs := ts.Unix(), ts.Nanosecond() 230 if nsecs == 0 { 231 return strconv.FormatInt(secs, 10) 232 } 233 234 // If seconds is negative, then perform correction. 235 sign := "" 236 if secs < 0 { 237 sign = "-" // Remember sign 238 secs = -(secs + 1) // Add a second to secs 239 nsecs = -(nsecs - 1E9) // Take that second away from nsecs 240 } 241 return strings.TrimRight(fmt.Sprintf("%s%d.%09d", sign, secs, nsecs), "0") 242 } 243 244 // parsePAXRecord parses the input PAX record string into a key-value pair. 245 // If parsing is successful, it will slice off the currently read record and 246 // return the remainder as r. 247 func parsePAXRecord(s string) (k, v, r string, err error) { 248 // The size field ends at the first space. 249 sp := strings.IndexByte(s, ' ') 250 if sp == -1 { 251 return "", "", s, ErrHeader 252 } 253 254 // Parse the first token as a decimal integer. 255 n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int 256 if perr != nil || n < 5 || int64(len(s)) < n { 257 return "", "", s, ErrHeader 258 } 259 260 // Extract everything between the space and the final newline. 261 rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:] 262 if nl != "\n" { 263 return "", "", s, ErrHeader 264 } 265 266 // The first equals separates the key from the value. 267 eq := strings.IndexByte(rec, '=') 268 if eq == -1 { 269 return "", "", s, ErrHeader 270 } 271 k, v = rec[:eq], rec[eq+1:] 272 273 if !validPAXRecord(k, v) { 274 return "", "", s, ErrHeader 275 } 276 return k, v, rem, nil 277 } 278 279 // formatPAXRecord formats a single PAX record, prefixing it with the 280 // appropriate length. 281 func formatPAXRecord(k, v string) (string, error) { 282 if !validPAXRecord(k, v) { 283 return "", ErrHeader 284 } 285 286 const padding = 3 // Extra padding for ' ', '=', and '\n' 287 size := len(k) + len(v) + padding 288 size += len(strconv.Itoa(size)) 289 record := strconv.Itoa(size) + " " + k + "=" + v + "\n" 290 291 // Final adjustment if adding size field increased the record size. 292 if len(record) != size { 293 size = len(record) 294 record = strconv.Itoa(size) + " " + k + "=" + v + "\n" 295 } 296 return record, nil 297 } 298 299 // validPAXRecord reports whether the key-value pair is valid where each 300 // record is formatted as: 301 // "%d %s=%s\n" % (size, key, value) 302 // 303 // Keys and values should be UTF-8, but the number of bad writers out there 304 // forces us to be a more liberal. 305 // Thus, we only reject all keys with NUL, and only reject NULs in values 306 // for the PAX version of the USTAR string fields. 307 // The key must not contain an '=' character. 308 func validPAXRecord(k, v string) bool { 309 if k == "" || strings.IndexByte(k, '=') >= 0 { 310 return false 311 } 312 switch k { 313 case paxPath, paxLinkpath, paxUname, paxGname: 314 return !hasNUL(v) 315 default: 316 return !hasNUL(k) 317 } 318 }