github.com/ltltlt/go-source-code@v0.0.0-20190830023027-95be009773aa/archive/tar/strconv.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package tar 6 7 import ( 8 "bytes" 9 "fmt" 10 "strconv" 11 "strings" 12 "time" 13 ) 14 15 // hasNUL reports whether the NUL character exists within s. 16 func hasNUL(s string) bool { 17 return strings.IndexByte(s, 0) >= 0 18 } 19 20 // isASCII reports whether the input is an ASCII C-style string. 21 func isASCII(s string) bool { 22 for _, c := range s { 23 if c >= 0x80 || c == 0x00 { 24 return false 25 } 26 } 27 return true 28 } 29 30 // toASCII converts the input to an ASCII C-style string. 31 // This a best effort conversion, so invalid characters are dropped. 32 func toASCII(s string) string { 33 if isASCII(s) { 34 return s 35 } 36 b := make([]byte, 0, len(s)) 37 for _, c := range s { 38 if c < 0x80 && c != 0x00 { 39 b = append(b, byte(c)) 40 } 41 } 42 return string(b) 43 } 44 45 type parser struct { 46 err error // Last error seen 47 } 48 49 type formatter struct { 50 err error // Last error seen 51 } 52 53 // parseString parses bytes as a NUL-terminated C-style string. 54 // If a NUL byte is not found then the whole slice is returned as a string. 55 func (*parser) parseString(b []byte) string { 56 if i := bytes.IndexByte(b, 0); i >= 0 { 57 return string(b[:i]) 58 } 59 return string(b) 60 } 61 62 // formatString copies s into b, NUL-terminating if possible. 63 func (f *formatter) formatString(b []byte, s string) { 64 if len(s) > len(b) { 65 f.err = ErrFieldTooLong 66 } 67 copy(b, s) 68 if len(s) < len(b) { 69 b[len(s)] = 0 70 } 71 72 // Some buggy readers treat regular files with a trailing slash 73 // in the V7 path field as a directory even though the full path 74 // recorded elsewhere (e.g., via PAX record) contains no trailing slash. 75 if len(s) > len(b) && b[len(b)-1] == '/' { 76 n := len(strings.TrimRight(s[:len(b)], "/")) 77 b[n] = 0 // Replace trailing slash with NUL terminator 78 } 79 } 80 81 // fitsInBase256 reports whether x can be encoded into n bytes using base-256 82 // encoding. Unlike octal encoding, base-256 encoding does not require that the 83 // string ends with a NUL character. Thus, all n bytes are available for output. 84 // 85 // If operating in binary mode, this assumes strict GNU binary mode; which means 86 // that the first byte can only be either 0x80 or 0xff. Thus, the first byte is 87 // equivalent to the sign bit in two's complement form. 88 func fitsInBase256(n int, x int64) bool { 89 binBits := uint(n-1) * 8 90 return n >= 9 || (x >= -1<<binBits && x < 1<<binBits) 91 } 92 93 // parseNumeric parses the input as being encoded in either base-256 or octal. 94 // This function may return negative numbers. 95 // If parsing fails or an integer overflow occurs, err will be set. 96 func (p *parser) parseNumeric(b []byte) int64 { 97 // Check for base-256 (binary) format first. 98 // If the first bit is set, then all following bits constitute a two's 99 // complement encoded number in big-endian byte order. 100 if len(b) > 0 && b[0]&0x80 != 0 { 101 // Handling negative numbers relies on the following identity: 102 // -a-1 == ^a 103 // 104 // If the number is negative, we use an inversion mask to invert the 105 // data bytes and treat the value as an unsigned number. 106 var inv byte // 0x00 if positive or zero, 0xff if negative 107 if b[0]&0x40 != 0 { 108 inv = 0xff 109 } 110 111 var x uint64 112 for i, c := range b { 113 c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing 114 if i == 0 { 115 c &= 0x7f // Ignore signal bit in first byte 116 } 117 if (x >> 56) > 0 { 118 p.err = ErrHeader // Integer overflow 119 return 0 120 } 121 x = x<<8 | uint64(c) 122 } 123 if (x >> 63) > 0 { 124 p.err = ErrHeader // Integer overflow 125 return 0 126 } 127 if inv == 0xff { 128 return ^int64(x) 129 } 130 return int64(x) 131 } 132 133 // Normal case is base-8 (octal) format. 134 return p.parseOctal(b) 135 } 136 137 // formatNumeric encodes x into b using base-8 (octal) encoding if possible. 138 // Otherwise it will attempt to use base-256 (binary) encoding. 139 func (f *formatter) formatNumeric(b []byte, x int64) { 140 if fitsInOctal(len(b), x) { 141 f.formatOctal(b, x) 142 return 143 } 144 145 if fitsInBase256(len(b), x) { 146 for i := len(b) - 1; i >= 0; i-- { 147 b[i] = byte(x) 148 x >>= 8 149 } 150 b[0] |= 0x80 // Highest bit indicates binary format 151 return 152 } 153 154 f.formatOctal(b, 0) // Last resort, just write zero 155 f.err = ErrFieldTooLong 156 } 157 158 func (p *parser) parseOctal(b []byte) int64 { 159 // Because unused fields are filled with NULs, we need 160 // to skip leading NULs. Fields may also be padded with 161 // spaces or NULs. 162 // So we remove leading and trailing NULs and spaces to 163 // be sure. 164 b = bytes.Trim(b, " \x00") 165 166 if len(b) == 0 { 167 return 0 168 } 169 x, perr := strconv.ParseUint(p.parseString(b), 8, 64) 170 if perr != nil { 171 p.err = ErrHeader 172 } 173 return int64(x) 174 } 175 176 func (f *formatter) formatOctal(b []byte, x int64) { 177 if !fitsInOctal(len(b), x) { 178 x = 0 // Last resort, just write zero 179 f.err = ErrFieldTooLong 180 } 181 182 s := strconv.FormatInt(x, 8) 183 // Add leading zeros, but leave room for a NUL. 184 if n := len(b) - len(s) - 1; n > 0 { 185 s = strings.Repeat("0", n) + s 186 } 187 f.formatString(b, s) 188 } 189 190 // fitsInOctal reports whether the integer x fits in a field n-bytes long 191 // using octal encoding with the appropriate NUL terminator. 192 func fitsInOctal(n int, x int64) bool { 193 octBits := uint(n-1) * 3 194 return x >= 0 && (n >= 22 || x < 1<<octBits) 195 } 196 197 // parsePAXTime takes a string of the form %d.%d as described in the PAX 198 // specification. Note that this implementation allows for negative timestamps, 199 // which is allowed for by the PAX specification, but not always portable. 200 func parsePAXTime(s string) (time.Time, error) { 201 const maxNanoSecondDigits = 9 202 203 // Split string into seconds and sub-seconds parts. 204 ss, sn := s, "" 205 if pos := strings.IndexByte(s, '.'); pos >= 0 { 206 ss, sn = s[:pos], s[pos+1:] 207 } 208 209 // Parse the seconds. 210 secs, err := strconv.ParseInt(ss, 10, 64) 211 if err != nil { 212 return time.Time{}, ErrHeader 213 } 214 if len(sn) == 0 { 215 return time.Unix(secs, 0), nil // No sub-second values 216 } 217 218 // Parse the nanoseconds. 219 if strings.Trim(sn, "0123456789") != "" { 220 return time.Time{}, ErrHeader 221 } 222 if len(sn) < maxNanoSecondDigits { 223 sn += strings.Repeat("0", maxNanoSecondDigits-len(sn)) // Right pad 224 } else { 225 sn = sn[:maxNanoSecondDigits] // Right truncate 226 } 227 nsecs, _ := strconv.ParseInt(sn, 10, 64) // Must succeed 228 if len(ss) > 0 && ss[0] == '-' { 229 return time.Unix(secs, -1*nsecs), nil // Negative correction 230 } 231 return time.Unix(secs, nsecs), nil 232 } 233 234 // formatPAXTime converts ts into a time of the form %d.%d as described in the 235 // PAX specification. This function is capable of negative timestamps. 236 func formatPAXTime(ts time.Time) (s string) { 237 secs, nsecs := ts.Unix(), ts.Nanosecond() 238 if nsecs == 0 { 239 return strconv.FormatInt(secs, 10) 240 } 241 242 // If seconds is negative, then perform correction. 243 sign := "" 244 if secs < 0 { 245 sign = "-" // Remember sign 246 secs = -(secs + 1) // Add a second to secs 247 nsecs = -(nsecs - 1E9) // Take that second away from nsecs 248 } 249 return strings.TrimRight(fmt.Sprintf("%s%d.%09d", sign, secs, nsecs), "0") 250 } 251 252 // parsePAXRecord parses the input PAX record string into a key-value pair. 253 // If parsing is successful, it will slice off the currently read record and 254 // return the remainder as r. 255 func parsePAXRecord(s string) (k, v, r string, err error) { 256 // The size field ends at the first space. 257 sp := strings.IndexByte(s, ' ') 258 if sp == -1 { 259 return "", "", s, ErrHeader 260 } 261 262 // Parse the first token as a decimal integer. 263 n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int 264 if perr != nil || n < 5 || int64(len(s)) < n { 265 return "", "", s, ErrHeader 266 } 267 268 // Extract everything between the space and the final newline. 269 rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:] 270 if nl != "\n" { 271 return "", "", s, ErrHeader 272 } 273 274 // The first equals separates the key from the value. 275 eq := strings.IndexByte(rec, '=') 276 if eq == -1 { 277 return "", "", s, ErrHeader 278 } 279 k, v = rec[:eq], rec[eq+1:] 280 281 if !validPAXRecord(k, v) { 282 return "", "", s, ErrHeader 283 } 284 return k, v, rem, nil 285 } 286 287 // formatPAXRecord formats a single PAX record, prefixing it with the 288 // appropriate length. 289 func formatPAXRecord(k, v string) (string, error) { 290 if !validPAXRecord(k, v) { 291 return "", ErrHeader 292 } 293 294 const padding = 3 // Extra padding for ' ', '=', and '\n' 295 size := len(k) + len(v) + padding 296 size += len(strconv.Itoa(size)) 297 record := strconv.Itoa(size) + " " + k + "=" + v + "\n" 298 299 // Final adjustment if adding size field increased the record size. 300 if len(record) != size { 301 size = len(record) 302 record = strconv.Itoa(size) + " " + k + "=" + v + "\n" 303 } 304 return record, nil 305 } 306 307 // validPAXRecord reports whether the key-value pair is valid where each 308 // record is formatted as: 309 // "%d %s=%s\n" % (size, key, value) 310 // 311 // Keys and values should be UTF-8, but the number of bad writers out there 312 // forces us to be a more liberal. 313 // Thus, we only reject all keys with NUL, and only reject NULs in values 314 // for the PAX version of the USTAR string fields. 315 // The key must not contain an '=' character. 316 func validPAXRecord(k, v string) bool { 317 if k == "" || strings.IndexByte(k, '=') >= 0 { 318 return false 319 } 320 switch k { 321 case paxPath, paxLinkpath, paxUname, paxGname: 322 return !hasNUL(v) 323 default: 324 return !hasNUL(k) 325 } 326 }