github.com/eun/go@v0.0.0-20170811110501-92cfd07a6cfd/src/archive/tar/strconv.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package tar 6 7 import ( 8 "bytes" 9 "fmt" 10 "strconv" 11 "strings" 12 "time" 13 ) 14 15 // isASCII reports whether the input is an ASCII C-style string. 16 func isASCII(s string) bool { 17 for _, c := range s { 18 if c >= 0x80 || c == 0x00 { 19 return false 20 } 21 } 22 return true 23 } 24 25 // toASCII converts the input to an ASCII C-style string. 26 // This a best effort conversion, so invalid characters are dropped. 27 func toASCII(s string) string { 28 if isASCII(s) { 29 return s 30 } 31 var buf bytes.Buffer 32 for _, c := range s { 33 if c < 0x80 && c != 0x00 { 34 buf.WriteByte(byte(c)) 35 } 36 } 37 return buf.String() 38 } 39 40 type parser struct { 41 err error // Last error seen 42 } 43 44 type formatter struct { 45 err error // Last error seen 46 } 47 48 // parseString parses bytes as a NUL-terminated C-style string. 49 // If a NUL byte is not found then the whole slice is returned as a string. 50 func (*parser) parseString(b []byte) string { 51 n := 0 52 for n < len(b) && b[n] != 0 { 53 n++ 54 } 55 return string(b[0:n]) 56 } 57 58 // Write s into b, terminating it with a NUL if there is room. 59 func (f *formatter) formatString(b []byte, s string) { 60 if len(s) > len(b) { 61 f.err = ErrFieldTooLong 62 return 63 } 64 ascii := toASCII(s) 65 copy(b, ascii) 66 if len(ascii) < len(b) { 67 b[len(ascii)] = 0 68 } 69 } 70 71 // fitsInBase256 reports whether x can be encoded into n bytes using base-256 72 // encoding. Unlike octal encoding, base-256 encoding does not require that the 73 // string ends with a NUL character. Thus, all n bytes are available for output. 74 // 75 // If operating in binary mode, this assumes strict GNU binary mode; which means 76 // that the first byte can only be either 0x80 or 0xff. Thus, the first byte is 77 // equivalent to the sign bit in two's complement form. 78 func fitsInBase256(n int, x int64) bool { 79 var binBits = uint(n-1) * 8 80 return n >= 9 || (x >= -1<<binBits && x < 1<<binBits) 81 } 82 83 // parseNumeric parses the input as being encoded in either base-256 or octal. 84 // This function may return negative numbers. 85 // If parsing fails or an integer overflow occurs, err will be set. 86 func (p *parser) parseNumeric(b []byte) int64 { 87 // Check for base-256 (binary) format first. 88 // If the first bit is set, then all following bits constitute a two's 89 // complement encoded number in big-endian byte order. 90 if len(b) > 0 && b[0]&0x80 != 0 { 91 // Handling negative numbers relies on the following identity: 92 // -a-1 == ^a 93 // 94 // If the number is negative, we use an inversion mask to invert the 95 // data bytes and treat the value as an unsigned number. 96 var inv byte // 0x00 if positive or zero, 0xff if negative 97 if b[0]&0x40 != 0 { 98 inv = 0xff 99 } 100 101 var x uint64 102 for i, c := range b { 103 c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing 104 if i == 0 { 105 c &= 0x7f // Ignore signal bit in first byte 106 } 107 if (x >> 56) > 0 { 108 p.err = ErrHeader // Integer overflow 109 return 0 110 } 111 x = x<<8 | uint64(c) 112 } 113 if (x >> 63) > 0 { 114 p.err = ErrHeader // Integer overflow 115 return 0 116 } 117 if inv == 0xff { 118 return ^int64(x) 119 } 120 return int64(x) 121 } 122 123 // Normal case is base-8 (octal) format. 124 return p.parseOctal(b) 125 } 126 127 // formatNumeric encodes x into b using base-8 (octal) encoding if possible. 128 // Otherwise it will attempt to use base-256 (binary) encoding. 129 func (f *formatter) formatNumeric(b []byte, x int64) { 130 if fitsInOctal(len(b), x) { 131 f.formatOctal(b, x) 132 return 133 } 134 135 if fitsInBase256(len(b), x) { 136 for i := len(b) - 1; i >= 0; i-- { 137 b[i] = byte(x) 138 x >>= 8 139 } 140 b[0] |= 0x80 // Highest bit indicates binary format 141 return 142 } 143 144 f.formatOctal(b, 0) // Last resort, just write zero 145 f.err = ErrFieldTooLong 146 } 147 148 func (p *parser) parseOctal(b []byte) int64 { 149 // Because unused fields are filled with NULs, we need 150 // to skip leading NULs. Fields may also be padded with 151 // spaces or NULs. 152 // So we remove leading and trailing NULs and spaces to 153 // be sure. 154 b = bytes.Trim(b, " \x00") 155 156 if len(b) == 0 { 157 return 0 158 } 159 x, perr := strconv.ParseUint(p.parseString(b), 8, 64) 160 if perr != nil { 161 p.err = ErrHeader 162 } 163 return int64(x) 164 } 165 166 func (f *formatter) formatOctal(b []byte, x int64) { 167 s := strconv.FormatInt(x, 8) 168 // Add leading zeros, but leave room for a NUL. 169 if n := len(b) - len(s) - 1; n > 0 { 170 s = strings.Repeat("0", n) + s 171 } 172 f.formatString(b, s) 173 } 174 175 // fitsInOctal reports whether the integer x fits in a field n-bytes long 176 // using octal encoding with the appropriate NUL terminator. 177 func fitsInOctal(n int, x int64) bool { 178 octBits := uint(n-1) * 3 179 return x >= 0 && (n >= 22 || x < 1<<octBits) 180 } 181 182 // parsePAXTime takes a string of the form %d.%d as described in the PAX 183 // specification. Note that this implementation allows for negative timestamps, 184 // which is allowed for by the PAX specification, but not always portable. 185 func parsePAXTime(s string) (time.Time, error) { 186 const maxNanoSecondDigits = 9 187 188 // Split string into seconds and sub-seconds parts. 189 ss, sn := s, "" 190 if pos := strings.IndexByte(s, '.'); pos >= 0 { 191 ss, sn = s[:pos], s[pos+1:] 192 } 193 194 // Parse the seconds. 195 secs, err := strconv.ParseInt(ss, 10, 64) 196 if err != nil { 197 return time.Time{}, ErrHeader 198 } 199 if len(sn) == 0 { 200 return time.Unix(secs, 0), nil // No sub-second values 201 } 202 203 // Parse the nanoseconds. 204 if strings.Trim(sn, "0123456789") != "" { 205 return time.Time{}, ErrHeader 206 } 207 if len(sn) < maxNanoSecondDigits { 208 sn += strings.Repeat("0", maxNanoSecondDigits-len(sn)) // Right pad 209 } else { 210 sn = sn[:maxNanoSecondDigits] // Right truncate 211 } 212 nsecs, _ := strconv.ParseInt(sn, 10, 64) // Must succeed 213 if len(ss) > 0 && ss[0] == '-' { 214 return time.Unix(secs, -1*int64(nsecs)), nil // Negative correction 215 } 216 return time.Unix(secs, int64(nsecs)), nil 217 } 218 219 // TODO(dsnet): Implement formatPAXTime. 220 221 // parsePAXRecord parses the input PAX record string into a key-value pair. 222 // If parsing is successful, it will slice off the currently read record and 223 // return the remainder as r. 224 // 225 // A PAX record is of the following form: 226 // "%d %s=%s\n" % (size, key, value) 227 func parsePAXRecord(s string) (k, v, r string, err error) { 228 // The size field ends at the first space. 229 sp := strings.IndexByte(s, ' ') 230 if sp == -1 { 231 return "", "", s, ErrHeader 232 } 233 234 // Parse the first token as a decimal integer. 235 n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int 236 if perr != nil || n < 5 || int64(len(s)) < n { 237 return "", "", s, ErrHeader 238 } 239 240 // Extract everything between the space and the final newline. 241 rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:] 242 if nl != "\n" { 243 return "", "", s, ErrHeader 244 } 245 246 // The first equals separates the key from the value. 247 eq := strings.IndexByte(rec, '=') 248 if eq == -1 { 249 return "", "", s, ErrHeader 250 } 251 k, v = rec[:eq], rec[eq+1:] 252 253 if !validPAXRecord(k, v) { 254 return "", "", s, ErrHeader 255 } 256 return k, v, rem, nil 257 } 258 259 // formatPAXRecord formats a single PAX record, prefixing it with the 260 // appropriate length. 261 func formatPAXRecord(k, v string) (string, error) { 262 if !validPAXRecord(k, v) { 263 return "", ErrHeader 264 } 265 266 const padding = 3 // Extra padding for ' ', '=', and '\n' 267 size := len(k) + len(v) + padding 268 size += len(strconv.Itoa(size)) 269 record := fmt.Sprintf("%d %s=%s\n", size, k, v) 270 271 // Final adjustment if adding size field increased the record size. 272 if len(record) != size { 273 size = len(record) 274 record = fmt.Sprintf("%d %s=%s\n", size, k, v) 275 } 276 return record, nil 277 } 278 279 // validPAXRecord reports whether the key-value pair is valid. 280 // Keys and values should be UTF-8, but the number of bad writers out there 281 // forces us to be a more liberal. 282 // Thus, we only reject all keys with NUL, and only reject NULs in values 283 // for the PAX version of the USTAR string fields. 284 func validPAXRecord(k, v string) bool { 285 switch k { 286 case paxPath, paxLinkpath, paxUname, paxGname: 287 return strings.IndexByte(v, 0) < 0 288 default: 289 return strings.IndexByte(k, 0) < 0 290 } 291 }