github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/archive/tar/strconv.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package tar 6 7 import ( 8 "bytes" 9 "fmt" 10 "strconv" 11 "strings" 12 "time" 13 ) 14 15 // hasNUL reports whether the NUL character exists within s. 16 func hasNUL(s string) bool { 17 return strings.Contains(s, "\x00") 18 } 19 20 // isASCII reports whether the input is an ASCII C-style string. 21 func isASCII(s string) bool { 22 for _, c := range s { 23 if c >= 0x80 || c == 0x00 { 24 return false 25 } 26 } 27 return true 28 } 29 30 // toASCII converts the input to an ASCII C-style string. 31 // This is a best effort conversion, so invalid characters are dropped. 32 func toASCII(s string) string { 33 if isASCII(s) { 34 return s 35 } 36 b := make([]byte, 0, len(s)) 37 for _, c := range s { 38 if c < 0x80 && c != 0x00 { 39 b = append(b, byte(c)) 40 } 41 } 42 return string(b) 43 } 44 45 type parser struct { 46 err error // Last error seen 47 } 48 49 type formatter struct { 50 err error // Last error seen 51 } 52 53 // parseString parses bytes as a NUL-terminated C-style string. 54 // If a NUL byte is not found then the whole slice is returned as a string. 55 func (*parser) parseString(b []byte) string { 56 if i := bytes.IndexByte(b, 0); i >= 0 { 57 return string(b[:i]) 58 } 59 return string(b) 60 } 61 62 // formatString copies s into b, NUL-terminating if possible. 63 func (f *formatter) formatString(b []byte, s string) { 64 if len(s) > len(b) { 65 f.err = ErrFieldTooLong 66 } 67 copy(b, s) 68 if len(s) < len(b) { 69 b[len(s)] = 0 70 } 71 72 // Some buggy readers treat regular files with a trailing slash 73 // in the V7 path field as a directory even though the full path 74 // recorded elsewhere (e.g., via PAX record) contains no trailing slash. 75 if len(s) > len(b) && b[len(b)-1] == '/' { 76 n := len(strings.TrimRight(s[:len(b)], "/")) 77 b[n] = 0 // Replace trailing slash with NUL terminator 78 } 79 } 80 81 // fitsInBase256 reports whether x can be encoded into n bytes using base-256 82 // encoding. Unlike octal encoding, base-256 encoding does not require that the 83 // string ends with a NUL character. Thus, all n bytes are available for output. 84 // 85 // If operating in binary mode, this assumes strict GNU binary mode; which means 86 // that the first byte can only be either 0x80 or 0xff. Thus, the first byte is 87 // equivalent to the sign bit in two's complement form. 88 func fitsInBase256(n int, x int64) bool { 89 binBits := uint(n-1) * 8 90 return n >= 9 || (x >= -1<<binBits && x < 1<<binBits) 91 } 92 93 // parseNumeric parses the input as being encoded in either base-256 or octal. 94 // This function may return negative numbers. 95 // If parsing fails or an integer overflow occurs, err will be set. 96 func (p *parser) parseNumeric(b []byte) int64 { 97 // Check for base-256 (binary) format first. 98 // If the first bit is set, then all following bits constitute a two's 99 // complement encoded number in big-endian byte order. 100 if len(b) > 0 && b[0]&0x80 != 0 { 101 // Handling negative numbers relies on the following identity: 102 // -a-1 == ^a 103 // 104 // If the number is negative, we use an inversion mask to invert the 105 // data bytes and treat the value as an unsigned number. 106 var inv byte // 0x00 if positive or zero, 0xff if negative 107 if b[0]&0x40 != 0 { 108 inv = 0xff 109 } 110 111 var x uint64 112 for i, c := range b { 113 c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing 114 if i == 0 { 115 c &= 0x7f // Ignore signal bit in first byte 116 } 117 if (x >> 56) > 0 { 118 p.err = ErrHeader // Integer overflow 119 return 0 120 } 121 x = x<<8 | uint64(c) 122 } 123 if (x >> 63) > 0 { 124 p.err = ErrHeader // Integer overflow 125 return 0 126 } 127 if inv == 0xff { 128 return ^int64(x) 129 } 130 return int64(x) 131 } 132 133 // Normal case is base-8 (octal) format. 134 return p.parseOctal(b) 135 } 136 137 // formatNumeric encodes x into b using base-8 (octal) encoding if possible. 138 // Otherwise it will attempt to use base-256 (binary) encoding. 139 func (f *formatter) formatNumeric(b []byte, x int64) { 140 if fitsInOctal(len(b), x) { 141 f.formatOctal(b, x) 142 return 143 } 144 145 if fitsInBase256(len(b), x) { 146 for i := len(b) - 1; i >= 0; i-- { 147 b[i] = byte(x) 148 x >>= 8 149 } 150 b[0] |= 0x80 // Highest bit indicates binary format 151 return 152 } 153 154 f.formatOctal(b, 0) // Last resort, just write zero 155 f.err = ErrFieldTooLong 156 } 157 158 func (p *parser) parseOctal(b []byte) int64 { 159 // Because unused fields are filled with NULs, we need 160 // to skip leading NULs. Fields may also be padded with 161 // spaces or NULs. 162 // So we remove leading and trailing NULs and spaces to 163 // be sure. 164 b = bytes.Trim(b, " \x00") 165 166 if len(b) == 0 { 167 return 0 168 } 169 x, perr := strconv.ParseUint(p.parseString(b), 8, 64) 170 if perr != nil { 171 p.err = ErrHeader 172 } 173 return int64(x) 174 } 175 176 func (f *formatter) formatOctal(b []byte, x int64) { 177 if !fitsInOctal(len(b), x) { 178 x = 0 // Last resort, just write zero 179 f.err = ErrFieldTooLong 180 } 181 182 s := strconv.FormatInt(x, 8) 183 // Add leading zeros, but leave room for a NUL. 184 if n := len(b) - len(s) - 1; n > 0 { 185 s = strings.Repeat("0", n) + s 186 } 187 f.formatString(b, s) 188 } 189 190 // fitsInOctal reports whether the integer x fits in a field n-bytes long 191 // using octal encoding with the appropriate NUL terminator. 192 func fitsInOctal(n int, x int64) bool { 193 octBits := uint(n-1) * 3 194 return x >= 0 && (n >= 22 || x < 1<<octBits) 195 } 196 197 // parsePAXTime takes a string of the form %d.%d as described in the PAX 198 // specification. Note that this implementation allows for negative timestamps, 199 // which is allowed for by the PAX specification, but not always portable. 200 func parsePAXTime(s string) (time.Time, error) { 201 const maxNanoSecondDigits = 9 202 203 // Split string into seconds and sub-seconds parts. 204 ss, sn, _ := strings.Cut(s, ".") 205 206 // Parse the seconds. 207 secs, err := strconv.ParseInt(ss, 10, 64) 208 if err != nil { 209 return time.Time{}, ErrHeader 210 } 211 if len(sn) == 0 { 212 return time.Unix(secs, 0), nil // No sub-second values 213 } 214 215 // Parse the nanoseconds. 216 if strings.Trim(sn, "0123456789") != "" { 217 return time.Time{}, ErrHeader 218 } 219 if len(sn) < maxNanoSecondDigits { 220 sn += strings.Repeat("0", maxNanoSecondDigits-len(sn)) // Right pad 221 } else { 222 sn = sn[:maxNanoSecondDigits] // Right truncate 223 } 224 nsecs, _ := strconv.ParseInt(sn, 10, 64) // Must succeed 225 if len(ss) > 0 && ss[0] == '-' { 226 return time.Unix(secs, -1*nsecs), nil // Negative correction 227 } 228 return time.Unix(secs, nsecs), nil 229 } 230 231 // formatPAXTime converts ts into a time of the form %d.%d as described in the 232 // PAX specification. This function is capable of negative timestamps. 233 func formatPAXTime(ts time.Time) (s string) { 234 secs, nsecs := ts.Unix(), ts.Nanosecond() 235 if nsecs == 0 { 236 return strconv.FormatInt(secs, 10) 237 } 238 239 // If seconds is negative, then perform correction. 240 sign := "" 241 if secs < 0 { 242 sign = "-" // Remember sign 243 secs = -(secs + 1) // Add a second to secs 244 nsecs = -(nsecs - 1e9) // Take that second away from nsecs 245 } 246 return strings.TrimRight(fmt.Sprintf("%s%d.%09d", sign, secs, nsecs), "0") 247 } 248 249 // parsePAXRecord parses the input PAX record string into a key-value pair. 250 // If parsing is successful, it will slice off the currently read record and 251 // return the remainder as r. 252 func parsePAXRecord(s string) (k, v, r string, err error) { 253 // The size field ends at the first space. 254 nStr, rest, ok := strings.Cut(s, " ") 255 if !ok { 256 return "", "", s, ErrHeader 257 } 258 259 // Parse the first token as a decimal integer. 260 n, perr := strconv.ParseInt(nStr, 10, 0) // Intentionally parse as native int 261 if perr != nil || n < 5 || n > int64(len(s)) { 262 return "", "", s, ErrHeader 263 } 264 n -= int64(len(nStr) + 1) // convert from index in s to index in rest 265 if n <= 0 { 266 return "", "", s, ErrHeader 267 } 268 269 // Extract everything between the space and the final newline. 270 rec, nl, rem := rest[:n-1], rest[n-1:n], rest[n:] 271 if nl != "\n" { 272 return "", "", s, ErrHeader 273 } 274 275 // The first equals separates the key from the value. 276 k, v, ok = strings.Cut(rec, "=") 277 if !ok { 278 return "", "", s, ErrHeader 279 } 280 281 if !validPAXRecord(k, v) { 282 return "", "", s, ErrHeader 283 } 284 return k, v, rem, nil 285 } 286 287 // formatPAXRecord formats a single PAX record, prefixing it with the 288 // appropriate length. 289 func formatPAXRecord(k, v string) (string, error) { 290 if !validPAXRecord(k, v) { 291 return "", ErrHeader 292 } 293 294 const padding = 3 // Extra padding for ' ', '=', and '\n' 295 size := len(k) + len(v) + padding 296 size += len(strconv.Itoa(size)) 297 record := strconv.Itoa(size) + " " + k + "=" + v + "\n" 298 299 // Final adjustment if adding size field increased the record size. 300 if len(record) != size { 301 size = len(record) 302 record = strconv.Itoa(size) + " " + k + "=" + v + "\n" 303 } 304 return record, nil 305 } 306 307 // validPAXRecord reports whether the key-value pair is valid where each 308 // record is formatted as: 309 // 310 // "%d %s=%s\n" % (size, key, value) 311 // 312 // Keys and values should be UTF-8, but the number of bad writers out there 313 // forces us to be a more liberal. 314 // Thus, we only reject all keys with NUL, and only reject NULs in values 315 // for the PAX version of the USTAR string fields. 316 // The key must not contain an '=' character. 317 func validPAXRecord(k, v string) bool { 318 if k == "" || strings.Contains(k, "=") { 319 return false 320 } 321 switch k { 322 case paxPath, paxLinkpath, paxUname, paxGname: 323 return !hasNUL(v) 324 default: 325 return !hasNUL(k) 326 } 327 }