github.com/varialus/godfly@v0.0.0-20130904042352-1934f9f095ab/src/pkg/archive/tar/reader.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package tar 6 7 // TODO(dsymonds): 8 // - pax extensions 9 10 import ( 11 "bytes" 12 "errors" 13 "io" 14 "io/ioutil" 15 "os" 16 "strconv" 17 "strings" 18 "time" 19 ) 20 21 var ( 22 ErrHeader = errors.New("archive/tar: invalid tar header") 23 ) 24 25 const maxNanoSecondIntSize = 9 26 27 // A Reader provides sequential access to the contents of a tar archive. 28 // A tar archive consists of a sequence of files. 29 // The Next method advances to the next file in the archive (including the first), 30 // and then it can be treated as an io.Reader to access the file's data. 31 type Reader struct { 32 r io.Reader 33 err error 34 nb int64 // number of unread bytes for current file entry 35 pad int64 // amount of padding (ignored) after current file entry 36 } 37 38 // NewReader creates a new Reader reading from r. 39 func NewReader(r io.Reader) *Reader { return &Reader{r: r} } 40 41 // Next advances to the next entry in the tar archive. 42 func (tr *Reader) Next() (*Header, error) { 43 var hdr *Header 44 if tr.err == nil { 45 tr.skipUnread() 46 } 47 if tr.err != nil { 48 return hdr, tr.err 49 } 50 hdr = tr.readHeader() 51 if hdr == nil { 52 return hdr, tr.err 53 } 54 // Check for PAX/GNU header. 55 switch hdr.Typeflag { 56 case TypeXHeader: 57 // PAX extended header 58 headers, err := parsePAX(tr) 59 if err != nil { 60 return nil, err 61 } 62 // We actually read the whole file, 63 // but this skips alignment padding 64 tr.skipUnread() 65 hdr = tr.readHeader() 66 mergePAX(hdr, headers) 67 return hdr, nil 68 case TypeGNULongName: 69 // We have a GNU long name header. Its contents are the real file name. 70 realname, err := ioutil.ReadAll(tr) 71 if err != nil { 72 return nil, err 73 } 74 hdr, err := tr.Next() 75 hdr.Name = cString(realname) 76 return hdr, err 77 case TypeGNULongLink: 78 // We have a GNU long link header. 79 realname, err := ioutil.ReadAll(tr) 80 if err != nil { 81 return nil, err 82 } 83 hdr, err := tr.Next() 84 hdr.Linkname = cString(realname) 85 return hdr, err 86 } 87 return hdr, tr.err 88 } 89 90 // mergePAX merges well known headers according to PAX standard. 91 // In general headers with the same name as those found 92 // in the header struct overwrite those found in the header 93 // struct with higher precision or longer values. Esp. useful 94 // for name and linkname fields. 95 func mergePAX(hdr *Header, headers map[string]string) error { 96 for k, v := range headers { 97 switch k { 98 case paxPath: 99 hdr.Name = v 100 case paxLinkpath: 101 hdr.Linkname = v 102 case paxGname: 103 hdr.Gname = v 104 case paxUname: 105 hdr.Uname = v 106 case paxUid: 107 uid, err := strconv.ParseInt(v, 10, 0) 108 if err != nil { 109 return err 110 } 111 hdr.Uid = int(uid) 112 case paxGid: 113 gid, err := strconv.ParseInt(v, 10, 0) 114 if err != nil { 115 return err 116 } 117 hdr.Gid = int(gid) 118 case paxAtime: 119 t, err := parsePAXTime(v) 120 if err != nil { 121 return err 122 } 123 hdr.AccessTime = t 124 case paxMtime: 125 t, err := parsePAXTime(v) 126 if err != nil { 127 return err 128 } 129 hdr.ModTime = t 130 case paxCtime: 131 t, err := parsePAXTime(v) 132 if err != nil { 133 return err 134 } 135 hdr.ChangeTime = t 136 case paxSize: 137 size, err := strconv.ParseInt(v, 10, 0) 138 if err != nil { 139 return err 140 } 141 hdr.Size = int64(size) 142 } 143 144 } 145 return nil 146 } 147 148 // parsePAXTime takes a string of the form %d.%d as described in 149 // the PAX specification. 150 func parsePAXTime(t string) (time.Time, error) { 151 buf := []byte(t) 152 pos := bytes.IndexByte(buf, '.') 153 var seconds, nanoseconds int64 154 var err error 155 if pos == -1 { 156 seconds, err = strconv.ParseInt(t, 10, 0) 157 if err != nil { 158 return time.Time{}, err 159 } 160 } else { 161 seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0) 162 if err != nil { 163 return time.Time{}, err 164 } 165 nano_buf := string(buf[pos+1:]) 166 // Pad as needed before converting to a decimal. 167 // For example .030 -> .030000000 -> 30000000 nanoseconds 168 if len(nano_buf) < maxNanoSecondIntSize { 169 // Right pad 170 nano_buf += strings.Repeat("0", maxNanoSecondIntSize-len(nano_buf)) 171 } else if len(nano_buf) > maxNanoSecondIntSize { 172 // Right truncate 173 nano_buf = nano_buf[:maxNanoSecondIntSize] 174 } 175 nanoseconds, err = strconv.ParseInt(string(nano_buf), 10, 0) 176 if err != nil { 177 return time.Time{}, err 178 } 179 } 180 ts := time.Unix(seconds, nanoseconds) 181 return ts, nil 182 } 183 184 // parsePAX parses PAX headers. 185 // If an extended header (type 'x') is invalid, ErrHeader is returned 186 func parsePAX(r io.Reader) (map[string]string, error) { 187 buf, err := ioutil.ReadAll(r) 188 if err != nil { 189 return nil, err 190 } 191 headers := make(map[string]string) 192 // Each record is constructed as 193 // "%d %s=%s\n", length, keyword, value 194 for len(buf) > 0 { 195 // or the header was empty to start with. 196 var sp int 197 // The size field ends at the first space. 198 sp = bytes.IndexByte(buf, ' ') 199 if sp == -1 { 200 return nil, ErrHeader 201 } 202 // Parse the first token as a decimal integer. 203 n, err := strconv.ParseInt(string(buf[:sp]), 10, 0) 204 if err != nil { 205 return nil, ErrHeader 206 } 207 // Extract everything between the decimal and the n -1 on the 208 // beginning to to eat the ' ', -1 on the end to skip the newline. 209 var record []byte 210 record, buf = buf[sp+1:n-1], buf[n:] 211 // The first equals is guaranteed to mark the end of the key. 212 // Everything else is value. 213 eq := bytes.IndexByte(record, '=') 214 if eq == -1 { 215 return nil, ErrHeader 216 } 217 key, value := record[:eq], record[eq+1:] 218 headers[string(key)] = string(value) 219 } 220 return headers, nil 221 } 222 223 // cString parses bytes as a NUL-terminated C-style string. 224 // If a NUL byte is not found then the whole slice is returned as a string. 225 func cString(b []byte) string { 226 n := 0 227 for n < len(b) && b[n] != 0 { 228 n++ 229 } 230 return string(b[0:n]) 231 } 232 233 func (tr *Reader) octal(b []byte) int64 { 234 // Check for binary format first. 235 if len(b) > 0 && b[0]&0x80 != 0 { 236 var x int64 237 for i, c := range b { 238 if i == 0 { 239 c &= 0x7f // ignore signal bit in first byte 240 } 241 x = x<<8 | int64(c) 242 } 243 return x 244 } 245 246 // Because unused fields are filled with NULs, we need 247 // to skip leading NULs. Fields may also be padded with 248 // spaces or NULs. 249 // So we remove leading and trailing NULs and spaces to 250 // be sure. 251 b = bytes.Trim(b, " \x00") 252 253 if len(b) == 0 { 254 return 0 255 } 256 x, err := strconv.ParseUint(cString(b), 8, 64) 257 if err != nil { 258 tr.err = err 259 } 260 return int64(x) 261 } 262 263 // skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding. 264 func (tr *Reader) skipUnread() { 265 nr := tr.nb + tr.pad // number of bytes to skip 266 tr.nb, tr.pad = 0, 0 267 if sr, ok := tr.r.(io.Seeker); ok { 268 if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil { 269 return 270 } 271 } 272 _, tr.err = io.CopyN(ioutil.Discard, tr.r, nr) 273 } 274 275 func (tr *Reader) verifyChecksum(header []byte) bool { 276 if tr.err != nil { 277 return false 278 } 279 280 given := tr.octal(header[148:156]) 281 unsigned, signed := checksum(header) 282 return given == unsigned || given == signed 283 } 284 285 func (tr *Reader) readHeader() *Header { 286 header := make([]byte, blockSize) 287 if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { 288 return nil 289 } 290 291 // Two blocks of zero bytes marks the end of the archive. 292 if bytes.Equal(header, zeroBlock[0:blockSize]) { 293 if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { 294 return nil 295 } 296 if bytes.Equal(header, zeroBlock[0:blockSize]) { 297 tr.err = io.EOF 298 } else { 299 tr.err = ErrHeader // zero block and then non-zero block 300 } 301 return nil 302 } 303 304 if !tr.verifyChecksum(header) { 305 tr.err = ErrHeader 306 return nil 307 } 308 309 // Unpack 310 hdr := new(Header) 311 s := slicer(header) 312 313 hdr.Name = cString(s.next(100)) 314 hdr.Mode = tr.octal(s.next(8)) 315 hdr.Uid = int(tr.octal(s.next(8))) 316 hdr.Gid = int(tr.octal(s.next(8))) 317 hdr.Size = tr.octal(s.next(12)) 318 hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0) 319 s.next(8) // chksum 320 hdr.Typeflag = s.next(1)[0] 321 hdr.Linkname = cString(s.next(100)) 322 323 // The remainder of the header depends on the value of magic. 324 // The original (v7) version of tar had no explicit magic field, 325 // so its magic bytes, like the rest of the block, are NULs. 326 magic := string(s.next(8)) // contains version field as well. 327 var format string 328 switch magic { 329 case "ustar\x0000": // POSIX tar (1003.1-1988) 330 if string(header[508:512]) == "tar\x00" { 331 format = "star" 332 } else { 333 format = "posix" 334 } 335 case "ustar \x00": // old GNU tar 336 format = "gnu" 337 } 338 339 switch format { 340 case "posix", "gnu", "star": 341 hdr.Uname = cString(s.next(32)) 342 hdr.Gname = cString(s.next(32)) 343 devmajor := s.next(8) 344 devminor := s.next(8) 345 if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock { 346 hdr.Devmajor = tr.octal(devmajor) 347 hdr.Devminor = tr.octal(devminor) 348 } 349 var prefix string 350 switch format { 351 case "posix", "gnu": 352 prefix = cString(s.next(155)) 353 case "star": 354 prefix = cString(s.next(131)) 355 hdr.AccessTime = time.Unix(tr.octal(s.next(12)), 0) 356 hdr.ChangeTime = time.Unix(tr.octal(s.next(12)), 0) 357 } 358 if len(prefix) > 0 { 359 hdr.Name = prefix + "/" + hdr.Name 360 } 361 } 362 363 if tr.err != nil { 364 tr.err = ErrHeader 365 return nil 366 } 367 368 // Maximum value of hdr.Size is 64 GB (12 octal digits), 369 // so there's no risk of int64 overflowing. 370 tr.nb = int64(hdr.Size) 371 tr.pad = -tr.nb & (blockSize - 1) // blockSize is a power of two 372 373 return hdr 374 } 375 376 // Read reads from the current entry in the tar archive. 377 // It returns 0, io.EOF when it reaches the end of that entry, 378 // until Next is called to advance to the next entry. 379 func (tr *Reader) Read(b []byte) (n int, err error) { 380 if tr.nb == 0 { 381 // file consumed 382 return 0, io.EOF 383 } 384 385 if int64(len(b)) > tr.nb { 386 b = b[0:tr.nb] 387 } 388 n, err = tr.r.Read(b) 389 tr.nb -= int64(n) 390 391 if err == io.EOF && tr.nb > 0 { 392 err = io.ErrUnexpectedEOF 393 } 394 tr.err = err 395 return 396 }