github.com/gopacket/gopacket@v1.1.0/pcapgo/read.go (about)

     1  // Copyright 2014 Damjan Cvetko. All rights reserved.
     2  //
     3  // Use of this source code is governed by a BSD-style license
     4  // that can be found in the LICENSE file in the root of the source
     5  // tree.
     6  
     7  package pcapgo
     8  
     9  import (
    10  	"encoding/binary"
    11  	"errors"
    12  	"fmt"
    13  	"io"
    14  	"time"
    15  
    16  	"bufio"
    17  	"compress/gzip"
    18  
    19  	"github.com/gopacket/gopacket"
    20  	"github.com/gopacket/gopacket/layers"
    21  )
    22  
// Reader wraps an underlying io.Reader to read packet data in PCAP
// format.  See http://wiki.wireshark.org/Development/LibpcapFileFormat
// for information on the file format.
//
// We currently read v2.4 file format with nanosecond and microsecond
// timestamp resolution in little-endian and big-endian encoding.
//
// If the PCAP data is gzip compressed it is transparently uncompressed
// by wrapping the given io.Reader with a gzip.Reader.
type Reader struct {
	// r is the stream packets are read from. readHeader replaces the
	// caller-supplied reader with a buffered (and, for gzip input,
	// decompressing) wrapper around it.
	r io.Reader
	// byteOrder is the byte order of the file, chosen from the magic number.
	byteOrder binary.ByteOrder
	// nanoSecsFactor is the multiplier that turns the sub-second field of a
	// packet header into nanoseconds: 1 for nanosecond files, 1000 for
	// microsecond files.
	nanoSecsFactor uint32
	// versionMajor and versionMinor are the format version read from the
	// file header.
	versionMajor uint16
	versionMinor uint16
	// timezone
	// sigfigs
	// snaplen is the snapshot length from the file header; it can be
	// overridden with SetSnaplen.
	snaplen uint32
	// linkType is the link-layer type from the file header.
	linkType layers.LinkType
	// reusable buffer for the 16-byte per-packet header
	buf [16]byte
	// buffer for ZeroCopyReadPacketData
	packetBuf []byte
}
    47  
    48  const magicNanoseconds = 0xA1B23C4D
    49  const magicMicrosecondsBigendian = 0xD4C3B2A1
    50  const magicNanosecondsBigendian = 0x4D3CB2A1
    51  
    52  const magicGzip1 = 0x1f
    53  const magicGzip2 = 0x8b
    54  
    55  // NewReader returns a new reader object, for reading packet data from
    56  // the given reader. The reader must be open and header data is
    57  // read from it at this point.
    58  // If the file format is not supported an error is returned
    59  //
    60  //	// Create new reader:
    61  //	f, _ := os.Open("/tmp/file.pcap")
    62  //	defer f.Close()
    63  //	r, err := NewReader(f)
    64  //	data, ci, err := r.ReadPacketData()
    65  func NewReader(r io.Reader) (*Reader, error) {
    66  	ret := Reader{r: r}
    67  	if err := ret.readHeader(); err != nil {
    68  		return nil, err
    69  	}
    70  	return &ret, nil
    71  }
    72  
    73  func (r *Reader) readHeader() error {
    74  	br := bufio.NewReader(r.r)
    75  	gzipMagic, err := br.Peek(2)
    76  	if err != nil {
    77  		return err
    78  	}
    79  
    80  	if gzipMagic[0] == magicGzip1 && gzipMagic[1] == magicGzip2 {
    81  		if r.r, err = gzip.NewReader(br); err != nil {
    82  			return err
    83  		}
    84  	} else {
    85  		r.r = br
    86  	}
    87  
    88  	buf := make([]byte, 24)
    89  	if n, err := io.ReadFull(r.r, buf); err != nil {
    90  		return err
    91  	} else if n < 24 {
    92  		return errors.New("Not enough data for read")
    93  	}
    94  	if magic := binary.LittleEndian.Uint32(buf[0:4]); magic == magicNanoseconds {
    95  		r.byteOrder = binary.LittleEndian
    96  		r.nanoSecsFactor = 1
    97  	} else if magic == magicNanosecondsBigendian {
    98  		r.byteOrder = binary.BigEndian
    99  		r.nanoSecsFactor = 1
   100  	} else if magic == magicMicroseconds {
   101  		r.byteOrder = binary.LittleEndian
   102  		r.nanoSecsFactor = 1000
   103  	} else if magic == magicMicrosecondsBigendian {
   104  		r.byteOrder = binary.BigEndian
   105  		r.nanoSecsFactor = 1000
   106  	} else {
   107  		return fmt.Errorf("Unknown magic %x", magic)
   108  	}
   109  	if r.versionMajor = r.byteOrder.Uint16(buf[4:6]); r.versionMajor != versionMajor {
   110  		return fmt.Errorf("Unknown major version %d", r.versionMajor)
   111  	}
   112  	if r.versionMinor = r.byteOrder.Uint16(buf[6:8]); r.versionMinor != versionMinor {
   113  		return fmt.Errorf("Unknown minor version %d", r.versionMinor)
   114  	}
   115  	// ignore timezone 8:12 and sigfigs 12:16
   116  	r.snaplen = r.byteOrder.Uint32(buf[16:20])
   117  	r.linkType = layers.LinkType(r.byteOrder.Uint32(buf[20:24]))
   118  	return nil
   119  }
   120  
   121  // ReadPacketData reads next packet from file.
   122  func (r *Reader) ReadPacketData() (data []byte, ci gopacket.CaptureInfo, err error) {
   123  	if ci, err = r.readPacketHeader(); err != nil {
   124  		return
   125  	}
   126  	if ci.CaptureLength > int(r.snaplen) {
   127  		err = fmt.Errorf("capture length exceeds snap length: %d > %d", ci.CaptureLength, r.snaplen)
   128  		return
   129  	}
   130  	if ci.CaptureLength > ci.Length {
   131  		err = fmt.Errorf("capture length exceeds original packet length: %d > %d", ci.CaptureLength, ci.Length)
   132  		return
   133  	}
   134  	data = make([]byte, ci.CaptureLength)
   135  	_, err = io.ReadFull(r.r, data)
   136  	return data, ci, err
   137  }
   138  
   139  // ZeroCopyReadPacketData reads next packet from file. The data buffer is owned by the Reader,
   140  // and each call to ZeroCopyReadPacketData invalidates data returned by the previous one.
   141  //
   142  // It is not true zero copy, as data is still copied from the underlying reader. However,
   143  // this method avoids allocating heap memory for every packet.
   144  func (r *Reader) ZeroCopyReadPacketData() (data []byte, ci gopacket.CaptureInfo, err error) {
   145  	if ci, err = r.readPacketHeader(); err != nil {
   146  		return
   147  	}
   148  	if ci.CaptureLength > int(r.snaplen) {
   149  		err = fmt.Errorf("capture length exceeds snap length: %d > %d", ci.CaptureLength, r.snaplen)
   150  		return
   151  	}
   152  	if ci.CaptureLength > ci.Length {
   153  		err = fmt.Errorf("capture length exceeds original packet length: %d > %d", ci.CaptureLength, ci.Length)
   154  		return
   155  	}
   156  
   157  	if cap(r.packetBuf) < ci.CaptureLength {
   158  		snaplen := int(r.snaplen)
   159  		if snaplen < ci.CaptureLength {
   160  			snaplen = ci.CaptureLength
   161  		}
   162  		r.packetBuf = make([]byte, snaplen)
   163  	}
   164  	data = r.packetBuf[:ci.CaptureLength]
   165  	_, err = io.ReadFull(r.r, data)
   166  	return data, ci, err
   167  }
   168  
   169  func (r *Reader) readPacketHeader() (ci gopacket.CaptureInfo, err error) {
   170  	if _, err = io.ReadFull(r.r, r.buf[:]); err != nil {
   171  		return
   172  	}
   173  	ci.Timestamp = time.Unix(int64(r.byteOrder.Uint32(r.buf[0:4])), int64(r.byteOrder.Uint32(r.buf[4:8])*r.nanoSecsFactor)).UTC()
   174  	ci.CaptureLength = int(r.byteOrder.Uint32(r.buf[8:12]))
   175  	ci.Length = int(r.byteOrder.Uint32(r.buf[12:16]))
   176  	return
   177  }
   178  
// LinkType returns the link-layer type recorded in the capture file header,
// as a layers.LinkType.
func (r *Reader) LinkType() layers.LinkType {
	return r.linkType
}
   183  
// Snaplen returns the snapshot length of the capture file: the value read
// from the file header, or the value last passed to SetSnaplen.
func (r *Reader) Snaplen() uint32 {
	return r.snaplen
}
   188  
// SetSnaplen sets the snapshot length of the capture file.
//
// This is useful when a pcap file contains packets bigger than the snaplen.
// Pcapgo will error when reading packets bigger than snaplen, then it dumps those
// packets and reads the next 16 bytes, which are part of the "faulty" packet's payload, but pcapgo
// thinks it's the next header, which is probably also faulty because it's not really a packet header.
// This can lead to a lot of faulty reads.
//
// The SetSnaplen function can be used to set a bigger snaplen to prevent those read errors.
//
// This snaplen situation can happen when a pcap writer doesn't truncate packets to the snaplen size while writing packets to file.
// E.g. In Python, dpkt.pcap.Writer sets snaplen by default to 1500 (https://dpkt.readthedocs.io/en/latest/api/api_auto.html#dpkt.pcap.Writer)
// but doesn't enforce this when writing packets (https://dpkt.readthedocs.io/en/latest/_modules/dpkt/pcap.html#Writer.writepkt).
// When reading, tools like tcpdump, tcpslice, mergecap and wireshark ignore the snaplen and use
// their own defined snaplen.
// E.g. When reading packets, tcpdump defines MAXIMUM_SNAPLEN (https://github.com/the-tcpdump-group/tcpdump/blob/6e80fcdbe9c41366df3fa244ffe4ac8cce2ab597/netdissect.h#L290)
// and uses it (https://github.com/the-tcpdump-group/tcpdump/blob/66384fa15b04b47ad08c063d4728df3b9c1c0677/print.c#L343-L358).
//
// For further reading:
//   - https://github.com/the-tcpdump-group/tcpdump/issues/389
//   - https://bugs.wireshark.org/bugzilla/show_bug.cgi?id=8808
//   - https://www.wireshark.org/lists/wireshark-dev/201307/msg00061.html
//   - https://github.com/wireshark/wireshark/blob/bfd51199e707c1d5c28732be34b44a9ee8a91cd8/wiretap/pcap-common.c#L723-L742
//   - https://github.com/wireshark/wireshark/blob/f07fb6cdfc0904905627707b88450054e921f092/wiretap/libpcap.c#L592-L598
//   - https://github.com/wireshark/wireshark/blob/f07fb6cdfc0904905627707b88450054e921f092/wiretap/libpcap.c#L714-L727
//   - https://github.com/the-tcpdump-group/tcpdump/commit/d033c1bc381c76d13e4aface97a4f4ec8c3beca2
//   - https://github.com/the-tcpdump-group/tcpdump/blob/88e87cb2cb74c5f939792171379acd9e0efd8b9a/netdissect.h#L263-L290
func (r *Reader) SetSnaplen(newSnaplen uint32) {
	r.snaplen = newSnaplen
}
   219  
   220  // Reader formater
   221  func (r *Reader) String() string {
   222  	return fmt.Sprintf("PcapFile  maj: %x min: %x snaplen: %d linktype: %s", r.versionMajor, r.versionMinor, r.snaplen, r.linkType)
   223  }
   224  
   225  // Resolution returns the timestamp resolution of acquired timestamps before scaling to NanosecondTimestampResolution.
   226  func (r *Reader) Resolution() gopacket.TimestampResolution {
   227  	if r.nanoSecsFactor == 1 {
   228  		return gopacket.TimestampResolutionMicrosecond
   229  	}
   230  	return gopacket.TimestampResolutionNanosecond
   231  }