github.com/gopacket/gopacket@v1.1.0/tcpassembly/assembly.go (about)

     1  // Copyright 2012 Google, Inc. All rights reserved.
     2  //
     3  // Use of this source code is governed by a BSD-style license
     4  // that can be found in the LICENSE file in the root of the source
     5  // tree.
     6  
     7  // Package tcpassembly provides TCP stream re-assembly.
     8  //
     9  // The tcpassembly package implements uni-directional TCP reassembly, for use in
    10  // packet-sniffing applications.  The caller reads packets off the wire, then
    11  // presents them to an Assembler in the form of gopacket layers.TCP packets
    12  // (github.com/gopacket/gopacket, github.com/gopacket/gopacket/layers).
    13  //
    14  // The Assembler uses a user-supplied
    15  // StreamFactory to create a user-defined Stream interface, then passes packet
    16  // data in stream order to that object.  A concurrency-safe StreamPool keeps
    17  // track of all current Streams being reassembled, so multiple Assemblers may
    18  // run at once to assemble packets while taking advantage of multiple cores.
    19  package tcpassembly
    20  
    21  import (
    22  	"flag"
    23  	"fmt"
    24  	"log"
    25  	"sync"
    26  	"time"
    27  
    28  	"github.com/gopacket/gopacket"
    29  	"github.com/gopacket/gopacket/layers"
    30  )
    31  
    32  var memLog = flag.Bool("assembly_memuse_log", false, "If true, the github.com/gopacket/gopacket/tcpassembly library will log information regarding its memory use every once in a while.")
    33  var debugLog = flag.Bool("assembly_debug_log", false, "If true, the github.com/gopacket/gopacket/tcpassembly library will log verbose debugging information (at least one line per packet)")
    34  
    35  const invalidSequence = -1
    36  const uint32Size = 1 << 32
    37  
    38  // Sequence is a TCP sequence number.  It provides a few convenience functions
    39  // for handling TCP wrap-around.  The sequence should always be in the range
    40  // [0,0xFFFFFFFF]... its other bits are simply used in wrap-around calculations
    41  // and should never be set.
    42  type Sequence int64
    43  
    44  // Difference defines an ordering for comparing TCP sequences that's safe for
    45  // roll-overs.  It returns:
    46  //
    47  //	> 0 : if t comes after s
    48  //	< 0 : if t comes before s
    49  //	  0 : if t == s
    50  //
    51  // The number returned is the sequence difference, so 4.Difference(8) will
    52  // return 4.
    53  //
    54  // It handles rollovers by considering any sequence in the first quarter of the
    55  // uint32 space to be after any sequence in the last quarter of that space, thus
    56  // wrapping the uint32 space.
    57  func (s Sequence) Difference(t Sequence) int {
    58  	if s > uint32Size-uint32Size/4 && t < uint32Size/4 {
    59  		t += uint32Size
    60  	} else if t > uint32Size-uint32Size/4 && s < uint32Size/4 {
    61  		s += uint32Size
    62  	}
    63  	return int(t - s)
    64  }
    65  
    66  // Add adds an integer to a sequence and returns the resulting sequence.
    67  func (s Sequence) Add(t int) Sequence {
    68  	return (s + Sequence(t)) & (uint32Size - 1)
    69  }
    70  
// Reassembly is one in-order chunk of stream data handed by an Assembler to
// a Stream through the Reassembled call.  Callers should not need to create
// these structs themselves except for testing.
type Reassembly struct {
	// Bytes is the next set of bytes in the stream.  May be empty.
	Bytes []byte
	// Skip is non-zero if bytes were skipped between this and the last
	// Reassembly.  It is -1 when the next expected sequence number was not
	// yet known (we never saw the start of the connection), so the size of
	// the gap is unknown; otherwise it is the number of bytes skipped.
	Skip int
	// Start is set if this set of bytes has a TCP SYN accompanying it.
	Start bool
	// End is set if this set of bytes has a TCP FIN or RST accompanying it.
	End bool
	// Seen is the timestamp this set of bytes was pulled off the wire.
	Seen time.Time
}
    89  
// pageBytes is the payload capacity of a single page; longer payloads are
// split across several linked pages.
const pageBytes = 1900

// page is used to store TCP data we're not ready for yet (out-of-order
// packets).  Unused pages are stored in and returned from a pageCache, which
// avoids memory allocation.  Used pages are stored in a doubly-linked list in
// a connection.
type page struct {
	Reassembly            // data handed to the stream; Bytes aliases buf
	seq        Sequence   // sequence number of the first byte held in this page
	index      int        // NOTE(review): not referenced by the code visible in this file
	prev, next *page      // neighbours in the connection's list, ordered by seq
	buf        [pageBytes]byte // backing storage for Reassembly.Bytes
}
   103  
// pageCache is a concurrency-unsafe store of page objects we use to avoid
// memory allocation as much as we can.  It grows but never shrinks.
type pageCache struct {
	free         []*page  // stack of pages available for reuse
	pcSize       int      // number of pages the next grow call will allocate
	size, used   int      // total pages allocated so far; pages currently handed out
	pages        [][]page // every allocated batch, kept so the pages stay reachable
	pageRequests int64    // count of next calls, maintained only while memLog is on
}
   113  
   114  const initialAllocSize = 1024
   115  
   116  func newPageCache() *pageCache {
   117  	pc := &pageCache{
   118  		free:   make([]*page, 0, initialAllocSize),
   119  		pcSize: initialAllocSize,
   120  	}
   121  	pc.grow()
   122  	return pc
   123  }
   124  
   125  // grow exponentially increases the size of our page cache as much as necessary.
   126  func (c *pageCache) grow() {
   127  	pages := make([]page, c.pcSize)
   128  	c.pages = append(c.pages, pages)
   129  	c.size += c.pcSize
   130  	for i := range pages {
   131  		c.free = append(c.free, &pages[i])
   132  	}
   133  	if *memLog {
   134  		log.Println("PageCache: created", c.pcSize, "new pages")
   135  	}
   136  	c.pcSize *= 2
   137  }
   138  
   139  // next returns a clean, ready-to-use page object.
   140  func (c *pageCache) next(ts time.Time) (p *page) {
   141  	if *memLog {
   142  		c.pageRequests++
   143  		if c.pageRequests&0xFFFF == 0 {
   144  			log.Println("PageCache:", c.pageRequests, "requested,", c.used, "used,", len(c.free), "free")
   145  		}
   146  	}
   147  	if len(c.free) == 0 {
   148  		c.grow()
   149  	}
   150  	i := len(c.free) - 1
   151  	p, c.free = c.free[i], c.free[:i]
   152  	p.prev = nil
   153  	p.next = nil
   154  	p.Reassembly = Reassembly{Bytes: p.buf[:0], Seen: ts}
   155  	c.used++
   156  	return p
   157  }
   158  
   159  // replace replaces a page into the pageCache.
   160  func (c *pageCache) replace(p *page) {
   161  	c.used--
   162  	c.free = append(c.free, p)
   163  }
   164  
// Stream is implemented by the caller to handle incoming reassembled
// TCP data.  Callers create a StreamFactory, then StreamPool uses
// it to create a new Stream for every TCP stream.
//
// assembly will, in order:
//  1. Create the stream via StreamFactory.New
//  2. Call Reassembled 0 or more times, passing in reassembled TCP data in order
//  3. Call ReassemblyComplete one time, after which the stream is dereferenced by assembly.
type Stream interface {
	// Reassembled is called zero or more times.  assembly guarantees
	// that the set of all Reassembly objects passed in during all
	// calls are presented in the order they appear in the TCP stream.
	// Reassembly objects are reused after each Reassembled call,
	// so it's important to copy anything you need out of them
	// (specifically out of Reassembly.Bytes) that you need to stay
	// around after you return from the Reassembled call.
	Reassembled([]Reassembly)
	// ReassemblyComplete is called when assembly decides there is
	// no more data for this Stream, either because a FIN or RST packet
	// was seen, or because the connection was closed by one of the
	// Assembler's flush methods (FlushOlderThan, FlushWithOptions,
	// FlushAll).
	ReassemblyComplete()
}
   188  
// StreamFactory is used by assembly to create a new stream for each
// new TCP session.
type StreamFactory interface {
	// New should return a new stream for the given TCP key: netFlow is the
	// network-layer flow and tcpFlow the transport-layer flow of the
	// packet that caused the stream to be created.
	New(netFlow, tcpFlow gopacket.Flow) Stream
}
   195  
   196  func (p *StreamPool) connections() []*connection {
   197  	p.mu.RLock()
   198  	conns := make([]*connection, 0, len(p.conns))
   199  	for _, conn := range p.conns {
   200  		conns = append(conns, conn)
   201  	}
   202  	p.mu.RUnlock()
   203  	return conns
   204  }
   205  
// FlushOptions provide options for flushing connections.
type FlushOptions struct {
	T        time.Time // Cutoff: only buffered data whose Seen timestamp is before T is flushed
	CloseAll bool      // If true, drained connections whose last packet was seen before T are closed too, not just those that saw FIN/RST
}
   211  
   212  // FlushWithOptions finds any streams waiting for packets older than
   213  // the given time, and pushes through the data they have (IE: tells
   214  // them to stop waiting and skip the data they're waiting for).
   215  //
   216  // Each Stream maintains a list of zero or more sets of bytes it has received
   217  // out-of-order.  For example, if it has processed up through sequence number
   218  // 10, it might have bytes [15-20), [20-25), [30,50) in its list.  Each set of
   219  // bytes also has the timestamp it was originally viewed.  A flush call will
   220  // look at the smallest subsequent set of bytes, in this case [15-20), and if
   221  // its timestamp is older than the passed-in time, it will push it and all
   222  // contiguous byte-sets out to the Stream's Reassembled function.  In this case,
   223  // it will push [15-20), but also [20-25), since that's contiguous.  It will
   224  // only push [30-50) if its timestamp is also older than the passed-in time,
   225  // otherwise it will wait until the next FlushOlderThan to see if bytes [25-30)
   226  // come in.
   227  //
   228  // If it pushes all bytes (or there were no sets of bytes to begin with)
   229  // AND the connection has not received any bytes since the passed-in time,
   230  // the connection will be closed.
   231  //
   232  // If CloseAll is set, it will close out connections that have been drained.
   233  // Regardless of the CloseAll setting, connections stale for the specified
   234  // time will be closed.
   235  //
   236  // Returns the number of connections flushed, and of those, the number closed
   237  // because of the flush.
   238  func (a *Assembler) FlushWithOptions(opt FlushOptions) (flushed, closed int) {
   239  	conns := a.connPool.connections()
   240  	closes := 0
   241  	flushes := 0
   242  	for _, conn := range conns {
   243  		flushed := false
   244  		conn.mu.Lock()
   245  		if conn.closed {
   246  			// Already closed connection, nothing to do here.
   247  			conn.mu.Unlock()
   248  			continue
   249  		}
   250  		for conn.first != nil && conn.first.Seen.Before(opt.T) {
   251  			a.skipFlush(conn)
   252  			flushed = true
   253  			if conn.closed {
   254  				closes++
   255  				break
   256  			}
   257  		}
   258  		if opt.CloseAll && !conn.closed && conn.first == nil && conn.lastSeen.Before(opt.T) {
   259  			flushed = true
   260  			a.closeConnection(conn)
   261  			closes++
   262  		}
   263  		if flushed {
   264  			flushes++
   265  		}
   266  		conn.mu.Unlock()
   267  	}
   268  	return flushes, closes
   269  }
   270  
   271  // FlushOlderThan calls FlushWithOptions with the CloseAll option set to true.
   272  func (a *Assembler) FlushOlderThan(t time.Time) (flushed, closed int) {
   273  	return a.FlushWithOptions(FlushOptions{CloseAll: true, T: t})
   274  }
   275  
   276  // FlushAll flushes all remaining data into all remaining connections, closing
   277  // those connections.  It returns the total number of connections flushed/closed
   278  // by the call.
   279  func (a *Assembler) FlushAll() (closed int) {
   280  	conns := a.connPool.connections()
   281  	closed = len(conns)
   282  	for _, conn := range conns {
   283  		conn.mu.Lock()
   284  		for !conn.closed {
   285  			a.skipFlush(conn)
   286  		}
   287  		conn.mu.Unlock()
   288  	}
   289  	return
   290  }
   291  
   292  type key [2]gopacket.Flow
   293  
   294  func (k *key) String() string {
   295  	return fmt.Sprintf("%s:%s", k[0], k[1])
   296  }
   297  
// StreamPool stores all streams created by Assemblers, allowing multiple
// assemblers to work together on stream processing while enforcing the fact
// that a single stream receives its data serially.  It is safe
// for concurrency, usable by multiple Assemblers at once.
//
// StreamPool handles the creation and storage of Stream objects used by one or
// more Assembler objects.  When a new TCP stream is found by an Assembler, it
// creates an associated Stream by calling its StreamFactory's New method.
// Thereafter (until the stream is closed), that Stream object will receive
// assembled TCP data via Assembler's calls to the stream's Reassembled
// function.
//
// Like the Assembler, StreamPool attempts to minimize allocation.  Unlike the
// Assembler, though, it does have to do some locking to make sure that the
// connection objects it stores are accessible to multiple Assemblers.
type StreamPool struct {
	conns              map[key]*connection // live connections, keyed by flow pair
	users              int                 // number of Assemblers created on this pool
	mu                 sync.RWMutex        // guards the fields of this struct
	factory            StreamFactory       // creates the Stream for each new connection
	free               []*connection       // stack of connection objects available for reuse
	all                [][]connection      // every allocated batch of connection objects
	nextAlloc          int                 // number of connections the next grow call allocates
	newConnectionCount int64               // count of newConnection calls, maintained only while memLog is on
}
   323  
   324  func (p *StreamPool) grow() {
   325  	conns := make([]connection, p.nextAlloc)
   326  	p.all = append(p.all, conns)
   327  	for i := range conns {
   328  		p.free = append(p.free, &conns[i])
   329  	}
   330  	if *memLog {
   331  		log.Println("StreamPool: created", p.nextAlloc, "new connections")
   332  	}
   333  	p.nextAlloc *= 2
   334  }
   335  
   336  // NewStreamPool creates a new connection pool.  Streams will
   337  // be created as necessary using the passed-in StreamFactory.
   338  func NewStreamPool(factory StreamFactory) *StreamPool {
   339  	return &StreamPool{
   340  		conns:     make(map[key]*connection, initialAllocSize),
   341  		free:      make([]*connection, 0, initialAllocSize),
   342  		factory:   factory,
   343  		nextAlloc: initialAllocSize,
   344  	}
   345  }
   346  
   347  const assemblerReturnValueInitialSize = 16
   348  
   349  // NewAssembler creates a new assembler.  Pass in the StreamPool
   350  // to use, may be shared across assemblers.
   351  //
   352  // This sets some sane defaults for the assembler options,
   353  // see DefaultAssemblerOptions for details.
   354  func NewAssembler(pool *StreamPool) *Assembler {
   355  	pool.mu.Lock()
   356  	pool.users++
   357  	pool.mu.Unlock()
   358  	return &Assembler{
   359  		ret:              make([]Reassembly, assemblerReturnValueInitialSize),
   360  		pc:               newPageCache(),
   361  		connPool:         pool,
   362  		AssemblerOptions: DefaultAssemblerOptions,
   363  	}
   364  }
   365  
// DefaultAssemblerOptions provides default options for an assembler.
// These options are copied into every Assembler by NewAssembler, so if
// modified before a NewAssembler call they'll affect the resulting Assembler.
//
// Note that the default options place no limit on buffered pages, which can
// result in ever-increasing memory usage unless one of the Flush* methods is
// called on a regular basis.
var DefaultAssemblerOptions = AssemblerOptions{
	MaxBufferedPagesPerConnection: 0, // unlimited
	MaxBufferedPagesTotal:         0, // unlimited
}
   376  
// connection holds the reassembly state of one uni-directional TCP stream.
// Connection objects are recycled through the StreamPool's free list.
type connection struct {
	key               key        // flow pair identifying this stream in the pool
	pages             int        // number of pages currently buffered in the list
	first, last       *page      // ends of the buffered out-of-order page list
	nextSeq           Sequence   // next sequence number expected; invalidSequence until known
	created, lastSeen time.Time  // timestamp given at (re)initialisation; latest packet timestamp
	stream            Stream     // user stream that receives the reassembled data
	closed            bool       // set once the connection has been closed
	mu                sync.Mutex // held by an Assembler while it works on this connection
}
   387  
   388  func (c *connection) reset(k key, s Stream, ts time.Time) {
   389  	c.key = k
   390  	c.pages = 0
   391  	c.first, c.last = nil, nil
   392  	c.nextSeq = invalidSequence
   393  	c.created = ts
   394  	c.stream = s
   395  	c.closed = false
   396  }
   397  
// AssemblerOptions controls the behavior of each assembler.  Modify the
// options of each assembler you create to change their behavior.
type AssemblerOptions struct {
	// MaxBufferedPagesTotal is an upper limit on the total number of pages to
	// buffer while waiting for out-of-order packets.  Once this limit is
	// reached, the assembler will degrade to flushing every connection it
	// gets a packet for.  If <= 0, this is ignored.
	MaxBufferedPagesTotal int
	// MaxBufferedPagesPerConnection is an upper limit on the number of pages
	// buffered for a single connection.  Should this limit be reached for a
	// particular connection, the smallest sequence number will be flushed, along
	// with any contiguous data.  If <= 0, this is ignored.
	MaxBufferedPagesPerConnection int
}
   412  
// Assembler handles reassembling TCP streams.  It is not safe for
// concurrency... after passing a packet in via the Assemble call, the caller
// must wait for that call to return before calling Assemble again.  Callers can
// get around this by creating multiple assemblers that share a StreamPool.  In
// that case, each individual stream will still be handled serially (each stream
// has an individual mutex associated with it), however multiple assemblers can
// assemble different connections concurrently.
//
// The Assembler provides (hopefully) fast TCP stream re-assembly for sniffing
// applications written in Go.  The Assembler uses the following methods to be
// as fast as possible, to keep packet processing speedy:
//
// # Avoids Lock Contention
//
// Assemblers lock connections, but each connection has an individual lock, and
// rarely will two Assemblers be looking at the same connection.  Assemblers
// lock the StreamPool when looking up connections, but they use Reader
// locks initially, and only force a write lock if they need to create a new
// connection or close one down.  These happen much less frequently than
// individual packet handling.
//
// Each assembler runs in its own goroutine, and the only state shared between
// goroutines is through the StreamPool.  Thus all internal Assembler state
// can be handled without any locking.
//
// NOTE:  If you can guarantee that packets going to a set of Assemblers will
// contain information on different connections per Assembler (for example,
// they're already hashed by PF_RING hashing or some other hashing mechanism),
// then we recommend you use a separate StreamPool per Assembler, thus
// avoiding all lock contention.  Only when different Assemblers could receive
// packets for the same Stream should a StreamPool be shared between them.
//
// # Avoids Memory Copying
//
// In the common case, handling of a single TCP packet should result in zero
// memory allocations.  The Assembler will look up the connection, figure out
// that the packet has arrived in order, and immediately pass that packet on to
// the appropriate connection's handling code.  Only if a packet arrives out of
// order is its contents copied and stored in memory for later.
//
// # Avoids Memory Allocation
//
// Assemblers try very hard to not use memory allocation unless absolutely
// necessary.  Packet data for sequential packets is passed directly to streams
// with no copying or allocation.  Packet data for out-of-order packets is
// copied into reusable pages, and new pages are only allocated rarely when the
// page cache runs out.  Page caches are Assembler-specific, thus not used
// concurrently and requiring no locking.
//
// Internal representations for connection objects are also reused over time.
// Because of this, the most common memory allocation done by the Assembler is
// generally what's done by the caller in StreamFactory.New.  If no allocation
// is done there, then very little allocation is done ever, mostly to handle
// large increases in bandwidth or numbers of connections.
//
// TODO:  The page caches used by an Assembler will grow to the size necessary
// to handle a workload, and currently will never shrink.  This means that
// traffic spikes can result in large memory usage which isn't garbage
// collected when typical traffic levels return.
type Assembler struct {
	AssemblerOptions
	ret      []Reassembly // scratch list of data to hand to the current stream
	pc       *pageCache   // this assembler's private page cache
	connPool *StreamPool  // pool of connections, possibly shared with other assemblers
}
   478  
   479  func (p *StreamPool) newConnection(k key, s Stream, ts time.Time) (c *connection) {
   480  	if *memLog {
   481  		p.newConnectionCount++
   482  		if p.newConnectionCount&0x7FFF == 0 {
   483  			log.Println("StreamPool:", p.newConnectionCount, "requests,", len(p.conns), "used,", len(p.free), "free")
   484  		}
   485  	}
   486  	if len(p.free) == 0 {
   487  		p.grow()
   488  	}
   489  	index := len(p.free) - 1
   490  	c, p.free = p.free[index], p.free[:index]
   491  	c.reset(k, s, ts)
   492  	return c
   493  }
   494  
   495  // getConnection returns a connection.  If end is true and a connection
   496  // does not already exist, returns nil.  This allows us to check for a
   497  // connection without actually creating one if it doesn't already exist.
   498  func (p *StreamPool) getConnection(k key, end bool, ts time.Time) *connection {
   499  	p.mu.RLock()
   500  	conn := p.conns[k]
   501  	p.mu.RUnlock()
   502  	if end || conn != nil {
   503  		return conn
   504  	}
   505  	s := p.factory.New(k[0], k[1])
   506  	p.mu.Lock()
   507  	conn = p.newConnection(k, s, ts)
   508  	if conn2 := p.conns[k]; conn2 != nil {
   509  		p.mu.Unlock()
   510  		return conn2
   511  	}
   512  	p.conns[k] = conn
   513  	p.mu.Unlock()
   514  	return conn
   515  }
   516  
   517  // Assemble calls AssembleWithTimestamp with the current timestamp, useful for
   518  // packets being read directly off the wire.
   519  func (a *Assembler) Assemble(netFlow gopacket.Flow, t *layers.TCP) {
   520  	a.AssembleWithTimestamp(netFlow, t, time.Now())
   521  }
   522  
// AssembleWithTimestamp reassembles the given TCP packet into its appropriate
// stream.
//
// The timestamp passed in must be the timestamp the packet was seen.
// For packets read off the wire, time.Now() should be fine.  For packets read
// from PCAP files, CaptureInfo.Timestamp should be passed in.  This timestamp
// will affect which streams are flushed by a call to FlushOlderThan.
//
// Each Assemble call results in, in order:
//
//	zero or one calls to StreamFactory.New, creating a stream
//	zero or one calls to Reassembled on a single stream
//	zero or one calls to ReassemblyComplete on the same stream
//
// (In the rare retry case described in the body, the connection lookup is
// repeated.)
func (a *Assembler) AssembleWithTimestamp(netFlow gopacket.Flow, t *layers.TCP, timestamp time.Time) {
	// Ignore empty TCP packets
	if !t.SYN && !t.FIN && !t.RST && len(t.LayerPayload()) == 0 {
		if *debugLog {
			log.Println("ignoring useless packet")
		}
		return
	}

	// Start with an empty delivery list, reusing its backing storage.
	a.ret = a.ret[:0]
	key := key{netFlow, t.TransportFlow()}
	var conn *connection
	// This for loop handles a race condition where a connection will close, lock
	// the connection pool, and remove itself, but before it locked the connection
	// pool it's returned to another Assemble statement.  This should loop 0-1
	// times for the VAST majority of cases.
	for {
		// A packet with neither SYN nor payload (a bare FIN/RST) must not
		// create a connection; it is only looked up.
		conn = a.connPool.getConnection(
			key, !t.SYN && len(t.LayerPayload()) == 0, timestamp)
		if conn == nil {
			if *debugLog {
				log.Printf("%v got empty packet on otherwise empty connection", key)
			}
			return
		}
		conn.mu.Lock()
		if !conn.closed {
			// Leave the loop with conn.mu held.
			break
		}
		conn.mu.Unlock()
	}
	// lastSeen only ever moves forward.
	if conn.lastSeen.Before(timestamp) {
		conn.lastSeen = timestamp
	}
	seq, bytes := Sequence(t.Seq), t.Payload
	if conn.nextSeq == invalidSequence {
		// Nothing has been delivered on this connection yet.
		if t.SYN {
			if *debugLog {
				log.Printf("%v saw first SYN packet, returning immediately, seq=%v", key, seq)
			}
			a.ret = append(a.ret, Reassembly{
				Bytes: bytes,
				Skip:  0,
				Start: true,
				Seen:  timestamp,
			})
			// The SYN itself consumes one sequence number.
			conn.nextSeq = seq.Add(len(bytes) + 1)
		} else {
			if *debugLog {
				log.Printf("%v waiting for start, storing into connection", key)
			}
			a.insertIntoConn(t, conn, timestamp)
		}
	} else if diff := conn.nextSeq.Difference(seq); diff > 0 {
		// The packet starts beyond the next expected byte: buffer it.
		if *debugLog {
			log.Printf("%v gap in sequence numbers (%v, %v) diff %v, storing into connection", key, conn.nextSeq, seq, diff)
		}
		a.insertIntoConn(t, conn, timestamp)
	} else {
		// The packet starts at or before the next expected byte; byteSpan
		// trims whatever part of it has already been delivered.
		bytes, conn.nextSeq = byteSpan(conn.nextSeq, seq, bytes)
		if *debugLog {
			log.Printf("%v found contiguous data (%v, %v), returning immediately", key, seq, conn.nextSeq)
		}
		a.ret = append(a.ret, Reassembly{
			Bytes: bytes,
			Skip:  0,
			End:   t.RST || t.FIN,
			Seen:  timestamp,
		})
	}
	// insertIntoConn may also have queued data if a buffering limit was hit.
	if len(a.ret) > 0 {
		a.sendToConnection(conn)
	}
	conn.mu.Unlock()
}
   611  
   612  func byteSpan(expected, received Sequence, bytes []byte) (toSend []byte, next Sequence) {
   613  	if expected == invalidSequence {
   614  		return bytes, received.Add(len(bytes))
   615  	}
   616  	span := int(received.Difference(expected))
   617  	if span <= 0 {
   618  		return bytes, received.Add(len(bytes))
   619  	} else if len(bytes) < span {
   620  		return nil, expected
   621  	}
   622  	return bytes[span:], expected.Add(len(bytes) - span)
   623  }
   624  
   625  // sendToConnection sends the current values in a.ret to the connection, closing
   626  // the connection if the last thing sent had End set.
   627  func (a *Assembler) sendToConnection(conn *connection) {
   628  	a.addContiguous(conn)
   629  	if conn.stream == nil {
   630  		panic("why?")
   631  	}
   632  	conn.stream.Reassembled(a.ret)
   633  	if a.ret[len(a.ret)-1].End {
   634  		a.closeConnection(conn)
   635  	}
   636  }
   637  
   638  // addContiguous adds contiguous byte-sets to a connection.
   639  func (a *Assembler) addContiguous(conn *connection) {
   640  	for conn.first != nil && conn.nextSeq.Difference(conn.first.seq) <= 0 {
   641  		a.addNextFromConn(conn)
   642  	}
   643  }
   644  
   645  // skipFlush skips the first set of bytes we're waiting for and returns the
   646  // first set of bytes we have.  If we have no bytes pending, it closes the
   647  // connection.
   648  func (a *Assembler) skipFlush(conn *connection) {
   649  	if *debugLog {
   650  		log.Printf("%v skipFlush %v", conn.key, conn.nextSeq)
   651  	}
   652  	if conn.first == nil {
   653  		a.closeConnection(conn)
   654  		return
   655  	}
   656  	a.ret = a.ret[:0]
   657  	a.addNextFromConn(conn)
   658  	a.addContiguous(conn)
   659  	a.sendToConnection(conn)
   660  }
   661  
   662  func (p *StreamPool) remove(conn *connection) {
   663  	p.mu.Lock()
   664  	delete(p.conns, conn.key)
   665  	p.free = append(p.free, conn)
   666  	p.mu.Unlock()
   667  }
   668  
   669  func (a *Assembler) closeConnection(conn *connection) {
   670  	if *debugLog {
   671  		log.Printf("%v closing", conn.key)
   672  	}
   673  	conn.stream.ReassemblyComplete()
   674  	conn.closed = true
   675  	a.connPool.remove(conn)
   676  	for p := conn.first; p != nil; p = p.next {
   677  		a.pc.replace(p)
   678  	}
   679  }
   680  
   681  // traverseConn traverses our doubly-linked list of pages for the correct
   682  // position to put the given sequence number.  Note that it traverses backwards,
   683  // starting at the highest sequence number and going down, since we assume the
   684  // common case is that TCP packets for a stream will appear in-order, with
   685  // minimal loss or packet reordering.
   686  func (c *connection) traverseConn(seq Sequence) (prev, current *page) {
   687  	prev = c.last
   688  	for prev != nil && prev.seq.Difference(seq) < 0 {
   689  		current = prev
   690  		prev = current.prev
   691  	}
   692  	return
   693  }
   694  
   695  // pushBetween inserts the doubly-linked list first-...-last in between the
   696  // nodes prev-next in another doubly-linked list.  If prev is nil, makes first
   697  // the new first page in the connection's list.  If next is nil, makes last the
   698  // new last page in the list.  first/last may point to the same page.
   699  func (c *connection) pushBetween(prev, next, first, last *page) {
   700  	// Maintain our doubly linked list
   701  	if next == nil || c.last == nil {
   702  		c.last = last
   703  	} else {
   704  		last.next = next
   705  		next.prev = last
   706  	}
   707  	if prev == nil || c.first == nil {
   708  		c.first = first
   709  	} else {
   710  		first.prev = prev
   711  		prev.next = first
   712  	}
   713  }
   714  
   715  func (a *Assembler) insertIntoConn(t *layers.TCP, conn *connection, ts time.Time) {
   716  	if conn.first != nil && conn.first.seq == conn.nextSeq {
   717  		panic("wtf")
   718  	}
   719  	p, p2, numPages := a.pagesFromTCP(t, ts)
   720  	prev, current := conn.traverseConn(Sequence(t.Seq))
   721  	conn.pushBetween(prev, current, p, p2)
   722  	conn.pages += numPages
   723  	if (a.MaxBufferedPagesPerConnection > 0 && conn.pages >= a.MaxBufferedPagesPerConnection) ||
   724  		(a.MaxBufferedPagesTotal > 0 && a.pc.used >= a.MaxBufferedPagesTotal) {
   725  		if *debugLog {
   726  			log.Printf("%v hit max buffer size: %+v, %v, %v", conn.key, a.AssemblerOptions, conn.pages, a.pc.used)
   727  		}
   728  		a.addNextFromConn(conn)
   729  	}
   730  }
   731  
   732  // pagesFromTCP creates a page (or set of pages) from a TCP packet.  Note that
   733  // it should NEVER receive a SYN packet, as it doesn't handle sequences
   734  // correctly.
   735  //
   736  // It returns the first and last page in its doubly-linked list of new pages.
   737  func (a *Assembler) pagesFromTCP(t *layers.TCP, ts time.Time) (p, p2 *page, numPages int) {
   738  	first := a.pc.next(ts)
   739  	current := first
   740  	numPages++
   741  	seq, bytes := Sequence(t.Seq), t.Payload
   742  	for {
   743  		length := min(len(bytes), pageBytes)
   744  		current.Bytes = current.buf[:length]
   745  		copy(current.Bytes, bytes)
   746  		current.seq = seq
   747  		bytes = bytes[length:]
   748  		if len(bytes) == 0 {
   749  			break
   750  		}
   751  		seq = seq.Add(length)
   752  		current.next = a.pc.next(ts)
   753  		current.next.prev = current
   754  		current = current.next
   755  		numPages++
   756  	}
   757  	current.End = t.RST || t.FIN
   758  	return first, current, numPages
   759  }
   760  
   761  // addNextFromConn pops the first page from a connection off and adds it to the
   762  // return array.
   763  func (a *Assembler) addNextFromConn(conn *connection) {
   764  	if conn.nextSeq == invalidSequence {
   765  		conn.first.Skip = -1
   766  	} else if diff := conn.nextSeq.Difference(conn.first.seq); diff > 0 {
   767  		conn.first.Skip = int(diff)
   768  	}
   769  	conn.first.Bytes, conn.nextSeq = byteSpan(conn.nextSeq, conn.first.seq, conn.first.Bytes)
   770  	if *debugLog {
   771  		log.Printf("%v   adding from conn (%v, %v)", conn.key, conn.first.seq, conn.nextSeq)
   772  	}
   773  	a.ret = append(a.ret, conn.first.Reassembly)
   774  	a.pc.replace(conn.first)
   775  	if conn.first == conn.last {
   776  		conn.first = nil
   777  		conn.last = nil
   778  	} else {
   779  		conn.first = conn.first.next
   780  		conn.first.prev = nil
   781  	}
   782  	conn.pages--
   783  }
   784  
   785  func min(a, b int) int {
   786  	if a < b {
   787  		return a
   788  	}
   789  	return b
   790  }