
     1  // Copyright 2018 GRAIL, Inc. All rights reserved.
     2  // Use of this source code is governed by the Apache 2.0
     3  // license that can be found in the LICENSE file.
     5  package exec
     7  import (
     8  	"context"
     9  	"expvar"
    10  	"fmt"
    11  	"reflect"
    12  	"sort"
    14  	""
    15  	""
    16  	""
    17  	""
    18  	""
    19  	""
    20  	""
    21  	""
    22  )
        // Process-wide combiner metrics, published through expvar under the
        // names given to expvar.NewInt.
    24  var (
        	// combinerKeys is adjusted by the change in the combining frame's
        	// entry count on every combiner.Combine call and reduced by the
        	// number of spilled rows after a successful spill.
    25  	combinerKeys         = expvar.NewInt("combinerkeys")
        	// combinerRecords grows by the number of input rows on every
        	// combiner.Combine call and is reduced after a successful spill
        	// (presumably by the combiner's running row total; confirm).
    26  	combinerRecords      = expvar.NewInt("combinerrecords")
        	// combinerTotalRecords grows by the number of input rows on every
        	// combiner.Combine call; nothing in this file decrements it.
    27  	combinerTotalRecords = expvar.NewInt("combinertotalrecords")
        	// combineDiskSpills is incremented once each time combiner.Combine
        	// compacts the in-memory frame and hands it to spill.
    28  	combineDiskSpills    = expvar.NewInt("combinediskspills")
    29  )
        // Sizing parameters for new combining frames. Both alias
        // defaultChunksize (declared elsewhere) and are dereferenced by
        // newCombiner when it calls makeCombiningFrame.
    31  var (
        	// combiningFrameInitSize is the initial hash table size. The value
        	// it points to must be a power of two, otherwise combiningFrame.make
        	// panics.
    32  	combiningFrameInitSize    = defaultChunksize
        	// combiningFrameScratchSize is the number of scratch rows allocated
        	// after the hash table; it bounds how many input rows are combined
        	// per chunk.
    33  	combiningFrameScratchSize = defaultChunksize
    34  )
    36  const (
        	// combiningFrameLoadFactor is the fraction of the hash table's
        	// capacity that may be occupied before the table is grown: make
        	// sets the growth threshold to this factor times the table size,
        	// and added doubles the table once the entry count exceeds it.
    37  	combiningFrameLoadFactor = 0.7
    39  	// HashSeed is used when hashing keys in the hash table. This is to
    40  	// prevent a previous partitioning step from reducing hash entropy.
    41  	// In the extreme case, all entropy is removed and hash combine
    42  	// operations become quadratic.
    43  	hashSeed = 0x9acb0442
    45  	// HashMaxCapacity is the largest possible combining hash table we
    46  	// can maintain. Growing a table that has reached this capacity
    47  	// panics (see combiningFrame.added).
    47  	hashMaxCapacity = 1 << 29
    48  )
    50  // TODO(marius): use ARC or something similarly adaptive when
    51  // compacting and spilling combiner frames? It could make a big
    52  // difference if keys have varying degrees of temporal locality.
    54  // A combiningFrame maintains a frame wherein values are continually
    55  // combined by a user-supplied combiner. Its type has exactly one
    56  // column beyond the type's prefix (enforced by makeCombiningFrame);
    57  // that last column holds the combined value for the row's key.
    58  //
    59  // CombiningFrame is a power-of-two sized hash table with quadratic
    60  // probing (with c0=c1=1/2, which is guaranteed to explore every index
    61  // in the hash table) implemented directly on top of a Frame.
    62  type combiningFrame struct {
    63  	// Combiner is a function that combines values in the frame.
    64  	// It should have the signature func(x, y t) t, where t is the type
    65  	// of the value column (vcol).
    66  	Combiner slicefunc.Func
        	// typ is the row type used to allocate the backing frame.
    68  	typ slicetype.Type
    70  	// vcol is the index of the column that stores the combined value.
    71  	vcol int
    73  	// Data is the data frame that is being combined. It stores both
    74  	// the hash table and a scratch table.
    75  	data frame.Frame
    77  	// Scratch stores the scratch slice of data.
    78  	scratch     frame.Frame
        	// scratchCall is a reusable two-element argument buffer for
        	// Combiner.Call: the stored value followed by the incoming value.
    79  	scratchCall [2]reflect.Value
    81  	// Threshold is the entry count above which the table is doubled;
        	// it is set to combiningFrameLoadFactor times the table size.
    82  	threshold int
    84  	// Hits stores the hit count per index. A zero count marks an
        	// unoccupied slot.
    85  	hits []int
    87  	// Len is the current data size of the hash table.
    88  	len int
    89  	// Cap is the size of the data portion of the data frame.
    90  	cap int
    92  	// Mask is the size mask to use for hashing (table size minus one).
    93  	mask int
    94  }
    96  // MakeCombiningFrame creates and returns a new CombiningFrame with
    97  // the provided type and combiner. MakeCombiningFrame panics if there
    98  // is type disagreement. N and nscratch determine the initial frame
    99  // size and scratch space size respective. The initial frame size
   100  // must be a power of two.
   101  func makeCombiningFrame(typ slicetype.Type, combiner slicefunc.Func, n, nscratch int) *combiningFrame {
   102  	if res := typ.NumOut() - typ.Prefix(); res != 1 {
   103  		typecheck.Panicf(1, "combining frame expects 1 residual column, got %d", res)
   104  	}
   105  	c := &combiningFrame{
   106  		Combiner: combiner,
   107  		typ:      typ,
   108  		vcol:     typ.NumOut() - 1,
   109  	}
   110  	_, _, _ = c.make(n, nscratch)
   111  	return c
   112  }
   114  func (c *combiningFrame) make(ndata, nscratch int) (data0, scratch0 frame.Frame, hits0 []int) {
   115  	if ndata&(ndata-1) != 0 {
   116  		panic("hash table size " + fmt.Sprint(ndata) + " not a power of two")
   117  	}
   118  	data0 =
   119  	scratch0 = c.scratch
   120  	hits0 = c.hits
   121 = frame.Make(c.typ, ndata+nscratch, ndata+nscratch)
   122  	c.scratch =, ndata+nscratch)
   123  	c.hits = make([]int, ndata)
   124  	c.threshold = int(combiningFrameLoadFactor * float64(ndata))
   125  	c.mask = ndata - 1
   126  	c.cap = ndata
   127  	return
   128  }
   130  // Len returns the number of enetries in the combining frame.
   131  func (c *combiningFrame) Len() int { return c.len }
   133  // Cap returns the current capacity of the combining frame.
   134  func (c *combiningFrame) Cap() int { return c.cap }
   136  // Combine combines the provided frame into the the CombiningFrame:
   137  // values in f are combined with existing values using the
   138  // CombiningFrame's combiner. When no value exists for a key, the
   139  // value is copied directly.
   140  func (c *combiningFrame) Combine(f frame.Frame) {
   141  	nchunk := (f.Len() + c.scratch.Len() - 1) / c.scratch.Len()
   142  	for i := 0; i < nchunk; i++ {
   143  		n := frame.Copy(c.scratch, f.Slice(c.scratch.Len()*i, f.Len()))
   144  		c.combine(n)
   145  	}
   146  }
   148  // Combine combines n items in the scratch space.
   149  func (c *combiningFrame) combine(n int) {
   150  	// TODO(marius): use cuckoo hashing
   151  	// TODO(marius): propagate context
   152  	ctx := context.Background()
   153  	for i := 0; i < n; i++ {
   154  		idx := int(c.scratch.HashWithSeed(i, hashSeed)) & c.mask
   155  		for try := 1; ; try++ {
   156  			if c.hits[idx] == 0 {
   157  				c.hits[idx]++
   158, c.cap+i)
   159  				c.added()
   160  				break
   161  			} else if !, c.cap+i) && !, idx) {
   162  				c.scratchCall[0] =, idx)
   163  				c.scratchCall[1] = c.scratch.Index(c.vcol, i)
   164  				rvs := c.Combiner.Call(ctx, c.scratchCall[:])
   165, idx).Set(rvs[0])
   166  				c.hits[idx]++
   167  				break
   168  			} else {
   169  				// Probe quadratically.
   170  				idx = (idx + try) & c.mask
   171  			}
   172  		}
   173  	}
   174  }
   176  func (c *combiningFrame) added() {
   177  	c.len += 1
   178  	if c.len <= c.threshold {
   179  		return
   180  	}
   181  	if c.cap == hashMaxCapacity {
   182  		panic("hash table too large")
   183  	}
   184  	// Double the hash table size and rehash all the keys. Note that because
   185  	// all of the keys are unique, we do not need to check for equality when
   186  	// probing for a slot.
   187  	n := c.cap * 2
   188  	data0, scratch0, hits0 := c.make(n, c.scratch.Len())
   189  	frame.Copy(c.scratch, scratch0)
   190  	for i := range hits0 {
   191  		if hits0[i] == 0 {
   192  			continue
   193  		}
   194  		idx := int(data0.HashWithSeed(i, hashSeed)) & c.mask
   195  		for try := 1; ; try++ {
   196  			if c.hits[idx] == 0 {
   197  				c.hits[idx] = hits0[i]
   198  				frame.Copy(, idx+1), data0.Slice(i, i+1))
   199  				break
   200  			} else {
   201  				idx = (idx + try) & c.mask
   202  			}
   203  		}
   204  	}
   205  }
   207  // Compact returns a snapshot of all of the keys in the frame after
   208  // compacting them into the beginning of the frame. After a call to
   209  // Compact, the frame is considered empty; the returned Frame is
   210  // valid only until the next call to Combine.
   211  func (c *combiningFrame) Compact() frame.Frame {
   212  	j := 0
   213  	for i, n := range c.hits {
   214  		if n == 0 {
   215  			continue
   216  		}
   217, j)
   218  		c.hits[i] = 0
   219  		j++
   220  	}
   221  	c.len = 0
   222  	return, j)
   223  }
   225  // A Combiner manages a CombiningFrame, spilling its contents to disk
   226  // when it grows beyond a configured size threshold.
   227  type combiner struct {
        	// Type is the row type of the combined data; embedding it lets the
        	// combiner itself be passed wherever a slicetype.Type is expected.
   228  	slicetype.Type
        	// targetSize is the in-memory key count at which Combine spills.
   230  	targetSize int
        	// comb is the in-memory combining frame.
   231  	comb       *combiningFrame
        	// combiner is the user-supplied combining function; it is also
        	// handed to the reducer that merges spilled runs in Reader.
   232  	combiner   slicefunc.Func
        	// spiller stores compacted frames and later provides readers
        	// over them.
   233  	spiller    sliceio.Spiller
        	// name identifies this combiner in log messages and is the name
        	// the spiller was created with.
   234  	name       string
        	// total is a running row count for this combiner.
        	// NOTE(review): its updates are not fully visible here; confirm.
   235  	total      int
   236  }
   238  // NewCombiner creates a new combiner with the given type, name,
   239  // combiner, and target in-memory size (rows). Combiners can be
   240  // safely accessed concurrently.
   241  func newCombiner(typ slicetype.Type, name string, comb slicefunc.Func, targetSize int) (*combiner, error) {
   242  	c := &combiner{
   243  		Type:       typ,
   244  		name:       name,
   245  		combiner:   comb,
   246  		targetSize: targetSize,
   247  	}
   248  	var err error
   249  	c.spiller, err = sliceio.NewSpiller(name)
   250  	if err != nil {
   251  		return nil, err
   252  	}
   253  	c.comb = makeCombiningFrame(c, comb, *combiningFrameInitSize, *combiningFrameScratchSize)
   254  	if !frame.CanCompare(typ.Out(0)) {
   255  		typecheck.Panicf(1, "bigslice.newCombiner: cannot sort type %s", typ.Out(0))
   256  	}
   257  	return c, nil
   258  }
   260  func (c *combiner) spill(f frame.Frame) error {
   261  	log.Debug.Printf("combiner %s: spilling %d rows disk",, c.comb.Len())
   262  	sort.Sort(f)
   263  	n, err := c.spiller.Spill(f)
   264  	if err == nil {
   265  		combinerKeys.Add(-int64(f.Len()))
   266  		combinerRecords.Add(-int64(
   267 = 0
   268  		log.Debug.Printf("combiner %s: spilled %s to disk",, data.Size(n))
   269  	} else {
   270  		log.Error.Printf("combiner %s: failed to spill to disk: %v",, err)
   271  	}
   272  	return err
   273  }
   275  // Combine combines the provided Frame into this combiner.
   276  // If the number of in-memory keys is at or exceeds the target
   277  // size threshold, the current frame is compacted and spilled to disk.
   278  //
   279  // TODO(marius): Combine blocks until the frame has been fully spilled
   280  // to disk. We could copy the data and perform this spilling concurrently
   281  // with writing.
   282  func (c *combiner) Combine(ctx context.Context, f frame.Frame) error {
   283  	n := f.Len()
   284  	combinerRecords.Add(int64(n))
   285  	combinerTotalRecords.Add(int64(n))
   286 += n
   287  	nkeys := c.comb.Len()
   288  	c.comb.Combine(f)
   289  	// TODO(marius): keep combining up to the next threshold; spill only if
   290  	// we need to grow.  maybe Combine should return 'n', and then we invoke
   291  	// 'grow' manually; or at least an option for this API.
   292  	combinerKeys.Add(int64(c.comb.Len() - nkeys))
   293  	if nkeys >= c.targetSize {
   294  		// TODO(marius): we can copy the data and spill this concurrently
   295  		spilled := c.comb.Compact()
   296  		combineDiskSpills.Add(1)
   297  		if err := c.spill(spilled); err != nil {
   298  			return err
   299  		}
   300  	}
   301  	return nil
   302  }
   304  // Discard discards this combiner's state. The combiner is invalid
   305  // after a call to Discard.
   306  func (c *combiner) Discard() error {
   307  	return c.spiller.Cleanup()
   308  }
   310  // Reader returns a reader that streams the contents of this combiner.
   311  // A call to Reader invalidates the combiner.
   312  func (c *combiner) Reader() (sliceio.Reader, error) {
   313  	defer func() {
   314  		if cleanupErr := c.spiller.Cleanup(); cleanupErr != nil {
   315  			// Consider temporary file cleanup to be best-effort.
   316  			log.Debug.Printf(
   317  				"combiner %s: failed to clean up temporary files: %v",
   318, cleanupErr)
   319  		}
   320  	}()
   321  	readers, err := c.spiller.ClosingReaders()
   322  	if err != nil {
   323  		return nil, err
   324  	}
   325  	f := c.comb.Compact()
   326  	sort.Sort(f)
   327  	readers = append(readers, sliceio.FrameReader(f))
   328  	return sortio.Reduce(c,, readers, c.combiner), nil
   329  }
   331  // WriteTo writes the contents of this combiner to the provided
   332  // encoder. A call to WriteTo invalidates the combiner. WriteTo
   333  // merges content from the spilled combiner frames together with the
   334  // current in-memory frame.
   335  func (c *combiner) WriteTo(ctx context.Context, enc *sliceio.Encoder) (int64, error) {
   336  	// TODO: this should be a generic encoder routine..
   337  	reader, err := c.Reader()
   338  	if err != nil {
   339  		return 0, err
   340  	}
   341  	var total int64
   342  	in := frame.Make(c, *defaultChunksize, *defaultChunksize)
   343  	for {
   344  		n, err := reader.Read(ctx, in)
   345  		if err != nil && err != sliceio.EOF {
   346  			return total, err
   347  		}
   348  		total += int64(n)
   349  		if writeErr := enc.Write(ctx, in.Slice(0, n)); writeErr != nil {
   350  			return total, writeErr
   351  		}
   352  		if err == sliceio.EOF {
   353  			break
   354  		}
   355  	}
   356  	return total, nil
   357  }