github.com/coocood/badger@v1.5.1-0.20200528065104-c02ac3616d04/cache/sim/sim.go (about)

     1  /*
     2   * Copyright 2019 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package sim
    18  
    19  import (
    20  	"bufio"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"math/rand"
    25  	"strconv"
    26  	"strings"
    27  	"time"
    28  )
    29  
var (
	// ErrDone is returned when the underlying file has run out of lines.
	// Both ParseLIRS and ParseARC report it when they are handed an empty
	// line.
	ErrDone = errors.New("no more values in the Simulator")
	// ErrBadLine is returned when the trace file line is unrecognizable to
	// the Parser. ParseARC reports it when a line does not consist of exactly
	// four whitespace-separated columns.
	ErrBadLine = errors.New("bad line for trace format")
)
    37  
// Simulator is the central type of the `sim` package. It is a function
// returning a key from some source (composed from the other functions in this
// package, either generated or parsed). You can use these Simulators to
// approximate access distributions.
//
// Each call yields the next key together with an error. The generated sources
// (NewZipfian, NewUniform) never run dry; file-backed sources (NewReader)
// report ErrDone once the input is exhausted.
type Simulator func() (uint64, error)
    43  
    44  // NewZipfian creates a Simulator returning numbers following a Zipfian [1]
    45  // distribution infinitely. Zipfian distributions are useful for simulating real
    46  // workloads.
    47  //
    48  // [1]: https://en.wikipedia.org/wiki/Zipf%27s_law
    49  func NewZipfian(s, v float64, n uint64) Simulator {
    50  	z := rand.NewZipf(rand.New(rand.NewSource(time.Now().UnixNano())), s, v, n)
    51  	return func() (uint64, error) {
    52  		return z.Uint64(), nil
    53  	}
    54  }
    55  
    56  // NewUniform creates a Simulator returning uniformly distributed [1] (random)
    57  // numbers [0, max) infinitely.
    58  //
    59  // [1]: https://en.wikipedia.org/wiki/Uniform_distribution_(continuous)
    60  func NewUniform(max uint64) Simulator {
    61  	m := int64(max)
    62  	r := rand.New(rand.NewSource(time.Now().UnixNano()))
    63  	return func() (uint64, error) {
    64  		return uint64(r.Int63n(m)), nil
    65  	}
    66  }
    67  
// Parser is used as a parameter to NewReader so we can create Simulators from
// varying trace file formats easily.
//
// NewReader calls the Parser with the two results of bufio.Reader.ReadString:
// the raw line (including its trailing newline, when present) and the read
// error. The Parser returns the keys described by that line, or an error such
// as ErrDone or ErrBadLine.
type Parser func(string, error) ([]uint64, error)
    71  
    72  // NewReader creates a Simulator from two components: the Parser, which is a
    73  // filetype specific function for parsing lines, and the file itself, which will
    74  // be read from.
    75  //
    76  // When every line in the file has been read, ErrDone will be returned. For some
    77  // trace formats (LIRS) there is one item per line. For others (ARC) there is a
    78  // range of items on each line. Thus, the true number of items in each file
    79  // is hard to determine, so it's up to the user to handle ErrDone accordingly.
    80  func NewReader(parser Parser, file io.Reader) Simulator {
    81  	b := bufio.NewReader(file)
    82  	s := make([]uint64, 0)
    83  	i := -1
    84  	var err error
    85  	return func() (uint64, error) {
    86  		// only parse a new line when we've run out of items
    87  		if i++; i == len(s) {
    88  			// parse sequence from line
    89  			if s, err = parser(b.ReadString('\n')); err != nil {
    90  				s = []uint64{0}
    91  			}
    92  			i = 0
    93  		}
    94  		return s[i], err
    95  	}
    96  }
    97  
    98  // ParseLIRS takes a single line of input from a LIRS trace file as described in
    99  // multiple papers [1] and returns a slice containing one number. A nice
   100  // collection of LIRS trace files can be found in Ben Manes' repo [2].
   101  //
   102  // [1]: https://en.wikipedia.org/wiki/LIRS_caching_algorithm
   103  // [2]: https://git.io/fj9gU
   104  func ParseLIRS(line string, err error) ([]uint64, error) {
   105  	if line = strings.TrimSpace(line); line != "" {
   106  		// example: "1\r\n"
   107  		key, err := strconv.ParseUint(line, 10, 64)
   108  		return []uint64{key}, err
   109  	}
   110  	return nil, ErrDone
   111  }
   112  
   113  // ParseARC takes a single line of input from an ARC trace file as described in
   114  // "ARC: a self-tuning, low overhead replacement cache" [1] by Nimrod Megiddo
   115  // and Dharmendra S. Modha [1] and returns a sequence of numbers generated from
   116  // the line and any error. For use with NewReader.
   117  //
   118  // [1]: https://scinapse.io/papers/1860107648
   119  func ParseARC(line string, err error) ([]uint64, error) {
   120  	if line != "" {
   121  		// example: "0 5 0 0\n"
   122  		//
   123  		// -  first block: starting number in sequence
   124  		// - second block: number of items in sequence
   125  		// -  third block: ignore
   126  		// - fourth block: global line number (not used)
   127  		cols := strings.Fields(line)
   128  		if len(cols) != 4 {
   129  			return nil, ErrBadLine
   130  		}
   131  		start, err := strconv.ParseUint(cols[0], 10, 64)
   132  		if err != nil {
   133  			return nil, err
   134  		}
   135  		count, err := strconv.ParseUint(cols[1], 10, 64)
   136  		if err != nil {
   137  			return nil, err
   138  		}
   139  		// populate sequence from start to start + count
   140  		seq := make([]uint64, count)
   141  		for i := range seq {
   142  			seq[i] = start + uint64(i)
   143  		}
   144  		return seq, nil
   145  	}
   146  	return nil, ErrDone
   147  }
   148  
   149  // Collection evaluates the Simulator size times and saves each item to the
   150  // returned slice.
   151  func Collection(simulator Simulator, size uint64) []uint64 {
   152  	collection := make([]uint64, size)
   153  	for i := range collection {
   154  		collection[i], _ = simulator()
   155  	}
   156  	return collection
   157  }
   158  
   159  // StringCollection evaluates the Simulator size times and saves each item to
   160  // the returned slice, after converting it to a string.
   161  func StringCollection(simulator Simulator, size uint64) []string {
   162  	collection := make([]string, size)
   163  	for i := range collection {
   164  		n, _ := simulator()
   165  		collection[i] = fmt.Sprintf("%d", n)
   166  	}
   167  	return collection
   168  }