github.com/scottcagno/storage@v1.8.0/pkg/lsmtree/channelsearch_test.go (about)

     1  package lsmtree
     2  
     3  /*Note that, for the time being,
     4  this is really more of a proof of
     5  concept than an actual test*/
     6  
     7  import (
     8  	"bytes"
     9  	"encoding/binary"
    10  	"fmt"
    11  	"io"
    12  	"math/rand"
    13  	"os"
    14  	"sync"
    15  	"testing"
    16  	"time"
    17  )
    18  
    19  func TestChannelSearch_TempFileExampleThingy(t *testing.T) {
    20  
    21  	// write some data starting at n and going till p
    22  	exampleWriteData := func(file *os.File, n, p int) *os.File {
    23  		fmt.Printf("writing data from lines %d to %d\n", n, p)
    24  		for i := n; i < p; i++ {
    25  			// notice the '\n' and the end of the line
    26  			line := fmt.Sprintf("%d: this is line number %d\n", i, i)
    27  			_, err := file.WriteString(line)
    28  			if err != nil {
    29  				t.Fatalf("write: %v\n", err)
    30  			}
    31  		}
    32  		err := file.Sync()
    33  		if err != nil {
    34  			t.Fatalf("sync: %v\n", err)
    35  		}
    36  		return file
    37  	}
    38  
    39  	// get the current location of the file pointer
    40  	exampleGetFPOffset := func(file *os.File) int64 {
    41  		// by calling seek offset=0, at the current position
    42  		// we get the current offset of the file pointer
    43  		offset, err := file.Seek(0, io.SeekCurrent)
    44  		if err != nil {
    45  			t.Fatalf("seek: %v\n", err)
    46  		}
    47  		return offset
    48  	}
    49  
    50  	// rewind the file pointer
    51  	exampleRewind := func(file *os.File) {
    52  		// A file works like a tape, or record. The file pointer moves as
    53  		// we write or read data. So we have to go back to the beginning.
    54  		_, err := file.Seek(0, io.SeekStart) // seek to pos 0, from the start of the file
    55  		if err != nil {
    56  			t.Fatalf("seek: %v\n", err)
    57  		}
    58  	}
    59  
    60  	// read data and return data by line in a 2d array
    61  	exampleReadData := func(file *os.File) [][]byte {
    62  		// normally, this entire function would be
    63  		// done another way, but for now this is how
    64  		// im going to show you. I'll also show you
    65  		// an easier read and write example elsewhere
    66  		var lines [][]byte
    67  		data := make([]byte, 1)
    68  		var buffer []byte
    69  		for {
    70  			// read data size slice of data
    71  			_, err := file.Read(data)
    72  			if err != nil {
    73  				if err == io.EOF || err == io.ErrUnexpectedEOF {
    74  					break
    75  				}
    76  				t.Fatalf("reading: %v\n", err)
    77  			}
    78  			// add the current read to the buffer
    79  			buffer = append(buffer, data...)
    80  			// change value below to see the buffer
    81  			printBuffer := false
    82  			if printBuffer {
    83  				fmt.Printf(">>>> buffer=%q\n", buffer)
    84  			}
    85  			// let's see if we can spot a '\n' anywhere
    86  			n := bytes.IndexByte(buffer, '\n')
    87  			if n == -1 {
    88  				// keep reading
    89  				continue
    90  			}
    91  			// found end of line, add to lines
    92  			lines = append(lines, buffer[:n])
    93  			// reset the buffer
    94  			buffer = nil
    95  		}
    96  		return lines
    97  	}
    98  
    99  	// this requires you to use the function signature that
   100  	// matches this -> func(file *os.File) { ... }
   101  	myActualTestFunctionGoesHere := func(file *os.File) {
   102  		// I have an open file I can work with in this closure,
   103  		// and I also don't have to worry about closing or
   104  		// removing it when I am done. For example:
   105  
   106  		fmt.Printf("current file pointer offset: %d\n", exampleGetFPOffset(file))
   107  
   108  		fmt.Println("Let's write some data!")
   109  		exampleWriteData(file, 0, 100) // write lines 0-100
   110  		fmt.Printf("current file pointer offset: %d\n", exampleGetFPOffset(file))
   111  
   112  		fmt.Println("Now, just like our old tapes, we must rewind!")
   113  		exampleRewind(file)
   114  		fmt.Printf("current file pointer offset: %d\n", exampleGetFPOffset(file))
   115  
   116  		fmt.Println("Now let's read the data we wrote!")
   117  		lines := exampleReadData(file)
   118  		fmt.Printf("current file pointer offset: %d\n", exampleGetFPOffset(file))
   119  
   120  		fmt.Println("Now let's print out the data!!")
   121  		for i := range lines {
   122  			fmt.Printf("%s\n", lines[i])
   123  		}
   124  	}
   125  
   126  	// this is how you run it
   127  	GetTempFileForTesting(t, myActualTestFunctionGoesHere)
   128  
   129  	myEncodingAndDecodingFuncGoesHere := func(file *os.File) {
   130  
   131  		// take data, encode it for easier reading
   132  		encodeData := func(data []byte) []byte {
   133  			// get the length of the data passed in
   134  			size := len(data)
   135  			// make a buffer large enough to hold a uint32
   136  			buf := make([]byte, binary.MaxVarintLen32)
   137  			// encode the length into the buffer--we are
   138  			// basically just converting len(data) to a
   139  			// byte slice, so we can write it to a file.
   140  			binary.LittleEndian.PutUint32(buf, uint32(size))
   141  			// now, lets append the "length" to the start
   142  			// front of the slice of data...
   143  			buf = append(buf, data...)
   144  			return buf
   145  		}
   146  
   147  		// write some data starting at n and going till p
   148  		exampleWriteEncodedData := func(file *os.File, n, p int) *os.File {
   149  			fmt.Printf("writing data from lines %d to %d\n", n, p)
   150  			for i := n; i < p; i++ {
   151  				// notice the '\n' and the end of the line
   152  				line := encodeData([]byte(fmt.Sprintf("%d: this is ENCODED line number %d", i, i)))
   153  				_, err := file.Write(line)
   154  				if err != nil {
   155  					t.Fatalf("write: %v\n", err)
   156  				}
   157  			}
   158  			err := file.Sync()
   159  			if err != nil {
   160  				t.Fatalf("sync: %v\n", err)
   161  			}
   162  			return file
   163  		}
   164  
   165  		checkEOF := func(err error) bool {
   166  			if err != nil {
   167  				if err == io.EOF || err == io.ErrUnexpectedEOF {
   168  					return true
   169  				}
   170  				t.Fatalf("reading: %v\n", err)
   171  			}
   172  			return false
   173  		}
   174  
   175  		// read data and return data by line in a 2d array
   176  		exampleReadEncodedData := func(file *os.File) [][]byte {
   177  			var lines [][]byte
   178  			// see how simply encoding the data length makes this
   179  			// soooooo much easier?
   180  			for {
   181  				// make a new size buffer
   182  				sizebuf := make([]byte, binary.MaxVarintLen32)
   183  				// read data size slice of data
   184  				_, err := file.Read(sizebuf)
   185  				if checkEOF(err) {
   186  					break
   187  				}
   188  				// decode the length of the message to be decoded
   189  				size := binary.LittleEndian.Uint32(sizebuf)
   190  				// make a new buffer of the correct size of the message
   191  				data := make([]byte, size)
   192  				// read data size slice of data
   193  				_, err = file.Read(data)
   194  				if checkEOF(err) {
   195  					break
   196  				}
   197  				// add the read data to the lines
   198  				lines = append(lines, data)
   199  			}
   200  			return lines
   201  		}
   202  
   203  		fmt.Printf("current file pointer offset: %d\n", exampleGetFPOffset(file))
   204  
   205  		fmt.Println("Let's write some ENCODED data!")
   206  		exampleWriteEncodedData(file, 0, 100) // write lines 0-100
   207  		fmt.Printf("current file pointer offset: %d\n", exampleGetFPOffset(file))
   208  
   209  		fmt.Println("Now, just like our old tapes, we must rewind!")
   210  		exampleRewind(file)
   211  		fmt.Printf("current file pointer offset: %d\n", exampleGetFPOffset(file))
   212  
   213  		fmt.Println("Now let's read the ENCODED data we wrote!")
   214  		lines := exampleReadEncodedData(file)
   215  		fmt.Printf("current file pointer offset: %d\n", exampleGetFPOffset(file))
   216  
   217  		fmt.Println("Now let's print out the data!!")
   218  		for i := range lines {
   219  			fmt.Printf("%s\n", lines[i])
   220  		}
   221  	}
   222  
   223  	// this is another test that requires a file, and
   224  	// im going to put it in the same place
   225  	GetTempFileForTesting(t, myEncodingAndDecodingFuncGoesHere)
   226  }
   227  
   228  func TestChannelSearch(t *testing.T) {
   229  
   230  	rand.Seed(time.Now().UnixNano())
   231  
   232  	dir := &myDir{
   233  		files: make([]*myFile, 1000),
   234  	}
   235  
   236  	nums := make(map[int]bool)
   237  
   238  	for i := 0; i < 1000; i++ {
   239  		f := &myFile{
   240  			data: make([]*myEntry, 10000),
   241  		}
   242  		for j := 0; j < 10000; j++ {
   243  			for {
   244  				num := rand.Intn(20000000)
   245  				if _, ok := nums[num]; ok != true {
   246  					f.data[j] = &myEntry{
   247  						value: num,
   248  					}
   249  					break
   250  				}
   251  			}
   252  		}
   253  		dir.files[i] = f
   254  	}
   255  
   256  	var combinedLinear time.Duration
   257  	var totalLinear time.Duration
   258  	var combinedChannel time.Duration
   259  	var totalChannel time.Duration
   260  
   261  	totalFound := 0
   262  	totalCFound := 0
   263  
   264  	myNums := make([]int, 1000)
   265  
   266  	for i := 0; i < 1000; i++ {
   267  		num := rand.Intn(750000)
   268  		myNums[i] = num
   269  	}
   270  
   271  	var wg = &sync.WaitGroup{}
   272  
   273  	wg.Add(2)
   274  
   275  	go func() {
   276  		//try linear search for benchmark
   277  		start := time.Now()
   278  		for _, num := range myNums {
   279  			t := time.Now()
   280  			_, err := linearSearch(dir, num)
   281  			elapsed := time.Since(t)
   282  			combinedLinear += elapsed
   283  			if err != nil {
   284  			} else {
   285  				totalFound++
   286  				fmt.Printf("Linear search for %v: data found! [%v] elapsed\n", num, elapsed)
   287  			}
   288  		}
   289  		totalLinear = time.Since(start)
   290  		wg.Done()
   291  	}()
   292  
   293  	go func() {
   294  		//now go for channel
   295  		start := time.Now()
   296  		for _, num := range myNums {
   297  			t := time.Now()
   298  			_, err := channelSearch(dir, num)
   299  			elapsed := time.Since(t)
   300  			combinedChannel += elapsed
   301  			if err != nil {
   302  			} else {
   303  				totalCFound++
   304  				fmt.Printf("Channel search for %v: data found! [%v] elapsed\n", num, elapsed)
   305  			}
   306  		}
   307  		totalChannel = time.Since(start)
   308  		wg.Done()
   309  	}()
   310  
   311  	wg.Wait()
   312  
   313  	fmt.Println("1,000 files and 10,000,000 entries searched")
   314  	fmt.Printf("Linear Search: %v out of 1000 searched items found!\n", totalFound)
   315  	fmt.Printf("Channel Search: %v out of 1000 searched items found!\n", totalCFound)
   316  	fmt.Printf("Total linear search time: [combined]%v, [total] %v\n", combinedLinear, totalLinear)
   317  	fmt.Printf("Total channel search time: [combined]%v, [total]%v\n", combinedChannel, totalChannel)
   318  
   319  }