github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/commit_log_combiner_integration_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package hnsw
    13  
    14  import (
    15  	"os"
    16  	"testing"
    17  
    18  	"github.com/sirupsen/logrus/hooks/test"
    19  	"github.com/stretchr/testify/assert"
    20  	"github.com/stretchr/testify/require"
    21  )
    22  
    23  func Test_CommitlogCombiner(t *testing.T) {
    24  	// For the combiner the contents of a commit log file don't actually matter
    25  	// so we can put arbitrary data in the files. It will only make decisions
    26  	// about what should be appended, the actual condensing will be taken care of
    27  	// by the condensor
    28  
    29  	rootPath := t.TempDir()
    30  	logger, _ := test.NewNullLogger()
    31  
    32  	threshold := int64(1000)
    33  	id := "combiner_test"
    34  	// create commit logger directory
    35  	require.Nil(t, os.MkdirAll(commitLogDirectory(rootPath, id), 0o777))
    36  
    37  	name := func(fileName string) string {
    38  		return commitLogFileName(rootPath, id, fileName)
    39  	}
    40  
    41  	t.Run("create several condensed files below the threshold", func(t *testing.T) {
    42  		// 4 files of 300 bytes each, with 1000 byte threshold. This lets us verify
    43  		// that one and two will be combined, so will three and four.
    44  		require.Nil(t, createDummyFile(name("1000.condensed"), []byte("file1\n"), 300))
    45  		require.Nil(t, createDummyFile(name("1001.condensed"), []byte("file2\n"), 300))
    46  		require.Nil(t, createDummyFile(name("1002.condensed"), []byte("file3\n"), 300))
    47  		require.Nil(t, createDummyFile(name("1003.condensed"), []byte("file4\n"), 300))
    48  		require.Nil(t, createDummyFile(name("1004"), []byte("current\n"), 50))
    49  	})
    50  
    51  	t.Run("run combiner", func(t *testing.T) {
    52  		_, err := NewCommitLogCombiner(rootPath, id, threshold, logger).Do()
    53  		require.Nil(t, err)
    54  	})
    55  
    56  	t.Run("we are now left with combined files", func(t *testing.T) {
    57  		dir, err := os.Open(commitLogDirectory(rootPath, id))
    58  		require.Nil(t, err)
    59  
    60  		fileNames, err := dir.Readdirnames(0)
    61  		require.Nil(t, err)
    62  		require.Len(t, fileNames, 3)
    63  		require.ElementsMatch(t, []string{"1000", "1002", "1004"}, fileNames)
    64  
    65  		t.Run("the first file is correctly combined", func(t *testing.T) {
    66  			contents, err := os.ReadFile(commitLogFileName(rootPath, id, "1000"))
    67  			require.Nil(t, err)
    68  			require.Len(t, contents, 600)
    69  			assert.Equal(t, contents[0:6], []byte("file1\n"))
    70  			assert.Equal(t, contents[300:306], []byte("file2\n"))
    71  		})
    72  
    73  		t.Run("the second file is correctly combined", func(t *testing.T) {
    74  			contents, err := os.ReadFile(commitLogFileName(rootPath, id, "1002"))
    75  			require.Nil(t, err)
    76  			require.Len(t, contents, 600)
    77  			assert.Equal(t, contents[0:6], []byte("file3\n"))
    78  			assert.Equal(t, contents[300:306], []byte("file4\n"))
    79  		})
    80  
    81  		t.Run("latest file is unchanged", func(t *testing.T) {
    82  			contents, err := os.ReadFile(commitLogFileName(rootPath, id, "1004"))
    83  			require.Nil(t, err)
    84  			require.Len(t, contents, 50)
    85  			assert.Equal(t, contents[0:8], []byte("current\n"))
    86  			assert.Equal(t, contents[42:], []byte("rrent\ncu"))
    87  		})
    88  	})
    89  }
    90  
    91  func createDummyFile(fileName string, content []byte, size int) error {
    92  	f, err := os.Create(fileName)
    93  	if err != nil {
    94  		return err
    95  	}
    96  
    97  	defer f.Close()
    98  
    99  	written := 0
   100  	for {
   101  		if size == written {
   102  			break
   103  		}
   104  
   105  		if size-written < len(content) {
   106  			content = content[:(size - written)]
   107  		}
   108  
   109  		n, err := f.Write([]byte(content))
   110  		written += n
   111  
   112  		if err != nil {
   113  			return err
   114  		}
   115  	}
   116  
   117  	return nil
   118  }