github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/condensor_mmap_integration_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package hnsw
    13  
    14  import (
    15  	"os"
    16  	"strings"
    17  	"testing"
    18  
    19  	"github.com/sirupsen/logrus/hooks/test"
    20  	"github.com/stretchr/testify/assert"
    21  	"github.com/stretchr/testify/require"
    22  	"github.com/weaviate/weaviate/entities/cyclemanager"
    23  )
    24  
    25  func TestMmapCondensor(t *testing.T) {
    26  	t.Skip() // TODO
    27  
    28  	rootPath := t.TempDir()
    29  
    30  	logger, _ := test.NewNullLogger()
    31  	uncondensed, err := NewCommitLogger(rootPath, "uncondensed", logger,
    32  		cyclemanager.NewCallbackGroupNoop())
    33  	require.Nil(t, err)
    34  
    35  	perfect, err := NewCommitLogger(rootPath, "perfect", logger,
    36  		cyclemanager.NewCallbackGroupNoop())
    37  	require.Nil(t, err)
    38  
    39  	t.Run("add redundant data to the original log", func(t *testing.T) {
    40  		uncondensed.AddNode(&vertex{id: 0, level: 3})
    41  		uncondensed.AddNode(&vertex{id: 1, level: 3})
    42  		uncondensed.AddNode(&vertex{id: 2, level: 3})
    43  		uncondensed.AddNode(&vertex{id: 3, level: 3})
    44  
    45  		// below are some pointless connection replacements, we expect that most of
    46  		// these will be gone after condensing, this gives us a good way of testing
    47  		// whether they're really gone
    48  		for level := 0; level <= 3; level++ {
    49  			uncondensed.ReplaceLinksAtLevel(0, level, []uint64{1, 2, 3})
    50  			uncondensed.ReplaceLinksAtLevel(0, level, []uint64{1, 2})
    51  			uncondensed.ReplaceLinksAtLevel(0, level, []uint64{1})
    52  			uncondensed.ReplaceLinksAtLevel(0, level, []uint64{2})
    53  			uncondensed.ReplaceLinksAtLevel(0, level, []uint64{3})
    54  			uncondensed.ReplaceLinksAtLevel(0, level, []uint64{2, 3})
    55  			uncondensed.ReplaceLinksAtLevel(0, level, []uint64{1, 2, 3})
    56  			uncondensed.ReplaceLinksAtLevel(1, level, []uint64{0, 2, 3})
    57  			uncondensed.ReplaceLinksAtLevel(1, level, []uint64{0, 2})
    58  			uncondensed.ReplaceLinksAtLevel(1, level, []uint64{0})
    59  			uncondensed.ReplaceLinksAtLevel(1, level, []uint64{2})
    60  			uncondensed.ReplaceLinksAtLevel(1, level, []uint64{3})
    61  			uncondensed.ReplaceLinksAtLevel(1, level, []uint64{2, 3})
    62  			uncondensed.ReplaceLinksAtLevel(1, level, []uint64{0, 2, 3})
    63  			uncondensed.ReplaceLinksAtLevel(2, level, []uint64{0, 1, 3})
    64  			uncondensed.ReplaceLinksAtLevel(2, level, []uint64{0, 1})
    65  			uncondensed.ReplaceLinksAtLevel(2, level, []uint64{0})
    66  			uncondensed.ReplaceLinksAtLevel(2, level, []uint64{1})
    67  			uncondensed.ReplaceLinksAtLevel(2, level, []uint64{3})
    68  			uncondensed.ReplaceLinksAtLevel(2, level, []uint64{1, 3})
    69  			uncondensed.ReplaceLinksAtLevel(2, level, []uint64{0, 1, 3})
    70  			uncondensed.ReplaceLinksAtLevel(3, level, []uint64{0, 1, 2})
    71  			uncondensed.ReplaceLinksAtLevel(3, level, []uint64{0, 1})
    72  			uncondensed.ReplaceLinksAtLevel(3, level, []uint64{0})
    73  			uncondensed.ReplaceLinksAtLevel(3, level, []uint64{1})
    74  			uncondensed.ReplaceLinksAtLevel(3, level, []uint64{2})
    75  			uncondensed.ReplaceLinksAtLevel(3, level, []uint64{1, 2})
    76  			uncondensed.ReplaceLinksAtLevel(3, level, []uint64{0, 1, 2})
    77  		}
    78  		uncondensed.SetEntryPointWithMaxLayer(3, 3)
    79  		uncondensed.AddTombstone(2)
    80  
    81  		require.Nil(t, uncondensed.Flush())
    82  	})
    83  
    84  	t.Run("create a hypothetical perfect log", func(t *testing.T) {
    85  		perfect.AddNode(&vertex{id: 0, level: 3})
    86  		perfect.AddNode(&vertex{id: 1, level: 3})
    87  		perfect.AddNode(&vertex{id: 2, level: 3})
    88  		perfect.AddNode(&vertex{id: 3, level: 3})
    89  
    90  		// below are some pointless connection replacements, we expect that most of
    91  		// these will be gone after condensing, this gives us a good way of testing
    92  		// whether they're really gone
    93  		for level := 0; level <= 3; level++ {
    94  			perfect.ReplaceLinksAtLevel(0, level, []uint64{1, 2, 3})
    95  			perfect.ReplaceLinksAtLevel(1, level, []uint64{0, 2, 3})
    96  			perfect.ReplaceLinksAtLevel(2, level, []uint64{0, 1, 3})
    97  			perfect.ReplaceLinksAtLevel(3, level, []uint64{0, 1, 2})
    98  		}
    99  		perfect.SetEntryPointWithMaxLayer(3, 3)
   100  		perfect.AddTombstone(2)
   101  
   102  		require.Nil(t, perfect.Flush())
   103  	})
   104  
   105  	t.Run("condense the original and verify against the perfect one", func(t *testing.T) {
   106  		input, ok, err := getCurrentCommitLogFileName(commitLogDirectory(rootPath, "uncondensed"))
   107  		require.Nil(t, err)
   108  		require.True(t, ok)
   109  
   110  		err = NewMmapCondensor(3).Do(commitLogFileName(rootPath, "uncondensed", input))
   111  		require.Nil(t, err)
   112  
   113  		control, ok, err := getCurrentCommitLogFileName(
   114  			commitLogDirectory(rootPath, "perfect"))
   115  		require.Nil(t, err)
   116  		require.True(t, ok)
   117  
   118  		actual, ok, err := getCurrentCommitLogFileName(
   119  			commitLogDirectory(rootPath, "uncondensed"))
   120  		require.Nil(t, err)
   121  		require.True(t, ok)
   122  
   123  		assert.True(t, strings.HasSuffix(actual, ".condensed"),
   124  			"commit log is now saved as condensed")
   125  
   126  		controlStat, err := os.Stat(commitLogFileName(rootPath, "perfect", control))
   127  		require.Nil(t, err)
   128  
   129  		actualStat, err := os.Stat(commitLogFileName(rootPath, "uncondensed", actual))
   130  		require.Nil(t, err)
   131  
   132  		assert.Equal(t, controlStat.Size(), actualStat.Size())
   133  
   134  		// dumpIndexFromCommitLog(t, commitLogFileName(rootPath, "uncondensed", actual))
   135  		// dumpIndexFromCommitLog(t, commitLogFileName(rootPath, "perfect", control))
   136  	})
   137  }
   138  
   139  // func TestCondensorWithoutEntrypoint(t *testing.T) {
   140  // 	rand.Seed(time.Now().UnixNano())
   141  // 	rootPath := t.TempDir()
   142  
   143  // 	logger, _ := test.NewNullLogger()
   144  // 	uncondensed, err := NewCommitLogger(rootPath, "uncondensed", logger,
   145  // 		cyclemanager.NewCallbackGroupNoop())
   146  // 	require.Nil(t, err)
   147  
   148  // 	t.Run("add data, but do not set an entrypoint", func(t *testing.T) {
   149  // 		uncondensed.AddNode(&vertex{id: 0, level: 3})
   150  
   151  // 		require.Nil(t, uncondensed.Flush())
   152  // 	})
   153  
   154  // 	t.Run("condense the original and verify it doesn't overwrite the EP", func(t *testing.T) {
   155  // 		input, ok, err := getCurrentCommitLogFileName(commitLogDirectory(rootPath, "uncondensed"))
   156  // 		require.Nil(t, err)
   157  // 		require.True(t, ok)
   158  
   159  // 		err = NewMemoryCondensor2(logger).Do(commitLogFileName(rootPath, "uncondensed", input))
   160  // 		require.Nil(t, err)
   161  
   162  // 		actual, ok, err := getCurrentCommitLogFileName(
   163  // 			commitLogDirectory(rootPath, "uncondensed"))
   164  // 		require.Nil(t, err)
   165  // 		require.True(t, ok)
   166  
   167  // 		assert.True(t, strings.HasSuffix(actual, ".condensed"),
   168  // 			"commit log is now saved as condensed")
   169  
   170  // 		initialState := DeserializationResult{
   171  // 			Nodes:      nil,
   172  // 			Entrypoint: 17,
   173  // 			Level:      3,
   174  // 		}
   175  // 		fd, err := os.Open(commitLogFileName(rootPath, "uncondensed", actual))
   176  // 		require.Nil(t, err)
   177  
   178  // 		bufr := bufio.NewReader(fd)
   179  // 		res, err := NewDeserializer(logger).Do(bufr, &initialState)
   180  // 		require.Nil(t, err)
   181  
   182  // 		assert.Contains(t, res.Nodes, &vertex{id: 0, level: 3, connections: map[int][]uint64{}})
   183  // 		assert.Equal(t, uint64(17), res.Entrypoint)
   184  // 		assert.Equal(t, uint16(3), res.Level)
   185  
   186  // 	})
   187  // }