github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/condensor_integration_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  //go:build integrationTest
    13  // +build integrationTest
    14  
    15  package hnsw
    16  
    17  import (
    18  	"bufio"
    19  	"context"
    20  	"os"
    21  	"strings"
    22  	"testing"
    23  
    24  	"github.com/sirupsen/logrus/hooks/test"
    25  	"github.com/stretchr/testify/assert"
    26  	"github.com/stretchr/testify/require"
    27  	"github.com/weaviate/weaviate/adapters/repos/db/vector/compressionhelpers"
    28  	"github.com/weaviate/weaviate/entities/cyclemanager"
    29  )
    30  
    31  func TestCondensor(t *testing.T) {
    32  	rootPath := t.TempDir()
    33  	ctx := context.Background()
    34  
    35  	logger, _ := test.NewNullLogger()
    36  	uncondensed, err := NewCommitLogger(rootPath, "uncondensed", logger,
    37  		cyclemanager.NewCallbackGroupNoop())
    38  	require.Nil(t, err)
    39  	defer uncondensed.Shutdown(ctx)
    40  
    41  	perfect, err := NewCommitLogger(rootPath, "perfect", logger,
    42  		cyclemanager.NewCallbackGroupNoop())
    43  	require.Nil(t, err)
    44  	defer perfect.Shutdown(ctx)
    45  
    46  	t.Run("add redundant data to the original log", func(t *testing.T) {
    47  		uncondensed.AddNode(&vertex{id: 0, level: 3})
    48  		uncondensed.AddNode(&vertex{id: 1, level: 3})
    49  		uncondensed.AddNode(&vertex{id: 2, level: 3})
    50  		uncondensed.AddNode(&vertex{id: 3, level: 3})
    51  
    52  		// below are some pointless connection replacements, we expect that most of
    53  		// these will be gone after condensing, this gives us a good way of testing
    54  		// whether they're really gone
    55  		for level := 0; level <= 3; level++ {
    56  			uncondensed.ReplaceLinksAtLevel(0, level, []uint64{1, 2, 3})
    57  			uncondensed.ReplaceLinksAtLevel(0, level, []uint64{1, 2})
    58  			uncondensed.ReplaceLinksAtLevel(0, level, []uint64{1})
    59  			uncondensed.ReplaceLinksAtLevel(0, level, []uint64{2})
    60  			uncondensed.ReplaceLinksAtLevel(0, level, []uint64{3})
    61  			uncondensed.ReplaceLinksAtLevel(0, level, []uint64{2, 3})
    62  			uncondensed.ReplaceLinksAtLevel(0, level, []uint64{1, 2, 3})
    63  			uncondensed.ReplaceLinksAtLevel(1, level, []uint64{0, 2, 3})
    64  			uncondensed.ReplaceLinksAtLevel(1, level, []uint64{0, 2})
    65  			uncondensed.ReplaceLinksAtLevel(1, level, []uint64{0})
    66  			uncondensed.ReplaceLinksAtLevel(1, level, []uint64{2})
    67  			uncondensed.ReplaceLinksAtLevel(1, level, []uint64{3})
    68  			uncondensed.ReplaceLinksAtLevel(1, level, []uint64{2, 3})
    69  			uncondensed.ReplaceLinksAtLevel(1, level, []uint64{0, 2, 3})
    70  			uncondensed.ReplaceLinksAtLevel(2, level, []uint64{0, 1, 3})
    71  			uncondensed.ReplaceLinksAtLevel(2, level, []uint64{0, 1})
    72  			uncondensed.ReplaceLinksAtLevel(2, level, []uint64{0})
    73  			uncondensed.ReplaceLinksAtLevel(2, level, []uint64{1})
    74  			uncondensed.ReplaceLinksAtLevel(2, level, []uint64{3})
    75  			uncondensed.ReplaceLinksAtLevel(2, level, []uint64{1, 3})
    76  			uncondensed.ReplaceLinksAtLevel(2, level, []uint64{0, 1, 3})
    77  			uncondensed.ReplaceLinksAtLevel(3, level, []uint64{0, 1, 2})
    78  			uncondensed.ReplaceLinksAtLevel(3, level, []uint64{0, 1})
    79  			uncondensed.ReplaceLinksAtLevel(3, level, []uint64{0})
    80  			uncondensed.ReplaceLinksAtLevel(3, level, []uint64{1})
    81  			uncondensed.ReplaceLinksAtLevel(3, level, []uint64{2})
    82  			uncondensed.ReplaceLinksAtLevel(3, level, []uint64{1, 2})
    83  			uncondensed.ReplaceLinksAtLevel(3, level, []uint64{0, 1, 2})
    84  		}
    85  		uncondensed.SetEntryPointWithMaxLayer(3, 3)
    86  		uncondensed.AddTombstone(2)
    87  
    88  		require.Nil(t, uncondensed.Flush())
    89  	})
    90  
    91  	t.Run("create a hypothetical perfect log", func(t *testing.T) {
    92  		perfect.AddNode(&vertex{id: 0, level: 3})
    93  		perfect.AddNode(&vertex{id: 1, level: 3})
    94  		perfect.AddNode(&vertex{id: 2, level: 3})
    95  		perfect.AddNode(&vertex{id: 3, level: 3})
    96  
    97  		// below are some pointless connection replacements, we expect that most of
    98  		// these will be gone after condensing, this gives us a good way of testing
    99  		// whether they're really gone
   100  		for level := 0; level <= 3; level++ {
   101  			perfect.ReplaceLinksAtLevel(0, level, []uint64{1, 2, 3})
   102  			perfect.ReplaceLinksAtLevel(1, level, []uint64{0, 2, 3})
   103  			perfect.ReplaceLinksAtLevel(2, level, []uint64{0, 1, 3})
   104  			perfect.ReplaceLinksAtLevel(3, level, []uint64{0, 1, 2})
   105  		}
   106  		perfect.SetEntryPointWithMaxLayer(3, 3)
   107  		perfect.AddTombstone(2)
   108  
   109  		require.Nil(t, perfect.Flush())
   110  	})
   111  
   112  	t.Run("condense the original and verify against the perfect one", func(t *testing.T) {
   113  		input, ok, err := getCurrentCommitLogFileName(commitLogDirectory(rootPath, "uncondensed"))
   114  		require.Nil(t, err)
   115  		require.True(t, ok)
   116  
   117  		err = NewMemoryCondensor(logger).Do(commitLogFileName(rootPath, "uncondensed", input))
   118  		require.Nil(t, err)
   119  
   120  		control, ok, err := getCurrentCommitLogFileName(
   121  			commitLogDirectory(rootPath, "perfect"))
   122  		require.Nil(t, err)
   123  		require.True(t, ok)
   124  
   125  		actual, ok, err := getCurrentCommitLogFileName(
   126  			commitLogDirectory(rootPath, "uncondensed"))
   127  		require.Nil(t, err)
   128  		require.True(t, ok)
   129  
   130  		assert.True(t, strings.HasSuffix(actual, ".condensed"),
   131  			"commit log is now saved as condensed")
   132  
   133  		controlStat, err := os.Stat(commitLogFileName(rootPath, "perfect", control))
   134  		require.Nil(t, err)
   135  
   136  		actualStat, err := os.Stat(commitLogFileName(rootPath, "uncondensed", actual))
   137  		require.Nil(t, err)
   138  
   139  		assert.Equal(t, controlStat.Size(), actualStat.Size())
   140  
   141  		// dumpIndexFromCommitLog(t, commitLogFileName(rootPath, "uncondensed", actual))
   142  		// dumpIndexFromCommitLog(t, commitLogFileName(rootPath, "perfect", control))
   143  	})
   144  }
   145  
   146  func TestCondensorAppendNodeLinks(t *testing.T) {
   147  	rootPath := t.TempDir()
   148  	ctx := context.Background()
   149  
   150  	logger, _ := test.NewNullLogger()
   151  	uncondensed1, err := NewCommitLogger(rootPath, "uncondensed1", logger,
   152  		cyclemanager.NewCallbackGroupNoop())
   153  	require.Nil(t, err)
   154  	defer uncondensed1.Shutdown(ctx)
   155  
   156  	uncondensed2, err := NewCommitLogger(rootPath, "uncondensed2", logger,
   157  		cyclemanager.NewCallbackGroupNoop())
   158  	require.Nil(t, err)
   159  	defer uncondensed2.Shutdown(ctx)
   160  
   161  	control, err := NewCommitLogger(rootPath, "control", logger,
   162  		cyclemanager.NewCallbackGroupNoop())
   163  	require.Nil(t, err)
   164  	defer control.Shutdown(ctx)
   165  
   166  	t.Run("add data to the first log", func(t *testing.T) {
   167  		uncondensed1.AddLinkAtLevel(0, 0, 1)
   168  		uncondensed1.AddLinkAtLevel(0, 0, 2)
   169  		uncondensed1.AddLinkAtLevel(0, 0, 3)
   170  
   171  		require.Nil(t, uncondensed1.Flush())
   172  	})
   173  
   174  	t.Run("append data to the second log", func(t *testing.T) {
   175  		uncondensed2.AddLinkAtLevel(0, 0, 4)
   176  		uncondensed2.AddLinkAtLevel(0, 0, 5)
   177  		uncondensed2.AddLinkAtLevel(0, 0, 6)
   178  
   179  		require.Nil(t, uncondensed2.Flush())
   180  	})
   181  
   182  	t.Run("create a control log", func(t *testing.T) {
   183  		control.AddNode(&vertex{id: 0, level: 0})
   184  		control.ReplaceLinksAtLevel(0, 0, []uint64{1, 2, 3, 4, 5, 6})
   185  
   186  		require.Nil(t, control.Flush())
   187  	})
   188  
   189  	t.Run("condense both logs and verify the contents against the control", func(t *testing.T) {
   190  		input, ok, err := getCurrentCommitLogFileName(commitLogDirectory(rootPath, "uncondensed1"))
   191  		require.Nil(t, err)
   192  		require.True(t, ok)
   193  
   194  		err = NewMemoryCondensor(logger).Do(commitLogFileName(rootPath, "uncondensed1", input))
   195  		require.Nil(t, err)
   196  
   197  		input, ok, err = getCurrentCommitLogFileName(commitLogDirectory(rootPath, "uncondensed2"))
   198  		require.Nil(t, err)
   199  		require.True(t, ok)
   200  
   201  		err = NewMemoryCondensor(logger).Do(commitLogFileName(rootPath, "uncondensed2", input))
   202  		require.Nil(t, err)
   203  
   204  		control, ok, err := getCurrentCommitLogFileName(
   205  			commitLogDirectory(rootPath, "control"))
   206  		require.Nil(t, err)
   207  		require.True(t, ok)
   208  
   209  		condensed1, ok, err := getCurrentCommitLogFileName(
   210  			commitLogDirectory(rootPath, "uncondensed1"))
   211  		require.Nil(t, err)
   212  		require.True(t, ok)
   213  
   214  		condensed2, ok, err := getCurrentCommitLogFileName(
   215  			commitLogDirectory(rootPath, "uncondensed2"))
   216  		require.Nil(t, err)
   217  		require.True(t, ok)
   218  
   219  		assert.True(t, strings.HasSuffix(condensed1, ".condensed"),
   220  			"commit log is now saved as condensed")
   221  		assert.True(t, strings.HasSuffix(condensed2, ".condensed"),
   222  			"commit log is now saved as condensed")
   223  
   224  		assertIndicesFromCommitLogsMatch(t, commitLogFileName(rootPath, "control", control),
   225  			[]string{
   226  				commitLogFileName(rootPath, "uncondensed1", condensed1),
   227  				commitLogFileName(rootPath, "uncondensed2", condensed2),
   228  			})
   229  	})
   230  }
   231  
   232  // This test was added as part of
   233  // https://github.com/weaviate/weaviate/issues/1868 to rule out that
   234  // replace links broken across two independent commit logs. It turned out that
   235  // this was green and not the cause for the bug. The bug could be reproduced
   236  // with the new test added in index_too_many_links_bug_integration_test.go.
   237  // Nevertheless it makes sense to keep this test around as this might have been
   238  // a potential cause as well and by having this test, we can prevent a
   239  // regression.
   240  func TestCondensorReplaceNodeLinks(t *testing.T) {
   241  	rootPath := t.TempDir()
   242  	ctx := context.Background()
   243  
   244  	logger, _ := test.NewNullLogger()
   245  	uncondensed1, err := NewCommitLogger(rootPath, "uncondensed1", logger,
   246  		cyclemanager.NewCallbackGroupNoop())
   247  	require.Nil(t, err)
   248  	defer uncondensed1.Shutdown(ctx)
   249  
   250  	uncondensed2, err := NewCommitLogger(rootPath, "uncondensed2", logger,
   251  		cyclemanager.NewCallbackGroupNoop())
   252  	require.Nil(t, err)
   253  	defer uncondensed2.Shutdown(ctx)
   254  
   255  	control, err := NewCommitLogger(rootPath, "control", logger,
   256  		cyclemanager.NewCallbackGroupNoop())
   257  	require.Nil(t, err)
   258  	defer control.Shutdown(ctx)
   259  
   260  	t.Run("add data to the first log", func(t *testing.T) {
   261  		uncondensed1.AddNode(&vertex{id: 0, level: 1})
   262  		uncondensed1.AddLinkAtLevel(0, 0, 1)
   263  		uncondensed1.AddLinkAtLevel(0, 0, 2)
   264  		uncondensed1.AddLinkAtLevel(0, 0, 3)
   265  		uncondensed1.AddLinkAtLevel(0, 1, 1)
   266  		uncondensed1.AddLinkAtLevel(0, 1, 2)
   267  
   268  		require.Nil(t, uncondensed1.Flush())
   269  	})
   270  
   271  	t.Run("replace all data from previous log", func(t *testing.T) {
   272  		uncondensed2.AddLinkAtLevel(0, 0, 10)
   273  		uncondensed2.ReplaceLinksAtLevel(0, 0, []uint64{4, 5, 6})
   274  		uncondensed2.AddLinkAtLevel(0, 0, 7)
   275  		uncondensed2.ReplaceLinksAtLevel(0, 1, []uint64{8})
   276  
   277  		require.Nil(t, uncondensed2.Flush())
   278  	})
   279  
   280  	t.Run("create a control log", func(t *testing.T) {
   281  		control.AddNode(&vertex{id: 0, level: 1})
   282  		control.ReplaceLinksAtLevel(0, 0, []uint64{4, 5, 6, 7})
   283  		control.ReplaceLinksAtLevel(0, 1, []uint64{8})
   284  
   285  		require.Nil(t, control.Flush())
   286  	})
   287  
   288  	t.Run("condense both logs and verify the contents against the control", func(t *testing.T) {
   289  		input, ok, err := getCurrentCommitLogFileName(commitLogDirectory(rootPath, "uncondensed1"))
   290  		require.Nil(t, err)
   291  		require.True(t, ok)
   292  
   293  		err = NewMemoryCondensor(logger).Do(commitLogFileName(rootPath, "uncondensed1", input))
   294  		require.Nil(t, err)
   295  
   296  		input, ok, err = getCurrentCommitLogFileName(commitLogDirectory(rootPath, "uncondensed2"))
   297  		require.Nil(t, err)
   298  		require.True(t, ok)
   299  
   300  		err = NewMemoryCondensor(logger).Do(commitLogFileName(rootPath, "uncondensed2", input))
   301  		require.Nil(t, err)
   302  
   303  		control, ok, err := getCurrentCommitLogFileName(
   304  			commitLogDirectory(rootPath, "control"))
   305  		require.Nil(t, err)
   306  		require.True(t, ok)
   307  
   308  		condensed1, ok, err := getCurrentCommitLogFileName(
   309  			commitLogDirectory(rootPath, "uncondensed1"))
   310  		require.Nil(t, err)
   311  		require.True(t, ok)
   312  
   313  		condensed2, ok, err := getCurrentCommitLogFileName(
   314  			commitLogDirectory(rootPath, "uncondensed2"))
   315  		require.Nil(t, err)
   316  		require.True(t, ok)
   317  
   318  		assert.True(t, strings.HasSuffix(condensed1, ".condensed"),
   319  			"commit log is now saved as condensed")
   320  		assert.True(t, strings.HasSuffix(condensed2, ".condensed"),
   321  			"commit log is now saved as condensed")
   322  
   323  		assertIndicesFromCommitLogsMatch(t, commitLogFileName(rootPath, "control", control),
   324  			[]string{
   325  				commitLogFileName(rootPath, "uncondensed1", condensed1),
   326  				commitLogFileName(rootPath, "uncondensed2", condensed2),
   327  			})
   328  	})
   329  }
   330  
   331  // This test was added as part of the investigation and fixing of
   332  // https://github.com/weaviate/weaviate/issues/1868. We used the new
   333  // (higher level) test in index_too_many_links_bug_integration_test.go to
   334  // reproduce the problem without knowing what causes it. Eventually we came to
   335  // the conclusion that "ClearLinksAtLevel" was not propagated correctly across
   336  // two independently condensed commit logs. While the higher-level test already
   337  // makes sure that the bug is gone and prevents regressions, this test was
   338  // still added to test the broken (now fixed) behavior in relative isolation.
   339  func TestCondensorClearLinksAtLevel(t *testing.T) {
   340  	rootPath := t.TempDir()
   341  	ctx := context.Background()
   342  
   343  	logger, _ := test.NewNullLogger()
   344  	uncondensed1, err := NewCommitLogger(rootPath, "uncondensed1", logger,
   345  		cyclemanager.NewCallbackGroupNoop())
   346  	require.Nil(t, err)
   347  	defer uncondensed1.Shutdown(ctx)
   348  
   349  	uncondensed2, err := NewCommitLogger(rootPath, "uncondensed2", logger,
   350  		cyclemanager.NewCallbackGroupNoop())
   351  	require.Nil(t, err)
   352  	defer uncondensed2.Shutdown(ctx)
   353  
   354  	control, err := NewCommitLogger(rootPath, "control", logger,
   355  		cyclemanager.NewCallbackGroupNoop())
   356  	require.Nil(t, err)
   357  	defer control.Shutdown(ctx)
   358  
   359  	t.Run("add data to the first log", func(t *testing.T) {
   360  		uncondensed1.AddNode(&vertex{id: 0, level: 1})
   361  		uncondensed1.AddLinkAtLevel(0, 0, 1)
   362  		uncondensed1.AddLinkAtLevel(0, 0, 2)
   363  		uncondensed1.AddLinkAtLevel(0, 0, 3)
   364  		uncondensed1.AddLinkAtLevel(0, 1, 1)
   365  		uncondensed1.AddLinkAtLevel(0, 1, 2)
   366  
   367  		require.Nil(t, uncondensed1.Flush())
   368  	})
   369  
   370  	t.Run("replace all data from previous log", func(t *testing.T) {
   371  		uncondensed2.AddLinkAtLevel(0, 0, 10)
   372  		uncondensed2.ClearLinksAtLevel(0, 0)
   373  		uncondensed2.AddLinkAtLevel(0, 0, 4)
   374  		uncondensed2.AddLinkAtLevel(0, 0, 5)
   375  		uncondensed2.AddLinkAtLevel(0, 0, 6)
   376  		uncondensed2.AddLinkAtLevel(0, 0, 7)
   377  		uncondensed2.ClearLinksAtLevel(0, 1)
   378  		uncondensed2.AddLinkAtLevel(0, 1, 8)
   379  
   380  		require.Nil(t, uncondensed2.Flush())
   381  	})
   382  
   383  	t.Run("create a control log", func(t *testing.T) {
   384  		control.AddNode(&vertex{id: 0, level: 1})
   385  		control.ReplaceLinksAtLevel(0, 0, []uint64{4, 5, 6, 7})
   386  		control.ReplaceLinksAtLevel(0, 1, []uint64{8})
   387  
   388  		require.Nil(t, control.Flush())
   389  	})
   390  
   391  	t.Run("condense both logs and verify the contents against the control", func(t *testing.T) {
   392  		input, ok, err := getCurrentCommitLogFileName(commitLogDirectory(rootPath, "uncondensed1"))
   393  		require.Nil(t, err)
   394  		require.True(t, ok)
   395  
   396  		err = NewMemoryCondensor(logger).Do(commitLogFileName(rootPath, "uncondensed1", input))
   397  		require.Nil(t, err)
   398  
   399  		input, ok, err = getCurrentCommitLogFileName(commitLogDirectory(rootPath, "uncondensed2"))
   400  		require.Nil(t, err)
   401  		require.True(t, ok)
   402  
   403  		err = NewMemoryCondensor(logger).Do(commitLogFileName(rootPath, "uncondensed2", input))
   404  		require.Nil(t, err)
   405  
   406  		control, ok, err := getCurrentCommitLogFileName(
   407  			commitLogDirectory(rootPath, "control"))
   408  		require.Nil(t, err)
   409  		require.True(t, ok)
   410  
   411  		condensed1, ok, err := getCurrentCommitLogFileName(
   412  			commitLogDirectory(rootPath, "uncondensed1"))
   413  		require.Nil(t, err)
   414  		require.True(t, ok)
   415  
   416  		condensed2, ok, err := getCurrentCommitLogFileName(
   417  			commitLogDirectory(rootPath, "uncondensed2"))
   418  		require.Nil(t, err)
   419  		require.True(t, ok)
   420  
   421  		assert.True(t, strings.HasSuffix(condensed1, ".condensed"),
   422  			"commit log is now saved as condensed")
   423  		assert.True(t, strings.HasSuffix(condensed2, ".condensed"),
   424  			"commit log is now saved as condensed")
   425  
   426  		assertIndicesFromCommitLogsMatch(t, commitLogFileName(rootPath, "control", control),
   427  			[]string{
   428  				commitLogFileName(rootPath, "uncondensed1", condensed1),
   429  				commitLogFileName(rootPath, "uncondensed2", condensed2),
   430  			})
   431  	})
   432  }
   433  
   434  func TestCondensorWithoutEntrypoint(t *testing.T) {
   435  	rootPath := t.TempDir()
   436  	ctx := context.Background()
   437  
   438  	logger, _ := test.NewNullLogger()
   439  	uncondensed, err := NewCommitLogger(rootPath, "uncondensed", logger,
   440  		cyclemanager.NewCallbackGroupNoop())
   441  	require.Nil(t, err)
   442  	defer uncondensed.Shutdown(ctx)
   443  
   444  	t.Run("add data, but do not set an entrypoint", func(t *testing.T) {
   445  		uncondensed.AddNode(&vertex{id: 0, level: 3})
   446  
   447  		require.Nil(t, uncondensed.Flush())
   448  	})
   449  
   450  	t.Run("condense the original and verify it doesn't overwrite the EP", func(t *testing.T) {
   451  		input, ok, err := getCurrentCommitLogFileName(commitLogDirectory(rootPath, "uncondensed"))
   452  		require.Nil(t, err)
   453  		require.True(t, ok)
   454  
   455  		err = NewMemoryCondensor(logger).Do(commitLogFileName(rootPath, "uncondensed", input))
   456  		require.Nil(t, err)
   457  
   458  		actual, ok, err := getCurrentCommitLogFileName(
   459  			commitLogDirectory(rootPath, "uncondensed"))
   460  		require.Nil(t, err)
   461  		require.True(t, ok)
   462  
   463  		assert.True(t, strings.HasSuffix(actual, ".condensed"),
   464  			"commit log is now saved as condensed")
   465  
   466  		initialState := DeserializationResult{
   467  			Nodes:      nil,
   468  			Entrypoint: 17,
   469  			Level:      3,
   470  		}
   471  		fd, err := os.Open(commitLogFileName(rootPath, "uncondensed", actual))
   472  		require.Nil(t, err)
   473  
   474  		bufr := bufio.NewReader(fd)
   475  		res, _, err := NewDeserializer(logger).Do(bufr, &initialState, false)
   476  		require.Nil(t, err)
   477  
   478  		assert.Contains(t, res.Nodes, &vertex{id: 0, level: 3, connections: make([][]uint64, 4)})
   479  		assert.Equal(t, uint64(17), res.Entrypoint)
   480  		assert.Equal(t, uint16(3), res.Level)
   481  	})
   482  }
   483  
   484  func TestCondensorWithPQInformation(t *testing.T) {
   485  	rootPath := t.TempDir()
   486  	ctx := context.Background()
   487  
   488  	logger, _ := test.NewNullLogger()
   489  	uncondensed, err := NewCommitLogger(rootPath, "uncondensed", logger,
   490  		cyclemanager.NewCallbackGroupNoop())
   491  	require.Nil(t, err)
   492  	defer uncondensed.Shutdown(ctx)
   493  
   494  	encoders := []compressionhelpers.PQEncoder{
   495  		compressionhelpers.NewKMeansWithCenters(
   496  			4,
   497  			2,
   498  			0,
   499  			[][]float32{{1, 2}, {3, 4}, {5, 6}, {7, 8}},
   500  		),
   501  		compressionhelpers.NewKMeansWithCenters(
   502  			4,
   503  			2,
   504  			1,
   505  			[][]float32{{8, 7}, {6, 5}, {4, 3}, {2, 1}},
   506  		),
   507  		compressionhelpers.NewKMeansWithCenters(
   508  			4,
   509  			2,
   510  			2,
   511  			[][]float32{{1, 2}, {3, 4}, {5, 6}, {7, 8}},
   512  		),
   513  	}
   514  
   515  	t.Run("add pq info", func(t *testing.T) {
   516  		uncondensed.AddPQ(compressionhelpers.PQData{
   517  			Ks:                  4,
   518  			M:                   3,
   519  			Dimensions:          6,
   520  			EncoderType:         compressionhelpers.UseKMeansEncoder,
   521  			EncoderDistribution: uint8(0),
   522  			Encoders:            encoders,
   523  			UseBitsEncoding:     false,
   524  		})
   525  
   526  		require.Nil(t, uncondensed.Flush())
   527  	})
   528  
   529  	t.Run("condense the original and verify the PQ info is present", func(t *testing.T) {
   530  		input, ok, err := getCurrentCommitLogFileName(commitLogDirectory(rootPath, "uncondensed"))
   531  		require.Nil(t, err)
   532  		require.True(t, ok)
   533  
   534  		err = NewMemoryCondensor(logger).Do(commitLogFileName(rootPath, "uncondensed", input))
   535  		require.Nil(t, err)
   536  
   537  		actual, ok, err := getCurrentCommitLogFileName(
   538  			commitLogDirectory(rootPath, "uncondensed"))
   539  		require.Nil(t, err)
   540  		require.True(t, ok)
   541  
   542  		assert.True(t, strings.HasSuffix(actual, ".condensed"),
   543  			"commit log is now saved as condensed")
   544  
   545  		initialState := DeserializationResult{}
   546  		fd, err := os.Open(commitLogFileName(rootPath, "uncondensed", actual))
   547  		require.Nil(t, err)
   548  
   549  		bufr := bufio.NewReader(fd)
   550  		res, _, err := NewDeserializer(logger).Do(bufr, &initialState, false)
   551  		require.Nil(t, err)
   552  
   553  		assert.True(t, res.Compressed)
   554  		expected := compressionhelpers.PQData{
   555  			Ks:                  4,
   556  			M:                   3,
   557  			Dimensions:          6,
   558  			EncoderType:         compressionhelpers.UseKMeansEncoder,
   559  			EncoderDistribution: uint8(0),
   560  			Encoders:            encoders,
   561  			UseBitsEncoding:     false,
   562  		}
   563  
   564  		assert.Equal(t, expected, res.PQData)
   565  	})
   566  }
   567  
   568  func assertIndicesFromCommitLogsMatch(t *testing.T, fileNameControl string,
   569  	fileNames []string,
   570  ) {
   571  	control := readFromCommitLogs(t, fileNameControl)
   572  	actual := readFromCommitLogs(t, fileNames...)
   573  
   574  	assert.Equal(t, control, actual)
   575  }
   576  
   577  func readFromCommitLogs(t *testing.T, fileNames ...string) *hnsw {
   578  	var res *DeserializationResult
   579  
   580  	for _, fileName := range fileNames {
   581  		fd, err := os.Open(fileName)
   582  		require.Nil(t, err)
   583  
   584  		bufr := bufio.NewReader(fd)
   585  		logger, _ := test.NewNullLogger()
   586  		res, _, err = NewDeserializer(logger).Do(bufr, res, false)
   587  		require.Nil(t, err)
   588  	}
   589  
   590  	return &hnsw{
   591  		nodes:               removeTrailingNilNodes(res.Nodes),
   592  		currentMaximumLayer: int(res.Level),
   593  		entryPointID:        res.Entrypoint,
   594  		tombstones:          res.Tombstones,
   595  	}
   596  }
   597  
   598  // just a test helper to make the output easier to compare, remove all trailing
   599  // nil nodes by starting from the last and stopping as soon as a node is not
   600  // nil
   601  func removeTrailingNilNodes(in []*vertex) []*vertex {
   602  	pos := len(in) - 1
   603  
   604  	for pos >= 0 {
   605  		if in[pos] != nil {
   606  			break
   607  		}
   608  
   609  		pos--
   610  	}
   611  
   612  	return in[:pos+1]
   613  }