github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/debug.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package hnsw
    13  
    14  import (
    15  	"encoding/json"
    16  	"fmt"
    17  	"strings"
    18  
    19  	"github.com/weaviate/weaviate/adapters/repos/db/vector/common"
    20  	"github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw/distancer"
    21  	"github.com/weaviate/weaviate/entities/cyclemanager"
    22  	ent "github.com/weaviate/weaviate/entities/vectorindex/hnsw"
    23  )
    24  
    25  // Dump to stdout for debugging purposes
    26  func (h *hnsw) Dump(labels ...string) {
    27  	if len(labels) > 0 {
    28  		fmt.Printf("--------------------------------------------------\n")
    29  		fmt.Printf("--  %s\n", strings.Join(labels, ", "))
    30  	}
    31  	fmt.Printf("--------------------------------------------------\n")
    32  	fmt.Printf("ID: %s\n", h.id)
    33  	fmt.Printf("Entrypoint: %d\n", h.entryPointID)
    34  	fmt.Printf("Max Level: %d\n", h.currentMaximumLayer)
    35  	fmt.Printf("Tombstones %v\n", h.tombstones)
    36  	fmt.Printf("\nNodes and Connections:\n")
    37  	for _, node := range h.nodes {
    38  		if node == nil {
    39  			continue
    40  		}
    41  
    42  		fmt.Printf("  Node %d (level %d)\n", node.id, node.level)
    43  		for level, conns := range node.connections {
    44  			fmt.Printf("    Level %d: Connections: %v\n", level, conns)
    45  		}
    46  	}
    47  
    48  	fmt.Printf("--------------------------------------------------\n")
    49  }
    50  
    51  // DumpJSON to stdout for debugging purposes
    52  func (h *hnsw) DumpJSON(labels ...string) {
    53  	dump := JSONDump{
    54  		Labels:              labels,
    55  		ID:                  h.id,
    56  		Entrypoint:          h.entryPointID,
    57  		CurrentMaximumLayer: h.currentMaximumLayer,
    58  		Tombstones:          h.tombstones,
    59  	}
    60  	for _, node := range h.nodes {
    61  		if node == nil {
    62  			continue
    63  		}
    64  
    65  		dumpNode := JSONDumpNode{
    66  			ID:          node.id,
    67  			Level:       node.level,
    68  			Connections: node.connections,
    69  		}
    70  		dump.Nodes = append(dump.Nodes, dumpNode)
    71  	}
    72  
    73  	out, err := json.Marshal(dump)
    74  	if err != nil {
    75  		fmt.Println(err)
    76  	}
    77  	fmt.Printf("%s\n", string(out))
    78  }
    79  
    80  type JSONDump struct {
    81  	Labels              []string            `json:"labels"`
    82  	ID                  string              `json:"id"`
    83  	Entrypoint          uint64              `json:"entrypoint"`
    84  	CurrentMaximumLayer int                 `json:"currentMaximumLayer"`
    85  	Tombstones          map[uint64]struct{} `json:"tombstones"`
    86  	Nodes               []JSONDumpNode      `json:"nodes"`
    87  }
    88  
    89  type JSONDumpNode struct {
    90  	ID          uint64     `json:"id"`
    91  	Level       int        `json:"level"`
    92  	Connections [][]uint64 `json:"connections"`
    93  }
    94  
    95  type JSONDumpMap struct {
    96  	Labels              []string            `json:"labels"`
    97  	ID                  string              `json:"id"`
    98  	Entrypoint          uint64              `json:"entrypoint"`
    99  	CurrentMaximumLayer int                 `json:"currentMaximumLayer"`
   100  	Tombstones          map[uint64]struct{} `json:"tombstones"`
   101  	Nodes               []JSONDumpNodeMap   `json:"nodes"`
   102  }
   103  
   104  type JSONDumpNodeMap struct {
   105  	ID          uint64           `json:"id"`
   106  	Level       int              `json:"level"`
   107  	Connections map[int][]uint64 `json:"connections"`
   108  }
   109  
   110  func NewFromJSONDump(dumpBytes []byte, vecForID common.VectorForID[float32]) (*hnsw, error) {
   111  	var dump JSONDump
   112  	err := json.Unmarshal(dumpBytes, &dump)
   113  	if err != nil {
   114  		return nil, err
   115  	}
   116  
   117  	index, err := New(Config{
   118  		RootPath:              "doesnt-matter-as-committlogger-is-mocked-out",
   119  		ID:                    dump.ID,
   120  		MakeCommitLoggerThunk: MakeNoopCommitLogger,
   121  		DistanceProvider:      distancer.NewCosineDistanceProvider(),
   122  		VectorForIDThunk:      vecForID,
   123  	}, ent.UserConfig{
   124  		MaxConnections: 30,
   125  		EFConstruction: 128,
   126  	}, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), nil)
   127  	if err != nil {
   128  		return nil, err
   129  	}
   130  
   131  	index.currentMaximumLayer = dump.CurrentMaximumLayer
   132  	index.entryPointID = dump.Entrypoint
   133  	index.tombstones = dump.Tombstones
   134  
   135  	for _, n := range dump.Nodes {
   136  		index.nodes[n.ID] = &vertex{
   137  			id:          n.ID,
   138  			level:       n.Level,
   139  			connections: n.Connections,
   140  		}
   141  	}
   142  
   143  	return index, nil
   144  }
   145  
   146  func NewFromJSONDumpMap(dumpBytes []byte, vecForID common.VectorForID[float32]) (*hnsw, error) {
   147  	var dump JSONDumpMap
   148  	err := json.Unmarshal(dumpBytes, &dump)
   149  	if err != nil {
   150  		return nil, err
   151  	}
   152  
   153  	index, err := New(Config{
   154  		RootPath:              "doesnt-matter-as-committlogger-is-mocked-out",
   155  		ID:                    dump.ID,
   156  		MakeCommitLoggerThunk: MakeNoopCommitLogger,
   157  		DistanceProvider:      distancer.NewCosineDistanceProvider(),
   158  		VectorForIDThunk:      vecForID,
   159  	}, ent.UserConfig{
   160  		MaxConnections: 30,
   161  		EFConstruction: 128,
   162  	}, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), nil)
   163  	if err != nil {
   164  		return nil, err
   165  	}
   166  
   167  	index.currentMaximumLayer = dump.CurrentMaximumLayer
   168  	index.entryPointID = dump.Entrypoint
   169  	index.tombstones = dump.Tombstones
   170  
   171  	for _, n := range dump.Nodes {
   172  		index.nodes[n.ID] = &vertex{
   173  			id:          n.ID,
   174  			level:       n.Level,
   175  			connections: make([][]uint64, len(n.Connections)),
   176  		}
   177  		for level, conns := range n.Connections {
   178  			index.nodes[n.ID].connections[level] = conns
   179  		}
   180  	}
   181  
   182  	return index, nil
   183  }
   184  
   185  // was added as part of
   186  // https://github.com/weaviate/weaviate/issues/1868 for debugging. It
   187  // is not currently in use anywhere as it is somewhat costly, it would lock the
   188  // entire graph and iterate over every node which would lead to disruptions in
   189  // production. However, keeping this method around may be valuable for future
   190  // investigations where the amount of links may be a problem.
   191  func (h *hnsw) ValidateLinkIntegrity() {
   192  	h.RLock()
   193  	defer h.RUnlock()
   194  
   195  	for i, node := range h.nodes {
   196  		if node == nil {
   197  			continue
   198  		}
   199  
   200  		for level, conns := range node.connections {
   201  			m := h.maximumConnections
   202  			if level == 0 {
   203  				m = h.maximumConnectionsLayerZero
   204  			}
   205  
   206  			if len(conns) > m {
   207  				h.logger.Warnf("node %d at level %d has %d connections", i, level, len(conns))
   208  			}
   209  
   210  		}
   211  	}
   212  
   213  	h.logger.Infof("completed link integrity check")
   214  }