github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/debug.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package hnsw 13 14 import ( 15 "encoding/json" 16 "fmt" 17 "strings" 18 19 "github.com/weaviate/weaviate/adapters/repos/db/vector/common" 20 "github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw/distancer" 21 "github.com/weaviate/weaviate/entities/cyclemanager" 22 ent "github.com/weaviate/weaviate/entities/vectorindex/hnsw" 23 ) 24 25 // Dump to stdout for debugging purposes 26 func (h *hnsw) Dump(labels ...string) { 27 if len(labels) > 0 { 28 fmt.Printf("--------------------------------------------------\n") 29 fmt.Printf("-- %s\n", strings.Join(labels, ", ")) 30 } 31 fmt.Printf("--------------------------------------------------\n") 32 fmt.Printf("ID: %s\n", h.id) 33 fmt.Printf("Entrypoint: %d\n", h.entryPointID) 34 fmt.Printf("Max Level: %d\n", h.currentMaximumLayer) 35 fmt.Printf("Tombstones %v\n", h.tombstones) 36 fmt.Printf("\nNodes and Connections:\n") 37 for _, node := range h.nodes { 38 if node == nil { 39 continue 40 } 41 42 fmt.Printf(" Node %d (level %d)\n", node.id, node.level) 43 for level, conns := range node.connections { 44 fmt.Printf(" Level %d: Connections: %v\n", level, conns) 45 } 46 } 47 48 fmt.Printf("--------------------------------------------------\n") 49 } 50 51 // DumpJSON to stdout for debugging purposes 52 func (h *hnsw) DumpJSON(labels ...string) { 53 dump := JSONDump{ 54 Labels: labels, 55 ID: h.id, 56 Entrypoint: h.entryPointID, 57 CurrentMaximumLayer: h.currentMaximumLayer, 58 Tombstones: h.tombstones, 59 } 60 for _, node := range h.nodes { 61 if node == nil { 62 continue 63 } 64 65 dumpNode := JSONDumpNode{ 66 ID: node.id, 67 Level: node.level, 68 Connections: node.connections, 69 } 70 dump.Nodes = append(dump.Nodes, dumpNode) 71 } 72 73 out, err := json.Marshal(dump) 74 if err != nil { 75 fmt.Println(err) 76 } 77 fmt.Printf("%s\n", string(out)) 78 } 79 80 type JSONDump struct { 81 Labels []string `json:"labels"` 82 ID string `json:"id"` 83 Entrypoint uint64 `json:"entrypoint"` 84 CurrentMaximumLayer int `json:"currentMaximumLayer"` 85 Tombstones map[uint64]struct{} `json:"tombstones"` 86 Nodes []JSONDumpNode `json:"nodes"` 87 } 88 89 type JSONDumpNode struct { 90 ID uint64 `json:"id"` 91 Level int `json:"level"` 92 Connections [][]uint64 `json:"connections"` 93 } 94 95 type JSONDumpMap struct { 96 Labels []string `json:"labels"` 97 ID string `json:"id"` 98 Entrypoint uint64 `json:"entrypoint"` 99 CurrentMaximumLayer int `json:"currentMaximumLayer"` 100 Tombstones map[uint64]struct{} `json:"tombstones"` 101 Nodes []JSONDumpNodeMap `json:"nodes"` 102 } 103 104 type JSONDumpNodeMap struct { 105 ID uint64 `json:"id"` 106 Level int `json:"level"` 107 Connections map[int][]uint64 `json:"connections"` 108 } 109 110 func NewFromJSONDump(dumpBytes []byte, vecForID common.VectorForID[float32]) (*hnsw, error) { 111 var dump JSONDump 112 err := json.Unmarshal(dumpBytes, &dump) 113 if err != nil { 114 return nil, err 115 } 116 117 index, err := New(Config{ 118 RootPath: "doesnt-matter-as-committlogger-is-mocked-out", 119 ID: dump.ID, 120 MakeCommitLoggerThunk: MakeNoopCommitLogger, 121 DistanceProvider: distancer.NewCosineDistanceProvider(), 122 VectorForIDThunk: vecForID, 123 }, ent.UserConfig{ 124 MaxConnections: 30, 125 EFConstruction: 128, 126 }, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), nil) 127 if err != nil { 128 return nil, err 129 } 130 131 index.currentMaximumLayer = dump.CurrentMaximumLayer 132 index.entryPointID = dump.Entrypoint 133 index.tombstones = dump.Tombstones 134 135 for _, n := range dump.Nodes { 136 index.nodes[n.ID] = &vertex{ 137 id: n.ID, 138 level: n.Level, 139 connections: n.Connections, 140 } 141 } 142 143 return index, nil 144 } 145 146 func NewFromJSONDumpMap(dumpBytes []byte, vecForID common.VectorForID[float32]) (*hnsw, error) { 147 var dump JSONDumpMap 148 err := json.Unmarshal(dumpBytes, &dump) 149 if err != nil { 150 return nil, err 151 } 152 153 index, err := New(Config{ 154 RootPath: "doesnt-matter-as-committlogger-is-mocked-out", 155 ID: dump.ID, 156 MakeCommitLoggerThunk: MakeNoopCommitLogger, 157 DistanceProvider: distancer.NewCosineDistanceProvider(), 158 VectorForIDThunk: vecForID, 159 }, ent.UserConfig{ 160 MaxConnections: 30, 161 EFConstruction: 128, 162 }, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), nil) 163 if err != nil { 164 return nil, err 165 } 166 167 index.currentMaximumLayer = dump.CurrentMaximumLayer 168 index.entryPointID = dump.Entrypoint 169 index.tombstones = dump.Tombstones 170 171 for _, n := range dump.Nodes { 172 index.nodes[n.ID] = &vertex{ 173 id: n.ID, 174 level: n.Level, 175 connections: make([][]uint64, len(n.Connections)), 176 } 177 for level, conns := range n.Connections { 178 index.nodes[n.ID].connections[level] = conns 179 } 180 } 181 182 return index, nil 183 } 184 185 // was added as part of 186 // https://github.com/weaviate/weaviate/issues/1868 for debugging. It 187 // is not currently in use anywhere as it is somewhat costly, it would lock the 188 // entire graph and iterate over every node which would lead to disruptions in 189 // production. However, keeping this method around may be valuable for future 190 // investigations where the amount of links may be a problem. 191 func (h *hnsw) ValidateLinkIntegrity() { 192 h.RLock() 193 defer h.RUnlock() 194 195 for i, node := range h.nodes { 196 if node == nil { 197 continue 198 } 199 200 for level, conns := range node.connections { 201 m := h.maximumConnections 202 if level == 0 { 203 m = h.maximumConnectionsLayerZero 204 } 205 206 if len(conns) > m { 207 h.logger.Warnf("node %d at level %d has %d connections", i, level, len(conns)) 208 } 209 210 } 211 } 212 213 h.logger.Infof("completed link integrity check") 214 }