github.com/keybase/client/go@v0.0.0-20240309051027-028f7c731f8b/kbfs/search/indexer_test.go (about) 1 // Copyright 2020 Keybase Inc. All rights reserved. 2 // Use of this source code is governed by a BSD 3 // license that can be found in the LICENSE file. 4 5 package search 6 7 import ( 8 "context" 9 "errors" 10 "fmt" 11 12 "os" 13 "path" 14 "testing" 15 "time" 16 17 "github.com/blevesearch/bleve" 18 "github.com/keybase/client/go/kbfs/data" 19 "github.com/keybase/client/go/kbfs/idutil" 20 "github.com/keybase/client/go/kbfs/libcontext" 21 "github.com/keybase/client/go/kbfs/libkbfs" 22 "github.com/keybase/client/go/kbfs/tlf" 23 "github.com/keybase/client/go/kbfs/tlfhandle" 24 "github.com/keybase/client/go/logger" 25 "github.com/keybase/client/go/protocol/keybase1" 26 "github.com/stretchr/testify/require" 27 ) 28 29 func testInitConfig( 30 ctx context.Context, config libkbfs.Config, session idutil.SessionInfo, 31 log logger.Logger) ( 32 newCtx context.Context, newConfig libkbfs.Config, 33 configShutdown func(context.Context) error, err error) { 34 configLocal, ok := config.(*libkbfs.ConfigLocal) 35 if !ok { 36 panic(fmt.Sprintf("Wrong config type: %T", config)) 37 } 38 39 newConfig = libkbfs.ConfigAsUserWithMode( 40 configLocal, session.Name, libkbfs.InitSingleOp) 41 42 kbCtx := config.KbContext() 43 params, err := Params(kbCtx, config.StorageRoot(), session.UID) 44 if err != nil { 45 return nil, nil, nil, err 46 } 47 newConfig.(*libkbfs.ConfigLocal).SetStorageRoot(params.StorageRoot) 48 49 // We use disk-based servers here, instead of memory-based ones 50 // which would normally be preferrable in a test, because bleve 51 // writes out a config file during kvstore-registration that needs 52 // to persist across the multiple indexer instances that will be 53 // made during the test (one on startup, and one when the user 54 // login notification is triggered). If we use mem-based storage, 55 // the config file is lost when the first indexer instance is 56 // destroyed, and bleve won't work after that. 57 mdserver, err := libkbfs.MakeDiskMDServer(config, params.StorageRoot) 58 if err != nil { 59 return nil, nil, nil, err 60 } 61 newConfig.SetMDServer(mdserver) 62 63 bserver := libkbfs.MakeDiskBlockServer(config, params.StorageRoot) 64 newConfig.SetBlockServer(bserver) 65 66 newCtx, err = libcontext.NewContextWithCancellationDelayer( 67 libkbfs.CtxWithRandomIDReplayable( 68 ctx, ctxIDKey, ctxOpID, newConfig.MakeLogger(""))) 69 if err != nil { 70 return nil, nil, nil, err 71 } 72 73 return newCtx, newConfig, func(context.Context) error { 74 mdserver.Shutdown() 75 bserver.Shutdown(ctx) 76 return nil 77 }, nil 78 } 79 80 func writeFile( 81 ctx context.Context, t *testing.T, kbfsOps libkbfs.KBFSOps, i *Indexer, 82 rootNode, node libkbfs.Node, name, text string, newFile bool) { 83 oldMD, err := kbfsOps.GetNodeMetadata(ctx, node) 84 require.NoError(t, err) 85 86 err = kbfsOps.Write(ctx, node, []byte(text), 0) 87 require.NoError(t, err) 88 err = kbfsOps.SyncAll(ctx, rootNode.GetFolderBranch()) 89 require.NoError(t, err) 90 err = kbfsOps.SyncFromServer(ctx, rootNode.GetFolderBranch(), nil) 91 require.NoError(t, err) 92 93 t.Log("Wait for index to load") 94 err = i.waitForIndex(ctx) 95 require.NoError(t, err) 96 97 t.Log("Index the file") 98 namePPS := data.NewPathPartString(name, nil) 99 err = i.refreshBatch(ctx) 100 require.NoError(t, err) 101 if newFile { 102 ids, err := i.blocksDb.GetNextDocIDs(1) 103 require.NoError(t, err) 104 dirDoneFn, err := i.indexChild(ctx, rootNode, "", namePPS, ids[0], 1) 105 require.NoError(t, err) 106 require.NotNil(t, dirDoneFn) 107 } else { 108 dirDoneFn, err := i.updateChild( 109 ctx, rootNode, "", namePPS, oldMD.BlockInfo.BlockPointer, 1) 110 require.NoError(t, err) 111 require.NotNil(t, dirDoneFn) 112 } 113 err = i.flushBatch(ctx) 114 require.NoError(t, err) 115 116 err = kbfsOps.SyncAll(ctx, rootNode.GetFolderBranch()) 117 require.NoError(t, err) 118 err = kbfsOps.SyncFromServer(ctx, rootNode.GetFolderBranch(), nil) 119 require.NoError(t, err) 120 } 121 122 func writeNewFile( 123 ctx context.Context, t *testing.T, kbfsOps libkbfs.KBFSOps, i *Indexer, 124 rootNode libkbfs.Node, name, text string) { 125 t.Logf("Making file %s", name) 126 namePPS := data.NewPathPartString(name, nil) 127 n, _, err := kbfsOps.CreateFile( 128 ctx, rootNode, namePPS, false, libkbfs.NoExcl) 129 require.NoError(t, err) 130 writeFile(ctx, t, kbfsOps, i, rootNode, n, name, text, true) 131 } 132 133 func testSearch(t *testing.T, i *Indexer, word string, expected int) { 134 query := bleve.NewQueryStringQuery(word) 135 request := bleve.NewSearchRequest(query) 136 result, err := i.index.Search(request) 137 require.NoError(t, err) 138 require.Len(t, result.Hits, expected) 139 } 140 141 func testKVStoreName(testName string) string { 142 return fmt.Sprintf( 143 "%s_%s_%d", kvstoreNamePrefix, testName, time.Now().UnixNano()) 144 } 145 146 func TestIndexFile(t *testing.T) { 147 ctx := libcontext.BackgroundContextWithCancellationDelayer() 148 ctx, cancel := context.WithTimeout(ctx, 10*time.Second) 149 defer cancel() 150 config := libkbfs.MakeTestConfigOrBust(t, "user1", "user2") 151 defer libkbfs.CheckConfigAndShutdown(ctx, t, config) 152 153 tempdir, err := os.MkdirTemp("", "indexTest") 154 require.NoError(t, err) 155 defer os.RemoveAll(tempdir) 156 config.SetStorageRoot(tempdir) 157 158 i, err := newIndexerWithConfigInit( 159 config, testInitConfig, testKVStoreName("TestIndexFile")) 160 require.NoError(t, err) 161 defer func() { 162 err := i.Shutdown(ctx) 163 require.NoError(t, err) 164 }() 165 166 h, err := tlfhandle.ParseHandle( 167 ctx, config.KBPKI(), config.MDOps(), nil, "user1", tlf.Private) 168 require.NoError(t, err) 169 kbfsOps := config.KBFSOps() 170 rootNode, _, err := kbfsOps.GetOrCreateRootNode(ctx, h, data.MasterBranch) 171 require.NoError(t, err) 172 const aText = "Lorem ipsum dolor sit amet, consectetur adipiscing elit." 173 const aName = "a" 174 writeNewFile(ctx, t, kbfsOps, i, rootNode, aName, aText) 175 const bHTML = "Mauris et <a href=neque>sit</a> amet nisi " + 176 "<b>condimentum</b> fringilla vel non augue" 177 writeNewFile(ctx, t, kbfsOps, i, rootNode, "b.html", bHTML) 178 179 t.Log("Search for plaintext") 180 testSearch(t, i, "dolor", 1) 181 182 t.Log("Search for lower-case") 183 testSearch(t, i, "lorem", 1) 184 185 t.Log("Search for html") 186 testSearch(t, i, "condimentum", 1) 187 188 t.Log("Search for word in html tag, which shouldn't be indexed") 189 testSearch(t, i, "neque", 0) 190 191 t.Log("Search for shared word") 192 testSearch(t, i, "sit", 2) 193 194 t.Log("Re-index a file using the same docID") 195 aNamePPS := data.NewPathPartString(aName, nil) 196 aNode, _, err := kbfsOps.Lookup(ctx, rootNode, aNamePPS) 197 require.NoError(t, err) 198 const aNewText = "Ut feugiat dolor in tortor viverra, ac egestas justo " + 199 "tincidunt." 200 writeFile(ctx, t, kbfsOps, i, rootNode, aNode, aName, aNewText, false) 201 202 t.Log("Search for old and new words") 203 testSearch(t, i, "dolor", 1) // two hits in same doc 204 testSearch(t, i, "tortor", 1) 205 206 t.Log("Add a hit in a filename") 207 const dText = "Cras volutpat mi in purus interdum, sit amet luctus " + 208 "velit accumsan." 209 const dName = "dolor.txt" 210 writeNewFile(ctx, t, kbfsOps, i, rootNode, dName, dText) 211 testSearch(t, i, "dolor", 2) 212 213 t.Log("Rename the file") 214 const newDName = "neque.txt" 215 newDNamePPS := data.NewPathPartString(newDName, nil) 216 err = kbfsOps.Rename( 217 ctx, rootNode, data.NewPathPartString(dName, nil), rootNode, 218 newDNamePPS) 219 require.NoError(t, err) 220 err = i.refreshBatch(ctx) 221 require.NoError(t, err) 222 err = i.renameChild(ctx, rootNode, "", newDNamePPS, 1) 223 require.NoError(t, err) 224 err = i.flushBatch(ctx) 225 require.NoError(t, err) 226 err = kbfsOps.SyncAll(ctx, rootNode.GetFolderBranch()) 227 require.NoError(t, err) 228 err = kbfsOps.SyncFromServer(ctx, rootNode.GetFolderBranch(), nil) 229 require.NoError(t, err) 230 testSearch(t, i, "dolor", 1) 231 testSearch(t, i, "neque", 1) 232 233 t.Log("Delete a file") 234 md, err := kbfsOps.GetNodeMetadata(ctx, aNode) 235 require.NoError(t, err) 236 err = kbfsOps.RemoveEntry(ctx, rootNode, aNamePPS) 237 require.NoError(t, err) 238 err = i.refreshBatch(ctx) 239 require.NoError(t, err) 240 err = i.deleteFromUnrefs( 241 ctx, rootNode.GetFolderBranch().Tlf, 242 []data.BlockPointer{md.BlockInfo.BlockPointer}) 243 require.NoError(t, err) 244 err = i.flushBatch(ctx) 245 require.NoError(t, err) 246 err = kbfsOps.SyncAll(ctx, rootNode.GetFolderBranch()) 247 require.NoError(t, err) 248 err = kbfsOps.SyncFromServer(ctx, rootNode.GetFolderBranch(), nil) 249 require.NoError(t, err) 250 testSearch(t, i, "tortor", 0) 251 } 252 253 func makeSingleDirTreeToIndex( 254 ctx context.Context, t *testing.T, kbfsOps libkbfs.KBFSOps, 255 rootNode libkbfs.Node, dirName, text1, text2 string) { 256 dirNamePPS := data.NewPathPartString(dirName, nil) 257 dirNode, _, err := kbfsOps.CreateDir(ctx, rootNode, dirNamePPS) 258 require.NoError(t, err) 259 f1Name := dirName + "_file1" 260 f1NamePPS := data.NewPathPartString(f1Name, nil) 261 f1Node, _, err := kbfsOps.CreateFile( 262 ctx, dirNode, f1NamePPS, false, libkbfs.NoExcl) 263 require.NoError(t, err) 264 err = kbfsOps.Write(ctx, f1Node, []byte(text1), 0) 265 require.NoError(t, err) 266 f2Name := dirName + "_file2" 267 f2NamePPS := data.NewPathPartString(f2Name, nil) 268 f2Node, _, err := kbfsOps.CreateFile( 269 ctx, dirNode, f2NamePPS, false, libkbfs.NoExcl) 270 require.NoError(t, err) 271 err = kbfsOps.Write(ctx, f2Node, []byte(text2), 0) 272 require.NoError(t, err) 273 } 274 275 func makeDirTreesToIndex( 276 ctx context.Context, t *testing.T, kbfsOps libkbfs.KBFSOps, 277 rootNode libkbfs.Node) (names []string) { 278 aName := "alpha" 279 const a1Text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit." 280 const a2Text = "Mauris et neque sit amet nisi condimentum fringilla " + 281 "vel non augue" 282 makeSingleDirTreeToIndex(ctx, t, kbfsOps, rootNode, aName, a1Text, a2Text) 283 284 bName := "beta" 285 const b1Text = "Ut feugiat dolor in tortor viverra, ac egestas justo " + 286 "tincidunt." 287 const b2Text = "Cras volutpat mi in purus interdum, sit amet luctus " + 288 "velit accumsan." 289 makeSingleDirTreeToIndex(ctx, t, kbfsOps, rootNode, bName, b1Text, b2Text) 290 err := kbfsOps.SyncAll(ctx, rootNode.GetFolderBranch()) 291 require.NoError(t, err) 292 err = kbfsOps.SyncFromServer(ctx, rootNode.GetFolderBranch(), nil) 293 require.NoError(t, err) 294 return []string{aName, bName} 295 } 296 297 func TestFullIndexSyncedTlf(t *testing.T) { 298 ctx := libcontext.BackgroundContextWithCancellationDelayer() 299 ctx, cancel := context.WithTimeout(ctx, 10*time.Second) 300 defer cancel() 301 config := libkbfs.MakeTestConfigOrBust(t, "user1", "user2") 302 defer libkbfs.CheckConfigAndShutdown(ctx, t, config) 303 304 tempdir, err := os.MkdirTemp("", "indexTest") 305 require.NoError(t, err) 306 defer os.RemoveAll(tempdir) 307 config.SetStorageRoot(tempdir) 308 309 err = config.EnableDiskLimiter(tempdir) 310 require.NoError(t, err) 311 config.SetDiskCacheMode(libkbfs.DiskCacheModeLocal) 312 err = config.MakeDiskBlockCacheIfNotExists() 313 require.NoError(t, err) 314 err = config.MakeDiskMDCacheIfNotExists() 315 require.NoError(t, err) 316 317 i, err := newIndexerWithConfigInit( 318 config, testInitConfig, testKVStoreName("TestFullIndexSyncedTlf")) 319 require.NoError(t, err) 320 defer func() { 321 err := i.Shutdown(ctx) 322 require.NoError(t, err) 323 }() 324 325 h, err := tlfhandle.ParseHandle( 326 ctx, config.KBPKI(), config.MDOps(), nil, "user1", tlf.Private) 327 require.NoError(t, err) 328 kbfsOps := config.KBFSOps() 329 rootNode, _, err := kbfsOps.GetOrCreateRootNode(ctx, h, data.MasterBranch) 330 require.NoError(t, err) 331 332 t.Log("Create two dirs with two files each") 333 names := makeDirTreesToIndex(ctx, t, kbfsOps, rootNode) 334 335 t.Log("Wait for index to load") 336 err = i.waitForIndex(ctx) 337 require.NoError(t, err) 338 339 ch := make(chan error) 340 i.fullIndexCB = func() error { 341 select { 342 case err := <-ch: 343 return err 344 case <-ctx.Done(): 345 return ctx.Err() 346 } 347 } 348 349 t.Log("Enable syncing") 350 _, err = kbfsOps.SetSyncConfig( 351 ctx, rootNode.GetFolderBranch().Tlf, keybase1.FolderSyncConfig{ 352 Mode: keybase1.FolderSyncMode_ENABLED, 353 }) 354 require.NoError(t, err) 355 err = kbfsOps.SyncFromServer(ctx, rootNode.GetFolderBranch(), nil) 356 require.NoError(t, err) 357 358 t.Log("Index the root node and first full dir, but fail the first child " + 359 "of the second dir") 360 sendToIndexer := func(err error) { 361 select { 362 case ch <- err: 363 case <-ctx.Done(): 364 } 365 } 366 sendToIndexer(nil) // alpha 367 sendToIndexer(nil) // alpha1 368 sendToIndexer(nil) // alpha2 369 sendToIndexer(nil) // beta 370 err = errors.New("STOP") 371 sendToIndexer(err) 372 373 err = i.waitForSyncs(ctx) 374 require.NoError(t, err) 375 376 t.Log("New write will resume the interrupted indexer -- 2 children left " + 377 "to index on the old view, then 3 on the new view") 378 379 oName := "omega" 380 const o1Text = "Sed ullamcorper consectetur velit eget dapibus." 381 const o2Text = "Praesent feugiat feugiat dui, at egestas lacus pretium vel." 382 makeSingleDirTreeToIndex(ctx, t, kbfsOps, rootNode, oName, o1Text, o2Text) 383 err = kbfsOps.SyncAll(ctx, rootNode.GetFolderBranch()) 384 require.NoError(t, err) 385 err = kbfsOps.SyncFromServer(ctx, rootNode.GetFolderBranch(), nil) 386 require.NoError(t, err) 387 388 sendToIndexer(nil) // alpha (already done) 389 sendToIndexer(nil) // beta (name indexed, but dir not done yet) 390 sendToIndexer(nil) // beta1 391 sendToIndexer(nil) // beta2 392 // Incremental update. 393 sendToIndexer(nil) // omega 394 sendToIndexer(nil) // omega1 395 sendToIndexer(nil) // omega2 396 397 err = i.waitForSyncs(ctx) 398 require.NoError(t, err) 399 400 t.Log("Check searches") 401 testSearch(t, i, "dolor", 2) 402 testSearch(t, i, "feugiat", 2) 403 testSearch(t, i, names[0], 3) // Child nodes have "alpha" in their name too 404 testSearch(t, i, "file1", 3) 405 testSearch(t, i, "omega", 3) 406 testSearch(t, i, "ullamcorper", 1) 407 408 t.Log("Test a rename and a delete") 409 newName := "gamma" 410 newNamePPS := data.NewPathPartString(newName, nil) 411 err = kbfsOps.Rename( 412 ctx, rootNode, data.NewPathPartString(names[0], nil), rootNode, 413 newNamePPS) 414 require.NoError(t, err) 415 dirNode, _, err := kbfsOps.Lookup(ctx, rootNode, newNamePPS) 416 require.NoError(t, err) 417 err = kbfsOps.RemoveEntry( 418 ctx, dirNode, data.NewPathPartString(names[0]+"_file1", nil)) 419 require.NoError(t, err) 420 err = kbfsOps.SyncFromServer(ctx, rootNode.GetFolderBranch(), nil) 421 require.NoError(t, err) 422 423 sendToIndexer(nil) // gamma dir update 424 425 err = i.waitForSyncs(ctx) 426 require.NoError(t, err) 427 428 t.Log("Check searches") 429 testSearch(t, i, "dolor", 1) 430 testSearch(t, i, names[0], 1) 431 testSearch(t, i, newName, 1) 432 } 433 434 func TestFullIndexSearch(t *testing.T) { 435 ctx := libcontext.BackgroundContextWithCancellationDelayer() 436 ctx, cancel := context.WithTimeout(ctx, 10*time.Second) 437 defer cancel() 438 config := libkbfs.MakeTestConfigOrBust(t, "user1", "user2") 439 defer libkbfs.CheckConfigAndShutdown(ctx, t, config) 440 441 tempdir, err := os.MkdirTemp("", "indexTest") 442 require.NoError(t, err) 443 defer os.RemoveAll(tempdir) 444 config.SetStorageRoot(tempdir) 445 446 err = config.EnableDiskLimiter(tempdir) 447 require.NoError(t, err) 448 config.SetDiskCacheMode(libkbfs.DiskCacheModeLocal) 449 err = config.MakeDiskBlockCacheIfNotExists() 450 require.NoError(t, err) 451 err = config.MakeDiskMDCacheIfNotExists() 452 require.NoError(t, err) 453 454 i, err := newIndexerWithConfigInit( 455 config, testInitConfig, testKVStoreName("TestFullIndexSyncedTlf")) 456 require.NoError(t, err) 457 defer func() { 458 err := i.Shutdown(ctx) 459 require.NoError(t, err) 460 }() 461 462 h, err := tlfhandle.ParseHandle( 463 ctx, config.KBPKI(), config.MDOps(), nil, "user1", tlf.Private) 464 require.NoError(t, err) 465 kbfsOps := config.KBFSOps() 466 rootNode, _, err := kbfsOps.GetOrCreateRootNode(ctx, h, data.MasterBranch) 467 require.NoError(t, err) 468 469 t.Log("Create two dirs with two files each") 470 names := makeDirTreesToIndex(ctx, t, kbfsOps, rootNode) 471 472 t.Log("Wait for index to load") 473 err = i.waitForIndex(ctx) 474 require.NoError(t, err) 475 476 t.Log("Enable syncing") 477 _, err = kbfsOps.SetSyncConfig( 478 ctx, rootNode.GetFolderBranch().Tlf, keybase1.FolderSyncConfig{ 479 Mode: keybase1.FolderSyncMode_ENABLED, 480 }) 481 require.NoError(t, err) 482 err = kbfsOps.SyncFromServer(ctx, rootNode.GetFolderBranch(), nil) 483 require.NoError(t, err) 484 485 err = i.waitForSyncs(ctx) 486 require.NoError(t, err) 487 488 t.Log("Search!") 489 checkSearch := func( 490 query string, numResults, start int, expectedResults map[string]bool) { 491 results, _, err := i.Search(ctx, query, numResults, start) 492 require.NoError(t, err) 493 for _, r := range results { 494 _, ok := expectedResults[r.Path] 495 require.True(t, ok, r.Path) 496 delete(expectedResults, r.Path) 497 } 498 require.Len(t, expectedResults, 0) 499 } 500 501 userPath := func(dir, child string) string { 502 return path.Clean("/keybase/private/user1/" + dir + "/" + child) 503 } 504 505 checkSearch("dolor", 10, 0, map[string]bool{ 506 userPath(names[0], names[0]+"_file1"): true, 507 userPath(names[1], names[1]+"_file1"): true, 508 }) 509 510 checkSearch(names[0], 10, 0, map[string]bool{ 511 userPath(names[0], ""): true, 512 userPath(names[0], names[0]+"_file1"): true, 513 userPath(names[0], names[0]+"_file2"): true, 514 }) 515 516 t.Log("Try partial results") 517 results, nextResult, err := i.Search(ctx, names[0], 2, 0) 518 require.NoError(t, err) 519 require.Len(t, results, 2) 520 require.Equal(t, 2, nextResult) 521 results2, nextResult2, err := i.Search(ctx, names[0], 2, nextResult) 522 require.NoError(t, err) 523 require.Len(t, results2, 1) 524 require.Equal(t, -1, nextResult2) 525 }