github.com/anishathalye/periscope@v0.3.5/internal/db/db.go (about) 1 package db 2 3 import ( 4 "github.com/anishathalye/periscope/internal/herror" 5 6 "bytes" 7 "database/sql" 8 "errors" 9 "fmt" 10 "log" 11 "math" 12 "path/filepath" 13 "sort" 14 "strconv" 15 "sync/atomic" 16 17 _ "github.com/mattn/go-sqlite3" 18 ) 19 20 const versionKey = "version" 21 const version = 3 22 23 type FileInfo struct { 24 Path string 25 Size int64 26 ShortHash []byte 27 FullHash []byte 28 } 29 30 type DuplicateSet []FileInfo 31 32 type fileInfosOrdering []FileInfo 33 34 func (a fileInfosOrdering) Len() int { return len(a) } 35 func (a fileInfosOrdering) Less(i, j int) bool { 36 if a[i].Size > a[j].Size { 37 return true 38 } else if a[i].Size < a[j].Size { 39 return false 40 } 41 return a[i].Path < a[j].Path 42 } 43 func (a fileInfosOrdering) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 44 45 type DuplicateInfo struct { 46 Path string 47 FullHash []byte 48 Count int64 49 } 50 51 type duplicateInfoByPath []DuplicateInfo 52 53 func (a duplicateInfoByPath) Len() int { return len(a) } 54 func (a duplicateInfoByPath) Less(i, j int) bool { return a[i].Path < a[j].Path } 55 func (a duplicateInfoByPath) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 56 57 type InfoSummary struct { 58 Files int64 59 Unique int64 60 Duplicate int64 61 Overhead int64 62 } 63 64 // A database session, or a transaction. 65 // 66 // This is a sort of weird implementation, but it makes the 67 // interface/implementation convenient. The same type exposes a bunch of 68 // methods that operate on the database, and the object and methods are the 69 // same regardless of whether the operations are done within a transaction. 70 // 71 // Calling Begin() returns a Session that is a transaction, and calling 72 // Commit() on the resultant session (transaction) commits the transaction. 73 // 74 // The db field is non-nil for a new session. For an open transaction, db is 75 // nil and tx is non-nil. Once Commit() is called on the transaction, both the 76 // db and tx are nil (and any method calls on this object will fail). 77 type Session struct { 78 db *sql.DB 79 tx *sql.Tx 80 } 81 82 var inMemoryDbCtr int64 = 0 83 84 func NewInMemory() (*Session, herror.Interface) { 85 // https://www.sqlite.org/inmemorydb.html#sharedmemdb 86 // 87 // We need distinct in-memory databases (for separate tests), 88 // but each in-memory database should support multiple connections 89 ctr := atomic.LoadInt64(&inMemoryDbCtr) 90 atomic.StoreInt64(&inMemoryDbCtr, ctr+1) 91 return New(fmt.Sprintf("file:memdb%d?mode=memory&cache=shared", ctr), true) 92 } 93 94 func New(dataSourceName string, debug bool) (*Session, herror.Interface) { 95 db, err := sql.Open("sqlite3", dataSourceName) 96 if err != nil { 97 return nil, herror.Internal(err, "") 98 } 99 // execute dummy statement to catch problems with db access 100 _, err = db.Exec("") 101 if err != nil { 102 return nil, herror.Unlikely(err, fmt.Sprintf("unable to access database at '%s'", dataSourceName), ` 103 Ensure that the directory is writable, and if the database file already exists, ensure it is readable and writable. 104 `) 105 } 106 // set up pragmas 107 if debug { 108 // good sanity check but slows things down, especially the gc in RemoveDir() 109 _, err = db.Exec("PRAGMA foreign_keys = ON") 110 } else { 111 _, err = db.Exec("PRAGMA foreign_keys = OFF") 112 } 113 if err != nil { 114 return nil, herror.Internal(err, "") 115 } 116 _, err = db.Exec("PRAGMA cache_size = -500000") // 500 MB 117 if err != nil { 118 return nil, herror.Internal(err, "") 119 } 120 121 s := &Session{db: db} 122 herr := s.checkVersion() 123 if herr != nil { 124 return nil, herr 125 } 126 err = s.initSchema() 127 if err != nil { 128 return nil, herror.Internal(err, "") 129 } 130 return s, nil 131 } 132 133 func (s *Session) checkVersion() herror.Interface { 134 // ensure metadata table exists 135 _, err := s.db.Exec(` 136 CREATE TABLE IF NOT EXISTS meta 137 ( 138 key TEXT UNIQUE NOT NULL, 139 value BLOB NOT NULL 140 ) 141 `) 142 if err != nil { 143 return herror.Internal(err, "") 144 } 145 row := s.db.QueryRow("SELECT value FROM meta WHERE key = ?", versionKey) 146 var dbVersion string 147 err = row.Scan(&dbVersion) 148 if err == sql.ErrNoRows { 149 // okay, we will initialize version 150 _, err = s.db.Exec("INSERT INTO meta (key, value) VALUES (?, ?)", versionKey, strconv.Itoa(version)) 151 if err != nil { 152 return herror.Internal(err, "") 153 } 154 return nil 155 } 156 // DB has a version, make sure it's the current version 157 dbVersionInt, err := strconv.ParseInt(dbVersion, 10, 0) 158 if err != nil || dbVersionInt != version { 159 return herror.Unlikely(nil, fmt.Sprintf("database version mismatch: expected %d, got %s", version, dbVersion), ` 160 This database was likely produced by an incompatible version of Periscope. Either use a compatible version of Periscope, or delete the database (by running 'psc finish') and try again. 161 `) 162 } 163 // correct version 164 return nil 165 } 166 167 func (s *Session) initSchema() error { 168 // only called in New, so db is non-null 169 _, err := s.db.Exec(` 170 CREATE TABLE IF NOT EXISTS directory 171 ( 172 id INTEGER PRIMARY KEY NOT NULL, 173 name TEXT NOT NULL, 174 parent INTEGER NULL, 175 FOREIGN KEY(parent) REFERENCES directory(id), 176 UNIQUE(name, parent) 177 ) 178 `) 179 if err != nil { 180 return err 181 } 182 _, err = s.db.Exec(` 183 CREATE TABLE IF NOT EXISTS file_info 184 ( 185 id INTEGER PRIMARY KEY NOT NULL, 186 directory INTEGER NOT NULL, 187 filename TEXT NOT NULL, 188 size INTEGER NOT NULL, 189 short_hash BLOB NULL, 190 full_hash BLOB NULL, 191 FOREIGN KEY(directory) REFERENCES directory(id), 192 UNIQUE(directory, filename) 193 ) 194 `) 195 return err 196 } 197 198 func (s *Session) Begin() (*Session, herror.Interface) { 199 if s.tx != nil { 200 return nil, herror.Internal(nil, "cannot Begin(): already in a transaction") 201 } 202 if s.db == nil { 203 return nil, herror.Internal(nil, "cannot Begin(): finished transaction") 204 } 205 tx, err := s.db.Begin() 206 if err != nil { 207 return nil, herror.Internal(err, "") 208 } 209 return &Session{db: nil, tx: tx}, nil 210 } 211 212 func (s *Session) Commit() herror.Interface { 213 if s.tx == nil { 214 return herror.Internal(nil, "Commit(): not in a running transaction") 215 } 216 err := s.tx.Commit() 217 if err != nil { 218 return herror.Internal(err, "") 219 } 220 s.tx = nil 221 return nil 222 } 223 224 func (s *Session) query(query string, args ...interface{}) (*sql.Rows, error) { 225 if s.tx != nil { 226 return s.tx.Query(query, args...) 227 } 228 if s.db == nil { 229 return nil, herror.Internal(nil, "transaction is finished") 230 } 231 return s.db.Query(query, args...) 232 } 233 234 func (s *Session) queryRow(query string, args ...interface{}) (*sql.Row, herror.Interface) { 235 if s.tx != nil { 236 return s.tx.QueryRow(query, args...), nil 237 } 238 if s.db == nil { 239 return nil, herror.Internal(nil, "transaction is finished") 240 } 241 return s.db.QueryRow(query, args...), nil 242 } 243 244 func (s *Session) exec(query string, args ...interface{}) (sql.Result, error) { 245 if s.tx != nil { 246 return s.tx.Exec(query, args...) 247 } 248 if s.db == nil { 249 return nil, herror.Internal(nil, "transaction is finished") 250 } 251 return s.db.Exec(query, args...) 252 } 253 254 func (s *Session) pathToDirectoryId(path string, create bool) (int64, error) { 255 if path == "" { 256 return 0, errors.New("path is empty") 257 } 258 path = filepath.Clean(path) // remove extra "/" at the end, etc. 259 var elems []string 260 var base string 261 for base != "/" { 262 base = filepath.Base(path) 263 elems = append(elems, base) 264 path = filepath.Dir(path) 265 } 266 id := int64(-1) 267 for i := len(elems) - 1; i >= 0; i-- { 268 var row *sql.Row 269 var err error 270 if id == -1 { 271 row, err = s.queryRow(` 272 SELECT id 273 FROM directory 274 WHERE name = ? 275 AND parent IS NULL 276 `, elems[i]) 277 } else { 278 row, err = s.queryRow(` 279 SELECT id 280 FROM directory 281 WHERE name = ? 282 AND parent = ? 283 `, elems[i], id) 284 } 285 if err != nil { 286 return 0, err 287 } 288 err = row.Scan(&id) 289 if err == sql.ErrNoRows && create { 290 // need to create it 291 var result sql.Result 292 if id == -1 { 293 result, err = s.exec(` 294 INSERT INTO directory (name, parent) VALUES (?, NULL) 295 `, elems[i]) 296 } else { 297 result, err = s.exec(` 298 INSERT INTO directory (name, parent) VALUES (?, ?) 299 `, elems[i], id) 300 } 301 if err != nil { 302 return 0, err 303 } 304 id, err = result.LastInsertId() 305 if err != nil { 306 return 0, err 307 } 308 } else if err != nil { 309 return 0, err 310 } 311 } 312 return id, nil 313 } 314 315 func (s *Session) directoryIdToPath(id int64) (string, error) { 316 rows, err := s.query(` 317 WITH RECURSIVE sup_directory (id, name, parent, level) AS ( 318 SELECT id, name, parent, 1 FROM directory WHERE id = ? 319 UNION ALL 320 SELECT d.id, d.name, d.parent, level+1 321 FROM directory d, sup_directory sd 322 WHERE d.id = sd.parent 323 ) 324 SELECT name, (SELECT max(level) FROM sup_directory) - level AS distance 325 FROM sup_directory 326 ORDER BY distance 327 `, id) 328 if err != nil { 329 return "", err 330 } 331 defer rows.Close() 332 var path string 333 for rows.Next() { 334 var name string 335 var level int64 336 if err = rows.Scan(&name, &level); err != nil { 337 return "", err 338 } 339 if path == "" { 340 path = name 341 } else { 342 path = filepath.Join(path, name) 343 } 344 } 345 return path, nil 346 } 347 348 func (s *Session) Add(info FileInfo) herror.Interface { 349 dirname := filepath.Dir(info.Path) 350 filename := filepath.Base(info.Path) 351 dirid, err := s.pathToDirectoryId(dirname, true) 352 if err != nil { 353 return herror.Internal(err, "") 354 } 355 if _, err := s.exec(` 356 REPLACE INTO file_info (directory, filename, size, short_hash, full_hash) 357 VALUES (?, ?, ?, ?, ?) 358 `, dirid, filename, info.Size, info.ShortHash, info.FullHash); err != nil { 359 return herror.Internal(err, "") 360 } 361 return nil 362 } 363 364 // Returns all infos in the database (regardless of whether they have 365 // duplicates). 366 func (s *Session) AllInfosC() (<-chan FileInfo, herror.Interface) { 367 rows, err := s.query(` 368 SELECT directory, filename, size, short_hash, full_hash 369 FROM file_info`) 370 if err != nil { 371 return nil, herror.Internal(err, "") 372 } 373 results := make(chan FileInfo) 374 go func() { 375 defer rows.Close() 376 for rows.Next() { 377 var dirid int64 378 var filename string 379 var info FileInfo 380 if err := rows.Scan(&dirid, &filename, &info.Size, &info.ShortHash, &info.FullHash); err != nil { 381 // similar issue as below in AllDuplicatesC: how to report this? 382 log.Printf("failure while scanning row: %s", err) 383 continue 384 } 385 dirname, err := s.directoryIdToPath(dirid) 386 if err != nil { 387 log.Printf("failure while resolving directory name: %s", err) 388 continue 389 } 390 info.Path = filepath.Join(dirname, filename) 391 results <- info 392 } 393 close(results) 394 }() 395 return results, nil 396 } 397 398 func (s *Session) AllInfos() ([]FileInfo, herror.Interface) { 399 var r []FileInfo 400 c, err := s.AllInfosC() 401 if err != nil { 402 return nil, err 403 } 404 for i := range c { 405 r = append(r, i) 406 } 407 sort.Sort(fileInfosOrdering(r)) 408 return r, nil 409 } 410 411 func (s *Session) CreateIndexes() herror.Interface { 412 // ensuring that an index on full_hash exists makes a huge difference 413 // in performance for commands like ls, because we use this for finding 414 // duplicates 415 _, err := s.exec("CREATE INDEX IF NOT EXISTS idx_hash ON file_info (full_hash)") 416 if err != nil { 417 return herror.Internal(err, "") 418 } 419 // makes a big difference when we are looking up by size (relevant when 420 // scanning) 421 _, err = s.exec("CREATE INDEX IF NOT EXISTS idx_size ON file_info (size)") 422 if err != nil { 423 return herror.Internal(err, "") 424 } 425 // for looking up files by directory/filename 426 _, err = s.exec("CREATE INDEX IF NOT EXISTS idx_directory_filename ON file_info (directory, filename)") 427 if err != nil { 428 return herror.Internal(err, "") 429 } 430 // for recursive lookup 431 _, err = s.exec("CREATE INDEX IF NOT EXISTS idx_name_parent ON directory (name, parent)") 432 if err != nil { 433 return herror.Internal(err, "") 434 } 435 // indexes on foreign keys 436 _, err = s.exec("CREATE INDEX IF NOT EXISTS idx_directory ON file_info (directory)") 437 if err != nil { 438 return herror.Internal(err, "") 439 } 440 _, err = s.exec("CREATE INDEX IF NOT EXISTS idx_parent ON directory (parent)") 441 if err != nil { 442 return herror.Internal(err, "") 443 } 444 return nil 445 } 446 447 // Returns all known duplicates in the database. 448 // 449 // These are necessarily FileInfos with the FullHash field filled out. Each 450 // DuplicateSet that is returned always has > 1 element (i.e. it only includes 451 // duplicates, not infos where we happen to know the full hash). 452 // 453 // path is optional; if "", then all duplicates are returned, otherwise only 454 // ones with the given directory prefix 455 func (s *Session) AllDuplicatesC(path string) (<-chan DuplicateSet, herror.Interface) { 456 results := make(chan DuplicateSet) 457 dirid := int64(-1) 458 var err error 459 if path != "" { 460 dirid, err = s.pathToDirectoryId(path, false) 461 if err == sql.ErrNoRows { 462 close(results) 463 return results, nil 464 } else if err != nil { 465 return nil, herror.Internal(err, "") 466 } 467 } 468 var rows *sql.Rows 469 if dirid == -1 { 470 rows, err = s.query(` 471 SELECT directory, filename, size, short_hash, full_hash 472 FROM file_info 473 WHERE full_hash IS NOT NULL 474 ORDER BY size DESC, full_hash`) 475 } else { 476 rows, err = s.query(` 477 WITH dirs AS 478 ( 479 WITH RECURSIVE sub_directory (id, parent) AS ( 480 SELECT id, parent FROM directory WHERE id = ? 481 UNION ALL 482 SELECT d.id, d.parent 483 FROM directory d, sub_directory sd 484 WHERE d.parent = sd.id 485 ) 486 SELECT id FROM sub_directory 487 ), 488 matching_hashes AS 489 ( 490 SELECT full_hash FROM file_info WHERE directory IN dirs AND full_hash IS NOT NULL 491 ) 492 SELECT directory, filename, size, short_hash, full_hash 493 FROM file_info 494 WHERE full_hash IN matching_hashes 495 ORDER BY size DESC, full_hash`, dirid) 496 } 497 if err != nil { 498 return nil, herror.Internal(err, "") 499 } 500 go func() { 501 defer rows.Close() 502 var set DuplicateSet 503 var prevHash []byte 504 for rows.Next() { 505 var dirid int64 506 var filename string 507 var info FileInfo 508 if err := rows.Scan(&dirid, &filename, &info.Size, &info.ShortHash, &info.FullHash); err != nil { 509 // how should we handle this error that happens in its own goroutine? 510 // give up on this row? 511 log.Printf("failure while scanning row: %s", err) 512 continue 513 } 514 dirname, err := s.directoryIdToPath(dirid) 515 if err != nil { 516 log.Printf("failure while resolving directory name: %s", err) 517 continue 518 } 519 info.Path = filepath.Join(dirname, filename) 520 if !bytes.Equal(info.FullHash, prevHash) { 521 if len(set) > 1 { 522 // note: set may have singletons, we don't remove info about files with single matches 523 sort.Sort(fileInfosOrdering(set)) 524 results <- set 525 } 526 set = nil 527 } 528 prevHash = info.FullHash 529 set = append(set, info) 530 } 531 // will usually be some infos left over, if the last file size/hash has duplicates 532 if len(set) > 1 { 533 sort.Sort(fileInfosOrdering(set)) 534 results <- set 535 } 536 close(results) 537 }() 538 return results, nil 539 } 540 541 func (s *Session) AllDuplicates(path string) ([]DuplicateSet, herror.Interface) { 542 var r []DuplicateSet 543 c, err := s.AllDuplicatesC(path) 544 if err != nil { 545 return nil, err 546 } 547 for d := range c { 548 r = append(r, d) 549 } 550 return r, nil 551 } 552 553 func (s *Session) Summary() (InfoSummary, herror.Interface) { 554 row, err := s.queryRow("SELECT COUNT(*) FROM file_info") 555 if err != nil { 556 return InfoSummary{}, err 557 } 558 var files int64 559 if err := row.Scan(&files); err != nil { 560 return InfoSummary{}, herror.Internal(err, "") 561 } 562 row, err = s.queryRow(` 563 WITH sets AS 564 ( 565 SELECT COUNT(*) AS cnt, size 566 FROM file_info 567 GROUP BY full_hash 568 HAVING COUNT(full_hash) > 1 569 ) 570 SELECT COUNT(*), SUM(cnt), SUM((cnt-1)*size) from sets 571 `) 572 if err != nil { 573 return InfoSummary{}, err 574 } 575 var uniqueWithDuplicates int64 576 var filesWithDuplicates, overhead sql.NullInt64 577 if err := row.Scan(&uniqueWithDuplicates, &filesWithDuplicates, &overhead); err != nil { 578 return InfoSummary{}, herror.Internal(err, "") 579 } 580 duplicate := filesWithDuplicates.Int64 - uniqueWithDuplicates 581 return InfoSummary{ 582 Files: files, 583 Unique: files - duplicate, 584 Duplicate: duplicate, 585 Overhead: overhead.Int64, 586 }, nil 587 } 588 589 // Returns info for everything matching the given file. 590 // 591 // Returns [] if there isn't a matching file in the database. If the file 592 // exists in the database, that file is returned first. 593 func (s *Session) Lookup(path string) (DuplicateSet, herror.Interface) { 594 dirname := filepath.Dir(path) 595 filename := filepath.Base(path) 596 var set DuplicateSet 597 dirid, err := s.pathToDirectoryId(dirname, false) 598 if err == sql.ErrNoRows { 599 return set, nil // directory not known => empty 600 } else if err != nil { 601 return nil, herror.Internal(err, "") 602 } 603 row, herr := s.queryRow(` 604 SELECT id, size, short_hash, full_hash 605 FROM file_info 606 WHERE directory = ? AND filename = ? 607 `, dirid, filename) 608 if herr != nil { 609 return nil, herr 610 } 611 var id int 612 var info FileInfo 613 err = row.Scan(&id, &info.Size, &info.ShortHash, &info.FullHash) 614 if err == sql.ErrNoRows { 615 return set, nil // empty 616 } else if err != nil { 617 return nil, herror.Internal(err, "") 618 } 619 info.Path = filepath.Join(dirname, filename) 620 if info.FullHash == nil { 621 // no known duplicates 622 set = append(set, info) 623 return set, nil 624 } 625 // get all others 626 rows, err := s.query(` 627 SELECT directory, filename, size, short_hash, full_hash 628 FROM file_info 629 WHERE full_hash = ? AND id != ?`, info.FullHash, id) 630 if err != nil { 631 return nil, herror.Internal(err, "") 632 } 633 defer rows.Close() 634 for rows.Next() { 635 var info FileInfo 636 if err := rows.Scan(&dirid, &filename, &info.Size, &info.ShortHash, &info.FullHash); err != nil { 637 return nil, herror.Internal(err, "") 638 } 639 dirname, err := s.directoryIdToPath(dirid) 640 if err != nil { 641 return nil, herror.Internal(err, "") 642 } 643 info.Path = filepath.Join(dirname, filename) 644 set = append(set, info) 645 } 646 sort.Sort(fileInfosOrdering(set)) 647 set = append(DuplicateSet{info}, set...) // so the given info is first 648 return set, nil 649 } 650 651 // Returns all the infos with the given size. 652 // 653 // This includes all infos, even ones where the short hash or full hash is not known. 654 func (s *Session) InfosBySize(size int64) ([]FileInfo, herror.Interface) { 655 rows, err := s.query(` 656 SELECT directory, filename, size, short_hash, full_hash 657 FROM file_info 658 WHERE size = ? 659 `, size) 660 if err != nil { 661 return nil, herror.Internal(err, "") 662 } 663 defer rows.Close() 664 var results []FileInfo 665 for rows.Next() { 666 var dirid int64 667 var filename string 668 var info FileInfo 669 if err := rows.Scan(&dirid, &filename, &info.Size, &info.ShortHash, &info.FullHash); err != nil { 670 return nil, herror.Internal(err, "") 671 } 672 dirname, err := s.directoryIdToPath(dirid) 673 if err != nil { 674 return nil, herror.Internal(err, "") 675 } 676 info.Path = filepath.Join(dirname, filename) 677 results = append(results, info) 678 } 679 return results, nil 680 } 681 682 // Returns all duplicate sets (size > 1) where at least one file is contained under the given path. 683 func (s *Session) LookupAllC(path string, includeHidden bool) (<-chan DuplicateInfo, herror.Interface) { 684 results := make(chan DuplicateInfo) 685 dirid, err := s.pathToDirectoryId(path, false) 686 if err == sql.ErrNoRows { 687 close(results) 688 return results, nil 689 } else if err != nil { 690 return nil, herror.Internal(err, "") 691 } 692 var rows *sql.Rows 693 if includeHidden { 694 rows, err = s.query(` 695 WITH dirs AS 696 ( 697 WITH RECURSIVE sub_directory (id, parent) AS ( 698 SELECT id, parent FROM directory WHERE id = ? 699 UNION ALL 700 SELECT d.id, d.parent 701 FROM directory d, sub_directory sd 702 WHERE d.parent = sd.id 703 ) 704 SELECT id FROM sub_directory 705 ) 706 SELECT a.directory, a.filename, a.full_hash, COUNT(b.id) 707 FROM file_info a, file_info b 708 WHERE a.full_hash IS NOT NULL 709 AND a.full_hash = b.full_hash 710 AND a.directory IN dirs 711 GROUP BY a.directory, a.filename 712 `, dirid) 713 } else { 714 rows, err = s.query(` 715 WITH dirs AS 716 ( 717 WITH RECURSIVE sub_directory (id, parent) AS ( 718 SELECT id, parent FROM directory WHERE id = ? 719 UNION ALL 720 SELECT d.id, d.parent 721 FROM directory d, sub_directory sd 722 WHERE d.parent = sd.id 723 AND SUBSTR(d.name, 1, 1) != '.' 724 ) 725 SELECT id FROM sub_directory 726 ) 727 SELECT a.directory, a.filename, a.full_hash, COUNT(b.id) 728 FROM file_info a, file_info b 729 WHERE a.full_hash IS NOT NULL 730 AND a.full_hash = b.full_hash 731 AND a.directory IN dirs 732 AND SUBSTR(a.filename, 1, 1) != '.' 733 GROUP BY a.directory, a.filename 734 `, dirid) 735 } 736 if err != nil { 737 return nil, herror.Internal(err, "") 738 } 739 go func() { 740 defer rows.Close() 741 for rows.Next() { 742 var dirid int64 743 var filename string 744 var fullHash []byte 745 var count int64 746 if err := rows.Scan(&dirid, &filename, &fullHash, &count); err != nil { 747 log.Printf("failure while scanning row: %s", err) 748 continue 749 } 750 dirname, err := s.directoryIdToPath(dirid) 751 if err != nil { 752 log.Printf("failure while resolving directory name: %s", err) 753 continue 754 } 755 path := filepath.Join(dirname, filename) 756 if count > 1 { 757 results <- DuplicateInfo{Path: path, FullHash: fullHash, Count: count} 758 } 759 } 760 close(results) 761 }() 762 return results, nil 763 } 764 765 func (s *Session) LookupAll(path string, includeHidden bool) ([]DuplicateInfo, herror.Interface) { 766 var r []DuplicateInfo 767 c, err := s.LookupAllC(path, includeHidden) 768 if err != nil { 769 return nil, err 770 } 771 for i := range c { 772 r = append(r, i) 773 } 774 sort.Sort(duplicateInfoByPath(r)) 775 return r, nil 776 } 777 778 // Deletes a file with the given path from the database. 779 func (s *Session) Remove(path string) herror.Interface { 780 dirname := filepath.Dir(path) 781 filename := filepath.Base(path) 782 dirid, err := s.pathToDirectoryId(dirname, true) 783 if err == sql.ErrNoRows { 784 return nil 785 } else if err != nil { 786 return herror.Internal(err, "") 787 } 788 _, err = s.exec(` 789 DELETE FROM file_info 790 WHERE directory = ? AND filename = ?`, dirid, filename) 791 if err != nil { 792 return herror.Internal(err, "") 793 } 794 // don't bother to delete orphaned directories here 795 return nil 796 } 797 798 // Deletes all files matching the given directory prefix from the database, 799 // with sizes in the specified range. 800 // 801 // A max size of 0 is interpreted as infinity. This does not just match based 802 // on prefix, it interprets the prefix as a directory, and only deletes files 803 // under the given directory. This means that it won't accidentally match file 804 // names (or other directory names) where the prefix is common, e.g. deleting 805 // "/a" won't delete file "/aa" or contents under a directory "/aa". 806 func (s *Session) RemoveDir(dir string, min, max int64) herror.Interface { 807 if max <= 0 { 808 max = math.MaxInt64 809 } 810 dirid, err := s.pathToDirectoryId(dir, false) 811 if err == sql.ErrNoRows { 812 return nil 813 } else if err != nil { 814 return herror.Internal(err, "") 815 } 816 if min == 0 && max == math.MaxInt64 { 817 // more efficient query 818 _, err = s.exec(` 819 WITH dirs AS 820 ( 821 WITH RECURSIVE sub_directory (id, parent) AS ( 822 SELECT id, parent FROM directory WHERE id = ? 823 UNION ALL 824 SELECT d.id, d.parent 825 FROM directory d, sub_directory sd 826 WHERE d.parent = sd.id 827 ) 828 SELECT id FROM sub_directory 829 ) 830 DELETE FROM file_info 831 WHERE directory IN dirs 832 `, dirid) 833 } else { 834 _, err = s.exec(` 835 WITH dirs AS 836 ( 837 WITH RECURSIVE sub_directory (id, parent) AS ( 838 SELECT id, parent FROM directory WHERE id = ? 839 UNION ALL 840 SELECT d.id, d.parent 841 FROM directory d, sub_directory sd 842 WHERE d.parent = sd.id 843 ) 844 SELECT id FROM sub_directory 845 ) 846 DELETE FROM file_info 847 WHERE directory IN dirs 848 AND size > ? 849 AND size <= ? 850 `, dirid, min, max) 851 } 852 if err != nil { 853 return herror.Internal(err, "") 854 } 855 // delete orphaned directories 856 _, err = s.exec(` 857 WITH reachable AS 858 ( 859 WITH RECURSIVE sub_directory (id, parent) AS ( 860 SELECT id, parent FROM directory WHERE id IN (SELECT DISTINCT directory FROM file_info) 861 UNION ALL 862 SELECT d.id, d.parent 863 FROM directory d, sub_directory sd 864 WHERE d.id = sd.parent 865 ) 866 SELECT DISTINCT id 867 FROM sub_directory 868 ) 869 DELETE FROM directory 870 WHERE id NOT IN reachable`) 871 if err != nil { 872 return herror.Internal(err, "") 873 } 874 return nil 875 }