github.com/olivere/camlistore@v0.0.0-20140121221811-1b7ac2da0199/pkg/blobserver/diskpacked/reindex.go (about) 1 /* 2 Copyright 2013 Google Inc. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package diskpacked 18 19 import ( 20 "bufio" 21 "bytes" 22 "fmt" 23 "io" 24 "io/ioutil" 25 "log" 26 "os" 27 "path/filepath" 28 "strconv" 29 30 "camlistore.org/pkg/blob" 31 "camlistore.org/pkg/sorted" 32 "camlistore.org/pkg/sorted/kvfile" 33 "camlistore.org/third_party/github.com/camlistore/lock" 34 ) 35 36 // Reindex rewrites the index files of the diskpacked .pack files 37 func Reindex(root string, overwrite bool) (err error) { 38 // there is newStorage, but that may open a file for writing 39 var s = &storage{root: root} 40 index, err := kvfile.NewStorage(filepath.Join(root, "index.kv")) 41 if err != nil { 42 return err 43 } 44 defer func() { 45 closeErr := index.Close() 46 // just returning the first error - if the index or disk is corrupt 47 // and can't close, it's very likely these two errors are related and 48 // have the same root cause. 49 if err == nil { 50 err = closeErr 51 } 52 }() 53 54 verbose := false // TODO: use env var? 55 for i := 0; i >= 0; i++ { 56 fh, err := os.Open(s.filename(i)) 57 if err != nil { 58 if os.IsNotExist(err) { 59 break 60 } 61 return err 62 } 63 err = reindexOne(index, overwrite, verbose, fh, fh.Name(), i) 64 fh.Close() 65 if err != nil { 66 return err 67 } 68 } 69 return nil 70 } 71 72 func reindexOne(index sorted.KeyValue, overwrite, verbose bool, r io.ReadSeeker, name string, packId int) error { 73 l, err := lock.Lock(name + ".lock") 74 defer l.Close() 75 76 var pos, size int64 77 78 errAt := func(prefix, suffix string) error { 79 if prefix != "" { 80 prefix = prefix + " " 81 } 82 if suffix != "" { 83 suffix = " " + suffix 84 } 85 return fmt.Errorf(prefix+"at %d (0x%x) in %q:"+suffix, pos, pos, name) 86 } 87 88 var batch sorted.BatchMutation 89 if overwrite { 90 batch = index.BeginBatch() 91 } 92 93 allOk := true 94 br := bufio.NewReaderSize(r, 512) 95 for { 96 if b, err := br.ReadByte(); err != nil { 97 if err == io.EOF { 98 break 99 } 100 return errAt("error while reading", err.Error()) 101 } else if b != '[' { 102 return errAt(fmt.Sprintf("found byte 0x%x", b), "but '[' should be here!") 103 } 104 chunk, err := br.ReadSlice(']') 105 if err != nil { 106 if err == io.EOF { 107 break 108 } 109 return errAt("error reading blob header", err.Error()) 110 } 111 m := len(chunk) 112 chunk = chunk[:m-1] 113 i := bytes.IndexByte(chunk, byte(' ')) 114 if i <= 0 { 115 return errAt("", fmt.Sprintf("bad header format (no space in %q)", chunk)) 116 } 117 if size, err = strconv.ParseInt(string(chunk[i+1:]), 10, 64); err != nil { 118 return errAt(fmt.Sprintf("cannot parse size %q as int", chunk[i+1:]), err.Error()) 119 } 120 ref, ok := blob.Parse(string(chunk[:i])) 121 if !ok { 122 return errAt("", fmt.Sprintf("cannot parse %q as blobref", chunk[:i])) 123 } 124 if verbose { 125 log.Printf("found %s at %d", ref, pos) 126 } 127 128 meta := blobMeta{packId, pos + 1 + int64(m), size}.String() 129 if overwrite && batch != nil { 130 batch.Set(ref.String(), meta) 131 } else { 132 if old, err := index.Get(ref.String()); err != nil { 133 allOk = false 134 if err == sorted.ErrNotFound { 135 log.Println(ref.String() + ": cannot find in index!") 136 } else { 137 log.Println(ref.String()+": error getting from index: ", err.Error()) 138 } 139 } else if old != meta { 140 allOk = false 141 log.Printf("%s: index mismatch - index=%s data=%s", ref.String(), old, meta) 142 } 143 } 144 145 pos += 1 + int64(m) 146 // TODO(tgulacsi78): not just seek, but check the hashes of the files 147 // maybe with a different command-line flag, only. 148 if pos, err = r.Seek(pos+size, 0); err != nil { 149 return errAt("", "cannot seek +"+strconv.FormatInt(size, 10)+" bytes") 150 } 151 // drain the buffer after the underlying reader Seeks 152 io.CopyN(ioutil.Discard, br, int64(br.Buffered())) 153 } 154 155 if overwrite && batch != nil { 156 log.Printf("overwriting %s from %s", index, name) 157 if err = index.CommitBatch(batch); err != nil { 158 return err 159 } 160 } else if !allOk { 161 return fmt.Errorf("index does not match data in %q", name) 162 } 163 return nil 164 }