github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/pkg/blobserver/diskpacked/reindex.go (about) 1 /* 2 Copyright 2013 Google Inc. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package diskpacked 18 19 import ( 20 "bufio" 21 "bytes" 22 "fmt" 23 "io" 24 "io/ioutil" 25 "log" 26 "os" 27 "path/filepath" 28 "strconv" 29 30 "camlistore.org/pkg/blob" 31 "camlistore.org/pkg/context" 32 "camlistore.org/pkg/sorted" 33 "camlistore.org/pkg/sorted/kvfile" 34 ) 35 36 var camliDebug, _ = strconv.ParseBool(os.Getenv("CAMLI_DEBUG")) 37 38 // Reindex rewrites the index files of the diskpacked .pack files 39 func Reindex(root string, overwrite bool) (err error) { 40 // there is newStorage, but that may open a file for writing 41 var s = &storage{root: root} 42 index, err := kvfile.NewStorage(filepath.Join(root, "index.kv")) 43 if err != nil { 44 return err 45 } 46 defer func() { 47 closeErr := index.Close() 48 // just returning the first error - if the index or disk is corrupt 49 // and can't close, it's very likely these two errors are related and 50 // have the same root cause. 51 if err == nil { 52 err = closeErr 53 } 54 }() 55 56 ctx := context.TODO() // TODO(tgulacsi): get the verbosity from context 57 for i := 0; i >= 0; i++ { 58 fh, err := os.Open(s.filename(i)) 59 if err != nil { 60 if os.IsNotExist(err) { 61 break 62 } 63 return err 64 } 65 err = s.reindexOne(ctx, index, overwrite, i) 66 fh.Close() 67 if err != nil { 68 return err 69 } 70 } 71 return nil 72 } 73 74 func (s *storage) reindexOne(ctx *context.Context, index sorted.KeyValue, overwrite bool, packID int) error { 75 76 var batch sorted.BatchMutation 77 if overwrite { 78 batch = index.BeginBatch() 79 } 80 allOk := true 81 82 // TODO(tgulacsi): proper verbose from context 83 verbose := camliDebug 84 err := s.walkPack(verbose, packID, 85 func(packID int, ref blob.Ref, offset int64, size uint32) error { 86 if !ref.Valid() { 87 if camliDebug { 88 log.Printf("found deleted blob in %d at %d with size %d", packID, offset, size) 89 } 90 return nil 91 } 92 meta := blobMeta{packID, offset, size}.String() 93 if overwrite && batch != nil { 94 batch.Set(ref.String(), meta) 95 } else { 96 if old, err := index.Get(ref.String()); err != nil { 97 allOk = false 98 if err == sorted.ErrNotFound { 99 log.Println(ref.String() + ": cannot find in index!") 100 } else { 101 log.Println(ref.String()+": error getting from index: ", err.Error()) 102 } 103 } else if old != meta { 104 allOk = false 105 log.Printf("%s: index mismatch - index=%s data=%s", ref.String(), old, meta) 106 } 107 } 108 return nil 109 }) 110 if err != nil { 111 return err 112 } 113 114 if overwrite && batch != nil { 115 log.Printf("overwriting %s from %d", index, packID) 116 if err = index.CommitBatch(batch); err != nil { 117 return err 118 } 119 } else if !allOk { 120 return fmt.Errorf("index does not match data in %d", packID) 121 } 122 return nil 123 } 124 125 // Walk walks the storage and calls the walker callback with each blobref 126 // stops if walker returns non-nil error, and returns that 127 func (s *storage) Walk(ctx *context.Context, 128 walker func(packID int, ref blob.Ref, offset int64, size uint32) error) error { 129 130 // TODO(tgulacsi): proper verbose flag from context 131 verbose := camliDebug 132 133 for i := 0; i >= 0; i++ { 134 fh, err := os.Open(s.filename(i)) 135 if err != nil { 136 if os.IsNotExist(err) { 137 break 138 } 139 return err 140 } 141 fh.Close() 142 if err = s.walkPack(verbose, i, walker); err != nil { 143 return err 144 } 145 } 146 return nil 147 } 148 149 // walkPack walks the given pack and calls the walker callback with each blobref. 150 // Stops if walker returns non-nil error and returns that. 151 func (s *storage) walkPack(verbose bool, packID int, 152 walker func(packID int, ref blob.Ref, offset int64, size uint32) error) error { 153 154 fh, err := os.Open(s.filename(packID)) 155 if err != nil { 156 return err 157 } 158 defer fh.Close() 159 name := fh.Name() 160 161 var ( 162 pos int64 163 size uint32 164 ref blob.Ref 165 ) 166 167 errAt := func(prefix, suffix string) error { 168 if prefix != "" { 169 prefix = prefix + " " 170 } 171 if suffix != "" { 172 suffix = " " + suffix 173 } 174 return fmt.Errorf(prefix+"at %d (0x%x) in %q:"+suffix, pos, pos, name) 175 } 176 177 br := bufio.NewReaderSize(fh, 512) 178 for { 179 if b, err := br.ReadByte(); err != nil { 180 if err == io.EOF { 181 break 182 } 183 return errAt("error while reading", err.Error()) 184 } else if b != '[' { 185 return errAt(fmt.Sprintf("found byte 0x%x", b), "but '[' should be here!") 186 } 187 chunk, err := br.ReadSlice(']') 188 if err != nil { 189 if err == io.EOF { 190 break 191 } 192 return errAt("error reading blob header", err.Error()) 193 } 194 m := len(chunk) 195 chunk = chunk[:m-1] 196 i := bytes.IndexByte(chunk, byte(' ')) 197 if i <= 0 { 198 return errAt("", fmt.Sprintf("bad header format (no space in %q)", chunk)) 199 } 200 size64, err := strconv.ParseUint(string(chunk[i+1:]), 10, 32) 201 if err != nil { 202 return errAt(fmt.Sprintf("cannot parse size %q as int", chunk[i+1:]), err.Error()) 203 } 204 size = uint32(size64) 205 206 // maybe deleted? 207 state, deleted := 0, true 208 if chunk[0] == 'x' { 209 Loop: 210 for _, c := range chunk[:i] { 211 switch state { 212 case 0: 213 if c != 'x' { 214 if c == '-' { 215 state++ 216 } else { 217 deleted = false 218 break Loop 219 } 220 } 221 case 1: 222 if c != '0' { 223 deleted = false 224 break Loop 225 } 226 } 227 } 228 } 229 if deleted { 230 ref = blob.Ref{} 231 if verbose { 232 log.Printf("found deleted at %d", pos) 233 } 234 } else { 235 ref, ok := blob.Parse(string(chunk[:i])) 236 if !ok { 237 return errAt("", fmt.Sprintf("cannot parse %q as blobref", chunk[:i])) 238 } 239 if verbose { 240 log.Printf("found %s at %d", ref, pos) 241 } 242 } 243 if err = walker(packID, ref, pos+1+int64(m), size); err != nil { 244 return err 245 } 246 247 pos += 1 + int64(m) 248 // TODO(tgulacsi): not just seek, but check the hashes of the files 249 // maybe with a different command-line flag, only. 250 if pos, err = fh.Seek(pos+int64(size), 0); err != nil { 251 return errAt("", "cannot seek +"+strconv.FormatUint(size64, 10)+" bytes") 252 } 253 // drain the buffer after the underlying reader Seeks 254 io.CopyN(ioutil.Discard, br, int64(br.Buffered())) 255 } 256 return nil 257 }