github.com/olivere/camlistore@v0.0.0-20140121221811-1b7ac2da0199/pkg/blobserver/diskpacked/reindex.go (about)

     1  /*
     2  Copyright 2013 Google Inc.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package diskpacked
    18  
    19  import (
    20  	"bufio"
    21  	"bytes"
    22  	"fmt"
    23  	"io"
    24  	"io/ioutil"
    25  	"log"
    26  	"os"
    27  	"path/filepath"
    28  	"strconv"
    29  
    30  	"camlistore.org/pkg/blob"
    31  	"camlistore.org/pkg/sorted"
    32  	"camlistore.org/pkg/sorted/kvfile"
    33  	"camlistore.org/third_party/github.com/camlistore/lock"
    34  )
    35  
    36  // Reindex rewrites the index files of the diskpacked .pack files
    37  func Reindex(root string, overwrite bool) (err error) {
    38  	// there is newStorage, but that may open a file for writing
    39  	var s = &storage{root: root}
    40  	index, err := kvfile.NewStorage(filepath.Join(root, "index.kv"))
    41  	if err != nil {
    42  		return err
    43  	}
    44  	defer func() {
    45  		closeErr := index.Close()
    46  		// just returning the first error - if the index or disk is corrupt
    47  		// and can't close, it's very likely these two errors are related and
    48  		// have the same root cause.
    49  		if err == nil {
    50  			err = closeErr
    51  		}
    52  	}()
    53  
    54  	verbose := false // TODO: use env var?
    55  	for i := 0; i >= 0; i++ {
    56  		fh, err := os.Open(s.filename(i))
    57  		if err != nil {
    58  			if os.IsNotExist(err) {
    59  				break
    60  			}
    61  			return err
    62  		}
    63  		err = reindexOne(index, overwrite, verbose, fh, fh.Name(), i)
    64  		fh.Close()
    65  		if err != nil {
    66  			return err
    67  		}
    68  	}
    69  	return nil
    70  }
    71  
    72  func reindexOne(index sorted.KeyValue, overwrite, verbose bool, r io.ReadSeeker, name string, packId int) error {
    73  	l, err := lock.Lock(name + ".lock")
    74  	defer l.Close()
    75  
    76  	var pos, size int64
    77  
    78  	errAt := func(prefix, suffix string) error {
    79  		if prefix != "" {
    80  			prefix = prefix + " "
    81  		}
    82  		if suffix != "" {
    83  			suffix = " " + suffix
    84  		}
    85  		return fmt.Errorf(prefix+"at %d (0x%x) in %q:"+suffix, pos, pos, name)
    86  	}
    87  
    88  	var batch sorted.BatchMutation
    89  	if overwrite {
    90  		batch = index.BeginBatch()
    91  	}
    92  
    93  	allOk := true
    94  	br := bufio.NewReaderSize(r, 512)
    95  	for {
    96  		if b, err := br.ReadByte(); err != nil {
    97  			if err == io.EOF {
    98  				break
    99  			}
   100  			return errAt("error while reading", err.Error())
   101  		} else if b != '[' {
   102  			return errAt(fmt.Sprintf("found byte 0x%x", b), "but '[' should be here!")
   103  		}
   104  		chunk, err := br.ReadSlice(']')
   105  		if err != nil {
   106  			if err == io.EOF {
   107  				break
   108  			}
   109  			return errAt("error reading blob header", err.Error())
   110  		}
   111  		m := len(chunk)
   112  		chunk = chunk[:m-1]
   113  		i := bytes.IndexByte(chunk, byte(' '))
   114  		if i <= 0 {
   115  			return errAt("", fmt.Sprintf("bad header format (no space in %q)", chunk))
   116  		}
   117  		if size, err = strconv.ParseInt(string(chunk[i+1:]), 10, 64); err != nil {
   118  			return errAt(fmt.Sprintf("cannot parse size %q as int", chunk[i+1:]), err.Error())
   119  		}
   120  		ref, ok := blob.Parse(string(chunk[:i]))
   121  		if !ok {
   122  			return errAt("", fmt.Sprintf("cannot parse %q as blobref", chunk[:i]))
   123  		}
   124  		if verbose {
   125  			log.Printf("found %s at %d", ref, pos)
   126  		}
   127  
   128  		meta := blobMeta{packId, pos + 1 + int64(m), size}.String()
   129  		if overwrite && batch != nil {
   130  			batch.Set(ref.String(), meta)
   131  		} else {
   132  			if old, err := index.Get(ref.String()); err != nil {
   133  				allOk = false
   134  				if err == sorted.ErrNotFound {
   135  					log.Println(ref.String() + ": cannot find in index!")
   136  				} else {
   137  					log.Println(ref.String()+": error getting from index: ", err.Error())
   138  				}
   139  			} else if old != meta {
   140  				allOk = false
   141  				log.Printf("%s: index mismatch - index=%s data=%s", ref.String(), old, meta)
   142  			}
   143  		}
   144  
   145  		pos += 1 + int64(m)
   146  		// TODO(tgulacsi78): not just seek, but check the hashes of the files
   147  		// maybe with a different command-line flag, only.
   148  		if pos, err = r.Seek(pos+size, 0); err != nil {
   149  			return errAt("", "cannot seek +"+strconv.FormatInt(size, 10)+" bytes")
   150  		}
   151  		// drain the buffer after the underlying reader Seeks
   152  		io.CopyN(ioutil.Discard, br, int64(br.Buffered()))
   153  	}
   154  
   155  	if overwrite && batch != nil {
   156  		log.Printf("overwriting %s from %s", index, name)
   157  		if err = index.CommitBatch(batch); err != nil {
   158  			return err
   159  		}
   160  	} else if !allOk {
   161  		return fmt.Errorf("index does not match data in %q", name)
   162  	}
   163  	return nil
   164  }