github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/pkg/blobserver/diskpacked/reindex.go (about)

     1  /*
     2  Copyright 2013 Google Inc.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package diskpacked
    18  
    19  import (
    20  	"bufio"
    21  	"bytes"
    22  	"fmt"
    23  	"io"
    24  	"io/ioutil"
    25  	"log"
    26  	"os"
    27  	"path/filepath"
    28  	"strconv"
    29  
    30  	"camlistore.org/pkg/blob"
    31  	"camlistore.org/pkg/context"
    32  	"camlistore.org/pkg/sorted"
    33  	"camlistore.org/pkg/sorted/kvfile"
    34  )
    35  
// camliDebug holds the boolean value parsed from the CAMLI_DEBUG environment
// variable. The ParseBool error is discarded, so an unset or unparsable value
// leaves it false. In this file it gates the extra log output of the
// reindex/walk code.
var camliDebug, _ = strconv.ParseBool(os.Getenv("CAMLI_DEBUG"))
    37  
    38  // Reindex rewrites the index files of the diskpacked .pack files
    39  func Reindex(root string, overwrite bool) (err error) {
    40  	// there is newStorage, but that may open a file for writing
    41  	var s = &storage{root: root}
    42  	index, err := kvfile.NewStorage(filepath.Join(root, "index.kv"))
    43  	if err != nil {
    44  		return err
    45  	}
    46  	defer func() {
    47  		closeErr := index.Close()
    48  		// just returning the first error - if the index or disk is corrupt
    49  		// and can't close, it's very likely these two errors are related and
    50  		// have the same root cause.
    51  		if err == nil {
    52  			err = closeErr
    53  		}
    54  	}()
    55  
    56  	ctx := context.TODO() // TODO(tgulacsi): get the verbosity from context
    57  	for i := 0; i >= 0; i++ {
    58  		fh, err := os.Open(s.filename(i))
    59  		if err != nil {
    60  			if os.IsNotExist(err) {
    61  				break
    62  			}
    63  			return err
    64  		}
    65  		err = s.reindexOne(ctx, index, overwrite, i)
    66  		fh.Close()
    67  		if err != nil {
    68  			return err
    69  		}
    70  	}
    71  	return nil
    72  }
    73  
    74  func (s *storage) reindexOne(ctx *context.Context, index sorted.KeyValue, overwrite bool, packID int) error {
    75  
    76  	var batch sorted.BatchMutation
    77  	if overwrite {
    78  		batch = index.BeginBatch()
    79  	}
    80  	allOk := true
    81  
    82  	// TODO(tgulacsi): proper verbose from context
    83  	verbose := camliDebug
    84  	err := s.walkPack(verbose, packID,
    85  		func(packID int, ref blob.Ref, offset int64, size uint32) error {
    86  			if !ref.Valid() {
    87  				if camliDebug {
    88  					log.Printf("found deleted blob in %d at %d with size %d", packID, offset, size)
    89  				}
    90  				return nil
    91  			}
    92  			meta := blobMeta{packID, offset, size}.String()
    93  			if overwrite && batch != nil {
    94  				batch.Set(ref.String(), meta)
    95  			} else {
    96  				if old, err := index.Get(ref.String()); err != nil {
    97  					allOk = false
    98  					if err == sorted.ErrNotFound {
    99  						log.Println(ref.String() + ": cannot find in index!")
   100  					} else {
   101  						log.Println(ref.String()+": error getting from index: ", err.Error())
   102  					}
   103  				} else if old != meta {
   104  					allOk = false
   105  					log.Printf("%s: index mismatch - index=%s data=%s", ref.String(), old, meta)
   106  				}
   107  			}
   108  			return nil
   109  		})
   110  	if err != nil {
   111  		return err
   112  	}
   113  
   114  	if overwrite && batch != nil {
   115  		log.Printf("overwriting %s from %d", index, packID)
   116  		if err = index.CommitBatch(batch); err != nil {
   117  			return err
   118  		}
   119  	} else if !allOk {
   120  		return fmt.Errorf("index does not match data in %d", packID)
   121  	}
   122  	return nil
   123  }
   124  
   125  // Walk walks the storage and calls the walker callback with each blobref
   126  // stops if walker returns non-nil error, and returns that
   127  func (s *storage) Walk(ctx *context.Context,
   128  	walker func(packID int, ref blob.Ref, offset int64, size uint32) error) error {
   129  
   130  	// TODO(tgulacsi): proper verbose flag from context
   131  	verbose := camliDebug
   132  
   133  	for i := 0; i >= 0; i++ {
   134  		fh, err := os.Open(s.filename(i))
   135  		if err != nil {
   136  			if os.IsNotExist(err) {
   137  				break
   138  			}
   139  			return err
   140  		}
   141  		fh.Close()
   142  		if err = s.walkPack(verbose, i, walker); err != nil {
   143  			return err
   144  		}
   145  	}
   146  	return nil
   147  }
   148  
   149  // walkPack walks the given pack and calls the walker callback with each blobref.
   150  // Stops if walker returns non-nil error and returns that.
   151  func (s *storage) walkPack(verbose bool, packID int,
   152  	walker func(packID int, ref blob.Ref, offset int64, size uint32) error) error {
   153  
   154  	fh, err := os.Open(s.filename(packID))
   155  	if err != nil {
   156  		return err
   157  	}
   158  	defer fh.Close()
   159  	name := fh.Name()
   160  
   161  	var (
   162  		pos  int64
   163  		size uint32
   164  		ref  blob.Ref
   165  	)
   166  
   167  	errAt := func(prefix, suffix string) error {
   168  		if prefix != "" {
   169  			prefix = prefix + " "
   170  		}
   171  		if suffix != "" {
   172  			suffix = " " + suffix
   173  		}
   174  		return fmt.Errorf(prefix+"at %d (0x%x) in %q:"+suffix, pos, pos, name)
   175  	}
   176  
   177  	br := bufio.NewReaderSize(fh, 512)
   178  	for {
   179  		if b, err := br.ReadByte(); err != nil {
   180  			if err == io.EOF {
   181  				break
   182  			}
   183  			return errAt("error while reading", err.Error())
   184  		} else if b != '[' {
   185  			return errAt(fmt.Sprintf("found byte 0x%x", b), "but '[' should be here!")
   186  		}
   187  		chunk, err := br.ReadSlice(']')
   188  		if err != nil {
   189  			if err == io.EOF {
   190  				break
   191  			}
   192  			return errAt("error reading blob header", err.Error())
   193  		}
   194  		m := len(chunk)
   195  		chunk = chunk[:m-1]
   196  		i := bytes.IndexByte(chunk, byte(' '))
   197  		if i <= 0 {
   198  			return errAt("", fmt.Sprintf("bad header format (no space in %q)", chunk))
   199  		}
   200  		size64, err := strconv.ParseUint(string(chunk[i+1:]), 10, 32)
   201  		if err != nil {
   202  			return errAt(fmt.Sprintf("cannot parse size %q as int", chunk[i+1:]), err.Error())
   203  		}
   204  		size = uint32(size64)
   205  
   206  		// maybe deleted?
   207  		state, deleted := 0, true
   208  		if chunk[0] == 'x' {
   209  		Loop:
   210  			for _, c := range chunk[:i] {
   211  				switch state {
   212  				case 0:
   213  					if c != 'x' {
   214  						if c == '-' {
   215  							state++
   216  						} else {
   217  							deleted = false
   218  							break Loop
   219  						}
   220  					}
   221  				case 1:
   222  					if c != '0' {
   223  						deleted = false
   224  						break Loop
   225  					}
   226  				}
   227  			}
   228  		}
   229  		if deleted {
   230  			ref = blob.Ref{}
   231  			if verbose {
   232  				log.Printf("found deleted at %d", pos)
   233  			}
   234  		} else {
   235  			ref, ok := blob.Parse(string(chunk[:i]))
   236  			if !ok {
   237  				return errAt("", fmt.Sprintf("cannot parse %q as blobref", chunk[:i]))
   238  			}
   239  			if verbose {
   240  				log.Printf("found %s at %d", ref, pos)
   241  			}
   242  		}
   243  		if err = walker(packID, ref, pos+1+int64(m), size); err != nil {
   244  			return err
   245  		}
   246  
   247  		pos += 1 + int64(m)
   248  		// TODO(tgulacsi): not just seek, but check the hashes of the files
   249  		// maybe with a different command-line flag, only.
   250  		if pos, err = fh.Seek(pos+int64(size), 0); err != nil {
   251  			return errAt("", "cannot seek +"+strconv.FormatUint(size64, 10)+" bytes")
   252  		}
   253  		// drain the buffer after the underlying reader Seeks
   254  		io.CopyN(ioutil.Discard, br, int64(br.Buffered()))
   255  	}
   256  	return nil
   257  }