github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/pkg/blobserver/diskpacked/diskpacked.go (about)

     1  /*
     2  Copyright 2013 Google Inc.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  /*
    18  Package diskpacked registers the "diskpacked" blobserver storage type,
    19  storing blobs in sequence of monolithic data files indexed by a kvfile index.
    20  
    21  Example low-level config:
    22  
    23       "/storage/": {
    24           "handler": "storage-diskpacked",
    25           "handlerArgs": {
    26              "path": "/var/camlistore/blobs"
    27            }
    28       },
    29  
    30  */
    31  package diskpacked
    32  
    33  import (
    34  	"bytes"
    35  	"errors"
    36  	"expvar"
    37  	"fmt"
    38  	"io"
    39  	"log"
    40  	"os"
    41  	"path/filepath"
    42  	"strings"
    43  	"sync"
    44  
    45  	"camlistore.org/pkg/blob"
    46  	"camlistore.org/pkg/blobserver"
    47  	"camlistore.org/pkg/blobserver/local"
    48  	"camlistore.org/pkg/context"
    49  	"camlistore.org/pkg/jsonconfig"
    50  	"camlistore.org/pkg/sorted"
    51  	"camlistore.org/pkg/sorted/kvfile"
    52  	"camlistore.org/pkg/syncutil"
    53  	"camlistore.org/pkg/types"
    54  	"camlistore.org/third_party/github.com/camlistore/lock"
    55  )
    56  
    57  // TODO(wathiede): replace with glog.V(2) when we decide our logging story.
    58  type debugT bool
    59  
    60  var debug = debugT(false)
    61  
    62  func (d debugT) Printf(format string, args ...interface{}) {
    63  	if bool(d) {
    64  		log.Printf(format, args...)
    65  	}
    66  }
    67  
    68  func (d debugT) Println(args ...interface{}) {
    69  	if bool(d) {
    70  		log.Println(args...)
    71  	}
    72  }
    73  
const defaultMaxFileSize = 512 << 20 // 512MB; pack-size cap used by newStorage when maxFileSize <= 0
    75  
// storage stores blobs by appending them to a sequence of monolithic
// pack files under root, with each blob's location kept in index.
// It is registered as the "diskpacked" blobserver storage type.
type storage struct {
	root        string          // blob pack directory (trailing slashes trimmed by newStorage)
	index       sorted.KeyValue // blob ref -> blobMeta string (see parseBlobMeta)
	maxFileSize int64           // rotate to a new pack file once the current one exceeds this

	writeLock io.Closer // Provided by lock.Lock, and guards other processes from accessing the file open for writes.

	mu     sync.Mutex // Guards all I/O state.
	closed bool       // set by Close; further appends fail
	writer *os.File   // append handle for the newest pack file
	fds    []*os.File // read-only handles, one per pack file, in pack order
	size   int64      // current size of the pack file open for writing

	*local.Generationer
}
    91  
    92  func (s *storage) String() string {
    93  	return fmt.Sprintf("\"diskpacked\" blob packs at %s", s.root)
    94  }
    95  
// Process-wide counters exported via expvar for monitoring.
var (
	readVar     = expvar.NewMap("diskpacked-read-bytes")        // bytes read, keyed by pack file name
	readTotVar  = expvar.NewMap("diskpacked-total-read-bytes")  // bytes read, keyed by storage root
	openFdsVar  = expvar.NewMap("diskpacked-open-fds")          // open file handles, keyed by storage root
	writeVar    = expvar.NewMap("diskpacked-write-bytes")       // bytes written, keyed by pack file name
	writeTotVar = expvar.NewMap("diskpacked-total-write-bytes") // bytes written, keyed by storage root
)
   103  
   104  const indexKV = "index.kv"
   105  
   106  // IsDir reports whether dir is a diskpacked directory.
   107  func IsDir(dir string) (bool, error) {
   108  	_, err := os.Stat(filepath.Join(dir, indexKV))
   109  	if os.IsNotExist(err) {
   110  		return false, nil
   111  	}
   112  	return err == nil, err
   113  }
   114  
// New returns a diskpacked storage implementation, adding blobs to
// the provided directory. It doesn't delete any existing blob pack
// files.
func New(dir string) (blobserver.Storage, error) {
	var maxSize int64 // 0 means newStorage falls back to defaultMaxFileSize
	if ok, _ := IsDir(dir); ok {
		// TODO: detect existing max size from size of files, if obvious,
		// and set maxSize to that?
	}
	return newStorage(dir, maxSize, nil)
}
   126  
// newStorage returns a new storage in path root with the given maxFileSize,
// or defaultMaxFileSize (512MB) if <= 0.
// If indexConf is non-empty it configures the metadata index backend;
// otherwise a kvfile index is opened (or created) at root/index.kv.
func newStorage(root string, maxFileSize int64, indexConf jsonconfig.Obj) (s *storage, err error) {
	fi, err := os.Stat(root)
	if os.IsNotExist(err) {
		return nil, fmt.Errorf("storage root %q doesn't exist", root)
	}
	if err != nil {
		return nil, fmt.Errorf("Failed to stat directory %q: %v", root, err)
	}
	if !fi.IsDir() {
		return nil, fmt.Errorf("storage root %q exists but is not a directory.", root)
	}
	var index sorted.KeyValue
	if len(indexConf) > 0 {
		index, err = sorted.NewKeyValue(indexConf)
	} else {
		index, err = kvfile.NewStorage(filepath.Join(root, indexKV))
	}
	if err != nil {
		return nil, err
	}
	// The named return err makes this close the index on any failure
	// below, including the later returns that assign a shadowed err.
	defer func() {
		if err != nil {
			index.Close()
		}
	}()
	if maxFileSize <= 0 {
		maxFileSize = defaultMaxFileSize
	}
	// Be consistent with trailing slashes.  Makes expvar stats for total
	// reads/writes consistent across diskpacked targets, regardless of what
	// people put in their low level config.
	root = strings.TrimRight(root, `\/`)
	s = &storage{
		root:         root,
		index:        index,
		maxFileSize:  maxFileSize,
		Generationer: local.NewGenerationer(root),
	}
	// NOTE(review): s.mu is held for the remainder of this function;
	// any callee that locks s.mu again (e.g. Close) would deadlock —
	// verify openAllPacks' error paths never do that.
	s.mu.Lock()
	defer s.mu.Unlock()
	if err := s.openAllPacks(); err != nil {
		return nil, err
	}
	if _, _, err := s.StorageGeneration(); err != nil {
		return nil, fmt.Errorf("Error initialization generation for %q: %v", root, err)
	}
	return s, nil
}
   177  
   178  func newFromConfig(_ blobserver.Loader, config jsonconfig.Obj) (storage blobserver.Storage, err error) {
   179  	var (
   180  		path        = config.RequiredString("path")
   181  		maxFileSize = config.OptionalInt("maxFileSize", 0)
   182  		indexConf   = config.OptionalObject("metaIndex")
   183  	)
   184  	if err := config.Validate(); err != nil {
   185  		return nil, err
   186  	}
   187  	return newStorage(path, int64(maxFileSize), indexConf)
   188  }
   189  
// init registers the "diskpacked" storage type with the blobserver
// registry so low-level configs can reference it.
func init() {
	blobserver.RegisterStorageConstructor("diskpacked", blobserver.StorageConstructor(newFromConfig))
}
   193  
   194  // openForRead will open pack file n for read and keep a handle to it in
   195  // s.fds.  os.IsNotExist returned if n >= the number of pack files in s.root.
   196  // This function is not thread safe, s.mu should be locked by the caller.
   197  func (s *storage) openForRead(n int) error {
   198  	if n > len(s.fds) {
   199  		panic(fmt.Sprintf("openForRead called out of order got %d, expected %d", n, len(s.fds)))
   200  	}
   201  
   202  	fn := s.filename(n)
   203  	f, err := os.Open(fn)
   204  	if err != nil {
   205  		return err
   206  	}
   207  	openFdsVar.Add(s.root, 1)
   208  	debug.Printf("diskpacked: opened for read %q", fn)
   209  	s.fds = append(s.fds, f)
   210  	return nil
   211  }
   212  
   213  // openForWrite will create or open pack file n for writes, create a lock
   214  // visible external to the process and seek to the end of the file ready for
   215  // appending new data.
   216  // This function is not thread safe, s.mu should be locked by the caller.
   217  func (s *storage) openForWrite(n int) error {
   218  	fn := s.filename(n)
   219  	l, err := lock.Lock(fn + ".lock")
   220  	if err != nil {
   221  		return err
   222  	}
   223  	f, err := os.OpenFile(fn, os.O_RDWR|os.O_CREATE, 0666)
   224  	if err != nil {
   225  		l.Close()
   226  		return err
   227  	}
   228  	openFdsVar.Add(s.root, 1)
   229  	debug.Printf("diskpacked: opened for write %q", fn)
   230  
   231  	s.size, err = f.Seek(0, os.SEEK_END)
   232  	if err != nil {
   233  		return err
   234  	}
   235  
   236  	s.writer = f
   237  	s.writeLock = l
   238  	return nil
   239  }
   240  
   241  // nextPack will close the current writer and release its lock if open,
   242  // open the next pack file in sequence for writing, grab its lock, set it
   243  // to the currently active writer, and open another copy for read-only use.
   244  // This function is not thread safe, s.mu should be locked by the caller.
   245  func (s *storage) nextPack() error {
   246  	debug.Println("diskpacked: nextPack")
   247  	s.size = 0
   248  	if s.writeLock != nil {
   249  		err := s.writeLock.Close()
   250  		if err != nil {
   251  			return err
   252  		}
   253  		s.writeLock = nil
   254  	}
   255  	if s.writer != nil {
   256  		if err := s.writer.Close(); err != nil {
   257  			return err
   258  		}
   259  		openFdsVar.Add(s.root, -1)
   260  	}
   261  
   262  	n := len(s.fds)
   263  	if err := s.openForWrite(n); err != nil {
   264  		return err
   265  	}
   266  	return s.openForRead(n)
   267  }
   268  
   269  // openAllPacks opens read-only each pack file in s.root, populating s.fds.
   270  // The latest pack file will also have a writable handle opened.
   271  // This function is not thread safe, s.mu should be locked by the caller.
   272  func (s *storage) openAllPacks() error {
   273  	debug.Println("diskpacked: openAllPacks")
   274  	n := 0
   275  	for {
   276  		err := s.openForRead(n)
   277  		if os.IsNotExist(err) {
   278  			break
   279  		}
   280  		if err != nil {
   281  			s.Close()
   282  			return err
   283  		}
   284  		n++
   285  	}
   286  
   287  	if n == 0 {
   288  		// If no pack files are found, we create one open for read and write.
   289  		return s.nextPack()
   290  	}
   291  
   292  	// If 1 or more pack files are found, open the last one read and write.
   293  	return s.openForWrite(n - 1)
   294  }
   295  
   296  func (s *storage) Close() error {
   297  	s.mu.Lock()
   298  	defer s.mu.Unlock()
   299  	var closeErr error
   300  	if !s.closed {
   301  		s.closed = true
   302  		if err := s.index.Close(); err != nil {
   303  			log.Println("diskpacked: closing index:", err)
   304  		}
   305  		for _, f := range s.fds {
   306  			if err := f.Close(); err != nil {
   307  				closeErr = err
   308  			}
   309  			openFdsVar.Add(s.root, -1)
   310  		}
   311  		s.writer = nil
   312  		if l := s.writeLock; l != nil {
   313  			err := l.Close()
   314  			if closeErr == nil {
   315  				closeErr = err
   316  			}
   317  			s.writeLock = nil
   318  		}
   319  	}
   320  	return closeErr
   321  }
   322  
// Fetch returns a reader over the blob br's bytes in its pack file,
// plus the blob's size. The returned ReadCloser's Close is a no-op;
// reads go through a section reader bounded to the blob's region.
func (s *storage) Fetch(br blob.Ref) (io.ReadCloser, uint32, error) {
	meta, err := s.meta(br)
	if err != nil {
		return nil, 0, err
	}

	// NOTE(review): s.fds is read here without holding s.mu — this
	// looks safe only if packs are append-only while fetches are in
	// flight; confirm. Also the message prints ">" but the check is ">=".
	if meta.file >= len(s.fds) {
		return nil, 0, fmt.Errorf("diskpacked: attempt to fetch blob from out of range pack file %d > %d", meta.file, len(s.fds))
	}
	rac := s.fds[meta.file]
	var rs io.ReadSeeker = io.NewSectionReader(rac, meta.offset, int64(meta.size))
	fn := rac.Name()
	// Ensure entry is in map.
	readVar.Add(fn, 0)
	// Wrap the reader so per-pack-file and per-root read byte counts
	// accumulate into the expvar maps as the caller reads.
	if v, ok := readVar.Get(fn).(*expvar.Int); ok {
		rs = types.NewStatsReadSeeker(v, rs)
	}
	readTotVar.Add(s.root, 0)
	if v, ok := readTotVar.Get(s.root).(*expvar.Int); ok {
		rs = types.NewStatsReadSeeker(v, rs)
	}
	// Pair the reader with a no-op closer to satisfy io.ReadCloser;
	// the underlying pack fd stays open for future fetches.
	rsc := struct {
		io.ReadSeeker
		io.Closer
	}{
		rs,
		types.NopCloser,
	}
	return rsc, meta.size, nil
}
   353  
   354  func (s *storage) filename(file int) string {
   355  	return filepath.Join(s.root, fmt.Sprintf("pack-%05d.blobs", file))
   356  }
   357  
var removeGate = syncutil.NewGate(20) // arbitrary; bounds concurrent on-disk deletes

// RemoveBlobs removes the blobs from index and pads data with zero bytes
func (s *storage) RemoveBlobs(blobs []blob.Ref) error {
	batch := s.index.BeginBatch()
	var wg syncutil.Group
	for _, br := range blobs {
		br := br // per-iteration copy for the closure (pre-Go 1.22 capture)
		removeGate.Start()
		// Queue the index deletion now; the batch is committed below
		// regardless of whether the on-disk delete succeeds.
		batch.Delete(br.String())
		wg.Go(func() error {
			defer removeGate.Done()
			// s.delete is defined elsewhere in this package;
			// presumably it zeroes the blob's bytes in the pack
			// file, per the doc comment above — verify there.
			if err := s.delete(br); err != nil {
				return err
			}
			return nil
		})
	}
	err1 := wg.Err()
	err2 := s.index.CommitBatch(batch)
	// Prefer reporting a data-deletion failure over an index-commit failure.
	if err1 != nil {
		return err1
	}
	return err2
}
   383  
   384  var statGate = syncutil.NewGate(20) // arbitrary
   385  
   386  func (s *storage) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) (err error) {
   387  	var wg syncutil.Group
   388  
   389  	for _, br := range blobs {
   390  		br := br
   391  		statGate.Start()
   392  		wg.Go(func() error {
   393  			defer statGate.Done()
   394  
   395  			m, err := s.meta(br)
   396  			if err == nil {
   397  				dest <- m.SizedRef(br)
   398  				return nil
   399  			}
   400  			if err == os.ErrNotExist {
   401  				return nil
   402  			}
   403  			return err
   404  		})
   405  	}
   406  	return wg.Err()
   407  }
   408  
// EnumerateBlobs sends, in index order, a SizedRef for each blob whose
// ref string is > after, until limit blobs have been sent, the index
// is exhausted, or ctx is canceled. dest is always closed on return.
func (s *storage) EnumerateBlobs(ctx *context.Context, dest chan<- blob.SizedRef, after string, limit int) (err error) {
	defer close(dest)

	t := s.index.Find(after, "")
	defer func() {
		// Surface an iterator-close error only when nothing else failed;
		// the named return err makes this visible to the caller.
		closeErr := t.Close()
		if err == nil {
			err = closeErr
		}
	}()
	for i := 0; i < limit && t.Next(); {
		key := t.Key()
		if key <= after {
			// EnumerateBlobs' semantics are '>', but sorted.KeyValue.Find is '>='.
			continue
		}
		br, ok := blob.Parse(key)
		if !ok {
			return fmt.Errorf("diskpacked: couldn't parse index key %q", key)
		}
		m, ok := parseBlobMeta(t.Value())
		if !ok {
			return fmt.Errorf("diskpacked: couldn't parse index value %q: %q", key, t.Value())
		}
		select {
		case dest <- m.SizedRef(br):
		case <-ctx.Done():
			return context.ErrCanceled
		}
		// i counts only blobs actually sent, not skipped keys.
		i++
	}
	return nil
}
   442  
   443  func (s *storage) ReceiveBlob(br blob.Ref, source io.Reader) (sbr blob.SizedRef, err error) {
   444  	var b bytes.Buffer
   445  	n, err := b.ReadFrom(source)
   446  	if err != nil {
   447  		return
   448  	}
   449  
   450  	sbr = blob.SizedRef{Ref: br, Size: uint32(n)}
   451  
   452  	// Check if it's a dup. Still accept it if the pack file on disk seems to be corrupt
   453  	// or truncated.
   454  	if m, err := s.meta(br); err == nil {
   455  		fi, err := os.Stat(s.filename(m.file))
   456  		if err == nil && fi.Size() >= m.offset+int64(m.size) {
   457  			return sbr, nil
   458  		}
   459  	}
   460  
   461  	err = s.append(sbr, &b)
   462  	return
   463  }
   464  
   465  // append writes the provided blob to the current data file.
   466  func (s *storage) append(br blob.SizedRef, r io.Reader) error {
   467  	s.mu.Lock()
   468  	defer s.mu.Unlock()
   469  	if s.closed {
   470  		return errors.New("diskpacked: write to closed storage")
   471  	}
   472  
   473  	fn := s.writer.Name()
   474  	n, err := fmt.Fprintf(s.writer, "[%v %v]", br.Ref.String(), br.Size)
   475  	s.size += int64(n)
   476  	writeVar.Add(fn, int64(n))
   477  	writeTotVar.Add(s.root, int64(n))
   478  	if err != nil {
   479  		return err
   480  	}
   481  
   482  	// TODO(adg): remove this seek and the offset check once confident
   483  	offset, err := s.writer.Seek(0, os.SEEK_CUR)
   484  	if err != nil {
   485  		return err
   486  	}
   487  	if offset != s.size {
   488  		return fmt.Errorf("diskpacked: seek says offset = %d, we think %d",
   489  			offset, s.size)
   490  	}
   491  	offset = s.size // make this a declaration once the above is removed
   492  
   493  	n2, err := io.Copy(s.writer, r)
   494  	s.size += n2
   495  	writeVar.Add(fn, int64(n))
   496  	writeTotVar.Add(s.root, int64(n))
   497  	if err != nil {
   498  		return err
   499  	}
   500  	if n2 != int64(br.Size) {
   501  		return fmt.Errorf("diskpacked: written blob size %d didn't match size %d", n, br.Size)
   502  	}
   503  	if err = s.writer.Sync(); err != nil {
   504  		return err
   505  	}
   506  
   507  	packIdx := len(s.fds) - 1
   508  	if s.size > s.maxFileSize {
   509  		if err := s.nextPack(); err != nil {
   510  			return err
   511  		}
   512  	}
   513  	return s.index.Set(br.Ref.String(), blobMeta{packIdx, offset, br.Size}.String())
   514  }
   515  
   516  // meta fetches the metadata for the specified blob from the index.
   517  func (s *storage) meta(br blob.Ref) (m blobMeta, err error) {
   518  	ms, err := s.index.Get(br.String())
   519  	if err != nil {
   520  		if err == sorted.ErrNotFound {
   521  			err = os.ErrNotExist
   522  		}
   523  		return
   524  	}
   525  	m, ok := parseBlobMeta(ms)
   526  	if !ok {
   527  		err = fmt.Errorf("diskpacked: bad blob metadata: %q", ms)
   528  	}
   529  	return
   530  }
   531  
   532  // blobMeta is the blob metadata stored in the index.
   533  type blobMeta struct {
   534  	file   int
   535  	offset int64
   536  	size   uint32
   537  }
   538  
   539  func parseBlobMeta(s string) (m blobMeta, ok bool) {
   540  	n, err := fmt.Sscan(s, &m.file, &m.offset, &m.size)
   541  	return m, n == 3 && err == nil
   542  }
   543  
   544  func (m blobMeta) String() string {
   545  	return fmt.Sprintf("%v %v %v", m.file, m.offset, m.size)
   546  }
   547  
// SizedRef pairs br with the blob's size recorded in m.
func (m blobMeta) SizedRef(br blob.Ref) blob.SizedRef {
	return blob.SizedRef{Ref: br, Size: m.size}
}