github.com/olivere/camlistore@v0.0.0-20140121221811-1b7ac2da0199/pkg/blobserver/diskpacked/diskpacked.go (about)

     1  /*
     2  Copyright 2013 Google Inc.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  /*
    18  Package diskpacked registers the "diskpacked" blobserver storage type,
    19  storing blobs in sequence of monolithic data files indexed by a kvfile index.
    20  
    21  Example low-level config:
    22  
    23       "/storage/": {
    24           "handler": "storage-diskpacked",
    25           "handlerArgs": {
    26              "path": "/var/camlistore/blobs"
    27            }
    28       },
    29  
    30  */
    31  package diskpacked
    32  
    33  import (
    34  	"bytes"
    35  	"errors"
    36  	"expvar"
    37  	"fmt"
    38  	"io"
    39  	"log"
    40  	"os"
    41  	"path/filepath"
    42  	"strings"
    43  	"sync"
    44  
    45  	"camlistore.org/pkg/blob"
    46  	"camlistore.org/pkg/blobserver"
    47  	"camlistore.org/pkg/blobserver/local"
    48  	"camlistore.org/pkg/context"
    49  	"camlistore.org/pkg/jsonconfig"
    50  	"camlistore.org/pkg/sorted"
    51  	"camlistore.org/pkg/sorted/kvfile"
    52  	"camlistore.org/pkg/syncutil"
    53  	"camlistore.org/pkg/types"
    54  	"camlistore.org/third_party/github.com/camlistore/lock"
    55  )
    56  
    57  // TODO(wathiede): replace with glog.V(2) when we decide our logging story.
    58  type debugT bool
    59  
    60  var debug = debugT(false)
    61  
    62  func (d debugT) Printf(format string, args ...interface{}) {
    63  	if bool(d) {
    64  		log.Printf(format, args...)
    65  	}
    66  }
    67  
    68  func (d debugT) Println(args ...interface{}) {
    69  	if bool(d) {
    70  		log.Println(args...)
    71  	}
    72  }
    73  
const defaultMaxFileSize = 512 << 20 // 512MB

// storage stores blobs by packing many of them into a sequence of large
// "pack" files under root, with each blob's location recorded in a
// sorted key-value index (see blobMeta).
type storage struct {
	root        string          // directory containing the pack files and index
	index       sorted.KeyValue // blobref string -> blobMeta.String() value
	maxFileSize int64           // size threshold after which a new pack file is started

	writeLock io.Closer // Provided by lock.Lock, and guards other processes from accessing the file open for writes.

	mu     sync.Mutex // Guards all I/O state.
	closed bool       // set once by Close; writes afterwards are rejected
	writer *os.File   // write handle to the newest pack file
	fds    []*os.File // read-only handles, one per pack file, indexed by pack number
	size   int64      // current size of the pack file open for writing

	*local.Generationer
}
    91  
// Exported counters for monitoring.  readVar and writeVar are keyed by
// pack-file name; the *TotVar and openFdsVar maps are keyed by storage root.
var (
	readVar     = expvar.NewMap("diskpacked-read-bytes")
	readTotVar  = expvar.NewMap("diskpacked-total-read-bytes")
	openFdsVar  = expvar.NewMap("diskpacked-open-fds")
	writeVar    = expvar.NewMap("diskpacked-write-bytes")
	writeTotVar = expvar.NewMap("diskpacked-total-write-bytes")
)
    99  
   100  // newStorage returns a new storage in path root with the given maxFileSize,
   101  // or defaultMaxFileSize (512MB) if <= 0
   102  func newStorage(root string, maxFileSize int64) (s *storage, err error) {
   103  	fi, err := os.Stat(root)
   104  	if os.IsNotExist(err) {
   105  		return nil, fmt.Errorf("storage root %q doesn't exist", root)
   106  	}
   107  	if err != nil {
   108  		return nil, fmt.Errorf("Failed to stat directory %q: %v", root, err)
   109  	}
   110  	if !fi.IsDir() {
   111  		return nil, fmt.Errorf("storage root %q exists but is not a directory.", root)
   112  	}
   113  	index, err := kvfile.NewStorage(filepath.Join(root, "index.kv"))
   114  	if err != nil {
   115  		return nil, err
   116  	}
   117  	defer func() {
   118  		if err != nil {
   119  			index.Close()
   120  		}
   121  	}()
   122  	if maxFileSize <= 0 {
   123  		maxFileSize = defaultMaxFileSize
   124  	}
   125  	// Be consistent with trailing slashes.  Makes expvar stats for total
   126  	// reads/writes consistent across diskpacked targets, regardless of what
   127  	// people put in their low level config.
   128  	root = strings.TrimRight(root, `\/`)
   129  	s = &storage{
   130  		root:         root,
   131  		index:        index,
   132  		maxFileSize:  maxFileSize,
   133  		Generationer: local.NewGenerationer(root),
   134  	}
   135  	s.mu.Lock()
   136  	defer s.mu.Unlock()
   137  	if err := s.openAllPacks(); err != nil {
   138  		return nil, err
   139  	}
   140  	if _, _, err := s.StorageGeneration(); err != nil {
   141  		return nil, fmt.Errorf("Error initialization generation for %q: %v", root, err)
   142  	}
   143  	return s, nil
   144  }
   145  
   146  func newFromConfig(_ blobserver.Loader, config jsonconfig.Obj) (storage blobserver.Storage, err error) {
   147  	path := config.RequiredString("path")
   148  	maxFileSize := config.OptionalInt("maxFileSize", 0)
   149  	if err := config.Validate(); err != nil {
   150  		return nil, err
   151  	}
   152  	return newStorage(path, int64(maxFileSize))
   153  }
   154  
// init registers the "diskpacked" storage type with the blobserver
// constructor registry at package load time.
func init() {
	blobserver.RegisterStorageConstructor("diskpacked", blobserver.StorageConstructor(newFromConfig))
}
   158  
   159  // openForRead will open pack file n for read and keep a handle to it in
   160  // s.fds.  os.IsNotExist returned if n >= the number of pack files in s.root.
   161  // This function is not thread safe, s.mu should be locked by the caller.
   162  func (s *storage) openForRead(n int) error {
   163  	if n > len(s.fds) {
   164  		panic(fmt.Sprintf("openForRead called out of order got %d, expected %d", n, len(s.fds)))
   165  	}
   166  
   167  	fn := s.filename(n)
   168  	f, err := os.Open(fn)
   169  	if err != nil {
   170  		return err
   171  	}
   172  	openFdsVar.Add(s.root, 1)
   173  	debug.Printf("diskpacked: opened for read %q", fn)
   174  	s.fds = append(s.fds, f)
   175  	return nil
   176  }
   177  
   178  // openForWrite will create or open pack file n for writes, create a lock
   179  // visible external to the process and seek to the end of the file ready for
   180  // appending new data.
   181  // This function is not thread safe, s.mu should be locked by the caller.
   182  func (s *storage) openForWrite(n int) error {
   183  	fn := s.filename(n)
   184  	l, err := lock.Lock(fn + ".lock")
   185  	if err != nil {
   186  		return err
   187  	}
   188  	f, err := os.OpenFile(fn, os.O_RDWR|os.O_CREATE, 0666)
   189  	if err != nil {
   190  		l.Close()
   191  		return err
   192  	}
   193  	openFdsVar.Add(s.root, 1)
   194  	debug.Printf("diskpacked: opened for write %q", fn)
   195  
   196  	s.size, err = f.Seek(0, os.SEEK_END)
   197  	if err != nil {
   198  		return err
   199  	}
   200  
   201  	s.writer = f
   202  	s.writeLock = l
   203  	return nil
   204  }
   205  
   206  // nextPack will close the current writer and release its lock if open,
   207  // open the next pack file in sequence for writing, grab its lock, set it
   208  // to the currently active writer, and open another copy for read-only use.
   209  // This function is not thread safe, s.mu should be locked by the caller.
   210  func (s *storage) nextPack() error {
   211  	debug.Println("diskpacked: nextPack")
   212  	s.size = 0
   213  	if s.writeLock != nil {
   214  		err := s.writeLock.Close()
   215  		if err != nil {
   216  			return err
   217  		}
   218  		s.writeLock = nil
   219  	}
   220  	if s.writer != nil {
   221  		if err := s.writer.Close(); err != nil {
   222  			return err
   223  		}
   224  		openFdsVar.Add(s.root, -1)
   225  	}
   226  
   227  	n := len(s.fds)
   228  	if err := s.openForWrite(n); err != nil {
   229  		return err
   230  	}
   231  	return s.openForRead(n)
   232  }
   233  
   234  // openAllPacks opens read-only each pack file in s.root, populating s.fds.
   235  // The latest pack file will also have a writable handle opened.
   236  // This function is not thread safe, s.mu should be locked by the caller.
   237  func (s *storage) openAllPacks() error {
   238  	debug.Println("diskpacked: openAllPacks")
   239  	n := 0
   240  	for {
   241  		err := s.openForRead(n)
   242  		if os.IsNotExist(err) {
   243  			break
   244  		}
   245  		if err != nil {
   246  			s.Close()
   247  			return err
   248  		}
   249  		n++
   250  	}
   251  
   252  	if n == 0 {
   253  		// If no pack files are found, we create one open for read and write.
   254  		return s.nextPack()
   255  	}
   256  
   257  	// If 1 or more pack files are found, open the last one read and write.
   258  	return s.openForWrite(n - 1)
   259  }
   260  
   261  func (s *storage) Close() error {
   262  	s.mu.Lock()
   263  	defer s.mu.Unlock()
   264  	var closeErr error
   265  	if !s.closed {
   266  		s.closed = true
   267  		if err := s.index.Close(); err != nil {
   268  			log.Println("diskpacked: closing index:", err)
   269  		}
   270  		for _, f := range s.fds {
   271  			if err := f.Close(); err != nil {
   272  				closeErr = err
   273  			}
   274  			openFdsVar.Add(s.root, -1)
   275  		}
   276  		s.writer = nil
   277  		if l := s.writeLock; l != nil {
   278  			err := l.Close()
   279  			if closeErr == nil {
   280  				closeErr = err
   281  			}
   282  			s.writeLock = nil
   283  		}
   284  	}
   285  	return closeErr
   286  }
   287  
// FetchStreaming returns a reader over br's contents plus its size; it
// simply delegates to Fetch, whose ReadSeekCloser already satisfies the
// streaming interface.
func (s *storage) FetchStreaming(br blob.Ref) (io.ReadCloser, int64, error) {
	return s.Fetch(br)
}
   291  
// Fetch returns a ReadSeekCloser over br's contents and the blob's size.
// The reader is an io.SectionReader over the already-open pack file named
// by the blob's index entry; Close on the result is a no-op (NopCloser),
// so the shared pack file handle stays open.
func (s *storage) Fetch(br blob.Ref) (types.ReadSeekCloser, int64, error) {
	meta, err := s.meta(br)
	if err != nil {
		return nil, 0, err
	}

	// NOTE(review): s.fds is read here without holding s.mu — confirm
	// concurrent pack rollover (nextPack) cannot race with this access.
	if meta.file >= len(s.fds) {
		return nil, 0, fmt.Errorf("diskpacked: attempt to fetch blob from out of range pack file %d > %d", meta.file, len(s.fds))
	}
	rac := s.fds[meta.file]
	var rs io.ReadSeeker = io.NewSectionReader(rac, meta.offset, meta.size)
	fn := rac.Name()
	// Ensure entry is in map.
	readVar.Add(fn, 0)
	// Wrap the reader so the per-file and per-root expvar read counters
	// advance as the caller consumes the section.
	if v, ok := readVar.Get(fn).(*expvar.Int); ok {
		rs = types.NewStatsReadSeeker(v, rs)
	}
	readTotVar.Add(s.root, 0)
	if v, ok := readTotVar.Get(s.root).(*expvar.Int); ok {
		rs = types.NewStatsReadSeeker(v, rs)
	}
	rsc := struct {
		io.ReadSeeker
		io.Closer
	}{
		rs,
		types.NopCloser,
	}
	return rsc, meta.size, nil
}
   322  
   323  func (s *storage) filename(file int) string {
   324  	return filepath.Join(s.root, fmt.Sprintf("pack-%05d.blobs", file))
   325  }
   326  
// RemoveBlobs would delete the named blobs, but removal is not yet
// implemented for diskpacked; it always returns blobserver.ErrNotImplemented.
func (s *storage) RemoveBlobs(blobs []blob.Ref) error {
	// TODO(adg): remove blob from index and pad data with spaces
	return blobserver.ErrNotImplemented
}
   331  
   332  var statGate = syncutil.NewGate(20) // arbitrary
   333  
   334  func (s *storage) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) (err error) {
   335  	var wg syncutil.Group
   336  
   337  	for _, br := range blobs {
   338  		br := br
   339  		statGate.Start()
   340  		wg.Go(func() error {
   341  			defer statGate.Done()
   342  
   343  			m, err := s.meta(br)
   344  			if err == nil {
   345  				dest <- m.SizedRef(br)
   346  				return nil
   347  			}
   348  			if err == os.ErrNotExist {
   349  				return nil
   350  			}
   351  			return err
   352  		})
   353  	}
   354  	return wg.Err()
   355  }
   356  
// EnumerateBlobs sends on dest a SizedRef for up to limit blobs whose
// blobref string is strictly greater than after, in index (lexical)
// order.  dest is always closed.  Enumeration aborts with
// context.ErrCanceled if ctx is canceled while sending.
func (s *storage) EnumerateBlobs(ctx *context.Context, dest chan<- blob.SizedRef, after string, limit int) (err error) {
	defer close(dest)

	t := s.index.Find(after, "")
	// Report the iterator's Close error only if nothing else failed; err
	// is the named return, so this defer sees the value of any return below.
	defer func() {
		closeErr := t.Close()
		if err == nil {
			err = closeErr
		}
	}()
	// i counts only blobs actually sent; skipped keys don't use up limit.
	for i := 0; i < limit && t.Next(); {
		key := t.Key()
		if key <= after {
			// EnumerateBlobs' semantics are '>', but sorted.KeyValue.Find is '>='.
			continue
		}
		br, ok := blob.Parse(key)
		if !ok {
			return fmt.Errorf("diskpacked: couldn't parse index key %q", key)
		}
		m, ok := parseBlobMeta(t.Value())
		if !ok {
			return fmt.Errorf("diskpacked: couldn't parse index value %q: %q", key, t.Value())
		}
		select {
		case dest <- m.SizedRef(br):
		case <-ctx.Done():
			return context.ErrCanceled
		}
		i++
	}
	return nil
}
   390  
   391  func (s *storage) ReceiveBlob(br blob.Ref, source io.Reader) (sbr blob.SizedRef, err error) {
   392  	var b bytes.Buffer
   393  	n, err := b.ReadFrom(source)
   394  	if err != nil {
   395  		return
   396  	}
   397  	sbr = blob.SizedRef{Ref: br, Size: n}
   398  	err = s.append(sbr, &b)
   399  	return
   400  }
   401  
   402  // append writes the provided blob to the current data file.
   403  func (s *storage) append(br blob.SizedRef, r io.Reader) error {
   404  	s.mu.Lock()
   405  	defer s.mu.Unlock()
   406  	if s.closed {
   407  		return errors.New("diskpacked: write to closed storage")
   408  	}
   409  
   410  	fn := s.writer.Name()
   411  	n, err := fmt.Fprintf(s.writer, "[%v %v]", br.Ref.String(), br.Size)
   412  	s.size += int64(n)
   413  	writeVar.Add(fn, int64(n))
   414  	writeTotVar.Add(s.root, int64(n))
   415  	if err != nil {
   416  		return err
   417  	}
   418  
   419  	// TODO(adg): remove this seek and the offset check once confident
   420  	offset, err := s.writer.Seek(0, os.SEEK_CUR)
   421  	if err != nil {
   422  		return err
   423  	}
   424  	if offset != s.size {
   425  		return fmt.Errorf("diskpacked: seek says offset = %d, we think %d",
   426  			offset, s.size)
   427  	}
   428  	offset = s.size // make this a declaration once the above is removed
   429  
   430  	n2, err := io.Copy(s.writer, r)
   431  	s.size += n2
   432  	writeVar.Add(fn, int64(n))
   433  	writeTotVar.Add(s.root, int64(n))
   434  	if err != nil {
   435  		return err
   436  	}
   437  	if n2 != br.Size {
   438  		return fmt.Errorf("diskpacked: written blob size %d didn't match size %d", n, br.Size)
   439  	}
   440  	if err = s.writer.Sync(); err != nil {
   441  		return err
   442  	}
   443  
   444  	packIdx := len(s.fds) - 1
   445  	if s.size > s.maxFileSize {
   446  		if err := s.nextPack(); err != nil {
   447  			return err
   448  		}
   449  	}
   450  	return s.index.Set(br.Ref.String(), blobMeta{packIdx, offset, br.Size}.String())
   451  }
   452  
   453  // meta fetches the metadata for the specified blob from the index.
   454  func (s *storage) meta(br blob.Ref) (m blobMeta, err error) {
   455  	ms, err := s.index.Get(br.String())
   456  	if err != nil {
   457  		if err == sorted.ErrNotFound {
   458  			err = os.ErrNotExist
   459  		}
   460  		return
   461  	}
   462  	m, ok := parseBlobMeta(ms)
   463  	if !ok {
   464  		err = fmt.Errorf("diskpacked: bad blob metadata: %q", ms)
   465  	}
   466  	return
   467  }
   468  
   469  // blobMeta is the blob metadata stored in the index.
   470  type blobMeta struct {
   471  	file         int
   472  	offset, size int64
   473  }
   474  
   475  func parseBlobMeta(s string) (m blobMeta, ok bool) {
   476  	n, err := fmt.Sscan(s, &m.file, &m.offset, &m.size)
   477  	return m, n == 3 && err == nil
   478  }
   479  
   480  func (m blobMeta) String() string {
   481  	return fmt.Sprintf("%v %v %v", m.file, m.offset, m.size)
   482  }
   483  
// SizedRef couples br with the blob's size recorded in the index metadata.
func (m blobMeta) SizedRef(br blob.Ref) blob.SizedRef {
	return blob.SizedRef{Ref: br, Size: m.size}
}