github.com/cayleygraph/cayley@v0.7.7/graph/kv/quadstore.go (about)

     1  // Copyright 2017 The Cayley Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package kv
    16  
    17  import (
    18  	"context"
    19  	"encoding/binary"
    20  	"encoding/json"
    21  	"errors"
    22  	"fmt"
    23  	"os"
    24  	"sync"
    25  
    26  	"github.com/cayleygraph/cayley/clog"
    27  	"github.com/cayleygraph/cayley/graph"
    28  	"github.com/cayleygraph/cayley/graph/proto"
    29  	"github.com/cayleygraph/cayley/graph/shape"
    30  	"github.com/cayleygraph/cayley/internal/lru"
    31  	"github.com/cayleygraph/quad"
    32  	"github.com/cayleygraph/quad/pquads"
    33  	"github.com/hidal-go/hidalgo/kv"
    34  	boom "github.com/tylertreat/BoomFilters"
    35  )
    36  
// ErrNoBucket is returned by the metadata readers in this file when the
// requested metadata key is not present in the underlying key-value store.
var ErrNoBucket = errors.New("kv: no bucket")
    38  
// Registration describes a key-value backend that can be exposed as a quad
// store via Register.
type Registration struct {
	// NewFunc opens the backend when the quad store is opened.
	NewFunc NewFunc
	// InitFunc opens the backend when the quad store is initialized.
	// Register only calls it when IsPersistent is true.
	InitFunc InitFunc
	// IsPersistent is forwarded to the graph registration. When false,
	// Register skips InitFunc and runs Init on every open instead.
	IsPersistent bool
}
    44  
// InitFunc opens a key-value database for initialization. Register passes it
// the address and options it received from the graph package.
type InitFunc func(string, graph.Options) (kv.KV, error)

// NewFunc opens a key-value database for regular use. Register passes it the
// address and options it received from the graph package.
type NewFunc func(string, graph.Options) (kv.KV, error)
    47  
    48  func Register(name string, r Registration) {
    49  	graph.RegisterQuadStore(name, graph.QuadStoreRegistration{
    50  		InitFunc: func(addr string, opt graph.Options) error {
    51  			if !r.IsPersistent {
    52  				return nil
    53  			}
    54  			kv, err := r.InitFunc(addr, opt)
    55  			if err != nil {
    56  				return err
    57  			}
    58  			defer kv.Close()
    59  			if err = Init(kv, opt); err != nil {
    60  				return err
    61  			}
    62  			return kv.Close()
    63  		},
    64  		NewFunc: func(addr string, opt graph.Options) (graph.QuadStore, error) {
    65  			kv, err := r.NewFunc(addr, opt)
    66  			if err != nil {
    67  				return nil, err
    68  			}
    69  			if !r.IsPersistent {
    70  				if err = Init(kv, opt); err != nil {
    71  					kv.Close()
    72  					return nil, err
    73  				}
    74  			}
    75  			return New(kv, opt)
    76  		},
    77  		IsPersistent: r.IsPersistent,
    78  	})
    79  }
    80  
const (
	// latestDataVersion is the data version written by Init and the only
	// version accepted by New.
	latestDataVersion = 2
	// envKVDefaultIndexes names the environment variable that Init reads;
	// when it is non-empty its content is parsed as JSON index configuration.
	envKVDefaultIndexes = "CAYLEY_KV_INDEXES"
)
    85  
// Compile-time assertions that *QuadStore satisfies the interfaces below.
var (
	_ graph.BatchQuadStore = (*QuadStore)(nil)
	_ shape.Optimizer      = (*QuadStore)(nil)
)
    90  
// QuadStore is a quad store implemented on top of a generic key-value
// database.
type QuadStore struct {
	// db is the underlying key-value database; Close closes it.
	db kv.KV

	// indexes holds the quad index definitions.
	// NOTE(review): the embedded RWMutex presumably guards both lists; the
	// locking code is not part of this section — confirm.
	indexes struct {
		sync.RWMutex
		// all is defaulted to DefaultQuadIndexes by Init and loaded via
		// readIndexesMeta by New.
		all []QuadIndex
		// indexes used to detect duplicate quads
		exists []QuadIndex
	}

	// valueLRU is an LRU cache created by New with a capacity of 2000.
	valueLRU *lru.Cache

	// New allocates mapBloom and mapNodes only when the bloom filter is
	// enabled and the stored size is zero.
	// NOTE(review): writer and mapBucket are used by code outside this
	// section; their exact role should be confirmed there.
	writer    sync.Mutex
	mapBucket map[string]map[string][]uint64
	mapBloom  map[string]*boom.BloomFilter
	mapNodes  *boom.BloomFilter

	// exists groups the bloom filter state; disabled is set by New from the
	// OptNoBloom option.
	exists struct {
		disabled bool
		sync.Mutex
		buf []byte
		*boom.DeletableBloomFilter
	}
}
   115  
   116  func newQuadStore(kv kv.KV) *QuadStore {
   117  	return &QuadStore{db: kv}
   118  }
   119  
   120  func Init(kv kv.KV, opt graph.Options) error {
   121  	ctx := context.TODO()
   122  	qs := newQuadStore(kv)
   123  	if data := os.Getenv(envKVDefaultIndexes); data != "" {
   124  		qs.indexes.all = nil
   125  		if err := json.Unmarshal([]byte(data), &qs.indexes); err != nil {
   126  			return err
   127  		}
   128  	}
   129  	if qs.indexes.all == nil {
   130  		qs.indexes.all = DefaultQuadIndexes
   131  	}
   132  	if _, err := qs.getMetadata(ctx); err == nil {
   133  		return graph.ErrDatabaseExists
   134  	} else if err != ErrNoBucket {
   135  		return err
   136  	}
   137  	upfront, err := opt.BoolKey("upfront", false)
   138  	if err != nil {
   139  		return err
   140  	}
   141  	if err := qs.createBuckets(ctx, upfront); err != nil {
   142  		return err
   143  	}
   144  	if err := setVersion(ctx, qs.db, latestDataVersion); err != nil {
   145  		return err
   146  	}
   147  	if err := qs.writeIndexesMeta(ctx); err != nil {
   148  		return err
   149  	}
   150  	return nil
   151  }
   152  
const (
	// OptNoBloom is the boolean option key read by New; its value is stored
	// in the store's bloom filter "disabled" flag (default false).
	OptNoBloom = "no_bloom"
)
   156  
   157  func New(kv kv.KV, opt graph.Options) (graph.QuadStore, error) {
   158  	ctx := context.TODO()
   159  	qs := newQuadStore(kv)
   160  	if vers, err := qs.getMetadata(ctx); err == ErrNoBucket {
   161  		return nil, graph.ErrNotInitialized
   162  	} else if err != nil {
   163  		return nil, err
   164  	} else if vers != latestDataVersion {
   165  		return nil, errors.New("kv: data version is out of date. Run cayleyupgrade for your config to update the data.")
   166  	}
   167  	if list, err := qs.readIndexesMeta(ctx); err != nil {
   168  		return nil, err
   169  	} else {
   170  		qs.indexes.all = list
   171  	}
   172  	qs.valueLRU = lru.New(2000)
   173  	qs.exists.disabled, _ = opt.BoolKey(OptNoBloom, false)
   174  	if err := qs.initBloomFilter(ctx); err != nil {
   175  		return nil, err
   176  	}
   177  	if !qs.exists.disabled {
   178  		if sz, err := qs.getSize(); err != nil {
   179  			return nil, err
   180  		} else if sz == 0 {
   181  			qs.mapBloom = make(map[string]*boom.BloomFilter)
   182  			qs.mapNodes = boom.NewBloomFilter(100*1000*1000, 0.05)
   183  		}
   184  	}
   185  	return qs, nil
   186  }
   187  
   188  func setVersion(ctx context.Context, db kv.KV, version int64) error {
   189  	return kv.Update(ctx, db, func(tx kv.Tx) error {
   190  		var buf [8]byte
   191  		binary.LittleEndian.PutUint64(buf[:], uint64(version))
   192  		if err := tx.Put(metaBucket.AppendBytes([]byte("version")), buf[:]); err != nil {
   193  			return fmt.Errorf("couldn't write version: %v", err)
   194  		}
   195  		return nil
   196  	})
   197  }
   198  
   199  func (qs *QuadStore) getMetaInt(ctx context.Context, key string) (int64, error) {
   200  	var v int64
   201  	err := kv.View(qs.db, func(tx kv.Tx) error {
   202  		val, err := tx.Get(ctx, metaBucket.AppendBytes([]byte(key)))
   203  		if err == kv.ErrNotFound {
   204  			return ErrNoBucket
   205  		} else if err != nil {
   206  			return err
   207  		}
   208  		v, err = asInt64(val, 0)
   209  		if err != nil {
   210  			return err
   211  		}
   212  		return nil
   213  	})
   214  	return v, err
   215  }
   216  
   217  func (qs *QuadStore) getSize() (int64, error) {
   218  	sz, err := qs.getMetaInt(context.TODO(), "size")
   219  	if err == ErrNoBucket {
   220  		return 0, nil
   221  	}
   222  	return sz, err
   223  }
   224  
   225  func (qs *QuadStore) Size() int64 {
   226  	sz, _ := qs.getSize()
   227  	return sz
   228  }
   229  
   230  func (qs *QuadStore) Stats(ctx context.Context, exact bool) (graph.Stats, error) {
   231  	sz, err := qs.getMetaInt(ctx, "size")
   232  	if err != nil {
   233  		return graph.Stats{}, err
   234  	}
   235  	st := graph.Stats{
   236  		Nodes: graph.Size{
   237  			Size:  sz / 3,
   238  			Exact: false, // TODO(dennwc): store nodes count
   239  		},
   240  		Quads: graph.Size{
   241  			Size:  sz,
   242  			Exact: true,
   243  		},
   244  	}
   245  	if exact {
   246  		// calculate the exact number of nodes
   247  		st.Nodes.Size = 0
   248  		it := qs.NodesAllIterator()
   249  		defer it.Close()
   250  		for it.Next(ctx) {
   251  			st.Nodes.Size++
   252  		}
   253  		if err := it.Err(); err != nil {
   254  			return st, err
   255  		}
   256  		st.Nodes.Exact = true
   257  	}
   258  	return st, nil
   259  }
   260  
   261  func (qs *QuadStore) Close() error {
   262  	return qs.db.Close()
   263  }
   264  
   265  func (qs *QuadStore) getMetadata(ctx context.Context) (int64, error) {
   266  	var vers int64
   267  	err := kv.View(qs.db, func(tx kv.Tx) error {
   268  		val, err := tx.Get(ctx, metaBucket.AppendBytes([]byte("version")))
   269  		if err == kv.ErrNotFound {
   270  			return ErrNoBucket
   271  		} else if err != nil {
   272  			return err
   273  		}
   274  		vers, err = asInt64(val, 0)
   275  		if err != nil {
   276  			return err
   277  		}
   278  		return nil
   279  	})
   280  	return vers, err
   281  }
   282  
   283  func asInt64(b []byte, empty int64) (int64, error) {
   284  	if len(b) == 0 {
   285  		return empty, nil
   286  	} else if len(b) != 8 {
   287  		return 0, fmt.Errorf("unexpected int size: %d", len(b))
   288  	}
   289  	v := int64(binary.LittleEndian.Uint64(b))
   290  	return v, nil
   291  }
   292  
   293  func (qs *QuadStore) horizon(ctx context.Context) int64 {
   294  	h, _ := qs.getMetaInt(ctx, "horizon")
   295  	return h
   296  }
   297  
   298  func (qs *QuadStore) ValuesOf(ctx context.Context, vals []graph.Ref) ([]quad.Value, error) {
   299  	out := make([]quad.Value, len(vals))
   300  	var (
   301  		inds []int
   302  		refs []uint64
   303  	)
   304  	for i, v := range vals {
   305  		if v == nil {
   306  			continue
   307  		} else if pv, ok := v.(graph.PreFetchedValue); ok {
   308  			out[i] = pv.NameOf()
   309  			continue
   310  		}
   311  		switch v := v.(type) {
   312  		case Int64Value:
   313  			if v == 0 {
   314  				continue
   315  			}
   316  			inds = append(inds, i)
   317  			refs = append(refs, uint64(v))
   318  		default:
   319  			return out, fmt.Errorf("unknown type of graph.Ref; not meant for this quadstore. apparently a %#v", v)
   320  		}
   321  	}
   322  	if len(refs) == 0 {
   323  		return out, nil
   324  	}
   325  	prim, err := qs.getPrimitives(ctx, refs)
   326  	if err != nil {
   327  		return out, err
   328  	}
   329  	var last error
   330  	for i, p := range prim {
   331  		if p == nil || !p.IsNode() {
   332  			continue
   333  		}
   334  		qv, err := pquads.UnmarshalValue(p.Value)
   335  		if err != nil {
   336  			last = err
   337  			continue
   338  		}
   339  		out[inds[i]] = qv
   340  	}
   341  	return out, last
   342  }
   343  
   344  func (qs *QuadStore) RefsOf(ctx context.Context, nodes []quad.Value) ([]graph.Ref, error) {
   345  	values := make([]graph.Ref, len(nodes))
   346  	err := kv.View(qs.db, func(tx kv.Tx) error {
   347  		for i, node := range nodes {
   348  			value, err := qs.resolveQuadValue(ctx, tx, node)
   349  			if err != nil {
   350  				return err
   351  			}
   352  			values[i] = Int64Value(value)
   353  		}
   354  		return nil
   355  	})
   356  	if err != nil {
   357  		return nil, err
   358  	}
   359  	return values, nil
   360  }
   361  
   362  func (qs *QuadStore) NameOf(v graph.Ref) quad.Value {
   363  	ctx := context.TODO()
   364  	vals, err := qs.ValuesOf(ctx, []graph.Ref{v})
   365  	if err != nil {
   366  		clog.Errorf("error getting NameOf %d: %s", v, err)
   367  		return nil
   368  	}
   369  	return vals[0]
   370  }
   371  
   372  func (qs *QuadStore) Quad(k graph.Ref) quad.Quad {
   373  	key, ok := k.(*proto.Primitive)
   374  	if !ok {
   375  		clog.Errorf("passed value was not a quad primitive: %T", k)
   376  		return quad.Quad{}
   377  	}
   378  	ctx := context.TODO()
   379  	var v quad.Quad
   380  	err := kv.View(qs.db, func(tx kv.Tx) error {
   381  		var err error
   382  		v, err = qs.primitiveToQuad(ctx, tx, key)
   383  		return err
   384  	})
   385  	if err != nil {
   386  		if err != kv.ErrNotFound {
   387  			clog.Errorf("error fetching quad %#v: %s", key, err)
   388  		}
   389  		return quad.Quad{}
   390  	}
   391  	return v
   392  }
   393  
   394  func (qs *QuadStore) primitiveToQuad(ctx context.Context, tx kv.Tx, p *proto.Primitive) (quad.Quad, error) {
   395  	q := &quad.Quad{}
   396  	for _, dir := range quad.Directions {
   397  		v := p.GetDirection(dir)
   398  		val, err := qs.getValFromLog(ctx, tx, v)
   399  		if err != nil {
   400  			return *q, err
   401  		}
   402  		q.Set(dir, val)
   403  	}
   404  	return *q, nil
   405  }
   406  
   407  func (qs *QuadStore) getValFromLog(ctx context.Context, tx kv.Tx, k uint64) (quad.Value, error) {
   408  	if k == 0 {
   409  		return nil, nil
   410  	}
   411  	p, err := qs.getPrimitiveFromLog(ctx, tx, k)
   412  	if err != nil {
   413  		return nil, err
   414  	}
   415  	return pquads.UnmarshalValue(p.Value)
   416  }
   417  
   418  func (qs *QuadStore) ValueOf(s quad.Value) graph.Ref {
   419  	ctx := context.TODO()
   420  	var out Int64Value
   421  	_ = kv.View(qs.db, func(tx kv.Tx) error {
   422  		v, err := qs.resolveQuadValue(ctx, tx, s)
   423  		out = Int64Value(v)
   424  		return err
   425  	})
   426  	if out == 0 {
   427  		return nil
   428  	}
   429  	return out
   430  }
   431  
   432  func (qs *QuadStore) QuadDirection(val graph.Ref, d quad.Direction) graph.Ref {
   433  	p, ok := val.(*proto.Primitive)
   434  	if !ok {
   435  		return nil
   436  	}
   437  	switch d {
   438  	case quad.Subject:
   439  		return Int64Value(p.Subject)
   440  	case quad.Predicate:
   441  		return Int64Value(p.Predicate)
   442  	case quad.Object:
   443  		return Int64Value(p.Object)
   444  	case quad.Label:
   445  		if p.Label == 0 {
   446  			return nil
   447  		}
   448  		return Int64Value(p.Label)
   449  	}
   450  	return nil
   451  }
   452  
   453  func (qs *QuadStore) getPrimitives(ctx context.Context, vals []uint64) ([]*proto.Primitive, error) {
   454  	tx, err := qs.db.Tx(false)
   455  	if err != nil {
   456  		return nil, err
   457  	}
   458  	defer tx.Close()
   459  	tx = wrapTx(tx)
   460  	return qs.getPrimitivesFromLog(ctx, tx, vals)
   461  }
   462  
   463  type Int64Value uint64
   464  
   465  func (v Int64Value) Key() interface{} { return v }