github.com/cayleygraph/cayley@v0.7.7/graph/gaedatastore/quadstore.go (about)

     1  // Copyright 2014 The Cayley Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package gaedatastore
    16  
    17  import (
    18  	"encoding/hex"
    19  	"errors"
    20  	"math"
    21  	"net/http"
    22  	"time"
    23  
    24  	"github.com/cayleygraph/cayley/clog"
    25  
    26  	"golang.org/x/net/context"
    27  	"google.golang.org/appengine"
    28  	"google.golang.org/appengine/datastore"
    29  
    30  	"github.com/cayleygraph/cayley/graph"
    31  	"github.com/cayleygraph/cayley/graph/http"
    32  	"github.com/cayleygraph/quad"
    33  )
    34  
    35  var _ httpgraph.QuadStore = (*QuadStore)(nil)
    36  
    37  const (
    38  	QuadStoreType = "gaedatastore"
    39  	quadKind      = "quad"
    40  	nodeKind      = "node"
    41  )
    42  
    43  var (
    44  	// Order of quad fields
    45  	spo = [4]quad.Direction{quad.Subject, quad.Predicate, quad.Object, quad.Label}
    46  )
    47  
    48  type QuadStore struct {
    49  	context context.Context
    50  }
    51  
    52  type MetadataEntry struct {
    53  	NodeCount int64
    54  	QuadCount int64
    55  }
    56  
    57  type Token struct {
    58  	Kind string
    59  	Hash string
    60  }
    61  
    62  func (t Token) IsNode() bool     { return t.Kind == nodeKind }
    63  func (t Token) Key() interface{} { return t }
    64  
    65  type QuadEntry struct {
    66  	Hash      string
    67  	Added     []int64 `datastore:",noindex"`
    68  	Deleted   []int64 `datastore:",noindex"`
    69  	Subject   string  `datastore:"subject"`
    70  	Predicate string  `datastore:"predicate"`
    71  	Object    string  `datastore:"object"`
    72  	Label     string  `datastore:"label"`
    73  }
    74  
    75  type NodeEntry struct {
    76  	Name string
    77  	Size int64
    78  }
    79  
    80  type LogEntry struct {
    81  	Action    string
    82  	Key       string
    83  	Timestamp int64
    84  }
    85  
    86  func init() {
    87  	graph.RegisterQuadStore("gaedatastore", graph.QuadStoreRegistration{
    88  		NewFunc:      newQuadStore,
    89  		UpgradeFunc:  nil,
    90  		InitFunc:     initQuadStore,
    91  		IsPersistent: true,
    92  	})
    93  }
    94  
    95  func initQuadStore(_ string, _ graph.Options) error {
    96  	// TODO (panamafrancis) check appengine datastore for consistency
    97  	return nil
    98  }
    99  
   100  func newQuadStore(_ string, options graph.Options) (graph.QuadStore, error) {
   101  	return &QuadStore{}, nil
   102  }
   103  
   104  func (qs *QuadStore) createKeyForQuad(q quad.Quad) *datastore.Key {
   105  	id := hashOf(q.Subject)
   106  	id += hashOf(q.Predicate)
   107  	id += hashOf(q.Object)
   108  	id += hashOf(q.Label)
   109  	return qs.createKeyFromToken(&Token{quadKind, id})
   110  }
   111  
   112  func hashOf(s quad.Value) string {
   113  	return hex.EncodeToString(quad.HashOf(s))
   114  }
   115  
   116  func (qs *QuadStore) createKeyForNode(n quad.Value) *datastore.Key {
   117  	id := hashOf(n)
   118  	return qs.createKeyFromToken(&Token{nodeKind, id})
   119  }
   120  
   121  func (qs *QuadStore) createKeyForMetadata() *datastore.Key {
   122  	return qs.createKeyFromToken(&Token{"metadata", "metadataentry"})
   123  }
   124  
   125  func (qs *QuadStore) createKeyForLog() *datastore.Key {
   126  	return datastore.NewKey(qs.context, "logentry", "", 0, nil)
   127  }
   128  
   129  func (qs *QuadStore) createKeyFromToken(t *Token) *datastore.Key {
   130  	return datastore.NewKey(qs.context, t.Kind, t.Hash, 0, nil)
   131  }
   132  
   133  func (qs *QuadStore) checkValid(k *datastore.Key) (bool, error) {
   134  	var q QuadEntry
   135  	err := datastore.Get(qs.context, k, &q)
   136  	if err == datastore.ErrNoSuchEntity {
   137  		return false, nil
   138  	}
   139  	if _, ok := err.(*datastore.ErrFieldMismatch); ok {
   140  		return true, nil
   141  	}
   142  	if err != nil {
   143  		clog.Warningf("Error occurred when getting quad/node %s %v", k, err)
   144  		return false, err
   145  	}
   146  	// a deleted node should not be returned as found.
   147  	if len(q.Deleted) >= len(q.Added) {
   148  		return false, nil
   149  	}
   150  	return true, nil
   151  }
   152  
   153  func getContext(opts graph.Options) (context.Context, error) {
   154  	req := opts["HTTPRequest"].(*http.Request)
   155  	if req == nil {
   156  		err := errors.New("HTTP Request needed")
   157  		clog.Errorf("%v", err)
   158  		return nil, err
   159  	}
   160  	return appengine.NewContext(req), nil
   161  }
   162  
   163  func (qs *QuadStore) ForRequest(r *http.Request) (graph.QuadStore, error) {
   164  	return &QuadStore{context: appengine.NewContext(r)}, nil
   165  }
   166  
   167  func (qs *QuadStore) NewQuadWriter() (quad.WriteCloser, error) {
   168  	return &quadWriter{qs: qs}, nil
   169  }
   170  
   171  type quadWriter struct {
   172  	qs     *QuadStore
   173  	deltas []graph.Delta
   174  }
   175  
   176  func (w *quadWriter) WriteQuad(q quad.Quad) error {
   177  	_, err := w.WriteQuads([]quad.Quad{q})
   178  	return err
   179  }
   180  
   181  func (w *quadWriter) WriteQuads(buf []quad.Quad) (int, error) {
   182  	// TODO(dennwc): write an optimized implementation
   183  	w.deltas = w.deltas[:0]
   184  	if cap(w.deltas) < len(buf) {
   185  		w.deltas = make([]graph.Delta, 0, len(buf))
   186  	}
   187  	for _, q := range buf {
   188  		w.deltas = append(w.deltas, graph.Delta{
   189  			Quad: q, Action: graph.Add,
   190  		})
   191  	}
   192  	err := w.qs.ApplyDeltas(w.deltas, graph.IgnoreOpts{
   193  		IgnoreDup: true,
   194  	})
   195  	w.deltas = w.deltas[:0]
   196  	if err != nil {
   197  		return 0, err
   198  	}
   199  	return len(buf), nil
   200  }
   201  
   202  func (w *quadWriter) Close() error {
   203  	w.deltas = nil
   204  	return nil
   205  }
   206  
   207  func (qs *QuadStore) ApplyDeltas(in []graph.Delta, ignoreOpts graph.IgnoreOpts) error {
   208  	if qs.context == nil {
   209  		return errors.New("No context, graph not correctly initialised")
   210  	}
   211  	toKeep := make([]graph.Delta, 0)
   212  	for _, d := range in {
   213  		if d.Action != graph.Add && d.Action != graph.Delete {
   214  			//Defensive shortcut
   215  			return errors.New("Datastore: invalid action")
   216  		}
   217  		key := qs.createKeyForQuad(d.Quad)
   218  		keep := false
   219  		switch d.Action {
   220  		case graph.Add:
   221  			found, err := qs.checkValid(key)
   222  			if err != nil {
   223  				return err
   224  			}
   225  			if found {
   226  				if !ignoreOpts.IgnoreDup {
   227  					return graph.ErrQuadExists
   228  				}
   229  			} else {
   230  				keep = true
   231  			}
   232  		case graph.Delete:
   233  			found, err := qs.checkValid(key)
   234  			if err != nil {
   235  				return err
   236  			}
   237  			if found || ignoreOpts.IgnoreMissing {
   238  				keep = true
   239  			} else {
   240  				return graph.ErrQuadNotExist
   241  			}
   242  		default:
   243  			keep = false
   244  		}
   245  		if keep {
   246  			toKeep = append(toKeep, d)
   247  		}
   248  	}
   249  	if len(toKeep) == 0 {
   250  		return nil
   251  	}
   252  	ids, err := qs.updateLog(toKeep)
   253  	if err != nil {
   254  		clog.Errorf("Updating log failed %v", err)
   255  		return err
   256  	}
   257  
   258  	if clog.V(2) {
   259  		clog.Infof("Existence verified. Proceeding.")
   260  	}
   261  
   262  	quadsAdded, err := qs.updateQuads(toKeep, ids)
   263  	if err != nil {
   264  		clog.Errorf("UpdateQuads failed %v", err)
   265  		return err
   266  	}
   267  	nodesAdded, err := qs.updateNodes(toKeep)
   268  	if err != nil {
   269  		clog.Warningf("UpdateNodes failed %v", err)
   270  		return err
   271  	}
   272  	err = qs.updateMetadata(quadsAdded, nodesAdded)
   273  	if err != nil {
   274  		clog.Warningf("UpdateMetadata failed %v", err)
   275  		return err
   276  	}
   277  	return nil
   278  }
   279  
   280  func (qs *QuadStore) updateNodes(in []graph.Delta) (int64, error) {
   281  	// Collate changes to each node
   282  	var countDelta int64
   283  	var nodesAdded int64
   284  	nodeDeltas := make(map[quad.Value]int64)
   285  	for _, d := range in {
   286  		if d.Action == graph.Add {
   287  			countDelta = 1
   288  		} else {
   289  			countDelta = -1
   290  		}
   291  		nodeDeltas[d.Quad.Subject] += countDelta
   292  		nodeDeltas[d.Quad.Object] += countDelta
   293  		nodeDeltas[d.Quad.Predicate] += countDelta
   294  		if d.Quad.Label != nil {
   295  			nodeDeltas[d.Quad.Label] += countDelta
   296  		}
   297  		nodesAdded += countDelta
   298  	}
   299  	// Create keys and new nodes
   300  	keys := make([]*datastore.Key, 0, len(nodeDeltas))
   301  	tempNodes := make([]NodeEntry, 0, len(nodeDeltas))
   302  	for k, v := range nodeDeltas {
   303  		keys = append(keys, qs.createKeyForNode(k))
   304  		tempNodes = append(tempNodes, NodeEntry{k.String(), v})
   305  	}
   306  	// In accordance with the appengine datastore spec, cross group transactions
   307  	// like these can only be done in batches of 5
   308  	for i := 0; i < len(nodeDeltas); i += 5 {
   309  		j := int(math.Min(float64(len(nodeDeltas)-i), 5))
   310  		foundNodes := make([]NodeEntry, j)
   311  		err := datastore.RunInTransaction(qs.context, func(c context.Context) error {
   312  			err := datastore.GetMulti(c, keys[i:i+j], foundNodes)
   313  			// Sift through for errors
   314  			if me, ok := err.(appengine.MultiError); ok {
   315  				for _, merr := range me {
   316  					if merr != nil && merr != datastore.ErrNoSuchEntity {
   317  						clog.Errorf("Error: %v", merr)
   318  						return merr
   319  					}
   320  				}
   321  			}
   322  			// Carry forward the sizes of the nodes from the datastore
   323  			for k, _ := range foundNodes {
   324  				if foundNodes[k].Name != "" {
   325  					tempNodes[i+k].Size += foundNodes[k].Size
   326  				}
   327  			}
   328  			_, err = datastore.PutMulti(c, keys[i:i+j], tempNodes[i:i+j])
   329  			return err
   330  		}, &datastore.TransactionOptions{XG: true})
   331  		if err != nil {
   332  			clog.Errorf("Error: %v", err)
   333  			return 0, err
   334  		}
   335  	}
   336  
   337  	return nodesAdded, nil
   338  }
   339  
   340  func (qs *QuadStore) updateQuads(in []graph.Delta, ids []int64) (int64, error) {
   341  	keys := make([]*datastore.Key, 0, len(in))
   342  	for _, d := range in {
   343  		keys = append(keys, qs.createKeyForQuad(d.Quad))
   344  	}
   345  	var quadCount int64
   346  	for i := 0; i < len(in); i += 5 {
   347  		// Find the closest batch of 5
   348  		j := int(math.Min(float64(len(in)-i), 5))
   349  		err := datastore.RunInTransaction(qs.context, func(c context.Context) error {
   350  			foundQuads := make([]QuadEntry, j)
   351  			// We don't process errors from GetMulti as they don't mean anything,
   352  			// we've handled existing quad conflicts above and we overwrite everything again anyways
   353  			datastore.GetMulti(c, keys, foundQuads)
   354  			for k, _ := range foundQuads {
   355  				x := i + k
   356  				foundQuads[k].Hash = keys[x].StringID()
   357  				foundQuads[k].Subject = in[x].Quad.Subject.String()
   358  				foundQuads[k].Predicate = in[x].Quad.Predicate.String()
   359  				foundQuads[k].Object = in[x].Quad.Object.String()
   360  				foundQuads[k].Label = quad.StringOf(in[x].Quad.Label)
   361  
   362  				// If the quad exists the Added[] will be non-empty
   363  				if in[x].Action == graph.Add {
   364  					foundQuads[k].Added = append(foundQuads[k].Added, ids[x])
   365  					quadCount += 1
   366  				} else {
   367  					foundQuads[k].Deleted = append(foundQuads[k].Deleted, ids[x])
   368  					quadCount -= 1
   369  				}
   370  			}
   371  			_, err := datastore.PutMulti(c, keys[i:i+j], foundQuads)
   372  			return err
   373  		}, &datastore.TransactionOptions{XG: true})
   374  		if err != nil {
   375  			return 0, err
   376  		}
   377  	}
   378  	return quadCount, nil
   379  }
   380  
   381  func (qs *QuadStore) updateMetadata(quadsAdded int64, nodesAdded int64) error {
   382  	key := qs.createKeyForMetadata()
   383  	foundMetadata := new(MetadataEntry)
   384  	err := datastore.RunInTransaction(qs.context, func(c context.Context) error {
   385  		err := datastore.Get(c, key, foundMetadata)
   386  		if err != nil && err != datastore.ErrNoSuchEntity {
   387  			clog.Errorf("Error: %v", err)
   388  			return err
   389  		}
   390  		foundMetadata.QuadCount += quadsAdded
   391  		foundMetadata.NodeCount += nodesAdded
   392  		_, err = datastore.Put(c, key, foundMetadata)
   393  		if err != nil {
   394  			clog.Errorf("Error: %v", err)
   395  		}
   396  		return err
   397  	}, nil)
   398  	return err
   399  }
   400  
   401  func (qs *QuadStore) updateLog(in []graph.Delta) ([]int64, error) {
   402  	if qs.context == nil {
   403  		err := errors.New("Error updating log, context is nil, graph not correctly initialised")
   404  		return nil, err
   405  	}
   406  	if len(in) == 0 {
   407  		return nil, errors.New("Nothing to log")
   408  	}
   409  	logEntries := make([]LogEntry, 0, len(in))
   410  	logKeys := make([]*datastore.Key, 0, len(in))
   411  	for _, d := range in {
   412  		var action string
   413  		if d.Action == graph.Add {
   414  			action = "Add"
   415  		} else {
   416  			action = "Delete"
   417  		}
   418  
   419  		entry := LogEntry{
   420  			Action:    action,
   421  			Key:       qs.createKeyForQuad(d.Quad).String(),
   422  			Timestamp: time.Now().UnixNano(),
   423  		}
   424  		logEntries = append(logEntries, entry)
   425  		logKeys = append(logKeys, qs.createKeyForLog())
   426  	}
   427  
   428  	ids, err := datastore.PutMulti(qs.context, logKeys, logEntries)
   429  	if err != nil {
   430  		clog.Errorf("Error updating log: %v", err)
   431  		return nil, err
   432  	}
   433  	out := make([]int64, 0, len(ids))
   434  	for _, id := range ids {
   435  		out = append(out, id.IntID())
   436  	}
   437  	return out, nil
   438  }
   439  
   440  func (qs *QuadStore) QuadIterator(dir quad.Direction, v graph.Ref) graph.Iterator {
   441  	return NewIterator(qs, quadKind, dir, v)
   442  }
   443  
   444  func (qs *QuadStore) NodesAllIterator() graph.Iterator {
   445  	return NewAllIterator(qs, nodeKind)
   446  }
   447  
   448  func (qs *QuadStore) QuadsAllIterator() graph.Iterator {
   449  	return NewAllIterator(qs, quadKind)
   450  }
   451  
   452  func (qs *QuadStore) ValueOf(s quad.Value) graph.Ref {
   453  	id := hashOf(s)
   454  	return &Token{Kind: nodeKind, Hash: id}
   455  }
   456  
   457  func (qs *QuadStore) NameOf(val graph.Ref) quad.Value {
   458  	if qs.context == nil {
   459  		clog.Errorf("Error in NameOf, context is nil, graph not correctly initialised")
   460  		return nil
   461  	} else if v, ok := val.(graph.PreFetchedValue); ok {
   462  		return v.NameOf()
   463  	}
   464  	var key *datastore.Key
   465  	if t, ok := val.(*Token); ok && t.Kind == nodeKind {
   466  		key = qs.createKeyFromToken(t)
   467  	} else {
   468  		clog.Errorf("Token not valid")
   469  		return nil
   470  	}
   471  
   472  	// TODO (panamafrancis) implement a cache
   473  
   474  	node := new(NodeEntry)
   475  	err := datastore.Get(qs.context, key, node)
   476  	if err != nil {
   477  		clog.Errorf("Error: %v", err)
   478  		return nil
   479  	}
   480  	return quad.Raw(node.Name)
   481  }
   482  
   483  func (qs *QuadStore) Quad(val graph.Ref) quad.Quad {
   484  	if qs.context == nil {
   485  		clog.Errorf("Error fetching quad, context is nil, graph not correctly initialised")
   486  		return quad.Quad{}
   487  	}
   488  	var key *datastore.Key
   489  	if t, ok := val.(*Token); ok && t.Kind == quadKind {
   490  		key = qs.createKeyFromToken(t)
   491  	} else {
   492  		clog.Errorf("Token not valid")
   493  		return quad.Quad{}
   494  	}
   495  
   496  	q := new(QuadEntry)
   497  	err := datastore.Get(qs.context, key, q)
   498  	if err != nil {
   499  		// Red herring error : ErrFieldMismatch can happen when a quad exists but a field is empty
   500  		if _, ok := err.(*datastore.ErrFieldMismatch); !ok {
   501  			clog.Errorf("Error: %v", err)
   502  		}
   503  	}
   504  	var label interface{}
   505  	if q.Label != "" {
   506  		label = q.Label
   507  	}
   508  	return quad.Make(
   509  		q.Subject,
   510  		q.Predicate,
   511  		q.Object,
   512  		label,
   513  	)
   514  }
   515  
   516  func (qs *QuadStore) Stats(ctx context.Context, exact bool) (graph.Stats, error) {
   517  	if qs.context == nil {
   518  		return graph.Stats{}, errors.New("error fetching size, context is nil, graph not correctly initialised")
   519  	}
   520  	key := qs.createKeyForMetadata()
   521  	m := new(MetadataEntry)
   522  	err := datastore.Get(qs.context, key, m)
   523  	if err != nil {
   524  		return graph.Stats{}, err
   525  	}
   526  	return graph.Stats{
   527  		Nodes: graph.Size{
   528  			Size:  m.NodeCount,
   529  			Exact: true,
   530  		},
   531  		Quads: graph.Size{
   532  			Size:  m.QuadCount,
   533  			Exact: true,
   534  		},
   535  	}, nil
   536  }
   537  
   538  func (qs *QuadStore) QuadIteratorSize(ctx context.Context, d quad.Direction, val graph.Ref) (graph.Size, error) {
   539  	t, ok := val.(*Token)
   540  	if !ok || t.Kind != nodeKind {
   541  		return graph.Size{Size: 0, Exact: true}, nil
   542  	} else if qs.context == nil {
   543  		return graph.Size{}, errors.New("cannot count iterator without a valid context")
   544  	}
   545  	key := qs.createKeyFromToken(t)
   546  	n := new(NodeEntry)
   547  	err := datastore.Get(qs.context, key, n)
   548  	if err != nil && err != datastore.ErrNoSuchEntity {
   549  		return graph.Size{}, err
   550  	}
   551  	return graph.Size{Size: n.Size, Exact: true}, nil
   552  }
   553  
   554  func (qs *QuadStore) Close() error {
   555  	qs.context = nil
   556  	return nil
   557  }
   558  
   559  func (qs *QuadStore) QuadDirection(val graph.Ref, dir quad.Direction) graph.Ref {
   560  	t, ok := val.(*Token)
   561  	if !ok {
   562  		clog.Errorf("Token not valid")
   563  		return nil
   564  	}
   565  	if t.Kind == nodeKind {
   566  		clog.Errorf("Node tokens not valid")
   567  		return nil
   568  	}
   569  	var offset int
   570  	switch dir {
   571  	case quad.Subject:
   572  		offset = 0
   573  	case quad.Predicate:
   574  		offset = (quad.HashSize * 2)
   575  	case quad.Object:
   576  		offset = (quad.HashSize * 2) * 2
   577  	case quad.Label:
   578  		offset = (quad.HashSize * 2) * 3
   579  	}
   580  	sub := t.Hash[offset : offset+(quad.HashSize*2)]
   581  	return &Token{Kind: nodeKind, Hash: sub}
   582  }