github.com/cayleygraph/cayley@v0.7.7/graph/sql/optimizer.go (about)

     1  // Copyright 2017 The Cayley Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package sql
    16  
    17  import (
    18  	"fmt"
    19  	"sort"
    20  	"strings"
    21  
    22  	"github.com/cayleygraph/cayley/graph/iterator"
    23  	"github.com/cayleygraph/cayley/graph/shape"
    24  	"github.com/cayleygraph/quad"
    25  )
    26  
    27  func NewOptimizer() *Optimizer {
    28  	return &Optimizer{}
    29  }
    30  
// Optimizer rewrites generic shapes into SQL-specific ones (see OptimizeShape).
type Optimizer struct {
	// tableInd is the counter nextTable uses to generate table aliases.
	tableInd int

	// regexpOp is the comparison operator used for wildcard and regexp filters;
	// while it is empty those filters are not optimized.
	// noOffsetWithoutLimit makes optimizePage skip shapes that have no limit set.
	regexpOp             CmpOp
	noOffsetWithoutLimit bool // blame mysql
}
    37  
    38  func (opt *Optimizer) SetRegexpOp(op CmpOp) {
    39  	opt.regexpOp = op
    40  }
    41  
    42  func (opt *Optimizer) NoOffsetWithoutLimit() {
    43  	opt.noOffsetWithoutLimit = true
    44  }
    45  
    46  func (opt *Optimizer) nextTable() string {
    47  	opt.tableInd++
    48  	return fmt.Sprintf("t_%d", opt.tableInd)
    49  }
    50  
    51  func (opt *Optimizer) ensureAliases(s *Select) {
    52  	for i, src := range s.From {
    53  		if t, ok := src.(Table); ok && t.Alias == "" {
    54  			t.Alias = opt.nextTable()
    55  			s.From[i] = t
    56  			// TODO: copy slice
    57  			for j := range s.Fields {
    58  				f := &s.Fields[j]
    59  				if f.Table == "" {
    60  					f.Table = t.Alias
    61  				}
    62  			}
    63  			for j := range s.Where {
    64  				w := &s.Where[j]
    65  				if w.Table == "" {
    66  					w.Table = t.Alias
    67  				}
    68  			}
    69  		}
    70  	}
    71  }
    72  
    73  func sortDirs(dirs []quad.Direction) {
    74  	sort.Slice(dirs, func(i, j int) bool {
    75  		return dirs[i] < dirs[j]
    76  	})
    77  }
    78  
    79  func (opt *Optimizer) OptimizeShape(s shape.Shape) (shape.Shape, bool) {
    80  	switch s := s.(type) {
    81  	case shape.AllNodes:
    82  		return AllNodes(), true
    83  	case shape.Lookup:
    84  		return opt.optimizeLookup(s)
    85  	case shape.Filter:
    86  		return opt.optimizeFilters(s)
    87  	case shape.Intersect:
    88  		return opt.optimizeIntersect(s)
    89  	case shape.Quads:
    90  		return opt.optimizeQuads(s)
    91  	case shape.NodesFrom:
    92  		return opt.optimizeNodesFrom(s)
    93  	case shape.QuadsAction:
    94  		return opt.optimizeQuadsAction(s)
    95  	case shape.Save:
    96  		return opt.optimizeSave(s)
    97  	case shape.Page:
    98  		return opt.optimizePage(s)
    99  	default:
   100  		return s, false
   101  	}
   102  }
   103  
   104  func selectValueQuery(v quad.Value, op CmpOp) ([]Where, []Value, bool) {
   105  	if op == OpEqual {
   106  		// we can use hash to check equality
   107  		return []Where{
   108  				{Field: "hash", Op: op, Value: Placeholder{}},
   109  			}, []Value{
   110  				HashOf(v),
   111  			}, true
   112  	}
   113  	var (
   114  		where  []Where
   115  		params []Value
   116  	)
   117  	switch v := v.(type) {
   118  	case quad.IRI:
   119  		where = []Where{
   120  			{Field: "value_string", Op: op, Value: Placeholder{}},
   121  			{Field: "iri", Op: OpIsTrue},
   122  		}
   123  		params = []Value{
   124  			StringVal(v),
   125  		}
   126  	case quad.BNode:
   127  		where = []Where{
   128  			{Field: "value_string", Op: op, Value: Placeholder{}},
   129  			{Field: "bnode", Op: OpIsTrue},
   130  		}
   131  		params = []Value{
   132  			StringVal(v),
   133  		}
   134  	case quad.String:
   135  		where = []Where{
   136  			{Field: "value_string", Op: op, Value: Placeholder{}},
   137  			{Field: "iri", Op: OpIsNull},
   138  			{Field: "bnode", Op: OpIsNull},
   139  			{Field: "datatype", Op: OpIsNull},
   140  			{Field: "language", Op: OpIsNull},
   141  		}
   142  		params = []Value{
   143  			StringVal(v),
   144  		}
   145  	case quad.LangString:
   146  		where = []Where{
   147  			{Field: "value_string", Op: op, Value: Placeholder{}},
   148  			{Field: "language", Op: OpEqual, Value: Placeholder{}},
   149  		}
   150  		params = []Value{
   151  			StringVal(v.Value),
   152  			StringVal(v.Lang),
   153  		}
   154  	case quad.TypedString:
   155  		where = []Where{
   156  			{Field: "value_string", Op: op, Value: Placeholder{}},
   157  			{Field: "datatype", Op: OpEqual, Value: Placeholder{}},
   158  		}
   159  		params = []Value{
   160  			StringVal(v.Value),
   161  			StringVal(v.Type),
   162  		}
   163  	case quad.Int:
   164  		where = []Where{
   165  			{Field: "value_int", Op: op, Value: Placeholder{}},
   166  		}
   167  		params = []Value{
   168  			IntVal(v),
   169  		}
   170  	case quad.Float:
   171  		where = []Where{
   172  			{Field: "value_float", Op: op, Value: Placeholder{}},
   173  		}
   174  		params = []Value{
   175  			FloatVal(v),
   176  		}
   177  	case quad.Bool:
   178  		where = []Where{
   179  			{Field: "value_bool", Op: op, Value: Placeholder{}},
   180  		}
   181  		params = []Value{
   182  			BoolVal(v),
   183  		}
   184  	case quad.Time:
   185  		where = []Where{
   186  			{Field: "value_time", Op: op, Value: Placeholder{}},
   187  		}
   188  		params = []Value{
   189  			TimeVal(v),
   190  		}
   191  	default:
   192  		return nil, nil, false
   193  	}
   194  	return where, params, true
   195  }
   196  
   197  func SelectValue(v quad.Value, op CmpOp) *Select {
   198  	where, params, ok := selectValueQuery(v, op)
   199  	if !ok {
   200  		return nil
   201  	}
   202  	sel := Nodes(where, params)
   203  	return &sel
   204  }
   205  
   206  func (opt *Optimizer) optimizeLookup(s shape.Lookup) (shape.Shape, bool) {
   207  	if len(s) != 1 {
   208  		// TODO: support for IN
   209  		return s, false
   210  	}
   211  	sel := SelectValue(s[0], OpEqual)
   212  	if sel == nil {
   213  		return s, false
   214  	}
   215  	return *sel, true
   216  }
   217  
   218  func convRegexp(re string) string {
   219  	return re // TODO: convert regular expression
   220  }
   221  
   222  func (opt *Optimizer) optimizeFilter(from shape.Shape, f shape.ValueFilter) ([]Where, []Value, bool) {
   223  	switch f := f.(type) {
   224  	case shape.Comparison:
   225  		var cmp CmpOp
   226  		switch f.Op {
   227  		case iterator.CompareGT:
   228  			cmp = OpGT
   229  		case iterator.CompareGTE:
   230  			cmp = OpGTE
   231  		case iterator.CompareLT:
   232  			cmp = OpLT
   233  		case iterator.CompareLTE:
   234  			cmp = OpLTE
   235  		default:
   236  			return nil, nil, false
   237  		}
   238  		return selectValueQuery(f.Val, cmp)
   239  	case shape.Wildcard:
   240  		if opt.regexpOp == "" {
   241  			return nil, nil, false
   242  		}
   243  		return []Where{
   244  				{Field: "value_string", Op: opt.regexpOp, Value: Placeholder{}},
   245  			}, []Value{
   246  				StringVal(convRegexp(f.Regexp())),
   247  			}, true
   248  	case shape.Regexp:
   249  		if opt.regexpOp == "" {
   250  			return nil, nil, false
   251  		}
   252  		where := []Where{
   253  			{Field: "value_string", Op: opt.regexpOp, Value: Placeholder{}},
   254  		}
   255  		if !f.Refs {
   256  			where = append(where, []Where{
   257  				{Field: "iri", Op: OpIsNull},
   258  				{Field: "bnode", Op: OpIsNull},
   259  			}...)
   260  		}
   261  		return where, []Value{
   262  			StringVal(convRegexp(f.Re.String())),
   263  		}, true
   264  	default:
   265  		return nil, nil, false
   266  	}
   267  }
   268  func (opt *Optimizer) optimizeFilters(s shape.Filter) (shape.Shape, bool) {
   269  	switch from := s.From.(type) {
   270  	case shape.AllNodes:
   271  	case Select:
   272  		if !from.isAll() {
   273  			return s, false
   274  		}
   275  		t, ok := from.From[0].(Table)
   276  		if !ok || t.Name != "nodes" {
   277  			return s, false
   278  		}
   279  	default:
   280  		return s, false
   281  	}
   282  	var (
   283  		where  []Where
   284  		params []Value
   285  	)
   286  	left := shape.Filter{
   287  		From: s.From,
   288  	}
   289  	for _, f := range s.Filters {
   290  		if w, p, ok := opt.optimizeFilter(s.From, f); ok {
   291  			where = append(where, w...)
   292  			params = append(params, p...)
   293  		} else {
   294  			left.Filters = append(left.Filters, f)
   295  		}
   296  	}
   297  	if len(where) == 0 {
   298  		return s, false
   299  	}
   300  	sel := Nodes(where, params)
   301  	if len(left.Filters) == 0 {
   302  		return sel, true
   303  	}
   304  	left.From = sel
   305  	return left, true
   306  }
   307  
   308  func (opt *Optimizer) optimizeQuads(s shape.Quads) (shape.Shape, bool) {
   309  	t1 := opt.nextTable()
   310  	sel := AllQuads(t1)
   311  	for _, f := range s {
   312  		wr := Where{
   313  			Table: t1,
   314  			Field: dirField(f.Dir),
   315  			Op:    OpEqual,
   316  		}
   317  		switch fv := f.Values.(type) {
   318  		case shape.Fixed:
   319  			if len(fv) != 1 {
   320  				// TODO: support IN, or generate SELECT equivalent
   321  				return s, false
   322  			}
   323  			wr.Value = sel.AppendParam(fv[0].(Value))
   324  			sel.Where = append(sel.Where, wr)
   325  		case Select:
   326  			if len(fv.Fields) == 1 {
   327  				// simple case - just add subquery to FROM
   328  				tbl := opt.nextTable()
   329  				sel.From = append(sel.From, Subquery{
   330  					Query: fv,
   331  					Alias: tbl,
   332  				})
   333  				wr.Value = FieldName{
   334  					Name:  fv.Fields[0].NameOrAlias(),
   335  					Table: tbl,
   336  				}
   337  				sel.Where = append(sel.Where, wr)
   338  				continue
   339  			} else if fv.onlyAsSubquery() {
   340  				// TODO: generic subquery: pass all tags to main query, set WHERE on specific direction, drop __* tags
   341  				return s, false
   342  			}
   343  			opt.ensureAliases(&fv)
   344  			// add all tables from subquery to the main one, but skip __node field - we should add it to WHERE
   345  			var head Field
   346  			for _, f := range fv.Fields {
   347  				if f.Alias == tagNode {
   348  					for _, w := range fv.Where {
   349  						if w.Table == f.Table && w.Field == f.Alias {
   350  							// TODO: if __node was used in WHERE of subquery, we should rewrite it
   351  							return s, false
   352  						}
   353  					}
   354  					f.Alias = ""
   355  					head = f
   356  					continue
   357  				}
   358  				sel.Fields = append(sel.Fields, f)
   359  			}
   360  			if head.Table == "" {
   361  				// something is wrong
   362  				return s, false
   363  			}
   364  			sel.From = append(sel.From, fv.From...)
   365  			sel.Where = append(sel.Where, fv.Where...)
   366  			sel.Params = append(sel.Params, fv.Params...)
   367  			wr.Value = FieldName{
   368  				Name:  head.Name,
   369  				Table: head.Table,
   370  			}
   371  			sel.Where = append(sel.Where, wr)
   372  		default:
   373  			return s, false
   374  		}
   375  	}
   376  	return sel, true
   377  }
   378  
   379  func (opt *Optimizer) optimizeNodesFrom(s shape.NodesFrom) (shape.Shape, bool) {
   380  	sel, ok := s.Quads.(Select)
   381  	if !ok {
   382  		return s, false
   383  	}
   384  	sel.Fields = append([]Field{}, sel.Fields...)
   385  
   386  	// all we need is to remove all quad-related tags and preserve one with matching direction
   387  	dir := dirTag(s.Dir)
   388  	found := false
   389  	for i := 0; i < len(sel.Fields); i++ {
   390  		f := &sel.Fields[i]
   391  		if f.Alias == dir {
   392  			f.Alias = tagNode
   393  			found = true
   394  		} else if strings.HasPrefix(f.Alias, tagPref) {
   395  			sel.Fields = append(sel.Fields[:i], sel.Fields[i+1:]...)
   396  			i--
   397  		}
   398  	}
   399  	if !found {
   400  		return s, false
   401  	}
   402  	// NodesFrom implies that the iterator will use NextPath
   403  	sel.nextPath = true
   404  	return sel, true
   405  }
   406  
   407  func (opt *Optimizer) optimizeQuadsAction(s shape.QuadsAction) (shape.Shape, bool) {
   408  	sel := Select{
   409  		Fields: []Field{
   410  			{Name: dirField(s.Result), Alias: tagNode},
   411  		},
   412  		From: []Source{
   413  			Table{Name: "quads"},
   414  		},
   415  		// NodesFrom (that is a part of QuadsAction) implies that the iterator will use NextPath
   416  		nextPath: true,
   417  	}
   418  	var dirs []quad.Direction
   419  	for d := range s.Save {
   420  		dirs = append(dirs, d)
   421  	}
   422  	sortDirs(dirs)
   423  	for _, d := range dirs {
   424  		for _, t := range s.Save[d] {
   425  			sel.Fields = append(sel.Fields, Field{
   426  				Name: dirField(d), Alias: t,
   427  			})
   428  		}
   429  	}
   430  	dirs = nil
   431  	for d := range s.Filter {
   432  		dirs = append(dirs, d)
   433  	}
   434  	sortDirs(dirs)
   435  	for _, d := range dirs {
   436  		v := s.Filter[d]
   437  		sel.WhereEq("", dirField(d), v.(Value))
   438  	}
   439  	return sel, true
   440  }
   441  
   442  func (opt *Optimizer) optimizeSave(s shape.Save) (shape.Shape, bool) {
   443  	sel, ok := s.From.(Select)
   444  	if !ok {
   445  		return s, false
   446  	}
   447  	// find primary value used by iterators
   448  	fi := -1
   449  	for i, f := range sel.Fields {
   450  		if f.Alias == tagNode {
   451  			fi = i
   452  			break
   453  		}
   454  	}
   455  	if fi < 0 {
   456  		return s, false
   457  	}
   458  	// add SELECT fields as aliases for primary field
   459  	f := sel.Fields[fi]
   460  	fields := make([]Field, 0, len(s.Tags)+len(sel.Fields))
   461  	for _, tag := range s.Tags {
   462  		f.Alias = tag
   463  		fields = append(fields, f)
   464  	}
   465  	// add other fields
   466  	fields = append(fields, sel.Fields...)
   467  	sel.Fields = fields
   468  	return sel, true
   469  }
   470  
   471  func (opt *Optimizer) optimizePage(s shape.Page) (shape.Shape, bool) {
   472  	sel, ok := s.From.(Select)
   473  	if !ok {
   474  		return s, false
   475  	}
   476  	// do not optimize if db only can use offset with limit, and we have no limits set
   477  	if opt.noOffsetWithoutLimit && sel.Limit == 0 && s.Limit == 0 {
   478  		return s, false
   479  	}
   480  	// call shapes optimizer to calculate correct skip and limit
   481  	p := shape.Page{
   482  		Skip:  sel.Offset,
   483  		Limit: sel.Limit,
   484  	}.ApplyPage(s)
   485  	if p == nil {
   486  		// no intersection - no results
   487  		return nil, true
   488  	}
   489  	sel.Limit = p.Limit
   490  	sel.Offset = p.Skip
   491  	return sel, true
   492  }
   493  
   494  func (opt *Optimizer) optimizeIntersect(s shape.Intersect) (shape.Shape, bool) {
   495  	var (
   496  		sels  []Select
   497  		other shape.Intersect
   498  	)
   499  	// we will add our merged Select to this slot
   500  	other = append(other, nil)
   501  	for _, sub := range s {
   502  		// TODO: sort by onlySubquery flag first
   503  		if sel, ok := sub.(Select); ok && !sel.onlyAsSubquery() {
   504  			sels = append(sels, sel)
   505  		} else {
   506  			other = append(other, sub)
   507  		}
   508  	}
   509  	if len(sels) <= 1 {
   510  		return s, false
   511  	}
   512  	for i := range sels {
   513  		sels[i] = sels[i].Clone()
   514  		opt.ensureAliases(&sels[i])
   515  	}
   516  	pri := sels[0]
   517  	var head *Field
   518  	for i, f := range pri.Fields {
   519  		if f.Alias == tagNode {
   520  			head = &pri.Fields[i]
   521  			break
   522  		}
   523  	}
   524  	if head == nil {
   525  		return s, false
   526  	}
   527  	sec := sels[1:]
   528  
   529  	nextPath := false
   530  	for _, s2 := range sec {
   531  		// merge From, Where and Params
   532  		pri.From = append(pri.From, s2.From...)
   533  		pri.Where = append(pri.Where, s2.Where...)
   534  		pri.Params = append(pri.Params, s2.Params...)
   535  		nextPath = nextPath || s2.nextPath
   536  		// also find and remove primary tag, but add the same field to WHERE
   537  		ok := false
   538  		for _, f := range s2.Fields {
   539  			if f.Alias == tagNode {
   540  				ok = true
   541  				pri.Where = append(pri.Where, Where{
   542  					Table: head.Table,
   543  					Field: head.Name,
   544  					Op:    OpEqual,
   545  					Value: FieldName{
   546  						Table: f.Table,
   547  						Name:  f.Name,
   548  					},
   549  				})
   550  			} else {
   551  				pri.Fields = append(pri.Fields, f)
   552  			}
   553  		}
   554  		if !ok {
   555  			return s, false
   556  		}
   557  	}
   558  	if len(other) == 1 {
   559  		pri.nextPath = pri.nextPath || nextPath
   560  		return pri, true
   561  	}
   562  	other[0] = pri
   563  	return other, true
   564  }