github.com/cayleygraph/cayley@v0.7.7/graph/sql/optimizer.go (about) 1 // Copyright 2017 The Cayley Authors. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package sql 16 17 import ( 18 "fmt" 19 "sort" 20 "strings" 21 22 "github.com/cayleygraph/cayley/graph/iterator" 23 "github.com/cayleygraph/cayley/graph/shape" 24 "github.com/cayleygraph/quad" 25 ) 26 27 func NewOptimizer() *Optimizer { 28 return &Optimizer{} 29 } 30 31 type Optimizer struct { 32 tableInd int 33 34 regexpOp CmpOp 35 noOffsetWithoutLimit bool // blame mysql 36 } 37 38 func (opt *Optimizer) SetRegexpOp(op CmpOp) { 39 opt.regexpOp = op 40 } 41 42 func (opt *Optimizer) NoOffsetWithoutLimit() { 43 opt.noOffsetWithoutLimit = true 44 } 45 46 func (opt *Optimizer) nextTable() string { 47 opt.tableInd++ 48 return fmt.Sprintf("t_%d", opt.tableInd) 49 } 50 51 func (opt *Optimizer) ensureAliases(s *Select) { 52 for i, src := range s.From { 53 if t, ok := src.(Table); ok && t.Alias == "" { 54 t.Alias = opt.nextTable() 55 s.From[i] = t 56 // TODO: copy slice 57 for j := range s.Fields { 58 f := &s.Fields[j] 59 if f.Table == "" { 60 f.Table = t.Alias 61 } 62 } 63 for j := range s.Where { 64 w := &s.Where[j] 65 if w.Table == "" { 66 w.Table = t.Alias 67 } 68 } 69 } 70 } 71 } 72 73 func sortDirs(dirs []quad.Direction) { 74 sort.Slice(dirs, func(i, j int) bool { 75 return dirs[i] < dirs[j] 76 }) 77 } 78 79 func (opt *Optimizer) OptimizeShape(s shape.Shape) (shape.Shape, bool) { 80 switch s := s.(type) { 81 case shape.AllNodes: 82 return AllNodes(), true 83 case shape.Lookup: 84 return opt.optimizeLookup(s) 85 case shape.Filter: 86 return opt.optimizeFilters(s) 87 case shape.Intersect: 88 return opt.optimizeIntersect(s) 89 case shape.Quads: 90 return opt.optimizeQuads(s) 91 case shape.NodesFrom: 92 return opt.optimizeNodesFrom(s) 93 case shape.QuadsAction: 94 return opt.optimizeQuadsAction(s) 95 case shape.Save: 96 return opt.optimizeSave(s) 97 case shape.Page: 98 return opt.optimizePage(s) 99 default: 100 return s, false 101 } 102 } 103 104 func selectValueQuery(v quad.Value, op CmpOp) ([]Where, []Value, bool) { 105 if op == OpEqual { 106 // we can use hash to check equality 107 return []Where{ 108 {Field: "hash", Op: op, Value: Placeholder{}}, 109 }, []Value{ 110 HashOf(v), 111 }, true 112 } 113 var ( 114 where []Where 115 params []Value 116 ) 117 switch v := v.(type) { 118 case quad.IRI: 119 where = []Where{ 120 {Field: "value_string", Op: op, Value: Placeholder{}}, 121 {Field: "iri", Op: OpIsTrue}, 122 } 123 params = []Value{ 124 StringVal(v), 125 } 126 case quad.BNode: 127 where = []Where{ 128 {Field: "value_string", Op: op, Value: Placeholder{}}, 129 {Field: "bnode", Op: OpIsTrue}, 130 } 131 params = []Value{ 132 StringVal(v), 133 } 134 case quad.String: 135 where = []Where{ 136 {Field: "value_string", Op: op, Value: Placeholder{}}, 137 {Field: "iri", Op: OpIsNull}, 138 {Field: "bnode", Op: OpIsNull}, 139 {Field: "datatype", Op: OpIsNull}, 140 {Field: "language", Op: OpIsNull}, 141 } 142 params = []Value{ 143 StringVal(v), 144 } 145 case quad.LangString: 146 where = []Where{ 147 {Field: "value_string", Op: op, Value: Placeholder{}}, 148 {Field: "language", Op: OpEqual, Value: Placeholder{}}, 149 } 150 params = []Value{ 151 StringVal(v.Value), 152 StringVal(v.Lang), 153 } 154 case quad.TypedString: 155 where = []Where{ 156 {Field: "value_string", Op: op, Value: Placeholder{}}, 157 {Field: "datatype", Op: OpEqual, Value: Placeholder{}}, 158 } 159 params = []Value{ 160 StringVal(v.Value), 161 StringVal(v.Type), 162 } 163 case quad.Int: 164 where = []Where{ 165 {Field: "value_int", Op: op, Value: Placeholder{}}, 166 } 167 params = []Value{ 168 IntVal(v), 169 } 170 case quad.Float: 171 where = []Where{ 172 {Field: "value_float", Op: op, Value: Placeholder{}}, 173 } 174 params = []Value{ 175 FloatVal(v), 176 } 177 case quad.Bool: 178 where = []Where{ 179 {Field: "value_bool", Op: op, Value: Placeholder{}}, 180 } 181 params = []Value{ 182 BoolVal(v), 183 } 184 case quad.Time: 185 where = []Where{ 186 {Field: "value_time", Op: op, Value: Placeholder{}}, 187 } 188 params = []Value{ 189 TimeVal(v), 190 } 191 default: 192 return nil, nil, false 193 } 194 return where, params, true 195 } 196 197 func SelectValue(v quad.Value, op CmpOp) *Select { 198 where, params, ok := selectValueQuery(v, op) 199 if !ok { 200 return nil 201 } 202 sel := Nodes(where, params) 203 return &sel 204 } 205 206 func (opt *Optimizer) optimizeLookup(s shape.Lookup) (shape.Shape, bool) { 207 if len(s) != 1 { 208 // TODO: support for IN 209 return s, false 210 } 211 sel := SelectValue(s[0], OpEqual) 212 if sel == nil { 213 return s, false 214 } 215 return *sel, true 216 } 217 218 func convRegexp(re string) string { 219 return re // TODO: convert regular expression 220 } 221 222 func (opt *Optimizer) optimizeFilter(from shape.Shape, f shape.ValueFilter) ([]Where, []Value, bool) { 223 switch f := f.(type) { 224 case shape.Comparison: 225 var cmp CmpOp 226 switch f.Op { 227 case iterator.CompareGT: 228 cmp = OpGT 229 case iterator.CompareGTE: 230 cmp = OpGTE 231 case iterator.CompareLT: 232 cmp = OpLT 233 case iterator.CompareLTE: 234 cmp = OpLTE 235 default: 236 return nil, nil, false 237 } 238 return selectValueQuery(f.Val, cmp) 239 case shape.Wildcard: 240 if opt.regexpOp == "" { 241 return nil, nil, false 242 } 243 return []Where{ 244 {Field: "value_string", Op: opt.regexpOp, Value: Placeholder{}}, 245 }, []Value{ 246 StringVal(convRegexp(f.Regexp())), 247 }, true 248 case shape.Regexp: 249 if opt.regexpOp == "" { 250 return nil, nil, false 251 } 252 where := []Where{ 253 {Field: "value_string", Op: opt.regexpOp, Value: Placeholder{}}, 254 } 255 if !f.Refs { 256 where = append(where, []Where{ 257 {Field: "iri", Op: OpIsNull}, 258 {Field: "bnode", Op: OpIsNull}, 259 }...) 260 } 261 return where, []Value{ 262 StringVal(convRegexp(f.Re.String())), 263 }, true 264 default: 265 return nil, nil, false 266 } 267 } 268 func (opt *Optimizer) optimizeFilters(s shape.Filter) (shape.Shape, bool) { 269 switch from := s.From.(type) { 270 case shape.AllNodes: 271 case Select: 272 if !from.isAll() { 273 return s, false 274 } 275 t, ok := from.From[0].(Table) 276 if !ok || t.Name != "nodes" { 277 return s, false 278 } 279 default: 280 return s, false 281 } 282 var ( 283 where []Where 284 params []Value 285 ) 286 left := shape.Filter{ 287 From: s.From, 288 } 289 for _, f := range s.Filters { 290 if w, p, ok := opt.optimizeFilter(s.From, f); ok { 291 where = append(where, w...) 292 params = append(params, p...) 293 } else { 294 left.Filters = append(left.Filters, f) 295 } 296 } 297 if len(where) == 0 { 298 return s, false 299 } 300 sel := Nodes(where, params) 301 if len(left.Filters) == 0 { 302 return sel, true 303 } 304 left.From = sel 305 return left, true 306 } 307 308 func (opt *Optimizer) optimizeQuads(s shape.Quads) (shape.Shape, bool) { 309 t1 := opt.nextTable() 310 sel := AllQuads(t1) 311 for _, f := range s { 312 wr := Where{ 313 Table: t1, 314 Field: dirField(f.Dir), 315 Op: OpEqual, 316 } 317 switch fv := f.Values.(type) { 318 case shape.Fixed: 319 if len(fv) != 1 { 320 // TODO: support IN, or generate SELECT equivalent 321 return s, false 322 } 323 wr.Value = sel.AppendParam(fv[0].(Value)) 324 sel.Where = append(sel.Where, wr) 325 case Select: 326 if len(fv.Fields) == 1 { 327 // simple case - just add subquery to FROM 328 tbl := opt.nextTable() 329 sel.From = append(sel.From, Subquery{ 330 Query: fv, 331 Alias: tbl, 332 }) 333 wr.Value = FieldName{ 334 Name: fv.Fields[0].NameOrAlias(), 335 Table: tbl, 336 } 337 sel.Where = append(sel.Where, wr) 338 continue 339 } else if fv.onlyAsSubquery() { 340 // TODO: generic subquery: pass all tags to main query, set WHERE on specific direction, drop __* tags 341 return s, false 342 } 343 opt.ensureAliases(&fv) 344 // add all tables from subquery to the main one, but skip __node field - we should add it to WHERE 345 var head Field 346 for _, f := range fv.Fields { 347 if f.Alias == tagNode { 348 for _, w := range fv.Where { 349 if w.Table == f.Table && w.Field == f.Alias { 350 // TODO: if __node was used in WHERE of subquery, we should rewrite it 351 return s, false 352 } 353 } 354 f.Alias = "" 355 head = f 356 continue 357 } 358 sel.Fields = append(sel.Fields, f) 359 } 360 if head.Table == "" { 361 // something is wrong 362 return s, false 363 } 364 sel.From = append(sel.From, fv.From...) 365 sel.Where = append(sel.Where, fv.Where...) 366 sel.Params = append(sel.Params, fv.Params...) 367 wr.Value = FieldName{ 368 Name: head.Name, 369 Table: head.Table, 370 } 371 sel.Where = append(sel.Where, wr) 372 default: 373 return s, false 374 } 375 } 376 return sel, true 377 } 378 379 func (opt *Optimizer) optimizeNodesFrom(s shape.NodesFrom) (shape.Shape, bool) { 380 sel, ok := s.Quads.(Select) 381 if !ok { 382 return s, false 383 } 384 sel.Fields = append([]Field{}, sel.Fields...) 385 386 // all we need is to remove all quad-related tags and preserve one with matching direction 387 dir := dirTag(s.Dir) 388 found := false 389 for i := 0; i < len(sel.Fields); i++ { 390 f := &sel.Fields[i] 391 if f.Alias == dir { 392 f.Alias = tagNode 393 found = true 394 } else if strings.HasPrefix(f.Alias, tagPref) { 395 sel.Fields = append(sel.Fields[:i], sel.Fields[i+1:]...) 396 i-- 397 } 398 } 399 if !found { 400 return s, false 401 } 402 // NodesFrom implies that the iterator will use NextPath 403 sel.nextPath = true 404 return sel, true 405 } 406 407 func (opt *Optimizer) optimizeQuadsAction(s shape.QuadsAction) (shape.Shape, bool) { 408 sel := Select{ 409 Fields: []Field{ 410 {Name: dirField(s.Result), Alias: tagNode}, 411 }, 412 From: []Source{ 413 Table{Name: "quads"}, 414 }, 415 // NodesFrom (that is a part of QuadsAction) implies that the iterator will use NextPath 416 nextPath: true, 417 } 418 var dirs []quad.Direction 419 for d := range s.Save { 420 dirs = append(dirs, d) 421 } 422 sortDirs(dirs) 423 for _, d := range dirs { 424 for _, t := range s.Save[d] { 425 sel.Fields = append(sel.Fields, Field{ 426 Name: dirField(d), Alias: t, 427 }) 428 } 429 } 430 dirs = nil 431 for d := range s.Filter { 432 dirs = append(dirs, d) 433 } 434 sortDirs(dirs) 435 for _, d := range dirs { 436 v := s.Filter[d] 437 sel.WhereEq("", dirField(d), v.(Value)) 438 } 439 return sel, true 440 } 441 442 func (opt *Optimizer) optimizeSave(s shape.Save) (shape.Shape, bool) { 443 sel, ok := s.From.(Select) 444 if !ok { 445 return s, false 446 } 447 // find primary value used by iterators 448 fi := -1 449 for i, f := range sel.Fields { 450 if f.Alias == tagNode { 451 fi = i 452 break 453 } 454 } 455 if fi < 0 { 456 return s, false 457 } 458 // add SELECT fields as aliases for primary field 459 f := sel.Fields[fi] 460 fields := make([]Field, 0, len(s.Tags)+len(sel.Fields)) 461 for _, tag := range s.Tags { 462 f.Alias = tag 463 fields = append(fields, f) 464 } 465 // add other fields 466 fields = append(fields, sel.Fields...) 467 sel.Fields = fields 468 return sel, true 469 } 470 471 func (opt *Optimizer) optimizePage(s shape.Page) (shape.Shape, bool) { 472 sel, ok := s.From.(Select) 473 if !ok { 474 return s, false 475 } 476 // do not optimize if db only can use offset with limit, and we have no limits set 477 if opt.noOffsetWithoutLimit && sel.Limit == 0 && s.Limit == 0 { 478 return s, false 479 } 480 // call shapes optimizer to calculate correct skip and limit 481 p := shape.Page{ 482 Skip: sel.Offset, 483 Limit: sel.Limit, 484 }.ApplyPage(s) 485 if p == nil { 486 // no intersection - no results 487 return nil, true 488 } 489 sel.Limit = p.Limit 490 sel.Offset = p.Skip 491 return sel, true 492 } 493 494 func (opt *Optimizer) optimizeIntersect(s shape.Intersect) (shape.Shape, bool) { 495 var ( 496 sels []Select 497 other shape.Intersect 498 ) 499 // we will add our merged Select to this slot 500 other = append(other, nil) 501 for _, sub := range s { 502 // TODO: sort by onlySubquery flag first 503 if sel, ok := sub.(Select); ok && !sel.onlyAsSubquery() { 504 sels = append(sels, sel) 505 } else { 506 other = append(other, sub) 507 } 508 } 509 if len(sels) <= 1 { 510 return s, false 511 } 512 for i := range sels { 513 sels[i] = sels[i].Clone() 514 opt.ensureAliases(&sels[i]) 515 } 516 pri := sels[0] 517 var head *Field 518 for i, f := range pri.Fields { 519 if f.Alias == tagNode { 520 head = &pri.Fields[i] 521 break 522 } 523 } 524 if head == nil { 525 return s, false 526 } 527 sec := sels[1:] 528 529 nextPath := false 530 for _, s2 := range sec { 531 // merge From, Where and Params 532 pri.From = append(pri.From, s2.From...) 533 pri.Where = append(pri.Where, s2.Where...) 534 pri.Params = append(pri.Params, s2.Params...) 535 nextPath = nextPath || s2.nextPath 536 // also find and remove primary tag, but add the same field to WHERE 537 ok := false 538 for _, f := range s2.Fields { 539 if f.Alias == tagNode { 540 ok = true 541 pri.Where = append(pri.Where, Where{ 542 Table: head.Table, 543 Field: head.Name, 544 Op: OpEqual, 545 Value: FieldName{ 546 Table: f.Table, 547 Name: f.Name, 548 }, 549 }) 550 } else { 551 pri.Fields = append(pri.Fields, f) 552 } 553 } 554 if !ok { 555 return s, false 556 } 557 } 558 if len(other) == 1 { 559 pri.nextPath = pri.nextPath || nextPath 560 return pri, true 561 } 562 other[0] = pri 563 return other, true 564 }