github.com/cayleygraph/cayley@v0.7.7/graph/shape/shape.go (about) 1 package shape 2 3 import ( 4 "os" 5 "reflect" 6 "regexp" 7 "strings" 8 9 "github.com/cayleygraph/cayley/clog" 10 "github.com/cayleygraph/cayley/graph" 11 "github.com/cayleygraph/cayley/graph/iterator" 12 "github.com/cayleygraph/quad" 13 ) 14 15 var ( 16 debugShapes = os.Getenv("CAYLEY_DEBUG_SHAPES") == "true" 17 debugOptimizer = os.Getenv("CAYLEY_DEBUG_OPTIMIZER") == "true" 18 ) 19 20 // Shape represent a query tree shape. 21 type Shape interface { 22 // BuildIterator constructs an iterator tree from a given shapes and binds it to QuadStore. 23 BuildIterator(qs graph.QuadStore) graph.Iterator 24 // Optimize runs an optimization pass over a query shape. 25 // 26 // It returns a bool that indicates if shape was replaced and should always return a copy of shape in this case. 27 // In case no optimizations were made, it returns the same unmodified shape. 28 // 29 // If Optimizer is specified, it will be used instead of default optimizations. 30 Optimize(r Optimizer) (Shape, bool) 31 } 32 33 type Optimizer interface { 34 OptimizeShape(s Shape) (Shape, bool) 35 } 36 37 // Composite shape can be simplified to a tree of more basic shapes. 38 type Composite interface { 39 Simplify() Shape 40 } 41 42 // WalkFunc is used to visit all shapes in the tree. 43 // If false is returned, branch will not be traversed further. 44 type WalkFunc func(Shape) bool 45 46 type resolveValues struct { 47 qs graph.QuadStore 48 } 49 50 func (r resolveValues) OptimizeShape(s Shape) (Shape, bool) { 51 if l, ok := s.(Lookup); ok { 52 return l.resolve(r.qs), true 53 } 54 return s, false 55 } 56 57 // Optimize applies generic optimizations for the tree. 58 // If quad store is specified it will also resolve Lookups and apply any specific optimizations. 59 // Should not be used with Simplify - it will fold query to a compact form again. 60 func Optimize(s Shape, qs graph.QuadStore) (Shape, bool) { 61 if s == nil { 62 return nil, false 63 } 64 qs = graph.Unwrap(qs) 65 var opt bool 66 if qs != nil { 67 // resolve all lookups earlier 68 s, opt = s.Optimize(resolveValues{qs: qs}) 69 } 70 if s == nil { 71 return Null{}, true 72 } 73 // generic optimizations 74 var opt1 bool 75 s, opt1 = s.Optimize(nil) 76 if s == nil { 77 return Null{}, true 78 } 79 opt = opt || opt1 80 // apply quadstore-specific optimizations 81 if so, ok := qs.(Optimizer); ok && s != nil { 82 var opt2 bool 83 s, opt2 = s.Optimize(so) 84 opt = opt || opt2 85 } 86 if s == nil { 87 return Null{}, true 88 } 89 return s, opt 90 } 91 92 var rtShape = reflect.TypeOf((*Shape)(nil)).Elem() 93 94 // Walk calls provided function for each shape in the tree. 95 func Walk(s Shape, fnc WalkFunc) { 96 if s == nil { 97 return 98 } 99 if !fnc(s) { 100 return 101 } 102 walkReflect(reflect.ValueOf(s), fnc) 103 } 104 105 func walkReflect(rv reflect.Value, fnc WalkFunc) { 106 rt := rv.Type() 107 switch rv.Kind() { 108 case reflect.Slice: 109 if rt.Elem().ConvertibleTo(rtShape) { 110 // all element are shapes - call function on each of them 111 for i := 0; i < rv.Len(); i++ { 112 Walk(rv.Index(i).Interface().(Shape), fnc) 113 } 114 } else { 115 // elements are not shapes, but might contain them 116 for i := 0; i < rv.Len(); i++ { 117 walkReflect(rv.Index(i), fnc) 118 } 119 } 120 case reflect.Map: 121 keys := rv.MapKeys() 122 if rt.Elem().ConvertibleTo(rtShape) { 123 // all element are shapes - call function on each of them 124 for _, k := range keys { 125 Walk(rv.MapIndex(k).Interface().(Shape), fnc) 126 } 127 } else { 128 // elements are not shapes, but might contain them 129 for _, k := range keys { 130 walkReflect(rv.MapIndex(k), fnc) 131 } 132 } 133 case reflect.Struct: 134 // visit all fields 135 for i := 0; i < rt.NumField(); i++ { 136 f := rt.Field(i) 137 // if field is of shape type - call function on it 138 // we skip anonymous fields because they were already visited as part of the parent 139 if !f.Anonymous && f.Type.ConvertibleTo(rtShape) { 140 Walk(rv.Field(i).Interface().(Shape), fnc) 141 continue 142 } 143 // it might be a struct/map/slice field, so we need to go deeper 144 walkReflect(rv.Field(i), fnc) 145 } 146 } 147 } 148 149 // InternalQuad is an internal representation of quad index in QuadStore. 150 type InternalQuad struct { 151 Subject graph.Ref 152 Predicate graph.Ref 153 Object graph.Ref 154 Label graph.Ref 155 } 156 157 // Get returns a specified direction of the quad. 158 func (q InternalQuad) Get(d quad.Direction) graph.Ref { 159 switch d { 160 case quad.Subject: 161 return q.Subject 162 case quad.Predicate: 163 return q.Predicate 164 case quad.Object: 165 return q.Object 166 case quad.Label: 167 return q.Label 168 default: 169 return nil 170 } 171 } 172 173 // Set assigns a specified direction of the quad to a given value. 174 func (q InternalQuad) Set(d quad.Direction, v graph.Ref) { 175 switch d { 176 case quad.Subject: 177 q.Subject = v 178 case quad.Predicate: 179 q.Predicate = v 180 case quad.Object: 181 q.Object = v 182 case quad.Label: 183 q.Label = v 184 default: 185 panic(d) 186 } 187 } 188 189 // QuadIndexer is an optional interface for quad stores that keep an index of quad directions. 190 // 191 // It is used to optimize shapes based on stats from these indexes. 192 type QuadIndexer interface { 193 // SizeOfIndex returns a size of a quad index with given constraints. 194 SizeOfIndex(c map[quad.Direction]graph.Ref) (int64, bool) 195 // LookupQuadIndex finds a quad that matches a given constraint. 196 // It returns false if quad was not found, or there are multiple quads matching constraint. 197 LookupQuadIndex(c map[quad.Direction]graph.Ref) (InternalQuad, bool) 198 } 199 200 // IsNull safely checks if shape represents an empty set. It accounts for both Null and nil. 201 func IsNull(s Shape) bool { 202 _, ok := s.(Null) 203 return s == nil || ok 204 } 205 206 // BuildIterator optimizes the shape and builds a corresponding iterator tree. 207 func BuildIterator(qs graph.QuadStore, s Shape) graph.Iterator { 208 qs = graph.Unwrap(qs) 209 if s != nil { 210 if debugShapes || clog.V(2) { 211 clog.Infof("shape: %#v", s) 212 } 213 s, _ = Optimize(s, qs) 214 if debugOptimizer || clog.V(2) { 215 clog.Infof("optimized: %#v", s) 216 } 217 } 218 if IsNull(s) { 219 return iterator.NewNull() 220 } 221 return s.BuildIterator(qs) 222 } 223 224 // Null represent an empty set. Mostly used as a safe alias for nil shape. 225 type Null struct{} 226 227 func (Null) BuildIterator(qs graph.QuadStore) graph.Iterator { 228 return iterator.NewNull() 229 } 230 func (s Null) Optimize(r Optimizer) (Shape, bool) { 231 if r != nil { 232 return r.OptimizeShape(s) 233 } 234 return nil, true 235 } 236 237 // AllNodes represents all nodes in QuadStore. 238 type AllNodes struct{} 239 240 func (s AllNodes) BuildIterator(qs graph.QuadStore) graph.Iterator { 241 return qs.NodesAllIterator() 242 } 243 func (s AllNodes) Optimize(r Optimizer) (Shape, bool) { 244 if r != nil { 245 return r.OptimizeShape(s) 246 } 247 return s, false 248 } 249 250 // Except excludes a set on nodes from a source. If source is nil, AllNodes is assumed. 251 type Except struct { 252 Exclude Shape // nodes to exclude 253 From Shape // a set of all nodes to exclude from; nil means AllNodes 254 } 255 256 func (s Except) BuildIterator(qs graph.QuadStore) graph.Iterator { 257 var all graph.Iterator 258 if s.From != nil { 259 all = s.From.BuildIterator(qs) 260 } else { 261 all = qs.NodesAllIterator() 262 } 263 if IsNull(s.Exclude) { 264 return all 265 } 266 return iterator.NewNot(s.Exclude.BuildIterator(qs), all) 267 } 268 func (s Except) Optimize(r Optimizer) (Shape, bool) { 269 var opt bool 270 s.Exclude, opt = s.Exclude.Optimize(r) 271 if s.From != nil { 272 var opta bool 273 s.From, opta = s.From.Optimize(r) 274 opt = opt || opta 275 } 276 if r != nil { 277 ns, nopt := r.OptimizeShape(s) 278 return ns, opt || nopt 279 } 280 if IsNull(s.Exclude) { 281 return AllNodes{}, true 282 } else if _, ok := s.Exclude.(AllNodes); ok { 283 return nil, true 284 } 285 return s, opt 286 } 287 288 // ValueFilter is an interface for iterator wrappers that can filter node values. 289 type ValueFilter interface { 290 BuildIterator(qs graph.QuadStore, it graph.Iterator) graph.Iterator 291 } 292 293 // Filter filters all values from the source using a list of operations. 294 type Filter struct { 295 From Shape // source that will be filtered 296 Filters []ValueFilter // filters to apply 297 } 298 299 func (s Filter) BuildIterator(qs graph.QuadStore) graph.Iterator { 300 if IsNull(s.From) { 301 return iterator.NewNull() 302 } 303 it := s.From.BuildIterator(qs) 304 for _, f := range s.Filters { 305 it = f.BuildIterator(qs, it) 306 } 307 return it 308 } 309 func (s Filter) Optimize(r Optimizer) (Shape, bool) { 310 if IsNull(s.From) { 311 return nil, true 312 } 313 var opt bool 314 s.From, opt = s.From.Optimize(r) 315 if r != nil { 316 ns, nopt := r.OptimizeShape(s) 317 return ns, opt || nopt 318 } 319 if IsNull(s.From) { 320 return nil, true 321 } else if len(s.Filters) == 0 { 322 return s.From, true 323 } 324 return s, opt 325 } 326 327 var _ ValueFilter = Comparison{} 328 329 // Comparison is a value filter that evaluates binary operation in reference to a fixed value. 330 type Comparison struct { 331 Op iterator.Operator 332 Val quad.Value 333 } 334 335 func (f Comparison) BuildIterator(qs graph.QuadStore, it graph.Iterator) graph.Iterator { 336 return iterator.NewComparison(it, f.Op, f.Val, qs) 337 } 338 339 var _ ValueFilter = Regexp{} 340 341 // Regexp filters values using regular expression. 342 // 343 // Since regexp patterns can not be optimized in most cases, Wildcard should be used if possible. 344 type Regexp struct { 345 Re *regexp.Regexp 346 Refs bool // allow to match IRIs 347 } 348 349 func (f Regexp) BuildIterator(qs graph.QuadStore, it graph.Iterator) graph.Iterator { 350 if f.Refs { 351 return iterator.NewRegexWithRefs(it, f.Re, qs) 352 } 353 return iterator.NewRegex(it, f.Re, qs) 354 } 355 356 var _ ValueFilter = Wildcard{} 357 358 // Wildcard is a filter for string patterns. 359 // 360 // % - zero or more characters 361 // ? - exactly one character 362 type Wildcard struct { 363 Pattern string // allowed wildcards are: % and ? 364 } 365 366 // Regexp returns an analog regexp pattern in format accepted by Go stdlib (RE2). 367 func (f Wildcard) Regexp() string { 368 const any = `%` 369 // escape all meta-characters in pattern string 370 pattern := regexp.QuoteMeta(f.Pattern) 371 // if the pattern is anchored, add regexp analog for it 372 if !strings.HasPrefix(pattern, any) { 373 pattern = "^" + pattern 374 } else { 375 pattern = strings.TrimPrefix(pattern, any) 376 } 377 if !strings.HasSuffix(pattern, any) { 378 pattern = pattern + "$" 379 } else { 380 pattern = strings.TrimSuffix(pattern, any) 381 } 382 // replace wildcards 383 pattern = strings.NewReplacer( 384 any, `.*`, 385 `\?`, `.`, 386 ).Replace(pattern) 387 return pattern 388 } 389 390 func (f Wildcard) BuildIterator(qs graph.QuadStore, it graph.Iterator) graph.Iterator { 391 if f.Pattern == "" { 392 return iterator.NewNull() 393 } else if strings.Trim(f.Pattern, "%") == "" { 394 return it 395 } 396 re, err := regexp.Compile(f.Regexp()) 397 if err != nil { 398 return iterator.NewError(err) 399 } 400 return iterator.NewRegexWithRefs(it, re, qs) 401 } 402 403 // Count returns a count of objects in source as a single value. It always returns exactly one value. 404 type Count struct { 405 Values Shape 406 } 407 408 func (s Count) BuildIterator(qs graph.QuadStore) graph.Iterator { 409 var it graph.Iterator 410 if IsNull(s.Values) { 411 it = iterator.NewNull() 412 } else { 413 it = s.Values.BuildIterator(qs) 414 } 415 return iterator.NewCount(it, qs) 416 } 417 func (s Count) Optimize(r Optimizer) (Shape, bool) { 418 if IsNull(s.Values) { 419 return Fixed{graph.PreFetched(quad.Int(0))}, true 420 } 421 var opt bool 422 s.Values, opt = s.Values.Optimize(r) 423 if IsNull(s.Values) { 424 return Fixed{graph.PreFetched(quad.Int(0))}, true 425 } 426 if r != nil { 427 ns, nopt := r.OptimizeShape(s) 428 return ns, opt || nopt 429 } 430 // TODO: ask QS to estimate size - if it exact, then we can use it 431 return s, opt 432 } 433 434 // QuadFilter is a constraint used to filter quads that have a certain set of values on a given direction. 435 // Analog of LinksTo iterator. 436 type QuadFilter struct { 437 Dir quad.Direction 438 Values Shape 439 } 440 441 // buildIterator is not exposed to force to use Quads and group filters together. 442 func (s QuadFilter) buildIterator(qs graph.QuadStore) graph.Iterator { 443 if s.Values == nil { 444 return iterator.NewNull() 445 } else if v, ok := One(s.Values); ok { 446 return qs.QuadIterator(s.Dir, v) 447 } 448 if s.Dir == quad.Any { 449 panic("direction is not set") 450 } 451 sub := s.Values.BuildIterator(qs) 452 return iterator.NewLinksTo(qs, sub, s.Dir) 453 } 454 455 // Quads is a selector of quads with a given set of node constraints. Empty or nil Quads is equivalent to AllQuads. 456 // Equivalent to And(AllQuads,LinksTo*) iterator tree. 457 type Quads []QuadFilter 458 459 func (s *Quads) Intersect(q ...QuadFilter) { 460 *s = append(*s, q...) 461 } 462 func (s Quads) BuildIterator(qs graph.QuadStore) graph.Iterator { 463 if len(s) == 0 { 464 return qs.QuadsAllIterator() 465 } 466 its := make([]graph.Iterator, 0, len(s)) 467 for _, f := range s { 468 its = append(its, f.buildIterator(qs)) 469 } 470 if len(its) == 1 { 471 return its[0] 472 } 473 return iterator.NewAnd(its...) 474 } 475 func (s Quads) Optimize(r Optimizer) (Shape, bool) { 476 var opt bool 477 sw := 0 478 realloc := func() { 479 if !opt { 480 opt = true 481 nq := make(Quads, len(s)) 482 copy(nq, s) 483 s = nq 484 } 485 } 486 // TODO: multiple constraints on the same dir -> merge as Intersect on Values of this dir 487 for i := 0; i < len(s); i++ { 488 f := s[i] 489 if f.Values == nil { 490 return nil, true 491 } 492 v, ok := f.Values.Optimize(r) 493 if v == nil { 494 return nil, true 495 } 496 if ok { 497 realloc() 498 s[i].Values = v 499 } 500 switch s[i].Values.(type) { 501 case Fixed: 502 realloc() 503 s[sw], s[i] = s[i], s[sw] 504 sw++ 505 } 506 } 507 if r != nil { 508 ns, nopt := r.OptimizeShape(s) 509 return ns, opt || nopt 510 } 511 return s, opt 512 } 513 514 // NodesFrom extracts nodes on a given direction from source quads. Similar to HasA iterator. 515 type NodesFrom struct { 516 Dir quad.Direction 517 Quads Shape 518 } 519 520 func (s NodesFrom) BuildIterator(qs graph.QuadStore) graph.Iterator { 521 if IsNull(s.Quads) { 522 return iterator.NewNull() 523 } 524 sub := s.Quads.BuildIterator(qs) 525 if s.Dir == quad.Any { 526 panic("direction is not set") 527 } 528 return iterator.NewHasA(qs, sub, s.Dir) 529 } 530 func (s NodesFrom) Optimize(r Optimizer) (Shape, bool) { 531 if IsNull(s.Quads) { 532 return nil, true 533 } 534 var opt bool 535 s.Quads, opt = s.Quads.Optimize(r) 536 if r != nil { 537 // ignore default optimizations 538 ns, nopt := r.OptimizeShape(s) 539 return ns, opt || nopt 540 } 541 q, ok := s.Quads.(Quads) 542 if !ok { 543 return s, opt 544 } 545 // HasA(x, LinksTo(x, y)) == y 546 if len(q) == 1 && q[0].Dir == s.Dir { 547 return q[0].Values, true 548 } 549 // collect all fixed tags and push them up the tree 550 var ( 551 tags map[string]graph.Ref 552 nquad Quads 553 ) 554 for i, f := range q { 555 if ft, ok := f.Values.(FixedTags); ok { 556 if tags == nil { 557 // allocate map and clone quad filters 558 tags = make(map[string]graph.Ref) 559 nquad = make([]QuadFilter, len(q)) 560 copy(nquad, q) 561 q = nquad 562 } 563 q[i].Values = ft.On 564 for k, v := range ft.Tags { 565 tags[k] = v 566 } 567 } 568 } 569 if tags != nil { 570 // re-run optimization without fixed tags 571 ns, _ := NodesFrom{Dir: s.Dir, Quads: q}.Optimize(r) 572 return FixedTags{On: ns, Tags: tags}, true 573 } 574 var ( 575 // if quad filter contains one fixed value, it will be added to the map 576 filt map[quad.Direction]graph.Ref 577 // if we see a Save from AllNodes, we will write it here, since it's a Save on quad direction 578 save map[quad.Direction][]string 579 // how many filters are recognized 580 n int 581 ) 582 for _, f := range q { 583 if v, ok := One(f.Values); ok { 584 if filt == nil { 585 filt = make(map[quad.Direction]graph.Ref) 586 } 587 if _, ok := filt[f.Dir]; ok { 588 return s, opt // just to be safe 589 } 590 filt[f.Dir] = v 591 n++ 592 } else if sv, ok := f.Values.(Save); ok { 593 if _, ok = sv.From.(AllNodes); ok { 594 if save == nil { 595 save = make(map[quad.Direction][]string) 596 } 597 save[f.Dir] = append(save[f.Dir], sv.Tags...) 598 n++ 599 } 600 } 601 } 602 if n == len(q) { 603 // if all filters were recognized we can merge this tree as a single iterator with multiple 604 // constraints and multiple save commands over the same set of quads 605 ns, _ := QuadsAction{ 606 Result: s.Dir, // this is still a HasA, remember? 607 Filter: filt, 608 Save: save, 609 }.Optimize(r) 610 return ns, true 611 } 612 // TODO 613 return s, opt 614 } 615 616 var _ Composite = QuadsAction{} 617 618 // QuadsAction represents a set of actions that can be done to a set of quads in a single scan pass. 619 // It filters quads according to Filter constraints (equivalent of LinksTo), tags directions using tags in Save field 620 // and returns a specified quad direction as result of the iterator (equivalent of HasA). 621 // Optionally, Size field may be set to indicate an approximate number of quads that will be returned by this query. 622 type QuadsAction struct { 623 Size int64 // approximate size; zero means undefined 624 Result quad.Direction 625 Save map[quad.Direction][]string 626 Filter map[quad.Direction]graph.Ref 627 } 628 629 func (s *QuadsAction) SetFilter(d quad.Direction, v graph.Ref) { 630 if s.Filter == nil { 631 s.Filter = make(map[quad.Direction]graph.Ref) 632 } 633 s.Filter[d] = v 634 } 635 636 func (s QuadsAction) Clone() QuadsAction { 637 if n := len(s.Save); n != 0 { 638 s2 := make(map[quad.Direction][]string, n) 639 for k, v := range s.Save { 640 s2[k] = v 641 } 642 s.Save = s2 643 } else { 644 s.Save = nil 645 } 646 if n := len(s.Filter); n != 0 { 647 f2 := make(map[quad.Direction]graph.Ref, n) 648 for k, v := range s.Filter { 649 f2[k] = v 650 } 651 s.Filter = f2 652 } else { 653 s.Filter = nil 654 } 655 return s 656 } 657 func (s QuadsAction) simplify() NodesFrom { 658 q := make(Quads, 0, len(s.Save)+len(s.Filter)) 659 for dir, val := range s.Filter { 660 q = append(q, QuadFilter{Dir: dir, Values: Fixed{val}}) 661 } 662 for dir, tags := range s.Save { 663 q = append(q, QuadFilter{Dir: dir, Values: Save{From: AllNodes{}, Tags: tags}}) 664 } 665 return NodesFrom{Dir: s.Result, Quads: q} 666 } 667 func (s QuadsAction) SimplifyFrom(quads Shape) Shape { 668 q := make(Quads, 0, len(s.Save)) 669 for dir, tags := range s.Save { 670 q = append(q, QuadFilter{Dir: dir, Values: Save{From: AllNodes{}, Tags: tags}}) 671 } 672 if len(q) != 0 { 673 quads = IntersectShapes(quads, q) 674 } 675 return NodesFrom{Dir: s.Result, Quads: quads} 676 } 677 func (s QuadsAction) Simplify() Shape { 678 return s.simplify() 679 } 680 func (s QuadsAction) BuildIterator(qs graph.QuadStore) graph.Iterator { 681 h := s.simplify() 682 return h.BuildIterator(qs) 683 } 684 func (s QuadsAction) Optimize(r Optimizer) (Shape, bool) { 685 if r != nil { 686 return r.OptimizeShape(s) 687 } 688 // if optimizer has stats for quad indexes we can use them to do more 689 ind, ok := r.(QuadIndexer) 690 if !ok { 691 return s, false 692 } 693 if s.Size > 0 { // already optimized; specific for QuadIndexer optimization 694 return s, false 695 } 696 sz, exact := ind.SizeOfIndex(s.Filter) 697 if !exact { 698 return s, false 699 } 700 s.Size = sz // computing size is already an optimization 701 if sz == 0 { 702 // nothing here, collapse the tree 703 return nil, true 704 } else if sz == 1 { 705 // only one quad matches this set of filters 706 // try to load it from quad store, do all operations and bake result as a fixed node/tags 707 if q, ok := ind.LookupQuadIndex(s.Filter); ok { 708 fx := Fixed{q.Get(s.Result)} 709 if len(s.Save) == 0 { 710 return fx, true 711 } 712 ft := FixedTags{On: fx, Tags: make(map[string]graph.Ref)} 713 for d, tags := range s.Save { 714 for _, t := range tags { 715 ft.Tags[t] = q.Get(d) 716 } 717 } 718 return ft, true 719 } 720 } 721 if sz < int64(MaterializeThreshold) { 722 // if this set is small enough - materialize it 723 return Materialize{Values: s, Size: int(sz)}, true 724 } 725 return s, true 726 } 727 728 // One checks if Shape represents a single fixed value and returns it. 729 func One(s Shape) (graph.Ref, bool) { 730 switch s := s.(type) { 731 case Fixed: 732 if len(s) == 1 { 733 return s[0], true 734 } 735 } 736 return nil, false 737 } 738 739 // Fixed is a static set of nodes. Defined only for a particular QuadStore. 740 type Fixed []graph.Ref 741 742 func (s *Fixed) Add(v ...graph.Ref) { 743 *s = append(*s, v...) 744 } 745 func (s Fixed) BuildIterator(qs graph.QuadStore) graph.Iterator { 746 it := iterator.NewFixed() 747 for _, v := range s { 748 if _, ok := v.(quad.Value); ok { 749 panic("quad value in fixed iterator") 750 } 751 it.Add(v) 752 } 753 return it 754 } 755 func (s Fixed) Optimize(r Optimizer) (Shape, bool) { 756 if len(s) == 0 { 757 return nil, true 758 } 759 if r != nil { 760 return r.OptimizeShape(s) 761 } 762 return s, false 763 } 764 765 // FixedTags adds a set of fixed tag values to query results. It does not affect query execution in any other way. 766 // 767 // Shape implementations should try to push these objects up the tree during optimization process. 768 type FixedTags struct { 769 Tags map[string]graph.Ref 770 On Shape 771 } 772 773 func (s FixedTags) BuildIterator(qs graph.QuadStore) graph.Iterator { 774 if IsNull(s.On) { 775 return iterator.NewNull() 776 } 777 it := s.On.BuildIterator(qs) 778 sv := iterator.NewSave(it) 779 for k, v := range s.Tags { 780 sv.AddFixedTag(k, v) 781 } 782 return sv 783 } 784 func (s FixedTags) Optimize(r Optimizer) (Shape, bool) { 785 if IsNull(s.On) { 786 return nil, true 787 } 788 var opt bool 789 s.On, opt = s.On.Optimize(r) 790 if len(s.Tags) == 0 { 791 return s.On, true 792 } else if s2, ok := s.On.(FixedTags); ok { 793 tags := make(map[string]graph.Ref, len(s.Tags)+len(s2.Tags)) 794 for k, v := range s.Tags { 795 tags[k] = v 796 } 797 for k, v := range s2.Tags { 798 tags[k] = v 799 } 800 s, opt = FixedTags{On: s2.On, Tags: tags}, true 801 } 802 if r != nil { 803 ns, nopt := r.OptimizeShape(s) 804 return ns, opt || nopt 805 } 806 return s, opt 807 } 808 809 // Lookup is a static set of values that must be resolved to nodes by QuadStore. 810 type Lookup []quad.Value 811 812 func (s *Lookup) Add(v ...quad.Value) { 813 *s = append(*s, v...) 814 } 815 816 var _ valueResolver = graph.QuadStore(nil) 817 818 type valueResolver interface { 819 ValueOf(v quad.Value) graph.Ref 820 } 821 822 func (s Lookup) resolve(qs valueResolver) Shape { 823 // TODO: check if QS supports batch lookup 824 vals := make([]graph.Ref, 0, len(s)) 825 for _, v := range s { 826 if gv := qs.ValueOf(v); gv != nil { 827 vals = append(vals, gv) 828 } 829 } 830 if len(vals) == 0 { 831 return nil 832 } 833 return Fixed(vals) 834 } 835 func (s Lookup) BuildIterator(qs graph.QuadStore) graph.Iterator { 836 f := s.resolve(qs) 837 if IsNull(f) { 838 return iterator.NewNull() 839 } 840 return f.BuildIterator(qs) 841 } 842 func (s Lookup) Optimize(r Optimizer) (Shape, bool) { 843 if r == nil { 844 return s, false 845 } 846 ns, opt := r.OptimizeShape(s) 847 if opt { 848 return ns, true 849 } 850 if qs, ok := r.(valueResolver); ok { 851 ns, opt = s.resolve(qs), true 852 } 853 return ns, opt 854 } 855 856 var MaterializeThreshold = 100 // TODO: tune 857 858 // Materialize loads results of sub-query into memory during execution to speedup iteration. 859 type Materialize struct { 860 Size int // approximate size; zero means undefined 861 Values Shape 862 } 863 864 func (s Materialize) BuildIterator(qs graph.QuadStore) graph.Iterator { 865 if IsNull(s.Values) { 866 return iterator.NewNull() 867 } 868 it := s.Values.BuildIterator(qs) 869 return iterator.NewMaterializeWithSize(it, int64(s.Size)) 870 } 871 func (s Materialize) Optimize(r Optimizer) (Shape, bool) { 872 if IsNull(s.Values) { 873 return nil, true 874 } 875 var opt bool 876 s.Values, opt = s.Values.Optimize(r) 877 if r != nil { 878 ns, nopt := r.OptimizeShape(s) 879 return ns, opt || nopt 880 } 881 return s, opt 882 } 883 884 func clearFixedTags(arr []Shape) ([]Shape, map[string]graph.Ref) { 885 var tags map[string]graph.Ref 886 for i := 0; i < len(arr); i++ { 887 if ft, ok := arr[i].(FixedTags); ok { 888 if tags == nil { 889 tags = make(map[string]graph.Ref) 890 na := make([]Shape, len(arr)) 891 copy(na, arr) 892 arr = na 893 } 894 arr[i] = ft.On 895 for k, v := range ft.Tags { 896 tags[k] = v 897 } 898 } 899 } 900 return arr, tags 901 } 902 903 // Intersect computes an intersection of nodes between multiple queries. Similar to And iterator. 904 type Intersect []Shape 905 906 func (s Intersect) BuildIterator(qs graph.QuadStore) graph.Iterator { 907 if len(s) == 0 { 908 return iterator.NewNull() 909 } 910 sub := make([]graph.Iterator, 0, len(s)) 911 for _, c := range s { 912 sub = append(sub, c.BuildIterator(qs)) 913 } 914 if len(sub) == 1 { 915 return sub[0] 916 } 917 return iterator.NewAnd(sub...) 918 } 919 func (s Intersect) Optimize(r Optimizer) (sout Shape, opt bool) { 920 if len(s) == 0 { 921 return nil, true 922 } 923 // function to lazily reallocate a copy of Intersect slice 924 realloc := func() { 925 if !opt { 926 arr := make(Intersect, len(s)) 927 copy(arr, s) 928 s = arr 929 } 930 } 931 // optimize sub-iterators, return empty set if Null is found 932 for i := 0; i < len(s); i++ { 933 c := s[i] 934 if IsNull(c) { 935 return nil, true 936 } 937 v, ok := c.Optimize(r) 938 if !ok { 939 continue 940 } 941 realloc() 942 opt = true 943 if IsNull(v) { 944 return nil, true 945 } 946 s[i] = v 947 } 948 if r != nil { 949 ns, nopt := r.OptimizeShape(s) 950 return ns, opt || nopt 951 } 952 if arr, ft := clearFixedTags([]Shape(s)); ft != nil { 953 ns, _ := FixedTags{On: Intersect(arr), Tags: ft}.Optimize(r) 954 return ns, true 955 } 956 var ( 957 onlyAll = true // contains only AllNodes shapes 958 hasAll = false 959 fixed []Fixed // we will collect all Fixed, and will place it as a first iterator 960 tags []string // if we find a Save inside, we will push it outside of Intersect 961 quads Quads // also, collect all quad filters into a single set 962 optional []Shape 963 ) 964 remove := func(i *int, optimized bool) { 965 realloc() 966 if optimized { 967 opt = true 968 } 969 v := *i 970 s = append(s[:v], s[v+1:]...) 971 v-- 972 *i = v 973 } 974 // second pass - remove AllNodes, merge Quads, collect Fixed, collect Save, merge Intersects 975 for i := 0; i < len(s); i++ { 976 c := s[i] 977 switch c := c.(type) { 978 case AllNodes: // remove AllNodes - it's useless in the intersection 979 remove(&i, true) 980 hasAll = true 981 continue // prevent resetting of onlyAll 982 case Quads: // merge all quad filters 983 remove(&i, false) 984 if quads == nil { 985 quads = c[:len(c):len(c)] 986 } else { 987 opt = true 988 quads = append(quads, c...) 989 } 990 case Fixed: // collect all Fixed sets 991 remove(&i, true) 992 fixed = append(fixed, c) 993 case Intersect: // merge with other Intersects 994 remove(&i, true) 995 s = append(s, c...) 996 case IntersectOpt: // merge with IntersectOpt 997 remove(&i, true) 998 s = append(s, c.Sub...) 999 optional = append(optional, c.Opt...) 1000 case Save: // push Save outside of Intersect 1001 realloc() 1002 opt = true 1003 tags = append(tags, c.Tags...) 1004 s[i] = c.From 1005 i-- 1006 } 1007 onlyAll = false 1008 } 1009 if onlyAll { 1010 return AllNodes{}, true 1011 } 1012 if len(tags) != 0 { 1013 // don't forget to move Save outside of Intersect at the end 1014 defer func() { 1015 if IsNull(sout) { 1016 return 1017 } 1018 sv := Save{From: sout, Tags: tags} 1019 var topt bool 1020 sout, topt = sv.Optimize(r) 1021 opt = opt || topt 1022 }() 1023 } 1024 if len(optional) != 0 { 1025 // don't forget to add optional paths 1026 defer func() { 1027 if IsNull(sout) { 1028 return 1029 } 1030 out := IntersectOpt{Opt: optional} 1031 if so, ok := sout.(Intersect); ok { 1032 out.Sub = so 1033 } else { 1034 out.Sub = Intersect{sout} 1035 } 1036 var topt bool 1037 sout, topt = out.Optimize(r) 1038 opt = opt || topt 1039 }() 1040 } 1041 if quads != nil { 1042 nq, qopt := quads.Optimize(r) 1043 if IsNull(nq) { 1044 return nil, true 1045 } 1046 opt = opt || qopt 1047 s = append(s, nq) 1048 } 1049 // TODO: intersect fixed 1050 if len(fixed) == 1 { 1051 fix := fixed[0] 1052 if len(s) == 1 { 1053 // try to push fixed down the tree 1054 switch sf := s[0].(type) { 1055 case QuadsAction: 1056 // TODO: accept an array of Fixed values 1057 if len(fix) == 1 { 1058 // we have a single value in Fixed that is intersected with HasA tree 1059 // this means we can add a new constraint: LinksTo(HasA.Dir, fixed) 1060 // result direction of HasA will be preserved 1061 fv := fix[0] 1062 if v := sf.Filter[sf.Result]; v != nil { 1063 // we have the same direction set as a fixed constraint - do filtering 1064 if graph.ToKey(v) != graph.ToKey(fv) { 1065 return nil, true 1066 } else { 1067 return sf, true 1068 } 1069 } 1070 sf = sf.Clone() 1071 sf.SetFilter(sf.Result, fv) // LinksTo(HasA.Dir, fixed) 1072 sf.Size = 0 // re-calculate size 1073 ns, _ := sf.Optimize(r) 1074 return ns, true 1075 } 1076 case NodesFrom: 1077 if sq, ok := sf.Quads.(Quads); ok { 1078 // an optimization above is valid for NodesFrom+Quads as well 1079 // we can add the same constraint to Quads and remove Fixed 1080 qi := -1 1081 for i, qf := range sq { 1082 if qf.Dir == sf.Dir { 1083 qi = i 1084 break 1085 } 1086 } 1087 if qi < 0 { 1088 // no filter on this direction - append 1089 sf.Quads = append(Quads{ 1090 {Dir: sf.Dir, Values: fix}, 1091 }, sq...) 1092 } else { 1093 // already have a filter on this direction - push Fixed inside it 1094 sq = append(Quads{}, sq...) 1095 sf.Quads = sq 1096 qf := &sq[qi] 1097 qf.Values = IntersectShapes(fix, qf.Values) 1098 } 1099 return sf, true 1100 } 1101 } 1102 } 1103 // place fixed as a first iterator 1104 s = append(s, nil) 1105 copy(s[1:], s) 1106 s[0] = fix 1107 } else if len(fixed) > 1 { 1108 ns := make(Intersect, len(s)+len(fixed)) 1109 for i, f := range fixed { 1110 ns[i] = f 1111 } 1112 copy(ns[len(fixed):], s) 1113 s = ns 1114 } 1115 if len(s) == 0 { 1116 if hasAll { 1117 return AllNodes{}, true 1118 } 1119 return nil, true 1120 } else if len(s) == 1 { 1121 return s[0], true 1122 } 1123 // TODO: optimize order 1124 return s, opt 1125 } 1126 1127 // IntersectOpt is like Intersect but it also joins optional query shapes to the main query. 1128 type IntersectOpt struct { 1129 Sub Intersect 1130 Opt []Shape 1131 } 1132 1133 func (s *IntersectOpt) Add(arr ...Shape) { 1134 s.Sub = append(s.Sub, arr...) 1135 } 1136 1137 func (s *IntersectOpt) AddOptional(arr ...Shape) { 1138 s.Opt = append(s.Opt, arr...) 1139 } 1140 1141 func (s IntersectOpt) BuildIterator(qs graph.QuadStore) graph.Iterator { 1142 if len(s.Sub) == 0 && len(s.Opt) == 0 { 1143 return iterator.NewNull() 1144 } 1145 if len(s.Sub) == 0 { 1146 if len(s.Opt) == 0 { 1147 return iterator.NewNull() 1148 } 1149 s.Sub = Intersect{AllNodes{}} 1150 } 1151 sub := make([]graph.Iterator, 0, len(s.Sub)) 1152 opt := make([]graph.Iterator, 0, len(s.Opt)) 1153 for _, c := range s.Sub { 1154 sub = append(sub, c.BuildIterator(qs)) 1155 } 1156 for _, c := range s.Opt { 1157 opt = append(opt, c.BuildIterator(qs)) 1158 } 1159 if len(sub) == 1 && len(opt) == 0 { 1160 return sub[0] 1161 } 1162 it := iterator.NewAnd(sub...) 1163 for _, sit := range opt { 1164 it.AddOptionalIterator(sit) 1165 } 1166 return it 1167 } 1168 1169 func (s IntersectOpt) Optimize(r Optimizer) (_ Shape, opt bool) { 1170 // optimize optional shapes first, reallocate if necessary 1171 newSlice := false 1172 realloc := func() { 1173 opt = true 1174 if newSlice { 1175 return 1176 } 1177 newSlice = true 1178 s.Opt = append([]Shape{}, s.Opt...) 1179 } 1180 for i := 0; i < len(s.Opt); i++ { 1181 o := s.Opt[i] 1182 if IsNull(o) { 1183 realloc() 1184 s.Opt = append(s.Opt[:i], s.Opt[i+1:]...) 1185 i-- 1186 continue 1187 } 1188 o, opt2 := o.Optimize(r) 1189 if !opt2 { 1190 continue 1191 } 1192 realloc() 1193 if IsNull(o) { 1194 s.Opt = append(s.Opt[:i], s.Opt[i+1:]...) 1195 i-- 1196 } else { 1197 s.Opt[i] = o 1198 } 1199 } 1200 if len(s.Opt) == 0 { 1201 // no optional - replace with a regular intersection 1202 si, _ := s.Sub.Optimize(r) 1203 return si, true 1204 } 1205 if len(s.Sub) == 0 { 1206 // force at least All to be in the intersection 1207 s.Sub = Intersect{AllNodes{}} 1208 opt = true 1209 } else { 1210 sub, opt2 := s.Sub.Optimize(r) 1211 if IsNull(sub) { 1212 return nil, true 1213 } 1214 opt = opt || opt2 1215 switch sub := sub.(type) { 1216 case Intersect: 1217 s.Sub = sub 1218 case IntersectOpt: 1219 sub.Opt = append(sub.Opt) 1220 s = sub 1221 opt = true 1222 default: 1223 s.Sub = Intersect{sub} 1224 opt = true 1225 } 1226 } 1227 if r != nil { 1228 ns, nopt := r.OptimizeShape(s) 1229 return ns, opt || nopt 1230 } 1231 return s, opt 1232 } 1233 1234 // Union joins results of multiple queries together. It does not make results unique. 1235 type Union []Shape 1236 1237 func (s Union) BuildIterator(qs graph.QuadStore) graph.Iterator { 1238 if len(s) == 0 { 1239 return iterator.NewNull() 1240 } 1241 sub := make([]graph.Iterator, 0, len(s)) 1242 for _, c := range s { 1243 sub = append(sub, c.BuildIterator(qs)) 1244 } 1245 if len(sub) == 1 { 1246 return sub[0] 1247 } 1248 return iterator.NewOr(sub...) 1249 } 1250 func (s Union) Optimize(r Optimizer) (Shape, bool) { 1251 var opt bool 1252 realloc := func() { 1253 if !opt { 1254 arr := make(Union, len(s)) 1255 copy(arr, s) 1256 s = arr 1257 } 1258 } 1259 // optimize subiterators 1260 for i := 0; i < len(s); i++ { 1261 c := s[i] 1262 if c == nil { 1263 continue 1264 } 1265 v, ok := c.Optimize(r) 1266 if !ok { 1267 continue 1268 } 1269 realloc() 1270 opt = true 1271 s[i] = v 1272 } 1273 if r != nil { 1274 ns, nopt := r.OptimizeShape(s) 1275 return ns, opt || nopt 1276 } 1277 if arr, ft := clearFixedTags([]Shape(s)); ft != nil { 1278 ns, _ := FixedTags{On: Union(arr), Tags: ft}.Optimize(r) 1279 return ns, true 1280 } 1281 // second pass - remove Null 1282 for i := 0; i < len(s); i++ { 1283 c := s[i] 1284 if IsNull(c) { 1285 realloc() 1286 opt = true 1287 s = append(s[:i], s[i+1:]...) 1288 } 1289 } 1290 if len(s) == 0 { 1291 return nil, true 1292 } else if len(s) == 1 { 1293 return s[0], true 1294 } 1295 // TODO: join Fixed 1296 return s, opt 1297 } 1298 1299 // Page provides a simple form of pagination. Can be used to skip or limit results. 1300 type Page struct { 1301 From Shape 1302 Skip int64 1303 Limit int64 // zero means unlimited 1304 } 1305 1306 func (s Page) BuildIterator(qs graph.QuadStore) graph.Iterator { 1307 if IsNull(s.From) { 1308 return iterator.NewNull() 1309 } 1310 it := s.From.BuildIterator(qs) 1311 if s.Skip > 0 { 1312 it = iterator.NewSkip(it, s.Skip) 1313 } 1314 if s.Limit > 0 { 1315 it = iterator.NewLimit(it, s.Limit) 1316 } 1317 return it 1318 } 1319 func (s Page) Optimize(r Optimizer) (Shape, bool) { 1320 if IsNull(s.From) { 1321 return nil, true 1322 } 1323 var opt bool 1324 s.From, opt = s.From.Optimize(r) 1325 if s.Skip <= 0 && s.Limit <= 0 { 1326 return s.From, true 1327 } 1328 if p, ok := s.From.(Page); ok { 1329 p2 := p.ApplyPage(s) 1330 if p2 == nil { 1331 return nil, true 1332 } 1333 s, opt = *p2, true 1334 } 1335 if r != nil { 1336 ns, nopt := r.OptimizeShape(s) 1337 return ns, opt || nopt 1338 } 1339 // TODO: check size 1340 return s, opt 1341 } 1342 func (s Page) ApplyPage(p Page) *Page { 1343 s.Skip += p.Skip 1344 if s.Limit > 0 { 1345 s.Limit -= p.Skip 1346 if s.Limit <= 0 { 1347 return nil 1348 } 1349 if p.Limit > 0 && s.Limit > p.Limit { 1350 s.Limit = p.Limit 1351 } 1352 } else { 1353 s.Limit = p.Limit 1354 } 1355 return &s 1356 } 1357 1358 // Unique makes query results unique. 1359 type Unique struct { 1360 From Shape 1361 } 1362 1363 func (s Unique) BuildIterator(qs graph.QuadStore) graph.Iterator { 1364 if IsNull(s.From) { 1365 return iterator.NewNull() 1366 } 1367 it := s.From.BuildIterator(qs) 1368 return iterator.NewUnique(it) 1369 } 1370 func (s Unique) Optimize(r Optimizer) (Shape, bool) { 1371 if IsNull(s.From) { 1372 return nil, true 1373 } 1374 var opt bool 1375 s.From, opt = s.From.Optimize(r) 1376 if IsNull(s.From) { 1377 return nil, true 1378 } 1379 if r != nil { 1380 ns, nopt := r.OptimizeShape(s) 1381 return ns, opt || nopt 1382 } 1383 return s, opt 1384 } 1385 1386 // Save tags a results of query with provided tags. 1387 type Save struct { 1388 Tags []string 1389 From Shape 1390 } 1391 1392 func (s Save) BuildIterator(qs graph.QuadStore) graph.Iterator { 1393 if IsNull(s.From) { 1394 return iterator.NewNull() 1395 } 1396 it := s.From.BuildIterator(qs) 1397 if len(s.Tags) != 0 { 1398 return iterator.NewSave(it, s.Tags...) 1399 } 1400 return it 1401 } 1402 func (s Save) Optimize(r Optimizer) (Shape, bool) { 1403 if IsNull(s.From) { 1404 return nil, true 1405 } 1406 var opt bool 1407 s.From, opt = s.From.Optimize(r) 1408 if len(s.Tags) == 0 { 1409 return s.From, true 1410 } else if IsNull(s.From) { 1411 return nil, true 1412 } 1413 if r != nil { 1414 ns, nopt := r.OptimizeShape(s) 1415 return ns, opt || nopt 1416 } 1417 return s, opt 1418 } 1419 1420 func FilterQuads(subject, predicate, object, label []quad.Value) Shape { 1421 var q Quads 1422 if len(subject) != 0 { 1423 q = append(q, QuadFilter{Dir: quad.Subject, Values: Lookup(subject)}) 1424 } 1425 if len(predicate) != 0 { 1426 q = append(q, QuadFilter{Dir: quad.Predicate, Values: Lookup(predicate)}) 1427 } 1428 if len(object) != 0 { 1429 q = append(q, QuadFilter{Dir: quad.Object, Values: Lookup(object)}) 1430 } 1431 if len(label) != 0 { 1432 q = append(q, QuadFilter{Dir: quad.Label, Values: Lookup(label)}) 1433 } 1434 return q 1435 } 1436 1437 type Sort struct { 1438 From Shape 1439 } 1440 1441 func (s Sort) BuildIterator(qs graph.QuadStore) graph.Iterator { 1442 if IsNull(s.From) { 1443 return iterator.NewNull() 1444 } 1445 it := s.From.BuildIterator(qs) 1446 return iterator.NewSort(qs, it) 1447 } 1448 func (s Sort) Optimize(r Optimizer) (Shape, bool) { 1449 if IsNull(s.From) { 1450 return nil, true 1451 } 1452 var opt bool 1453 s.From, opt = s.From.Optimize(r) 1454 if IsNull(s.From) { 1455 return nil, true 1456 } 1457 if r != nil { 1458 ns, nopt := r.OptimizeShape(s) 1459 return ns, opt || nopt 1460 } 1461 return s, opt 1462 }