github.com/gopherd/gonum@v0.0.4/graph/formats/rdf/query.go (about)

     1  // Copyright ©2022 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package rdf
     6  
     7  import (
     8  	"sort"
     9  
    10  	"github.com/gopherd/gonum/graph"
    11  )
    12  
    13  // Query represents a step in an RDF graph query. The methods on Query
    14  // provide a simple graph query language.
    15  type Query struct {
    16  	g graph.Directed
    17  
    18  	terms []Term
    19  }
    20  
    21  // NewQuery returns a query of g starting from the given nodes.
    22  // Queries may not be mixed between distinct graphs. The type of
    23  // g must be comparable. Query operations only consider edges that
    24  // are represented by a *Statement or is an edge with lines held
    25  // in a graph.Lines with at least one *Statement.
    26  func NewQuery(g graph.Directed, from ...Term) Query {
    27  	return Query{g: g, terms: from}
    28  }
    29  
    30  // Query returns a query of the receiver starting from the given nodes.
    31  // Queries may not be mixed between distinct graphs.
    32  func (g *Graph) Query(from ...Term) Query {
    33  	return Query{g: g, terms: from}
    34  }
    35  
    36  // Out returns a query holding nodes reachable out from the receiver's
    37  // starting nodes via statements that satisfy fn.
    38  func (q Query) Out(fn func(s *Statement) bool) Query {
    39  	r := Query{g: q.g}
    40  	for _, s := range q.terms {
    41  		it := q.g.From(s.ID())
    42  		for it.Next() {
    43  			if ConnectedByAny(q.g.Edge(s.ID(), it.Node().ID()), fn) {
    44  				r.terms = append(r.terms, it.Node().(Term))
    45  			}
    46  		}
    47  	}
    48  	return r
    49  }
    50  
    51  // In returns a query holding nodes reachable in from the receiver's
    52  // starting nodes via statements that satisfy fn.
    53  func (q Query) In(fn func(s *Statement) bool) Query {
    54  	r := Query{g: q.g}
    55  	for _, s := range q.terms {
    56  		it := q.g.To(s.ID())
    57  		for it.Next() {
    58  			if ConnectedByAny(q.g.Edge(it.Node().ID(), s.ID()), fn) {
    59  				r.terms = append(r.terms, it.Node().(Term))
    60  			}
    61  		}
    62  	}
    63  	return r
    64  }
    65  
    66  // HasAllOut returns a query holding nodes from the receiver's
    67  // initial set where all outgoing statements satisfy fn. The
    68  // query short circuits, so fn is not called after the first
    69  // failure to match.
    70  func (q Query) HasAllOut(fn func(s *Statement) bool) Query {
    71  	r := Query{g: q.g}
    72  	notFn := not(fn)
    73  loop:
    74  	for _, s := range q.terms {
    75  		it := q.g.From(s.ID())
    76  		for it.Next() {
    77  			if ConnectedByAny(q.g.Edge(s.ID(), it.Node().ID()), notFn) {
    78  				continue loop
    79  			}
    80  		}
    81  		r.terms = append(r.terms, s)
    82  	}
    83  	return r
    84  }
    85  
    86  // HasAllIn returns a query holding nodes from the receiver's
    87  // initial set where all incoming statements satisfy fn. The
    88  // query short circuits, so fn is not called after the first
    89  // failure to match.
    90  func (q Query) HasAllIn(fn func(s *Statement) bool) Query {
    91  	r := Query{g: q.g}
    92  	notFn := not(fn)
    93  loop:
    94  	for _, s := range q.terms {
    95  		it := q.g.To(s.ID())
    96  		for it.Next() {
    97  			if ConnectedByAny(q.g.Edge(it.Node().ID(), s.ID()), notFn) {
    98  				continue loop
    99  			}
   100  		}
   101  		r.terms = append(r.terms, s)
   102  	}
   103  	return r
   104  }
   105  
   106  // HasAnyOut returns a query holding nodes from the receiver's
   107  // initial set where any outgoing statements satisfies fn. The
   108  // query short circuits, so fn is not called after the first match.
   109  func (q Query) HasAnyOut(fn func(s *Statement) bool) Query {
   110  	r := Query{g: q.g}
   111  	for _, s := range q.terms {
   112  		it := q.g.From(s.ID())
   113  		for it.Next() {
   114  			if ConnectedByAny(q.g.Edge(s.ID(), it.Node().ID()), fn) {
   115  				r.terms = append(r.terms, s)
   116  				break
   117  			}
   118  		}
   119  	}
   120  	return r
   121  }
   122  
   123  // HasAnyIn returns a query holding nodes from the receiver's
   124  // initial set where any incoming statements satisfies fn. The
   125  // query short circuits, so fn is not called after the first match.
   126  func (q Query) HasAnyIn(fn func(s *Statement) bool) Query {
   127  	r := Query{g: q.g}
   128  	for _, s := range q.terms {
   129  		it := q.g.To(s.ID())
   130  		for it.Next() {
   131  			if ConnectedByAny(q.g.Edge(it.Node().ID(), s.ID()), fn) {
   132  				r.terms = append(r.terms, s)
   133  				break
   134  			}
   135  		}
   136  	}
   137  	return r
   138  }
   139  
   140  // not returns the negation of fn.
   141  func not(fn func(s *Statement) bool) func(s *Statement) bool {
   142  	return func(s *Statement) bool { return !fn(s) }
   143  }
   144  
   145  // And returns a query that holds the conjunction of q and p.
   146  func (q Query) And(p Query) Query {
   147  	if q.g != p.g {
   148  		panic("rdf: binary query operation parameters from distinct graphs")
   149  	}
   150  	sortByID(q.terms)
   151  	sortByID(p.terms)
   152  	r := Query{g: q.g}
   153  	var i, j int
   154  	for i < len(q.terms) && j < len(p.terms) {
   155  		qi := q.terms[i]
   156  		pj := p.terms[j]
   157  		switch {
   158  		case qi.ID() < pj.ID():
   159  			i++
   160  		case pj.ID() < qi.ID():
   161  			j++
   162  		default:
   163  			r.terms = append(r.terms, qi)
   164  			i++
   165  			j++
   166  		}
   167  	}
   168  	return r
   169  }
   170  
   171  // Or returns a query that holds the disjunction of q and p.
   172  func (q Query) Or(p Query) Query {
   173  	if q.g != p.g {
   174  		panic("rdf: binary query operation parameters from distinct graphs")
   175  	}
   176  	sortByID(q.terms)
   177  	sortByID(p.terms)
   178  	r := Query{g: q.g}
   179  	var i, j int
   180  	for i < len(q.terms) && j < len(p.terms) {
   181  		qi := q.terms[i]
   182  		pj := p.terms[j]
   183  		switch {
   184  		case qi.ID() < pj.ID():
   185  			if len(r.terms) == 0 || r.terms[len(r.terms)-1].UID != qi.UID {
   186  				r.terms = append(r.terms, qi)
   187  			}
   188  			i++
   189  		case pj.ID() < qi.ID():
   190  			if len(r.terms) == 0 || r.terms[len(r.terms)-1].UID != pj.UID {
   191  				r.terms = append(r.terms, pj)
   192  			}
   193  			j++
   194  		default:
   195  			if len(r.terms) == 0 || r.terms[len(r.terms)-1].UID != qi.UID {
   196  				r.terms = append(r.terms, qi)
   197  			}
   198  			i++
   199  			j++
   200  		}
   201  	}
   202  	r.terms = append(r.terms, q.terms[i:]...)
   203  	r.terms = append(r.terms, p.terms[j:]...)
   204  	return r
   205  }
   206  
   207  // Not returns a query that holds q less p.
   208  func (q Query) Not(p Query) Query {
   209  	if q.g != p.g {
   210  		panic("rdf: binary query operation parameters from distinct graphs")
   211  	}
   212  	sortByID(q.terms)
   213  	sortByID(p.terms)
   214  	r := Query{g: q.g}
   215  	var i, j int
   216  	for i < len(q.terms) && j < len(p.terms) {
   217  		qi := q.terms[i]
   218  		pj := p.terms[j]
   219  		switch {
   220  		case qi.ID() < pj.ID():
   221  			r.terms = append(r.terms, qi)
   222  			i++
   223  		case pj.ID() < qi.ID():
   224  			j++
   225  		default:
   226  			i++
   227  		}
   228  	}
   229  	if len(r.terms) < len(q.terms) {
   230  		r.terms = append(r.terms, q.terms[i:len(q.terms)+min(0, i-len(r.terms))]...)
   231  	}
   232  	return r
   233  }
   234  
   235  // Repeat repeatedly calls fn on q until the set of results is empty or
   236  // ok is false, and then returns the result. If the last non-empty result
   237  // is wanted, fn should return its input and false when the partial
   238  // traversal returns an empty result.
   239  //
   240  // 	result := start.Repeat(func(q rdf.Query) (rdf.Query, bool) {
   241  // 		r := q.Out(condition)
   242  // 		if r.Len() == 0 {
   243  // 			return q, false
   244  // 		}
   245  // 		return r, true
   246  // 	}).Result()
   247  //
   248  func (q Query) Repeat(fn func(Query) (q Query, ok bool)) Query {
   249  	for {
   250  		var ok bool
   251  		q, ok = fn(q)
   252  		if !ok || len(q.terms) == 0 {
   253  			return q
   254  		}
   255  	}
   256  }
   257  
   258  // Unique returns a copy of the receiver that contains only one instance
   259  // of each term.
   260  func (q Query) Unique() Query {
   261  	sortByID(q.terms)
   262  	r := Query{g: q.g}
   263  	for i, t := range q.terms {
   264  		if i == 0 || t.UID != q.terms[i-1].UID {
   265  			r.terms = append(r.terms, t)
   266  		}
   267  	}
   268  	return r
   269  }
   270  
   271  // Len returns the number of terms held by the query.
   272  func (q Query) Len() int {
   273  	return len(q.terms)
   274  }
   275  
   276  // Result returns the terms held by the query.
   277  func (q Query) Result() []Term {
   278  	return q.terms
   279  }
   280  
   281  func sortByID(terms []Term) {
   282  	sort.Slice(terms, func(i, j int) bool { return terms[i].ID() < terms[j].ID() })
   283  }