github.com/gopherd/gonum@v0.0.4/graph/formats/rdf/graph.go (about)

     1  // Copyright ©2022 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package rdf
     6  
     7  import (
     8  	"fmt"
     9  
    10  	"github.com/gopherd/gonum/graph"
    11  	"github.com/gopherd/gonum/graph/iterator"
    12  	"github.com/gopherd/gonum/graph/multi"
    13  	"github.com/gopherd/gonum/graph/set/uid"
    14  )
    15  
// Graph implements an RDF graph satisfying the graph.Graph and graph.Multigraph
// interfaces.
type Graph struct {
	// nodes maps a node ID to its node. from indexes lines by source
	// node ID, then destination node ID, then line ID; to holds the
	// same lines indexed by destination node ID, then source node ID,
	// then line ID. pred maps a predicate UID to the set of statements
	// held by the graph that use that predicate.
	nodes map[int64]graph.Node
	from  map[int64]map[int64]map[int64]graph.Line
	to    map[int64]map[int64]map[int64]graph.Line
	pred  map[int64]map[*Statement]bool

	// termIDs maps the text of a term to the UID recorded for it.
	// ids tracks which IDs are in use and hands out new ones.
	termIDs map[string]int64
	ids     *uid.Set
}
    27  
    28  // NewGraph returns a new empty Graph.
    29  func NewGraph() *Graph {
    30  	return &Graph{
    31  		nodes: make(map[int64]graph.Node),
    32  		from:  make(map[int64]map[int64]map[int64]graph.Line),
    33  		to:    make(map[int64]map[int64]map[int64]graph.Line),
    34  		pred:  make(map[int64]map[*Statement]bool),
    35  
    36  		termIDs: make(map[string]int64),
    37  		ids:     uid.NewSet(),
    38  	}
    39  }
    40  
    41  // addNode adds n to the graph. It panics if the added node ID matches an
    42  // existing node ID.
    43  func (g *Graph) addNode(n graph.Node) {
    44  	if _, exists := g.nodes[n.ID()]; exists {
    45  		panic(fmt.Sprintf("rdf: node ID collision: %d", n.ID()))
    46  	}
    47  	g.nodes[n.ID()] = n
    48  	g.ids.Use(n.ID())
    49  }
    50  
    51  // AddStatement adds s to the graph. It panics if Term UIDs in the statement
    52  // are not consistent with existing terms in the graph. Statements must not
    53  // be altered while being held by the graph. If the UID fields of the terms
    54  // in s are zero, they will be set to values consistent with the rest of the
    55  // graph on return, mutating the parameter, otherwise the UIDs must match terms
    56  // that already exist in the graph. The statement must be a valid RDF statement
    57  // otherwise AddStatement will panic.
    58  func (g *Graph) AddStatement(s *Statement) {
    59  	_, _, kind, err := s.Predicate.Parts()
    60  	if err != nil {
    61  		panic(fmt.Errorf("rdf: error extracting predicate: %w", err))
    62  	}
    63  	if kind != IRI {
    64  		panic(fmt.Errorf("rdf: predicate is not an IRI: %s", s.Predicate.Value))
    65  	}
    66  
    67  	_, _, kind, err = s.Subject.Parts()
    68  	if err != nil {
    69  		panic(fmt.Errorf("rdf: error extracting subject: %w", err))
    70  	}
    71  	switch kind {
    72  	case IRI, Blank:
    73  	default:
    74  		panic(fmt.Errorf("rdf: subject is not an IRI or blank node: %s", s.Subject.Value))
    75  	}
    76  
    77  	_, _, kind, err = s.Object.Parts()
    78  	if err != nil {
    79  		panic(fmt.Errorf("rdf: error extracting object: %w", err))
    80  	}
    81  	if kind == Invalid {
    82  		panic(fmt.Errorf("rdf: object is not a valid term: %s", s.Object.Value))
    83  	}
    84  
    85  	statements, ok := g.pred[s.Predicate.UID]
    86  	if !ok {
    87  		statements = make(map[*Statement]bool)
    88  		g.pred[s.Predicate.UID] = statements
    89  	}
    90  	statements[s] = true
    91  	g.addTerm(&s.Subject)
    92  	g.addTerm(&s.Predicate)
    93  	g.addTerm(&s.Object)
    94  	g.setLine(s)
    95  }
    96  
    97  // addTerm adds t to the graph. It panics if the added node ID matches an existing node ID.
    98  func (g *Graph) addTerm(t *Term) {
    99  	if t.UID == 0 {
   100  		id, ok := g.termIDs[t.Value]
   101  		if ok {
   102  			t.UID = id
   103  			return
   104  		}
   105  		id = g.ids.NewID()
   106  		g.ids.Use(id)
   107  		t.UID = id
   108  		g.termIDs[t.Value] = id
   109  		return
   110  	}
   111  
   112  	id, ok := g.termIDs[t.Value]
   113  	if !ok {
   114  		g.termIDs[t.Value] = t.UID
   115  	} else if id != t.UID {
   116  		panic(fmt.Sprintf("rdf: term ID collision: term:%s new ID:%d old ID:%d", t.Value, t.UID, id))
   117  	}
   118  }
   119  
   120  // AllStatements returns an iterator of the statements that make up the graph.
   121  func (g *Graph) AllStatements() *Statements {
   122  	return &Statements{eit: g.Edges()}
   123  }
   124  
   125  // Edge returns the edge from u to v if such an edge exists and nil otherwise.
   126  // The node v must be directly reachable from u as defined by the From method.
   127  // The returned graph.Edge is a multi.Edge if an edge exists.
   128  func (g *Graph) Edge(uid, vid int64) graph.Edge {
   129  	l := g.Lines(uid, vid)
   130  	if l == graph.Empty {
   131  		return nil
   132  	}
   133  	return multi.Edge{F: g.Node(uid), T: g.Node(vid), Lines: l}
   134  }
   135  
   136  // Edges returns all the edges in the graph. Each edge in the returned slice
   137  // is a multi.Edge.
   138  func (g *Graph) Edges() graph.Edges {
   139  	if len(g.nodes) == 0 {
   140  		return graph.Empty
   141  	}
   142  	var edges []graph.Edge
   143  	for _, u := range g.nodes {
   144  		for _, e := range g.from[u.ID()] {
   145  			var lines []graph.Line
   146  			for _, l := range e {
   147  				lines = append(lines, l)
   148  			}
   149  			if len(lines) != 0 {
   150  				edges = append(edges, multi.Edge{
   151  					F:     g.Node(u.ID()),
   152  					T:     g.Node(lines[0].To().ID()),
   153  					Lines: iterator.NewOrderedLines(lines),
   154  				})
   155  			}
   156  		}
   157  	}
   158  	if len(edges) == 0 {
   159  		return graph.Empty
   160  	}
   161  	return iterator.NewOrderedEdges(edges)
   162  }
   163  
   164  // From returns all nodes in g that can be reached directly from n.
   165  //
   166  // The returned graph.Nodes is only valid until the next mutation of
   167  // the receiver.
   168  func (g *Graph) From(id int64) graph.Nodes {
   169  	if len(g.from[id]) == 0 {
   170  		return graph.Empty
   171  	}
   172  	return iterator.NewNodesByLines(g.nodes, g.from[id])
   173  }
   174  
   175  // FromSubject returns all nodes in g that can be reached directly from an
   176  // RDF subject term.
   177  //
   178  // The returned graph.Nodes is only valid until the next mutation of
   179  // the receiver.
   180  func (g *Graph) FromSubject(t Term) graph.Nodes {
   181  	return g.From(t.UID)
   182  }
   183  
   184  // HasEdgeBetween returns whether an edge exists between nodes x and y without
   185  // considering direction.
   186  func (g *Graph) HasEdgeBetween(xid, yid int64) bool {
   187  	if _, ok := g.from[xid][yid]; ok {
   188  		return true
   189  	}
   190  	_, ok := g.from[yid][xid]
   191  	return ok
   192  }
   193  
   194  // HasEdgeFromTo returns whether an edge exists in the graph from u to v.
   195  func (g *Graph) HasEdgeFromTo(uid, vid int64) bool {
   196  	_, ok := g.from[uid][vid]
   197  	return ok
   198  }
   199  
   200  // Lines returns the lines from u to v if such any such lines exists and nil otherwise.
   201  // The node v must be directly reachable from u as defined by the From method.
   202  func (g *Graph) Lines(uid, vid int64) graph.Lines {
   203  	edge := g.from[uid][vid]
   204  	if len(edge) == 0 {
   205  		return graph.Empty
   206  	}
   207  	var lines []graph.Line
   208  	for _, l := range edge {
   209  		lines = append(lines, l)
   210  	}
   211  	return iterator.NewOrderedLines(lines)
   212  }
   213  
   214  // newLine returns a new Line from the source to the destination node.
   215  // The returned Line will have a graph-unique ID.
   216  // The Line's ID does not become valid in g until the Line is added to g.
   217  func (g *Graph) newLine(from, to graph.Node) graph.Line {
   218  	return multi.Line{F: from, T: to, UID: g.ids.NewID()}
   219  }
   220  
   221  // newNode returns a new unique Node to be added to g. The Node's ID does
   222  // not become valid in g until the Node is added to g.
   223  func (g *Graph) newNode() graph.Node {
   224  	if len(g.nodes) == 0 {
   225  		return multi.Node(0)
   226  	}
   227  	if int64(len(g.nodes)) == uid.Max {
   228  		panic("rdf: cannot allocate node: no slot")
   229  	}
   230  	return multi.Node(g.ids.NewID())
   231  }
   232  
   233  // Node returns the node with the given ID if it exists in the graph,
   234  // and nil otherwise.
   235  func (g *Graph) Node(id int64) graph.Node {
   236  	return g.nodes[id]
   237  }
   238  
   239  // TermFor returns the Term for the given text. The text must be
   240  // an exact match for the Term's Value field.
   241  func (g *Graph) TermFor(text string) (term Term, ok bool) {
   242  	id, ok := g.termIDs[text]
   243  	if !ok {
   244  		return
   245  	}
   246  	n, ok := g.nodes[id]
   247  	if !ok {
   248  		var s map[*Statement]bool
   249  		s, ok = g.pred[id]
   250  		if !ok {
   251  			return
   252  		}
   253  		for k := range s {
   254  			return k.Predicate, true
   255  		}
   256  	}
   257  	return n.(Term), true
   258  }
   259  
   260  // Nodes returns all the nodes in the graph.
   261  //
   262  // The returned graph.Nodes is only valid until the next mutation of
   263  // the receiver.
   264  func (g *Graph) Nodes() graph.Nodes {
   265  	if len(g.nodes) == 0 {
   266  		return graph.Empty
   267  	}
   268  	return iterator.NewNodes(g.nodes)
   269  }
   270  
   271  // Predicates returns a slice of all the predicates used in the graph.
   272  func (g *Graph) Predicates() []Term {
   273  	p := make([]Term, len(g.pred))
   274  	i := 0
   275  	for _, statements := range g.pred {
   276  		for s := range statements {
   277  			p[i] = s.Predicate
   278  			i++
   279  			break
   280  		}
   281  	}
   282  	return p
   283  }
   284  
   285  // removeLine removes the line with the given end point and line IDs from
   286  // the graph, leaving the terminal nodes. If the line does not exist it is
   287  // a no-op.
   288  func (g *Graph) removeLine(fid, tid, id int64) {
   289  	if _, ok := g.nodes[fid]; !ok {
   290  		return
   291  	}
   292  	if _, ok := g.nodes[tid]; !ok {
   293  		return
   294  	}
   295  
   296  	delete(g.from[fid][tid], id)
   297  	if len(g.from[fid][tid]) == 0 {
   298  		delete(g.from[fid], tid)
   299  	}
   300  	delete(g.to[tid][fid], id)
   301  	if len(g.to[tid][fid]) == 0 {
   302  		delete(g.to[tid], fid)
   303  	}
   304  
   305  	g.ids.Release(id)
   306  }
   307  
   308  // removeNode removes the node with the given ID from the graph, as well as
   309  // any edges attached to it. If the node is not in the graph it is a no-op.
   310  func (g *Graph) removeNode(id int64) {
   311  	if _, ok := g.nodes[id]; !ok {
   312  		return
   313  	}
   314  	delete(g.nodes, id)
   315  
   316  	for from := range g.from[id] {
   317  		delete(g.to[from], id)
   318  	}
   319  	delete(g.from, id)
   320  
   321  	for to := range g.to[id] {
   322  		delete(g.from[to], id)
   323  	}
   324  	delete(g.to, id)
   325  
   326  	g.ids.Release(id)
   327  }
   328  
   329  // RemoveStatement removes s from the graph, leaving the terminal nodes if they
   330  // are part of another statement. If the statement does not exist in g it is a no-op.
   331  func (g *Graph) RemoveStatement(s *Statement) {
   332  	if !g.pred[s.Predicate.UID][s] {
   333  		return
   334  	}
   335  
   336  	// Remove the connection.
   337  	g.removeLine(s.Subject.UID, s.Object.UID, s.Predicate.UID)
   338  	statements := g.pred[s.Predicate.UID]
   339  	delete(statements, s)
   340  	if len(statements) == 0 {
   341  		delete(g.pred, s.Predicate.UID)
   342  		if len(g.from[s.Predicate.UID]) == 0 {
   343  			g.ids.Release(s.Predicate.UID)
   344  			delete(g.termIDs, s.Predicate.Value)
   345  		}
   346  	}
   347  
   348  	// Remove any orphan terms.
   349  	if g.From(s.Subject.UID).Len() == 0 && g.To(s.Subject.UID).Len() == 0 {
   350  		g.removeNode(s.Subject.UID)
   351  		delete(g.termIDs, s.Subject.Value)
   352  	}
   353  	if g.From(s.Object.UID).Len() == 0 && g.To(s.Object.UID).Len() == 0 {
   354  		g.removeNode(s.Object.UID)
   355  		delete(g.termIDs, s.Object.Value)
   356  	}
   357  }
   358  
   359  // RemoveTerm removes t and any statements referencing t from the graph. If
   360  // the term is a predicate, all statements with the predicate are removed. If
   361  // the term does not exist it is a no-op.
   362  func (g *Graph) RemoveTerm(t Term) {
   363  	// Remove any predicates.
   364  	if statements, ok := g.pred[t.UID]; ok {
   365  		for s := range statements {
   366  			g.RemoveStatement(s)
   367  		}
   368  	}
   369  
   370  	// Quick return.
   371  	_, nok := g.nodes[t.UID]
   372  	_, fok := g.from[t.UID]
   373  	_, tok := g.to[t.UID]
   374  	if !nok && !fok && !tok {
   375  		return
   376  	}
   377  
   378  	// Remove any statements that impinge on the term.
   379  	to := g.From(t.UID)
   380  	for to.Next() {
   381  		lines := g.Lines(t.UID, to.Node().ID())
   382  		for lines.Next() {
   383  			g.RemoveStatement(lines.Line().(*Statement))
   384  		}
   385  	}
   386  	from := g.To(t.UID)
   387  	if from.Next() {
   388  		lines := g.Lines(from.Node().ID(), t.UID)
   389  		for lines.Next() {
   390  			g.RemoveStatement(lines.Line().(*Statement))
   391  		}
   392  	}
   393  
   394  	// Remove the node.
   395  	g.removeNode(t.UID)
   396  	delete(g.termIDs, t.Value)
   397  }
   398  
   399  // setLine adds l, a line from one node to another. If the nodes do not exist,
   400  // they are added, and are set to the nodes of the line otherwise.
   401  func (g *Graph) setLine(l graph.Line) {
   402  	var (
   403  		from = l.From()
   404  		fid  = from.ID()
   405  		to   = l.To()
   406  		tid  = to.ID()
   407  		lid  = l.ID()
   408  	)
   409  
   410  	if _, ok := g.nodes[fid]; !ok {
   411  		g.addNode(from)
   412  	} else {
   413  		g.nodes[fid] = from
   414  	}
   415  	if _, ok := g.nodes[tid]; !ok {
   416  		g.addNode(to)
   417  	} else {
   418  		g.nodes[tid] = to
   419  	}
   420  
   421  	switch {
   422  	case g.from[fid] == nil:
   423  		g.from[fid] = map[int64]map[int64]graph.Line{tid: {lid: l}}
   424  	case g.from[fid][tid] == nil:
   425  		g.from[fid][tid] = map[int64]graph.Line{lid: l}
   426  	default:
   427  		g.from[fid][tid][lid] = l
   428  	}
   429  	switch {
   430  	case g.to[tid] == nil:
   431  		g.to[tid] = map[int64]map[int64]graph.Line{fid: {lid: l}}
   432  	case g.to[tid][fid] == nil:
   433  		g.to[tid][fid] = map[int64]graph.Line{lid: l}
   434  	default:
   435  		g.to[tid][fid][lid] = l
   436  	}
   437  
   438  	g.ids.Use(lid)
   439  }
   440  
   441  // Statements returns an iterator of the statements that connect the subject
   442  // term node u to the object term node v.
   443  func (g *Graph) Statements(uid, vid int64) *Statements {
   444  	return &Statements{lit: g.Lines(uid, vid)}
   445  }
   446  
   447  // To returns all nodes in g that can reach directly to n.
   448  //
   449  // The returned graph.Nodes is only valid until the next mutation of
   450  // the receiver.
   451  func (g *Graph) To(id int64) graph.Nodes {
   452  	if len(g.to[id]) == 0 {
   453  		return graph.Empty
   454  	}
   455  	return iterator.NewNodesByLines(g.nodes, g.to[id])
   456  }
   457  
   458  // ToObject returns all nodes in g that can reach directly to an RDF object
   459  // term.
   460  //
   461  // The returned graph.Nodes is only valid until the next mutation of
   462  // the receiver.
   463  func (g *Graph) ToObject(t Term) graph.Nodes {
   464  	return g.To(t.UID)
   465  }
   466  
// Statements is an RDF statement iterator.
type Statements struct {
	// eit, when non-nil, supplies further edges; Next moves on to the
	// lines of the next edge once lit has no more lines. lit is the line
	// iterator from which the current statement is read.
	eit graph.Edges
	lit graph.Lines
}
   472  
   473  // Next returns whether the iterator holds any additional statements.
   474  func (s *Statements) Next() bool {
   475  	if s.lit != nil && s.lit.Next() {
   476  		return true
   477  	}
   478  	if s.eit == nil || !s.eit.Next() {
   479  		return false
   480  	}
   481  	s.lit = s.eit.Edge().(multi.Edge).Lines
   482  	return s.lit.Next()
   483  }
   484  
   485  // Statement returns the current statement.
   486  func (s *Statements) Statement() *Statement {
   487  	return s.lit.Line().(*Statement)
   488  }
   489  
   490  // ConnectedByAny is a helper function to for simplifying graph traversal
   491  // conditions.
   492  func ConnectedByAny(e graph.Edge, with func(*Statement) bool) bool {
   493  	switch e := e.(type) {
   494  	case *Statement:
   495  		return with(e)
   496  	case graph.Lines:
   497  		it := e
   498  		for it.Next() {
   499  			s, ok := it.Line().(*Statement)
   500  			if !ok {
   501  				continue
   502  			}
   503  			ok = with(s)
   504  			if ok {
   505  				return true
   506  			}
   507  		}
   508  	}
   509  	return false
   510  }