gonum.org/v1/gonum@v0.14.0/graph/formats/rdf/graph.go (about) 1 // Copyright ©2022 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package rdf 6 7 import ( 8 "fmt" 9 10 "gonum.org/v1/gonum/graph" 11 "gonum.org/v1/gonum/graph/iterator" 12 "gonum.org/v1/gonum/graph/multi" 13 "gonum.org/v1/gonum/graph/set/uid" 14 ) 15 16 // Graph implements an RDF graph satisfying the graph.Graph and graph.Multigraph 17 // interfaces. 18 type Graph struct { 19 nodes map[int64]graph.Node 20 from map[int64]map[int64]map[int64]graph.Line 21 to map[int64]map[int64]map[int64]graph.Line 22 pred map[int64]map[*Statement]bool 23 24 termIDs map[string]int64 25 ids *uid.Set 26 } 27 28 // NewGraph returns a new empty Graph. 29 func NewGraph() *Graph { 30 return &Graph{ 31 nodes: make(map[int64]graph.Node), 32 from: make(map[int64]map[int64]map[int64]graph.Line), 33 to: make(map[int64]map[int64]map[int64]graph.Line), 34 pred: make(map[int64]map[*Statement]bool), 35 36 termIDs: make(map[string]int64), 37 ids: uid.NewSet(), 38 } 39 } 40 41 // addNode adds n to the graph. It panics if the added node ID matches an 42 // existing node ID. 43 func (g *Graph) addNode(n graph.Node) { 44 if _, exists := g.nodes[n.ID()]; exists { 45 panic(fmt.Sprintf("rdf: node ID collision: %d", n.ID())) 46 } 47 g.nodes[n.ID()] = n 48 g.ids.Use(n.ID()) 49 } 50 51 // AddStatement adds s to the graph. It panics if Term UIDs in the statement 52 // are not consistent with existing terms in the graph. Statements must not 53 // be altered while being held by the graph. If the UID fields of the terms 54 // in s are zero, they will be set to values consistent with the rest of the 55 // graph on return, mutating the parameter, otherwise the UIDs must match terms 56 // that already exist in the graph. The statement must be a valid RDF statement 57 // otherwise AddStatement will panic. 58 func (g *Graph) AddStatement(s *Statement) { 59 _, _, kind, err := s.Predicate.Parts() 60 if err != nil { 61 panic(fmt.Errorf("rdf: error extracting predicate: %w", err)) 62 } 63 if kind != IRI { 64 panic(fmt.Errorf("rdf: predicate is not an IRI: %s", s.Predicate.Value)) 65 } 66 67 _, _, kind, err = s.Subject.Parts() 68 if err != nil { 69 panic(fmt.Errorf("rdf: error extracting subject: %w", err)) 70 } 71 switch kind { 72 case IRI, Blank: 73 default: 74 panic(fmt.Errorf("rdf: subject is not an IRI or blank node: %s", s.Subject.Value)) 75 } 76 77 _, _, kind, err = s.Object.Parts() 78 if err != nil { 79 panic(fmt.Errorf("rdf: error extracting object: %w", err)) 80 } 81 if kind == Invalid { 82 panic(fmt.Errorf("rdf: object is not a valid term: %s", s.Object.Value)) 83 } 84 85 statements, ok := g.pred[s.Predicate.UID] 86 if !ok { 87 statements = make(map[*Statement]bool) 88 g.pred[s.Predicate.UID] = statements 89 } 90 statements[s] = true 91 g.addTerm(&s.Subject) 92 g.addTerm(&s.Predicate) 93 g.addTerm(&s.Object) 94 g.setLine(s) 95 } 96 97 // addTerm adds t to the graph. It panics if the added node ID matches an existing node ID. 98 func (g *Graph) addTerm(t *Term) { 99 if t.UID == 0 { 100 id, ok := g.termIDs[t.Value] 101 if ok { 102 t.UID = id 103 return 104 } 105 id = g.ids.NewID() 106 g.ids.Use(id) 107 t.UID = id 108 g.termIDs[t.Value] = id 109 return 110 } 111 112 id, ok := g.termIDs[t.Value] 113 if !ok { 114 g.termIDs[t.Value] = t.UID 115 } else if id != t.UID { 116 panic(fmt.Sprintf("rdf: term ID collision: term:%s new ID:%d old ID:%d", t.Value, t.UID, id)) 117 } 118 } 119 120 // AllStatements returns an iterator of the statements that make up the graph. 121 func (g *Graph) AllStatements() *Statements { 122 return &Statements{eit: g.Edges()} 123 } 124 125 // Edge returns the edge from u to v if such an edge exists and nil otherwise. 126 // The node v must be directly reachable from u as defined by the From method. 127 // The returned graph.Edge is a multi.Edge if an edge exists. 128 func (g *Graph) Edge(uid, vid int64) graph.Edge { 129 l := g.Lines(uid, vid) 130 if l == graph.Empty { 131 return nil 132 } 133 return multi.Edge{F: g.Node(uid), T: g.Node(vid), Lines: l} 134 } 135 136 // Edges returns all the edges in the graph. Each edge in the returned slice 137 // is a multi.Edge. 138 func (g *Graph) Edges() graph.Edges { 139 if len(g.nodes) == 0 { 140 return graph.Empty 141 } 142 var edges []graph.Edge 143 for _, u := range g.nodes { 144 for _, e := range g.from[u.ID()] { 145 var lines []graph.Line 146 for _, l := range e { 147 lines = append(lines, l) 148 } 149 if len(lines) != 0 { 150 edges = append(edges, multi.Edge{ 151 F: g.Node(u.ID()), 152 T: g.Node(lines[0].To().ID()), 153 Lines: iterator.NewOrderedLines(lines), 154 }) 155 } 156 } 157 } 158 if len(edges) == 0 { 159 return graph.Empty 160 } 161 return iterator.NewOrderedEdges(edges) 162 } 163 164 // From returns all nodes in g that can be reached directly from n. 165 // 166 // The returned graph.Nodes is only valid until the next mutation of 167 // the receiver. 168 func (g *Graph) From(id int64) graph.Nodes { 169 if len(g.from[id]) == 0 { 170 return graph.Empty 171 } 172 return iterator.NewNodesByLines(g.nodes, g.from[id]) 173 } 174 175 // FromSubject returns all nodes in g that can be reached directly from an 176 // RDF subject term. 177 // 178 // The returned graph.Nodes is only valid until the next mutation of 179 // the receiver. 180 func (g *Graph) FromSubject(t Term) graph.Nodes { 181 return g.From(t.UID) 182 } 183 184 // HasEdgeBetween returns whether an edge exists between nodes x and y without 185 // considering direction. 186 func (g *Graph) HasEdgeBetween(xid, yid int64) bool { 187 if _, ok := g.from[xid][yid]; ok { 188 return true 189 } 190 _, ok := g.from[yid][xid] 191 return ok 192 } 193 194 // HasEdgeFromTo returns whether an edge exists in the graph from u to v. 195 func (g *Graph) HasEdgeFromTo(uid, vid int64) bool { 196 _, ok := g.from[uid][vid] 197 return ok 198 } 199 200 // Lines returns the lines from u to v if such any such lines exists and nil otherwise. 201 // The node v must be directly reachable from u as defined by the From method. 202 func (g *Graph) Lines(uid, vid int64) graph.Lines { 203 edge := g.from[uid][vid] 204 if len(edge) == 0 { 205 return graph.Empty 206 } 207 var lines []graph.Line 208 for _, l := range edge { 209 lines = append(lines, l) 210 } 211 return iterator.NewOrderedLines(lines) 212 } 213 214 // newLine returns a new Line from the source to the destination node. 215 // The returned Line will have a graph-unique ID. 216 // The Line's ID does not become valid in g until the Line is added to g. 217 func (g *Graph) newLine(from, to graph.Node) graph.Line { 218 return multi.Line{F: from, T: to, UID: g.ids.NewID()} 219 } 220 221 // newNode returns a new unique Node to be added to g. The Node's ID does 222 // not become valid in g until the Node is added to g. 223 func (g *Graph) newNode() graph.Node { 224 if len(g.nodes) == 0 { 225 return multi.Node(0) 226 } 227 if int64(len(g.nodes)) == uid.Max { 228 panic("rdf: cannot allocate node: no slot") 229 } 230 return multi.Node(g.ids.NewID()) 231 } 232 233 // Node returns the node with the given ID if it exists in the graph, 234 // and nil otherwise. 235 func (g *Graph) Node(id int64) graph.Node { 236 return g.nodes[id] 237 } 238 239 // TermFor returns the Term for the given text. The text must be 240 // an exact match for the Term's Value field. 241 func (g *Graph) TermFor(text string) (term Term, ok bool) { 242 id, ok := g.termIDs[text] 243 if !ok { 244 return 245 } 246 n, ok := g.nodes[id] 247 if !ok { 248 var s map[*Statement]bool 249 s, ok = g.pred[id] 250 if !ok { 251 return 252 } 253 for k := range s { 254 return k.Predicate, true 255 } 256 } 257 return n.(Term), true 258 } 259 260 // Nodes returns all the nodes in the graph. 261 // 262 // The returned graph.Nodes is only valid until the next mutation of 263 // the receiver. 264 func (g *Graph) Nodes() graph.Nodes { 265 if len(g.nodes) == 0 { 266 return graph.Empty 267 } 268 return iterator.NewNodes(g.nodes) 269 } 270 271 // Predicates returns a slice of all the predicates used in the graph. 272 func (g *Graph) Predicates() []Term { 273 p := make([]Term, len(g.pred)) 274 i := 0 275 for _, statements := range g.pred { 276 for s := range statements { 277 p[i] = s.Predicate 278 i++ 279 break 280 } 281 } 282 return p 283 } 284 285 // removeLine removes the line with the given end point and line IDs from 286 // the graph, leaving the terminal nodes. If the line does not exist it is 287 // a no-op. 288 func (g *Graph) removeLine(fid, tid, id int64) { 289 if _, ok := g.nodes[fid]; !ok { 290 return 291 } 292 if _, ok := g.nodes[tid]; !ok { 293 return 294 } 295 296 delete(g.from[fid][tid], id) 297 if len(g.from[fid][tid]) == 0 { 298 delete(g.from[fid], tid) 299 } 300 delete(g.to[tid][fid], id) 301 if len(g.to[tid][fid]) == 0 { 302 delete(g.to[tid], fid) 303 } 304 305 g.ids.Release(id) 306 } 307 308 // removeNode removes the node with the given ID from the graph, as well as 309 // any edges attached to it. If the node is not in the graph it is a no-op. 310 func (g *Graph) removeNode(id int64) { 311 if _, ok := g.nodes[id]; !ok { 312 return 313 } 314 delete(g.nodes, id) 315 316 for from := range g.from[id] { 317 delete(g.to[from], id) 318 } 319 delete(g.from, id) 320 321 for to := range g.to[id] { 322 delete(g.from[to], id) 323 } 324 delete(g.to, id) 325 326 g.ids.Release(id) 327 } 328 329 // RemoveStatement removes s from the graph, leaving the terminal nodes if they 330 // are part of another statement. If the statement does not exist in g it is a no-op. 331 func (g *Graph) RemoveStatement(s *Statement) { 332 if !g.pred[s.Predicate.UID][s] { 333 return 334 } 335 336 // Remove the connection. 337 g.removeLine(s.Subject.UID, s.Object.UID, s.Predicate.UID) 338 statements := g.pred[s.Predicate.UID] 339 delete(statements, s) 340 if len(statements) == 0 { 341 delete(g.pred, s.Predicate.UID) 342 if len(g.from[s.Predicate.UID]) == 0 { 343 g.ids.Release(s.Predicate.UID) 344 delete(g.termIDs, s.Predicate.Value) 345 } 346 } 347 348 // Remove any orphan terms. 349 if g.From(s.Subject.UID).Len() == 0 && g.To(s.Subject.UID).Len() == 0 { 350 g.removeNode(s.Subject.UID) 351 delete(g.termIDs, s.Subject.Value) 352 } 353 if g.From(s.Object.UID).Len() == 0 && g.To(s.Object.UID).Len() == 0 { 354 g.removeNode(s.Object.UID) 355 delete(g.termIDs, s.Object.Value) 356 } 357 } 358 359 // RemoveTerm removes t and any statements referencing t from the graph. If 360 // the term is a predicate, all statements with the predicate are removed. If 361 // the term does not exist it is a no-op. 362 func (g *Graph) RemoveTerm(t Term) { 363 // Remove any predicates. 364 if statements, ok := g.pred[t.UID]; ok { 365 for s := range statements { 366 g.RemoveStatement(s) 367 } 368 } 369 370 // Quick return. 371 _, nok := g.nodes[t.UID] 372 _, fok := g.from[t.UID] 373 _, tok := g.to[t.UID] 374 if !nok && !fok && !tok { 375 return 376 } 377 378 // Remove any statements that impinge on the term. 379 to := g.From(t.UID) 380 for to.Next() { 381 lines := g.Lines(t.UID, to.Node().ID()) 382 for lines.Next() { 383 g.RemoveStatement(lines.Line().(*Statement)) 384 } 385 } 386 from := g.To(t.UID) 387 if from.Next() { 388 lines := g.Lines(from.Node().ID(), t.UID) 389 for lines.Next() { 390 g.RemoveStatement(lines.Line().(*Statement)) 391 } 392 } 393 394 // Remove the node. 395 g.removeNode(t.UID) 396 delete(g.termIDs, t.Value) 397 } 398 399 // setLine adds l, a line from one node to another. If the nodes do not exist, 400 // they are added, and are set to the nodes of the line otherwise. 401 func (g *Graph) setLine(l graph.Line) { 402 var ( 403 from = l.From() 404 fid = from.ID() 405 to = l.To() 406 tid = to.ID() 407 lid = l.ID() 408 ) 409 410 if _, ok := g.nodes[fid]; !ok { 411 g.addNode(from) 412 } else { 413 g.nodes[fid] = from 414 } 415 if _, ok := g.nodes[tid]; !ok { 416 g.addNode(to) 417 } else { 418 g.nodes[tid] = to 419 } 420 421 switch { 422 case g.from[fid] == nil: 423 g.from[fid] = map[int64]map[int64]graph.Line{tid: {lid: l}} 424 case g.from[fid][tid] == nil: 425 g.from[fid][tid] = map[int64]graph.Line{lid: l} 426 default: 427 g.from[fid][tid][lid] = l 428 } 429 switch { 430 case g.to[tid] == nil: 431 g.to[tid] = map[int64]map[int64]graph.Line{fid: {lid: l}} 432 case g.to[tid][fid] == nil: 433 g.to[tid][fid] = map[int64]graph.Line{lid: l} 434 default: 435 g.to[tid][fid][lid] = l 436 } 437 438 g.ids.Use(lid) 439 } 440 441 // Statements returns an iterator of the statements that connect the subject 442 // term node u to the object term node v. 443 func (g *Graph) Statements(uid, vid int64) *Statements { 444 return &Statements{lit: g.Lines(uid, vid)} 445 } 446 447 // To returns all nodes in g that can reach directly to n. 448 // 449 // The returned graph.Nodes is only valid until the next mutation of 450 // the receiver. 451 func (g *Graph) To(id int64) graph.Nodes { 452 if len(g.to[id]) == 0 { 453 return graph.Empty 454 } 455 return iterator.NewNodesByLines(g.nodes, g.to[id]) 456 } 457 458 // ToObject returns all nodes in g that can reach directly to an RDF object 459 // term. 460 // 461 // The returned graph.Nodes is only valid until the next mutation of 462 // the receiver. 463 func (g *Graph) ToObject(t Term) graph.Nodes { 464 return g.To(t.UID) 465 } 466 467 // Statements is an RDF statement iterator. 468 type Statements struct { 469 eit graph.Edges 470 lit graph.Lines 471 } 472 473 // Next returns whether the iterator holds any additional statements. 474 func (s *Statements) Next() bool { 475 if s.lit != nil && s.lit.Next() { 476 return true 477 } 478 if s.eit == nil || !s.eit.Next() { 479 return false 480 } 481 s.lit = s.eit.Edge().(multi.Edge).Lines 482 return s.lit.Next() 483 } 484 485 // Statement returns the current statement. 486 func (s *Statements) Statement() *Statement { 487 return s.lit.Line().(*Statement) 488 } 489 490 // ConnectedByAny is a helper function to for simplifying graph traversal 491 // conditions. 492 func ConnectedByAny(e graph.Edge, with func(*Statement) bool) bool { 493 switch e := e.(type) { 494 case *Statement: 495 return with(e) 496 case graph.Lines: 497 it := e 498 for it.Next() { 499 s, ok := it.Line().(*Statement) 500 if !ok { 501 continue 502 } 503 ok = with(s) 504 if ok { 505 return true 506 } 507 } 508 } 509 return false 510 }