// Source: github.com/aretext/aretext@v1.3.0/syntax/parser/computation.go

package parser

// ComputedToken is a token recognized by a computation.
type ComputedToken struct {
	// Offset is the token's start position,
	// defined relative to the computation's start position.
	Offset uint64
	// Length is the token's length; the token covers
	// the half-open interval [Offset, Offset+Length).
	Length uint64
	// Role is the role assigned to the token.
	Role TokenRole
}

// computation is a result produced by a parser.
// computations are composable, so part of one computation
// can be re-used when re-parsing an edited text.
//
// Computations form a binary tree. Leaves are built by newComputation and
// carry tokens; inner nodes are built by computationFromChildren and carry
// lengths, height and states derived from their children.
type computation struct {
	// readLength is the number of runes read by the parser.
	readLength uint64
	// consumedLength is the number of runes consumed by the parser.
	consumedLength uint64
	// treeHeight is the height of the tree rooted at this node;
	// a leaf has height 1.
	treeHeight uint64
	// startState is the parse state at the start of the computation.
	startState State
	// endState is the parse state at the end of the computation.
	endState State
	tokens   []ComputedToken // Only in leaves.
	// leftChild and rightChild are the sub-computations of an inner node.
	// Either may be nil; both are nil in a leaf.
	leftChild  *computation
	rightChild *computation
}

// newComputation constructs a computation.
// readLength is the number of runes read by the parser,
// and consumedLength is the number of runes consumed by the parser.
// The tokens slice contains any tokens recognized by the parser;
// these must have non-zero length, be ordered sequentially by start position,
// and be non-overlapping.
32 func newComputation( 33 readLength uint64, 34 consumedLength uint64, 35 startState State, 36 endState State, 37 tokens []ComputedToken, 38 ) *computation { 39 if consumedLength == 0 { 40 panic("computation must consume at least one rune") 41 } 42 43 if consumedLength > readLength { 44 panic("Consumed length must be less than or equal to read length") 45 } 46 47 var lastEndPos uint64 48 for _, tok := range tokens { 49 if tok.Length == 0 { 50 panic("Token must have non-zero length") 51 } 52 53 if tok.Offset < lastEndPos { 54 panic("Token must be sequential and non-overlapping") 55 } 56 57 tokEndPos := tok.Offset + tok.Length 58 if tokEndPos > consumedLength { 59 panic("Token length must be less than consumed length") 60 } 61 62 lastEndPos = tokEndPos 63 } 64 65 return &computation{ 66 readLength: readLength, 67 consumedLength: consumedLength, 68 treeHeight: 1, 69 startState: startState, 70 endState: endState, 71 tokens: tokens, 72 } 73 } 74 75 // ReadLength returns the number of runes read to produce this computation. 76 func (c *computation) ReadLength() uint64 { 77 if c == nil { 78 return 0 79 } else { 80 return c.readLength 81 } 82 } 83 84 // ConsumedLength returns the number of runes consumed to produce this computation. 85 func (c *computation) ConsumedLength() uint64 { 86 if c == nil { 87 return 0 88 } else { 89 return c.consumedLength 90 } 91 } 92 93 // TreeHeight returns the height of the computation tree. 94 func (c *computation) TreeHeight() uint64 { 95 if c == nil { 96 return 0 97 } else { 98 return c.treeHeight 99 } 100 } 101 102 // StartState returns the parse state at the start of the computation. 103 func (c *computation) StartState() State { 104 if c == nil { 105 return EmptyState{} 106 } 107 return c.startState 108 } 109 110 // EndState returns the parse state at the end of the computation. 
111 func (c *computation) EndState() State { 112 if c == nil { 113 return EmptyState{} 114 } 115 return c.endState 116 } 117 118 // Append appends one computation after another computation. 119 // The positions of the computations and tokens in the second computation 120 // are "shifted" to start immediately after the end (consumed length) of 121 // the first computation. 122 func (c *computation) Append(other *computation) *computation { 123 if c == nil { 124 return other 125 } else if other == nil { 126 return c 127 } 128 129 // This is the AVL join algorithm from 130 // Blelloch, G. E., Ferizovic, D., & Sun, Y. (2016). Just join for parallel ordered sets. 131 // In Proceedings of the 28th ACM Symposium on Parallelism in Algorithms and Architectures. 132 h1, h2 := c.TreeHeight(), other.TreeHeight() 133 if h1 == h2 { 134 return computationFromChildren(c, other) 135 } else if h1 < h2 { 136 return other.prependSubtree(c) 137 } else { 138 return c.appendSubtree(other) 139 } 140 } 141 142 // prependSubtree inserts a computation *before* a given computation, 143 // rebalancing the tree if necessary (AVL balance invariant). 144 // This assumes that both computations are non-nil. 145 func (c *computation) prependSubtree(other *computation) *computation { 146 if c.leftChild.TreeHeight() <= other.TreeHeight()+1 { 147 // Insert the new tree as a sibling of a left child with approximately the same height. 148 newLeft := computationFromChildren(other, c.leftChild) 149 if newLeft.TreeHeight() <= c.rightChild.TreeHeight()+1 { 150 // The new tree already satisfies the AVL balance invariant. 151 return computationFromChildren(newLeft, c.rightChild) 152 } else { 153 // The new tree violates the AVL balance invariant. 154 // Double-rotate to restore balance. 155 return computationFromChildren(newLeft.rotateLeft(), c.rightChild).rotateRight() 156 } 157 } 158 159 // Recursively search for a sibling with approximately the same height as the inserted subtree. 
160 newLeft := c.leftChild.prependSubtree(other) 161 newRoot := computationFromChildren(newLeft, c.rightChild) 162 if newLeft.TreeHeight() <= c.rightChild.TreeHeight()+1 { 163 // The new tree already satisfies the AVL balance invariant. 164 return newRoot 165 } else { 166 // The new tree violates the AVL balance invariant. 167 // Rotate to restore balance. 168 return newRoot.rotateRight() 169 } 170 } 171 172 // appendSubtree inserts a computation *after* a given computation, 173 // rebalancing the tree if necessary (AVL balance invariant). 174 // This assumes that both computations are non-nil. 175 func (c *computation) appendSubtree(other *computation) *computation { 176 if c.rightChild.TreeHeight() <= other.TreeHeight()+1 { 177 // Insert the new tree as a sibling of a right child with approximately the same height. 178 newRight := computationFromChildren(c.rightChild, other) 179 if newRight.TreeHeight() <= c.leftChild.TreeHeight()+1 { 180 // The new tree already satisfies the AVL balance invariant. 181 return computationFromChildren(c.leftChild, newRight) 182 } else { 183 // The new tree violates the AVL balance invariant. 184 // Double-rotate to restore balance. 185 return computationFromChildren(c.leftChild, newRight.rotateRight()).rotateLeft() 186 } 187 } 188 189 // Recursively search for a sibling with approximately the same height as the inserted subtree. 190 newRight := c.rightChild.appendSubtree(other) 191 newRoot := computationFromChildren(c.leftChild, newRight) 192 if newRight.TreeHeight() <= c.leftChild.TreeHeight()+1 { 193 // The new tree already satisfies the AVL balance invariant. 194 return newRoot 195 } else { 196 // The new tree violates the AVL balance invariant. 197 // Rotate to restore balance. 198 return newRoot.rotateLeft() 199 } 200 } 201 202 func (c *computation) rotateLeft() *computation { 203 if c == nil || c.rightChild == nil { 204 // Can't rotate left for an empty tree or tree without a right child. 
205 return c 206 } 207 208 // [x] [y'] 209 // / \ / \ 210 // [q] [y] ==> [x'] [s] 211 // / \ / \ 212 // [r] [s] [q] [r] 213 x := c 214 y := x.rightChild 215 q := x.leftChild 216 r := y.leftChild 217 s := y.rightChild 218 219 if r == nil && s == nil { 220 // If y is a leaf, then we can't rotate it into an inner node 221 // without losing information about the original computation, 222 // so copy y into the leaf node position. 223 // This does not change the height of the resulting tree. 224 s = y 225 } 226 227 return computationFromChildren(computationFromChildren(q, r), s) 228 } 229 230 func (c *computation) rotateRight() *computation { 231 if c == nil || c.leftChild == nil { 232 // Can't rotate right for an empty tree or tree without a left child. 233 return c 234 } 235 236 // [x] [y'] 237 // / \ / \ 238 // [y] [s] ==> [q] [x'] 239 // / \ / \ 240 // [q] [r] [r] [s] 241 x := c 242 y := x.leftChild 243 q := y.leftChild 244 r := y.rightChild 245 s := x.rightChild 246 247 if q == nil && r == nil { 248 // If y is a leaf, then we can't rotate it into an inner node 249 // without losing information about the original computation, 250 // so copy y into the leaf node position. 251 // This does not change the height of the resulting tree. 
252 q = y 253 } 254 255 return computationFromChildren(q, computationFromChildren(r, s)) 256 } 257 258 func computationFromChildren(leftChild, rightChild *computation) *computation { 259 var startState, endState State 260 261 if leftChild == nil && rightChild == nil { 262 return nil 263 } else if leftChild == nil { 264 startState, endState = rightChild.StartState(), rightChild.EndState() 265 } else if rightChild == nil { 266 startState, endState = leftChild.StartState(), leftChild.EndState() 267 } else { 268 startState, endState = leftChild.StartState(), rightChild.EndState() 269 } 270 271 maxChildTreeHeight := leftChild.TreeHeight() 272 if rightChild.TreeHeight() > maxChildTreeHeight { 273 maxChildTreeHeight = rightChild.TreeHeight() 274 } 275 276 // Right child starts reading after last character consumed by left child. 277 maxReadLength := leftChild.ConsumedLength() + rightChild.ReadLength() 278 if leftChild.ReadLength() > maxReadLength { 279 maxReadLength = leftChild.ReadLength() 280 } 281 282 return &computation{ 283 readLength: maxReadLength, 284 consumedLength: leftChild.ConsumedLength() + rightChild.ConsumedLength(), 285 treeHeight: maxChildTreeHeight + 1, 286 startState: startState, 287 endState: endState, 288 leftChild: leftChild, 289 rightChild: rightChild, 290 } 291 } 292 293 // LargestMatchingSubComputation returns the largest sub-computation that has both 294 // (1) a read range contained within the requested range and (2) a start state 295 // that matches the requested state. 296 // This is used to find a re-usable computation that is still valid after an edit. 297 // A computation is considered *invalid* if it read some text that was edited, 298 // so if the computation did *not* read any edited text, it's definitely still valid. 
299 func (c *computation) LargestMatchingSubComputation( 300 rangeStartPos, rangeEndPos uint64, 301 state State, 302 ) *computation { 303 return c.largestSubComputationInRange(0, c.readLength, rangeStartPos, rangeEndPos, state) 304 } 305 306 func (c *computation) largestSubComputationInRange( 307 readStartPos, readEndPos uint64, 308 rangeStartPos, rangeEndPos uint64, 309 state State, 310 ) *computation { 311 312 // First, search until we find a sub-computation with the requested start position. 313 if readStartPos != rangeStartPos { 314 if c.leftChild == nil && c.rightChild == nil { 315 return nil 316 } else if c.leftChild == nil { 317 // Right child has no sibling, so there's only one direction to search. 318 return c.rightChild.largestSubComputationInRange( 319 readStartPos, 320 readEndPos, 321 rangeStartPos, 322 rangeEndPos, 323 state, 324 ) 325 } else if c.rightChild == nil { 326 // Left child has no sibling, so there's only one direction to search. 327 return c.leftChild.largestSubComputationInRange( 328 readStartPos, 329 readEndPos, 330 rangeStartPos, 331 rangeEndPos, 332 state, 333 ) 334 } else if rangeStartPos < readStartPos+c.leftChild.consumedLength { 335 return c.leftChild.largestSubComputationInRange( 336 readStartPos, 337 readStartPos+c.leftChild.readLength, 338 rangeStartPos, 339 rangeEndPos, 340 state, 341 ) 342 } else { 343 // Right child starts reading after last character consumed by left child. 344 newReadStartPos := readStartPos + c.leftChild.consumedLength 345 newReadEndPos := newReadStartPos + c.rightChild.readLength 346 return c.rightChild.largestSubComputationInRange( 347 newReadStartPos, 348 newReadEndPos, 349 rangeStartPos, 350 rangeEndPos, 351 state, 352 ) 353 } 354 } 355 356 // Keep searching smaller and smaller sub-computations with the requested start position 357 // until we find one that didn't read past the end position. 
358 if readEndPos > rangeEndPos { 359 if c.leftChild == nil && c.rightChild == nil { 360 return nil 361 } else if c.leftChild == nil { 362 // Right child has no sibling, so there's only one direction to search. 363 return c.rightChild.largestSubComputationInRange( 364 readStartPos, 365 readEndPos, 366 rangeStartPos, 367 rangeEndPos, 368 state, 369 ) 370 } else if c.rightChild == nil { 371 // Left child has no sibling, so there's only one direction to search. 372 return c.leftChild.largestSubComputationInRange( 373 readStartPos, 374 readEndPos, 375 rangeStartPos, 376 rangeEndPos, 377 state, 378 ) 379 } else { 380 return c.leftChild.largestSubComputationInRange( 381 readStartPos, 382 readStartPos+c.leftChild.readLength, 383 rangeStartPos, 384 rangeEndPos, 385 state, 386 ) 387 } 388 } 389 390 // If the start state doesn't match, we can't re-use this computation. 391 if !c.StartState().Equals(state) { 392 return nil 393 } 394 395 return c 396 } 397 398 // TokenAtPosition returns the token containing a position. 399 // If no such token exists, it returns the Token zero value. 400 func (c *computation) TokenAtPosition(pos uint64) Token { 401 var offset uint64 402 for c != nil && pos >= offset && pos < offset+c.consumedLength { 403 // If this is a leaf computation, it will have tokens. 404 // Check if any of them contain the target position. 405 for _, computedToken := range c.tokens { 406 token := Token{ 407 StartPos: offset + computedToken.Offset, 408 EndPos: offset + computedToken.Offset + computedToken.Length, 409 Role: computedToken.Role, 410 } 411 if pos >= token.StartPos && pos < token.EndPos { 412 // Found a token at the target position. 413 return token 414 } 415 } 416 417 if c.leftChild != nil && pos < offset+c.leftChild.consumedLength { 418 // Left child contains the position, so recurse left. 419 c = c.leftChild 420 } else { 421 // Otherwise, recurse right. 
422 if c.leftChild != nil { 423 offset += c.leftChild.consumedLength 424 } 425 c = c.rightChild 426 } 427 } 428 429 // No token found at the target position. 430 return Token{} 431 } 432 433 // TokensIntersectingRange returns tokens that overlap the interval [startPos, endPos) 434 func (c *computation) TokensIntersectingRange(startPos, endPos uint64) []Token { 435 if c == nil { 436 return nil 437 } 438 439 var result []Token 440 441 type stackItem struct { 442 offset uint64 443 c *computation 444 } 445 item := stackItem{offset: 0, c: c} 446 stack := []stackItem{item} 447 448 for len(stack) > 0 { 449 item, stack = stack[len(stack)-1], stack[0:len(stack)-1] 450 offset, c := item.offset, item.c 451 452 if endPos <= offset || offset+c.consumedLength <= startPos { 453 // The range doesn't intersect this computation or any of its children. 454 continue 455 } 456 457 // Find all tokens from this computation that intersect the range 458 // (only leaf nodes have tokens). 459 for _, computedToken := range c.tokens { 460 tok := Token{ 461 StartPos: offset + computedToken.Offset, 462 EndPos: offset + computedToken.Offset + computedToken.Length, 463 Role: computedToken.Role, 464 } 465 if !(endPos <= tok.StartPos || startPos >= tok.EndPos) { 466 result = append(result, tok) 467 } 468 } 469 470 // Add tokens from the right child, if it exists. 471 // Push this onto the stack first so tokens are added 472 // AFTER tokens from the left child. 473 if c.rightChild != nil { 474 newOffset := offset 475 if c.leftChild != nil { 476 newOffset += c.leftChild.consumedLength 477 } 478 stack = append(stack, stackItem{ 479 offset: newOffset, 480 c: c.rightChild, 481 }) 482 } 483 484 // Add tokens from the left child, if it exists. 485 if c.leftChild != nil { 486 stack = append(stack, stackItem{ 487 offset: offset, 488 c: c.leftChild, 489 }) 490 } 491 } 492 493 return result 494 } 495 496 // concatLeafComputations combines leaf computations into a single computation. 
497 // A leaf computation is a computation constructed by newComputation 498 // without any other computations appended. 499 // This produces the same result as sequentially appending the computations, 500 // but does so more efficiently. 501 func concatLeafComputations(computations []*computation) *computation { 502 if len(computations) == 0 { 503 return nil 504 } 505 506 for _, c := range computations { 507 if c.TreeHeight() > 1 { 508 panic("Expected computation to be a leaf") 509 } 510 } 511 512 // Construct the tree layer-by-layer. This is cheaper than 513 // calling Append repeatedly, because every node we allocate 514 // will be used in the final tree. Additionally, we avoid 515 // the cost of rebalancing the tree since it's balanced by construction. 516 nextComputations := make([]*computation, 0, len(computations)/2+1) 517 for len(computations) > 1 { 518 var i int 519 for i < len(computations) { 520 if i+1 < len(computations) { 521 c1, c2 := computations[i], computations[i+1] 522 nextComputations = append(nextComputations, c1.Append(c2)) 523 i += 2 524 } else { 525 c := computations[i] 526 nextComputations = append(nextComputations, c) 527 i++ 528 } 529 } 530 computations = nextComputations 531 nextComputations = nextComputations[:0] 532 } 533 534 return computations[0] 535 }