github.com/aretext/aretext@v1.3.0/syntax/parser/computation.go (about)

     1  package parser
     2  
     3  // ComputedToken is a token recognized by a computation.
     4  type ComputedToken struct {
     5  	// Offset is the token's start position,
     6  	// defined relative to the computation's start position.
     7  	Offset uint64
     8  	Length uint64
     9  	Role   TokenRole
    10  }
    11  
    12  // computation is a result produced by a parser.
    13  // computations are composable, so part of one computation
    14  // can be re-used when re-parsing an edited text.
    15  type computation struct {
    16  	readLength     uint64
    17  	consumedLength uint64
    18  	treeHeight     uint64
    19  	startState     State
    20  	endState       State
    21  	tokens         []ComputedToken // Only in leaves.
    22  	leftChild      *computation
    23  	rightChild     *computation
    24  }
    25  
    26  // newComputation constructs a computation.
    27  // readLength is the number of runes read by the parser,
    28  // and consumedLength is the number of runes consumed by the parser.
    29  // The tokens slice contains any tokens recognized by the parser;
    30  // these must have non-zero length, be ordered sequentially by start position,
    31  // and be non-overlapping.
    32  func newComputation(
    33  	readLength uint64,
    34  	consumedLength uint64,
    35  	startState State,
    36  	endState State,
    37  	tokens []ComputedToken,
    38  ) *computation {
    39  	if consumedLength == 0 {
    40  		panic("computation must consume at least one rune")
    41  	}
    42  
    43  	if consumedLength > readLength {
    44  		panic("Consumed length must be less than or equal to read length")
    45  	}
    46  
    47  	var lastEndPos uint64
    48  	for _, tok := range tokens {
    49  		if tok.Length == 0 {
    50  			panic("Token must have non-zero length")
    51  		}
    52  
    53  		if tok.Offset < lastEndPos {
    54  			panic("Token must be sequential and non-overlapping")
    55  		}
    56  
    57  		tokEndPos := tok.Offset + tok.Length
    58  		if tokEndPos > consumedLength {
    59  			panic("Token length must be less than consumed length")
    60  		}
    61  
    62  		lastEndPos = tokEndPos
    63  	}
    64  
    65  	return &computation{
    66  		readLength:     readLength,
    67  		consumedLength: consumedLength,
    68  		treeHeight:     1,
    69  		startState:     startState,
    70  		endState:       endState,
    71  		tokens:         tokens,
    72  	}
    73  }
    74  
    75  // ReadLength returns the number of runes read to produce this computation.
    76  func (c *computation) ReadLength() uint64 {
    77  	if c == nil {
    78  		return 0
    79  	} else {
    80  		return c.readLength
    81  	}
    82  }
    83  
    84  // ConsumedLength returns the number of runes consumed to produce this computation.
    85  func (c *computation) ConsumedLength() uint64 {
    86  	if c == nil {
    87  		return 0
    88  	} else {
    89  		return c.consumedLength
    90  	}
    91  }
    92  
    93  // TreeHeight returns the height of the computation tree.
    94  func (c *computation) TreeHeight() uint64 {
    95  	if c == nil {
    96  		return 0
    97  	} else {
    98  		return c.treeHeight
    99  	}
   100  }
   101  
   102  // StartState returns the parse state at the start of the computation.
   103  func (c *computation) StartState() State {
   104  	if c == nil {
   105  		return EmptyState{}
   106  	}
   107  	return c.startState
   108  }
   109  
   110  // EndState returns the parse state at the end of the computation.
   111  func (c *computation) EndState() State {
   112  	if c == nil {
   113  		return EmptyState{}
   114  	}
   115  	return c.endState
   116  }
   117  
   118  // Append appends one computation after another computation.
   119  // The positions of the computations and tokens in the second computation
   120  // are "shifted" to start immediately after the end (consumed length) of
   121  // the first computation.
   122  func (c *computation) Append(other *computation) *computation {
   123  	if c == nil {
   124  		return other
   125  	} else if other == nil {
   126  		return c
   127  	}
   128  
   129  	// This is the AVL join algorithm from
   130  	// Blelloch, G. E., Ferizovic, D., & Sun, Y. (2016). Just join for parallel ordered sets.
   131  	// In Proceedings of the 28th ACM Symposium on Parallelism in Algorithms and Architectures.
   132  	h1, h2 := c.TreeHeight(), other.TreeHeight()
   133  	if h1 == h2 {
   134  		return computationFromChildren(c, other)
   135  	} else if h1 < h2 {
   136  		return other.prependSubtree(c)
   137  	} else {
   138  		return c.appendSubtree(other)
   139  	}
   140  }
   141  
   142  // prependSubtree inserts a computation *before* a given computation,
   143  // rebalancing the tree if necessary (AVL balance invariant).
   144  // This assumes that both computations are non-nil.
   145  func (c *computation) prependSubtree(other *computation) *computation {
   146  	if c.leftChild.TreeHeight() <= other.TreeHeight()+1 {
   147  		// Insert the new tree as a sibling of a left child with approximately the same height.
   148  		newLeft := computationFromChildren(other, c.leftChild)
   149  		if newLeft.TreeHeight() <= c.rightChild.TreeHeight()+1 {
   150  			// The new tree already satisfies the AVL balance invariant.
   151  			return computationFromChildren(newLeft, c.rightChild)
   152  		} else {
   153  			// The new tree violates the AVL balance invariant.
   154  			// Double-rotate to restore balance.
   155  			return computationFromChildren(newLeft.rotateLeft(), c.rightChild).rotateRight()
   156  		}
   157  	}
   158  
   159  	// Recursively search for a sibling with approximately the same height as the inserted subtree.
   160  	newLeft := c.leftChild.prependSubtree(other)
   161  	newRoot := computationFromChildren(newLeft, c.rightChild)
   162  	if newLeft.TreeHeight() <= c.rightChild.TreeHeight()+1 {
   163  		// The new tree already satisfies the AVL balance invariant.
   164  		return newRoot
   165  	} else {
   166  		// The new tree violates the AVL balance invariant.
   167  		// Rotate to restore balance.
   168  		return newRoot.rotateRight()
   169  	}
   170  }
   171  
   172  // appendSubtree inserts a computation *after* a given computation,
   173  // rebalancing the tree if necessary (AVL balance invariant).
   174  // This assumes that both computations are non-nil.
   175  func (c *computation) appendSubtree(other *computation) *computation {
   176  	if c.rightChild.TreeHeight() <= other.TreeHeight()+1 {
   177  		// Insert the new tree as a sibling of a right child with approximately the same height.
   178  		newRight := computationFromChildren(c.rightChild, other)
   179  		if newRight.TreeHeight() <= c.leftChild.TreeHeight()+1 {
   180  			// The new tree already satisfies the AVL balance invariant.
   181  			return computationFromChildren(c.leftChild, newRight)
   182  		} else {
   183  			// The new tree violates the AVL balance invariant.
   184  			// Double-rotate to restore balance.
   185  			return computationFromChildren(c.leftChild, newRight.rotateRight()).rotateLeft()
   186  		}
   187  	}
   188  
   189  	// Recursively search for a sibling with approximately the same height as the inserted subtree.
   190  	newRight := c.rightChild.appendSubtree(other)
   191  	newRoot := computationFromChildren(c.leftChild, newRight)
   192  	if newRight.TreeHeight() <= c.leftChild.TreeHeight()+1 {
   193  		// The new tree already satisfies the AVL balance invariant.
   194  		return newRoot
   195  	} else {
   196  		// The new tree violates the AVL balance invariant.
   197  		// Rotate to restore balance.
   198  		return newRoot.rotateLeft()
   199  	}
   200  }
   201  
   202  func (c *computation) rotateLeft() *computation {
   203  	if c == nil || c.rightChild == nil {
   204  		// Can't rotate left for an empty tree or tree without a right child.
   205  		return c
   206  	}
   207  
   208  	//    [x]                [y']
   209  	//   /   \              /   \
   210  	//  [q]  [y]    ==>   [x']   [s]
   211  	//      /   \        /   \
   212  	//    [r]   [s]     [q]  [r]
   213  	x := c
   214  	y := x.rightChild
   215  	q := x.leftChild
   216  	r := y.leftChild
   217  	s := y.rightChild
   218  
   219  	if r == nil && s == nil {
   220  		// If y is a leaf, then we can't rotate it into an inner node
   221  		// without losing information about the original computation,
   222  		// so copy y into the leaf node position.
   223  		// This does not change the height of the resulting tree.
   224  		s = y
   225  	}
   226  
   227  	return computationFromChildren(computationFromChildren(q, r), s)
   228  }
   229  
   230  func (c *computation) rotateRight() *computation {
   231  	if c == nil || c.leftChild == nil {
   232  		// Can't rotate right for an empty tree or tree without a left child.
   233  		return c
   234  	}
   235  
   236  	//       [x]                [y']
   237  	//      /   \              /   \
   238  	//     [y]  [s]    ==>   [q]   [x']
   239  	//    /   \                    /   \
   240  	//  [q]   [r]                [r]   [s]
   241  	x := c
   242  	y := x.leftChild
   243  	q := y.leftChild
   244  	r := y.rightChild
   245  	s := x.rightChild
   246  
   247  	if q == nil && r == nil {
   248  		// If y is a leaf, then we can't rotate it into an inner node
   249  		// without losing information about the original computation,
   250  		// so copy y into the leaf node position.
   251  		// This does not change the height of the resulting tree.
   252  		q = y
   253  	}
   254  
   255  	return computationFromChildren(q, computationFromChildren(r, s))
   256  }
   257  
   258  func computationFromChildren(leftChild, rightChild *computation) *computation {
   259  	var startState, endState State
   260  
   261  	if leftChild == nil && rightChild == nil {
   262  		return nil
   263  	} else if leftChild == nil {
   264  		startState, endState = rightChild.StartState(), rightChild.EndState()
   265  	} else if rightChild == nil {
   266  		startState, endState = leftChild.StartState(), leftChild.EndState()
   267  	} else {
   268  		startState, endState = leftChild.StartState(), rightChild.EndState()
   269  	}
   270  
   271  	maxChildTreeHeight := leftChild.TreeHeight()
   272  	if rightChild.TreeHeight() > maxChildTreeHeight {
   273  		maxChildTreeHeight = rightChild.TreeHeight()
   274  	}
   275  
   276  	// Right child starts reading after last character consumed by left child.
   277  	maxReadLength := leftChild.ConsumedLength() + rightChild.ReadLength()
   278  	if leftChild.ReadLength() > maxReadLength {
   279  		maxReadLength = leftChild.ReadLength()
   280  	}
   281  
   282  	return &computation{
   283  		readLength:     maxReadLength,
   284  		consumedLength: leftChild.ConsumedLength() + rightChild.ConsumedLength(),
   285  		treeHeight:     maxChildTreeHeight + 1,
   286  		startState:     startState,
   287  		endState:       endState,
   288  		leftChild:      leftChild,
   289  		rightChild:     rightChild,
   290  	}
   291  }
   292  
   293  // LargestMatchingSubComputation returns the largest sub-computation that has both
   294  // (1) a read range contained within the requested range and (2) a start state
   295  // that matches the requested state.
   296  // This is used to find a re-usable computation that is still valid after an edit.
   297  // A computation is considered *invalid* if it read some text that was edited,
   298  // so if the computation did *not* read any edited text, it's definitely still valid.
   299  func (c *computation) LargestMatchingSubComputation(
   300  	rangeStartPos, rangeEndPos uint64,
   301  	state State,
   302  ) *computation {
   303  	return c.largestSubComputationInRange(0, c.readLength, rangeStartPos, rangeEndPos, state)
   304  }
   305  
   306  func (c *computation) largestSubComputationInRange(
   307  	readStartPos, readEndPos uint64,
   308  	rangeStartPos, rangeEndPos uint64,
   309  	state State,
   310  ) *computation {
   311  
   312  	// First, search until we find a sub-computation with the requested start position.
   313  	if readStartPos != rangeStartPos {
   314  		if c.leftChild == nil && c.rightChild == nil {
   315  			return nil
   316  		} else if c.leftChild == nil {
   317  			// Right child has no sibling, so there's only one direction to search.
   318  			return c.rightChild.largestSubComputationInRange(
   319  				readStartPos,
   320  				readEndPos,
   321  				rangeStartPos,
   322  				rangeEndPos,
   323  				state,
   324  			)
   325  		} else if c.rightChild == nil {
   326  			// Left child has no sibling, so there's only one direction to search.
   327  			return c.leftChild.largestSubComputationInRange(
   328  				readStartPos,
   329  				readEndPos,
   330  				rangeStartPos,
   331  				rangeEndPos,
   332  				state,
   333  			)
   334  		} else if rangeStartPos < readStartPos+c.leftChild.consumedLength {
   335  			return c.leftChild.largestSubComputationInRange(
   336  				readStartPos,
   337  				readStartPos+c.leftChild.readLength,
   338  				rangeStartPos,
   339  				rangeEndPos,
   340  				state,
   341  			)
   342  		} else {
   343  			// Right child starts reading after last character consumed by left child.
   344  			newReadStartPos := readStartPos + c.leftChild.consumedLength
   345  			newReadEndPos := newReadStartPos + c.rightChild.readLength
   346  			return c.rightChild.largestSubComputationInRange(
   347  				newReadStartPos,
   348  				newReadEndPos,
   349  				rangeStartPos,
   350  				rangeEndPos,
   351  				state,
   352  			)
   353  		}
   354  	}
   355  
   356  	// Keep searching smaller and smaller sub-computations with the requested start position
   357  	// until we find one that didn't read past the end position.
   358  	if readEndPos > rangeEndPos {
   359  		if c.leftChild == nil && c.rightChild == nil {
   360  			return nil
   361  		} else if c.leftChild == nil {
   362  			// Right child has no sibling, so there's only one direction to search.
   363  			return c.rightChild.largestSubComputationInRange(
   364  				readStartPos,
   365  				readEndPos,
   366  				rangeStartPos,
   367  				rangeEndPos,
   368  				state,
   369  			)
   370  		} else if c.rightChild == nil {
   371  			// Left child has no sibling, so there's only one direction to search.
   372  			return c.leftChild.largestSubComputationInRange(
   373  				readStartPos,
   374  				readEndPos,
   375  				rangeStartPos,
   376  				rangeEndPos,
   377  				state,
   378  			)
   379  		} else {
   380  			return c.leftChild.largestSubComputationInRange(
   381  				readStartPos,
   382  				readStartPos+c.leftChild.readLength,
   383  				rangeStartPos,
   384  				rangeEndPos,
   385  				state,
   386  			)
   387  		}
   388  	}
   389  
   390  	// If the start state doesn't match, we can't re-use this computation.
   391  	if !c.StartState().Equals(state) {
   392  		return nil
   393  	}
   394  
   395  	return c
   396  }
   397  
   398  // TokenAtPosition returns the token containing a position.
   399  // If no such token exists, it returns the Token zero value.
   400  func (c *computation) TokenAtPosition(pos uint64) Token {
   401  	var offset uint64
   402  	for c != nil && pos >= offset && pos < offset+c.consumedLength {
   403  		// If this is a leaf computation, it will have tokens.
   404  		// Check if any of them contain the target position.
   405  		for _, computedToken := range c.tokens {
   406  			token := Token{
   407  				StartPos: offset + computedToken.Offset,
   408  				EndPos:   offset + computedToken.Offset + computedToken.Length,
   409  				Role:     computedToken.Role,
   410  			}
   411  			if pos >= token.StartPos && pos < token.EndPos {
   412  				// Found a token at the target position.
   413  				return token
   414  			}
   415  		}
   416  
   417  		if c.leftChild != nil && pos < offset+c.leftChild.consumedLength {
   418  			// Left child contains the position, so recurse left.
   419  			c = c.leftChild
   420  		} else {
   421  			// Otherwise, recurse right.
   422  			if c.leftChild != nil {
   423  				offset += c.leftChild.consumedLength
   424  			}
   425  			c = c.rightChild
   426  		}
   427  	}
   428  
   429  	// No token found at the target position.
   430  	return Token{}
   431  }
   432  
   433  // TokensIntersectingRange returns tokens that overlap the interval [startPos, endPos)
   434  func (c *computation) TokensIntersectingRange(startPos, endPos uint64) []Token {
   435  	if c == nil {
   436  		return nil
   437  	}
   438  
   439  	var result []Token
   440  
   441  	type stackItem struct {
   442  		offset uint64
   443  		c      *computation
   444  	}
   445  	item := stackItem{offset: 0, c: c}
   446  	stack := []stackItem{item}
   447  
   448  	for len(stack) > 0 {
   449  		item, stack = stack[len(stack)-1], stack[0:len(stack)-1]
   450  		offset, c := item.offset, item.c
   451  
   452  		if endPos <= offset || offset+c.consumedLength <= startPos {
   453  			// The range doesn't intersect this computation or any of its children.
   454  			continue
   455  		}
   456  
   457  		// Find all tokens from this computation that intersect the range
   458  		// (only leaf nodes have tokens).
   459  		for _, computedToken := range c.tokens {
   460  			tok := Token{
   461  				StartPos: offset + computedToken.Offset,
   462  				EndPos:   offset + computedToken.Offset + computedToken.Length,
   463  				Role:     computedToken.Role,
   464  			}
   465  			if !(endPos <= tok.StartPos || startPos >= tok.EndPos) {
   466  				result = append(result, tok)
   467  			}
   468  		}
   469  
   470  		// Add tokens from the right child, if it exists.
   471  		// Push this onto the stack first so tokens are added
   472  		// AFTER tokens from the left child.
   473  		if c.rightChild != nil {
   474  			newOffset := offset
   475  			if c.leftChild != nil {
   476  				newOffset += c.leftChild.consumedLength
   477  			}
   478  			stack = append(stack, stackItem{
   479  				offset: newOffset,
   480  				c:      c.rightChild,
   481  			})
   482  		}
   483  
   484  		// Add tokens from the left child, if it exists.
   485  		if c.leftChild != nil {
   486  			stack = append(stack, stackItem{
   487  				offset: offset,
   488  				c:      c.leftChild,
   489  			})
   490  		}
   491  	}
   492  
   493  	return result
   494  }
   495  
   496  // concatLeafComputations combines leaf computations into a single computation.
   497  // A leaf computation is a computation constructed by newComputation
   498  // without any other computations appended.
   499  // This produces the same result as sequentially appending the computations,
   500  // but does so more efficiently.
   501  func concatLeafComputations(computations []*computation) *computation {
   502  	if len(computations) == 0 {
   503  		return nil
   504  	}
   505  
   506  	for _, c := range computations {
   507  		if c.TreeHeight() > 1 {
   508  			panic("Expected computation to be a leaf")
   509  		}
   510  	}
   511  
   512  	// Construct the tree layer-by-layer.  This is cheaper than
   513  	// calling Append repeatedly, because every node we allocate
   514  	// will be used in the final tree.  Additionally, we avoid
   515  	// the cost of rebalancing the tree since it's balanced by construction.
   516  	nextComputations := make([]*computation, 0, len(computations)/2+1)
   517  	for len(computations) > 1 {
   518  		var i int
   519  		for i < len(computations) {
   520  			if i+1 < len(computations) {
   521  				c1, c2 := computations[i], computations[i+1]
   522  				nextComputations = append(nextComputations, c1.Append(c2))
   523  				i += 2
   524  			} else {
   525  				c := computations[i]
   526  				nextComputations = append(nextComputations, c)
   527  				i++
   528  			}
   529  		}
   530  		computations = nextComputations
   531  		nextComputations = nextComputations[:0]
   532  	}
   533  
   534  	return computations[0]
   535  }