github.com/aretext/aretext@v1.3.0/syntax/parser/parser.go (about)

     1  package parser
     2  
     3  import (
     4  	"math"
     5  
     6  	"github.com/aretext/aretext/text"
     7  )
     8  
// Func incrementally parses a document into tokens.
//
// It returns the number of runes consumed and a slice of computed tokens.
// The output MUST be deterministic based solely on the input args,
// because each invocation of the function is cached and may be reused
// when reparsing the document after an edit.
//
// The returned tokens must be sequential, non-overlapping,
// have non-zero length, and have positions within the range
// of consumed characters.
//
// Every successful parse must consume at least one rune; otherwise the
// parser could loop forever without making progress.
//
// The state parameter allows the parse func to track state across invocations.
// The initial state is always EmptyState.  The parse func must return a non-nil
// state, which will be passed back to the parse func on the next invocation.
type Func func(TrackingRuneIter, State) Result
    27  
// Result represents the result of a single execution of a parse function.
type Result struct {
	NumConsumed    uint64          // Number of runes consumed; zero indicates a failed parse.
	ComputedTokens []ComputedToken // Tokens produced, with offsets relative to the parse start position.
	NextState      State           // State to pass to the next invocation of the parse func; must be non-nil on success.
}
    34  
// FailedResult represents a failed parse (zero runes consumed, no tokens).
var FailedResult = Result{}
    37  
    38  // IsSuccess returns whether the parse succeeded.
    39  func (r Result) IsSuccess() bool {
    40  	return r.NumConsumed > 0
    41  }
    42  
    43  // IsFailure returns whether the parse failed.
    44  func (r Result) IsFailure() bool {
    45  	return !r.IsSuccess()
    46  }
    47  
    48  // ShiftForward shifts the result offsets forward by the specified number of positions.
    49  func (r Result) ShiftForward(n uint64) Result {
    50  	if n > 0 {
    51  		r.NumConsumed += n
    52  		for i := 0; i < len(r.ComputedTokens); i++ {
    53  			r.ComputedTokens[i].Offset += n
    54  		}
    55  	}
    56  	return r
    57  }
    58  
// P parses a document into tokens.
// It caches the results from the last parse so it can efficiently
// reparse a document after an edit (insertion/deletion).
type P struct {
	parseFunc       Func         // Parse func wrapped to guarantee forward progress (see New).
	lastComputation *computation // Cached result tree from the most recent ParseAll/ReparseAfterEdit; nil before the first parse.
}
    66  
    67  // New constructs a new parser for the language recognized by parseFunc.
    68  func New(f Func) *P {
    69  	// This ensures that the parse func always makes progress.
    70  	f = f.recoverFromFailure()
    71  	return &P{parseFunc: f}
    72  }
    73  
    74  // TokenAtPosition returns the token containing a position.
    75  // If no such token exists, it returns the Token zero value.
    76  func (p *P) TokenAtPosition(pos uint64) Token {
    77  	return p.lastComputation.TokenAtPosition(pos)
    78  }
    79  
    80  // TokensIntersectingRange returns tokens that overlap the interval [startPos, endPos)
    81  func (p *P) TokensIntersectingRange(startPos, endPos uint64) []Token {
    82  	return p.lastComputation.TokensIntersectingRange(startPos, endPos)
    83  }
    84  
// Minimum consumed length for leaf computations on initial parse.
// Leaves smaller than this are merged together (see ParseAll) to reduce
// the number of tree nodes allocated for the first full parse.
const minInitialConsumedLen = 1024
    87  
    88  // ParseAll parses the entire document.
    89  func (p *P) ParseAll(tree *text.Tree) {
    90  	var pos uint64
    91  	var prevComputation *computation
    92  	state := State(EmptyState{})
    93  	leafComputations := make([]*computation, 0)
    94  	n := tree.NumChars()
    95  	for pos < n {
    96  		c := p.runParseFunc(tree, pos, state)
    97  		pos += c.ConsumedLength()
    98  		state = c.EndState()
    99  
   100  		if prevComputation != nil && prevComputation.ConsumedLength() < minInitialConsumedLen {
   101  			// For the initial parse, combine small leaves. This saves memory by reducing both
   102  			// the number of leaves and parent nodes we need to allocate.
   103  			combineLeaves(prevComputation, c)
   104  		} else {
   105  			leafComputations = append(leafComputations, c)
   106  			prevComputation = c
   107  		}
   108  	}
   109  	c := concatLeafComputations(leafComputations)
   110  	p.lastComputation = c
   111  }
   112  
   113  func combineLeaves(prev, next *computation) {
   114  	for _, tok := range next.tokens {
   115  		tok.Offset += prev.consumedLength
   116  		prev.tokens = append(prev.tokens, tok)
   117  	}
   118  	prev.consumedLength += next.consumedLength
   119  	prev.readLength += next.readLength
   120  	prev.endState = next.endState
   121  }
   122  
   123  // ReparseAfterEdit parses a document after an edit (insertion/deletion),
   124  // re-using cached results from previous computations when possible.
   125  // This should be called *after* at least one invocation of ParseAll().
   126  // It must be called for *every* edit to the document, otherwise the
   127  // tokens may not match the current state of the document.
   128  func (p *P) ReparseAfterEdit(tree *text.Tree, edit Edit) {
   129  	var pos uint64
   130  	var c *computation
   131  	state := State(EmptyState{})
   132  	n := tree.NumChars()
   133  	for pos < n {
   134  		nextComputation := p.findReusableComputation(pos, edit, state)
   135  		if nextComputation == nil {
   136  			nextComputation = p.runParseFunc(tree, pos, state)
   137  		}
   138  		state = nextComputation.EndState()
   139  		pos += nextComputation.ConsumedLength()
   140  		c = c.Append(nextComputation)
   141  	}
   142  	p.lastComputation = c
   143  }
   144  
   145  func (p *P) runParseFunc(tree *text.Tree, pos uint64, state State) *computation {
   146  	reader := tree.ReaderAtPosition(pos)
   147  	trackingIter := NewTrackingRuneIter(reader)
   148  	result := p.parseFunc(trackingIter, state)
   149  	return newComputation(
   150  		trackingIter.MaxRead(),
   151  		result.NumConsumed,
   152  		state,
   153  		result.NextState,
   154  		result.ComputedTokens,
   155  	)
   156  }
   157  
   158  func (p *P) findReusableComputation(pos uint64, edit Edit, state State) *computation {
   159  	if pos < edit.pos {
   160  		// If the parser is starting before the edit, look for a subcomputation
   161  		// from that position up to the start of the edit.
   162  		return p.lastComputation.LargestMatchingSubComputation(
   163  			pos,
   164  			edit.pos,
   165  			state,
   166  		)
   167  	}
   168  
   169  	if edit.numInserted > 0 && pos >= edit.pos+edit.numInserted {
   170  		// If the parser is past the last character inserted,
   171  		// translate the position to the previous document by subtracting
   172  		// the number of inserted characters.
   173  		return p.lastComputation.LargestMatchingSubComputation(
   174  			pos-edit.numInserted,
   175  			math.MaxUint64,
   176  			state,
   177  		)
   178  	}
   179  
   180  	if edit.numDeleted > 0 && pos >= edit.pos {
   181  		// If the parser is past a deletion,
   182  		// translate the position to the previous document by adding
   183  		// the number of deleted characters.
   184  		return p.lastComputation.LargestMatchingSubComputation(
   185  			pos+edit.numDeleted,
   186  			math.MaxUint64,
   187  			state,
   188  		)
   189  	}
   190  
   191  	// The parser is starting within the edit range, so we can can't re-use
   192  	// any of the last computation.
   193  	return nil
   194  }