github.com/aretext/aretext@v1.3.0/syntax/parser/parser.go (about) 1 package parser 2 3 import ( 4 "math" 5 6 "github.com/aretext/aretext/text" 7 ) 8 9 // Func incrementally parses a document into tokens. 10 // 11 // It returns the number of tokens consumed and a slice of tokens. 12 // The output MUST be deterministic based solely on the input args. 13 // 14 // Each invocation of the function is cached and may be reused 15 // when reparsing the document after an edit. 16 // 17 // The returned tokens must be sequential, non-overlapping, 18 // have non-zero length, and have positions within the range 19 // of consumed characters. 20 // 21 // Every successful parse must consume at least one rune. 22 // 23 // The state parameter allows the parse func to track state across invocations. 24 // The initial state is always EmptyState. The parse func must return a non-nil 25 // state, which will be passed back to the parse func on the next invocation. 26 type Func func(TrackingRuneIter, State) Result 27 28 // Result represents the result of a single execution of a parse function. 29 type Result struct { 30 NumConsumed uint64 31 ComputedTokens []ComputedToken 32 NextState State 33 } 34 35 // FailedResult represents a failed parse. 36 var FailedResult = Result{} 37 38 // IsSuccess returns whether the parse succeeded. 39 func (r Result) IsSuccess() bool { 40 return r.NumConsumed > 0 41 } 42 43 // IsFailure returns whether the parse failed. 44 func (r Result) IsFailure() bool { 45 return !r.IsSuccess() 46 } 47 48 // ShiftForward shifts the result offsets forward by the specified number of positions. 49 func (r Result) ShiftForward(n uint64) Result { 50 if n > 0 { 51 r.NumConsumed += n 52 for i := 0; i < len(r.ComputedTokens); i++ { 53 r.ComputedTokens[i].Offset += n 54 } 55 } 56 return r 57 } 58 59 // P parses a document into tokens. 60 // It caches the results from the last parse so it can efficiently 61 // reparse a document after an edit (insertion/deletion). 62 type P struct { 63 parseFunc Func 64 lastComputation *computation 65 } 66 67 // New constructs a new parser for the language recognized by parseFunc. 68 func New(f Func) *P { 69 // This ensures that the parse func always makes progress. 70 f = f.recoverFromFailure() 71 return &P{parseFunc: f} 72 } 73 74 // TokenAtPosition returns the token containing a position. 75 // If no such token exists, it returns the Token zero value. 76 func (p *P) TokenAtPosition(pos uint64) Token { 77 return p.lastComputation.TokenAtPosition(pos) 78 } 79 80 // TokensIntersectingRange returns tokens that overlap the interval [startPos, endPos) 81 func (p *P) TokensIntersectingRange(startPos, endPos uint64) []Token { 82 return p.lastComputation.TokensIntersectingRange(startPos, endPos) 83 } 84 85 // Minimum consumed length for leaf computations on initial parse. 86 const minInitialConsumedLen = 1024 87 88 // ParseAll parses the entire document. 89 func (p *P) ParseAll(tree *text.Tree) { 90 var pos uint64 91 var prevComputation *computation 92 state := State(EmptyState{}) 93 leafComputations := make([]*computation, 0) 94 n := tree.NumChars() 95 for pos < n { 96 c := p.runParseFunc(tree, pos, state) 97 pos += c.ConsumedLength() 98 state = c.EndState() 99 100 if prevComputation != nil && prevComputation.ConsumedLength() < minInitialConsumedLen { 101 // For the initial parse, combine small leaves. This saves memory by reducing both 102 // the number of leaves and parent nodes we need to allocate. 103 combineLeaves(prevComputation, c) 104 } else { 105 leafComputations = append(leafComputations, c) 106 prevComputation = c 107 } 108 } 109 c := concatLeafComputations(leafComputations) 110 p.lastComputation = c 111 } 112 113 func combineLeaves(prev, next *computation) { 114 for _, tok := range next.tokens { 115 tok.Offset += prev.consumedLength 116 prev.tokens = append(prev.tokens, tok) 117 } 118 prev.consumedLength += next.consumedLength 119 prev.readLength += next.readLength 120 prev.endState = next.endState 121 } 122 123 // ReparseAfterEdit parses a document after an edit (insertion/deletion), 124 // re-using cached results from previous computations when possible. 125 // This should be called *after* at least one invocation of ParseAll(). 126 // It must be called for *every* edit to the document, otherwise the 127 // tokens may not match the current state of the document. 128 func (p *P) ReparseAfterEdit(tree *text.Tree, edit Edit) { 129 var pos uint64 130 var c *computation 131 state := State(EmptyState{}) 132 n := tree.NumChars() 133 for pos < n { 134 nextComputation := p.findReusableComputation(pos, edit, state) 135 if nextComputation == nil { 136 nextComputation = p.runParseFunc(tree, pos, state) 137 } 138 state = nextComputation.EndState() 139 pos += nextComputation.ConsumedLength() 140 c = c.Append(nextComputation) 141 } 142 p.lastComputation = c 143 } 144 145 func (p *P) runParseFunc(tree *text.Tree, pos uint64, state State) *computation { 146 reader := tree.ReaderAtPosition(pos) 147 trackingIter := NewTrackingRuneIter(reader) 148 result := p.parseFunc(trackingIter, state) 149 return newComputation( 150 trackingIter.MaxRead(), 151 result.NumConsumed, 152 state, 153 result.NextState, 154 result.ComputedTokens, 155 ) 156 } 157 158 func (p *P) findReusableComputation(pos uint64, edit Edit, state State) *computation { 159 if pos < edit.pos { 160 // If the parser is starting before the edit, look for a subcomputation 161 // from that position up to the start of the edit. 162 return p.lastComputation.LargestMatchingSubComputation( 163 pos, 164 edit.pos, 165 state, 166 ) 167 } 168 169 if edit.numInserted > 0 && pos >= edit.pos+edit.numInserted { 170 // If the parser is past the last character inserted, 171 // translate the position to the previous document by subtracting 172 // the number of inserted characters. 173 return p.lastComputation.LargestMatchingSubComputation( 174 pos-edit.numInserted, 175 math.MaxUint64, 176 state, 177 ) 178 } 179 180 if edit.numDeleted > 0 && pos >= edit.pos { 181 // If the parser is past a deletion, 182 // translate the position to the previous document by adding 183 // the number of deleted characters. 184 return p.lastComputation.LargestMatchingSubComputation( 185 pos+edit.numDeleted, 186 math.MaxUint64, 187 state, 188 ) 189 } 190 191 // The parser is starting within the edit range, so we can can't re-use 192 // any of the last computation. 193 return nil 194 }