github.com/jhump/protocompile@v0.0.0-20221021153901-4f6f732835e8/ast/file_info.go (about) 1 package ast 2 3 import ( 4 "fmt" 5 "sort" 6 ) 7 8 // FileInfo contains information about the contents of a source file, including 9 // details about comments and tokens. A lexer accumulates these details as it 10 // scans the file contents. This allows efficient representation of things like 11 // source positions. 12 type FileInfo struct { 13 // The name of the source file. 14 name string 15 // The raw contents of the source file. 16 data []byte 17 // The offsets for each line in the file. The value is the zero-based byte 18 // offset for a given line. The line is given by its index. So the value at 19 // index 0 is the offset for the first line (which is always zero). The 20 // value at index 1 is the offset at which the second line begins. Etc. 21 lines []int 22 // The info for every comment in the file. This is empty if the file has no 23 // comments. The first entry corresponds to the first comment in the file, 24 // and so on. 25 comments []commentInfo 26 // The info for every token in the file. The last item in the slice 27 // corresponds to the EOF, so every file (even an empty one) has at least 28 // one element. This includes all terminal symbols in the AST as well as 29 // all comments. However, it excludes rune nodes (which can be more 30 // compactly represented by an offset into data). 31 tokens []tokenSpan 32 } 33 34 type commentInfo struct { 35 // the index of the token, in the file's tokens slice, that represents this 36 // comment 37 index int 38 // the index of the token to which this comment is attributed. 39 attributedToIndex int 40 } 41 42 type tokenSpan struct { 43 // the offset into the file of the first character of a token. 44 offset int 45 // the length of the token 46 length int 47 } 48 49 // NewFileInfo creates a new instance for the given file. 50 func NewFileInfo(filename string, contents []byte) *FileInfo { 51 return &FileInfo{ 52 name: filename, 53 data: contents, 54 lines: []int{0}, 55 } 56 } 57 58 func (f *FileInfo) Name() string { 59 return f.name 60 } 61 62 // AddLine adds the offset representing the beginning of the "next" line in the file. 63 // The first line always starts at offset 0, the second line starts at offset-of-newline-char+1. 64 func (f *FileInfo) AddLine(offset int) { 65 if offset < 0 { 66 panic(fmt.Sprintf("invalid offset: %d must not be negative", offset)) 67 } 68 if offset > len(f.data) { 69 panic(fmt.Sprintf("invalid offset: %d is greater than file size %d", offset, len(f.data))) 70 } 71 72 if len(f.lines) > 0 { 73 lastOffset := f.lines[len(f.lines)-1] 74 if offset <= lastOffset { 75 panic(fmt.Sprintf("invalid offset: %d is not greater than previously observed line offset %d", offset, lastOffset)) 76 } 77 } 78 79 f.lines = append(f.lines, offset) 80 } 81 82 // AddToken adds info about a token at the given location to this file. It 83 // returns a value that allows access to all of the token's details. 84 func (f *FileInfo) AddToken(offset, length int) Token { 85 if offset < 0 { 86 panic(fmt.Sprintf("invalid offset: %d must not be negative", offset)) 87 } 88 if length < 0 { 89 panic(fmt.Sprintf("invalid length: %d must not be negative", length)) 90 } 91 if offset+length > len(f.data) { 92 panic(fmt.Sprintf("invalid offset+length: %d is greater than file size %d", offset+length, len(f.data))) 93 } 94 95 tokenID := len(f.tokens) 96 if len(f.tokens) > 0 { 97 lastToken := f.tokens[tokenID-1] 98 lastEnd := lastToken.offset + lastToken.length - 1 99 if offset <= lastEnd { 100 panic(fmt.Sprintf("invalid offset: %d is not greater than previously observed token end %d", offset, lastEnd)) 101 } 102 } 103 104 f.tokens = append(f.tokens, tokenSpan{offset: offset, length: length}) 105 return Token(tokenID) 106 } 107 108 // AddComment adds info about a comment to this file. Comments must first be 109 // added as tokens via f.AddToken(). The given comment argument is the TokenInfo 110 // from that step. The given attributedTo argument indicates another token in the 111 // file with which the comment is associated. If comment's offset is before that 112 // of attributedTo, then this is a leading comment. Otherwise, it is a trailing 113 // comment. 114 func (f *FileInfo) AddComment(comment, attributedTo Token) Comment { 115 if len(f.comments) > 0 { 116 lastComment := f.comments[len(f.comments)-1] 117 if int(comment) <= lastComment.index { 118 panic(fmt.Sprintf("invalid index: %d is not greater than previously observed comment index %d", comment, lastComment.index)) 119 } 120 if int(attributedTo) < lastComment.attributedToIndex { 121 panic(fmt.Sprintf("invalid attribution: %d is not greater than previously observed comment attribution index %d", attributedTo, lastComment.attributedToIndex)) 122 } 123 } 124 125 f.comments = append(f.comments, commentInfo{index: int(comment), attributedToIndex: int(attributedTo)}) 126 return Comment{ 127 fileInfo: f, 128 index: len(f.comments) - 1, 129 } 130 } 131 132 func (f *FileInfo) NodeInfo(n Node) NodeInfo { 133 return NodeInfo{fileInfo: f, startIndex: int(n.Start()), endIndex: int(n.End())} 134 } 135 136 func (f *FileInfo) TokenInfo(t Token) NodeInfo { 137 return NodeInfo{fileInfo: f, startIndex: int(t), endIndex: int(t)} 138 } 139 140 func (f *FileInfo) isDummyFile() bool { 141 return f.lines == nil 142 } 143 144 func (f *FileInfo) SourcePos(offset int) SourcePos { 145 lineNumber := sort.Search(len(f.lines), func(n int) bool { 146 return f.lines[n] > offset 147 }) 148 149 // If it weren't for tabs, we could trivially compute the column 150 // just based on offset and the starting offset of lineNumber :( 151 // Wish this were more efficient... that would require also storing 152 // computed line+column information, which would triple the size of 153 // f's tokens slice... 154 col := 0 155 for i := f.lines[lineNumber-1]; i < offset; i++ { 156 if f.data[i] == '\t' { 157 nextTabStop := 8 - (col % 8) 158 col += nextTabStop 159 } else { 160 col++ 161 } 162 } 163 164 return SourcePos{ 165 Filename: f.name, 166 Offset: offset, 167 Line: lineNumber, 168 // Columns are 1-indexed in this AST 169 Col: col + 1, 170 } 171 } 172 173 // Token represents a single lexed token. 174 type Token int 175 176 func (t Token) asTerminalNode() terminalNode { 177 return terminalNode(t) 178 } 179 180 // NodeInfo represents the details for a node in the source file's AST. 181 type NodeInfo struct { 182 fileInfo *FileInfo 183 startIndex, endIndex int 184 } 185 186 func (n NodeInfo) Start() SourcePos { 187 if n.fileInfo.isDummyFile() { 188 return UnknownPos(n.fileInfo.name) 189 } 190 191 tok := n.fileInfo.tokens[n.startIndex] 192 return n.fileInfo.SourcePos(tok.offset) 193 } 194 195 func (n NodeInfo) End() SourcePos { 196 if n.fileInfo.isDummyFile() { 197 return UnknownPos(n.fileInfo.name) 198 } 199 200 tok := n.fileInfo.tokens[n.endIndex] 201 // find offset of last character in the span 202 offset := tok.offset 203 if tok.length > 0 { 204 offset += tok.length - 1 205 } 206 pos := n.fileInfo.SourcePos(offset) 207 if tok.length > 0 { 208 // We return "open range", so end is the position *after* the 209 // last character in the span. So we adjust 210 pos.Col = pos.Col + 1 211 } 212 return pos 213 } 214 215 func (n NodeInfo) LeadingWhitespace() string { 216 if n.fileInfo.isDummyFile() { 217 return "" 218 } 219 220 tok := n.fileInfo.tokens[n.startIndex] 221 var prevEnd int 222 if n.startIndex > 0 { 223 prevTok := n.fileInfo.tokens[n.startIndex-1] 224 prevEnd = prevTok.offset + prevTok.length 225 } 226 return string(n.fileInfo.data[prevEnd:tok.offset]) 227 } 228 229 func (n NodeInfo) LeadingComments() Comments { 230 if n.fileInfo.isDummyFile() { 231 return Comments{} 232 } 233 234 start := sort.Search(len(n.fileInfo.comments), func(i int) bool { 235 return n.fileInfo.comments[i].attributedToIndex >= n.startIndex 236 }) 237 238 if start == len(n.fileInfo.comments) || n.fileInfo.comments[start].attributedToIndex != n.startIndex { 239 // no comments associated with this token 240 return Comments{} 241 } 242 243 numComments := 0 244 for i := start; i < len(n.fileInfo.comments); i++ { 245 comment := n.fileInfo.comments[i] 246 if comment.attributedToIndex == n.startIndex && 247 comment.index < n.startIndex { 248 numComments++ 249 } else { 250 break 251 } 252 } 253 254 return Comments{ 255 fileInfo: n.fileInfo, 256 first: start, 257 num: numComments, 258 } 259 } 260 261 func (n NodeInfo) TrailingComments() Comments { 262 if n.fileInfo.isDummyFile() { 263 return Comments{} 264 } 265 266 start := sort.Search(len(n.fileInfo.comments), func(i int) bool { 267 comment := n.fileInfo.comments[i] 268 return comment.attributedToIndex >= n.endIndex && 269 comment.index > n.endIndex 270 }) 271 272 if start == len(n.fileInfo.comments) || n.fileInfo.comments[start].attributedToIndex != n.endIndex { 273 // no comments associated with this token 274 return Comments{} 275 } 276 277 numComments := 0 278 for i := start; i < len(n.fileInfo.comments); i++ { 279 comment := n.fileInfo.comments[i] 280 if comment.attributedToIndex == n.endIndex { 281 numComments++ 282 } else { 283 break 284 } 285 } 286 287 return Comments{ 288 fileInfo: n.fileInfo, 289 first: start, 290 num: numComments, 291 } 292 } 293 294 func (n NodeInfo) RawText() string { 295 startTok := n.fileInfo.tokens[n.startIndex] 296 endTok := n.fileInfo.tokens[n.endIndex] 297 return string(n.fileInfo.data[startTok.offset : endTok.offset+endTok.length]) 298 } 299 300 // SourcePos identifies a location in a proto source file. 301 type SourcePos struct { 302 Filename string 303 // The line and column numbers for this position. These are 304 // one-based, so the first line and column is 1 (not zero). If 305 // either is zero, then the line and column are unknown and 306 // only the file name is known. 307 Line, Col int 308 // The offset, in bytes, from the beginning of the file. This 309 // is zero-based: the first character in the file is offset zero. 310 Offset int 311 } 312 313 func (pos SourcePos) String() string { 314 if pos.Line <= 0 || pos.Col <= 0 { 315 return pos.Filename 316 } 317 return fmt.Sprintf("%s:%d:%d", pos.Filename, pos.Line, pos.Col) 318 } 319 320 // Comments represents a range of sequential comments in a source file 321 // (e.g. no interleaving tokens or AST nodes). 322 type Comments struct { 323 fileInfo *FileInfo 324 first, num int 325 } 326 327 func (c Comments) Len() int { 328 return c.num 329 } 330 331 func (c Comments) Index(i int) Comment { 332 if i < 0 || i >= c.num { 333 panic(fmt.Sprintf("index %d out of range (len = %d)", i, c.num)) 334 } 335 return Comment{ 336 fileInfo: c.fileInfo, 337 index: c.first + i, 338 } 339 } 340 341 // Comment represents a single comment in a source file. It indicates 342 // the position of the comment and its contents. 343 type Comment struct { 344 fileInfo *FileInfo 345 index int 346 } 347 348 func (c Comment) Start() SourcePos { 349 comment := c.fileInfo.comments[c.index] 350 tok := c.fileInfo.tokens[comment.index] 351 return c.fileInfo.SourcePos(tok.offset) 352 } 353 354 func (c Comment) End() SourcePos { 355 comment := c.fileInfo.comments[c.index] 356 tok := c.fileInfo.tokens[comment.index] 357 return c.fileInfo.SourcePos(tok.offset + tok.length - 1) 358 } 359 360 func (c Comment) LeadingWhitespace() string { 361 comment := c.fileInfo.comments[c.index] 362 tok := c.fileInfo.tokens[comment.index] 363 var prevEnd int 364 if comment.index > 0 { 365 prevTok := c.fileInfo.tokens[comment.index-1] 366 prevEnd = prevTok.offset + prevTok.length 367 } 368 return string(c.fileInfo.data[prevEnd:tok.offset]) 369 } 370 371 func (c Comment) RawText() string { 372 comment := c.fileInfo.comments[c.index] 373 tok := c.fileInfo.tokens[comment.index] 374 return string(c.fileInfo.data[tok.offset : tok.offset+tok.length]) 375 }