github.com/jhump/protocompile@v0.0.0-20221021153901-4f6f732835e8/ast/file_info.go (about)

     1  package ast
     2  
import (
	"fmt"
	"sort"
	"unicode/utf8"
)
     7  
     8  // FileInfo contains information about the contents of a source file, including
     9  // details about comments and tokens. A lexer accumulates these details as it
    10  // scans the file contents. This allows efficient representation of things like
    11  // source positions.
    12  type FileInfo struct {
    13  	// The name of the source file.
    14  	name string
    15  	// The raw contents of the source file.
    16  	data []byte
    17  	// The offsets for each line in the file. The value is the zero-based byte
    18  	// offset for a given line. The line is given by its index. So the value at
    19  	// index 0 is the offset for the first line (which is always zero). The
    20  	// value at index 1 is the offset at which the second line begins. Etc.
    21  	lines []int
    22  	// The info for every comment in the file. This is empty if the file has no
    23  	// comments. The first entry corresponds to the first comment in the file,
    24  	// and so on.
    25  	comments []commentInfo
    26  	// The info for every token in the file. The last item in the slice
    27  	// corresponds to the EOF, so every file (even an empty one) has at least
    28  	// one element. This includes all terminal symbols in the AST as well as
    29  	// all comments. However, it excludes rune nodes (which can be more
    30  	// compactly represented by an offset into data).
    31  	tokens []tokenSpan
    32  }
    33  
    34  type commentInfo struct {
    35  	// the index of the token, in the file's tokens slice, that represents this
    36  	// comment
    37  	index int
    38  	// the index of the token to which this comment is attributed.
    39  	attributedToIndex int
    40  }
    41  
    42  type tokenSpan struct {
    43  	// the offset into the file of the first character of a token.
    44  	offset int
    45  	// the length of the token
    46  	length int
    47  }
    48  
    49  // NewFileInfo creates a new instance for the given file.
    50  func NewFileInfo(filename string, contents []byte) *FileInfo {
    51  	return &FileInfo{
    52  		name:  filename,
    53  		data:  contents,
    54  		lines: []int{0},
    55  	}
    56  }
    57  
    58  func (f *FileInfo) Name() string {
    59  	return f.name
    60  }
    61  
    62  // AddLine adds the offset representing the beginning of the "next" line in the file.
    63  // The first line always starts at offset 0, the second line starts at offset-of-newline-char+1.
    64  func (f *FileInfo) AddLine(offset int) {
    65  	if offset < 0 {
    66  		panic(fmt.Sprintf("invalid offset: %d must not be negative", offset))
    67  	}
    68  	if offset > len(f.data) {
    69  		panic(fmt.Sprintf("invalid offset: %d is greater than file size %d", offset, len(f.data)))
    70  	}
    71  
    72  	if len(f.lines) > 0 {
    73  		lastOffset := f.lines[len(f.lines)-1]
    74  		if offset <= lastOffset {
    75  			panic(fmt.Sprintf("invalid offset: %d is not greater than previously observed line offset %d", offset, lastOffset))
    76  		}
    77  	}
    78  
    79  	f.lines = append(f.lines, offset)
    80  }
    81  
    82  // AddToken adds info about a token at the given location to this file. It
    83  // returns a value that allows access to all of the token's details.
    84  func (f *FileInfo) AddToken(offset, length int) Token {
    85  	if offset < 0 {
    86  		panic(fmt.Sprintf("invalid offset: %d must not be negative", offset))
    87  	}
    88  	if length < 0 {
    89  		panic(fmt.Sprintf("invalid length: %d must not be negative", length))
    90  	}
    91  	if offset+length > len(f.data) {
    92  		panic(fmt.Sprintf("invalid offset+length: %d is greater than file size %d", offset+length, len(f.data)))
    93  	}
    94  
    95  	tokenID := len(f.tokens)
    96  	if len(f.tokens) > 0 {
    97  		lastToken := f.tokens[tokenID-1]
    98  		lastEnd := lastToken.offset + lastToken.length - 1
    99  		if offset <= lastEnd {
   100  			panic(fmt.Sprintf("invalid offset: %d is not greater than previously observed token end %d", offset, lastEnd))
   101  		}
   102  	}
   103  
   104  	f.tokens = append(f.tokens, tokenSpan{offset: offset, length: length})
   105  	return Token(tokenID)
   106  }
   107  
   108  // AddComment adds info about a comment to this file. Comments must first be
   109  // added as tokens via f.AddToken(). The given comment argument is the TokenInfo
   110  // from that step. The given attributedTo argument indicates another token in the
   111  // file with which the comment is associated. If comment's offset is before that
   112  // of attributedTo, then this is a leading comment. Otherwise, it is a trailing
   113  // comment.
   114  func (f *FileInfo) AddComment(comment, attributedTo Token) Comment {
   115  	if len(f.comments) > 0 {
   116  		lastComment := f.comments[len(f.comments)-1]
   117  		if int(comment) <= lastComment.index {
   118  			panic(fmt.Sprintf("invalid index: %d is not greater than previously observed comment index %d", comment, lastComment.index))
   119  		}
   120  		if int(attributedTo) < lastComment.attributedToIndex {
   121  			panic(fmt.Sprintf("invalid attribution: %d is not greater than previously observed comment attribution index %d", attributedTo, lastComment.attributedToIndex))
   122  		}
   123  	}
   124  
   125  	f.comments = append(f.comments, commentInfo{index: int(comment), attributedToIndex: int(attributedTo)})
   126  	return Comment{
   127  		fileInfo: f,
   128  		index:    len(f.comments) - 1,
   129  	}
   130  }
   131  
   132  func (f *FileInfo) NodeInfo(n Node) NodeInfo {
   133  	return NodeInfo{fileInfo: f, startIndex: int(n.Start()), endIndex: int(n.End())}
   134  }
   135  
   136  func (f *FileInfo) TokenInfo(t Token) NodeInfo {
   137  	return NodeInfo{fileInfo: f, startIndex: int(t), endIndex: int(t)}
   138  }
   139  
   140  func (f *FileInfo) isDummyFile() bool {
   141  	return f.lines == nil
   142  }
   143  
   144  func (f *FileInfo) SourcePos(offset int) SourcePos {
   145  	lineNumber := sort.Search(len(f.lines), func(n int) bool {
   146  		return f.lines[n] > offset
   147  	})
   148  
   149  	// If it weren't for tabs, we could trivially compute the column
   150  	// just based on offset and the starting offset of lineNumber :(
   151  	// Wish this were more efficient... that would require also storing
   152  	// computed line+column information, which would triple the size of
   153  	// f's tokens slice...
   154  	col := 0
   155  	for i := f.lines[lineNumber-1]; i < offset; i++ {
   156  		if f.data[i] == '\t' {
   157  			nextTabStop := 8 - (col % 8)
   158  			col += nextTabStop
   159  		} else {
   160  			col++
   161  		}
   162  	}
   163  
   164  	return SourcePos{
   165  		Filename: f.name,
   166  		Offset:   offset,
   167  		Line:     lineNumber,
   168  		// Columns are 1-indexed in this AST
   169  		Col: col + 1,
   170  	}
   171  }
   172  
   173  // Token represents a single lexed token.
   174  type Token int
   175  
   176  func (t Token) asTerminalNode() terminalNode {
   177  	return terminalNode(t)
   178  }
   179  
   180  // NodeInfo represents the details for a node in the source file's AST.
   181  type NodeInfo struct {
   182  	fileInfo             *FileInfo
   183  	startIndex, endIndex int
   184  }
   185  
   186  func (n NodeInfo) Start() SourcePos {
   187  	if n.fileInfo.isDummyFile() {
   188  		return UnknownPos(n.fileInfo.name)
   189  	}
   190  
   191  	tok := n.fileInfo.tokens[n.startIndex]
   192  	return n.fileInfo.SourcePos(tok.offset)
   193  }
   194  
   195  func (n NodeInfo) End() SourcePos {
   196  	if n.fileInfo.isDummyFile() {
   197  		return UnknownPos(n.fileInfo.name)
   198  	}
   199  
   200  	tok := n.fileInfo.tokens[n.endIndex]
   201  	// find offset of last character in the span
   202  	offset := tok.offset
   203  	if tok.length > 0 {
   204  		offset += tok.length - 1
   205  	}
   206  	pos := n.fileInfo.SourcePos(offset)
   207  	if tok.length > 0 {
   208  		// We return "open range", so end is the position *after* the
   209  		// last character in the span. So we adjust
   210  		pos.Col = pos.Col + 1
   211  	}
   212  	return pos
   213  }
   214  
   215  func (n NodeInfo) LeadingWhitespace() string {
   216  	if n.fileInfo.isDummyFile() {
   217  		return ""
   218  	}
   219  
   220  	tok := n.fileInfo.tokens[n.startIndex]
   221  	var prevEnd int
   222  	if n.startIndex > 0 {
   223  		prevTok := n.fileInfo.tokens[n.startIndex-1]
   224  		prevEnd = prevTok.offset + prevTok.length
   225  	}
   226  	return string(n.fileInfo.data[prevEnd:tok.offset])
   227  }
   228  
   229  func (n NodeInfo) LeadingComments() Comments {
   230  	if n.fileInfo.isDummyFile() {
   231  		return Comments{}
   232  	}
   233  
   234  	start := sort.Search(len(n.fileInfo.comments), func(i int) bool {
   235  		return n.fileInfo.comments[i].attributedToIndex >= n.startIndex
   236  	})
   237  
   238  	if start == len(n.fileInfo.comments) || n.fileInfo.comments[start].attributedToIndex != n.startIndex {
   239  		// no comments associated with this token
   240  		return Comments{}
   241  	}
   242  
   243  	numComments := 0
   244  	for i := start; i < len(n.fileInfo.comments); i++ {
   245  		comment := n.fileInfo.comments[i]
   246  		if comment.attributedToIndex == n.startIndex &&
   247  			comment.index < n.startIndex {
   248  			numComments++
   249  		} else {
   250  			break
   251  		}
   252  	}
   253  
   254  	return Comments{
   255  		fileInfo: n.fileInfo,
   256  		first:    start,
   257  		num:      numComments,
   258  	}
   259  }
   260  
   261  func (n NodeInfo) TrailingComments() Comments {
   262  	if n.fileInfo.isDummyFile() {
   263  		return Comments{}
   264  	}
   265  
   266  	start := sort.Search(len(n.fileInfo.comments), func(i int) bool {
   267  		comment := n.fileInfo.comments[i]
   268  		return comment.attributedToIndex >= n.endIndex &&
   269  			comment.index > n.endIndex
   270  	})
   271  
   272  	if start == len(n.fileInfo.comments) || n.fileInfo.comments[start].attributedToIndex != n.endIndex {
   273  		// no comments associated with this token
   274  		return Comments{}
   275  	}
   276  
   277  	numComments := 0
   278  	for i := start; i < len(n.fileInfo.comments); i++ {
   279  		comment := n.fileInfo.comments[i]
   280  		if comment.attributedToIndex == n.endIndex {
   281  			numComments++
   282  		} else {
   283  			break
   284  		}
   285  	}
   286  
   287  	return Comments{
   288  		fileInfo: n.fileInfo,
   289  		first:    start,
   290  		num:      numComments,
   291  	}
   292  }
   293  
   294  func (n NodeInfo) RawText() string {
   295  	startTok := n.fileInfo.tokens[n.startIndex]
   296  	endTok := n.fileInfo.tokens[n.endIndex]
   297  	return string(n.fileInfo.data[startTok.offset : endTok.offset+endTok.length])
   298  }
   299  
   300  // SourcePos identifies a location in a proto source file.
   301  type SourcePos struct {
   302  	Filename string
   303  	// The line and column numbers for this position. These are
   304  	// one-based, so the first line and column is 1 (not zero). If
   305  	// either is zero, then the line and column are unknown and
   306  	// only the file name is known.
   307  	Line, Col int
   308  	// The offset, in bytes, from the beginning of the file. This
   309  	// is zero-based: the first character in the file is offset zero.
   310  	Offset int
   311  }
   312  
   313  func (pos SourcePos) String() string {
   314  	if pos.Line <= 0 || pos.Col <= 0 {
   315  		return pos.Filename
   316  	}
   317  	return fmt.Sprintf("%s:%d:%d", pos.Filename, pos.Line, pos.Col)
   318  }
   319  
   320  // Comments represents a range of sequential comments in a source file
   321  // (e.g. no interleaving tokens or AST nodes).
   322  type Comments struct {
   323  	fileInfo   *FileInfo
   324  	first, num int
   325  }
   326  
   327  func (c Comments) Len() int {
   328  	return c.num
   329  }
   330  
   331  func (c Comments) Index(i int) Comment {
   332  	if i < 0 || i >= c.num {
   333  		panic(fmt.Sprintf("index %d out of range (len = %d)", i, c.num))
   334  	}
   335  	return Comment{
   336  		fileInfo: c.fileInfo,
   337  		index:    c.first + i,
   338  	}
   339  }
   340  
   341  // Comment represents a single comment in a source file. It indicates
   342  // the position of the comment and its contents.
   343  type Comment struct {
   344  	fileInfo *FileInfo
   345  	index    int
   346  }
   347  
   348  func (c Comment) Start() SourcePos {
   349  	comment := c.fileInfo.comments[c.index]
   350  	tok := c.fileInfo.tokens[comment.index]
   351  	return c.fileInfo.SourcePos(tok.offset)
   352  }
   353  
   354  func (c Comment) End() SourcePos {
   355  	comment := c.fileInfo.comments[c.index]
   356  	tok := c.fileInfo.tokens[comment.index]
   357  	return c.fileInfo.SourcePos(tok.offset + tok.length - 1)
   358  }
   359  
   360  func (c Comment) LeadingWhitespace() string {
   361  	comment := c.fileInfo.comments[c.index]
   362  	tok := c.fileInfo.tokens[comment.index]
   363  	var prevEnd int
   364  	if comment.index > 0 {
   365  		prevTok := c.fileInfo.tokens[comment.index-1]
   366  		prevEnd = prevTok.offset + prevTok.length
   367  	}
   368  	return string(c.fileInfo.data[prevEnd:tok.offset])
   369  }
   370  
   371  func (c Comment) RawText() string {
   372  	comment := c.fileInfo.comments[c.index]
   373  	tok := c.fileInfo.tokens[comment.index]
   374  	return string(c.fileInfo.data[tok.offset : tok.offset+tok.length])
   375  }