golang.org/x/tools/gopls@v0.15.3/internal/protocol/mapper.go (about)

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package protocol
     6  
     7  // This file defines Mapper, which wraps a file content buffer
     8  // ([]byte) and provides efficient conversion between every kind of
     9  // position representation.
    10  //
    11  // gopls uses four main representations of position:
    12  //
    13  // 1. byte offsets, e.g. (start, end int), starting from zero.
    14  //
    15  // 2. go/token notation. Use these types when interacting directly
    16  //    with the go/* syntax packages:
    17  //
    18  // 	token.Pos
    19  // 	token.FileSet
    20  // 	token.File
    21  //
    22  //    Because File.Offset and File.Pos panic on invalid inputs,
    23  //    we do not call them directly and instead use the safetoken package
    24  //    for these conversions. This is enforced by a static check.
    25  //
    26  //    Beware also that the methods of token.File have two bugs for which
    27  //    safetoken contains workarounds:
    28  //    - #57490, whereby the parser may create ast.Nodes during error
    29  //      recovery whose computed positions are out of bounds (EOF+1).
    30  //    - #41029, whereby the wrong line number is returned for the EOF position.
    31  //
    32  // 3. the cmd package.
    33  //
    34  //    cmd.point = (line, col8, offset).
    35  //    cmd.Span = (uri URI, start, end cmd.point)
    36  //
    37  //          Line and column are 1-based.
    38  //          Columns are measured in bytes (UTF-8 codes).
    39  //          All fields are optional.
    40  //
    41  //    These types are useful as intermediate conversions of validated
    42  //    ranges (though MappedRange is superior as it is self contained
    43  //    and universally convertible).  Since their fields are optional
    44  //    they are also useful for parsing user-provided positions (e.g. in
    45  //    the CLI) before we have access to file contents.
    46  //
    47  // 4. protocol, the LSP RPC message format.
    48  //
    49  //    protocol.Position = (Line, Character uint32)
    50  //    protocol.Range = (start, end Position)
    51  //    protocol.Location = (URI, protocol.Range)
    52  //
    53  //          Line and Character are 0-based.
    54  //          Characters (columns) are measured in UTF-16 codes.
    55  //
    56  //    protocol.Mapper holds the (URI, Content) of a file, enabling
    57  //    efficient mapping between byte offsets, cmd ranges, and
    58  //    protocol ranges.
    59  //
    60  //    protocol.MappedRange holds a protocol.Mapper and valid (start,
    61  //    end int) byte offsets, enabling infallible, efficient conversion
    62  //    to any other format.
    63  
    64  import (
    65  	"bytes"
    66  	"fmt"
    67  	"go/ast"
    68  	"go/token"
    69  	"sort"
    70  	"strings"
    71  	"sync"
    72  	"unicode/utf8"
    73  
    74  	"golang.org/x/tools/gopls/internal/util/safetoken"
    75  )
    76  
// A Mapper wraps the content of a file and provides mapping
// between byte offsets and notations of position such as:
//
//   - (line, col8) pairs, where col8 is a 1-based UTF-8 column number
//     (bytes), as used by the go/token and cmd packages.
//
//   - (line, col16) pairs, where col16 is a 0-based UTF-16 column
//     number, as used by the LSP protocol.
//
// All conversion methods are named "FromTo", where From and To are the two types.
// For example, the OffsetPosition method converts from a byte offset to a Position.
//
// Mapper does not intrinsically depend on go/token-based
// representations.  Use safetoken to map between token.Pos <=> byte
// offsets, or the convenience methods such as PosPosition,
// NodePosition, or NodeRange.
//
// See overview comments at top of this file.
type Mapper struct {
	URI     DocumentURI
	Content []byte

	// Line-number information is requested only for a tiny
	// fraction of Mappers, so we compute it lazily.
	// Call initLines() before accessing fields below.
	linesOnce sync.Once
	lineStart []int // byte offset of start of ith line (0-based); last=EOF iff \n-terminated
	// nonASCII is set by initLines when Content holds any byte >= utf8.RuneSelf.
	nonASCII  bool

	// TODO(adonovan): adding an extra lineStart entry for EOF
	// might simplify every method that accesses it. Try it out.
}
   109  
   110  // NewMapper creates a new mapper for the given URI and content.
   111  func NewMapper(uri DocumentURI, content []byte) *Mapper {
   112  	return &Mapper{URI: uri, Content: content}
   113  }
   114  
   115  // initLines populates the lineStart table.
   116  func (m *Mapper) initLines() {
   117  	m.linesOnce.Do(func() {
   118  		nlines := bytes.Count(m.Content, []byte("\n"))
   119  		m.lineStart = make([]int, 1, nlines+1) // initially []int{0}
   120  		for offset, b := range m.Content {
   121  			if b == '\n' {
   122  				m.lineStart = append(m.lineStart, offset+1)
   123  			}
   124  			if b >= utf8.RuneSelf {
   125  				m.nonASCII = true
   126  			}
   127  		}
   128  	})
   129  }
   130  
   131  // LineCol8Position converts a valid line and UTF-8 column number,
   132  // both 1-based, to a protocol (UTF-16) position.
   133  func (m *Mapper) LineCol8Position(line, col8 int) (Position, error) {
   134  	m.initLines()
   135  	line0 := line - 1 // 0-based
   136  	if !(0 <= line0 && line0 < len(m.lineStart)) {
   137  		return Position{}, fmt.Errorf("line number %d out of range (max %d)", line, len(m.lineStart))
   138  	}
   139  
   140  	// content[start:end] is the preceding partial line.
   141  	start := m.lineStart[line0]
   142  	end := start + col8 - 1
   143  
   144  	// Validate column.
   145  	if end > len(m.Content) {
   146  		return Position{}, fmt.Errorf("column is beyond end of file")
   147  	} else if line0+1 < len(m.lineStart) && end >= m.lineStart[line0+1] {
   148  		return Position{}, fmt.Errorf("column is beyond end of line")
   149  	}
   150  
   151  	char := UTF16Len(m.Content[start:end])
   152  	return Position{Line: uint32(line0), Character: uint32(char)}, nil
   153  }
   154  
   155  // -- conversions from byte offsets --
   156  
   157  // OffsetLocation converts a byte-offset interval to a protocol (UTF-16) location.
   158  func (m *Mapper) OffsetLocation(start, end int) (Location, error) {
   159  	rng, err := m.OffsetRange(start, end)
   160  	if err != nil {
   161  		return Location{}, err
   162  	}
   163  	return m.RangeLocation(rng), nil
   164  }
   165  
   166  // OffsetRange converts a byte-offset interval to a protocol (UTF-16) range.
   167  func (m *Mapper) OffsetRange(start, end int) (Range, error) {
   168  	if start > end {
   169  		return Range{}, fmt.Errorf("start offset (%d) > end (%d)", start, end)
   170  	}
   171  	startPosition, err := m.OffsetPosition(start)
   172  	if err != nil {
   173  		return Range{}, fmt.Errorf("start: %v", err)
   174  	}
   175  	endPosition, err := m.OffsetPosition(end)
   176  	if err != nil {
   177  		return Range{}, fmt.Errorf("end: %v", err)
   178  	}
   179  	return Range{Start: startPosition, End: endPosition}, nil
   180  }
   181  
   182  // OffsetPosition converts a byte offset to a protocol (UTF-16) position.
   183  func (m *Mapper) OffsetPosition(offset int) (Position, error) {
   184  	if !(0 <= offset && offset <= len(m.Content)) {
   185  		return Position{}, fmt.Errorf("invalid offset %d (want 0-%d)", offset, len(m.Content))
   186  	}
   187  	// No error may be returned after this point,
   188  	// even if the offset does not fall at a rune boundary.
   189  	// (See panic in MappedRange.Range reachable.)
   190  
   191  	line, col16 := m.lineCol16(offset)
   192  	return Position{Line: uint32(line), Character: uint32(col16)}, nil
   193  }
   194  
   195  // lineCol16 converts a valid byte offset to line and UTF-16 column numbers, both 0-based.
   196  func (m *Mapper) lineCol16(offset int) (int, int) {
   197  	line, start, cr := m.line(offset)
   198  	var col16 int
   199  	if m.nonASCII {
   200  		col16 = UTF16Len(m.Content[start:offset])
   201  	} else {
   202  		col16 = offset - start
   203  	}
   204  	if cr {
   205  		col16-- // retreat from \r at line end
   206  	}
   207  	return line, col16
   208  }
   209  
   210  // OffsetLineCol8 converts a valid byte offset to line and UTF-8 column numbers, both 1-based.
   211  func (m *Mapper) OffsetLineCol8(offset int) (int, int) {
   212  	line, start, cr := m.line(offset)
   213  	col8 := offset - start
   214  	if cr {
   215  		col8-- // retreat from \r at line end
   216  	}
   217  	return line + 1, col8 + 1
   218  }
   219  
   220  // line returns:
   221  // - the 0-based index of the line that encloses the (valid) byte offset;
   222  // - the start offset of that line; and
   223  // - whether the offset denotes a carriage return (\r) at line end.
   224  func (m *Mapper) line(offset int) (int, int, bool) {
   225  	m.initLines()
   226  	// In effect, binary search returns a 1-based result.
   227  	line := sort.Search(len(m.lineStart), func(i int) bool {
   228  		return offset < m.lineStart[i]
   229  	})
   230  
   231  	// Adjustment for line-endings: \r|\n is the same as |\r\n.
   232  	var eol int
   233  	if line == len(m.lineStart) {
   234  		eol = len(m.Content) // EOF
   235  	} else {
   236  		eol = m.lineStart[line] - 1
   237  	}
   238  	cr := offset == eol && offset > 0 && m.Content[offset-1] == '\r'
   239  
   240  	line-- // 0-based
   241  
   242  	return line, m.lineStart[line], cr
   243  }
   244  
   245  // OffsetMappedRange returns a MappedRange for the given byte offsets.
   246  // A MappedRange can be converted to any other form.
   247  func (m *Mapper) OffsetMappedRange(start, end int) (MappedRange, error) {
   248  	if !(0 <= start && start <= end && end <= len(m.Content)) {
   249  		return MappedRange{}, fmt.Errorf("invalid offsets (%d, %d) (file %s has size %d)", start, end, m.URI, len(m.Content))
   250  	}
   251  	return MappedRange{m, start, end}, nil
   252  }
   253  
   254  // -- conversions from protocol (UTF-16) domain --
   255  
   256  // RangeOffsets converts a protocol (UTF-16) range to start/end byte offsets.
   257  func (m *Mapper) RangeOffsets(r Range) (int, int, error) {
   258  	start, err := m.PositionOffset(r.Start)
   259  	if err != nil {
   260  		return 0, 0, err
   261  	}
   262  	end, err := m.PositionOffset(r.End)
   263  	if err != nil {
   264  		return 0, 0, err
   265  	}
   266  	return start, end, nil
   267  }
   268  
   269  // PositionOffset converts a protocol (UTF-16) position to a byte offset.
   270  func (m *Mapper) PositionOffset(p Position) (int, error) {
   271  	m.initLines()
   272  
   273  	// Validate line number.
   274  	if p.Line > uint32(len(m.lineStart)) {
   275  		return 0, fmt.Errorf("line number %d out of range 0-%d", p.Line, len(m.lineStart))
   276  	} else if p.Line == uint32(len(m.lineStart)) {
   277  		if p.Character == 0 {
   278  			return len(m.Content), nil // EOF
   279  		}
   280  		return 0, fmt.Errorf("column is beyond end of file")
   281  	}
   282  
   283  	offset := m.lineStart[p.Line]
   284  	content := m.Content[offset:] // rest of file from start of enclosing line
   285  
   286  	// Advance bytes up to the required number of UTF-16 codes.
   287  	col8 := 0
   288  	for col16 := 0; col16 < int(p.Character); col16++ {
   289  		r, sz := utf8.DecodeRune(content)
   290  		if sz == 0 {
   291  			return 0, fmt.Errorf("column is beyond end of file")
   292  		}
   293  		if r == '\n' {
   294  			return 0, fmt.Errorf("column is beyond end of line")
   295  		}
   296  		if sz == 1 && r == utf8.RuneError {
   297  			return 0, fmt.Errorf("buffer contains invalid UTF-8 text")
   298  		}
   299  		content = content[sz:]
   300  
   301  		if r >= 0x10000 {
   302  			col16++ // rune was encoded by a pair of surrogate UTF-16 codes
   303  
   304  			if col16 == int(p.Character) {
   305  				break // requested position is in the middle of a rune
   306  			}
   307  		}
   308  		col8 += sz
   309  	}
   310  	return offset + col8, nil
   311  }
   312  
   313  // -- go/token domain convenience methods --
   314  
   315  // PosPosition converts a token pos to a protocol (UTF-16) position.
   316  func (m *Mapper) PosPosition(tf *token.File, pos token.Pos) (Position, error) {
   317  	offset, err := safetoken.Offset(tf, pos)
   318  	if err != nil {
   319  		return Position{}, err
   320  	}
   321  	return m.OffsetPosition(offset)
   322  }
   323  
   324  // PosLocation converts a token range to a protocol (UTF-16) location.
   325  func (m *Mapper) PosLocation(tf *token.File, start, end token.Pos) (Location, error) {
   326  	startOffset, endOffset, err := safetoken.Offsets(tf, start, end)
   327  	if err != nil {
   328  		return Location{}, err
   329  	}
   330  	rng, err := m.OffsetRange(startOffset, endOffset)
   331  	if err != nil {
   332  		return Location{}, err
   333  	}
   334  	return m.RangeLocation(rng), nil
   335  }
   336  
   337  // PosRange converts a token range to a protocol (UTF-16) range.
   338  func (m *Mapper) PosRange(tf *token.File, start, end token.Pos) (Range, error) {
   339  	startOffset, endOffset, err := safetoken.Offsets(tf, start, end)
   340  	if err != nil {
   341  		return Range{}, err
   342  	}
   343  	return m.OffsetRange(startOffset, endOffset)
   344  }
   345  
   346  // NodeRange converts a syntax node range to a protocol (UTF-16) range.
   347  func (m *Mapper) NodeRange(tf *token.File, node ast.Node) (Range, error) {
   348  	return m.PosRange(tf, node.Pos(), node.End())
   349  }
   350  
   351  // RangeLocation pairs a protocol Range with its URI, in a Location.
   352  func (m *Mapper) RangeLocation(rng Range) Location {
   353  	return Location{URI: m.URI, Range: rng}
   354  }
   355  
   356  // PosMappedRange returns a MappedRange for the given token.Pos range.
   357  func (m *Mapper) PosMappedRange(tf *token.File, start, end token.Pos) (MappedRange, error) {
   358  	startOffset, endOffset, err := safetoken.Offsets(tf, start, end)
   359  	if err != nil {
   360  		return MappedRange{}, nil
   361  	}
   362  	return m.OffsetMappedRange(startOffset, endOffset)
   363  }
   364  
   365  // NodeMappedRange returns a MappedRange for the given node range.
   366  func (m *Mapper) NodeMappedRange(tf *token.File, node ast.Node) (MappedRange, error) {
   367  	return m.PosMappedRange(tf, node.Pos(), node.End())
   368  }
   369  
   370  // -- MappedRange --
   371  
// A MappedRange represents a valid byte-offset range of a file.
// Through its Mapper it can be converted into other forms such
// as protocol.Range or UTF-8.
//
// Construct one by calling Mapper.OffsetMappedRange with start/end offsets.
// From the go/token domain, call safetoken.Offsets first,
// or use a helper such as ParsedGoFile.MappedPosRange.
//
// Two MappedRanges produced by the same Mapper are equal if and only if they
// denote the same range.  Two MappedRanges produced by different Mappers
// are unequal even when they represent the same range of the same file.
type MappedRange struct {
	// Mapper is the file the offsets below refer to.
	Mapper     *Mapper
	start, end int // valid byte offsets:  0 <= start <= end <= len(Mapper.Content)
}
   387  
   388  // Offsets returns the (start, end) byte offsets of this range.
   389  func (mr MappedRange) Offsets() (start, end int) { return mr.start, mr.end }
   390  
   391  // -- convenience functions --
   392  
   393  // URI returns the URI of the range's file.
   394  func (mr MappedRange) URI() DocumentURI {
   395  	return mr.Mapper.URI
   396  }
   397  
   398  // Range returns the range in protocol (UTF-16) form.
   399  func (mr MappedRange) Range() Range {
   400  	rng, err := mr.Mapper.OffsetRange(mr.start, mr.end)
   401  	if err != nil {
   402  		panic(err) // can't happen
   403  	}
   404  	return rng
   405  }
   406  
   407  // Location returns the range in protocol location (UTF-16) form.
   408  func (mr MappedRange) Location() Location {
   409  	return mr.Mapper.RangeLocation(mr.Range())
   410  }
   411  
   412  // String formats the range in UTF-8 notation.
   413  func (mr MappedRange) String() string {
   414  	var s strings.Builder
   415  	startLine, startCol8 := mr.Mapper.OffsetLineCol8(mr.start)
   416  	fmt.Fprintf(&s, "%d:%d", startLine, startCol8)
   417  	if mr.end != mr.start {
   418  		endLine, endCol8 := mr.Mapper.OffsetLineCol8(mr.end)
   419  		if endLine == startLine {
   420  			fmt.Fprintf(&s, "-%d", endCol8)
   421  		} else {
   422  			fmt.Fprintf(&s, "-%d:%d", endLine, endCol8)
   423  		}
   424  	}
   425  	return s.String()
   426  }
   427  
   428  // LocationTextDocumentPositionParams converts its argument to its result.
   429  func LocationTextDocumentPositionParams(loc Location) TextDocumentPositionParams {
   430  	return TextDocumentPositionParams{
   431  		TextDocument: TextDocumentIdentifier{URI: loc.URI},
   432  		Position:     loc.Range.Start,
   433  	}
   434  }