golang.org/x/tools/gopls@v0.15.3/internal/protocol/mapper.go (about) 1 // Copyright 2023 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package protocol 6 7 // This file defines Mapper, which wraps a file content buffer 8 // ([]byte) and provides efficient conversion between every kind of 9 // position representation. 10 // 11 // gopls uses four main representations of position: 12 // 13 // 1. byte offsets, e.g. (start, end int), starting from zero. 14 // 15 // 2. go/token notation. Use these types when interacting directly 16 // with the go/* syntax packages: 17 // 18 // token.Pos 19 // token.FileSet 20 // token.File 21 // 22 // Because File.Offset and File.Pos panic on invalid inputs, 23 // we do not call them directly and instead use the safetoken package 24 // for these conversions. This is enforced by a static check. 25 // 26 // Beware also that the methods of token.File have two bugs for which 27 // safetoken contains workarounds: 28 // - #57490, whereby the parser may create ast.Nodes during error 29 // recovery whose computed positions are out of bounds (EOF+1). 30 // - #41029, whereby the wrong line number is returned for the EOF position. 31 // 32 // 3. the cmd package. 33 // 34 // cmd.point = (line, col8, offset). 35 // cmd.Span = (uri URI, start, end cmd.point) 36 // 37 // Line and column are 1-based. 38 // Columns are measured in bytes (UTF-8 codes). 39 // All fields are optional. 40 // 41 // These types are useful as intermediate conversions of validated 42 // ranges (though MappedRange is superior as it is self contained 43 // and universally convertible). Since their fields are optional 44 // they are also useful for parsing user-provided positions (e.g. in 45 // the CLI) before we have access to file contents. 46 // 47 // 4. protocol, the LSP RPC message format. 48 // 49 // protocol.Position = (Line, Character uint32) 50 // protocol.Range = (start, end Position) 51 // protocol.Location = (URI, protocol.Range) 52 // 53 // Line and Character are 0-based. 54 // Characters (columns) are measured in UTF-16 codes. 55 // 56 // protocol.Mapper holds the (URI, Content) of a file, enabling 57 // efficient mapping between byte offsets, cmd ranges, and 58 // protocol ranges. 59 // 60 // protocol.MappedRange holds a protocol.Mapper and valid (start, 61 // end int) byte offsets, enabling infallible, efficient conversion 62 // to any other format. 63 64 import ( 65 "bytes" 66 "fmt" 67 "go/ast" 68 "go/token" 69 "sort" 70 "strings" 71 "sync" 72 "unicode/utf8" 73 74 "golang.org/x/tools/gopls/internal/util/safetoken" 75 ) 76 77 // A Mapper wraps the content of a file and provides mapping 78 // between byte offsets and notations of position such as: 79 // 80 // - (line, col8) pairs, where col8 is a 1-based UTF-8 column number 81 // (bytes), as used by the go/token and cmd packages. 82 // 83 // - (line, col16) pairs, where col16 is a 1-based UTF-16 column 84 // number, as used by the LSP protocol. 85 // 86 // All conversion methods are named "FromTo", where From and To are the two types. 87 // For example, the PointPosition method converts from a Point to a Position. 88 // 89 // Mapper does not intrinsically depend on go/token-based 90 // representations. Use safetoken to map between token.Pos <=> byte 91 // offsets, or the convenience methods such as PosPosition, 92 // NodePosition, or NodeRange. 93 // 94 // See overview comments at top of this file. 95 type Mapper struct { 96 URI DocumentURI 97 Content []byte 98 99 // Line-number information is requested only for a tiny 100 // fraction of Mappers, so we compute it lazily. 101 // Call initLines() before accessing fields below. 102 linesOnce sync.Once 103 lineStart []int // byte offset of start of ith line (0-based); last=EOF iff \n-terminated 104 nonASCII bool 105 106 // TODO(adonovan): adding an extra lineStart entry for EOF 107 // might simplify every method that accesses it. Try it out. 108 } 109 110 // NewMapper creates a new mapper for the given URI and content. 111 func NewMapper(uri DocumentURI, content []byte) *Mapper { 112 return &Mapper{URI: uri, Content: content} 113 } 114 115 // initLines populates the lineStart table. 116 func (m *Mapper) initLines() { 117 m.linesOnce.Do(func() { 118 nlines := bytes.Count(m.Content, []byte("\n")) 119 m.lineStart = make([]int, 1, nlines+1) // initially []int{0} 120 for offset, b := range m.Content { 121 if b == '\n' { 122 m.lineStart = append(m.lineStart, offset+1) 123 } 124 if b >= utf8.RuneSelf { 125 m.nonASCII = true 126 } 127 } 128 }) 129 } 130 131 // LineCol8Position converts a valid line and UTF-8 column number, 132 // both 1-based, to a protocol (UTF-16) position. 133 func (m *Mapper) LineCol8Position(line, col8 int) (Position, error) { 134 m.initLines() 135 line0 := line - 1 // 0-based 136 if !(0 <= line0 && line0 < len(m.lineStart)) { 137 return Position{}, fmt.Errorf("line number %d out of range (max %d)", line, len(m.lineStart)) 138 } 139 140 // content[start:end] is the preceding partial line. 141 start := m.lineStart[line0] 142 end := start + col8 - 1 143 144 // Validate column. 145 if end > len(m.Content) { 146 return Position{}, fmt.Errorf("column is beyond end of file") 147 } else if line0+1 < len(m.lineStart) && end >= m.lineStart[line0+1] { 148 return Position{}, fmt.Errorf("column is beyond end of line") 149 } 150 151 char := UTF16Len(m.Content[start:end]) 152 return Position{Line: uint32(line0), Character: uint32(char)}, nil 153 } 154 155 // -- conversions from byte offsets -- 156 157 // OffsetLocation converts a byte-offset interval to a protocol (UTF-16) location. 158 func (m *Mapper) OffsetLocation(start, end int) (Location, error) { 159 rng, err := m.OffsetRange(start, end) 160 if err != nil { 161 return Location{}, err 162 } 163 return m.RangeLocation(rng), nil 164 } 165 166 // OffsetRange converts a byte-offset interval to a protocol (UTF-16) range. 167 func (m *Mapper) OffsetRange(start, end int) (Range, error) { 168 if start > end { 169 return Range{}, fmt.Errorf("start offset (%d) > end (%d)", start, end) 170 } 171 startPosition, err := m.OffsetPosition(start) 172 if err != nil { 173 return Range{}, fmt.Errorf("start: %v", err) 174 } 175 endPosition, err := m.OffsetPosition(end) 176 if err != nil { 177 return Range{}, fmt.Errorf("end: %v", err) 178 } 179 return Range{Start: startPosition, End: endPosition}, nil 180 } 181 182 // OffsetPosition converts a byte offset to a protocol (UTF-16) position. 183 func (m *Mapper) OffsetPosition(offset int) (Position, error) { 184 if !(0 <= offset && offset <= len(m.Content)) { 185 return Position{}, fmt.Errorf("invalid offset %d (want 0-%d)", offset, len(m.Content)) 186 } 187 // No error may be returned after this point, 188 // even if the offset does not fall at a rune boundary. 189 // (See panic in MappedRange.Range reachable.) 190 191 line, col16 := m.lineCol16(offset) 192 return Position{Line: uint32(line), Character: uint32(col16)}, nil 193 } 194 195 // lineCol16 converts a valid byte offset to line and UTF-16 column numbers, both 0-based. 196 func (m *Mapper) lineCol16(offset int) (int, int) { 197 line, start, cr := m.line(offset) 198 var col16 int 199 if m.nonASCII { 200 col16 = UTF16Len(m.Content[start:offset]) 201 } else { 202 col16 = offset - start 203 } 204 if cr { 205 col16-- // retreat from \r at line end 206 } 207 return line, col16 208 } 209 210 // OffsetLineCol8 converts a valid byte offset to line and UTF-8 column numbers, both 1-based. 211 func (m *Mapper) OffsetLineCol8(offset int) (int, int) { 212 line, start, cr := m.line(offset) 213 col8 := offset - start 214 if cr { 215 col8-- // retreat from \r at line end 216 } 217 return line + 1, col8 + 1 218 } 219 220 // line returns: 221 // - the 0-based index of the line that encloses the (valid) byte offset; 222 // - the start offset of that line; and 223 // - whether the offset denotes a carriage return (\r) at line end. 224 func (m *Mapper) line(offset int) (int, int, bool) { 225 m.initLines() 226 // In effect, binary search returns a 1-based result. 227 line := sort.Search(len(m.lineStart), func(i int) bool { 228 return offset < m.lineStart[i] 229 }) 230 231 // Adjustment for line-endings: \r|\n is the same as |\r\n. 232 var eol int 233 if line == len(m.lineStart) { 234 eol = len(m.Content) // EOF 235 } else { 236 eol = m.lineStart[line] - 1 237 } 238 cr := offset == eol && offset > 0 && m.Content[offset-1] == '\r' 239 240 line-- // 0-based 241 242 return line, m.lineStart[line], cr 243 } 244 245 // OffsetMappedRange returns a MappedRange for the given byte offsets. 246 // A MappedRange can be converted to any other form. 247 func (m *Mapper) OffsetMappedRange(start, end int) (MappedRange, error) { 248 if !(0 <= start && start <= end && end <= len(m.Content)) { 249 return MappedRange{}, fmt.Errorf("invalid offsets (%d, %d) (file %s has size %d)", start, end, m.URI, len(m.Content)) 250 } 251 return MappedRange{m, start, end}, nil 252 } 253 254 // -- conversions from protocol (UTF-16) domain -- 255 256 // RangeOffsets converts a protocol (UTF-16) range to start/end byte offsets. 257 func (m *Mapper) RangeOffsets(r Range) (int, int, error) { 258 start, err := m.PositionOffset(r.Start) 259 if err != nil { 260 return 0, 0, err 261 } 262 end, err := m.PositionOffset(r.End) 263 if err != nil { 264 return 0, 0, err 265 } 266 return start, end, nil 267 } 268 269 // PositionOffset converts a protocol (UTF-16) position to a byte offset. 270 func (m *Mapper) PositionOffset(p Position) (int, error) { 271 m.initLines() 272 273 // Validate line number. 274 if p.Line > uint32(len(m.lineStart)) { 275 return 0, fmt.Errorf("line number %d out of range 0-%d", p.Line, len(m.lineStart)) 276 } else if p.Line == uint32(len(m.lineStart)) { 277 if p.Character == 0 { 278 return len(m.Content), nil // EOF 279 } 280 return 0, fmt.Errorf("column is beyond end of file") 281 } 282 283 offset := m.lineStart[p.Line] 284 content := m.Content[offset:] // rest of file from start of enclosing line 285 286 // Advance bytes up to the required number of UTF-16 codes. 287 col8 := 0 288 for col16 := 0; col16 < int(p.Character); col16++ { 289 r, sz := utf8.DecodeRune(content) 290 if sz == 0 { 291 return 0, fmt.Errorf("column is beyond end of file") 292 } 293 if r == '\n' { 294 return 0, fmt.Errorf("column is beyond end of line") 295 } 296 if sz == 1 && r == utf8.RuneError { 297 return 0, fmt.Errorf("buffer contains invalid UTF-8 text") 298 } 299 content = content[sz:] 300 301 if r >= 0x10000 { 302 col16++ // rune was encoded by a pair of surrogate UTF-16 codes 303 304 if col16 == int(p.Character) { 305 break // requested position is in the middle of a rune 306 } 307 } 308 col8 += sz 309 } 310 return offset + col8, nil 311 } 312 313 // -- go/token domain convenience methods -- 314 315 // PosPosition converts a token pos to a protocol (UTF-16) position. 316 func (m *Mapper) PosPosition(tf *token.File, pos token.Pos) (Position, error) { 317 offset, err := safetoken.Offset(tf, pos) 318 if err != nil { 319 return Position{}, err 320 } 321 return m.OffsetPosition(offset) 322 } 323 324 // PosLocation converts a token range to a protocol (UTF-16) location. 325 func (m *Mapper) PosLocation(tf *token.File, start, end token.Pos) (Location, error) { 326 startOffset, endOffset, err := safetoken.Offsets(tf, start, end) 327 if err != nil { 328 return Location{}, err 329 } 330 rng, err := m.OffsetRange(startOffset, endOffset) 331 if err != nil { 332 return Location{}, err 333 } 334 return m.RangeLocation(rng), nil 335 } 336 337 // PosRange converts a token range to a protocol (UTF-16) range. 338 func (m *Mapper) PosRange(tf *token.File, start, end token.Pos) (Range, error) { 339 startOffset, endOffset, err := safetoken.Offsets(tf, start, end) 340 if err != nil { 341 return Range{}, err 342 } 343 return m.OffsetRange(startOffset, endOffset) 344 } 345 346 // NodeRange converts a syntax node range to a protocol (UTF-16) range. 347 func (m *Mapper) NodeRange(tf *token.File, node ast.Node) (Range, error) { 348 return m.PosRange(tf, node.Pos(), node.End()) 349 } 350 351 // RangeLocation pairs a protocol Range with its URI, in a Location. 352 func (m *Mapper) RangeLocation(rng Range) Location { 353 return Location{URI: m.URI, Range: rng} 354 } 355 356 // PosMappedRange returns a MappedRange for the given token.Pos range. 357 func (m *Mapper) PosMappedRange(tf *token.File, start, end token.Pos) (MappedRange, error) { 358 startOffset, endOffset, err := safetoken.Offsets(tf, start, end) 359 if err != nil { 360 return MappedRange{}, nil 361 } 362 return m.OffsetMappedRange(startOffset, endOffset) 363 } 364 365 // NodeMappedRange returns a MappedRange for the given node range. 366 func (m *Mapper) NodeMappedRange(tf *token.File, node ast.Node) (MappedRange, error) { 367 return m.PosMappedRange(tf, node.Pos(), node.End()) 368 } 369 370 // -- MappedRange -- 371 372 // A MappedRange represents a valid byte-offset range of a file. 373 // Through its Mapper it can be converted into other forms such 374 // as protocol.Range or UTF-8. 375 // 376 // Construct one by calling Mapper.OffsetMappedRange with start/end offsets. 377 // From the go/token domain, call safetoken.Offsets first, 378 // or use a helper such as ParsedGoFile.MappedPosRange. 379 // 380 // Two MappedRanges produced the same Mapper are equal if and only if they 381 // denote the same range. Two MappedRanges produced by different Mappers 382 // are unequal even when they represent the same range of the same file. 383 type MappedRange struct { 384 Mapper *Mapper 385 start, end int // valid byte offsets: 0 <= start <= end <= len(Mapper.Content) 386 } 387 388 // Offsets returns the (start, end) byte offsets of this range. 389 func (mr MappedRange) Offsets() (start, end int) { return mr.start, mr.end } 390 391 // -- convenience functions -- 392 393 // URI returns the URI of the range's file. 394 func (mr MappedRange) URI() DocumentURI { 395 return mr.Mapper.URI 396 } 397 398 // Range returns the range in protocol (UTF-16) form. 399 func (mr MappedRange) Range() Range { 400 rng, err := mr.Mapper.OffsetRange(mr.start, mr.end) 401 if err != nil { 402 panic(err) // can't happen 403 } 404 return rng 405 } 406 407 // Location returns the range in protocol location (UTF-16) form. 408 func (mr MappedRange) Location() Location { 409 return mr.Mapper.RangeLocation(mr.Range()) 410 } 411 412 // String formats the range in UTF-8 notation. 413 func (mr MappedRange) String() string { 414 var s strings.Builder 415 startLine, startCol8 := mr.Mapper.OffsetLineCol8(mr.start) 416 fmt.Fprintf(&s, "%d:%d", startLine, startCol8) 417 if mr.end != mr.start { 418 endLine, endCol8 := mr.Mapper.OffsetLineCol8(mr.end) 419 if endLine == startLine { 420 fmt.Fprintf(&s, "-%d", endCol8) 421 } else { 422 fmt.Fprintf(&s, "-%d:%d", endLine, endCol8) 423 } 424 } 425 return s.String() 426 } 427 428 // LocationTextDocumentPositionParams converts its argument to its result. 429 func LocationTextDocumentPositionParams(loc Location) TextDocumentPositionParams { 430 return TextDocumentPositionParams{ 431 TextDocument: TextDocumentIdentifier{URI: loc.URI}, 432 Position: loc.Range.Start, 433 } 434 }