github.com/jhump/golang-x-tools@v0.0.0-20220218190644-4958d6d39439/internal/span/utf16.go (about) 1 // Copyright 2019 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package span 6 7 import ( 8 "fmt" 9 "unicode/utf8" 10 ) 11 12 // ToUTF16Column calculates the utf16 column expressed by the point given the 13 // supplied file contents. 14 // This is used to convert from the native (always in bytes) column 15 // representation and the utf16 counts used by some editors. 16 func ToUTF16Column(p Point, content []byte) (int, error) { 17 if !p.HasPosition() { 18 return -1, fmt.Errorf("ToUTF16Column: point is missing position") 19 } 20 if !p.HasOffset() { 21 return -1, fmt.Errorf("ToUTF16Column: point is missing offset") 22 } 23 offset := p.Offset() // 0-based 24 colZero := p.Column() - 1 // 0-based 25 if colZero == 0 { 26 // 0-based column 0, so it must be chr 1 27 return 1, nil 28 } else if colZero < 0 { 29 return -1, fmt.Errorf("ToUTF16Column: column is invalid (%v)", colZero) 30 } 31 // work out the offset at the start of the line using the column 32 lineOffset := offset - colZero 33 if lineOffset < 0 || offset > len(content) { 34 return -1, fmt.Errorf("ToUTF16Column: offsets %v-%v outside file contents (%v)", lineOffset, offset, len(content)) 35 } 36 // Use the offset to pick out the line start. 37 // This cannot panic: offset > len(content) and lineOffset < offset. 38 start := content[lineOffset:] 39 40 // Now, truncate down to the supplied column. 41 start = start[:colZero] 42 43 cnt := 0 44 for _, r := range string(start) { 45 cnt++ 46 if r > 0xffff { 47 cnt++ 48 } 49 } 50 return cnt + 1, nil // the +1 is for 1-based columns 51 } 52 53 // FromUTF16Column advances the point by the utf16 character offset given the 54 // supplied line contents. 55 // This is used to convert from the utf16 counts used by some editors to the 56 // native (always in bytes) column representation. 57 func FromUTF16Column(p Point, chr int, content []byte) (Point, error) { 58 if !p.HasOffset() { 59 return Point{}, fmt.Errorf("FromUTF16Column: point is missing offset") 60 } 61 // if chr is 1 then no adjustment needed 62 if chr <= 1 { 63 return p, nil 64 } 65 if p.Offset() >= len(content) { 66 return p, fmt.Errorf("FromUTF16Column: offset (%v) greater than length of content (%v)", p.Offset(), len(content)) 67 } 68 remains := content[p.Offset():] 69 // scan forward the specified number of characters 70 for count := 1; count < chr; count++ { 71 if len(remains) <= 0 { 72 return Point{}, fmt.Errorf("FromUTF16Column: chr goes beyond the content") 73 } 74 r, w := utf8.DecodeRune(remains) 75 if r == '\n' { 76 // Per the LSP spec: 77 // 78 // > If the character value is greater than the line length it 79 // > defaults back to the line length. 80 break 81 } 82 remains = remains[w:] 83 if r >= 0x10000 { 84 // a two point rune 85 count++ 86 // if we finished in a two point rune, do not advance past the first 87 if count >= chr { 88 break 89 } 90 } 91 p.v.Column += w 92 p.v.Offset += w 93 } 94 return p, nil 95 }