github.com/jd-ly/tools@v0.5.7/internal/span/utf16.go (about) 1 // Copyright 2019 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package span 6 7 import ( 8 "fmt" 9 "unicode/utf16" 10 "unicode/utf8" 11 ) 12 13 // ToUTF16Column calculates the utf16 column expressed by the point given the 14 // supplied file contents. 15 // This is used to convert from the native (always in bytes) column 16 // representation and the utf16 counts used by some editors. 17 func ToUTF16Column(p Point, content []byte) (int, error) { 18 if !p.HasPosition() { 19 return -1, fmt.Errorf("ToUTF16Column: point is missing position") 20 } 21 if !p.HasOffset() { 22 return -1, fmt.Errorf("ToUTF16Column: point is missing offset") 23 } 24 offset := p.Offset() // 0-based 25 colZero := p.Column() - 1 // 0-based 26 if colZero == 0 { 27 // 0-based column 0, so it must be chr 1 28 return 1, nil 29 } else if colZero < 0 { 30 return -1, fmt.Errorf("ToUTF16Column: column is invalid (%v)", colZero) 31 } 32 // work out the offset at the start of the line using the column 33 lineOffset := offset - colZero 34 if lineOffset < 0 || offset > len(content) { 35 return -1, fmt.Errorf("ToUTF16Column: offsets %v-%v outside file contents (%v)", lineOffset, offset, len(content)) 36 } 37 // Use the offset to pick out the line start. 38 // This cannot panic: offset > len(content) and lineOffset < offset. 39 start := content[lineOffset:] 40 41 // Now, truncate down to the supplied column. 42 start = start[:colZero] 43 44 // and count the number of utf16 characters 45 // in theory we could do this by hand more efficiently... 46 return len(utf16.Encode([]rune(string(start)))) + 1, nil 47 } 48 49 // FromUTF16Column advances the point by the utf16 character offset given the 50 // supplied line contents. 51 // This is used to convert from the utf16 counts used by some editors to the 52 // native (always in bytes) column representation. 53 func FromUTF16Column(p Point, chr int, content []byte) (Point, error) { 54 if !p.HasOffset() { 55 return Point{}, fmt.Errorf("FromUTF16Column: point is missing offset") 56 } 57 // if chr is 1 then no adjustment needed 58 if chr <= 1 { 59 return p, nil 60 } 61 if p.Offset() >= len(content) { 62 return p, fmt.Errorf("FromUTF16Column: offset (%v) greater than length of content (%v)", p.Offset(), len(content)) 63 } 64 remains := content[p.Offset():] 65 // scan forward the specified number of characters 66 for count := 1; count < chr; count++ { 67 if len(remains) <= 0 { 68 return Point{}, fmt.Errorf("FromUTF16Column: chr goes beyond the content") 69 } 70 r, w := utf8.DecodeRune(remains) 71 if r == '\n' { 72 // Per the LSP spec: 73 // 74 // > If the character value is greater than the line length it 75 // > defaults back to the line length. 76 break 77 } 78 remains = remains[w:] 79 if r >= 0x10000 { 80 // a two point rune 81 count++ 82 // if we finished in a two point rune, do not advance past the first 83 if count >= chr { 84 break 85 } 86 } 87 p.v.Column += w 88 p.v.Offset += w 89 } 90 return p, nil 91 }