github.com/powerman/golang-tools@v0.1.11-0.20220410185822-5ad214d8d803/internal/span/utf16.go (about)

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package span
     6  
     7  import (
     8  	"fmt"
     9  	"unicode/utf8"
    10  )
    11  
    12  // ToUTF16Column calculates the utf16 column expressed by the point given the
    13  // supplied file contents.
    14  // This is used to convert from the native (always in bytes) column
    15  // representation and the utf16 counts used by some editors.
    16  func ToUTF16Column(p Point, content []byte) (int, error) {
    17  	if !p.HasPosition() {
    18  		return -1, fmt.Errorf("ToUTF16Column: point is missing position")
    19  	}
    20  	if !p.HasOffset() {
    21  		return -1, fmt.Errorf("ToUTF16Column: point is missing offset")
    22  	}
    23  	offset := p.Offset()      // 0-based
    24  	colZero := p.Column() - 1 // 0-based
    25  	if colZero == 0 {
    26  		// 0-based column 0, so it must be chr 1
    27  		return 1, nil
    28  	} else if colZero < 0 {
    29  		return -1, fmt.Errorf("ToUTF16Column: column is invalid (%v)", colZero)
    30  	}
    31  	// work out the offset at the start of the line using the column
    32  	lineOffset := offset - colZero
    33  	if lineOffset < 0 || offset > len(content) {
    34  		return -1, fmt.Errorf("ToUTF16Column: offsets %v-%v outside file contents (%v)", lineOffset, offset, len(content))
    35  	}
    36  	// Use the offset to pick out the line start.
    37  	// This cannot panic: offset > len(content) and lineOffset < offset.
    38  	start := content[lineOffset:]
    39  
    40  	// Now, truncate down to the supplied column.
    41  	start = start[:colZero]
    42  
    43  	cnt := 0
    44  	for _, r := range string(start) {
    45  		cnt++
    46  		if r > 0xffff {
    47  			cnt++
    48  		}
    49  	}
    50  	return cnt + 1, nil // the +1 is for 1-based columns
    51  }
    52  
    53  // FromUTF16Column advances the point by the utf16 character offset given the
    54  // supplied line contents.
    55  // This is used to convert from the utf16 counts used by some editors to the
    56  // native (always in bytes) column representation.
    57  func FromUTF16Column(p Point, chr int, content []byte) (Point, error) {
    58  	if !p.HasOffset() {
    59  		return Point{}, fmt.Errorf("FromUTF16Column: point is missing offset")
    60  	}
    61  	// if chr is 1 then no adjustment needed
    62  	if chr <= 1 {
    63  		return p, nil
    64  	}
    65  	if p.Offset() >= len(content) {
    66  		return p, fmt.Errorf("FromUTF16Column: offset (%v) greater than length of content (%v)", p.Offset(), len(content))
    67  	}
    68  	remains := content[p.Offset():]
    69  	// scan forward the specified number of characters
    70  	for count := 1; count < chr; count++ {
    71  		if len(remains) <= 0 {
    72  			return Point{}, fmt.Errorf("FromUTF16Column: chr goes beyond the content")
    73  		}
    74  		r, w := utf8.DecodeRune(remains)
    75  		if r == '\n' {
    76  			// Per the LSP spec:
    77  			//
    78  			// > If the character value is greater than the line length it
    79  			// > defaults back to the line length.
    80  			break
    81  		}
    82  		remains = remains[w:]
    83  		if r >= 0x10000 {
    84  			// a two point rune
    85  			count++
    86  			// if we finished in a two point rune, do not advance past the first
    87  			if count >= chr {
    88  				break
    89  			}
    90  		}
    91  		p.v.Column += w
    92  		p.v.Offset += w
    93  	}
    94  	return p, nil
    95  }