github.com/jd-ly/tools@v0.5.7/internal/span/utf16_test.go (about)

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package span_test
     6  
     7  import (
     8  	"strings"
     9  	"testing"
    10  
    11  	"github.com/jd-ly/tools/internal/span"
    12  )
    13  
    14  // The funny character below is 4 bytes long in UTF-8; two UTF-16 code points
    15  var funnyString = []byte("𐐀23\n𐐀45")
    16  
    17  var toUTF16Tests = []struct {
    18  	scenario    string
    19  	input       []byte
    20  	line        int    // 1-indexed count
    21  	col         int    // 1-indexed byte position in line
    22  	offset      int    // 0-indexed byte offset into input
    23  	resUTF16col int    // 1-indexed UTF-16 col number
    24  	pre         string // everything before the cursor on the line
    25  	post        string // everything from the cursor onwards
    26  	err         string // expected error string in call to ToUTF16Column
    27  	issue       *bool
    28  }{
    29  	{
    30  		scenario: "cursor missing content",
    31  		input:    nil,
    32  		err:      "ToUTF16Column: point is missing position",
    33  	},
    34  	{
    35  		scenario: "cursor missing position",
    36  		input:    funnyString,
    37  		line:     -1,
    38  		col:      -1,
    39  		err:      "ToUTF16Column: point is missing position",
    40  	},
    41  	{
    42  		scenario: "cursor missing offset",
    43  		input:    funnyString,
    44  		line:     1,
    45  		col:      1,
    46  		offset:   -1,
    47  		err:      "ToUTF16Column: point is missing offset",
    48  	},
    49  	{
    50  		scenario:    "zero length input; cursor at first col, first line",
    51  		input:       []byte(""),
    52  		line:        1,
    53  		col:         1,
    54  		offset:      0,
    55  		resUTF16col: 1,
    56  	},
    57  	{
    58  		scenario:    "cursor before funny character; first line",
    59  		input:       funnyString,
    60  		line:        1,
    61  		col:         1,
    62  		offset:      0,
    63  		resUTF16col: 1,
    64  		pre:         "",
    65  		post:        "𐐀23",
    66  	},
    67  	{
    68  		scenario:    "cursor after funny character; first line",
    69  		input:       funnyString,
    70  		line:        1,
    71  		col:         5, // 4 + 1 (1-indexed)
    72  		offset:      4,
    73  		resUTF16col: 3, // 2 + 1 (1-indexed)
    74  		pre:         "𐐀",
    75  		post:        "23",
    76  	},
    77  	{
    78  		scenario:    "cursor after last character on first line",
    79  		input:       funnyString,
    80  		line:        1,
    81  		col:         7, // 4 + 1 + 1 + 1 (1-indexed)
    82  		offset:      6, // 4 + 1 + 1
    83  		resUTF16col: 5, // 2 + 1 + 1 + 1 (1-indexed)
    84  		pre:         "𐐀23",
    85  		post:        "",
    86  	},
    87  	{
    88  		scenario:    "cursor before funny character; second line",
    89  		input:       funnyString,
    90  		line:        2,
    91  		col:         1,
    92  		offset:      7, // length of first line
    93  		resUTF16col: 1,
    94  		pre:         "",
    95  		post:        "𐐀45",
    96  	},
    97  	{
    98  		scenario:    "cursor after funny character; second line",
    99  		input:       funnyString,
   100  		line:        1,
   101  		col:         5,  // 4 + 1 (1-indexed)
   102  		offset:      11, // 7 (length of first line) + 4
   103  		resUTF16col: 3,  // 2 + 1 (1-indexed)
   104  		pre:         "𐐀",
   105  		post:        "45",
   106  	},
   107  	{
   108  		scenario:    "cursor after last character on second line",
   109  		input:       funnyString,
   110  		line:        2,
   111  		col:         7,  // 4 + 1 + 1 + 1 (1-indexed)
   112  		offset:      13, // 7 (length of first line) + 4 + 1 + 1
   113  		resUTF16col: 5,  // 2 + 1 + 1 + 1 (1-indexed)
   114  		pre:         "𐐀45",
   115  		post:        "",
   116  	},
   117  	{
   118  		scenario: "cursor beyond end of file",
   119  		input:    funnyString,
   120  		line:     2,
   121  		col:      8,  // 4 + 1 + 1 + 1 + 1 (1-indexed)
   122  		offset:   14, // 4 + 1 + 1 + 1
   123  		err:      "ToUTF16Column: offsets 7-14 outside file contents (13)",
   124  	},
   125  }
   126  
   127  var fromUTF16Tests = []struct {
   128  	scenario  string
   129  	input     []byte
   130  	line      int    // 1-indexed line number (isn't actually used)
   131  	offset    int    // 0-indexed byte offset to beginning of line
   132  	utf16col  int    // 1-indexed UTF-16 col number
   133  	resCol    int    // 1-indexed byte position in line
   134  	resOffset int    // 0-indexed byte offset into input
   135  	pre       string // everything before the cursor on the line
   136  	post      string // everything from the cursor onwards
   137  	err       string // expected error string in call to ToUTF16Column
   138  }{
   139  	{
   140  		scenario:  "zero length input; cursor at first col, first line",
   141  		input:     []byte(""),
   142  		line:      1,
   143  		offset:    0,
   144  		utf16col:  1,
   145  		resCol:    1,
   146  		resOffset: 0,
   147  		pre:       "",
   148  		post:      "",
   149  	},
   150  	{
   151  		scenario: "missing offset",
   152  		input:    funnyString,
   153  		line:     1,
   154  		offset:   -1,
   155  		err:      "FromUTF16Column: point is missing offset",
   156  	},
   157  	{
   158  		scenario:  "cursor before funny character",
   159  		input:     funnyString,
   160  		line:      1,
   161  		utf16col:  1,
   162  		resCol:    1,
   163  		resOffset: 0,
   164  		pre:       "",
   165  		post:      "𐐀23",
   166  	},
   167  	{
   168  		scenario:  "cursor after funny character",
   169  		input:     funnyString,
   170  		line:      1,
   171  		utf16col:  3,
   172  		resCol:    5,
   173  		resOffset: 4,
   174  		pre:       "𐐀",
   175  		post:      "23",
   176  	},
   177  	{
   178  		scenario:  "cursor after last character on line",
   179  		input:     funnyString,
   180  		line:      1,
   181  		utf16col:  5,
   182  		resCol:    7,
   183  		resOffset: 6,
   184  		pre:       "𐐀23",
   185  		post:      "",
   186  	},
   187  	{
   188  		scenario:  "cursor beyond last character on line",
   189  		input:     funnyString,
   190  		line:      1,
   191  		offset:    0,
   192  		utf16col:  6,
   193  		resCol:    7,
   194  		resOffset: 6,
   195  		pre:       "𐐀23",
   196  		post:      "",
   197  	},
   198  	{
   199  		scenario:  "cursor before funny character; second line",
   200  		input:     funnyString,
   201  		line:      2,
   202  		offset:    7, // length of first line
   203  		utf16col:  1,
   204  		resCol:    1,
   205  		resOffset: 7,
   206  		pre:       "",
   207  		post:      "𐐀45",
   208  	},
   209  	{
   210  		scenario:  "cursor after funny character; second line",
   211  		input:     funnyString,
   212  		line:      2,
   213  		offset:    7,  // length of first line
   214  		utf16col:  3,  // 2 + 1 (1-indexed)
   215  		resCol:    5,  // 4 + 1 (1-indexed)
   216  		resOffset: 11, // 7 (length of first line) + 4
   217  		pre:       "𐐀",
   218  		post:      "45",
   219  	},
   220  	{
   221  		scenario:  "cursor after last character on second line",
   222  		input:     funnyString,
   223  		line:      2,
   224  		offset:    7,  // length of first line
   225  		utf16col:  5,  // 2 + 1 + 1 + 1 (1-indexed)
   226  		resCol:    7,  // 4 + 1 + 1 + 1 (1-indexed)
   227  		resOffset: 13, // 7 (length of first line) + 4 + 1 + 1
   228  		pre:       "𐐀45",
   229  		post:      "",
   230  	},
   231  	{
   232  		scenario:  "cursor beyond end of file",
   233  		input:     funnyString,
   234  		line:      2,
   235  		offset:    7,
   236  		utf16col:  6,  // 2 + 1 + 1 + 1 + 1(1-indexed)
   237  		resCol:    8,  // 4 + 1 + 1 + 1 + 1 (1-indexed)
   238  		resOffset: 14, // 7 (length of first line) + 4 + 1 + 1 + 1
   239  		err:       "FromUTF16Column: chr goes beyond the content",
   240  	},
   241  	{
   242  		scenario: "offset beyond end of file",
   243  		input:    funnyString,
   244  		line:     2,
   245  		offset:   14,
   246  		utf16col: 2,
   247  		err:      "FromUTF16Column: offset (14) greater than length of content (13)",
   248  	},
   249  }
   250  
   251  func TestToUTF16(t *testing.T) {
   252  	for _, e := range toUTF16Tests {
   253  		t.Run(e.scenario, func(t *testing.T) {
   254  			if e.issue != nil && !*e.issue {
   255  				t.Skip("expected to fail")
   256  			}
   257  			p := span.NewPoint(e.line, e.col, e.offset)
   258  			got, err := span.ToUTF16Column(p, e.input)
   259  			if err != nil {
   260  				if err.Error() != e.err {
   261  					t.Fatalf("expected error %v; got %v", e.err, err)
   262  				}
   263  				return
   264  			}
   265  			if e.err != "" {
   266  				t.Fatalf("unexpected success; wanted %v", e.err)
   267  			}
   268  			if got != e.resUTF16col {
   269  				t.Fatalf("expected result %v; got %v", e.resUTF16col, got)
   270  			}
   271  			pre, post := getPrePost(e.input, p.Offset())
   272  			if string(pre) != e.pre {
   273  				t.Fatalf("expected #%d pre %q; got %q", p.Offset(), e.pre, pre)
   274  			}
   275  			if string(post) != e.post {
   276  				t.Fatalf("expected #%d, post %q; got %q", p.Offset(), e.post, post)
   277  			}
   278  		})
   279  	}
   280  }
   281  
   282  func TestFromUTF16(t *testing.T) {
   283  	for _, e := range fromUTF16Tests {
   284  		t.Run(e.scenario, func(t *testing.T) {
   285  			p := span.NewPoint(e.line, 1, e.offset)
   286  			p, err := span.FromUTF16Column(p, e.utf16col, []byte(e.input))
   287  			if err != nil {
   288  				if err.Error() != e.err {
   289  					t.Fatalf("expected error %v; got %v", e.err, err)
   290  				}
   291  				return
   292  			}
   293  			if e.err != "" {
   294  				t.Fatalf("unexpected success; wanted %v", e.err)
   295  			}
   296  			if p.Column() != e.resCol {
   297  				t.Fatalf("expected resulting col %v; got %v", e.resCol, p.Column())
   298  			}
   299  			if p.Offset() != e.resOffset {
   300  				t.Fatalf("expected resulting offset %v; got %v", e.resOffset, p.Offset())
   301  			}
   302  			pre, post := getPrePost(e.input, p.Offset())
   303  			if string(pre) != e.pre {
   304  				t.Fatalf("expected #%d pre %q; got %q", p.Offset(), e.pre, pre)
   305  			}
   306  			if string(post) != e.post {
   307  				t.Fatalf("expected #%d post %q; got %q", p.Offset(), e.post, post)
   308  			}
   309  		})
   310  	}
   311  }
   312  
   313  func getPrePost(content []byte, offset int) (string, string) {
   314  	pre, post := string(content)[:offset], string(content)[offset:]
   315  	if i := strings.LastIndex(pre, "\n"); i >= 0 {
   316  		pre = pre[i+1:]
   317  	}
   318  	if i := strings.IndexRune(post, '\n'); i >= 0 {
   319  		post = post[:i]
   320  	}
   321  	return pre, post
   322  }