kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/typescript/utf8_test.ts (about)

     1  /*
     2   * Copyright 2018 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  import {OffsetTable} from './utf8';
    18  
    19  describe('utf8 create offset table', () => {  // Pins `offsets` as built by the OffsetTable constructor.
    20    it('should handle 1-byte character encoding', () => {
    21      const buf = Buffer.from('123');
    22      const table = new OffsetTable(buf, 1);  // 2nd argument: span size (per the test names below).
    23      expect(table.buf.length).toEqual(3);
    24      expect(table.offsets).toEqual([  // Entries read as [utf16 offset, utf8 byte offset].
    25        [0, 0],
    26        [1, 1],
    27        [2, 2],
    28        [3, 3],  // The end-of-input position is expected as well.
    29      ]);
    30    });
    31  
    32    it('should handle 3-byte character encoding', () => {
    33      const buf = Buffer.from('12•3');
    34      const table = new OffsetTable(buf, 1);
    35      // Number of bytes = 1 + 1 + 3 + 1 = 6
    36      expect(table.buf.length).toEqual(6);
    37      expect(table.offsets).toEqual([
    38        [0, 0],
    39        [1, 1],
    40        [2, 2],
    41        [3, 5],  // '•' is 1 utf16 code unit but 3 utf8 bytes.
    42        [4, 6],
    43      ]);
    44    });
    45  
    46    it('should handle 4-byte character encoding', () => {
    47      const buf = Buffer.from('12🐶3');
    48      const table = new OffsetTable(buf, 1);
    49      // Number of bytes = 1 + 1 + 4 + 1 = 7
    50      expect(table.buf.length).toEqual(7);
    51      expect(table.offsets).toEqual([
    52        [0, 0],
    53        [1, 1],
    54        [2, 2],
    55        // utf16 offset 3 is skipped because it's within a surrogate pair.
    56        [4, 6],  // '🐶' is 2 utf16 code units and 4 utf8 bytes.
    57        [5, 7],
    58      ]);
    59    });
    60  
    61    it('should handle mix of 3-byte and 4-byte character encoding', () => {
    62      const buf = Buffer.from('🐶•🐶•');
    63      const table = new OffsetTable(buf, 1);
    64      // Number of bytes = 4 + 3 + 4 + 3 = 14
    65      expect(table.buf.length).toEqual(14);
    66      expect(table.offsets).toEqual([
    67        [0, 0],
    68        [2, 4],  // after the first '🐶'
    69        [3, 7],  // after the first '•'
    70        [5, 11],  // after the second '🐶'
    71        [6, 14],  // end of input
    72      ]);
    73    });
    74  
    75    it('should work when span size is greater than 1', () => {
    76      const buf = Buffer.from('🐶•🐶•');
    77      const table = new OffsetTable(buf, 2);  // Span size 2: not every offset is listed.
    78      // Number of bytes = 4 + 3 + 4 + 3 = 14
    79      expect(table.buf.length).toEqual(14);
    80      expect(table.offsets).toEqual([
    81        [0, 0],
    82        [2, 4],
    83        // utf16 offset 4 is skipped because it's within a surrogate pair.
    84        // Offset 5 is expected in its place.
    85        [5, 11],
    86        [6, 14],
    87      ]);
    88    });
    89  
    90    it('should work when span size is greater than Buffer size', () => {
    91      const buf = Buffer.from('🐶•🐶•');
    92      const table = new OffsetTable(buf, 32);
    93      // Number of bytes = 4 + 3 + 4 + 3 = 14
    94      expect(table.buf.length).toEqual(14);
    95      expect(table.offsets).toEqual([
    96        [0, 0],  // Span size exceeds the input, so only the initial entry is expected.
    97      ]);
    98    });
    99  });
   100  
   101  describe('lookupUtf8', () => {  // Argument is a utf16 offset; expected result is the utf8 byte offset.
   102    it('should throw an error at invalid lookup positions', () => {
   103      const buf = Buffer.from('🐶');
   104      const table = new OffsetTable(buf, 1);
   105      expect(() => table.lookupUtf8(0)).not.toThrow();
   106      // utf16 offset 1 is within the emoji's surrogate pair so it's invalid.
   107      expect(() => table.lookupUtf8(1))
   108          .toThrowError('The lookup offset is invalid');
   109    });
   110  
   111    it('should find the offsets when span size is greater than 1', () => {
   112      const buf = Buffer.from('🐶•🐶•');
   113      const table = new OffsetTable(buf, 32);  // Span size larger than the whole input.
   114      expect(table.lookupUtf8(0)).toEqual(0);
   115      expect(table.lookupUtf8(2)).toEqual(4);  // after the first '🐶' (4 bytes)
   116      expect(table.lookupUtf8(3)).toEqual(7);  // after the first '•' (3 bytes)
   117      expect(table.lookupUtf8(5)).toEqual(11);  // after the second '🐶'
   118      expect(table.lookupUtf8(6)).toEqual(14);  // end of input
   119    });
   120  
   121    it('should find the offsets when there are multiple spans', () => {
   122      const buf = Buffer.from('🐶•🐶•');
   123      const table = new OffsetTable(buf, 2);  // Same lookups as above, with span size 2.
   124      expect(table.lookupUtf8(0)).toEqual(0);
   125      expect(table.lookupUtf8(2)).toEqual(4);
   126      expect(table.lookupUtf8(3)).toEqual(7);
   127      expect(table.lookupUtf8(5)).toEqual(11);
   128      expect(table.lookupUtf8(6)).toEqual(14);
   129    });
   130  });
   131  
   132  describe('lookupUtf16', () => {  // Argument is a utf8 byte offset; expected result is the utf16 offset.
   133    it('should throw an error at invalid lookup positions', () => {
   134      const buf = Buffer.from('🐶');
   135      const table = new OffsetTable(buf, 1);
   136      expect(() => table.lookupUtf16(0)).not.toThrow();
   137      // utf8 offset 1 is inside the emoji's 4-byte sequence so it's invalid.
   138      expect(() => table.lookupUtf16(1))
   139          .toThrowError('The lookup offset is invalid');
   140    });
   141  
   142    it('should find the offsets when span size is greater than 1', () => {
   143      const buf = Buffer.from('🐶•🐶•');
   144      const table = new OffsetTable(buf, 32);  // Span size larger than the whole input.
   145      expect(table.lookupUtf16(0)).toEqual(0);
   146      expect(table.lookupUtf16(4)).toEqual(2);  // after the first '🐶' (2 utf16 units)
   147      expect(table.lookupUtf16(7)).toEqual(3);  // after the first '•' (1 utf16 unit)
   148      expect(table.lookupUtf16(11)).toEqual(5);  // after the second '🐶'
   149      expect(table.lookupUtf16(14)).toEqual(6);  // end of input
   150    });
   151  
   152    it('should find the offsets when there are multiple spans', () => {
   153      const buf = Buffer.from('🐶•🐶•');
   154      const table = new OffsetTable(buf, 2);  // Same lookups as above, with span size 2.
   155      expect(table.lookupUtf16(0)).toEqual(0);
   156      expect(table.lookupUtf16(4)).toEqual(2);
   157      expect(table.lookupUtf16(7)).toEqual(3);
   158      expect(table.lookupUtf16(11)).toEqual(5);
   159      expect(table.lookupUtf16(14)).toEqual(6);
   160    });
   161  });