/*
 * Copyright 2018 The Kythe Authors. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import {OffsetTable} from './utf8';

/**
 * Shared fixture mixing a 4-byte character ('🐶', two UTF-16 code units) with
 * a 3-byte character ('•', one UTF-16 code unit).
 *
 *   char | UTF-16 units | UTF-8 bytes
 *   -----+--------------+------------
 *   🐶   | 0-1          | 0-3
 *   •    | 2            | 4-6
 *   🐶   | 3-4          | 7-10
 *   •    | 5            | 11-13
 *   end  | 6            | 14
 */
const MIXED_WIDTH_TEXT = '🐶•🐶•';

// Each entry of `table.offsets` is a [utf16 offset, utf8 offset] pair. The
// second constructor argument is the span size referred to by the test names.
describe('utf8 create offset table', () => {
  it('should handle 1-byte character encoding', () => {
    const buf = Buffer.from('123');
    const table = new OffsetTable(buf, 1);
    expect(table.buf.length).toEqual(3);
    expect(table.offsets).toEqual([
      [0, 0],
      [1, 1],
      [2, 2],
      [3, 3],
    ]);
  });

  it('should handle 3-byte character encoding', () => {
    const buf = Buffer.from('12•3');
    const table = new OffsetTable(buf, 1);
    // Number of bytes = 1 + 1 + 3 + 1 = 6
    expect(table.buf.length).toEqual(6);
    expect(table.offsets).toEqual([
      [0, 0],
      [1, 1],
      [2, 2],
      [3, 5],
      [4, 6],
    ]);
  });

  it('should handle 4-byte character encoding', () => {
    const buf = Buffer.from('12🐶3');
    const table = new OffsetTable(buf, 1);
    // Number of bytes = 1 + 1 + 4 + 1 = 7
    expect(table.buf.length).toEqual(7);
    expect(table.offsets).toEqual([
      [0, 0],
      [1, 1],
      [2, 2],
      // utf16 offset 3 is skipped because it's within a surrogate pair.
      [4, 6],
      [5, 7],
    ]);
  });

  it('should handle mix of 3-byte and 4-byte character encoding', () => {
    const buf = Buffer.from(MIXED_WIDTH_TEXT);
    const table = new OffsetTable(buf, 1);
    // Number of bytes = 4 + 3 + 4 + 3 = 14
    expect(table.buf.length).toEqual(14);
    expect(table.offsets).toEqual([
      [0, 0],
      [2, 4],
      [3, 7],
      [5, 11],
      [6, 14],
    ]);
  });

  it('should work when span size is greater than 1', () => {
    const buf = Buffer.from(MIXED_WIDTH_TEXT);
    const table = new OffsetTable(buf, 2);
    // Number of bytes = 4 + 3 + 4 + 3 = 14
    expect(table.buf.length).toEqual(14);
    expect(table.offsets).toEqual([
      [0, 0],
      [2, 4],
      // utf16 offset 4 is skipped because it's within a surrogate pair, so
      // offset 5 is recorded instead.
      [5, 11],
      [6, 14],
    ]);
  });

  it('should work when span size is greater than Buffer size', () => {
    const buf = Buffer.from(MIXED_WIDTH_TEXT);
    const table = new OffsetTable(buf, 32);
    // Number of bytes = 4 + 3 + 4 + 3 = 14
    expect(table.buf.length).toEqual(14);
    // Only the starting entry is expected when the span exceeds the input.
    expect(table.offsets).toEqual([
      [0, 0],
    ]);
  });
});

// lookupUtf8 is given a UTF-16 offset and is expected to return the matching
// UTF-8 byte offset.
describe('lookupUtf8', () => {
  it('should throw an error at invalid lookup positions', () => {
    const buf = Buffer.from('🐶');
    const table = new OffsetTable(buf, 1);
    expect(() => table.lookupUtf8(0)).not.toThrow();
    // utf16 offset 1 is within a surrogate pair so it's invalid.
    expect(() => table.lookupUtf8(1))
        .toThrowError('The lookup offset is invalid');
  });

  it('should find the offsets when span size is greater than 1', () => {
    const buf = Buffer.from(MIXED_WIDTH_TEXT);
    // The span (32) exceeds the input, so the table holds a single entry.
    const table = new OffsetTable(buf, 32);
    expect(table.lookupUtf8(0)).toEqual(0);
    expect(table.lookupUtf8(2)).toEqual(4);
    expect(table.lookupUtf8(3)).toEqual(7);
    expect(table.lookupUtf8(5)).toEqual(11);
    expect(table.lookupUtf8(6)).toEqual(14);
  });

  it('should find the offsets when there are multiple spans', () => {
    const buf = Buffer.from(MIXED_WIDTH_TEXT);
    const table = new OffsetTable(buf, 2);
    expect(table.lookupUtf8(0)).toEqual(0);
    expect(table.lookupUtf8(2)).toEqual(4);
    expect(table.lookupUtf8(3)).toEqual(7);
    expect(table.lookupUtf8(5)).toEqual(11);
    expect(table.lookupUtf8(6)).toEqual(14);
  });
});

// lookupUtf16 is the reverse direction: it is given a UTF-8 byte offset and is
// expected to return the matching UTF-16 offset.
describe('lookupUtf16', () => {
  it('should throw an error at invalid lookup positions', () => {
    const buf = Buffer.from('🐶');
    const table = new OffsetTable(buf, 1);
    expect(() => table.lookupUtf16(0)).not.toThrow();
    // utf8 offset 1 falls inside the 4-byte encoding of '🐶', i.e. it is not
    // on a character boundary, so it's invalid.
    expect(() => table.lookupUtf16(1))
        .toThrowError('The lookup offset is invalid');
  });

  it('should find the offsets when span size is greater than 1', () => {
    const buf = Buffer.from(MIXED_WIDTH_TEXT);
    // The span (32) exceeds the input, so the table holds a single entry.
    const table = new OffsetTable(buf, 32);
    expect(table.lookupUtf16(0)).toEqual(0);
    expect(table.lookupUtf16(4)).toEqual(2);
    expect(table.lookupUtf16(7)).toEqual(3);
    expect(table.lookupUtf16(11)).toEqual(5);
    expect(table.lookupUtf16(14)).toEqual(6);
  });

  it('should find the offsets when there are multiple spans', () => {
    const buf = Buffer.from(MIXED_WIDTH_TEXT);
    const table = new OffsetTable(buf, 2);
    expect(table.lookupUtf16(0)).toEqual(0);
    expect(table.lookupUtf16(4)).toEqual(2);
    expect(table.lookupUtf16(7)).toEqual(3);
    expect(table.lookupUtf16(11)).toEqual(5);
    expect(table.lookupUtf16(14)).toEqual(6);
  });
});