// Source: github.com/unidoc/unidoc@v2.2.0+incompatible/pdf/internal/cmap/cmap_test.go

/*
 * This file is subject to the terms and conditions defined in
 * file 'LICENSE.md', which is part of this source code package.
 */

package cmap

import (
	"testing"
)

// init is intentionally empty. It only serves as a place to switch on verbose
// logging while debugging the tests in this file.
func init() {
	// Uncomment when debugging to get debug or trace logging output.
	//common.SetLogger(common.NewConsoleLogger(common.LogLevelDebug))
	//common.SetLogger(common.NewConsoleLogger(common.LogLevelTrace))
}

// cmap1Data represents a basic CMap.
//
// It declares a single codespace range <0000>-<FFFF>, eight bfchar entries
// and seven bfrange entries, and is the fixture used by TestCMapParser1.
const cmap1Data = `
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (UCS)
/Supplement 0
>> def
/CMapName /Adobe-Identity-UCS def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
8 beginbfchar
<0003> <0020>
<0007> <0024>
<0033> <0050>
<0035> <0052>
<0037> <0054>
<005A> <0077>
<005C> <0079>
<005F> <007C>
endbfchar
7 beginbfrange
<000F> <0017> <002C>
<001B> <001D> <0038>
<0025> <0026> <0042>
<002F> <0031> <004C>
<0044> <004C> <0061>
<004F> <0053> <006C>
<0055> <0057> <0072>
endbfrange
endcmap
CMapName currentdict /CMap defineresource pop
end
end
`

// TestCMapParser1 tests basic loading of a simple CMap.
59 func TestCMapParser1(t *testing.T) { 60 cmap, err := LoadCmapFromData([]byte(cmap1Data)) 61 if err != nil { 62 t.Error("Failed: ", err) 63 return 64 } 65 66 if cmap.Name() != "Adobe-Identity-UCS" { 67 t.Errorf("CMap name incorrect (%s)", cmap.Name()) 68 return 69 } 70 71 if cmap.Type() != 2 { 72 t.Errorf("CMap type incorrect") 73 return 74 } 75 76 if len(cmap.codespaces) != 1 { 77 t.Errorf("len codespace != 1 (%d)", len(cmap.codespaces)) 78 return 79 } 80 81 if cmap.codespaces[0].low != 0 { 82 t.Errorf("code space low range != 0 (%d)", cmap.codespaces[0].low) 83 return 84 } 85 86 if cmap.codespaces[0].high != 0xFFFF { 87 t.Errorf("code space high range != 0xffff (%d)", cmap.codespaces[0].high) 88 return 89 } 90 91 expectedMappings := map[uint64]rune{ 92 0x0003: 0x0020, 93 0x005F: 0x007C, 94 0x000F: 0x002C, 95 0x000F + 5: 0x002C + 5, 96 0x001B: 0x0038, 97 0x001B + 2: 0x0038 + 2, 98 0x002F: 0x004C, 99 0x0044: 0x0061, 100 0x004F: 0x006C, 101 0x0055: 0x0072, 102 } 103 104 for k, expected := range expectedMappings { 105 if v := cmap.CharcodeToUnicode(k); v != string(expected) { 106 t.Errorf("incorrect mapping, expecting 0x%X -> 0x%X (%#v)", k, expected, v) 107 return 108 } 109 } 110 111 v := cmap.CharcodeToUnicode(0x99) 112 if v != "?" 
{ //!= "notdef" { 113 t.Errorf("Unmapped code, expected to map to undefined") 114 return 115 } 116 117 charcodes := []byte{0x00, 0x03, 0x00, 0x0F} 118 s := cmap.CharcodeBytesToUnicode(charcodes) 119 if s != " ," { 120 t.Error("Incorrect charcode bytes -> string mapping") 121 return 122 } 123 } 124 125 const cmap2Data = ` 126 /CIDInit /ProcSet findresource begin 127 12 dict begin 128 begincmap 129 /CIDSystemInfo 130 << /Registry (Adobe) 131 /Ordering (UCS) 132 /Supplement 0 133 >> def 134 /CMapName /Adobe-Identity-UCS def 135 /CMapType 2 def 136 1 begincodespacerange 137 <0000> <FFFF> 138 endcodespacerange 139 7 beginbfrange 140 <0080> <00FF> <002C> 141 <802F> <902F> <0038> 142 endbfrange 143 endcmap 144 CMapName currentdict /CMap defineresource pop 145 end 146 end 147 ` 148 149 // TestCMapParser2 tests a bug that came up when 2-byte character codes had the higher byte set to 0, 150 // e.g. 0x0080, and the character map was not taking the number of bytes of the input codemap into account. 
151 func TestCMapParser2(t *testing.T) { 152 //common.SetLogger(common.NewConsoleLogger(common.LogLevelTrace)) 153 154 cmap, err := LoadCmapFromData([]byte(cmap2Data)) 155 if err != nil { 156 t.Error("Failed: ", err) 157 return 158 } 159 160 if cmap.Name() != "Adobe-Identity-UCS" { 161 t.Errorf("CMap name incorrect (%s)", cmap.Name()) 162 return 163 } 164 165 if cmap.Type() != 2 { 166 t.Errorf("CMap type incorrect") 167 return 168 } 169 170 if len(cmap.codespaces) != 1 { 171 t.Errorf("len codespace != 1 (%d)", len(cmap.codespaces)) 172 return 173 } 174 175 if cmap.codespaces[0].low != 0 { 176 t.Errorf("code space low range != 0 (%d)", cmap.codespaces[0].low) 177 return 178 } 179 180 if cmap.codespaces[0].high != 0xFFFF { 181 t.Errorf("code space high range != 0xffff (%d)", cmap.codespaces[0].high) 182 return 183 } 184 185 expectedMappings := map[uint64]rune{ 186 0x0080: 0x002C, 187 0x802F: 0x0038, 188 } 189 190 for k, expected := range expectedMappings { 191 if v := cmap.CharcodeToUnicode(k); v != string(expected) { 192 t.Errorf("incorrect mapping, expecting 0x%X -> 0x%X (got 0x%X)", k, expected, v) 193 return 194 } 195 } 196 197 // Check byte sequence mappings. 198 excpectedSequenceMappings := []struct { 199 bytes []byte 200 expected string 201 }{ 202 {[]byte{0x80, 0x2F, 0x00, 0x80}, string([]rune{0x0038, 0x002C})}, 203 } 204 205 for _, exp := range excpectedSequenceMappings { 206 str := cmap.CharcodeBytesToUnicode(exp.bytes) 207 if str != exp.expected { 208 t.Errorf("Incorrect byte sequence mapping -> % X -> % X (got % X)", exp.bytes, []rune(exp.expected), []rune(str)) 209 return 210 } 211 } 212 } 213 214 // cmapData3 is a CMap with a mixture of 1 and 2 byte codespaces. 
215 const cmapData3 = ` 216 /CIDInit /ProcSet findresource begin 217 12 dict begin begincmap 218 /CIDSystemInfo 219 3 dict dup begin 220 /Registry (Adobe) def 221 /Supplement 2 def 222 end def 223 224 /CMapName /test-1 def 225 /CMapType 1 def 226 227 4 begincodespacerange 228 <00> <80> 229 <8100> <9fff> 230 <a0> <df> 231 <d040> <fbfc> 232 endcodespacerange 233 7 beginbfrange 234 <00> <80> <10> 235 <8100> <9f00> <1000> 236 <a0> <d0> <90> 237 <d140> <f000> <a000> 238 endbfrange 239 endcmap 240 ` 241 242 // TestCMapParser3 test case of a CMap with mixed number of 1 and 2 bytes in the codespace range. 243 func TestCMapParser3(t *testing.T) { 244 //common.SetLogger(common.NewConsoleLogger(common.LogLevelTrace)) 245 246 cmap, err := LoadCmapFromData([]byte(cmapData3)) 247 if err != nil { 248 t.Error("Failed: ", err) 249 return 250 } 251 252 if cmap.Name() != "test-1" { 253 t.Errorf("CMap name incorrect (%s)", cmap.Name()) 254 return 255 } 256 257 if cmap.Type() != 1 { 258 t.Errorf("CMap type incorrect") 259 return 260 } 261 262 // Check codespaces. 263 expectedCodespaces := []struct { 264 numBytes int 265 low uint64 266 high uint64 267 }{ 268 {1, 0x00, 0x80}, 269 {2, 0x8100, 0x9fff}, 270 {1, 0xa0, 0xdf}, 271 {2, 0xd040, 0xfbfc}, 272 } 273 274 if len(cmap.codespaces) != len(expectedCodespaces) { 275 t.Errorf("len codespace != %d (%d)", len(expectedCodespaces), len(cmap.codespaces)) 276 return 277 } 278 279 for i, cs := range cmap.codespaces { 280 exp := expectedCodespaces[i] 281 if cs.numBytes != exp.numBytes { 282 t.Errorf("code space number of bytes != %d (%d)", exp.numBytes, cs.numBytes) 283 return 284 } 285 286 if cs.low != exp.low { 287 t.Errorf("code space low range != %d (%d)", exp.low, cs.low) 288 return 289 } 290 291 if cs.high != exp.high { 292 t.Errorf("code space high range != 0x%X (0x%X)", exp.high, cs.high) 293 return 294 } 295 } 296 297 // Check mappings. 
298 expectedMappings := map[uint64]rune{ 299 0x0080: 0x10 + 0x80, 300 0x8100: 0x1000, 301 0x00a0: 0x90, 302 0xd140: 0xa000, 303 } 304 for k, expected := range expectedMappings { 305 if v := cmap.CharcodeToUnicode(k); v != string(expected) { 306 t.Errorf("incorrect mapping, expecting 0x%X -> 0x%X (got 0x%X)", k, expected, v) 307 return 308 } 309 } 310 311 // Check byte sequence mappings. 312 excpectedSequenceMappings := []struct { 313 bytes []byte 314 expected string 315 }{ 316 {[]byte{0x80, 0x81, 0x00, 0xa1, 0xd1, 0x80, 0x00}, string([]rune{0x90, 0x1000, 0x91, 0xa000 + 0x40, 0x10})}, 317 } 318 319 for _, exp := range excpectedSequenceMappings { 320 str := cmap.CharcodeBytesToUnicode(exp.bytes) 321 if str != exp.expected { 322 t.Errorf("Incorrect byte sequence mapping -> % X -> % X (got % X)", exp.bytes, []rune(exp.expected), []rune(str)) 323 return 324 } 325 } 326 }