github.com/unidoc/unidoc@v2.2.0+incompatible/pdf/model/textencoding/winansi.go (about) 1 /* 2 * This file is subject to the terms and conditions defined in 3 * file 'LICENSE.md', which is part of this source code package. 4 */ 5 6 package textencoding 7 8 import ( 9 "github.com/unidoc/unidoc/common" 10 "github.com/unidoc/unidoc/pdf/core" 11 ) 12 13 // WinAnsiEncoding. 14 type WinAnsiEncoder struct { 15 } 16 17 func NewWinAnsiTextEncoder() WinAnsiEncoder { 18 encoder := WinAnsiEncoder{} 19 return encoder 20 } 21 22 func (winenc WinAnsiEncoder) ToPdfObject() core.PdfObject { 23 return core.MakeName("WinAnsiEncoding") 24 } 25 26 // Convert a raw utf8 string (series of runes) to an encoded string (series of character codes) to be used in PDF. 27 func (winenc WinAnsiEncoder) Encode(raw string) string { 28 encoded := []byte{} 29 for _, rune := range raw { 30 code, has := winenc.RuneToCharcode(rune) 31 if has { 32 encoded = append(encoded, code) 33 } 34 } 35 36 return string(encoded) 37 } 38 39 // Conversion between character code and glyph name. 40 // The bool return flag is true if there was a match, and false otherwise. 41 func (winenc WinAnsiEncoder) CharcodeToGlyph(code byte) (string, bool) { 42 glyph, has := winansiEncodingCharcodeToGlyphMap[code] 43 if !has { 44 common.Log.Debug("Charcode -> Glyph error: charcode not found: %d\n", code) 45 return "", false 46 } 47 return glyph, true 48 } 49 50 // Conversion between glyph name and character code. 51 // The bool return flag is true if there was a match, and false otherwise. 52 func (winenc WinAnsiEncoder) GlyphToCharcode(glyph string) (byte, bool) { 53 code, found := winansiEncodingGlyphToCharcodeMap[glyph] 54 if !found { 55 common.Log.Debug("Glyph -> Charcode error: glyph not found: %s\n", glyph) 56 return 0, false 57 } 58 59 return code, true 60 } 61 62 // Convert rune to character code. 63 // The bool return flag is true if there was a match, and false otherwise. 
64 func (winenc WinAnsiEncoder) RuneToCharcode(val rune) (byte, bool) { 65 glyph, found := winenc.RuneToGlyph(val) 66 if !found { 67 return 0, false 68 } 69 70 code, found := winansiEncodingGlyphToCharcodeMap[glyph] 71 if !found { 72 common.Log.Debug("Glyph -> Charcode error: glyph not found %s\n", glyph) 73 return 0, false 74 } 75 76 return code, true 77 } 78 79 // Convert character code to rune. 80 // The bool return flag is true if there was a match, and false otherwise. 81 func (winenc WinAnsiEncoder) CharcodeToRune(charcode byte) (rune, bool) { 82 glyph, found := winansiEncodingCharcodeToGlyphMap[charcode] 83 if !found { 84 common.Log.Debug("Charcode -> Glyph error: charcode not found: %d\n", charcode) 85 return 0, false 86 } 87 88 ucode, found := glyphToRune(glyph, glyphlistGlyphToRuneMap) 89 if !found { 90 return 0, false 91 } 92 93 return ucode, true 94 } 95 96 // Convert rune to glyph name. 97 // The bool return flag is true if there was a match, and false otherwise. 98 func (winenc WinAnsiEncoder) RuneToGlyph(val rune) (string, bool) { 99 return runeToGlyph(val, glyphlistRuneToGlyphMap) 100 } 101 102 // Convert glyph to rune. 103 // The bool return flag is true if there was a match, and false otherwise. 104 func (winenc WinAnsiEncoder) GlyphToRune(glyph string) (rune, bool) { 105 return glyphToRune(glyph, glyphlistGlyphToRuneMap) 106 } 107 108 // Charcode to glyph name map (WinAnsiEncoding). 
var winansiEncodingCharcodeToGlyphMap = map[byte]string{
	// Several glyphs are reachable from more than one code in this table:
	// "bullet" (127, 129, 141, 143, 144, 149, 157), "space" (32, 160) and
	// "hyphen" (45, 173). Only 149 is the code assigned to "bullet"; the
	// other bullet codes are unused codes that decode to it.
	32:  "space",
	33:  "exclam",
	34:  "quotedbl",
	35:  "numbersign",
	36:  "dollar",
	37:  "percent",
	38:  "ampersand",
	39:  "quotesingle",
	40:  "parenleft",
	41:  "parenright",
	42:  "asterisk",
	43:  "plus",
	44:  "comma",
	45:  "hyphen",
	46:  "period",
	47:  "slash",
	48:  "zero",
	49:  "one",
	50:  "two",
	51:  "three",
	52:  "four",
	53:  "five",
	54:  "six",
	55:  "seven",
	56:  "eight",
	57:  "nine",
	58:  "colon",
	59:  "semicolon",
	60:  "less",
	61:  "equal",
	62:  "greater",
	63:  "question",
	64:  "at",
	65:  "A",
	66:  "B",
	67:  "C",
	68:  "D",
	69:  "E",
	70:  "F",
	71:  "G",
	72:  "H",
	73:  "I",
	74:  "J",
	75:  "K",
	76:  "L",
	77:  "M",
	78:  "N",
	79:  "O",
	80:  "P",
	81:  "Q",
	82:  "R",
	83:  "S",
	84:  "T",
	85:  "U",
	86:  "V",
	87:  "W",
	88:  "X",
	89:  "Y",
	90:  "Z",
	91:  "bracketleft",
	92:  "backslash",
	93:  "bracketright",
	94:  "asciicircum",
	95:  "underscore",
	96:  "grave",
	97:  "a",
	98:  "b",
	99:  "c",
	100: "d",
	101: "e",
	102: "f",
	103: "g",
	104: "h",
	105: "i",
	106: "j",
	107: "k",
	108: "l",
	109: "m",
	110: "n",
	111: "o",
	112: "p",
	113: "q",
	114: "r",
	115: "s",
	116: "t",
	117: "u",
	118: "v",
	119: "w",
	120: "x",
	121: "y",
	122: "z",
	123: "braceleft",
	124: "bar",
	125: "braceright",
	126: "asciitilde",
	127: "bullet",
	128: "Euro",
	129: "bullet",
	130: "quotesinglbase",
	131: "florin",
	132: "quotedblbase",
	133: "ellipsis",
	134: "dagger",
	135: "daggerdbl",
	136: "circumflex",
	137: "perthousand",
	138: "Scaron",
	139: "guilsinglleft",
	140: "OE",
	141: "bullet",
	142: "Zcaron",
	143: "bullet",
	144: "bullet",
	145: "quoteleft",
	146: "quoteright",
	147: "quotedblleft",
	148: "quotedblright",
	149: "bullet",
	150: "endash",
	151: "emdash",
	152: "tilde",
	153: "trademark",
	154: "scaron",
	155: "guilsinglright",
	156: "oe",
	157: "bullet",
	158: "zcaron",
	159: "Ydieresis",
	160: "space",
	161: "exclamdown",
	162: "cent",
	163: "sterling",
	164: "currency",
	165: "yen",
	166: "brokenbar",
	167: "section",
	168: "dieresis",
	169: "copyright",
	170: "ordfeminine",
	171: "guillemotleft",
	172: "logicalnot",
	173: "hyphen",
	174: "registered",
	175: "macron",
	176: "degree",
	177: "plusminus",
	178: "twosuperior",
	179: "threesuperior",
	180: "acute",
	181: "mu",
	182: "paragraph",
	183: "periodcentered",
	184: "cedilla",
	185: "onesuperior",
	186: "ordmasculine",
	187: "guillemotright",
	188: "onequarter",
	189: "onehalf",
	190: "threequarters",
	191: "questiondown",
	192: "Agrave",
	193: "Aacute",
	194: "Acircumflex",
	195: "Atilde",
	196: "Adieresis",
	197: "Aring",
	198: "AE",
	199: "Ccedilla",
	200: "Egrave",
	201: "Eacute",
	202: "Ecircumflex",
	203: "Edieresis",
	204: "Igrave",
	205: "Iacute",
	206: "Icircumflex",
	207: "Idieresis",
	208: "Eth",
	209: "Ntilde",
	210: "Ograve",
	211: "Oacute",
	212: "Ocircumflex",
	213: "Otilde",
	214: "Odieresis",
	215: "multiply",
	216: "Oslash",
	217: "Ugrave",
	218: "Uacute",
	219: "Ucircumflex",
	220: "Udieresis",
	221: "Yacute",
	222: "Thorn",
	223: "germandbls",
	224: "agrave",
	225: "aacute",
	226: "acircumflex",
	227: "atilde",
	228: "adieresis",
	229: "aring",
	230: "ae",
	231: "ccedilla",
	232: "egrave",
	233: "eacute",
	234: "ecircumflex",
	235: "edieresis",
	236: "igrave",
	237: "iacute",
	238: "icircumflex",
	239: "idieresis",
	240: "eth",
	241: "ntilde",
	242: "ograve",
	243: "oacute",
	244: "ocircumflex",
	245: "otilde",
	246: "odieresis",
	247: "divide",
	248: "oslash",
	249: "ugrave",
	250: "uacute",
	251: "ucircumflex",
	252: "udieresis",
	253: "yacute",
	254: "thorn",
	255: "ydieresis",
}

// winansiEncodingGlyphToCharcodeMap maps glyph names to character codes
// (WinAnsiEncoding). It is the inverse of the charcode-to-glyph table, with
// each glyph listed exactly once:
//   - "space" encodes to 32 (160 is decode-only),
//   - "hyphen" encodes to 45 (173 is decode-only),
//   - "bullet" encodes to 149 (0x95), the one code WinAnsiEncoding assigns
//     to it; 127, 129, 141, 143, 144 and 157 are unused codes that merely
//     decode to bullet and must not be produced when encoding.
var winansiEncodingGlyphToCharcodeMap = map[string]byte{
	"space":          32,
	"exclam":         33,
	"quotedbl":       34,
	"numbersign":     35,
	"dollar":         36,
	"percent":        37,
	"ampersand":      38,
	"quotesingle":    39,
	"parenleft":      40,
	"parenright":     41,
	"asterisk":       42,
	"plus":           43,
	"comma":          44,
	"hyphen":         45,
	"period":         46,
	"slash":          47,
	"zero":           48,
	"one":            49,
	"two":            50,
	"three":          51,
	"four":           52,
	"five":           53,
	"six":            54,
	"seven":          55,
	"eight":          56,
	"nine":           57,
	"colon":          58,
	"semicolon":      59,
	"less":           60,
	"equal":          61,
	"greater":        62,
	"question":       63,
	"at":             64,
	"A":              65,
	"B":              66,
	"C":              67,
	"D":              68,
	"E":              69,
	"F":              70,
	"G":              71,
	"H":              72,
	"I":              73,
	"J":              74,
	"K":              75,
	"L":              76,
	"M":              77,
	"N":              78,
	"O":              79,
	"P":              80,
	"Q":              81,
	"R":              82,
	"S":              83,
	"T":              84,
	"U":              85,
	"V":              86,
	"W":              87,
	"X":              88,
	"Y":              89,
	"Z":              90,
	"bracketleft":    91,
	"backslash":      92,
	"bracketright":   93,
	"asciicircum":    94,
	"underscore":     95,
	"grave":          96,
	"a":              97,
	"b":              98,
	"c":              99,
	"d":              100,
	"e":              101,
	"f":              102,
	"g":              103,
	"h":              104,
	"i":              105,
	"j":              106,
	"k":              107,
	"l":              108,
	"m":              109,
	"n":              110,
	"o":              111,
	"p":              112,
	"q":              113,
	"r":              114,
	"s":              115,
	"t":              116,
	"u":              117,
	"v":              118,
	"w":              119,
	"x":              120,
	"y":              121,
	"z":              122,
	"braceleft":      123,
	"bar":            124,
	"braceright":     125,
	"asciitilde":     126,
	"Euro":           128,
	"quotesinglbase": 130,
	"florin":         131,
	"quotedblbase":   132,
	"ellipsis":       133,
	"dagger":         134,
	"daggerdbl":      135,
	"circumflex":     136,
	"perthousand":    137,
	"Scaron":         138,
	"guilsinglleft":  139,
	"OE":             140,
	"Zcaron":         142,
	"quoteleft":      145,
	"quoteright":     146,
	"quotedblleft":   147,
	"quotedblright":  148,
	"bullet":         149,
	"endash":         150,
	"emdash":         151,
	"tilde":          152,
	"trademark":      153,
	"scaron":         154,
	"guilsinglright": 155,
	"oe":             156,
	"zcaron":         158,
	"Ydieresis":      159,
	"exclamdown":     161,
	"cent":           162,
	"sterling":       163,
	"currency":       164,
	"yen":            165,
	"brokenbar":      166,
	"section":        167,
	"dieresis":       168,
	"copyright":      169,
	"ordfeminine":    170,
	"guillemotleft":  171,
	"logicalnot":     172,
	"registered":     174,
	"macron":         175,
	"degree":         176,
	"plusminus":      177,
	"twosuperior":    178,
	"threesuperior":  179,
	"acute":          180,
	"mu":             181,
	"paragraph":      182,
	"periodcentered": 183,
	"cedilla":        184,
	"onesuperior":    185,
	"ordmasculine":   186,
	"guillemotright": 187,
	"onequarter":     188,
	"onehalf":        189,
	"threequarters":  190,
	"questiondown":   191,
	"Agrave":         192,
	"Aacute":         193,
	"Acircumflex":    194,
	"Atilde":         195,
	"Adieresis":      196,
	"Aring":          197,
	"AE":             198,
	"Ccedilla":       199,
	"Egrave":         200,
	"Eacute":         201,
	"Ecircumflex":    202,
	"Edieresis":      203,
	"Igrave":         204,
	"Iacute":         205,
	"Icircumflex":    206,
	"Idieresis":      207,
	"Eth":            208,
	"Ntilde":         209,
	"Ograve":         210,
	"Oacute":         211,
	"Ocircumflex":    212,
	"Otilde":         213,
	"Odieresis":      214,
	"multiply":       215,
	"Oslash":         216,
	"Ugrave":         217,
	"Uacute":         218,
	"Ucircumflex":    219,
	"Udieresis":      220,
	"Yacute":         221,
	"Thorn":          222,
	"germandbls":     223,
	"agrave":         224,
	"aacute":         225,
	"acircumflex":    226,
	"atilde":         227,
	"adieresis":      228,
	"aring":          229,
	"ae":             230,
	"ccedilla":       231,
	"egrave":         232,
	"eacute":         233,
	"ecircumflex":    234,
	"edieresis":      235,
	"igrave":         236,
	"iacute":         237,
	"icircumflex":    238,
	"idieresis":      239,
	"eth":            240,
	"ntilde":         241,
	"ograve":         242,
	"oacute":         243,
	"ocircumflex":    244,
	"otilde":         245,
	"odieresis":      246,
	"divide":         247,
	"oslash":         248,
	"ugrave":         249,
	"uacute":         250,
	"ucircumflex":    251,
	"udieresis":      252,
	"yacute":         253,
	"thorn":          254,
	"ydieresis":      255,
}