github.com/unidoc/unidoc@v2.2.0+incompatible/pdf/model/textencoding/winansi.go (about)

     1  /*
     2   * This file is subject to the terms and conditions defined in
     3   * file 'LICENSE.md', which is part of this source code package.
     4   */
     5  
     6  package textencoding
     7  
     8  import (
     9  	"github.com/unidoc/unidoc/common"
    10  	"github.com/unidoc/unidoc/pdf/core"
    11  )
    12  
    13  // WinAnsiEncoding.
    14  type WinAnsiEncoder struct {
    15  }
    16  
    17  func NewWinAnsiTextEncoder() WinAnsiEncoder {
    18  	encoder := WinAnsiEncoder{}
    19  	return encoder
    20  }
    21  
    22  func (winenc WinAnsiEncoder) ToPdfObject() core.PdfObject {
    23  	return core.MakeName("WinAnsiEncoding")
    24  }
    25  
    26  // Convert a raw utf8 string (series of runes) to an encoded string (series of character codes) to be used in PDF.
    27  func (winenc WinAnsiEncoder) Encode(raw string) string {
    28  	encoded := []byte{}
    29  	for _, rune := range raw {
    30  		code, has := winenc.RuneToCharcode(rune)
    31  		if has {
    32  			encoded = append(encoded, code)
    33  		}
    34  	}
    35  
    36  	return string(encoded)
    37  }
    38  
    39  // Conversion between character code and glyph name.
    40  // The bool return flag is true if there was a match, and false otherwise.
    41  func (winenc WinAnsiEncoder) CharcodeToGlyph(code byte) (string, bool) {
    42  	glyph, has := winansiEncodingCharcodeToGlyphMap[code]
    43  	if !has {
    44  		common.Log.Debug("Charcode -> Glyph error: charcode not found: %d\n", code)
    45  		return "", false
    46  	}
    47  	return glyph, true
    48  }
    49  
    50  // Conversion between glyph name and character code.
    51  // The bool return flag is true if there was a match, and false otherwise.
    52  func (winenc WinAnsiEncoder) GlyphToCharcode(glyph string) (byte, bool) {
    53  	code, found := winansiEncodingGlyphToCharcodeMap[glyph]
    54  	if !found {
    55  		common.Log.Debug("Glyph -> Charcode error: glyph not found: %s\n", glyph)
    56  		return 0, false
    57  	}
    58  
    59  	return code, true
    60  }
    61  
    62  // Convert rune to character code.
    63  // The bool return flag is true if there was a match, and false otherwise.
    64  func (winenc WinAnsiEncoder) RuneToCharcode(val rune) (byte, bool) {
    65  	glyph, found := winenc.RuneToGlyph(val)
    66  	if !found {
    67  		return 0, false
    68  	}
    69  
    70  	code, found := winansiEncodingGlyphToCharcodeMap[glyph]
    71  	if !found {
    72  		common.Log.Debug("Glyph -> Charcode error: glyph not found %s\n", glyph)
    73  		return 0, false
    74  	}
    75  
    76  	return code, true
    77  }
    78  
    79  // Convert character code to rune.
    80  // The bool return flag is true if there was a match, and false otherwise.
    81  func (winenc WinAnsiEncoder) CharcodeToRune(charcode byte) (rune, bool) {
    82  	glyph, found := winansiEncodingCharcodeToGlyphMap[charcode]
    83  	if !found {
    84  		common.Log.Debug("Charcode -> Glyph error: charcode not found: %d\n", charcode)
    85  		return 0, false
    86  	}
    87  
    88  	ucode, found := glyphToRune(glyph, glyphlistGlyphToRuneMap)
    89  	if !found {
    90  		return 0, false
    91  	}
    92  
    93  	return ucode, true
    94  }
    95  
    96  // Convert rune to glyph name.
    97  // The bool return flag is true if there was a match, and false otherwise.
    98  func (winenc WinAnsiEncoder) RuneToGlyph(val rune) (string, bool) {
    99  	return runeToGlyph(val, glyphlistRuneToGlyphMap)
   100  }
   101  
   102  // Convert glyph to rune.
   103  // The bool return flag is true if there was a match, and false otherwise.
   104  func (winenc WinAnsiEncoder) GlyphToRune(glyph string) (rune, bool) {
   105  	return glyphToRune(glyph, glyphlistGlyphToRuneMap)
   106  }
   107  
// winansiEncodingCharcodeToGlyphMap maps character codes to glyph names (WinAnsiEncoding).
//
// Every code from 32 to 255 has an entry; codes below 32 have none.
// The table is not one-to-one: codes 127, 129, 141, 143, 144, 149 and 157 all
// map to "bullet", codes 32 and 160 both map to "space", and codes 45 and 173
// both map to "hyphen".
var winansiEncodingCharcodeToGlyphMap = map[byte]string{
	32:  "space",
	33:  "exclam",
	34:  "quotedbl",
	35:  "numbersign",
	36:  "dollar",
	37:  "percent",
	38:  "ampersand",
	39:  "quotesingle",
	40:  "parenleft",
	41:  "parenright",
	42:  "asterisk",
	43:  "plus",
	44:  "comma",
	45:  "hyphen",
	46:  "period",
	47:  "slash",
	48:  "zero",
	49:  "one",
	50:  "two",
	51:  "three",
	52:  "four",
	53:  "five",
	54:  "six",
	55:  "seven",
	56:  "eight",
	57:  "nine",
	58:  "colon",
	59:  "semicolon",
	60:  "less",
	61:  "equal",
	62:  "greater",
	63:  "question",
	64:  "at",
	65:  "A",
	66:  "B",
	67:  "C",
	68:  "D",
	69:  "E",
	70:  "F",
	71:  "G",
	72:  "H",
	73:  "I",
	74:  "J",
	75:  "K",
	76:  "L",
	77:  "M",
	78:  "N",
	79:  "O",
	80:  "P",
	81:  "Q",
	82:  "R",
	83:  "S",
	84:  "T",
	85:  "U",
	86:  "V",
	87:  "W",
	88:  "X",
	89:  "Y",
	90:  "Z",
	91:  "bracketleft",
	92:  "backslash",
	93:  "bracketright",
	94:  "asciicircum",
	95:  "underscore",
	96:  "grave",
	97:  "a",
	98:  "b",
	99:  "c",
	100: "d",
	101: "e",
	102: "f",
	103: "g",
	104: "h",
	105: "i",
	106: "j",
	107: "k",
	108: "l",
	109: "m",
	110: "n",
	111: "o",
	112: "p",
	113: "q",
	114: "r",
	115: "s",
	116: "t",
	117: "u",
	118: "v",
	119: "w",
	120: "x",
	121: "y",
	122: "z",
	123: "braceleft",
	124: "bar",
	125: "braceright",
	126: "asciitilde",
	127: "bullet",
	128: "Euro",
	129: "bullet",
	130: "quotesinglbase",
	131: "florin",
	132: "quotedblbase",
	133: "ellipsis",
	134: "dagger",
	135: "daggerdbl",
	136: "circumflex",
	137: "perthousand",
	138: "Scaron",
	139: "guilsinglleft",
	140: "OE",
	141: "bullet",
	142: "Zcaron",
	143: "bullet",
	144: "bullet",
	145: "quoteleft",
	146: "quoteright",
	147: "quotedblleft",
	148: "quotedblright",
	149: "bullet",
	150: "endash",
	151: "emdash",
	152: "tilde",
	153: "trademark",
	154: "scaron",
	155: "guilsinglright",
	156: "oe",
	157: "bullet",
	158: "zcaron",
	159: "Ydieresis",
	160: "space",
	161: "exclamdown",
	162: "cent",
	163: "sterling",
	164: "currency",
	165: "yen",
	166: "brokenbar",
	167: "section",
	168: "dieresis",
	169: "copyright",
	170: "ordfeminine",
	171: "guillemotleft",
	172: "logicalnot",
	173: "hyphen",
	174: "registered",
	175: "macron",
	176: "degree",
	177: "plusminus",
	178: "twosuperior",
	179: "threesuperior",
	180: "acute",
	181: "mu",
	182: "paragraph",
	183: "periodcentered",
	184: "cedilla",
	185: "onesuperior",
	186: "ordmasculine",
	187: "guillemotright",
	188: "onequarter",
	189: "onehalf",
	190: "threequarters",
	191: "questiondown",
	192: "Agrave",
	193: "Aacute",
	194: "Acircumflex",
	195: "Atilde",
	196: "Adieresis",
	197: "Aring",
	198: "AE",
	199: "Ccedilla",
	200: "Egrave",
	201: "Eacute",
	202: "Ecircumflex",
	203: "Edieresis",
	204: "Igrave",
	205: "Iacute",
	206: "Icircumflex",
	207: "Idieresis",
	208: "Eth",
	209: "Ntilde",
	210: "Ograve",
	211: "Oacute",
	212: "Ocircumflex",
	213: "Otilde",
	214: "Odieresis",
	215: "multiply",
	216: "Oslash",
	217: "Ugrave",
	218: "Uacute",
	219: "Ucircumflex",
	220: "Udieresis",
	221: "Yacute",
	222: "Thorn",
	223: "germandbls",
	224: "agrave",
	225: "aacute",
	226: "acircumflex",
	227: "atilde",
	228: "adieresis",
	229: "aring",
	230: "ae",
	231: "ccedilla",
	232: "egrave",
	233: "eacute",
	234: "ecircumflex",
	235: "edieresis",
	236: "igrave",
	237: "iacute",
	238: "icircumflex",
	239: "idieresis",
	240: "eth",
	241: "ntilde",
	242: "ograve",
	243: "oacute",
	244: "ocircumflex",
	245: "otilde",
	246: "odieresis",
	247: "divide",
	248: "oslash",
	249: "ugrave",
	250: "uacute",
	251: "ucircumflex",
	252: "udieresis",
	253: "yacute",
	254: "thorn",
	255: "ydieresis",
}
   335  
   336  // Glyph to charcode map (WinAnsiEncoding).
   337  var winansiEncodingGlyphToCharcodeMap = map[string]byte{
   338  	"space":        32,
   339  	"exclam":       33,
   340  	"quotedbl":     34,
   341  	"numbersign":   35,
   342  	"dollar":       36,
   343  	"percent":      37,
   344  	"ampersand":    38,
   345  	"quotesingle":  39,
   346  	"parenleft":    40,
   347  	"parenright":   41,
   348  	"asterisk":     42,
   349  	"plus":         43,
   350  	"comma":        44,
   351  	"hyphen":       45,
   352  	"period":       46,
   353  	"slash":        47,
   354  	"zero":         48,
   355  	"one":          49,
   356  	"two":          50,
   357  	"three":        51,
   358  	"four":         52,
   359  	"five":         53,
   360  	"six":          54,
   361  	"seven":        55,
   362  	"eight":        56,
   363  	"nine":         57,
   364  	"colon":        58,
   365  	"semicolon":    59,
   366  	"less":         60,
   367  	"equal":        61,
   368  	"greater":      62,
   369  	"question":     63,
   370  	"at":           64,
   371  	"A":            65,
   372  	"B":            66,
   373  	"C":            67,
   374  	"D":            68,
   375  	"E":            69,
   376  	"F":            70,
   377  	"G":            71,
   378  	"H":            72,
   379  	"I":            73,
   380  	"J":            74,
   381  	"K":            75,
   382  	"L":            76,
   383  	"M":            77,
   384  	"N":            78,
   385  	"O":            79,
   386  	"P":            80,
   387  	"Q":            81,
   388  	"R":            82,
   389  	"S":            83,
   390  	"T":            84,
   391  	"U":            85,
   392  	"V":            86,
   393  	"W":            87,
   394  	"X":            88,
   395  	"Y":            89,
   396  	"Z":            90,
   397  	"bracketleft":  91,
   398  	"backslash":    92,
   399  	"bracketright": 93,
   400  	"asciicircum":  94,
   401  	"underscore":   95,
   402  	"grave":        96,
   403  	"a":            97,
   404  	"b":            98,
   405  	"c":            99,
   406  	"d":            100,
   407  	"e":            101,
   408  	"f":            102,
   409  	"g":            103,
   410  	"h":            104,
   411  	"i":            105,
   412  	"j":            106,
   413  	"k":            107,
   414  	"l":            108,
   415  	"m":            109,
   416  	"n":            110,
   417  	"o":            111,
   418  	"p":            112,
   419  	"q":            113,
   420  	"r":            114,
   421  	"s":            115,
   422  	"t":            116,
   423  	"u":            117,
   424  	"v":            118,
   425  	"w":            119,
   426  	"x":            120,
   427  	"y":            121,
   428  	"z":            122,
   429  	"braceleft":    123,
   430  	"bar":          124,
   431  	"braceright":   125,
   432  	"asciitilde":   126,
   433  	"bullet":       127,
   434  	"Euro":         128,
   435  	//"bullet":         129,
   436  	"quotesinglbase": 130,
   437  	"florin":         131,
   438  	"quotedblbase":   132,
   439  	"ellipsis":       133,
   440  	"dagger":         134,
   441  	"daggerdbl":      135,
   442  	"circumflex":     136,
   443  	"perthousand":    137,
   444  	"Scaron":         138,
   445  	"guilsinglleft":  139,
   446  	"OE":             140,
   447  	//"bullet":         141,
   448  	"Zcaron": 142,
   449  	//"bullet":         143,
   450  	//"bullet":         144,
   451  	"quoteleft":     145,
   452  	"quoteright":    146,
   453  	"quotedblleft":  147,
   454  	"quotedblright": 148,
   455  	//"bullet":         149,
   456  	"endash":         150,
   457  	"emdash":         151,
   458  	"tilde":          152,
   459  	"trademark":      153,
   460  	"scaron":         154,
   461  	"guilsinglright": 155,
   462  	"oe":             156,
   463  	//"bullet":         157,
   464  	"zcaron":    158,
   465  	"Ydieresis": 159,
   466  	//"space":          160,
   467  	"exclamdown":    161,
   468  	"cent":          162,
   469  	"sterling":      163,
   470  	"currency":      164,
   471  	"yen":           165,
   472  	"brokenbar":     166,
   473  	"section":       167,
   474  	"dieresis":      168,
   475  	"copyright":     169,
   476  	"ordfeminine":   170,
   477  	"guillemotleft": 171,
   478  	"logicalnot":    172,
   479  	//"hyphen":         173,
   480  	"registered":     174,
   481  	"macron":         175,
   482  	"degree":         176,
   483  	"plusminus":      177,
   484  	"twosuperior":    178,
   485  	"threesuperior":  179,
   486  	"acute":          180,
   487  	"mu":             181,
   488  	"paragraph":      182,
   489  	"periodcentered": 183,
   490  	"cedilla":        184,
   491  	"onesuperior":    185,
   492  	"ordmasculine":   186,
   493  	"guillemotright": 187,
   494  	"onequarter":     188,
   495  	"onehalf":        189,
   496  	"threequarters":  190,
   497  	"questiondown":   191,
   498  	"Agrave":         192,
   499  	"Aacute":         193,
   500  	"Acircumflex":    194,
   501  	"Atilde":         195,
   502  	"Adieresis":      196,
   503  	"Aring":          197,
   504  	"AE":             198,
   505  	"Ccedilla":       199,
   506  	"Egrave":         200,
   507  	"Eacute":         201,
   508  	"Ecircumflex":    202,
   509  	"Edieresis":      203,
   510  	"Igrave":         204,
   511  	"Iacute":         205,
   512  	"Icircumflex":    206,
   513  	"Idieresis":      207,
   514  	"Eth":            208,
   515  	"Ntilde":         209,
   516  	"Ograve":         210,
   517  	"Oacute":         211,
   518  	"Ocircumflex":    212,
   519  	"Otilde":         213,
   520  	"Odieresis":      214,
   521  	"multiply":       215,
   522  	"Oslash":         216,
   523  	"Ugrave":         217,
   524  	"Uacute":         218,
   525  	"Ucircumflex":    219,
   526  	"Udieresis":      220,
   527  	"Yacute":         221,
   528  	"Thorn":          222,
   529  	"germandbls":     223,
   530  	"agrave":         224,
   531  	"aacute":         225,
   532  	"acircumflex":    226,
   533  	"atilde":         227,
   534  	"adieresis":      228,
   535  	"aring":          229,
   536  	"ae":             230,
   537  	"ccedilla":       231,
   538  	"egrave":         232,
   539  	"eacute":         233,
   540  	"ecircumflex":    234,
   541  	"edieresis":      235,
   542  	"igrave":         236,
   543  	"iacute":         237,
   544  	"icircumflex":    238,
   545  	"idieresis":      239,
   546  	"eth":            240,
   547  	"ntilde":         241,
   548  	"ograve":         242,
   549  	"oacute":         243,
   550  	"ocircumflex":    244,
   551  	"otilde":         245,
   552  	"odieresis":      246,
   553  	"divide":         247,
   554  	"oslash":         248,
   555  	"ugrave":         249,
   556  	"uacute":         250,
   557  	"ucircumflex":    251,
   558  	"udieresis":      252,
   559  	"yacute":         253,
   560  	"thorn":          254,
   561  	"ydieresis":      255,
   562  }