github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/chardet/single_byte.go (about) 1 package chardet 2 3 // Recognizer for single byte charset family 4 type recognizerSingleByte struct { 5 charset string 6 hasC1ByteCharset string 7 language string 8 charMap *[256]byte 9 ngram *[64]uint32 10 } 11 12 func (r *recognizerSingleByte) Match(input *recognizerInput) recognizerOutput { 13 var charset string = r.charset 14 if input.hasC1Bytes && len(r.hasC1ByteCharset) > 0 { 15 charset = r.hasC1ByteCharset 16 } 17 return recognizerOutput{ 18 Charset: charset, 19 Language: r.language, 20 Confidence: r.parseNgram(input.input), 21 } 22 } 23 24 type ngramState struct { 25 ngram uint32 26 ignoreSpace bool 27 ngramCount, ngramHit uint32 28 table *[64]uint32 29 } 30 31 func newNgramState(table *[64]uint32) *ngramState { 32 return &ngramState{ 33 ngram: 0, 34 ignoreSpace: false, 35 ngramCount: 0, 36 ngramHit: 0, 37 table: table, 38 } 39 } 40 41 func (s *ngramState) AddByte(b byte) { 42 const ngramMask = 0xFFFFFF 43 if !(b == 0x20 && s.ignoreSpace) { 44 s.ngram = ((s.ngram << 8) | uint32(b)) & ngramMask 45 s.ignoreSpace = (s.ngram == 0x20) 46 s.ngramCount++ 47 if s.lookup() { 48 s.ngramHit++ 49 } 50 } 51 s.ignoreSpace = (b == 0x20) 52 } 53 54 func (s *ngramState) HitRate() float32 { 55 if s.ngramCount == 0 { 56 return 0 57 } 58 return float32(s.ngramHit) / float32(s.ngramCount) 59 } 60 61 func (s *ngramState) lookup() bool { 62 var index int 63 if s.table[index+32] <= s.ngram { 64 index += 32 65 } 66 if s.table[index+16] <= s.ngram { 67 index += 16 68 } 69 if s.table[index+8] <= s.ngram { 70 index += 8 71 } 72 if s.table[index+4] <= s.ngram { 73 index += 4 74 } 75 if s.table[index+2] <= s.ngram { 76 index += 2 77 } 78 if s.table[index+1] <= s.ngram { 79 index += 1 80 } 81 if s.table[index] > s.ngram { 82 index -= 1 83 } 84 if index < 0 || s.table[index] != s.ngram { 85 return false 86 } 87 return true 88 } 89 90 func (r *recognizerSingleByte) parseNgram(input []byte) int { 91 state := newNgramState(r.ngram) 92 for _, inChar := range input { 93 c := r.charMap[inChar] 94 if c != 0 { 95 state.AddByte(c) 96 } 97 } 98 state.AddByte(0x20) 99 rate := state.HitRate() 100 if rate > 0.33 { 101 return 98 102 } 103 return int(rate * 300) 104 } 105 106 var charMap_8859_1 = [256]byte{ 107 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 108 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 109 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 110 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 111 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 112 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 113 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 114 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 115 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 116 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 117 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 118 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 119 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 120 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 121 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 122 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 123 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 124 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 125 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 126 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 127 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 128 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, 129 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 130 0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20, 131 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 132 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 133 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 134 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, 135 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 136 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 137 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 138 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 139 } 140 141 var ngrams_8859_1_en = [64]uint32{ 142 0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E, 0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F, 143 0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74, 144 0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20, 0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420, 145 0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169, 0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320, 146 } 147 148 var ngrams_8859_1_da = [64]uint32{ 149 0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620, 150 0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320, 151 0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520, 152 0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572, 153 } 154 155 var ngrams_8859_1_de = [64]uint32{ 156 0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F, 157 0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220, 158 0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465, 159 0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572, 160 } 161 162 var ngrams_8859_1_es = [64]uint32{ 163 0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E, 0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365, 164 0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369, 0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C, 165 0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320, 0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064, 166 0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20, 167 } 168 169 var ngrams_8859_1_fr = [64]uint32{ 170 0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61, 0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E, 171 0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20, 172 0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420, 0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420, 173 0x6F6E20, 0x6F6E74, 0x6F7572, 0x717565, 0x72206C, 0x726520, 0x732061, 0x732064, 0x732065, 0x73206C, 0x732070, 0x742064, 0x746520, 0x74696F, 0x756520, 0x757220, 174 } 175 176 var ngrams_8859_1_it = [64]uint32{ 177 0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E, 0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073, 178 0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064, 0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220, 179 0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20, 0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20, 180 0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572, 0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F, 181 } 182 183 var ngrams_8859_1_nl = [64]uint32{ 184 0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665, 185 0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E, 186 0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F, 187 0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F, 188 } 189 190 var ngrams_8859_1_no = [64]uint32{ 191 0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469, 192 0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574, 0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474, 193 0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520, 0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65, 194 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465, 0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572, 195 } 196 197 var ngrams_8859_1_pt = [64]uint32{ 198 0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61, 0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365, 199 0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20, 0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20, 200 0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065, 201 0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064, 0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F, 202 } 203 204 var ngrams_8859_1_sv = [64]uint32{ 205 0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E, 0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469, 206 0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220, 207 0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564, 0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20, 208 0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073, 0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220, 209 } 210 211 func newRecognizer_8859_1(language string, ngram *[64]uint32) *recognizerSingleByte { 212 return &recognizerSingleByte{ 213 charset: "ISO-8859-1", 214 hasC1ByteCharset: "windows-1252", 215 language: language, 216 charMap: &charMap_8859_1, 217 ngram: ngram, 218 } 219 } 220 221 func newRecognizer_8859_1_en() *recognizerSingleByte { 222 return newRecognizer_8859_1("en", &ngrams_8859_1_en) 223 } 224 func newRecognizer_8859_1_da() *recognizerSingleByte { 225 return newRecognizer_8859_1("da", &ngrams_8859_1_da) 226 } 227 func newRecognizer_8859_1_de() *recognizerSingleByte { 228 return newRecognizer_8859_1("de", &ngrams_8859_1_de) 229 } 230 func newRecognizer_8859_1_es() *recognizerSingleByte { 231 return newRecognizer_8859_1("es", &ngrams_8859_1_es) 232 } 233 func newRecognizer_8859_1_fr() *recognizerSingleByte { 234 return newRecognizer_8859_1("fr", &ngrams_8859_1_fr) 235 } 236 func newRecognizer_8859_1_it() *recognizerSingleByte { 237 return newRecognizer_8859_1("it", &ngrams_8859_1_it) 238 } 239 func newRecognizer_8859_1_nl() *recognizerSingleByte { 240 return newRecognizer_8859_1("nl", &ngrams_8859_1_nl) 241 } 242 func newRecognizer_8859_1_no() *recognizerSingleByte { 243 return newRecognizer_8859_1("no", &ngrams_8859_1_no) 244 } 245 func newRecognizer_8859_1_pt() *recognizerSingleByte { 246 return newRecognizer_8859_1("pt", &ngrams_8859_1_pt) 247 } 248 func newRecognizer_8859_1_sv() *recognizerSingleByte { 249 return newRecognizer_8859_1("sv", &ngrams_8859_1_sv) 250 } 251 252 var charMap_8859_2 = [256]byte{ 253 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 254 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 255 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 256 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 257 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 258 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 259 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 260 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 261 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 262 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 263 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 264 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 265 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 266 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 267 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 268 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 269 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 270 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 271 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 272 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 273 0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0x20, 274 0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF, 275 0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0xB7, 276 0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF, 277 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 278 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 279 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 280 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, 281 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 282 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 283 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 284 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20, 285 } 286 287 var ngrams_8859_2_cs = [64]uint32{ 288 0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64, 0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F, 289 0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073, 0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465, 290 0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E, 0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865, 291 0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20, 0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564, 292 } 293 294 var ngrams_8859_2_hu = [64]uint32{ 295 0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F, 0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69, 296 0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073, 0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20, 297 0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061, 298 0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74, 0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320, 299 } 300 301 var ngrams_8859_2_pl = [64]uint32{ 302 0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61, 0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779, 303 0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79, 0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20, 304 0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920, 0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769, 305 0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69, 0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720, 306 } 307 308 var ngrams_8859_2_ro = [64]uint32{ 309 0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469, 0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69, 310 0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172, 0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070, 311 0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72, 312 0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520, 0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20, 313 } 314 315 func newRecognizer_8859_2(language string, ngram *[64]uint32) *recognizerSingleByte { 316 return &recognizerSingleByte{ 317 charset: "ISO-8859-2", 318 hasC1ByteCharset: "windows-1250", 319 language: language, 320 charMap: &charMap_8859_2, 321 ngram: ngram, 322 } 323 } 324 325 func newRecognizer_8859_2_cs() *recognizerSingleByte { 326 return newRecognizer_8859_1("cs", &ngrams_8859_2_cs) 327 } 328 func newRecognizer_8859_2_hu() *recognizerSingleByte { 329 return newRecognizer_8859_1("hu", &ngrams_8859_2_hu) 330 } 331 func newRecognizer_8859_2_pl() *recognizerSingleByte { 332 return newRecognizer_8859_1("pl", &ngrams_8859_2_pl) 333 } 334 func newRecognizer_8859_2_ro() *recognizerSingleByte { 335 return newRecognizer_8859_1("ro", &ngrams_8859_2_ro) 336 } 337 338 var charMap_8859_5 = [256]byte{ 339 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 340 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 341 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 342 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 343 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 344 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 345 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 346 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 347 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 348 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 349 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 350 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 351 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 352 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 353 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 354 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 355 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 356 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 357 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 358 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 359 0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 360 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF, 361 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 362 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 363 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 364 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 365 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 366 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 367 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 368 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 369 0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 370 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF, 371 } 372 373 var ngrams_8859_5_ru = [64]uint32{ 374 0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0, 0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE, 375 0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2, 0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD, 376 0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF, 0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2, 377 0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2, 0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520, 378 } 379 380 func newRecognizer_8859_5(language string, ngram *[64]uint32) *recognizerSingleByte { 381 return &recognizerSingleByte{ 382 charset: "ISO-8859-5", 383 language: language, 384 charMap: &charMap_8859_5, 385 ngram: ngram, 386 } 387 } 388 389 func newRecognizer_8859_5_ru() *recognizerSingleByte { 390 return newRecognizer_8859_5("ru", &ngrams_8859_5_ru) 391 } 392 393 var charMap_8859_6 = [256]byte{ 394 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 395 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 396 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 397 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 398 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 399 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 400 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 401 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 402 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 403 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 404 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 405 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 406 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 407 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 408 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 409 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 410 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 411 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 412 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 413 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 414 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 415 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 416 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 417 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 418 0x20, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 419 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 420 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 421 0xD8, 0xD9, 0xDA, 0x20, 0x20, 0x20, 0x20, 0x20, 422 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 423 0xE8, 0xE9, 0xEA, 0x20, 0x20, 0x20, 0x20, 0x20, 424 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 425 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 426 } 427 428 var ngrams_8859_6_ar = [64]uint32{ 429 0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8, 430 0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5, 0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1, 431 0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20, 432 0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4, 0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620, 433 } 434 435 func newRecognizer_8859_6(language string, ngram *[64]uint32) *recognizerSingleByte { 436 return &recognizerSingleByte{ 437 charset: "ISO-8859-6", 438 language: language, 439 charMap: &charMap_8859_6, 440 ngram: ngram, 441 } 442 } 443 444 func newRecognizer_8859_6_ar() *recognizerSingleByte { 445 return newRecognizer_8859_6("ar", &ngrams_8859_6_ar) 446 } 447 448 var charMap_8859_7 = [256]byte{ 449 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 450 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 451 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 452 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 453 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 454 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 455 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 456 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 457 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 458 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 459 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 460 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 461 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 462 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 463 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 464 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 465 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 466 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 467 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 468 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 469 0x20, 0xA1, 0xA2, 0x20, 0x20, 0x20, 0x20, 0x20, 470 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 471 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xDC, 0x20, 472 0xDD, 0xDE, 0xDF, 0x20, 0xFC, 0x20, 0xFD, 0xFE, 473 0xC0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 474 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 475 0xF0, 0xF1, 0x20, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 476 0xF8, 0xF9, 0xFA, 0xFB, 0xDC, 0xDD, 0xDE, 0xDF, 477 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 478 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 479 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 480 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20, 481 } 482 483 var ngrams_8859_7_el = [64]uint32{ 484 0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5, 0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7, 485 0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220, 0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120, 486 0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0, 0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5, 487 0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9, 0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20, 488 } 489 490 func newRecognizer_8859_7(language string, ngram *[64]uint32) *recognizerSingleByte { 491 return &recognizerSingleByte{ 492 charset: "ISO-8859-7", 493 hasC1ByteCharset: "windows-1253", 494 language: language, 495 charMap: &charMap_8859_7, 496 ngram: ngram, 497 } 498 } 499 500 func newRecognizer_8859_7_el() *recognizerSingleByte { 501 return newRecognizer_8859_7("el", &ngrams_8859_7_el) 502 } 503 504 var charMap_8859_8 = [256]byte{ 505 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 506 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 507 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 508 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 509 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 510 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 511 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 512 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 513 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 514 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 515 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 516 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 517 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 518 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 519 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 520 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 521 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 522 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 523 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 524 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 525 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 526 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 527 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 528 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 529 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 530 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 531 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 532 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 533 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 534 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 535 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 536 0xF8, 0xF9, 0xFA, 0x20, 0x20, 0x20, 0x20, 0x20, 537 } 538 539 var ngrams_8859_8_I_he = [64]uint32{ 540 0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0, 541 0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE, 0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4, 542 0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0, 0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE, 543 0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4, 0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9, 544 } 545 546 var ngrams_8859_8_he = [64]uint32{ 547 0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0, 548 0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4, 0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC, 549 0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020, 0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920, 550 0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420, 0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9, 551 } 552 553 func newRecognizer_8859_8(language string, ngram *[64]uint32) *recognizerSingleByte { 554 return &recognizerSingleByte{ 555 charset: "ISO-8859-8", 556 hasC1ByteCharset: "windows-1255", 557 language: language, 558 charMap: &charMap_8859_8, 559 ngram: ngram, 560 } 561 } 562 563 func newRecognizer_8859_8_I_he() *recognizerSingleByte { 564 r := newRecognizer_8859_8("he", &ngrams_8859_8_I_he) 565 r.charset = "ISO-8859-8-I" 566 return r 567 } 568 569 func newRecognizer_8859_8_he() *recognizerSingleByte { 570 return newRecognizer_8859_8("he", &ngrams_8859_8_he) 571 } 572 573 var charMap_8859_9 = [256]byte{ 574 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 575 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 576 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 577 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 578 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 579 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 580 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 581 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 582 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 583 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 584 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 585 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 586 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 587 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 588 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 589 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 590 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 591 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 592 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 593 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 594 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 595 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, 596 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 597 0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20, 598 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 599 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 600 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 601 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x69, 0xFE, 0xDF, 602 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 603 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 604 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 605 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 606 } 607 608 var ngrams_8859_9_tr = [64]uint32{ 609 0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C, 0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961, 610 0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261, 0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062, 611 0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20, 0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062, 612 0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E, 0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD, 613 } 614 615 func newRecognizer_8859_9(language string, ngram *[64]uint32) *recognizerSingleByte { 616 return &recognizerSingleByte{ 617 charset: "ISO-8859-9", 618 hasC1ByteCharset: "windows-1254", 619 language: language, 620 charMap: &charMap_8859_9, 621 ngram: ngram, 622 } 623 } 624 625 func newRecognizer_8859_9_tr() *recognizerSingleByte { 626 return newRecognizer_8859_9("tr", &ngrams_8859_9_tr) 627 } 628 629 var charMap_windows_1256 = [256]byte{ 630 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 631 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 632 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 633 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 634 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 635 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 636 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 637 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 638 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 639 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 640 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 641 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 642 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 643 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 644 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 645 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 646 0x20, 0x81, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20, 647 0x88, 0x20, 0x8A, 0x20, 0x9C, 0x8D, 0x8E, 0x8F, 648 0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 649 0x98, 0x20, 0x9A, 0x20, 0x9C, 0x20, 0x20, 0x9F, 650 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 651 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, 652 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 653 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 654 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 655 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 656 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0x20, 657 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 658 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 659 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 660 0x20, 0x20, 0x20, 0x20, 0xF4, 0x20, 0x20, 0x20, 661 0x20, 0xF9, 0x20, 0xFB, 0xFC, 0x20, 0x20, 0xFF, 662 } 663 664 var ngrams_windows_1256 = [64]uint32{ 665 0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8, 666 0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3, 0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD, 667 0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20, 668 0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1, 0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420, 669 } 670 671 func newRecognizer_windows_1256() *recognizerSingleByte { 672 return &recognizerSingleByte{ 673 charset: "windows-1256", 674 language: "ar", 675 charMap: &charMap_windows_1256, 676 ngram: &ngrams_windows_1256, 677 } 678 } 679 680 var charMap_windows_1251 = [256]byte{ 681 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 682 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 683 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 684 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 685 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 686 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 687 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 688 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 689 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 690 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 691 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 692 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 693 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 694 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 695 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 696 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 697 0x90, 0x83, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20, 698 0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F, 699 0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 700 0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F, 701 0x20, 0xA2, 0xA2, 0xBC, 0x20, 0xB4, 0x20, 0x20, 702 0xB8, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0xBF, 703 0x20, 0x20, 0xB3, 0xB3, 0xB4, 0xB5, 0x20, 0x20, 704 0xB8, 0x20, 0xBA, 0x20, 0xBC, 0xBE, 0xBE, 0xBF, 705 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 706 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 707 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 708 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 709 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 710 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 711 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 712 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 713 } 714 715 var ngrams_windows_1251 = [64]uint32{ 716 0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0, 0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE, 717 0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2, 0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED, 718 0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF, 0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2, 719 0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2, 0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520, 720 } 721 722 func newRecognizer_windows_1251() *recognizerSingleByte { 723 return &recognizerSingleByte{ 724 charset: "windows-1251", 725 language: "ar", 726 charMap: &charMap_windows_1251, 727 ngram: &ngrams_windows_1251, 728 } 729 } 730 731 var charMap_KOI8_R = [256]byte{ 732 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 733 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 734 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 735 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 736 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 737 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 738 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 739 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 740 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 741 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 742 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 743 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 744 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 745 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 746 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 747 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 748 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 749 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 750 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 751 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 752 0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20, 753 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 754 0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20, 755 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 756 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 757 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 758 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 759 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 760 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 761 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 762 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 763 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 764 } 765 766 var ngrams_KOI8_R = [64]uint32{ 767 0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF, 0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1, 768 0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420, 0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE, 769 0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3, 0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1, 770 0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1, 0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF, 771 } 772 773 func newRecognizer_KOI8_R() *recognizerSingleByte { 774 return &recognizerSingleByte{ 775 charset: "KOI8-R", 776 language: "ru", 777 charMap: &charMap_KOI8_R, 778 ngram: &ngrams_KOI8_R, 779 } 780 } 781 782 var charMap_IBM424_he = [256]byte{ 783 /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */ 784 /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 785 /* 1- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 786 /* 2- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 787 /* 3- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 788 /* 4- */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 789 /* 5- */ 0x40, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 790 /* 6- */ 0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 791 /* 7- */ 0x40, 0x71, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x00, 0x40, 0x40, 792 /* 8- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 793 /* 9- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 794 /* A- */ 0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 795 /* B- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 796 /* C- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 797 /* D- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 798 /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 799 /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 800 } 801 802 var ngrams_IBM424_he_rtl = [64]uint32{ 803 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641, 804 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045, 805 0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056, 806 0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069, 807 } 808 809 var ngrams_IBM424_he_ltr = [64]uint32{ 810 0x404146, 0x404154, 0x404551, 0x404554, 0x404556, 0x404558, 0x405158, 0x405462, 0x405469, 0x405546, 0x405551, 0x405746, 0x405751, 0x406846, 0x406851, 0x407141, 811 0x407146, 0x407151, 0x414045, 0x414054, 0x414055, 0x414071, 0x414540, 0x414645, 0x415440, 0x415640, 0x424045, 0x424055, 0x424071, 0x454045, 0x454051, 0x454054, 812 0x454055, 0x454057, 0x454068, 0x454071, 0x455440, 0x464140, 0x464540, 0x484140, 0x514140, 0x514240, 0x514540, 0x544045, 0x544055, 0x544071, 0x546240, 0x546940, 813 0x555151, 0x555158, 0x555168, 0x564045, 0x564055, 0x564071, 0x564240, 0x564540, 0x624540, 0x694045, 0x694055, 0x694071, 0x694540, 0x714140, 0x714540, 0x714651, 814 } 815 816 func newRecognizer_IBM424_he(charset string, ngram *[64]uint32) *recognizerSingleByte { 817 return &recognizerSingleByte{ 818 charset: charset, 819 language: "he", 820 charMap: &charMap_IBM424_he, 821 ngram: ngram, 822 } 823 } 824 825 func newRecognizer_IBM424_he_rtl() *recognizerSingleByte { 826 return newRecognizer_IBM424_he("IBM424_rtl", &ngrams_IBM424_he_rtl) 827 } 828 829 func newRecognizer_IBM424_he_ltr() *recognizerSingleByte { 830 return newRecognizer_IBM424_he("IBM424_ltr", &ngrams_IBM424_he_ltr) 831 } 832 833 var charMap_IBM420_ar = [256]byte{ 834 /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */ 835 /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 836 /* 1- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 837 /* 2- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 838 /* 3- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 839 /* 4- */ 0x40, 0x40, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 840 /* 5- */ 0x40, 0x51, 0x52, 0x40, 0x40, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 841 /* 6- */ 0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 842 /* 7- */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 843 /* 8- */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 844 /* 9- */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, 845 /* A- */ 0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 846 /* B- */ 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0x40, 0x40, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, 847 /* C- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0xCB, 0x40, 0xCD, 0x40, 0xCF, 848 /* D- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 849 /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF, 850 /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40, 851 } 852 853 var ngrams_IBM420_ar_rtl = [64]uint32{ 854 0x4056B1, 0x4056BD, 0x405856, 0x409AB1, 0x40ABDC, 0x40B1B1, 0x40BBBD, 0x40CF56, 0x564056, 0x564640, 0x566340, 0x567540, 0x56B140, 0x56B149, 0x56B156, 0x56B158, 855 0x56B163, 0x56B167, 0x56B169, 0x56B173, 0x56B178, 0x56B19A, 0x56B1AD, 0x56B1BB, 0x56B1CF, 0x56B1DC, 0x56BB40, 0x56BD40, 0x56BD63, 0x584056, 0x624056, 0x6240AB, 856 0x6240B1, 0x6240BB, 0x6240CF, 0x634056, 0x734056, 0x736240, 0x754056, 0x756240, 0x784056, 0x9A4056, 0x9AB1DA, 0xABDC40, 0xB14056, 0xB16240, 0xB1DA40, 0xB1DC40, 857 0xBB4056, 0xBB5640, 0xBB6240, 0xBBBD40, 0xBD4056, 0xBF4056, 0xBF5640, 0xCF56B1, 0xCFBD40, 0xDA4056, 0xDC4056, 0xDC40BB, 0xDC40CF, 0xDC6240, 0xDC7540, 0xDCBD40, 858 } 859 860 var ngrams_IBM420_ar_ltr = [64]uint32{ 861 0x404656, 0x4056BB, 0x4056BF, 0x406273, 0x406275, 0x4062B1, 0x4062BB, 0x4062DC, 0x406356, 0x407556, 0x4075DC, 0x40B156, 0x40BB56, 0x40BD56, 0x40BDBB, 0x40BDCF, 862 0x40BDDC, 0x40DAB1, 0x40DCAB, 0x40DCB1, 0x49B156, 0x564056, 0x564058, 0x564062, 0x564063, 0x564073, 0x564075, 0x564078, 0x56409A, 0x5640B1, 0x5640BB, 0x5640BD, 863 0x5640BF, 0x5640DA, 0x5640DC, 0x565840, 0x56B156, 0x56CF40, 0x58B156, 0x63B156, 0x63BD56, 0x67B156, 0x69B156, 0x73B156, 0x78B156, 0x9AB156, 0xAB4062, 0xADB156, 864 0xB14062, 0xB15640, 0xB156CF, 0xB19A40, 0xB1B140, 0xBB4062, 0xBB40DC, 0xBBB156, 0xBD5640, 0xBDBB40, 0xCF4062, 0xCF40DC, 0xCFB156, 0xDAB19A, 0xDCAB40, 0xDCB156, 865 } 866 867 func newRecognizer_IBM420_ar(charset string, ngram *[64]uint32) *recognizerSingleByte { 868 return &recognizerSingleByte{ 869 charset: charset, 870 language: "ar", 871 charMap: &charMap_IBM420_ar, 872 ngram: ngram, 873 } 874 } 875 876 func newRecognizer_IBM420_ar_rtl() *recognizerSingleByte { 877 return newRecognizer_IBM420_ar("IBM420_rtl", &ngrams_IBM420_ar_rtl) 878 } 879 880 func newRecognizer_IBM420_ar_ltr() *recognizerSingleByte { 881 return newRecognizer_IBM420_ar("IBM420_ltr", &ngrams_IBM420_ar_ltr) 882 }