github.com/andybalholm/brotli@v1.0.6/transform.go (about)

     1  package brotli
     2  
     3  const (
     4  	transformIdentity       = 0
     5  	transformOmitLast1      = 1
     6  	transformOmitLast2      = 2
     7  	transformOmitLast3      = 3
     8  	transformOmitLast4      = 4
     9  	transformOmitLast5      = 5
    10  	transformOmitLast6      = 6
    11  	transformOmitLast7      = 7
    12  	transformOmitLast8      = 8
    13  	transformOmitLast9      = 9
    14  	transformUppercaseFirst = 10
    15  	transformUppercaseAll   = 11
    16  	transformOmitFirst1     = 12
    17  	transformOmitFirst2     = 13
    18  	transformOmitFirst3     = 14
    19  	transformOmitFirst4     = 15
    20  	transformOmitFirst5     = 16
    21  	transformOmitFirst6     = 17
    22  	transformOmitFirst7     = 18
    23  	transformOmitFirst8     = 19
    24  	transformOmitFirst9     = 20
    25  	transformShiftFirst     = 21
    26  	transformShiftAll       = 22 + iota - 22
    27  	numTransformTypes
    28  )
    29  
// transformsMaxCutOff is the largest omit-last transform type (9). The
// transforms.cutOffTransforms array has transformsMaxCutOff+1 entries, i.e.
// one slot for each value 0 through 9.
const transformsMaxCutOff = transformOmitLast9
    31  
// transforms is a table of dictionary-word transforms.
//
// Fields:
//   - prefix_suffix_size: 217 in kBrotliTransforms, which is the byte length
//     of kPrefixSuffix.
//   - prefix_suffix: concatenated affix strings; each entry is one length
//     byte followed by that many bytes (transformDictionaryWord reads them
//     this way).
//   - prefix_suffix_map: indexed by a prefix/suffix id, gives the offset of
//     that entry inside prefix_suffix.
//   - num_transforms: 121 in kBrotliTransforms, which is the number of
//     triples in kTransformsData.
//   - transforms: three bytes per transform, in the order prefix id,
//     transform type, suffix id.
//   - params: two bytes per transform, combined low byte first into a
//     uint16; only read for the shift transform types. nil in
//     kBrotliTransforms.
//   - cutOffTransforms: one entry per value 0..transformsMaxCutOff. In
//     kBrotliTransforms entry N is the index of the transform whose triple is
//     (49, omit-last-N, 49); it is not read anywhere in this file.
type transforms struct {
	prefix_suffix_size uint16
	prefix_suffix      []byte
	prefix_suffix_map  []uint16
	num_transforms     uint32
	transforms         []byte
	params             []byte
	cutOffTransforms   [transformsMaxCutOff + 1]int16
}
    41  
    42  func transformPrefixId(t *transforms, I int) byte {
    43  	return t.transforms[(I*3)+0]
    44  }
    45  
    46  func transformType(t *transforms, I int) byte {
    47  	return t.transforms[(I*3)+1]
    48  }
    49  
    50  func transformSuffixId(t *transforms, I int) byte {
    51  	return t.transforms[(I*3)+2]
    52  }
    53  
    54  func transformPrefix(t *transforms, I int) []byte {
    55  	return t.prefix_suffix[t.prefix_suffix_map[transformPrefixId(t, I)]:]
    56  }
    57  
    58  func transformSuffix(t *transforms, I int) []byte {
    59  	return t.prefix_suffix[t.prefix_suffix_map[transformSuffixId(t, I)]:]
    60  }
    61  
    62  /* RFC 7932 transforms string data */
    63  const kPrefixSuffix string = "\001 \002, \010 of the \004 of \002s \001.\005 and \004 " + "in \001\"\004 to \002\">\001\n\002. \001]\005 for \003 a \006 " + "that \001'\006 with \006 from \004 by \001(\006. T" + "he \004 on \004 as \004 is \004ing \002\n\t\001:\003ed " + "\002=\"\004 at \003ly \001,\002='\005.com/\007. This \005" + " not \003er \003al \004ful \004ive \005less \004es" + "t \004ize \002\xc2\xa0\004ous \005 the \002e \000"
    64  
    65  var kPrefixSuffixMap = [50]uint16{
    66  	0x00,
    67  	0x02,
    68  	0x05,
    69  	0x0E,
    70  	0x13,
    71  	0x16,
    72  	0x18,
    73  	0x1E,
    74  	0x23,
    75  	0x25,
    76  	0x2A,
    77  	0x2D,
    78  	0x2F,
    79  	0x32,
    80  	0x34,
    81  	0x3A,
    82  	0x3E,
    83  	0x45,
    84  	0x47,
    85  	0x4E,
    86  	0x55,
    87  	0x5A,
    88  	0x5C,
    89  	0x63,
    90  	0x68,
    91  	0x6D,
    92  	0x72,
    93  	0x77,
    94  	0x7A,
    95  	0x7C,
    96  	0x80,
    97  	0x83,
    98  	0x88,
    99  	0x8C,
   100  	0x8E,
   101  	0x91,
   102  	0x97,
   103  	0x9F,
   104  	0xA5,
   105  	0xA9,
   106  	0xAD,
   107  	0xB2,
   108  	0xB7,
   109  	0xBD,
   110  	0xC2,
   111  	0xC7,
   112  	0xCA,
   113  	0xCF,
   114  	0xD5,
   115  	0xD8,
   116  }
   117  
   118  /* RFC 7932 transforms */
   119  var kTransformsData = []byte{
   120  	49,
   121  	transformIdentity,
   122  	49,
   123  	49,
   124  	transformIdentity,
   125  	0,
   126  	0,
   127  	transformIdentity,
   128  	0,
   129  	49,
   130  	transformOmitFirst1,
   131  	49,
   132  	49,
   133  	transformUppercaseFirst,
   134  	0,
   135  	49,
   136  	transformIdentity,
   137  	47,
   138  	0,
   139  	transformIdentity,
   140  	49,
   141  	4,
   142  	transformIdentity,
   143  	0,
   144  	49,
   145  	transformIdentity,
   146  	3,
   147  	49,
   148  	transformUppercaseFirst,
   149  	49,
   150  	49,
   151  	transformIdentity,
   152  	6,
   153  	49,
   154  	transformOmitFirst2,
   155  	49,
   156  	49,
   157  	transformOmitLast1,
   158  	49,
   159  	1,
   160  	transformIdentity,
   161  	0,
   162  	49,
   163  	transformIdentity,
   164  	1,
   165  	0,
   166  	transformUppercaseFirst,
   167  	0,
   168  	49,
   169  	transformIdentity,
   170  	7,
   171  	49,
   172  	transformIdentity,
   173  	9,
   174  	48,
   175  	transformIdentity,
   176  	0,
   177  	49,
   178  	transformIdentity,
   179  	8,
   180  	49,
   181  	transformIdentity,
   182  	5,
   183  	49,
   184  	transformIdentity,
   185  	10,
   186  	49,
   187  	transformIdentity,
   188  	11,
   189  	49,
   190  	transformOmitLast3,
   191  	49,
   192  	49,
   193  	transformIdentity,
   194  	13,
   195  	49,
   196  	transformIdentity,
   197  	14,
   198  	49,
   199  	transformOmitFirst3,
   200  	49,
   201  	49,
   202  	transformOmitLast2,
   203  	49,
   204  	49,
   205  	transformIdentity,
   206  	15,
   207  	49,
   208  	transformIdentity,
   209  	16,
   210  	0,
   211  	transformUppercaseFirst,
   212  	49,
   213  	49,
   214  	transformIdentity,
   215  	12,
   216  	5,
   217  	transformIdentity,
   218  	49,
   219  	0,
   220  	transformIdentity,
   221  	1,
   222  	49,
   223  	transformOmitFirst4,
   224  	49,
   225  	49,
   226  	transformIdentity,
   227  	18,
   228  	49,
   229  	transformIdentity,
   230  	17,
   231  	49,
   232  	transformIdentity,
   233  	19,
   234  	49,
   235  	transformIdentity,
   236  	20,
   237  	49,
   238  	transformOmitFirst5,
   239  	49,
   240  	49,
   241  	transformOmitFirst6,
   242  	49,
   243  	47,
   244  	transformIdentity,
   245  	49,
   246  	49,
   247  	transformOmitLast4,
   248  	49,
   249  	49,
   250  	transformIdentity,
   251  	22,
   252  	49,
   253  	transformUppercaseAll,
   254  	49,
   255  	49,
   256  	transformIdentity,
   257  	23,
   258  	49,
   259  	transformIdentity,
   260  	24,
   261  	49,
   262  	transformIdentity,
   263  	25,
   264  	49,
   265  	transformOmitLast7,
   266  	49,
   267  	49,
   268  	transformOmitLast1,
   269  	26,
   270  	49,
   271  	transformIdentity,
   272  	27,
   273  	49,
   274  	transformIdentity,
   275  	28,
   276  	0,
   277  	transformIdentity,
   278  	12,
   279  	49,
   280  	transformIdentity,
   281  	29,
   282  	49,
   283  	transformOmitFirst9,
   284  	49,
   285  	49,
   286  	transformOmitFirst7,
   287  	49,
   288  	49,
   289  	transformOmitLast6,
   290  	49,
   291  	49,
   292  	transformIdentity,
   293  	21,
   294  	49,
   295  	transformUppercaseFirst,
   296  	1,
   297  	49,
   298  	transformOmitLast8,
   299  	49,
   300  	49,
   301  	transformIdentity,
   302  	31,
   303  	49,
   304  	transformIdentity,
   305  	32,
   306  	47,
   307  	transformIdentity,
   308  	3,
   309  	49,
   310  	transformOmitLast5,
   311  	49,
   312  	49,
   313  	transformOmitLast9,
   314  	49,
   315  	0,
   316  	transformUppercaseFirst,
   317  	1,
   318  	49,
   319  	transformUppercaseFirst,
   320  	8,
   321  	5,
   322  	transformIdentity,
   323  	21,
   324  	49,
   325  	transformUppercaseAll,
   326  	0,
   327  	49,
   328  	transformUppercaseFirst,
   329  	10,
   330  	49,
   331  	transformIdentity,
   332  	30,
   333  	0,
   334  	transformIdentity,
   335  	5,
   336  	35,
   337  	transformIdentity,
   338  	49,
   339  	47,
   340  	transformIdentity,
   341  	2,
   342  	49,
   343  	transformUppercaseFirst,
   344  	17,
   345  	49,
   346  	transformIdentity,
   347  	36,
   348  	49,
   349  	transformIdentity,
   350  	33,
   351  	5,
   352  	transformIdentity,
   353  	0,
   354  	49,
   355  	transformUppercaseFirst,
   356  	21,
   357  	49,
   358  	transformUppercaseFirst,
   359  	5,
   360  	49,
   361  	transformIdentity,
   362  	37,
   363  	0,
   364  	transformIdentity,
   365  	30,
   366  	49,
   367  	transformIdentity,
   368  	38,
   369  	0,
   370  	transformUppercaseAll,
   371  	0,
   372  	49,
   373  	transformIdentity,
   374  	39,
   375  	0,
   376  	transformUppercaseAll,
   377  	49,
   378  	49,
   379  	transformIdentity,
   380  	34,
   381  	49,
   382  	transformUppercaseAll,
   383  	8,
   384  	49,
   385  	transformUppercaseFirst,
   386  	12,
   387  	0,
   388  	transformIdentity,
   389  	21,
   390  	49,
   391  	transformIdentity,
   392  	40,
   393  	0,
   394  	transformUppercaseFirst,
   395  	12,
   396  	49,
   397  	transformIdentity,
   398  	41,
   399  	49,
   400  	transformIdentity,
   401  	42,
   402  	49,
   403  	transformUppercaseAll,
   404  	17,
   405  	49,
   406  	transformIdentity,
   407  	43,
   408  	0,
   409  	transformUppercaseFirst,
   410  	5,
   411  	49,
   412  	transformUppercaseAll,
   413  	10,
   414  	0,
   415  	transformIdentity,
   416  	34,
   417  	49,
   418  	transformUppercaseFirst,
   419  	33,
   420  	49,
   421  	transformIdentity,
   422  	44,
   423  	49,
   424  	transformUppercaseAll,
   425  	5,
   426  	45,
   427  	transformIdentity,
   428  	49,
   429  	0,
   430  	transformIdentity,
   431  	33,
   432  	49,
   433  	transformUppercaseFirst,
   434  	30,
   435  	49,
   436  	transformUppercaseAll,
   437  	30,
   438  	49,
   439  	transformIdentity,
   440  	46,
   441  	49,
   442  	transformUppercaseAll,
   443  	1,
   444  	49,
   445  	transformUppercaseFirst,
   446  	34,
   447  	0,
   448  	transformUppercaseFirst,
   449  	33,
   450  	0,
   451  	transformUppercaseAll,
   452  	30,
   453  	0,
   454  	transformUppercaseAll,
   455  	1,
   456  	49,
   457  	transformUppercaseAll,
   458  	33,
   459  	49,
   460  	transformUppercaseAll,
   461  	21,
   462  	49,
   463  	transformUppercaseAll,
   464  	12,
   465  	0,
   466  	transformUppercaseAll,
   467  	5,
   468  	49,
   469  	transformUppercaseAll,
   470  	34,
   471  	0,
   472  	transformUppercaseAll,
   473  	12,
   474  	0,
   475  	transformUppercaseFirst,
   476  	30,
   477  	0,
   478  	transformUppercaseAll,
   479  	34,
   480  	0,
   481  	transformUppercaseFirst,
   482  	34,
   483  }
   484  
   485  var kBrotliTransforms = transforms{
   486  	217,
   487  	[]byte(kPrefixSuffix),
   488  	kPrefixSuffixMap[:],
   489  	121,
   490  	kTransformsData,
   491  	nil, /* no extra parameters */
   492  	[transformsMaxCutOff + 1]int16{0, 12, 27, 23, 42, 63, 56, 48, 59, 64},
   493  }
   494  
// getTransforms returns a pointer to the package-level kBrotliTransforms
// table. Every call returns the same pointer, so all callers share the one
// table rather than receiving a copy.
func getTransforms() *transforms {
	return &kBrotliTransforms
}
   498  
   499  func toUpperCase(p []byte) int {
   500  	if p[0] < 0xC0 {
   501  		if p[0] >= 'a' && p[0] <= 'z' {
   502  			p[0] ^= 32
   503  		}
   504  
   505  		return 1
   506  	}
   507  
   508  	/* An overly simplified uppercasing model for UTF-8. */
   509  	if p[0] < 0xE0 {
   510  		p[1] ^= 32
   511  		return 2
   512  	}
   513  
   514  	/* An arbitrary transform for three byte characters. */
   515  	p[2] ^= 5
   516  
   517  	return 3
   518  }
   519  
   520  func shiftTransform(word []byte, word_len int, parameter uint16) int {
   521  	/* Limited sign extension: scalar < (1 << 24). */
   522  	var scalar uint32 = (uint32(parameter) & 0x7FFF) + (0x1000000 - (uint32(parameter) & 0x8000))
   523  	if word[0] < 0x80 {
   524  		/* 1-byte rune / 0sssssss / 7 bit scalar (ASCII). */
   525  		scalar += uint32(word[0])
   526  
   527  		word[0] = byte(scalar & 0x7F)
   528  		return 1
   529  	} else if word[0] < 0xC0 {
   530  		/* Continuation / 10AAAAAA. */
   531  		return 1
   532  	} else if word[0] < 0xE0 {
   533  		/* 2-byte rune / 110sssss AAssssss / 11 bit scalar. */
   534  		if word_len < 2 {
   535  			return 1
   536  		}
   537  		scalar += uint32(word[1]&0x3F | (word[0]&0x1F)<<6)
   538  		word[0] = byte(0xC0 | (scalar>>6)&0x1F)
   539  		word[1] = byte(uint32(word[1]&0xC0) | scalar&0x3F)
   540  		return 2
   541  	} else if word[0] < 0xF0 {
   542  		/* 3-byte rune / 1110ssss AAssssss BBssssss / 16 bit scalar. */
   543  		if word_len < 3 {
   544  			return word_len
   545  		}
   546  		scalar += uint32(word[2])&0x3F | uint32(word[1]&0x3F)<<6 | uint32(word[0]&0x0F)<<12
   547  		word[0] = byte(0xE0 | (scalar>>12)&0x0F)
   548  		word[1] = byte(uint32(word[1]&0xC0) | (scalar>>6)&0x3F)
   549  		word[2] = byte(uint32(word[2]&0xC0) | scalar&0x3F)
   550  		return 3
   551  	} else if word[0] < 0xF8 {
   552  		/* 4-byte rune / 11110sss AAssssss BBssssss CCssssss / 21 bit scalar. */
   553  		if word_len < 4 {
   554  			return word_len
   555  		}
   556  		scalar += uint32(word[3])&0x3F | uint32(word[2]&0x3F)<<6 | uint32(word[1]&0x3F)<<12 | uint32(word[0]&0x07)<<18
   557  		word[0] = byte(0xF0 | (scalar>>18)&0x07)
   558  		word[1] = byte(uint32(word[1]&0xC0) | (scalar>>12)&0x3F)
   559  		word[2] = byte(uint32(word[2]&0xC0) | (scalar>>6)&0x3F)
   560  		word[3] = byte(uint32(word[3]&0xC0) | scalar&0x3F)
   561  		return 4
   562  	}
   563  
   564  	return 1
   565  }
   566  
   567  func transformDictionaryWord(dst []byte, word []byte, len int, trans *transforms, transform_idx int) int {
   568  	var idx int = 0
   569  	var prefix []byte = transformPrefix(trans, transform_idx)
   570  	var type_ byte = transformType(trans, transform_idx)
   571  	var suffix []byte = transformSuffix(trans, transform_idx)
   572  	{
   573  		var prefix_len int = int(prefix[0])
   574  		prefix = prefix[1:]
   575  		for {
   576  			tmp1 := prefix_len
   577  			prefix_len--
   578  			if tmp1 == 0 {
   579  				break
   580  			}
   581  			dst[idx] = prefix[0]
   582  			idx++
   583  			prefix = prefix[1:]
   584  		}
   585  	}
   586  	{
   587  		var t int = int(type_)
   588  		var i int = 0
   589  		if t <= transformOmitLast9 {
   590  			len -= t
   591  		} else if t >= transformOmitFirst1 && t <= transformOmitFirst9 {
   592  			var skip int = t - (transformOmitFirst1 - 1)
   593  			word = word[skip:]
   594  			len -= skip
   595  		}
   596  
   597  		for i < len {
   598  			dst[idx] = word[i]
   599  			idx++
   600  			i++
   601  		}
   602  		if t == transformUppercaseFirst {
   603  			toUpperCase(dst[idx-len:])
   604  		} else if t == transformUppercaseAll {
   605  			var uppercase []byte = dst
   606  			uppercase = uppercase[idx-len:]
   607  			for len > 0 {
   608  				var step int = toUpperCase(uppercase)
   609  				uppercase = uppercase[step:]
   610  				len -= step
   611  			}
   612  		} else if t == transformShiftFirst {
   613  			var param uint16 = uint16(trans.params[transform_idx*2]) + uint16(trans.params[transform_idx*2+1])<<8
   614  			shiftTransform(dst[idx-len:], int(len), param)
   615  		} else if t == transformShiftAll {
   616  			var param uint16 = uint16(trans.params[transform_idx*2]) + uint16(trans.params[transform_idx*2+1])<<8
   617  			var shift []byte = dst
   618  			shift = shift[idx-len:]
   619  			for len > 0 {
   620  				var step int = shiftTransform(shift, int(len), param)
   621  				shift = shift[step:]
   622  				len -= step
   623  			}
   624  		}
   625  	}
   626  	{
   627  		var suffix_len int = int(suffix[0])
   628  		suffix = suffix[1:]
   629  		for {
   630  			tmp2 := suffix_len
   631  			suffix_len--
   632  			if tmp2 == 0 {
   633  				break
   634  			}
   635  			dst[idx] = suffix[0]
   636  			idx++
   637  			suffix = suffix[1:]
   638  		}
   639  		return idx
   640  	}
   641  }