github.com/egonelbre/exp@v0.0.0-20240430123955-ed1d3aa93911/permutation/code.go (about) 1 package permutation 2 3 import ( 4 "math/bits" 5 ) 6 7 const ( 8 base = 12 9 nybblemask = 1<<4 - 1 10 fullmask = ^uint64(0) 11 maskstarts = 1<<(4*0) | 1<<(4*1) | 1<<(4*2) | 1<<(4*3) | 1<<(4*4) | 1<<(4*5) | 1<<(4*6) | 1<<(4*7) | 1<<(4*8) | 1<<(4*9) | 1<<(4*10) | 1<<(4*11) 12 ) 13 14 var ( 15 fact = [base]int{ 16 39916800, 17 3628800, 18 362880, 19 40320, 20 5040, 21 720, 22 120, 23 24, 24 6, 25 2, 26 1, 27 1, 28 } 29 30 masks [base]uint64 31 ) 32 33 func init() { 34 /* 35 fact[0] = 1 36 for i := 1; i < base; i++ { 37 fact[i] = fact[i-1] * i 38 } 39 for i := 0; i < base/2; i++ { 40 fact[i], fact[base-i-1] = fact[base-i-1], fact[i] 41 } 42 */ 43 44 for min := 0; min < base; min++ { 45 v := min 46 for i := 0; i < base; i++ { 47 v = v<<4 | min 48 } 49 masks[min] = ^uint64(v) 50 } 51 } 52 53 func Code(perm [base]byte) int { return CodeCopy(perm) } 54 55 func CodeCopy(perm [base]byte) int { 56 r := 0 57 for min := byte(0); min < base-1; min++ { 58 z := 0 59 n := base - min 60 for i, v := range perm[:n] { 61 if v == min { 62 z = i 63 break 64 } 65 } 66 copy(perm[z:], perm[z+1:n]) 67 r += z * fact[min] 68 } 69 return r 70 } 71 72 func CodeCopyBit(perm [base]byte) int { 73 return codebit(uint64(PackNybbleUnroll(perm))) 74 } 75 76 func CodeCopyBitNonstandard(perm [base]byte) int { 77 return codebit(PackNybbleUnroll3(perm)) 78 } 79 80 func codebit(perm uint64) int { 81 r := 0 82 for min, mask := range masks { 83 filtered := perm ^ mask 84 filtered &= filtered >> 2 85 filtered &= filtered >> 1 86 filtered &= maskstarts 87 z := byte(bits.TrailingZeros64(filtered)) 88 upper := (perm >> (z + 4)) << z 89 lower := perm &^ (fullmask << z) 90 perm = upper | lower 91 r += int(z/4) * fact[min] 92 } 93 return r 94 } 95 96 func CodeCount(perm [base]byte) int { 97 r := 0 98 for min := byte(0); min < base-1; min++ { 99 z := 0 100 for _, v := range perm { 101 if v > min { 102 z++ 103 } else if v == min { 104 break 105 } 106 } 107 r += z * fact[min] 108 } 109 return r 110 } 111 112 func CodeTable(perm [base]byte) int { 113 var index [base]byte 114 for i, v := range perm { 115 index[v] = byte(i) 116 } 117 118 r := 0 119 for min := byte(0); min < base-1; min++ { 120 z := 0 121 for _, v := range perm[:index[min]] { 122 if v > min { 123 z++ 124 } 125 } 126 r += z * fact[min] 127 } 128 return r 129 } 130 131 func CodeTable2(perm [base]byte) int { 132 var index [base]byte 133 for i, v := range perm { 134 index[v] = byte(i) 135 } 136 137 r := 0 138 for min := byte(0); min < base-1; min++ { 139 z := index[min] 140 for i, pos := range index { 141 if pos > z { 142 index[i] = pos - 1 143 } 144 } 145 r += int(z) * fact[min] 146 } 147 return r 148 } 149 150 func CodeShuffle(perm [base]byte) int { 151 var state = [base]byte{ 152 0, 1, 2, 3, 153 4, 5, 6, 7, 154 8, 9, 10, 11, 155 } 156 var inverse = [base]byte{ 157 0, 1, 2, 3, 158 4, 5, 6, 7, 159 8, 9, 10, 11, 160 } 161 for i := range perm { 162 /* 163 * this cryptic looking code is an optimized version of 164 * this pseudocode: 165 * 166 * j = inverse[perm[i]] 167 * swap entries state[i] and state[j] in inverse 168 * swap entries i and j in state 169 * perm[i] = j - i 170 * 171 * two optimizations are performed: 172 * - inverse[state[k]] == k and state[inverse[k]] == k 173 * - after iteration i, inverse[perm[i]] and state[i] 174 * are never read again, so we can omit assigning to 175 * them. 176 */ 177 j := inverse[perm[i]] 178 inverse[state[i]] = j 179 state[j] = state[i] 180 perm[i] = j - byte(i) 181 } 182 183 total := 0 184 for i, v := range perm { 185 total += int(v) * fact[i] 186 } 187 188 return total 189 } 190 191 func CodeShuffleUnroll(perm [base]byte) int { 192 var state = [base]byte{ 193 0, 1, 2, 3, 194 4, 5, 6, 7, 195 8, 9, 10, 11, 196 } 197 var inverse = [base]byte{ 198 0, 1, 2, 3, 199 4, 5, 6, 7, 200 8, 9, 10, 11, 201 } 202 203 total := 0 204 205 j0 := inverse[perm[0]] 206 inverse[state[0]], state[j0] = j0, state[0] 207 total += int(j0-0) * 39916800 208 209 j1 := inverse[perm[1]] 210 inverse[state[1]], state[j1] = j1, state[1] 211 total += int(j1-1) * 3628800 212 213 j2 := inverse[perm[2]] 214 inverse[state[2]], state[j2] = j2, state[2] 215 total += int(j2-2) * 362880 216 217 j3 := inverse[perm[3]] 218 inverse[state[3]], state[j3] = j3, state[3] 219 total += int(j3-3) * 40320 220 221 j4 := inverse[perm[4]] 222 inverse[state[4]], state[j4] = j4, state[4] 223 total += int(j4-4) * 5040 224 225 j5 := inverse[perm[5]] 226 inverse[state[5]], state[j5] = j5, state[5] 227 total += int(j5-5) * 720 228 229 j6 := inverse[perm[6]] 230 inverse[state[6]], state[j6] = j6, state[6] 231 total += int(j6-6) * 120 232 233 j7 := inverse[perm[7]] 234 inverse[state[7]], state[j7] = j7, state[7] 235 total += int(j7-7) * 24 236 237 j8 := inverse[perm[8]] 238 inverse[state[8]], state[j8] = j8, state[8] 239 total += int(j8-8) * 6 240 241 j9 := inverse[perm[9]] 242 inverse[state[9]], state[j9] = j9, state[9] 243 total += int(j9-9) * 2 244 245 j10 := inverse[perm[10]] 246 inverse[state[10]], state[j10] = j10, state[10] 247 total += int(j10-10) * 1 248 249 return total 250 } 251 252 func DecodeShuffle(v int) [base]byte { 253 var state = [base]byte{ 254 0, 1, 2, 3, 255 4, 5, 6, 7, 256 8, 9, 10, 11, 257 } 258 259 var perm [base]byte 260 for i := range perm { 261 perm[i] = byte(v / fact[i]) 262 v = v % fact[i] 263 } 264 265 for i := byte(0); i < base; i++ { 266 j := i + perm[i] 267 perm[i] = state[j] 268 state[j] = state[i] 269 } 270 return perm 271 } 272 273 func PackNybble(perm [base]byte) int { 274 r := 0 275 for _, v := range perm { 276 r = r<<4 + int(v) 277 } 278 return r 279 } 280 281 func PackTight(perm [base]byte) int { 282 r := 0 283 for _, v := range perm { 284 r = r*12 + int(v) 285 } 286 return r 287 } 288 289 func PackNybbleUnroll(perm [base]byte) int { 290 return int(perm[0]) | 291 int(perm[1])<<(4*1) | 292 int(perm[2])<<(4*2) | 293 int(perm[3])<<(4*3) | 294 int(perm[4])<<(4*4) | 295 int(perm[5])<<(4*5) | 296 int(perm[6])<<(4*6) | 297 int(perm[7])<<(4*7) | 298 int(perm[8])<<(4*8) | 299 int(perm[9])<<(4*9) | 300 int(perm[10])<<(4*10) | 301 int(perm[11])<<(4*11) 302 } 303 304 func UnpackNybbleUnroll(v int) [base]byte { 305 const mask = 1<<4 - 1 306 return [base]byte{ 307 byte((v >> (4 * 0)) & mask), 308 byte((v >> (4 * 1)) & mask), 309 byte((v >> (4 * 2)) & mask), 310 byte((v >> (4 * 3)) & mask), 311 byte((v >> (4 * 4)) & mask), 312 byte((v >> (4 * 5)) & mask), 313 byte((v >> (4 * 6)) & mask), 314 byte((v >> (4 * 7)) & mask), 315 byte((v >> (4 * 8)) & mask), 316 byte((v >> (4 * 9)) & mask), 317 byte((v >> (4 * 10)) & mask), 318 byte((v >> (4 * 11)) & mask), 319 } 320 } 321 322 func PackNybbleUnrollReverse(perm [base]byte) int { 323 return int(perm[0]<<(4*11)) | 324 int(perm[1])<<(4*10) | 325 int(perm[2])<<(4*9) | 326 int(perm[3])<<(4*8) | 327 int(perm[4])<<(4*7) | 328 int(perm[5])<<(4*6) | 329 int(perm[6])<<(4*5) | 330 int(perm[7])<<(4*4) | 331 int(perm[8])<<(4*3) | 332 int(perm[9])<<(4*2) | 333 int(perm[10])<<(4*1) | 334 int(perm[11])<<(4*0) 335 } 336 337 func PackNybbleUnroll2(perm [base]byte) int { 338 i0 := int(perm[0]) | int(perm[1])<<8 | int(perm[2])<<16 | int(perm[3])<<24 339 i1 := int(perm[4]) | int(perm[5])<<8 | int(perm[6])<<16 | int(perm[7])<<24 340 i2 := int(perm[8]) | int(perm[9])<<8 | int(perm[10])<<16 | int(perm[11])<<24 341 return i0 | i1<<4 | i2<<32 342 } 343 344 func PackNybbleUnroll3(perm [base]byte) uint64 { 345 i0 := uint64(perm[0]) | uint64(perm[1])<<8 | uint64(perm[2])<<16 | uint64(perm[3])<<24 346 i1 := uint64(perm[4]) | uint64(perm[5])<<8 | uint64(perm[6])<<16 | uint64(perm[7])<<24 347 i2 := uint64(perm[8]) | uint64(perm[9])<<8 348 i3 := uint64(perm[10]) | uint64(perm[11])<<8 349 return i0 | i1<<4 | i2<<32 | i3<<36 350 } 351 352 const ( 353 b12_0 = 1 354 b12_1 = b12_0 * 12 355 b12_2 = b12_1 * 12 356 b12_3 = b12_2 * 12 357 b12_4 = b12_3 * 12 358 b12_5 = b12_4 * 12 359 b12_6 = b12_5 * 12 360 b12_7 = b12_6 * 12 361 b12_8 = b12_7 * 12 362 b12_9 = b12_8 * 12 363 b12_10 = b12_9 * 12 364 b12_11 = b12_10 * 12 365 ) 366 367 func PackTightUnroll(perm [base]byte) int { 368 return int(perm[0])*b12_0 | 369 int(perm[1])*b12_1 | 370 int(perm[2])*b12_2 | 371 int(perm[3])*b12_3 | 372 int(perm[4])*b12_4 | 373 int(perm[5])*b12_5 | 374 int(perm[6])*b12_6 | 375 int(perm[7])*b12_7 | 376 int(perm[8])*b12_8 | 377 int(perm[9])*b12_9 | 378 int(perm[10])*b12_10 | 379 int(perm[11])*b12_11 380 }