github.com/ledgerwatch/erigon-lib@v1.0.0/sais/sais.c (about) 1 /* 2 * sais.c for sais-lite 3 * Copyright (c) 2008-2010 Yuta Mori All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person 6 * obtaining a copy of this software and associated documentation 7 * files (the "Software"), to deal in the Software without 8 * restriction, including without limitation the rights to use, 9 * copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following 12 * conditions: 13 * 14 * The above copyright notice and this permission notice shall be 15 * included in all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 * OTHER DEALINGS IN THE SOFTWARE. 25 */ 26 27 #include "sais.h" 28 #include <assert.h> 29 #include <stdlib.h> 30 31 #ifndef UCHAR_SIZE 32 #define UCHAR_SIZE 256 33 #endif 34 #ifndef MINBUCKETSIZE 35 #define MINBUCKETSIZE 256 36 #endif 37 38 #define sais_index_type int 39 #define sais_bool_type int 40 #define SAIS_LMSSORT2_LIMIT 0x3fffffff 41 42 #define SAIS_MYMALLOC(_num, _type) ((_type *)malloc((_num) * sizeof(_type))) 43 #define SAIS_MYFREE(_ptr, _num, _type) free((_ptr)) 44 #define chr(_a) (cs == sizeof(sais_index_type) ? ((sais_index_type *)T)[(_a)] : ((unsigned char *)T)[(_a)]) 45 46 /* find the start or end of each bucket */ 47 static void 48 getCounts(const void *T, sais_index_type *C, sais_index_type n, sais_index_type k, int cs) 49 { 50 sais_index_type i; 51 for (i = 0; i < k; ++i) 52 { 53 C[i] = 0; 54 } 55 for (i = 0; i < n; ++i) 56 { 57 ++C[chr(i)]; 58 } 59 } 60 static void 61 getBuckets(const sais_index_type *C, sais_index_type *B, sais_index_type k, sais_bool_type end) 62 { 63 sais_index_type i, sum = 0; 64 if (end) 65 { 66 for (i = 0; i < k; ++i) 67 { 68 sum += C[i]; 69 B[i] = sum; 70 } 71 } 72 else 73 { 74 for (i = 0; i < k; ++i) 75 { 76 sum += C[i]; 77 B[i] = sum - C[i]; 78 } 79 } 80 } 81 82 /* sort all type LMS suffixes */ 83 static void 84 LMSsort1(const void *T, sais_index_type *SA, 85 sais_index_type *C, sais_index_type *B, 86 sais_index_type n, sais_index_type k, int cs) 87 { 88 sais_index_type *b, i, j; 89 sais_index_type c0, c1; 90 91 /* compute SAl */ 92 if (C == B) 93 { 94 getCounts(T, C, n, k, cs); 95 } 96 getBuckets(C, B, k, 0); /* find starts of buckets */ 97 j = n - 1; 98 b = SA + B[c1 = chr(j)]; 99 --j; 100 *b++ = (chr(j) < c1) ? ~j : j; 101 for (i = 0; i < n; ++i) 102 { 103 if (0 < (j = SA[i])) 104 { 105 assert(chr(j) >= chr(j + 1)); 106 if ((c0 = chr(j)) != c1) 107 { 108 B[c1] = b - SA; 109 b = SA + B[c1 = c0]; 110 } 111 assert(i < (b - SA)); 112 --j; 113 *b++ = (chr(j) < c1) ? ~j : j; 114 SA[i] = 0; 115 } 116 else if (j < 0) 117 { 118 SA[i] = ~j; 119 } 120 } 121 /* compute SAs */ 122 if (C == B) 123 { 124 getCounts(T, C, n, k, cs); 125 } 126 getBuckets(C, B, k, 1); /* find ends of buckets */ 127 for (i = n - 1, b = SA + B[c1 = 0]; 0 <= i; --i) 128 { 129 if (0 < (j = SA[i])) 130 { 131 assert(chr(j) <= chr(j + 1)); 132 if ((c0 = chr(j)) != c1) 133 { 134 B[c1] = b - SA; 135 b = SA + B[c1 = c0]; 136 } 137 assert((b - SA) <= i); 138 --j; 139 *--b = (chr(j) > c1) ? ~(j + 1) : j; 140 SA[i] = 0; 141 } 142 } 143 } 144 static sais_index_type 145 LMSpostproc1(const void *T, sais_index_type *SA, 146 sais_index_type n, sais_index_type m, int cs) 147 { 148 sais_index_type i, j, p, q, plen, qlen, name; 149 sais_index_type c0, c1; 150 sais_bool_type diff; 151 152 /* compact all the sorted substrings into the first m items of SA 153 2*m must be not larger than n (proveable) */ 154 assert(0 < n); 155 for (i = 0; (p = SA[i]) < 0; ++i) 156 { 157 SA[i] = ~p; 158 assert((i + 1) < n); 159 } 160 if (i < m) 161 { 162 for (j = i, ++i;; ++i) 163 { 164 assert(i < n); 165 if ((p = SA[i]) < 0) 166 { 167 SA[j++] = ~p; 168 SA[i] = 0; 169 if (j == m) 170 { 171 break; 172 } 173 } 174 } 175 } 176 177 /* store the length of all substrings */ 178 i = n - 1; 179 j = n - 1; 180 c0 = chr(n - 1); 181 do 182 { 183 c1 = c0; 184 } while ((0 <= --i) && ((c0 = chr(i)) >= c1)); 185 for (; 0 <= i;) 186 { 187 do 188 { 189 c1 = c0; 190 } while ((0 <= --i) && ((c0 = chr(i)) <= c1)); 191 if (0 <= i) 192 { 193 SA[m + ((i + 1) >> 1)] = j - i; 194 j = i + 1; 195 do 196 { 197 c1 = c0; 198 } while ((0 <= --i) && ((c0 = chr(i)) >= c1)); 199 } 200 } 201 202 /* find the lexicographic names of all substrings */ 203 for (i = 0, name = 0, q = n, qlen = 0; i < m; ++i) 204 { 205 p = SA[i], plen = SA[m + (p >> 1)], diff = 1; 206 if ((plen == qlen) && ((q + plen) < n)) 207 { 208 for (j = 0; (j < plen) && (chr(p + j) == chr(q + j)); ++j) 209 { 210 } 211 if (j == plen) 212 { 213 diff = 0; 214 } 215 } 216 if (diff != 0) 217 { 218 ++name, q = p, qlen = plen; 219 } 220 SA[m + (p >> 1)] = name; 221 } 222 223 return name; 224 } 225 static void 226 LMSsort2(const void *T, sais_index_type *SA, 227 sais_index_type *C, sais_index_type *B, sais_index_type *D, 228 sais_index_type n, sais_index_type k, int cs) 229 { 230 sais_index_type *b, i, j, t, d; 231 sais_index_type c0, c1; 232 assert(C != B); 233 234 /* compute SAl */ 235 getBuckets(C, B, k, 0); /* find starts of buckets */ 236 j = n - 1; 237 b = SA + B[c1 = chr(j)]; 238 --j; 239 t = (chr(j) < c1); 240 j += n; 241 *b++ = (t & 1) ? ~j : j; 242 for (i = 0, d = 0; i < n; ++i) 243 { 244 if (0 < (j = SA[i])) 245 { 246 if (n <= j) 247 { 248 d += 1; 249 j -= n; 250 } 251 assert(chr(j) >= chr(j + 1)); 252 if ((c0 = chr(j)) != c1) 253 { 254 B[c1] = b - SA; 255 b = SA + B[c1 = c0]; 256 } 257 assert(i < (b - SA)); 258 --j; 259 t = c0; 260 t = (t << 1) | (chr(j) < c1); 261 if (D[t] != d) 262 { 263 j += n; 264 D[t] = d; 265 } 266 *b++ = (t & 1) ? ~j : j; 267 SA[i] = 0; 268 } 269 else if (j < 0) 270 { 271 SA[i] = ~j; 272 } 273 } 274 for (i = n - 1; 0 <= i; --i) 275 { 276 if (0 < SA[i]) 277 { 278 if (SA[i] < n) 279 { 280 SA[i] += n; 281 for (j = i - 1; SA[j] < n; --j) 282 { 283 } 284 SA[j] -= n; 285 i = j; 286 } 287 } 288 } 289 290 /* compute SAs */ 291 getBuckets(C, B, k, 1); /* find ends of buckets */ 292 for (i = n - 1, d += 1, b = SA + B[c1 = 0]; 0 <= i; --i) 293 { 294 if (0 < (j = SA[i])) 295 { 296 if (n <= j) 297 { 298 d += 1; 299 j -= n; 300 } 301 assert(chr(j) <= chr(j + 1)); 302 if ((c0 = chr(j)) != c1) 303 { 304 B[c1] = b - SA; 305 b = SA + B[c1 = c0]; 306 } 307 assert((b - SA) <= i); 308 --j; 309 t = c0; 310 t = (t << 1) | (chr(j) > c1); 311 if (D[t] != d) 312 { 313 j += n; 314 D[t] = d; 315 } 316 *--b = (t & 1) ? ~(j + 1) : j; 317 SA[i] = 0; 318 } 319 } 320 } 321 static sais_index_type 322 LMSpostproc2(sais_index_type *SA, sais_index_type n, sais_index_type m) 323 { 324 sais_index_type i, j, d, name; 325 326 /* compact all the sorted LMS substrings into the first m items of SA */ 327 assert(0 < n); 328 for (i = 0, name = 0; (j = SA[i]) < 0; ++i) 329 { 330 j = ~j; 331 if (n <= j) 332 { 333 name += 1; 334 } 335 SA[i] = j; 336 assert((i + 1) < n); 337 } 338 if (i < m) 339 { 340 for (d = i, ++i;; ++i) 341 { 342 assert(i < n); 343 if ((j = SA[i]) < 0) 344 { 345 j = ~j; 346 if (n <= j) 347 { 348 name += 1; 349 } 350 SA[d++] = j; 351 SA[i] = 0; 352 if (d == m) 353 { 354 break; 355 } 356 } 357 } 358 } 359 if (name < m) 360 { 361 /* store the lexicographic names */ 362 for (i = m - 1, d = name + 1; 0 <= i; --i) 363 { 364 if (n <= (j = SA[i])) 365 { 366 j -= n; 367 --d; 368 } 369 SA[m + (j >> 1)] = d; 370 } 371 } 372 else 373 { 374 /* unset flags */ 375 for (i = 0; i < m; ++i) 376 { 377 if (n <= (j = SA[i])) 378 { 379 j -= n; 380 SA[i] = j; 381 } 382 } 383 } 384 385 return name; 386 } 387 388 /* compute SA and BWT */ 389 static void 390 induceSA(const void *T, sais_index_type *SA, 391 sais_index_type *C, sais_index_type *B, 392 sais_index_type n, sais_index_type k, int cs) 393 { 394 sais_index_type *b, i, j; 395 sais_index_type c0, c1; 396 /* compute SAl */ 397 if (C == B) 398 { 399 getCounts(T, C, n, k, cs); 400 } 401 getBuckets(C, B, k, 0); /* find starts of buckets */ 402 j = n - 1; 403 b = SA + B[c1 = chr(j)]; 404 *b++ = ((0 < j) && (chr(j - 1) < c1)) ? ~j : j; 405 for (i = 0; i < n; ++i) 406 { 407 j = SA[i], SA[i] = ~j; 408 if (0 < j) 409 { 410 --j; 411 assert(chr(j) >= chr(j + 1)); 412 if ((c0 = chr(j)) != c1) 413 { 414 B[c1] = b - SA; 415 b = SA + B[c1 = c0]; 416 } 417 assert(i < (b - SA)); 418 *b++ = ((0 < j) && (chr(j - 1) < c1)) ? ~j : j; 419 } 420 } 421 /* compute SAs */ 422 if (C == B) 423 { 424 getCounts(T, C, n, k, cs); 425 } 426 getBuckets(C, B, k, 1); /* find ends of buckets */ 427 for (i = n - 1, b = SA + B[c1 = 0]; 0 <= i; --i) 428 { 429 if (0 < (j = SA[i])) 430 { 431 --j; 432 assert(chr(j) <= chr(j + 1)); 433 if ((c0 = chr(j)) != c1) 434 { 435 B[c1] = b - SA; 436 b = SA + B[c1 = c0]; 437 } 438 assert((b - SA) <= i); 439 *--b = ((j == 0) || (chr(j - 1) > c1)) ? ~j : j; 440 } 441 else 442 { 443 SA[i] = ~j; 444 } 445 } 446 } 447 static sais_index_type 448 computeBWT(const void *T, sais_index_type *SA, 449 sais_index_type *C, sais_index_type *B, 450 sais_index_type n, sais_index_type k, int cs) 451 { 452 sais_index_type *b, i, j, pidx = -1; 453 sais_index_type c0, c1; 454 /* compute SAl */ 455 if (C == B) 456 { 457 getCounts(T, C, n, k, cs); 458 } 459 getBuckets(C, B, k, 0); /* find starts of buckets */ 460 j = n - 1; 461 b = SA + B[c1 = chr(j)]; 462 *b++ = ((0 < j) && (chr(j - 1) < c1)) ? ~j : j; 463 for (i = 0; i < n; ++i) 464 { 465 if (0 < (j = SA[i])) 466 { 467 --j; 468 assert(chr(j) >= chr(j + 1)); 469 SA[i] = ~((sais_index_type)(c0 = chr(j))); 470 if (c0 != c1) 471 { 472 B[c1] = b - SA; 473 b = SA + B[c1 = c0]; 474 } 475 assert(i < (b - SA)); 476 *b++ = ((0 < j) && (chr(j - 1) < c1)) ? ~j : j; 477 } 478 else if (j != 0) 479 { 480 SA[i] = ~j; 481 } 482 } 483 /* compute SAs */ 484 if (C == B) 485 { 486 getCounts(T, C, n, k, cs); 487 } 488 getBuckets(C, B, k, 1); /* find ends of buckets */ 489 for (i = n - 1, b = SA + B[c1 = 0]; 0 <= i; --i) 490 { 491 if (0 < (j = SA[i])) 492 { 493 --j; 494 assert(chr(j) <= chr(j + 1)); 495 SA[i] = (c0 = chr(j)); 496 if (c0 != c1) 497 { 498 B[c1] = b - SA; 499 b = SA + B[c1 = c0]; 500 } 501 assert((b - SA) <= i); 502 *--b = ((0 < j) && (chr(j - 1) > c1)) ? ~((sais_index_type)chr(j - 1)) : j; 503 } 504 else if (j != 0) 505 { 506 SA[i] = ~j; 507 } 508 else 509 { 510 pidx = i; 511 } 512 } 513 return pidx; 514 } 515 516 /* find the suffix array SA of T[0..n-1] in {0..255}^n */ 517 static sais_index_type 518 sais_main(const void *T, sais_index_type *SA, 519 sais_index_type fs, sais_index_type n, sais_index_type k, int cs, 520 sais_bool_type isbwt) 521 { 522 sais_index_type *C, *B, *D, *RA, *b; 523 sais_index_type i, j, m, p, q, t, name, pidx = 0, newfs; 524 sais_index_type c0, c1; 525 unsigned int flags; 526 527 assert((T != NULL) && (SA != NULL)); 528 assert((0 <= fs) && (0 < n) && (1 <= k)); 529 530 if (k <= MINBUCKETSIZE) 531 { 532 if ((C = SAIS_MYMALLOC(k, sais_index_type)) == NULL) 533 { 534 return -2; 535 } 536 if (k <= fs) 537 { 538 B = SA + (n + fs - k); 539 flags = 1; 540 } 541 else 542 { 543 if ((B = SAIS_MYMALLOC(k, sais_index_type)) == NULL) 544 { 545 SAIS_MYFREE(C, k, sais_index_type); 546 return -2; 547 } 548 flags = 3; 549 } 550 } 551 else if (k <= fs) 552 { 553 C = SA + (n + fs - k); 554 if (k <= (fs - k)) 555 { 556 B = C - k; 557 flags = 0; 558 } 559 else if (k <= (MINBUCKETSIZE * 4)) 560 { 561 if ((B = SAIS_MYMALLOC(k, sais_index_type)) == NULL) 562 { 563 return -2; 564 } 565 flags = 2; 566 } 567 else 568 { 569 B = C; 570 flags = 8; 571 } 572 } 573 else 574 { 575 if ((C = B = SAIS_MYMALLOC(k, sais_index_type)) == NULL) 576 { 577 return -2; 578 } 579 flags = 4 | 8; 580 } 581 if ((n <= SAIS_LMSSORT2_LIMIT) && (2 <= (n / k))) 582 { 583 if (flags & 1) 584 { 585 flags |= ((k * 2) <= (fs - k)) ? 32 : 16; 586 } 587 else if ((flags == 0) && ((k * 2) <= (fs - k * 2))) 588 { 589 flags |= 32; 590 } 591 } 592 /* stage 1: reduce the problem by at least 1/2 593 sort all the LMS-substrings */ 594 getCounts(T, C, n, k, cs); 595 getBuckets(C, B, k, 1); /* find ends of buckets */ 596 for (i = 0; i < n; ++i) 597 { 598 SA[i] = 0; 599 } 600 b = &t; 601 i = n - 1; 602 j = n; 603 m = 0; 604 c0 = chr(n - 1); 605 do 606 { 607 c1 = c0; 608 } while ((0 <= --i) && ((c0 = chr(i)) >= c1)); 609 for (; 0 <= i;) 610 { 611 do 612 { 613 c1 = c0; 614 } while ((0 <= --i) && ((c0 = chr(i)) <= c1)); 615 if (0 <= i) 616 { 617 *b = j; 618 b = SA + --B[c1]; 619 j = i; 620 ++m; 621 do 622 { 623 c1 = c0; 624 } while ((0 <= --i) && ((c0 = chr(i)) >= c1)); 625 } 626 } 627 628 if (1 < m) 629 { 630 if (flags & (16 | 32)) 631 { 632 if (flags & 16) 633 { 634 if ((D = SAIS_MYMALLOC(k * 2, sais_index_type)) == NULL) 635 { 636 if (flags & (1 | 4)) 637 { 638 SAIS_MYFREE(C, k, sais_index_type); 639 } 640 if (flags & 2) 641 { 642 SAIS_MYFREE(B, k, sais_index_type); 643 } 644 return -2; 645 } 646 } 647 else 648 { 649 D = B - k * 2; 650 } 651 assert((j + 1) < n); 652 ++B[chr(j + 1)]; 653 for (i = 0, j = 0; i < k; ++i) 654 { 655 j += C[i]; 656 if (B[i] != j) 657 { 658 assert(SA[B[i]] != 0); 659 SA[B[i]] += n; 660 } 661 D[i] = D[i + k] = 0; 662 } 663 LMSsort2(T, SA, C, B, D, n, k, cs); 664 name = LMSpostproc2(SA, n, m); 665 if (flags & 16) 666 { 667 SAIS_MYFREE(D, k * 2, sais_index_type); 668 } 669 } 670 else 671 { 672 LMSsort1(T, SA, C, B, n, k, cs); 673 name = LMSpostproc1(T, SA, n, m, cs); 674 } 675 } 676 else if (m == 1) 677 { 678 *b = j + 1; 679 name = 1; 680 } 681 else 682 { 683 name = 0; 684 } 685 686 /* stage 2: solve the reduced problem 687 recurse if names are not yet unique */ 688 if (name < m) 689 { 690 if (flags & 4) 691 { 692 SAIS_MYFREE(C, k, sais_index_type); 693 } 694 if (flags & 2) 695 { 696 SAIS_MYFREE(B, k, sais_index_type); 697 } 698 newfs = (n + fs) - (m * 2); 699 if ((flags & (1 | 4 | 8)) == 0) 700 { 701 if ((k + name) <= newfs) 702 { 703 newfs -= k; 704 } 705 else 706 { 707 flags |= 8; 708 } 709 } 710 assert((n >> 1) <= (newfs + m)); 711 RA = SA + m + newfs; 712 for (i = m + (n >> 1) - 1, j = m - 1; m <= i; --i) 713 { 714 if (SA[i] != 0) 715 { 716 RA[j--] = SA[i] - 1; 717 } 718 } 719 if (sais_main(RA, SA, newfs, m, name, sizeof(sais_index_type), 0) != 0) 720 { 721 if (flags & 1) 722 { 723 SAIS_MYFREE(C, k, sais_index_type); 724 } 725 return -2; 726 } 727 728 i = n - 1; 729 j = m - 1; 730 c0 = chr(n - 1); 731 do 732 { 733 c1 = c0; 734 } while ((0 <= --i) && ((c0 = chr(i)) >= c1)); 735 for (; 0 <= i;) 736 { 737 do 738 { 739 c1 = c0; 740 } while ((0 <= --i) && ((c0 = chr(i)) <= c1)); 741 if (0 <= i) 742 { 743 RA[j--] = i + 1; 744 do 745 { 746 c1 = c0; 747 } while ((0 <= --i) && ((c0 = chr(i)) >= c1)); 748 } 749 } 750 for (i = 0; i < m; ++i) 751 { 752 SA[i] = RA[SA[i]]; 753 } 754 if (flags & 4) 755 { 756 if ((C = B = SAIS_MYMALLOC(k, int)) == NULL) 757 { 758 return -2; 759 } 760 } 761 if (flags & 2) 762 { 763 if ((B = SAIS_MYMALLOC(k, int)) == NULL) 764 { 765 if (flags & 1) 766 { 767 SAIS_MYFREE(C, k, sais_index_type); 768 } 769 return -2; 770 } 771 } 772 } 773 774 /* stage 3: induce the result for the original problem */ 775 if (flags & 8) 776 { 777 getCounts(T, C, n, k, cs); 778 } 779 /* put all left-most S characters into their buckets */ 780 if (1 < m) 781 { 782 getBuckets(C, B, k, 1); /* find ends of buckets */ 783 i = m - 1, j = n, p = SA[m - 1], c1 = chr(p); 784 do 785 { 786 q = B[c0 = c1]; 787 while (q < j) 788 { 789 SA[--j] = 0; 790 } 791 do 792 { 793 SA[--j] = p; 794 if (--i < 0) 795 { 796 break; 797 } 798 p = SA[i]; 799 } while ((c1 = chr(p)) == c0); 800 } while (0 <= i); 801 while (0 < j) 802 { 803 SA[--j] = 0; 804 } 805 } 806 if (isbwt == 0) 807 { 808 induceSA(T, SA, C, B, n, k, cs); 809 } 810 else 811 { 812 pidx = computeBWT(T, SA, C, B, n, k, cs); 813 } 814 if (flags & (1 | 4)) 815 { 816 SAIS_MYFREE(C, k, sais_index_type); 817 } 818 if (flags & 2) 819 { 820 SAIS_MYFREE(B, k, sais_index_type); 821 } 822 823 return pidx; 824 } 825 826 /*---------------------------------------------------------------------------*/ 827 828 int sais(const unsigned char *T, int *SA, int n) 829 { 830 831 if ((T == NULL) || (SA == NULL) || (n < 0)) 832 { 833 return -1; 834 } 835 if (n <= 1) 836 { 837 if (n == 1) 838 { 839 SA[0] = 0; 840 } 841 return 0; 842 } 843 int result = sais_main(T, SA, 0, n, UCHAR_SIZE, sizeof(unsigned char), 0); 844 845 // for (int i = 0; i < n; i++) 846 // printf("%d ", SA[i]); 847 848 // printf("\n"); 849 850 return result; 851 } 852 853 int sais_int(const int *T, int *SA, int n, int k) 854 { 855 if ((T == NULL) || (SA == NULL) || (n < 0) || (k <= 0)) 856 { 857 return -1; 858 } 859 if (n <= 1) 860 { 861 if (n == 1) 862 { 863 SA[0] = 0; 864 } 865 return 0; 866 } 867 return sais_main(T, SA, 0, n, k, sizeof(int), 0); 868 } 869 870 int sais_bwt(const unsigned char *T, unsigned char *U, int *A, int n) 871 { 872 int i, pidx; 873 if ((T == NULL) || (U == NULL) || (A == NULL) || (n < 0)) 874 { 875 return -1; 876 } 877 if (n <= 1) 878 { 879 if (n == 1) 880 { 881 U[0] = T[0]; 882 } 883 return n; 884 } 885 pidx = sais_main(T, A, 0, n, UCHAR_SIZE, sizeof(unsigned char), 1); 886 if (pidx < 0) 887 { 888 return pidx; 889 } 890 U[0] = T[n - 1]; 891 for (i = 0; i < pidx; ++i) 892 { 893 U[i + 1] = (unsigned char)A[i]; 894 } 895 for (i += 1; i < n; ++i) 896 { 897 U[i] = (unsigned char)A[i]; 898 } 899 pidx += 1; 900 return pidx; 901 } 902 903 int sais_int_bwt(const int *T, int *U, int *A, int n, int k) 904 { 905 int i, pidx; 906 if ((T == NULL) || (U == NULL) || (A == NULL) || (n < 0) || (k <= 0)) 907 { 908 return -1; 909 } 910 if (n <= 1) 911 { 912 if (n == 1) 913 { 914 U[0] = T[0]; 915 } 916 return n; 917 } 918 pidx = sais_main(T, A, 0, n, k, sizeof(int), 1); 919 if (pidx < 0) 920 { 921 return pidx; 922 } 923 U[0] = T[n - 1]; 924 for (i = 0; i < pidx; ++i) 925 { 926 U[i + 1] = A[i]; 927 } 928 for (i += 1; i < n; ++i) 929 { 930 U[i] = A[i]; 931 } 932 pidx += 1; 933 return pidx; 934 }