github.com/ledgerwatch/erigon-lib@v1.0.0/sais/gsa/gsacak.c (about)

     1  // vim: noai:ts=2:sw=2
     2  
     3  #include "gsacak.h"
     4  
     5  // set only the highest bit as 1, i.e. 1000...
     6  //const unsigned int EMPTY_k=((unsigned int)1)<<(sizeof(unsigned int)*8-1); 
     7  const uint_t EMPTY_k=((uint_t)1)<<(sizeof(uint_t)*8-1); 
     8  
     9  // get s[i] at a certain level
    10  #define chr(i) (cs==sizeof(int_t)?((int_t*)s)[i]:(cs==sizeof(int_text)?((int_text*)s)[i]:((unsigned char *)s)[i]))
    11  
    12  #define true 1
    13  #define false 0
    14  
    15  #define DEPTH 0  // compute time and size of reduced problem for each recursion call
    16  #define PHASES 0 // compute time for each phase
    17  #define RMQ_L   2  //variants = (1, trivial) (2, using Gog's stack)
    18  #define RMQ_S   2  //variants = (1, trivial) (2, using Gog's stack)
    19   
    20  #define STACK_SIZE_L 894 //to use 10Kb of working space
    21  #define STACK_SIZE_S 894 //to use 10Kb of working space
    22  
    23  #define EMPTY_STRING 0 //check if there is an empty string in the input collection
    24  
    25  typedef struct _pair{
    26    uint_t idx;
    27    int_t lcp;
    28  } t_pair_k;
    29  
    30  int compare_k (const void * a, const void * b){
    31    if(*(const uint_t *)a < *(const uint_t *)b) return -1;
    32    if(*(const uint_t *)a > *(const uint_t *)b) return 1;
    33  return 0;
    34  }
    35  
    36  void stack_push_k(t_pair_k* STACK, int_t *top, uint_t idx, int_t lcp){
    37  
    38    STACK[*top].idx=idx;
    39    STACK[*top].lcp=lcp;
    40  
    41    (*top)++;
    42  }
    43  
    44  void compute_lcp_phi_sparse(int_t *s, uint_t *SA1, 
    45    uint_t *RA, int_t *LCP, int_t *PLCP,
    46    uint_t n1, int cs, uint_t separator) {
    47  
    48    uint_t i;
    49  
    50    PLCP[SA1[0]]=0;//PLCP* (lms) is stored in PLCP array
    51    for(i=1; i<n1; i++)
    52      PLCP[SA1[i]] = LCP[i]; 
    53  
    54    LCP[SA1[0]]=0;//PHI is stored in LCP array
    55    for(i=1; i<n1; i++)
    56      LCP[SA1[i]] = SA1[i-1]; //RA[SA1[i-1]];
    57  
    58    int_t l=0; //q=0;
    59    for(i=0; i<n1-1;i++){
    60      if(chr(RA[i])==separator) continue;
    61  
    62      l = max(PLCP[i], l);//consider the LCP-value of the lms-substrings
    63  
    64      while(chr(RA[i]+l)==chr(RA[LCP[i]]+l) && !(chr(RA[i]+l) == separator && chr(RA[LCP[i]]+l)==separator) ) ++l;
    65      PLCP[i]=l;
    66  
    67      if(LCP[i]==n1-1) l -= RA[i+1]-RA[i];
    68      else l -= max(RA[i+1]-RA[i], RA[LCP[i]+1]-RA[LCP[i]]);//LCP[i] stores the distance of i-th suffix to its successor
    69    }
    70  
    71    LCP[0]=0;
    72    for(i=1; i<n1;i++) LCP[i]=PLCP[SA1[i]];
    73  
    74  }
    75  
    76  /*****************************************************************************/
    77  
    78  void getBuckets_k(int_t *s, 
    79    uint_t *bkt, uint_t n,
    80    unsigned int K, int end, int cs) { 
    81    uint_t i, sum=0;
    82    
    83    // clear all buckets .
    84    for(i=0; i<K; i++) bkt[i]=0; 
    85    
    86    // compute the size of each bucket .
    87    for(i=0; i<n; i++) bkt[chr(i)]++; 
    88    
    89    for(i=0; i<K; i++) { 
    90      sum+=bkt[i]; 
    91      bkt[i]=end ? sum-1 : sum-bkt[i]; 
    92    }
    93  }
    94  
    95  /*****************************************************************************/
    96  
    97  void putSuffix0(uint_t *SA, 
    98    int_t *s, uint_t *bkt, 
    99    uint_t n, unsigned int K, int_t n1, int cs) {
   100    uint_t i, j;
   101  
   102    // find the end of each bucket.
   103    getBuckets_k(s, bkt, n, K, true, cs);
   104  
   105    // put the suffixes into their buckets.
   106    for(i=n1-1; i>0; i--) {
   107      j=SA[i]; SA[i]=0;
   108      SA[bkt[chr(j)]--]=j;
   109    }
   110    SA[0]=n-1; // set the single sentinel suffix.
   111  }
   112  
   113  void putSuffix0_generalized(uint_t *SA, 
   114    uint_t *s, uint_t *bkt, 
   115    uint_t n, unsigned int K, int_t n1, int cs, uint_t separator) {
   116    uint_t i, j;
   117  
   118    // find the end of each bucket.
   119    getBuckets_k((int_t*)s, bkt, n, K, true, cs);
   120  
   121    int_t tmp=bkt[separator]--;// shifts one position left of bkt[separator]
   122  
   123    // put the suffixes into their buckets.
   124    for(i=n1-1; i>0; i--) {
   125      j=SA[i]; SA[i]=0;
   126      SA[bkt[chr(j)]--]=j;
   127    }
   128  
   129   // SA[0]=n-1; // set the single sentinel suffix.
   130  
   131    SA[tmp]=SA[0]-1;// insert the last separator at the end of bkt[separator]
   132  
   133  }
   134  
   135  void putSuffix0_generalized_LCP(uint_t *SA, int_t *LCP,
   136    uint_t *s, uint_t *bkt, 
   137    uint_t n, unsigned int K, int_t n1, int cs, uint_t separator) {
   138    uint_t i, j;
   139    int_t l;
   140  
   141    // find the end of each bucket.
   142    getBuckets_k((int_t*)s, bkt, n, K, true, cs);
   143  
   144    int_t tmp=bkt[separator]--;// shifts one position left of bkt[separator]
   145  
   146    // put the suffixes into their buckets.
   147    for(i=n1-1; i>0; i--) {
   148      j=SA[i]; SA[i]=U_MAX;
   149      l=LCP[i]; LCP[i]=0;
   150  
   151      SA[bkt[chr(j)]]=j;
   152      LCP[bkt[chr(j)]--]=l;
   153    }
   154  
   155   // SA[0]=n-1; // set the single sentinel suffix.
   156  
   157    SA[tmp]=SA[0]-1;// insert the last separator at the end of bkt[separator]
   158  
   159  }
   160  
   161  void putSuffix0_generalized_DA(uint_t *SA, int_da *DA,
   162    uint_t *s, uint_t *bkt, 
   163    uint_t n, unsigned int K, int_t n1, int cs, uint_t separator) {
   164    uint_t i, j;
   165  
   166    // find the end of each bucket.
   167    getBuckets_k((int_t*)s, bkt, n, K, true, cs);
   168  
   169    int_t tmp=bkt[separator]--;// shifts one position left of bkt[separator]
   170    
   171    // put the suffixes into their buckets.
   172    for(i=n1-1; i>0; i--) {
   173      j=SA[i]; SA[i]=0;
   174      SA[bkt[chr(j)]]=j;
   175      DA[bkt[chr(j)]--]=DA[i];
   176    }
   177  
   178   // SA[0]=n-1; // set the single sentinel suffix.
   179  
   180    SA[tmp]=SA[0]-1;// insert the last separator at the end of bkt[separator]
   181    DA[tmp]= (int_da) tmp-1;
   182  
   183  }
   184  
   185  
   186  void putSuffix0_generalized_LCP_DA(uint_t *SA, int_t *LCP,  int_da *DA, 
   187    uint_t *s, uint_t *bkt, 
   188    uint_t n, unsigned int K, int_t n1, int cs, uint_t separator) {
   189    uint_t i, j;
   190    int_t l;
   191  
   192    // find the end of each bucket.
   193    getBuckets_k((int_t*)s, bkt, n, K, true, cs);
   194  
   195    int_t tmp=bkt[separator]--;// shifts one position left of bkt[separator]
   196  
   197    // put the suffixes into their buckets.
   198    for(i=n1-1; i>0; i--) {
   199      j=SA[i]; SA[i]=U_MAX;
   200      l=LCP[i]; LCP[i]=0;
   201  
   202      SA[bkt[chr(j)]]=j;
   203      DA[bkt[chr(j)]]=DA[i];
   204      LCP[bkt[chr(j)]--]=l;
   205    }
   206  
   207   // SA[0]=n-1; // set the single sentinel suffix.
   208  
   209    SA[tmp]=SA[0]-1;// insert the last separator at the end of bkt[separator]
   210    DA[tmp]= (int_da) tmp-1;
   211  }
   212  
   213  /*****************************************************************************/
   214  
   215  void induceSAl0(uint_t *SA,
   216    int_t *s, uint_t *bkt,
   217    uint_t n, unsigned int K, int_t suffix, int cs) {
   218    uint_t i, j;
   219  
   220    // find the head of each bucket.
   221    getBuckets_k(s, bkt, n, K, false, cs);
   222  
   223    bkt[0]++; // skip the virtual sentinel.
   224    for(i=0; i<n; i++)
   225      if(SA[i]>0) {
   226        j=SA[i]-1;
   227        if(chr(j)>=chr(j+1)) {
   228          SA[bkt[chr(j)]]=j;
   229          bkt[chr(j)]++;
   230          if(!suffix && i>0) SA[i]=0;
   231        }
   232      }
   233  }
   234  
   235  void induceSAs0(uint_t *SA,
   236    int_t	*s, uint_t *bkt,
   237    uint_t n, unsigned int K, int_t suffix, int cs) {
   238    uint_t i, j;
   239  
   240    // find the end of each bucket.
   241    getBuckets_k(s, bkt, n, K, true, cs);
   242  
   243    for(i=n-1; i>0; i--)
   244      if(SA[i]>0) {
   245        j=SA[i]-1;
   246        if(chr(j)<=chr(j+1) && bkt[chr(j)]<i) {
   247          SA[bkt[chr(j)]]=j;
   248          bkt[chr(j)]--;
   249          if(!suffix) SA[i]=0;
   250        }
   251      }
   252  }
   253  /*****************************************************************************/
   254  
   255  void induceSAl0_generalized(uint_t *SA,
   256    uint_t *s, uint_t *bkt,
   257    uint_t n, unsigned int K, int_t suffix, int cs, uint_t separator) {
   258    uint_t i, j;
   259  
   260    // find the head of each bucket.
   261    getBuckets_k((int_t*)s, bkt, n, K, false, cs);
   262  
   263    bkt[0]++; // skip the virtual sentinel.
   264    for(i=0; i<n; i++)
   265      if(SA[i]>0) {
   266        j=SA[i]-1;
   267        if(chr(j)>=chr(j+1) ) {
   268  	if(chr(j)!=separator)//gsa-is
   269            SA[bkt[chr(j)]++]=j;
   270          if(!suffix && i>0) SA[i]=0;
   271        }
   272      }
   273  }
   274  
   275  void induceSAs0_generalized(uint_t *SA,
   276    uint_t *s, uint_t *bkt,
   277    uint_t n, uint_t K, int_t suffix, int cs, uint_t separator) {
   278    uint_t i, j;
   279  
   280    // find the end of each bucket.
   281    getBuckets_k((int_t*)s, bkt, n, K, true, cs);
   282  
   283    for(i=n-1; i>0; i--)
   284      if(SA[i]>0) {
   285        j=SA[i]-1;
   286        if(chr(j)<=chr(j+1) && bkt[chr(j)]<i) {
   287          if(chr(j)!=separator)
   288  	  SA[bkt[chr(j)]--]=j;
   289          if(!suffix) SA[i]=0;
   290        }
   291      }
   292  }
   293  
   294  /*****************************************************************************/
   295  
   296  void induceSAl0_generalized_LCP(uint_t *SA, int_t *LCP,
   297    uint_t *s, uint_t *bkt,
   298    uint_t n, unsigned int K, int cs, uint_t separator) {
   299    uint_t i, j;
   300  
   301    for(i=0;i<K;i++)
   302      if(bkt[i]+1<n) if(SA[bkt[i]+1]!=U_MAX) LCP[bkt[i]+1]=I_MIN;
   303  
   304    // find the head of each bucket.
   305    getBuckets_k((int_t*)s, bkt, n, K, false, cs);
   306  
   307    for(i=0;i<K;i++)
   308      if(bkt[i]<n) LCP[bkt[i]]=-2;
   309  
   310    #if RMQ_L == 1 
   311    int_t *M=(int_t *)malloc(sizeof(int_t)*K);
   312    for(i=0;i<K;i++){
   313      M[i]=I_MAX;
   314    }
   315    #elif RMQ_L == 2
   316    uint_t* last_occ = (uint_t*) malloc(K*sizeof(uint_t));
   317    uint_t* tmp = (uint_t*) malloc(K*sizeof(uint_t));
   318  
   319    t_pair_k* STACK = (t_pair_k*) malloc((STACK_SIZE_L+2)*sizeof(t_pair_k));
   320    int_t top = 0;
   321    stack_push_k(STACK, &top, 0, -1);//init
   322    for(i=0;i<K;i++) last_occ[i]=0;
   323    #endif 
   324  
   325    #if DEBUG
   326    printf("inducing..\n");
   327    for(i=0; i<n; i++)
   328          printf("%" PRIdN "\t", SA[i]+1);
   329    printf("\n");
   330    printf("LCP\n");
   331    for(i=0; i<n; i++)
   332          printf("%" PRIdN "\t", LCP[i]);
   333    printf("\n\n");
   334    #endif
   335  
   336    bkt[0]++; // skip the virtual sentinel.
   337    for(i=0; i<n; i++){
   338      if(SA[i]!=U_MAX){
   339  
   340        if(LCP[i]==I_MIN){ //is a L/S-seam position
   341    	  int_t l=0;
   342  	  if(SA[bkt[chr(SA[i])]-1]<n-1)	
   343     	    while(chr(SA[i]+l)==chr(SA[bkt[chr(SA[i])]-1]+l))++l;
   344    	  LCP[i]=l;
   345        }
   346        #if RMQ_L == 1
   347          uint_t k;
   348          for(k=0; k<K; k++) if(M[k]>LCP[i]) M[k] = max(0,LCP[i]);
   349        #elif RMQ_L == 2
   350          int_t min_lcp=0;
   351          uint_t last;
   352  
   353          if(!SA[i]) last = 0;
   354          else{
   355            last = last_occ[chr(SA[i]-1)];
   356            last_occ[chr(SA[i]-1)] = i+1;
   357          }
   358   
   359          int_t lcp=max(0,LCP[i]);
   360          while(STACK[(top)-1].lcp>=lcp) (top)--;	
   361  
   362          stack_push_k(STACK, &top, i+1, lcp);
   363          j = top-1;
   364  
   365          while(STACK[j].idx>last) j--;
   366          min_lcp=STACK[(j+1)].lcp;
   367  
   368        #endif
   369  
   370        if(SA[i]>0) {
   371          j=SA[i]-1;
   372          if(chr(j)>=chr(j+1))
   373    	if(chr(j)!=separator){//gsa-is
   374              SA[bkt[chr(j)]]=j;
   375  
   376              #if RMQ_L == 1
   377    	      LCP[bkt[chr(j)]]+=M[chr(j)]+1;
   378      	      M[chr(j)] = I_MAX;
   379              #elif RMQ_L == 2
   380    	      LCP[bkt[chr(j)]]+=min_lcp+1; 
   381              #endif
   382    
   383              bkt[chr(j)]++;
   384  	}
   385        	if(bkt[chr(SA[i])]-1<i){ //if is LMS-type
   386  	  if(chr(SA[i])!=separator)
   387  	  SA[i]=U_MAX;
   388          }
   389  
   390        }
   391        #if RMQ_L == 2
   392        if(top>STACK_SIZE_L){//if stack is full
   393  
   394          int_t j;
   395          memcpy(tmp, last_occ, K*sizeof(uint_t));
   396          qsort(tmp, K, sizeof(uint_t), compare_k);
   397         
   398          int_t curr=1, end=1;
   399          STACK[top].idx=U_MAX;
   400          for(j=0;j<K; j++){
   401  
   402            if(STACK[end-1].idx < tmp[j]+1){
   403  
   404              while(STACK[curr].idx<tmp[j]+1) curr++;
   405  
   406              if(curr<top){
   407  	      stack_push_k(STACK, &end, STACK[curr].idx, STACK[curr].lcp);
   408  	      curr++;
   409              }
   410  	  }
   411          }
   412   
   413          if(end>=STACK_SIZE_L){
   414            fprintf(stderr,"ERROR: induceSAl0_LCP\n");
   415            exit(1);
   416          }
   417  
   418          top = end;
   419        }
   420        #endif
   421      }
   422    }
   423    #if RMQ_L == 1
   424    free(M);
   425    #elif RMQ_L == 2
   426    free(STACK);
   427    free(last_occ);
   428    free(tmp);
   429    #endif
   430  
   431  }
   432  
   433  void induceSAs0_generalized_LCP(uint_t *SA, int_t* LCP,
   434    uint_t *s, uint_t *bkt,
   435    uint_t n, uint_t K, int cs, uint_t separator) {
   436    uint_t i, j;
   437  
   438    // find the end of each bucket.
   439    getBuckets_k((int_t*)s, bkt, n, K, true, cs);
   440  
   441    #if RMQ_S == 1
   442    int_t *M=(int_t *)malloc(sizeof(int_t)*K);
   443    for(i=0;i<K;i++) M[i]=I_MAX;
   444    #elif RMQ_S == 2 
   445    uint_t* last_occ = (uint_t*) malloc(K*sizeof(uint_t));
   446    uint_t* tmp = (uint_t*) malloc(K*sizeof(uint_t));
   447  
   448    t_pair_k* STACK = (t_pair_k*) malloc((STACK_SIZE_S+2)*sizeof(t_pair_k));
   449    int_t top = 0;
   450    stack_push_k(STACK, &top, n, -1); //init
   451    for(i=0;i<K;i++) last_occ[i]=n-1;
   452    #endif
   453  
   454    for(i=n-1; i>0; i--){
   455      if(SA[i]>0) {
   456        j=SA[i]-1;
   457        if(chr(j)<=chr(j+1) && bkt[chr(j)]<i)// induce S-type
   458          if(chr(j)!=separator){
   459  	  SA[bkt[chr(j)]]=j;
   460  
   461            #if RMQ_S == 1
   462    	    if(LCP[bkt[chr(j)]+1]>=0) 
   463    	      LCP[bkt[chr(j)]+1]=M[chr(j)]+1;
   464    	  
   465  	    if(LCP[bkt[chr(j)]]>0) 
   466    	      LCP[bkt[chr(j)]]=I_MAX;
   467  
   468            #elif RMQ_S == 2
   469              int_t min = I_MAX, end = top-1; 
   470    
   471    	    int_t last=last_occ[chr(j)];
   472              while(STACK[end].idx<=last) end--;
   473    
   474              min=STACK[(end+1)].lcp;
   475              last_occ[chr(j)] = i;
   476    
   477    	    if(LCP[bkt[chr(j)]+1]>=0) 
   478                LCP[bkt[chr(j)]+1]=min+1;
   479            #endif
   480    
   481  
   482            #if RMQ_S == 1
   483    	  M[chr(j)] = I_MAX;
   484            #endif
   485    
   486            bkt[chr(j)]--;
   487   
   488    	  if(SA[bkt[chr(j)]]!=U_MAX) {//L/S-seam
   489              int_t l=0;	
   490              while(chr(SA[bkt[chr(j)]+1]+l)==chr(SA[bkt[chr(j)]]+l))++l;
   491              LCP[bkt[chr(j)]+1]=l;
   492    	  }
   493    	}
   494       }
   495  
   496      if(LCP[i]<0) LCP[i]=0;
   497      #if RMQ_S == 1
   498        int_t k;
   499        for(k=0; k<K; k++) if(M[k]>LCP[i]) M[k] = LCP[i];
   500      #elif RMQ_S == 2
   501  
   502        int_t lcp=max(0,LCP[i]);
   503  
   504        while(STACK[(top)-1].lcp>=lcp) (top)--;
   505        stack_push_k(STACK, &top, i, lcp);
   506  
   507        if(top>=STACK_SIZE_S){
   508  
   509            int_t j;
   510            memcpy(tmp, last_occ, K*sizeof(uint_t));
   511            qsort(tmp, K, sizeof(uint_t), compare_k);
   512  
   513            int_t curr=1, end=1;
   514  
   515  	   for(j=K-1;j>=0; j--){
   516  
   517              if(tmp[j] < STACK[end-1].idx){
   518  
   519    	      while(STACK[curr].idx>tmp[j] && curr < top) curr++;
   520  	      if(curr>=top) break;
   521  	      stack_push_k(STACK, &end, STACK[curr].idx, STACK[curr].lcp);
   522  	      curr++;
   523              }
   524            } 
   525  
   526            if(end>=STACK_SIZE_S){
   527              fprintf(stderr,"ERROR: induceSAl0_LCP\n");
   528              exit(1);
   529            }
   530            top = end;
   531        }
   532        #endif
   533  
   534    }
   535  
   536    LCP[0]=0;
   537  
   538    //variant 1
   539    #if RMQ_S == 1
   540    free(M);
   541    #elif RMQ_S == 2
   542    free(STACK);
   543    free(last_occ);
   544    free(tmp);
   545    #endif
   546  }
   547  
   548  
   549  /*****************************************************************************/
   550  
   551  void induceSAl0_generalized_DA(uint_t *SA, int_da* DA,
   552    uint_t *s, uint_t *bkt,
   553    uint_t n, unsigned int K, int cs, uint_t separator) {
   554    uint_t i, j;
   555  
   556    // find the head of each bucket.
   557    getBuckets_k((int_t*)s, bkt, n, K, false, cs);
   558  
   559    bkt[0]++; // skip the virtual sentinel.
   560    for(i=0; i<n; i++)
   561      if(SA[i]>0) {
   562        j=SA[i]-1;
   563        if(chr(j)>=chr(j+1) ) {
   564  	    if(chr(j)!=separator){//gsa-is
   565            SA[bkt[chr(j)]]=j;
   566            DA[bkt[chr(j)]++]=DA[i];
   567  	    }
   568        }
   569      }
   570  }
   571  
   572  void induceSAs0_generalized_DA(uint_t *SA, int_da* DA,
   573    uint_t *s, uint_t *bkt,
   574    uint_t n, uint_t K, int cs, uint_t separator) {
   575    uint_t i, j;
   576  
   577    // find the end of each bucket.
   578    getBuckets_k((int_t*)s, bkt, n, K, true, cs);
   579  
   580    for(i=n-1; i>0; i--)
   581      if(SA[i]>0) {
   582        j=SA[i]-1;
   583        if(chr(j)<=chr(j+1) && bkt[chr(j)]<i) {
   584          if(chr(j)!=separator){
   585  		  SA[bkt[chr(j)]]=j;
   586  		  DA[bkt[chr(j)]--]=DA[i];
   587  	    }
   588        }
   589      }
   590  }
   591  
   592  /*****************************************************************************/
   593  
   594  void induceSAl0_generalized_LCP_DA(uint_t *SA, int_t *LCP, int_da *DA,
   595    uint_t *s, uint_t *bkt,
   596    uint_t n, unsigned int K, int cs, uint_t separator) {
   597    uint_t i, j;
   598  
   599    for(i=0;i<K;i++)
   600      if(bkt[i]+1<n) if(SA[bkt[i]+1]!=U_MAX) LCP[bkt[i]+1]=I_MIN;
   601  
   602    // find the head of each bucket.
   603    getBuckets_k((int_t*)s, bkt, n, K, false, cs);
   604  
   605    for(i=0;i<K;i++)
   606      if(bkt[i]<n) LCP[bkt[i]]=-2;
   607  
   608    #if RMQ_L == 1 
   609    int_t *M=(int_t *)malloc(sizeof(int_t)*K);
   610    for(i=0;i<K;i++){
   611      M[i]=I_MAX;
   612    }
   613    #elif RMQ_L == 2
   614    uint_t* last_occ = (uint_t*) malloc(K*sizeof(uint_t));
   615    uint_t* tmp = (uint_t*) malloc(K*sizeof(uint_t));
   616  
   617    t_pair_k* STACK = (t_pair_k*) malloc((STACK_SIZE_L+2)*sizeof(t_pair_k));
   618    int_t top = 0;
   619    stack_push_k(STACK, &top, 0, -1);//init
   620    for(i=0;i<K;i++) last_occ[i]=0;
   621    #endif 
   622  
   623    #if DEBUG
   624    printf("inducing..\n");
   625    for(i=0; i<n; i++)
   626          printf("%" PRIdN "\t", SA[i]+1);
   627    printf("\n");
   628    printf("LCP\n");
   629    for(i=0; i<n; i++)
   630          printf("%" PRIdN "\t", LCP[i]);
   631    printf("\n\n");
   632    #endif
   633  
   634    bkt[0]++; // skip the virtual sentinel.
   635    for(i=0; i<n; i++){
   636      if(SA[i]!=U_MAX){
   637  
   638        if(LCP[i]==I_MIN){ //is a L/S-seam position
   639    	  int_t l=0;
   640  		  if(SA[bkt[chr(SA[i])]-1]<n-1)	
   641     	    while(chr(SA[i]+l)==chr(SA[bkt[chr(SA[i])]-1]+l))++l;
   642    	  LCP[i]=l;
   643        }
   644        #if RMQ_L == 1
   645          uint_t k;
   646          for(k=0; k<K; k++) if(M[k]>LCP[i]) M[k] = max(0,LCP[i]);
   647        #elif RMQ_L == 2
   648          int_t min_lcp=0;
   649          uint_t last;
   650  
   651          if(!SA[i]) last = 0;
   652          else{
   653            last = last_occ[chr(SA[i]-1)];
   654            last_occ[chr(SA[i]-1)] = i+1;
   655          }
   656   
   657          int_t lcp=max(0,LCP[i]);
   658          while(STACK[(top)-1].lcp>=lcp) (top)--;	
   659  
   660          stack_push_k(STACK, &top, i+1, lcp);
   661          j = top-1;
   662  
   663          while(STACK[j].idx>last) j--;
   664          min_lcp=STACK[(j+1)].lcp;
   665  
   666        #endif
   667  
   668        if(SA[i]>0) {
   669          j=SA[i]-1;
   670          if(chr(j)>=chr(j+1))
   671  					if(chr(j)!=separator){//gsa-is
   672              SA[bkt[chr(j)]]=j;
   673              DA[bkt[chr(j)]]=DA[i];
   674  
   675              #if RMQ_L == 1
   676  							LCP[bkt[chr(j)]]+=M[chr(j)]+1;
   677  	    	      M[chr(j)] = I_MAX;
   678              #elif RMQ_L == 2
   679  				      LCP[bkt[chr(j)]]+=min_lcp+1; 
   680              #endif
   681    
   682              bkt[chr(j)]++;
   683  				}
   684        	if(bkt[chr(SA[i])]-1<i){ //if is LMS-type
   685  					if(chr(SA[i])!=separator)
   686  					SA[i]=U_MAX;
   687          }
   688        }
   689        #if RMQ_L == 2
   690        if(top>STACK_SIZE_L){//if stack is full
   691  
   692          int_t j;
   693          memcpy(tmp, last_occ, K*sizeof(uint_t));
   694          qsort(tmp, K, sizeof(uint_t), compare_k);
   695         
   696          int_t curr=1, end=1;
   697          STACK[top].idx=U_MAX;
   698          for(j=0;j<K; j++){
   699  
   700            if(STACK[end-1].idx < tmp[j]+1){
   701  
   702              while(STACK[curr].idx<tmp[j]+1) curr++;
   703  
   704              if(curr<top){
   705  	      stack_push_k(STACK, &end, STACK[curr].idx, STACK[curr].lcp);
   706  	      curr++;
   707              }
   708  	  }
   709          }
   710   
   711          if(end>=STACK_SIZE_L){
   712            fprintf(stderr,"ERROR: induceSAl0_LCP\n");
   713            exit(1);
   714          }
   715  
   716          top = end;
   717        }
   718        #endif
   719      }
   720    }
   721    #if RMQ_L == 1
   722    free(M);
   723    #elif RMQ_L == 2
   724    free(STACK);
   725    free(last_occ);
   726    free(tmp);
   727    #endif
   728  
   729  }
   730  
   731  void induceSAs0_generalized_LCP_DA(uint_t *SA, int_t* LCP, int_da* DA,
   732    uint_t *s, uint_t *bkt,
   733    uint_t n, uint_t K, int cs, uint_t separator) {
   734    uint_t i, j;
   735  
   736    // find the end of each bucket.
   737    getBuckets_k((int_t*)s, bkt, n, K, true, cs);
   738  
   739    #if RMQ_S == 1
   740    int_t *M=(int_t *)malloc(sizeof(int_t)*K);
   741    for(i=0;i<K;i++) M[i]=I_MAX;
   742    #elif RMQ_S == 2 
   743    uint_t* last_occ = (uint_t*) malloc(K*sizeof(uint_t));
   744    uint_t* tmp = (uint_t*) malloc(K*sizeof(uint_t));
   745  
   746    t_pair_k* STACK = (t_pair_k*) malloc((STACK_SIZE_S+2)*sizeof(t_pair_k));
   747    int_t top = 0;
   748    stack_push_k(STACK, &top, n, -1); //init
   749    for(i=0;i<K;i++) last_occ[i]=n-1;
   750    #endif
   751  
   752    for(i=n-1; i>0; i--){
   753      if(SA[i]>0) {
   754        j=SA[i]-1;
   755        if(chr(j)<=chr(j+1) && bkt[chr(j)]<i)// induce S-type
   756        if(chr(j)!=separator){
   757  	    SA[bkt[chr(j)]]=j;
   758          DA[bkt[chr(j)]]=DA[i];
   759        
   760          #if RMQ_S == 1
   761    	    if(LCP[bkt[chr(j)]+1]>=0) 
   762    	      LCP[bkt[chr(j)]+1]=M[chr(j)]+1;
   763  
   764  	    if(LCP[bkt[chr(j)]]>0) 
   765    	      LCP[bkt[chr(j)]]=I_MAX;
   766  
   767          #elif RMQ_S == 2
   768            int_t min = I_MAX, end = top-1; 
   769    
   770            int_t last=last_occ[chr(j)];
   771            while(STACK[end].idx<=last) end--;
   772    
   773            min=STACK[(end+1)].lcp;
   774            last_occ[chr(j)] = i;
   775    
   776    	      if(LCP[bkt[chr(j)]+1]>=0) 
   777              LCP[bkt[chr(j)]+1]=min+1;
   778          #endif
   779    
   780  
   781          #if RMQ_S == 1
   782            M[chr(j)] = I_MAX;
   783          #endif
   784    
   785          bkt[chr(j)]--;
   786   
   787          if(SA[bkt[chr(j)]]!=U_MAX) {//L/S-seam
   788                int_t l=0;	
   789                while(chr(SA[bkt[chr(j)]+1]+l)==chr(SA[bkt[chr(j)]]+l))++l;
   790                LCP[bkt[chr(j)]+1]=l;
   791          }
   792        }
   793      }
   794  
   795      if(LCP[i]<0) LCP[i]=0;
   796      #if RMQ_S == 1
   797        int_t k;
   798        for(k=0; k<K; k++) if(M[k]>LCP[i]) M[k] = LCP[i];
   799      #elif RMQ_S == 2
   800  
   801        int_t lcp=max(0,LCP[i]);
   802  
   803        while(STACK[(top)-1].lcp>=lcp) (top)--;
   804        stack_push_k(STACK, &top, i, lcp);
   805  
   806        if(top>=STACK_SIZE_S){
   807  
   808            int_t j;
   809            memcpy(tmp, last_occ, K*sizeof(uint_t));
   810            qsort(tmp, K, sizeof(uint_t), compare_k);
   811  
   812            int_t curr=1, end=1;
   813  
   814  	   for(j=K-1;j>=0; j--){
   815  
   816              if(tmp[j] < STACK[end-1].idx){
   817  
   818    	      while(STACK[curr].idx>tmp[j] && curr < top) curr++;
   819  	      if(curr>=top) break;
   820  	      stack_push_k(STACK, &end, STACK[curr].idx, STACK[curr].lcp);
   821  	      curr++;
   822              }
   823            } 
   824  
   825            if(end>=STACK_SIZE_S){
   826              fprintf(stderr,"ERROR: induceSAl0_LCP\n");
   827              exit(1);
   828            }
   829            top = end;
   830        }
   831        #endif
   832  
   833    }
   834  
   835    LCP[0]=0;
   836  
   837    //variant 1
   838    #if RMQ_S == 1
   839    free(M);
   840    #elif RMQ_S == 2
   841    free(STACK);
   842    free(last_occ);
   843    free(tmp);
   844    #endif
   845  }
   846  
   847  
   848  /*****************************************************************************/
   849  
   850  
   851  void putSubstr0(uint_t *SA,
   852    int_t *s, uint_t *bkt,
   853    uint_t n, unsigned int K, int cs) {
   854    uint_t i, cur_t, succ_t;
   855  
   856    // find the end of each bucket.
   857    getBuckets_k(s, bkt, n, K, true, cs);
   858  
   859    // set each item in SA as empty.
   860    for(i=0; i<n; i++) SA[i]=0;
   861  
   862    succ_t=0; // s[n-2] must be L-type.
   863    for(i=n-2; i>0; i--) {
   864      cur_t=(chr(i-1)<chr(i) ||
   865             (chr(i-1)==chr(i) && succ_t==1)
   866            )?1:0;
   867      if(cur_t==0 && succ_t==1) SA[bkt[chr(i)]--]=i;
   868      succ_t=cur_t;
   869    }
   870  
   871    // set the single sentinel LMS-substring.
   872    SA[0]=n-1;
   873  }
   874  
   875  /*****************************************************************************/
   876  void putSubstr0_generalized(uint_t *SA,
   877    uint_t *s, uint_t *bkt,
   878    uint_t n, unsigned int K, int cs, uint_t separator) {
   879    uint_t i, cur_t, succ_t;
   880  
   881    // find the end of each bucket.
   882    getBuckets_k((int_t*)s, bkt, n, K, true, cs);
   883  
   884    // set each item in SA as empty.
   885    for(i=0; i<n; i++) SA[i]=0;
   886  
   887    // gsa-is
   888    int_t tmp=bkt[separator]--;// shifts one position left of bkt[separator]
   889    
   890    SA[0]=n-1;// set the single sentinel LMS-substring.
   891  
   892    SA[tmp] = SA[0]-1; // insert the last separator at the end of bkt[separator]
   893  
   894    int_t p=n-2;
   895  
   896    succ_t=0; // s[n-2] must be L-type.
   897    for(i=n-2; i>0; i--) {
   898      cur_t=(chr(i-1)<chr(i) ||
   899             (chr(i-1)==chr(i) && succ_t==1)
   900            )?1:0;
   901      if(cur_t==0 && succ_t==1){
   902  
   903        if(chr(i)==separator)
   904          SA[++bkt[chr(p)]]=0; // removes LMS-positions that induces separator suffixes
   905  
   906        SA[bkt[chr(i)]--]=i;
   907        p=i;
   908      }
   909      succ_t=cur_t;
   910    }
   911  }
   912  /*****************************************************************************/
   913  
   914  void putSuffix1(int_t *SA, int_t *s, int_t n1, int cs) {
   915    int_t i, j, pos=n1, cur, pre=-1;
   916    
   917    for(i=n1-1; i>0; i--) {
   918      j=SA[i]; SA[i]=EMPTY_k;
   919      cur=chr(j);
   920      if(cur!=pre) {
   921        pre=cur; pos=cur;
   922      }
   923      SA[pos--]=j;
   924    }
   925  }
   926  
   927  void induceSAl1(int_t *SA, int_t *s, 
   928    int_t n, int_t suffix, int cs) {
   929    int_t h, i, j, step=1;
   930    
   931    for(i=0; i<n; i+=step) {
   932      step=1; j=SA[i]-1;
   933      if(SA[i]<=0) continue;
   934      int_t c=chr(j), c1=chr(j+1);
   935      int_t isL=c>=c1;
   936      if(!isL) continue;
   937  
   938      // s[j] is L-type.
   939  
   940      int_t d=SA[c];
   941      if(d>=0) {
   942        // SA[c] is borrowed by the left
   943        //   neighbor bucket.
   944        // shift-left the items in the
   945        //   left neighbor bucket.
   946        int_t foo, bar;
   947        foo=SA[c];
   948        for(h=c-1; SA[h]>=0||SA[h]==EMPTY_k; h--)
   949        { bar=SA[h]; SA[h]=foo; foo=bar; }
   950        SA[h]=foo;
   951        if(h<i) step=0;
   952  
   953        d=EMPTY_k;
   954      }
   955  
   956      if(d==EMPTY_k) { // SA[c] is empty.
   957        if(c<n-1 && SA[c+1]==EMPTY_k) {
   958          SA[c]=-1; // init the counter.
   959          SA[c+1]=j;
   960        }
   961        else        
   962          SA[c]=j; // a size-1 bucket.
   963      }
   964      else { // SA[c] is reused as a counter.
   965          int_t pos=c-d+1;
   966          if(pos>n-1 || SA[pos]!=EMPTY_k) {
   967            // we are running into the right
   968            //   neighbor bucket.
   969            // shift-left one step the items
   970            //   of bucket(SA, S, j).
   971            for(h=0; h<-d; h++)
   972              SA[c+h]=SA[c+h+1];
   973            pos--;
   974            if(c<i) step=0;
   975          }
   976          else
   977            SA[c]--;
   978  
   979          SA[pos]=j;
   980      }
   981  
   982      int_t c2;
   983      int_t isL1=(j+1<n-1) && (c1>(c2=chr(j+2)) || (c1==c2 && c1<i));  // is s[SA[i]] L-type?
   984      if((!suffix || !isL1) && i>0) {
   985        int_t i1=(step==0)?i-1:i;
   986        SA[i1]=EMPTY_k;
   987      }
   988    }
   989  
   990    // scan to shift-left the items in each bucket 
   991    //   with its head being reused as a counter.
   992    for(i=1; i<n; i++) {
   993      j=SA[i];
   994      if(j<0 && j!=EMPTY_k) { // is SA[i] a counter?
   995        for(h=0; h<-j; h++)
   996          SA[i+h]=SA[i+h+1];
   997        SA[i+h]=EMPTY_k;
   998      }
   999    }
  1000  }
  1001  
  1002  void induceSAs1(int_t *SA, int_t *s, 
  1003    int_t n, int_t suffix, int cs) {
  1004    int_t h, i, j, step=1;
  1005    
  1006    for(i=n-1; i>0; i-=step) {
  1007      step=1; j=SA[i]-1;
  1008      if(SA[i]<=0) continue;
  1009      int_t c=chr(j), c1=chr(j+1);
  1010      int_t isS=(c<c1) || (c==c1 && c>i);
  1011      if(!isS) continue;
  1012  
  1013      // s[j] is S-type
  1014  
  1015      int_t d=SA[c];
  1016      if(d>=0) {
  1017        // SA[c] is borrowed by the right
  1018        //   neighbor bucket.
  1019        // shift-right the items in the
  1020        //   right neighbor bucket.
  1021        int_t foo, bar;
  1022        foo=SA[c];
  1023        for(h=c+1; SA[h]>=0||SA[h]==EMPTY_k; h++)
  1024        { bar=SA[h]; SA[h]=foo; foo=bar; }
  1025        SA[h]=foo;
  1026        if(h>i) step=0;
  1027  
  1028        d=EMPTY_k;
  1029      }
  1030  
  1031      if(d==EMPTY_k) { // SA[c] is empty.
  1032        if(SA[c-1]==EMPTY_k) {
  1033          SA[c]=-1; // init the counter.
  1034          SA[c-1]=j;
  1035        }
  1036        else
  1037          SA[c]=j; // a size-1 bucket.
  1038      }
  1039      else { // SA[c] is reused as a counter.
  1040          int_t pos=c+d-1;
  1041          if(SA[pos]!=EMPTY_k) {
  1042            // we are running into the left
  1043            //   neighbor bucket.
  1044            // shift-right one step the items 
  1045            //   of bucket(SA, S, j).
  1046            for(h=0; h<-d; h++)
  1047              SA[c-h]=SA[c-h-1];
  1048            pos++;
  1049            if(c>i) step=0;
  1050          }
  1051          else
  1052            SA[c]--;
  1053  
  1054          SA[pos]=j;
  1055      }
  1056  
  1057      if(!suffix) {
  1058        int_t i1=(step==0)?i+1:i;
  1059        SA[i1]=EMPTY_k;
  1060      }
  1061    }
  1062  
  1063    // scan to shift-right the items in each bucket
  1064    //   with its head being reused as a counter.
  1065    if(!suffix)
  1066      for(i=n-1; i>0; i--) {
  1067        j=SA[i];
  1068        if(j<0 && j!=EMPTY_k) { // is SA[i] a counter?
  1069          for(h=0; h<-j; h++)
  1070            SA[i-h]=SA[i-h-1];
  1071          SA[i-h]=EMPTY_k;
  1072        }
  1073      }
  1074  }
  1075  
  1076  void putSubstr1(int_t *SA, int_t *s, int_t n, int cs) {
  1077    int_t h, i, j;
  1078  
  1079    for(i=0; i<n; i++) SA[i]=EMPTY_k;
  1080  
  1081    int_t c, c1, t, t1;
  1082    c1=chr(n-2);
  1083    t1=0; 
  1084    for(i=n-2; i>0; i--) {
  1085      c=c1; t=t1; 
  1086      c1=chr(i-1);
  1087      t1=c1<c || (c1==c && t);
  1088      if(t && !t1) {
  1089        if(SA[c]>=0) {
  1090          // SA[c] is borrowed by the right
  1091          //   neighbor bucket.
  1092          // shift-right the items in the
  1093          //   right neighbor bucket.
  1094          int_t foo, bar;
  1095          foo=SA[c];
  1096          for(h=c+1; SA[h]>=0; h++)
  1097          { bar=SA[h]; SA[h]=foo; foo=bar; }
  1098          SA[h]=foo;
  1099  
  1100          SA[c]=EMPTY_k;
  1101        }
  1102  
  1103        int_t d=SA[c];
  1104        if(d==EMPTY_k) { // SA[c] is empty.
  1105          if(SA[c-1]==EMPTY_k) {
  1106            SA[c]=-1; // init the counter.
  1107            SA[c-1]=i;
  1108          }
  1109          else
  1110            SA[c]=i; // a size-1 bucket.
  1111        }
  1112        else { // SA[c] is reused as a counter
  1113            int_t pos=c+d-1;
  1114            if(SA[pos]!=EMPTY_k) {
  1115              // we are running into the left
  1116              //   neighbor bucket.
  1117              // shift-right one step the items 
  1118              //   of bucket(SA, S, i).
  1119              for(h=0; h<-d; h++)
  1120                SA[c-h]=SA[c-h-1];
  1121              pos++;
  1122            }
  1123            else
  1124              SA[c]--;
  1125  
  1126            SA[pos]=i;
  1127        }
  1128      }
  1129    }
  1130  
  1131    // scan to shift-right the items in each bucket
  1132    //   with its head being reused as a counter.
  1133    for(i=n-1; i>0; i--) {
  1134      j=SA[i];
  1135      if(j<0 && j!=EMPTY_k) { // is SA[i] a counter?
  1136        for(h=0; h<-j; h++)
  1137          SA[i-h]=SA[i-h-1];
  1138        SA[i-h]=EMPTY_k;
  1139      }
  1140    }
  1141  
  1142    // put the single sentinel LMS-substring.
  1143    SA[0]=n-1;
  1144  }
  1145  
  1146  uint_t getLengthOfLMS(int_t	*s, 
  1147    uint_t n, int level, uint_t x, int cs) {
  1148    if(x==n-1) return 1;  
  1149    
  1150    uint_t dist=0, i=1;  
  1151    while(1) {
  1152      if(chr(x+i)<chr(x+i-1)) break;
  1153      i++;
  1154    }  
  1155    while(1) {
  1156      if(x+i>n-1 || chr(x+i)>chr(x+i-1)) break;
  1157      if(x+i==n-1 || chr(x+i)<chr(x+i-1)) dist=i;
  1158      i++;
  1159    }
  1160    
  1161    return dist+1;
  1162  }
  1163  
  1164  uint_t nameSubstr(uint_t *SA, 
  1165    int_t *s, uint_t *s1, uint_t n, 
  1166    uint_t m, uint_t n1, int level, int cs) {
  1167    uint_t i, j, cur_t, succ_t;
  1168  
  1169    // init the name array buffer
  1170    for(i=n1; i<n; i++) SA[i]=EMPTY_k;
  1171  
  1172    // scan to compute the interim s1
  1173    uint_t name=0, name_ctr=0;
  1174    uint_t pre_pos=n-1, pre_len=0;
  1175    for(i=0; i<n1; i++) {
  1176      int diff=false;
  1177      uint_t len, pos=SA[i];
  1178  
  1179      uint_t d;
  1180      len=getLengthOfLMS(s, n, level, pos, cs);
  1181      if(len!=pre_len) diff=true;
  1182      else
  1183        for(d=0; d<len; d++)
  1184          if(pos+d==n-1 || pre_pos+d==n-1 ||
  1185             chr(pos+d)!=chr(pre_pos+d)) {
  1186            diff=true; break;
  1187          }
  1188  
  1189      if(diff) {
  1190        name=i; name_ctr++;
  1191        SA[name]=1; // a new name.
  1192        pre_pos=pos; pre_len=len;
  1193      }
  1194      else
  1195        SA[name]++; // count this name.
  1196  
  1197      SA[n1+pos/2]=name;
  1198    }
  1199  
  1200    // compact the interim s1 sparsely stored 
  1201    //   in SA[n1, n-1] into SA[m-n1, m-1].
  1202    for(i=n-1, j=m-1; i>=n1; i--)
  1203      if(SA[i]!=EMPTY_k) SA[j--]=SA[i];
  1204  
  1205    // rename each S-type character of the
  1206    //   interim s1 as the end of its bucket
  1207    //   to produce the final s1.
  1208    succ_t=1;
  1209    for(i=n1-1; i>0; i--) {
  1210      int_t ch=s1[i], ch1=s1[i-1];
  1211      cur_t=(ch1< ch || (ch1==ch && succ_t==1))?1:0;
  1212      if(cur_t==1) {
  1213        s1[i-1]+=SA[s1[i-1]]-1;
  1214      }
  1215      succ_t=cur_t;
  1216    }
  1217  
  1218    return name_ctr;
  1219  }
  1220  
  1221  /*****************************************************************************/
  1222  
  1223  uint_t nameSubstr_generalized(uint_t *SA, 
  1224    uint_t *s, uint_t *s1, uint_t n, 
  1225    uint_t m, uint_t n1, int level, int cs, uint_t separator) {
  1226    uint_t i, j, cur_t, succ_t;
  1227  
  1228    // init the name array buffer
  1229    for(i=n1; i<n; i++) SA[i]=EMPTY_k;
  1230  
  1231    // scan to compute the interim s1
  1232    uint_t name=0, name_ctr=0;
  1233    uint_t pre_pos=n-1, pre_len=0;
  1234    for(i=0; i<n1; i++) {
  1235      int diff=false;
  1236      uint_t len, pos=SA[i];
  1237  
  1238      uint_t d;  
  1239      len=getLengthOfLMS((int_t*)s, n, level, pos, cs);
  1240      if(len!=pre_len) diff=true;
  1241      else
  1242        for(d=0; d<len; d++)
  1243          if(pos+d==n-1 || pre_pos+d==n-1 ||
  1244             chr(pos+d)!=chr(pre_pos+d) ||
  1245             (chr(pos+d)==separator && chr(pre_pos+d)==separator)){
  1246            diff=true; break;
  1247          }
  1248  
  1249      if(diff) {
  1250        name=i; name_ctr++;
  1251        SA[name]=1; // a new name.
  1252        pre_pos=pos; pre_len=len;
  1253      }
  1254      else
  1255        SA[name]++; // count this name.
  1256  
  1257      SA[n1+pos/2]=name;
  1258    }
  1259  
  1260    // compact the interim s1 sparsely stored 
  1261    //   in SA[n1, n-1] into SA[m-n1, m-1].
  1262    for(i=n-1, j=m-1; i>=n1; i--)
  1263      if(SA[i]!=EMPTY_k) SA[j--]=SA[i];
  1264  
  1265    // rename each S-type character of the
  1266    //   interim s1 as the end of its bucket
  1267    //   to produce the final s1.
  1268    succ_t=1;
  1269    for(i=n1-1; i>0; i--) {
  1270      int_t ch=s1[i], ch1=s1[i-1];
  1271      cur_t=(ch1< ch || (ch1==ch && succ_t==1))?1:0;
  1272      if(cur_t==1) {
  1273        s1[i-1]+=SA[s1[i-1]]-1;
  1274      }
  1275      succ_t=cur_t;
  1276    }
  1277    
  1278    return name_ctr;
  1279  }
  1280  
  1281  /*****************************************************************************/
  1282  
  1283  uint_t nameSubstr_generalized_LCP(uint_t *SA, int_t *LCP,
  1284    uint_t *s, uint_t *s1, uint_t n, 
  1285    uint_t m, uint_t n1, int level, int cs, uint_t separator) {
  1286    uint_t i, j, cur_t, succ_t;
  1287  
  1288    // init the name array buffer
  1289    for(i=n1; i<n; i++) SA[i]=EMPTY_k;
  1290  
  1291    // scan to compute the interim s1
  1292    uint_t name=0, name_ctr=0;
  1293    uint_t pre_pos=n-1, pre_len=0;
  1294    for(i=0; i<n1; i++) {
  1295      int diff=false;
  1296      uint_t len, pos=SA[i];
  1297  
  1298      uint_t d;  
  1299      len=getLengthOfLMS((int_t*)s, n, level, pos, cs);
  1300      if(len!=pre_len) diff=true;
  1301      else{
  1302        for(d=0; d<len; d++){
  1303          if(pos+d==n-1 || pre_pos+d==n-1 ||
  1304             chr(pos+d)!=chr(pre_pos+d) ||
  1305             (chr(pos+d)==separator && chr(pre_pos+d)==separator)){
  1306            diff=true; break;
  1307          }
  1308        }
  1309        LCP[i]=d;
  1310      }
  1311  
  1312      if(diff) {
  1313        name=i; name_ctr++;
  1314        SA[name]=1; // a new name.
  1315        pre_pos=pos; pre_len=len;
  1316      }
  1317      else
  1318        SA[name]++; // count this name.
  1319  
  1320      SA[n1+pos/2]=name;
  1321    }
  1322  
  1323    // compact the interim s1 sparsely stored 
  1324    //   in SA[n1, n-1] into SA[m-n1, m-1].
  1325    for(i=n-1, j=m-1; i>=n1; i--)
  1326      if(SA[i]!=EMPTY_k) SA[j--]=SA[i];
  1327  
  1328    // rename each S-type character of the
  1329    //   interim s1 as the end of its bucket
  1330    //   to produce the final s1.
  1331    succ_t=1;
  1332    for(i=n1-1; i>0; i--) {
  1333      int_t ch=s1[i], ch1=s1[i-1];
  1334      cur_t=(ch1< ch || (ch1==ch && succ_t==1))?1:0;
  1335      if(cur_t==1) {
  1336        s1[i-1]+=SA[s1[i-1]]-1;
  1337      }
  1338      succ_t=cur_t;
  1339    }
  1340    
  1341    return name_ctr;
  1342  }
  1343  
  1344  /*****************************************************************************/
  1345  
  1346  void getSAlms(uint_t *SA, 
  1347    int_t *s, 
  1348    uint_t *s1, uint_t n, 
  1349    uint_t n1, int level, int cs) {
  1350    uint_t i, j, cur_t, succ_t;
  1351  
  1352    j=n1-1; s1[j--]=n-1;
  1353    succ_t=0; // s[n-2] must be L-type
  1354    for(i=n-2; i>0; i--) {
  1355      cur_t=(chr(i-1)<chr(i) ||
  1356            (chr(i-1)==chr(i) && succ_t==1))?1:0;
  1357      if(cur_t==0 && succ_t==1) s1[j--]=i;
  1358      succ_t=cur_t;
  1359    }
  1360  }
  1361  
  1362  
  1363  void getSAlms_DA(uint_t *SA, int_da* DA,
  1364    int_t *s, 
  1365    uint_t *s1, uint_t n, 
  1366    uint_t n1, int level, int cs, uint_t separator) {
  1367    uint_t i, j, cur_t, succ_t;
  1368  
  1369  /**/
  1370    int_t k=0;
  1371    for(i=n-2; i>0; i--) if(chr(i)==separator)k++;
  1372    DA[n1-1]=(int_da) k;
  1373  /**/
  1374    
  1375    j=n1-1; s1[j--]=n-1;
  1376    succ_t=0; // s[n-2] must be L-type
  1377    for(i=n-2; i>0; i--) {
  1378  	  
  1379  	if(chr(i)==separator)k--;
  1380  
  1381      cur_t=(chr(i-1)<chr(i) ||
  1382            (chr(i-1)==chr(i) && succ_t==1))?1:0;
  1383      if(cur_t==0 && succ_t==1){//LMS-suffix
  1384  		s1[j]=i;
  1385  		DA[j]=k;
  1386  		j--;
  1387  	}
  1388      succ_t=cur_t;
  1389    }
  1390    
  1391  }
  1392  
  1393  
  1394  /*****************************************************************************/
  1395  
  1396  int_t SACA_K(int_t	*s, uint_t *SA,
  1397    uint_t n, unsigned int K,
  1398    uint_t m, int cs, int level) {
  1399    uint_t i;
  1400    uint_t *bkt=NULL;
  1401    
  1402    #if PHASES
  1403    time_t t_start_phase = 0.0; 
  1404    clock_t c_start_phase = 0.0;
  1405    #endif
  1406  
  1407    #if DEPTH
  1408    time_t t_start = time(NULL);
  1409    clock_t c_start =  clock();
  1410    #endif
  1411  
  1412    #if PHASES
  1413    if(!level){
  1414  	t_start_phase = time(NULL);
  1415  	c_start_phase =  clock();
  1416    }
  1417    #endif
  1418  
  1419    // stage 1: reduce the problem by at least 1/2.
  1420    if(level==0) {
  1421  
  1422      bkt=(uint_t *)malloc(sizeof(int_t)*K);
  1423      putSubstr0(SA, s, bkt, n, K, cs);
  1424  
  1425      induceSAl0(SA, s, bkt, n, K, false, cs);
  1426      induceSAs0(SA, s, bkt, n, K, false, cs);
  1427  
  1428    }
  1429    else {
  1430      putSubstr1((int_t *)SA, (int_t *)s,(int_t)n, cs);
  1431      induceSAl1((int_t *)SA, (int_t *)s, n ,false, cs);
  1432      induceSAs1((int_t *)SA, (int_t *)s, n, false, cs);
  1433    }
  1434  
  1435    // now, all the LMS-substrings are sorted and 
  1436    //   stored sparsely in SA.
  1437  
  1438    // compact all the sorted substrings into
  1439    //   the first n1 items of SA.
  1440    // 2*n1 must be not larger than n.
  1441    uint_t n1=0;
  1442    for(i=0; i<n; i++) 
  1443      if((!level&&SA[i]>0) || (level&&((int_t *)SA)[i]>0))
  1444        SA[n1++]=SA[i];
  1445  
  1446    uint_t *SA1=SA, *s1=SA+m-n1;
  1447    uint_t name_ctr;
  1448    name_ctr=nameSubstr(SA,s,s1,n,m,n1,level,cs);
  1449  
  1450    #if PHASES
  1451    if(!level){
  1452  	printf("phase 1:\n");
  1453  	time_stop(t_start_phase, c_start_phase);
  1454    }
  1455    #endif
  1456  
  1457    #if PHASES
  1458    if(!level){
  1459  	t_start_phase = time(NULL);
  1460  	c_start_phase =  clock();
  1461    }
  1462    #endif
  1463  
  1464    // stage 2: solve the reduced problem.
  1465    int_t depth=1;
  1466    // recurse if names are not yet unique.
  1467    if(name_ctr<n1)
  1468      depth += SACA_K((int_t*)s1, SA1, 
  1469            n1, 0, m-n1, sizeof(int_t), level+1);
  1470    else // get the suffix array of s1 directly.
  1471      for(i=0; i<n1; i++) SA1[s1[i]]=i;
  1472  
  1473    // stage 3: induce SA(S) from SA(S1).
  1474  
  1475    getSAlms(SA, s, s1, n, n1, level, cs);
  1476  
  1477    for(i=0; i<n1; i++) SA[i]=s1[SA[i]];
  1478    for(i=n1; i<n; i++) SA[i]=level?EMPTY_k:0; 
  1479  
  1480    if(level==0) {
  1481      putSuffix0(SA, s, bkt, n, K, n1, cs);
  1482  
  1483      #if PHASES
  1484      printf("phase 2:\n");
  1485      time_stop(t_start_phase, c_start_phase);
  1486      #endif
  1487    
  1488      #if PHASES
  1489      t_start_phase = time(NULL);
  1490      c_start_phase =  clock();
  1491      #endif
  1492  
  1493      induceSAl0(SA, s, bkt, n, K, true, cs);
  1494  
  1495      #if PHASES
  1496      printf("phase 3:\n");
  1497      time_stop(t_start_phase, c_start_phase);
  1498      #endif
  1499    
  1500      #if PHASES
  1501      t_start_phase = time(NULL);
  1502      c_start_phase =  clock();
  1503      #endif
  1504  
  1505      induceSAs0(SA, s, bkt, n, K, true, cs);
  1506      free(bkt);
  1507  
  1508      #if PHASES
  1509      printf("phase 4:\n");
  1510      time_stop(t_start_phase, c_start_phase);
  1511      #endif
  1512    }
  1513    else {
  1514      putSuffix1((int_t *)SA, (int_t *)s, n1, cs);
  1515      induceSAl1((int_t *)SA, (int_t *)s, n, true, cs);
  1516      induceSAs1((int_t *)SA, (int_t *)s, n, true, cs);
  1517    }
  1518  
  1519    #if DEPTH
  1520    printf("depth %" PRIdN ":\nname_ctr = %" PRIdN ", n1 =%" PRIdN ", n = %" PRIdN "\n", depth, name_ctr, n1, n);
  1521    time_stop(t_start, c_start);
  1522    #endif
  1523  
  1524  return depth;
  1525  }
  1526  
  1527  /*****************************************************************************/
  1528  
  1529  int_t gSACA_K(uint_t *s, uint_t *SA,
  1530    uint_t n, unsigned int K,
  1531    int cs, uint_t separator, int level) {
  1532    uint_t i;
  1533    uint_t *bkt=NULL;
  1534    uint_t m=n;
  1535  
  1536    #if PHASES
  1537    time_t t_start_phase = 0.0; 
  1538    clock_t c_start_phase = 0.0;
  1539    #endif
  1540  
  1541    #if DEPTH
  1542    time_t t_start = time(NULL);
  1543    clock_t c_start =  clock();
  1544    #endif
  1545  
  1546    #if PHASES
  1547  	t_start_phase = time(NULL);
  1548  	c_start_phase =  clock();
  1549    #endif
  1550    // stage 1: reduce the problem by at least 1/2.
  1551  
  1552    bkt=(uint_t *)malloc(sizeof(int_t)*K);
  1553  
  1554    putSubstr0_generalized(SA, s, bkt, n, K, cs, separator);
  1555  
  1556    induceSAl0_generalized(SA, s, bkt, n, K, false, cs, separator);
  1557    induceSAs0_generalized(SA, s, bkt, n, K, false, cs, separator);
  1558  
  1559    // insert separator suffixes in their buckets
  1560    // bkt[separator]=1; // gsa-is
  1561    for(i=n-3; i>0; i--)
  1562      if(chr(i)==separator)
  1563        SA[bkt[chr(i)]--]=i;
  1564  
  1565    // now, all the LMS-substrings are sorted and 
  1566    //   stored sparsely in SA.
  1567  
  1568    // compact all the sorted substrings into
  1569    //   the first n1 items of SA.
  1570    // 2*n1 must be not larger than n.
  1571    uint_t n1=0;
  1572    for(i=0; i<n; i++) 
  1573      if((SA[i]>0))
  1574        SA[n1++]=SA[i];
  1575  
  1576    uint_t *SA1=SA, *s1=SA+m-n1;
  1577    uint_t name_ctr;
  1578    name_ctr=nameSubstr_generalized(SA,s,s1,n,m,n1,level,cs,separator);
  1579  
  1580    #if PHASES
  1581  	printf("phase 1:\n");
  1582  	time_stop(t_start_phase, c_start_phase);
  1583    #endif
  1584  
  1585    #if PHASES
  1586  	t_start_phase = time(NULL);
  1587  	c_start_phase =  clock();
  1588    #endif
  1589  
  1590    // stage 2: solve the reduced problem.
  1591    int_t depth=1;
  1592    // recurse if names are not yet unique.
  1593    if(name_ctr<n1)
  1594      depth += SACA_K((int_t*)s1, SA1, 
  1595            n1, 0, m-n1, sizeof(int_t), level+1);
  1596    else // get the suffix array of s1 directly.
  1597      for(i=0; i<n1; i++) SA1[s1[i]]=i;
  1598  
  1599    // stage 3: induce SA(S) from SA(S1).
  1600  
  1601    getSAlms(SA, (int_t*)s, s1, n, n1, level, cs);
  1602  
  1603    for(i=0; i<n1; i++) SA[i]=s1[SA[i]];
  1604    for(i=n1; i<n; i++) SA[i]=0; 
  1605  
  1606    putSuffix0_generalized(SA, s, bkt, n, K, n1, cs, separator);
  1607    
  1608    #if PHASES
  1609  	printf("phase 2:\n");
  1610  	time_stop(t_start_phase, c_start_phase);
  1611    #endif
  1612  
  1613    #if PHASES
  1614  	t_start_phase = time(NULL);
  1615  	c_start_phase =  clock();
  1616    #endif
  1617  
  1618    induceSAl0_generalized(SA, s, bkt, n, K, true, cs, separator);
  1619  
  1620    #if PHASES
  1621        printf("phase 3:\n");
  1622        time_stop(t_start_phase, c_start_phase);
  1623    #endif
  1624    
  1625    #if PHASES
  1626        t_start_phase = time(NULL);
  1627        c_start_phase =  clock();
  1628    #endif
  1629  
  1630    induceSAs0_generalized(SA, s, bkt, n, K, true, cs, separator);
  1631  
  1632    free(bkt);
  1633  
  1634    #if DEPTH
  1635    printf("depth %" PRIdN ":\nname_ctr = %" PRIdN ", n1 =%" PRIdN ", n = %" PRIdN "\n", depth, name_ctr, n1, n);
  1636    time_stop(t_start, c_start);
  1637    #endif
  1638  
  1639    #if PHASES
  1640        printf("phase 4:\n");
  1641        time_stop(t_start_phase, c_start_phase);
  1642    #endif
  1643  
  1644  return depth;
  1645  }
  1646  
  1647  /*****************************************************************************/
  1648  
  1649  int_t gSACA_K_LCP(uint_t *s, uint_t *SA, int_t *LCP,
  1650    uint_t n, unsigned int K,
  1651    int cs, uint_t separator, int level) {
  1652    uint_t i;
  1653    uint_t *bkt=NULL;
  1654    uint_t m=n;
  1655    
  1656    #if PHASES
  1657    time_t t_start_phase = 0.0; 
  1658    clock_t c_start_phase = 0.0;
  1659    #endif
  1660  
  1661    #if DEPTH
  1662    time_t t_start = time(NULL);
  1663    clock_t c_start =  clock();
  1664    #endif
  1665  
  1666    #if PHASES
  1667  	t_start_phase = time(NULL);
  1668  	c_start_phase =  clock();
  1669    #endif
  1670    // stage 1: reduce the problem by at least 1/2.
  1671  
  1672    bkt=(uint_t *)malloc(sizeof(int_t)*K);
  1673    putSubstr0_generalized(SA, s, bkt, n, K, cs, separator);
  1674  
  1675    #if DEBUG
  1676    printf("bucket LMS-subs\n");
  1677    for(i=0; i<n; i++)
  1678          printf("%" PRIdN "\t", SA[i]+1);
  1679    printf("\n");
  1680    #endif
  1681   
  1682    induceSAl0_generalized(SA, s, bkt, n, K, false, cs, separator);
  1683  
  1684    #if DEBUG
  1685    printf("L-type\n");
  1686    for(i=0; i<n; i++)
  1687  	if(SA[i]!=0)  printf("%" PRIdN "\t", SA[i]+1);
  1688          else printf("-1\t");
  1689    printf("\n");
  1690    #endif
  1691  
  1692    induceSAs0_generalized(SA, s, bkt, n, K, false, cs, separator);
  1693  
  1694    #if DEBUG
  1695    printf("S-type\n");
  1696    for(i=0; i<n; i++)
  1697  	if(SA[i]!=0)  printf("%" PRIdN "\t", SA[i]+1);
  1698          else printf("-1\t");
  1699    printf("\n");
  1700    #endif
  1701    
  1702    // insert separator suffixes in their buckets
  1703    // bkt[separator]=1; // gsa-is
  1704    for(i=n-3; i>0; i--)
  1705      if(chr(i)==separator)
  1706        SA[bkt[chr(i)]--]=i;
  1707  
  1708    #if DEBUG
  1709    printf("S-type (separators)\n");
  1710    for(i=0; i<n; i++)
  1711  	if(SA[i]!=0)  printf("%" PRIdN "\t", SA[i]+1);
  1712          else printf("-1\t");
  1713    printf("\n");
  1714    #endif
  1715  
  1716    // now, all the LMS-substrings are sorted and 
  1717    //   stored sparsely in SA.
  1718  
  1719    // compact all the sorted substrings into
  1720    //   the first n1 items of SA.
  1721    // 2*n1 must be not larger than n.
  1722    uint_t n1=0;
  1723    for(i=0; i<n; i++) 
  1724      if((SA[i]>0))
  1725        SA[n1++]=SA[i];
  1726  
  1727    uint_t *SA1=SA, *s1=SA+m-n1;
  1728    uint_t name_ctr;
  1729  
  1730    #if DEBUG
  1731    printf("\nSA\n");
  1732    for(i=0; i<n; i++)
  1733      printf("%" PRIdN "\t", SA[i]+1);
  1734    printf("\n");
  1735    printf("LCP\n");
  1736    for(i=0; i<n; i++)
  1737          printf("%" PRIdN "\t", LCP[i]);
  1738    printf("\n\n");
  1739    #endif
  1740  
  1741    name_ctr=nameSubstr_generalized_LCP(SA,LCP,s,s1,n,m,n1,level,cs,separator);
  1742  
  1743    #if DEBUG
  1744    printf("nameSubstr:\n");
  1745    printf("SA\n");
  1746    for(i=0; i<n; i++)
  1747      printf("%" PRIdN "\t", SA[i]+1);
  1748    printf("\n");
  1749    printf("LCP\n");
  1750    for(i=0; i<n; i++)
  1751          printf("%" PRIdN "\t", LCP[i]);
  1752    printf("\n\n");
  1753    #endif
  1754  
  1755    #if PHASES
  1756  	printf("phase 1:\n");
  1757  	time_stop(t_start_phase, c_start_phase);
  1758    #endif
  1759  
  1760    #if PHASES
  1761  	t_start_phase = time(NULL);
  1762  	c_start_phase =  clock();
  1763    #endif
  1764  
  1765    // stage 2: solve the reduced problem.
  1766    int_t depth=1;
  1767    // recurse if names are not yet unique.
  1768    if(name_ctr<n1)
  1769      depth += SACA_K((int_t*)s1, SA1, 
  1770            n1, 0, m-n1, sizeof(int_t), level+1);
  1771    else // get the suffix array of s1 directly.
  1772      for(i=0; i<n1; i++) SA1[s1[i]]=i;
  1773  
  1774    // stage 3: induce SA(S) from SA(S1).
  1775    #if DEBUG
  1776    printf("recursive:\n");
  1777    printf("SA\n");
  1778    for(i=0; i<n; i++)
  1779      printf("%" PRIdN "\t", SA[i]+1);
  1780    printf("\n");
  1781    printf("LCP\n");
  1782    for(i=0; i<n; i++)
  1783          printf("%" PRIdN "\t", LCP[i]);
  1784    printf("\n\n");
  1785    #endif
  1786  
  1787    getSAlms(SA, (int_t*)s, s1, n, n1, level, cs);
  1788  
  1789    #if DEBUG
  1790    printf("getSAlms:\n");
  1791    printf("SA\n");
  1792    for(i=0; i<n; i++)
  1793      printf("%" PRIdN "\t", SA[i]+1);
  1794    printf("\n");
  1795    printf("LCP\n");
  1796    for(i=0; i<n; i++)
  1797          printf("%" PRIdN "\t", LCP[i]);
  1798    printf("\n\n");
  1799    #endif
  1800  
  1801    uint_t *RA=s1;
  1802    int_t *PLCP=LCP+m-n1;//PHI is stored in PLCP array
  1803  
  1804    //compute the LCP of consecutive LMS-suffixes
  1805    compute_lcp_phi_sparse((int_t*)s, SA1, RA, LCP, PLCP, n1, cs, separator); 
  1806  
  1807    #if DEBUG
  1808    printf("\nPHI-algorithm:\n");
  1809    printf("--\nSA1\n");
  1810    for(i=0; i<n1; i++)//SA1
  1811          printf("%" PRIdN "\t", SA1[i]+1);
  1812    printf("| ");
  1813    for(i=0; i<n1; i++)//s1
  1814          printf("%" PRIdN "\t", RA[i]+1);
  1815    printf("\n");
  1816    printf("LCP\n");
  1817    for(i=0; i<n1; i++)
  1818          printf("%" PRIdN "\t", LCP[i]);
  1819    printf("| ");
  1820    for(i=0; i<n1; i++)
  1821          printf("%" PRIdN "\t", PLCP[i]);
  1822    printf("\n");
  1823    #endif
  1824  
  1825    for(i=0; i<n1; i++) SA[i]=s1[SA[i]];
  1826    for(i=n1; i<n; i++) SA[i]=U_MAX; 
  1827    for(i=n1;i<n;i++) LCP[i]=0;
  1828  
  1829    #if DEBUG
  1830    printf("\nstage 3:\n\n");
  1831    printf("mapping back:\n");
  1832    printf("SA\n");
  1833    for(i=0; i<n; i++)
  1834      printf("%" PRIdN "\t", SA[i]+1);
  1835    printf("\n");
  1836    printf("LCP\n");
  1837    for(i=0; i<n; i++)
  1838          printf("%" PRIdN "\t", LCP[i]);
  1839    printf("\n\n");
  1840    #endif
  1841  
  1842  /*
  1843    if(!lcp_array_check((unsigned char*)s, SA1, LCP, n, cs, separator))printf("isNotLCP (lms)!!\n"); 
  1844    else printf("isLCP (lms)!!\n");
  1845  */
  1846  
  1847    putSuffix0_generalized_LCP(SA, LCP, s, bkt, n, K, n1, cs, separator);
  1848    
  1849    #if PHASES
  1850    if(!level){
  1851  	printf("phase 2:\n");
  1852  	time_stop(t_start_phase, c_start_phase);
  1853    }
  1854    #endif
  1855  
  1856    #if PHASES
  1857    if(!level){
  1858  	t_start_phase = time(NULL);
  1859  	c_start_phase =  clock();
  1860    }
  1861    #endif
  1862  
  1863  
  1864    #if DEBUG
  1865    printf("SA (mapped)\n");
  1866    for(i=0; i<n; i++)
  1867          printf("%" PRIdN "\t", SA[i]+1);
  1868    printf("\n");
  1869    printf("LCP\n");
  1870    for(i=0; i<n; i++)
  1871          printf("%" PRIdN "\t", LCP[i]);
  1872    printf("\n\n");
  1873    #endif
  1874  
  1875    induceSAl0_generalized_LCP(SA, LCP, s, bkt, n, K, cs, separator);
  1876  
  1877    #if DEBUG
  1878    printf("L-type\n");
  1879    for(i=0; i<n; i++)
  1880          printf("%" PRIdN "\t", SA[i]+1);
  1881    printf("\n");
  1882    printf("LCP\n");
  1883    for(i=0; i<n; i++)
  1884          printf("%" PRIdN "\t", LCP[i]);
  1885    printf("\n\n");
  1886    #endif
  1887  
  1888    #if PHASES
  1889    if(!level){
  1890        printf("phase 3:\n");
  1891        time_stop(t_start_phase, c_start_phase);
  1892    }
  1893    #endif
  1894    
  1895    #if PHASES
  1896    if(!level){
  1897        t_start_phase = time(NULL);
  1898        c_start_phase =  clock();
  1899    }
  1900    #endif
  1901  
  1902    induceSAs0_generalized_LCP(SA, LCP, s, bkt, n, K, cs, separator);
  1903  
  1904    #if DEBUG
  1905    printf("S-type\n");
  1906    for(i=0; i<n; i++)
  1907          printf("%" PRIdN "\t", SA[i]+1);
  1908    printf("\n");
  1909    printf("LCP\n");
  1910    for(i=0; i<n; i++)
  1911          printf("%" PRIdN "\t", LCP[i]);
  1912    printf("\n\n");
  1913    #endif
  1914    free(bkt);
  1915  
  1916    #if DEPTH
  1917    printf("depth %" PRIdN ":\nname_ctr = %" PRIdN ", n1 =%" PRIdN ", n = %" PRIdN "\n", depth, name_ctr, n1, n);
  1918    time_stop(t_start, c_start);
  1919    #endif
  1920  
  1921    #if PHASES
  1922    if(!level){
  1923        printf("phase 4:\n");
  1924        time_stop(t_start_phase, c_start_phase);
  1925    }
  1926    #endif
  1927  
  1928  return depth;
  1929  }
  1930  
  1931  /*****************************************************************************/
  1932  
  1933  int_t gSACA_K_DA(uint_t *s, uint_t *SA, int_da *DA,
  1934    uint_t n, unsigned int K,
  1935    int cs, uint_t separator, int level) {
  1936    uint_t i;
  1937    uint_t *bkt=NULL;
  1938    uint_t m=n;
  1939    
  1940    #if PHASES
  1941    time_t t_start_phase = 0.0; 
  1942    clock_t c_start_phase = 0.0;
  1943    #endif
  1944  
  1945    #if DEPTH
  1946    time_t t_start = time(NULL);
  1947    clock_t c_start =  clock();
  1948    #endif
  1949  
  1950    #if PHASES
  1951  	t_start_phase = time(NULL);
  1952  	c_start_phase =  clock();
  1953    #endif
  1954    // stage 1: reduce the problem by at least 1/2.
  1955  
  1956    bkt=(uint_t *)malloc(sizeof(int_t)*K);
  1957    putSubstr0_generalized(SA, s, bkt, n, K, cs, separator);
  1958  
  1959    #if DEBUG
  1960    printf("bucket LMS-subs\n");
  1961    for(i=0; i<n; i++)
  1962          printf("%" PRIdN "\t", SA[i]+1);
  1963    printf("\n");
  1964    #endif
  1965   
  1966    induceSAl0_generalized(SA, s, bkt, n, K, false, cs, separator);
  1967  
  1968    #if DEBUG
  1969    printf("L-type\n");
  1970    for(i=0; i<n; i++)
  1971  	if(SA[i]!=0)  printf("%" PRIdN "\t", SA[i]+1);
  1972          else printf("-1\t");
  1973    printf("\n");
  1974    #endif
  1975  
  1976    induceSAs0_generalized(SA, s, bkt, n, K, false, cs, separator);
  1977  
  1978    #if DEBUG
  1979    printf("S-type\n");
  1980    for(i=0; i<n; i++)
  1981  	if(SA[i]!=0)  printf("%" PRIdN "\t", SA[i]+1);
  1982          else printf("-1\t");
  1983    printf("\n");
  1984    #endif
  1985    
  1986    // insert separator suffixes in their buckets
  1987    // bkt[separator]=1; // gsa-is
  1988    for(i=n-3; i>0; i--)
  1989      if(chr(i)==separator)
  1990        SA[bkt[chr(i)]--]=i;
  1991  
  1992    #if DEBUG
  1993    printf("S-type (separators)\n");
  1994    for(i=0; i<n; i++)
  1995  	if(SA[i]!=0)  printf("%" PRIdN "\t", SA[i]+1);
  1996          else printf("-1\t");
  1997    printf("\n");
  1998    #endif
  1999  
  2000    // now, all the LMS-substrings are sorted and 
  2001    //   stored sparsely in SA.
  2002  
  2003    // compact all the sorted substrings into
  2004    //   the first n1 items of SA.
  2005    // 2*n1 must be not larger than n.
  2006    uint_t n1=0;
  2007    for(i=0; i<n; i++) 
  2008      if((SA[i]>0))
  2009        SA[n1++]=SA[i];
  2010  
  2011    uint_t *SA1=SA, *s1=SA+m-n1;
  2012    uint_t name_ctr;
  2013  
  2014    #if DEBUG
  2015    printf("\nSA\n");
  2016    for(i=0; i<n; i++)
  2017      printf("%" PRIdN "\t", SA[i]+1);
  2018    printf("\n\n");
  2019    #endif
  2020  
  2021    name_ctr=nameSubstr_generalized(SA,s,s1,n,m,n1,level,cs,separator);
  2022  
  2023    #if DEBUG
  2024    printf("nameSubstr:\n");
  2025    printf("SA\n");
  2026    for(i=0; i<n; i++)
  2027      printf("%" PRIdN "\t", SA[i]+1);
  2028    printf("\n\n");
  2029    #endif
  2030  
  2031    #if PHASES
  2032  	printf("phase 1:\n");
  2033  	time_stop(t_start_phase, c_start_phase);
  2034    #endif
  2035  
  2036    #if PHASES
  2037  	t_start_phase = time(NULL);
  2038  	c_start_phase =  clock();
  2039    #endif
  2040  
  2041    // stage 2: solve the reduced problem.
  2042    int_t depth=1;
  2043    // recurse if names are not yet unique.
  2044    if(name_ctr<n1)
  2045      depth += SACA_K((int_t*)s1, SA1, 
  2046            n1, 0, m-n1, sizeof(int_t), level+1);
  2047    else // get the suffix array of s1 directly.
  2048      for(i=0; i<n1; i++) SA1[s1[i]]=i;
  2049  
  2050    // stage 3: induce SA(S) from SA(S1).
  2051    #if DEBUG
  2052    printf("recursive:\n");
  2053    printf("SA\n");
  2054    for(i=0; i<n; i++)
  2055      printf("%" PRIdN "\t", SA[i]+1);
  2056    printf("\n\n");
  2057    #endif
  2058  
  2059    int_da *d1=DA+m-n1;
  2060    
  2061    getSAlms_DA(SA, d1, (int_t*)s, s1, n, n1, level, cs, separator);
  2062  
  2063    #if DEBUG
  2064    printf("getSAlms:\n");
  2065    printf("SA\n");
  2066    for(i=0; i<n; i++)
  2067      printf("%" PRIdN "\t", SA[i]+1);
  2068    printf("\n");
  2069    printf("DA\n");
  2070    for(i=0; i<n; i++)
  2071          printf("%" PRIdA "\t", DA[i]);
  2072    printf("\n\n");
  2073    #endif
  2074  
  2075  /**/
  2076    for(i=0; i<n1; i++) {DA[i]=d1[SA[i]]; SA[i]=s1[SA[i]];}
  2077    for(i=n1; i<n; i++) {SA[i]=0; DA[i]=0;}
  2078  /**/
  2079  
  2080    #if DEBUG
  2081    printf("\nstage 3:\n\n");
  2082    printf("mapping back:\n");
  2083    printf("SA\n");
  2084    for(i=0; i<n; i++)
  2085      printf("%" PRIdN "\t", SA[i]+1);
  2086    printf("\n");
  2087    printf("DA\n");
  2088    for(i=0; i<n; i++)
  2089          printf("%" PRIdA "\t", DA[i]);
  2090    printf("\n\n");
  2091    #endif
  2092  
  2093  /**/
  2094    putSuffix0_generalized_DA(SA, DA, s, bkt, n, K, n1, cs, separator);
  2095  
  2096  /**/
  2097    
  2098    #if PHASES
  2099    if(!level){
  2100  	printf("phase 2:\n");
  2101  	time_stop(t_start_phase, c_start_phase);
  2102    }
  2103    #endif
  2104  
  2105    #if PHASES
  2106    if(!level){
  2107  	t_start_phase = time(NULL);
  2108  	c_start_phase =  clock();
  2109    }
  2110    #endif
  2111  
  2112  
  2113    #if DEBUG
  2114    printf("SA (mapped)\n");
  2115    for(i=0; i<n; i++)
  2116          printf("%" PRIdN "\t", SA[i]+1);
  2117    printf("\n");
  2118    printf("DA\n");
  2119    for(i=0; i<n; i++)
  2120          printf("%" PRIdA "\t", DA[i]);
  2121    printf("\n\n");
  2122    #endif
  2123  
  2124  /**/
  2125    induceSAl0_generalized_DA(SA, DA, s, bkt, n, K, cs, separator);
  2126  /**/
  2127  
  2128    #if DEBUG
  2129    printf("L-type\n");
  2130    for(i=0; i<n; i++)
  2131          printf("%" PRIdN "\t", SA[i]+1);
  2132    printf("\n");
  2133    printf("DA\n");
  2134    for(i=0; i<n; i++)
  2135          printf("%" PRIdA "\t", DA[i]);
  2136    printf("\n\n");
  2137    #endif
  2138  
  2139    #if PHASES
  2140    if(!level){
  2141        printf("phase 3:\n");
  2142        time_stop(t_start_phase, c_start_phase);
  2143    }
  2144    #endif
  2145    
  2146    #if PHASES
  2147    if(!level){
  2148        t_start_phase = time(NULL);
  2149        c_start_phase =  clock();
  2150    }
  2151    #endif
  2152  
  2153  /**/
  2154    induceSAs0_generalized_DA(SA, DA, s, bkt, n, K, cs, separator);
  2155  /**/
  2156  
  2157    #if DEBUG
  2158    printf("S-type\n");
  2159    for(i=0; i<n; i++)
  2160          printf("%" PRIdN "\t", SA[i]+1);
  2161    printf("\n");
  2162    printf("DA\n");
  2163    for(i=0; i<n; i++)
  2164          printf("%" PRIdA "\t", DA[i]);
  2165    printf("\n\n");
  2166    #endif
  2167    free(bkt);
  2168  
  2169    #if DEPTH
  2170    printf("depth %" PRIdN ":\nname_ctr = %" PRIdN ", n1 =%" PRIdN ", n = %" PRIdN "\n", depth, name_ctr, n1, n);
  2171    time_stop(t_start, c_start);
  2172    #endif
  2173  
  2174    #if PHASES
  2175    if(!level){
  2176        printf("phase 4:\n");
  2177        time_stop(t_start_phase, c_start_phase);
  2178    }
  2179    #endif
  2180  
  2181  return depth;
  2182  }
  2183  
  2184  /*****************************************************************************/
  2185  
  2186  int_t gSACA_K_LCP_DA(uint_t *s, uint_t *SA, int_t *LCP, int_da *DA,
  2187    uint_t n, unsigned int K,
  2188    int cs, uint_t separator, int level) {
  2189    uint_t i;
  2190    uint_t *bkt=NULL;
  2191    uint_t m=n;
  2192    
  2193    #if PHASES
  2194    time_t t_start_phase = 0.0; 
  2195    clock_t c_start_phase = 0.0;
  2196    #endif
  2197  
  2198    #if DEPTH
  2199    time_t t_start = time(NULL);
  2200    clock_t c_start =  clock();
  2201    #endif
  2202  
  2203    #if PHASES
  2204  	t_start_phase = time(NULL);
  2205  	c_start_phase =  clock();
  2206    #endif
  2207    // stage 1: reduce the problem by at least 1/2.
  2208  
  2209    bkt=(uint_t *)malloc(sizeof(int_t)*K);
  2210    putSubstr0_generalized(SA, s, bkt, n, K, cs, separator);
  2211  
  2212    #if DEBUG
  2213    printf("bucket LMS-subs\n");
  2214    for(i=0; i<n; i++)
  2215          printf("%" PRIdN "\t", SA[i]+1);
  2216    printf("\n");
  2217    #endif
  2218   
  2219    induceSAl0_generalized(SA, s, bkt, n, K, false, cs, separator);
  2220  
  2221    #if DEBUG
  2222    printf("L-type\n");
  2223    for(i=0; i<n; i++)
  2224  	if(SA[i]!=0)  printf("%" PRIdN "\t", SA[i]+1);
  2225          else printf("-1\t");
  2226    printf("\n");
  2227    #endif
  2228  
  2229    induceSAs0_generalized(SA, s, bkt, n, K, false, cs, separator);
  2230  
  2231    #if DEBUG
  2232    printf("S-type\n");
  2233    for(i=0; i<n; i++)
  2234  	if(SA[i]!=0)  printf("%" PRIdN "\t", SA[i]+1);
  2235          else printf("-1\t");
  2236    printf("\n");
  2237    #endif
  2238    
  2239    // insert separator suffixes in their buckets
  2240    // bkt[separator]=1; // gsa-is
  2241    for(i=n-3; i>0; i--)
  2242      if(chr(i)==separator)
  2243        SA[bkt[chr(i)]--]=i;
  2244  
  2245    #if DEBUG
  2246    printf("S-type (separators)\n");
  2247    for(i=0; i<n; i++)
  2248  	if(SA[i]!=0)  printf("%" PRIdN "\t", SA[i]+1);
  2249          else printf("-1\t");
  2250    printf("\n");
  2251    #endif
  2252  
  2253    // now, all the LMS-substrings are sorted and 
  2254    //   stored sparsely in SA.
  2255  
  2256    // compact all the sorted substrings into
  2257    //   the first n1 items of SA.
  2258    // 2*n1 must be not larger than n.
  2259    uint_t n1=0;
  2260    for(i=0; i<n; i++) 
  2261      if((SA[i]>0))
  2262        SA[n1++]=SA[i];
  2263  
  2264    uint_t *SA1=SA, *s1=SA+m-n1;
  2265    uint_t name_ctr;
  2266  
  2267    #if DEBUG
  2268    printf("\nSA\n");
  2269    for(i=0; i<n; i++)
  2270      printf("%" PRIdN "\t", SA[i]+1);
  2271    printf("\n");
  2272    printf("LCP\n");
  2273    for(i=0; i<n; i++)
  2274          printf("%" PRIdN "\t", LCP[i]);
  2275    printf("\n\n");
  2276    #endif
  2277  
  2278    name_ctr=nameSubstr_generalized_LCP(SA,LCP,s,s1,n,m,n1,level,cs,separator);
  2279  
  2280    #if DEBUG
  2281    printf("nameSubstr:\n");
  2282    printf("SA\n");
  2283    for(i=0; i<n; i++)
  2284      printf("%" PRIdN "\t", SA[i]+1);
  2285    printf("\n");
  2286    printf("LCP\n");
  2287    for(i=0; i<n; i++)
  2288          printf("%" PRIdN "\t", LCP[i]);
  2289    printf("\n\n");
  2290    #endif
  2291  
  2292    #if PHASES
  2293  	printf("phase 1:\n");
  2294  	time_stop(t_start_phase, c_start_phase);
  2295    #endif
  2296  
  2297    #if PHASES
  2298  	t_start_phase = time(NULL);
  2299  	c_start_phase =  clock();
  2300    #endif
  2301  
  2302    // stage 2: solve the reduced problem.
  2303    int_t depth=1;
  2304    // recurse if names are not yet unique.
  2305    if(name_ctr<n1)
  2306      depth += SACA_K((int_t*)s1, SA1, 
  2307            n1, 0, m-n1, sizeof(int_t), level+1);
  2308    else // get the suffix array of s1 directly.
  2309      for(i=0; i<n1; i++) SA1[s1[i]]=i;
  2310  
  2311    // stage 3: induce SA(S) from SA(S1).
  2312    #if DEBUG
  2313    printf("recursive:\n");
  2314    printf("SA\n");
  2315    for(i=0; i<n; i++)
  2316      printf("%" PRIdN "\t", SA[i]+1);
  2317    printf("\n");
  2318    printf("LCP\n");
  2319    for(i=0; i<n; i++)
  2320          printf("%" PRIdN "\t", LCP[i]);
  2321    printf("\n\n");
  2322    #endif
  2323  
  2324    int_da *d1=DA+m-n1;
  2325    
  2326    getSAlms_DA(SA, d1, (int_t*)s, s1, n, n1, level, cs, separator);
  2327  
  2328  
  2329  //FELIPE  getSAlms(SA, (int_t*)s, s1, n, n1, level, cs);
  2330  
  2331    #if DEBUG
  2332    printf("getSAlms:\n");
  2333    printf("SA\n");
  2334    for(i=0; i<n; i++)
  2335      printf("%" PRIdN "\t", SA[i]+1);
  2336    printf("\n");
  2337    printf("LCP\n");
  2338    for(i=0; i<n; i++)
  2339          printf("%" PRIdN "\t", LCP[i]);
  2340    printf("\n");
  2341    printf("DA\n");
  2342    for(i=0; i<n; i++)
  2343          printf("%" PRIdA "\t", DA[i]); 
  2344    printf("\n\n");
  2345    #endif
  2346  
  2347    uint_t *RA=s1;
  2348    int_t *PLCP=LCP+m-n1;//PHI is stored in PLCP array
  2349  
  2350    //compute the LCP of consecutive LMS-suffixes
  2351    compute_lcp_phi_sparse((int_t*)s, SA1, RA, LCP, PLCP, n1, cs, separator); 
  2352  
  2353    #if DEBUG
  2354    printf("\nPHI-algorithm:\n");
  2355    printf("--\nSA1\n");
  2356    for(i=0; i<n1; i++)//SA1
  2357          printf("%" PRIdN "\t", SA1[i]+1);
  2358    printf("| ");
  2359    for(i=0; i<n1; i++)//s1
  2360          printf("%" PRIdN "\t", RA[i]+1);
  2361    printf("\n");
  2362    printf("LCP\n");
  2363    for(i=0; i<n1; i++)
  2364          printf("%" PRIdN "\t", LCP[i]);
  2365    printf("| ");
  2366    for(i=0; i<n1; i++)
  2367          printf("%" PRIdN "\t", PLCP[i]);
  2368    printf("\n");
  2369    #endif
  2370  
  2371    for(i=0; i<n1; i++) {DA[i]=d1[SA[i]]; SA[i]=s1[SA[i]];}
  2372    for(i=n1; i<n; i++) {SA[i]=U_MAX; DA[i]=0;}
  2373    for(i=n1;i<n;i++) LCP[i]=0;
  2374  
  2375  //DA  for(i=n1; i<n; i++) {SA[i]=0; DA[i]=-1;}
  2376  
  2377    #if DEBUG
  2378    printf("\nstage 3:\n\n");
  2379    printf("mapping back:\n");
  2380    printf("SA\n");
  2381    for(i=0; i<n; i++)
  2382      printf("%" PRIdN "\t", SA[i]+1);
  2383    printf("\n");
  2384    printf("LCP\n");
  2385    for(i=0; i<n; i++)
  2386          printf("%" PRIdN "\t", LCP[i]);
  2387    printf("\n");
  2388    printf("DA\n");
  2389    for(i=0; i<n; i++)
  2390          printf("%" PRIdA "\t", DA[i]);
  2391    printf("\n\n");
  2392    #endif
  2393  
  2394  /*
  2395    if(!lcp_array_check((unsigned char*)s, SA1, LCP, n, cs, separator))printf("isNotLCP (lms)!!\n"); 
  2396    else printf("isLCP (lms)!!\n");
  2397  */
  2398  
  2399    putSuffix0_generalized_LCP_DA(SA, LCP, DA, s, bkt, n, K, n1, cs, separator);
  2400    
  2401    #if PHASES
  2402    if(!level){
  2403  	printf("phase 2:\n");
  2404  	time_stop(t_start_phase, c_start_phase);
  2405    }
  2406    #endif
  2407  
  2408    #if PHASES
  2409    if(!level){
  2410  	t_start_phase = time(NULL);
  2411  	c_start_phase =  clock();
  2412    }
  2413    #endif
  2414  
  2415  
  2416    #if DEBUG
  2417    printf("SA (mapped)\n");
  2418    for(i=0; i<n; i++)
  2419          printf("%" PRIdN "\t", SA[i]+1);
  2420    printf("\n");
  2421    printf("LCP\n");
  2422    for(i=0; i<n; i++)
  2423          printf("%" PRIdN "\t", LCP[i]);
  2424    printf("\n\n");
  2425    #endif
  2426  
  2427    induceSAl0_generalized_LCP_DA(SA, LCP, DA, s, bkt, n, K, cs, separator);
  2428  
  2429    #if DEBUG
  2430    printf("L-type\n");
  2431    for(i=0; i<n; i++)
  2432          printf("%" PRIdN "\t", SA[i]+1);
  2433    printf("\n");
  2434    printf("LCP\n");
  2435    for(i=0; i<n; i++)
  2436          printf("%" PRIdN "\t", LCP[i]);
  2437    printf("\n");
  2438    printf("DA\n");
  2439    for(i=0; i<n; i++)
  2440          printf("%" PRIdA "\t", DA[i]);
  2441    printf("\n\n");
  2442    #endif
  2443  
  2444    #if PHASES
  2445    if(!level){
  2446        printf("phase 3:\n");
  2447        time_stop(t_start_phase, c_start_phase);
  2448    }
  2449    #endif
  2450    
  2451    #if PHASES
  2452    if(!level){
  2453        t_start_phase = time(NULL);
  2454        c_start_phase =  clock();
  2455    }
  2456    #endif
  2457  
  2458    induceSAs0_generalized_LCP_DA(SA, LCP, DA, s, bkt, n, K, cs, separator);
  2459  
  2460    #if DEBUG
  2461    printf("S-type\n");
  2462    for(i=0; i<n; i++)
  2463          printf("%" PRIdN "\t", SA[i]+1);
  2464    printf("\n");
  2465    printf("LCP\n");
  2466    for(i=0; i<n; i++)
  2467          printf("%" PRIdN "\t", LCP[i]);
  2468    printf("\n");
  2469    printf("DA\n");
  2470    for(i=0; i<n; i++)
  2471          printf("%" PRIdA "\t", DA[i]);
  2472    printf("\n\n");
  2473    #endif
  2474    free(bkt);
  2475  
  2476    #if DEPTH
  2477    printf("depth %" PRIdN ":\nname_ctr = %" PRIdN ", n1 =%" PRIdN ", n = %" PRIdN "\n", depth, name_ctr, n1, n);
  2478    time_stop(t_start, c_start);
  2479    #endif
  2480  
  2481    #if PHASES
  2482    if(!level){
  2483        printf("phase 4:\n");
  2484        time_stop(t_start_phase, c_start_phase);
  2485    }
  2486    #endif
  2487  
  2488  return depth;
  2489  }
  2490  
  2491  /*****************************************************************************/
  2492  
  2493  
  2494  int sacak(unsigned char *s, uint_t *SA, uint_t n){
  2495  	if((s == NULL) || (SA == NULL) || (n < 0)) return -1;
  2496  	return SACA_K((int_t*)s, (uint_t*)SA, n, 256, n, sizeof(char), 0);
  2497  }
  2498  
  2499  int sacak_int(int_text *s, uint_t *SA, uint_t n, uint_t k){
  2500  	if((s == NULL) || (SA == NULL) || (n < 0)) return -1;
  2501  	return SACA_K((int_t*)s, (uint_t*)SA, n, k, n, sizeof(int_text), 0);
  2502  }
  2503  
  2504  int gsacak(unsigned char *s, uint_t *SA, int_t *LCP, int_da *DA, uint_t n){
  2505  
  2506  	if((s == NULL) || (SA == NULL) || (n < 0)) return -1;
  2507  
  2508  	#if EMPTY_STRING
  2509  		for(i=0; i<n-1; i++) if(s[i]==1 && s[i+1]==1) return -2; 
  2510  	#endif  
  2511  
  2512  	if((LCP == NULL) && (DA == NULL))
  2513  		return gSACA_K((uint_t*)s, SA, n, 256, sizeof(char), 1, 0);
  2514  	else if (DA == NULL)
  2515  		return gSACA_K_LCP((uint_t*)s, SA, LCP, n, 256, sizeof(char), 1, 0);
  2516  	else if (LCP == NULL)
  2517  		return gSACA_K_DA((uint_t*)s, SA, DA, n, 256, sizeof(char), 1, 0);
  2518  	else
  2519  		return gSACA_K_LCP_DA((uint_t*)s, SA, LCP, DA, n, 256, sizeof(char), 1, 0);
  2520  }
  2521  
  2522  int gsacak_int(int_text *s, uint_t *SA, int_t *LCP, int_da *DA, uint_t n, uint_t k){
  2523  
  2524  	if((s == NULL) || (SA == NULL) || (n < 0)) return -1;
  2525  
  2526  	if((LCP == NULL) && (DA == NULL))
  2527  		return gSACA_K((uint_t*)s, SA, n, k, sizeof(int_text), 1, 0);
  2528  	else if (DA == NULL)
  2529  		return gSACA_K_LCP((uint_t*)s, SA, LCP, n, k, sizeof(int_text), 1, 0);
  2530  	else if (LCP == NULL)
  2531  		return gSACA_K_DA((uint_t*)s, SA, DA, n, k, sizeof(int_text), 1, 0);
  2532  	else
  2533  		return gSACA_K_LCP_DA((uint_t*)s, SA, LCP, DA, n, k, sizeof(int_text), 1, 0);
  2534  }
  2535  
  2536  /*****************************************************************************/