github.com/ledgerwatch/erigon-lib@v1.0.0/sais/gsa/gsacak.h (about)

     1  // vim: noai:ts=2:sw=2
     2  
     3  /* 
     4   * Authors: Felipe A. Louza, Simon Gog, Guilherme P. Telles
     5   * contact: louza@ic.unicamp.br
     6   * 03/04/2017
     7   */
     8  
     9  /* 
    10   * This code is a modification of the SACA-K algorithm by G. Nong, which can be
    11   * retrieved at: http://code.google.com/p/ge-nong/ 
    12   *
    13   * Our version of SACA-K, called gSACA-K, maintains the theoretical bounds of the
    14   * original algorithm to construct the generalized suffix array.
    15   *
    16   * Our algorithm gSACA-K can also compute the LCP-array and the Document-array
    17   * at no additional cost.
    18   * 
    19   * gsacak(s, SA, NULL, NULL, n) //computes only SA
    20   * gsacak(s, SA, LCP,  NULL, n) //computes SA and LCP
    21   * gsacak(s, SA, NULL, DA, n)   //computes SA and DA
    22   * gsacak(s, SA, LCP,  DA, n)   //computes SA, LCP and DA
    23   * 
    24   */
    25  
    26  /******************************************************************************/
    27  
    28  #ifndef GSACAK_H
    29  #define GSACAK_H
    30  
    31  #include <stdlib.h>
    32  #include <stdio.h>
    33  #include <limits.h>
    34  #include <inttypes.h>
    35  #include <string.h>
    36  #include <time.h>
    37  
    38  #define max(a,b) ((a) > (b) ? (a) : (b))  /* larger of a and b; the selected argument is evaluated twice, so avoid arguments with side effects */
    39  
    40  #ifndef DEBUG  /* DEBUG defaults to 0 unless the build already defines it */
    41    #define DEBUG 0
    42  #endif
    43  
    44  #ifndef M64  /* M64 defaults to 1 unless the build already defines it */
    45  	#define M64 1
    46  #endif
    47  
    48  #if M64  /* M64 non-zero: int_t/uint_t are 64 bits wide */
    49  	typedef int64_t	int_t;
    50  	typedef uint64_t uint_t;
    51  	#define PRIdN	PRId64  /* printf conversion for int_t */
    52  	#define U_MAX	UINT64_MAX  /* largest uint_t value */
    53  	#define I_MAX	INT64_MAX  /* largest int_t value */
    54  	#define I_MIN	INT64_MIN  /* smallest int_t value */
    55  #else  /* M64 == 0: the same names, 32 bits wide */
    56  	typedef int32_t int_t;
    57  	typedef uint32_t uint_t;
    58  	#define PRIdN	PRId32  /* printf conversion for int_t */
    59  	#define U_MAX	UINT32_MAX  /* largest uint_t value */
    60  	#define I_MAX	INT32_MAX  /* largest int_t value */
    61  	#define I_MIN	INT32_MIN  /* smallest int_t value */
    62  #endif  /* M64 */
    63  
    64  /*! @option element type of the text s[0..n-1] for integer alphabets (the s argument of sacak_int and gsacak_int)
    65   *
    66   *  @constraint sizeof(int_t) >= sizeof(int_text) 
    67   */
    68  typedef uint32_t int_text;	// 4 bytes per symbol, i.e. 4N bytes for s[0..n-1]
    69  #define PRIdT	PRIu32  /* printf conversion for int_text; unsigned (PRIu32) despite the 'd' in the macro name */
    70  
    71  /*! @option element type of the document array DA (the DA argument of gsacak and gsacak_int)
    72   */
    73  typedef int32_t int_da;
    74  #define PRIdA	PRId32  /* printf conversion for int_da */
    75  
    76  /******************************************************************************/
    77  
    78  /** @brief computes the suffix array of string s[0..n-1] 
    79   *
    80   *  @param s	input string (one unsigned char per symbol) with s[n-1]=0
    81   *  @param SA		suffix array (output; presumably caller-allocated with room for n entries -- TODO confirm)
    82   *  @param n	string length
    83   *  @return -1 if an error occurred, otherwise the depth of the recursive calls.
    84   */
    85  int sacak(unsigned char *s, uint_t *SA, uint_t n);
    86  
    87  /** @brief computes the suffix array of string s[0..n-1] over an integer alphabet
    88   *
         *  @param s	input string of int_text symbols (presumably with s[n-1]=0, as for sacak() -- TODO confirm)
         *  @param SA		suffix array (output)
         *  @param n	string length
    89   *  @param k	alphabet size+1 (0 is reserved)
         *  @return not documented in this header; presumably the same convention as sacak() -- TODO confirm
    90   */
    91  int sacak_int(int_text *s, uint_t *SA, uint_t n, uint_t k);
    92  
    93  /******************************************************************************/
    94  
    95  /** @brief Computes the suffix array SA (and optionally LCP, DA) of T^cat in s[0..n-1]
    96   *
    97   *  @param s		input concatenated string, using separators s[i]=1 and with s[n-1]=0
    98   *  @param SA		Suffix array (output)
    99   *  @param LCP	LCP array (output); pass NULL to skip computing it
   100   *  @param DA		Document array (output); pass NULL to skip computing it
   101   *  @param n		String length
   102   *  
   103   *  @return depth of the recursive calls. NOTE(review): sacak() documents -1 on error; whether that also applies here is not stated -- confirm.
   104   */
   105  int gsacak(unsigned char *s, uint_t *SA, int_t *LCP, int_da *DA, uint_t n);
   106  
   107  /** @brief Computes the suffix array SA (LCP, DA) of T^cat in s[0..n-1], for an integer alphabet
   108   *
   109   *  @param s		input concatenated string of int_text symbols, using separators s[i]=1 and with s[n-1]=0
   110   *  @param SA		Suffix array (output)
   111   *  @param LCP	LCP array (output; presumably may be NULL as with gsacak() -- TODO confirm)
   112   *  @param DA		Document array (output; presumably may be NULL as with gsacak() -- TODO confirm)
   113   *  @param n		String length
   114   *  @param k    alphabet size+2 (0 and 1 are reserved)
   115   *
   116   *  @return depth of the recursive calls. NOTE(review): error return value is not stated here -- confirm.
   117   */
   118  int gsacak_int(int_text *s, uint_t *SA, int_t *LCP, int_da *DA, uint_t n, uint_t k);
   119  
   120  /******************************************************************************/
   121  
   122  /* SACA_K: declared in this header without documentation; presumably the internal SACA-K routine behind sacak()/sacak_int() -- TODO confirm.
   123   * NOTE(review): the meanings of K, m, cs and level are not stated here; note that s is a signed int_t pointer. Confirm against the implementation. */
   124  int_t SACA_K(int_t	*s, uint_t *SA,
   125    uint_t n, unsigned int K,
   126    uint_t m, int cs, int level);
   127  /* gSACA_K: declared without documentation; presumably the internal routine behind gsacak()/gsacak_int(). NOTE(review): K, cs, separator and level are not described here -- confirm against the implementation. */
   128  int_t gSACA_K(uint_t *s, uint_t *SA,
   129    uint_t n, unsigned int K,
   130    int cs, uint_t separator, int level);
   131  
   132  #endif