modernc.org/cc@v1.0.1/v2/testdata/_sqlite/src/btreeInt.h (about)

     1  /*
     2  ** 2004 April 6
     3  **
     4  ** The author disclaims copyright to this source code.  In place of
     5  ** a legal notice, here is a blessing:
     6  **
     7  **    May you do good and not evil.
     8  **    May you find forgiveness for yourself and forgive others.
     9  **    May you share freely, never taking more than you give.
    10  **
    11  *************************************************************************
    12  ** This file implements an external (disk-based) database using BTrees.
    13  ** For a detailed discussion of BTrees, refer to
    14  **
    15  **     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
    16  **     "Sorting And Searching", pages 473-480. Addison-Wesley
    17  **     Publishing Company, Reading, Massachusetts.
    18  **
    19  ** The basic idea is that each page of the file contains N database
    20  ** entries and N+1 pointers to subpages.
    21  **
    22  **   ----------------------------------------------------------------
    23  **   |  Ptr(0) | Key(0) | Ptr(1) | Key(1) | ... | Key(N-1) | Ptr(N) |
    24  **   ----------------------------------------------------------------
    25  **
    26  ** All of the keys on the page that Ptr(0) points to have values less
    27  ** than Key(0).  All of the keys on page Ptr(1) and its subpages have
    28  ** values greater than Key(0) and less than Key(1).  All of the keys
    29  ** on Ptr(N) and its subpages have values greater than Key(N-1).  And
    30  ** so forth.
    31  **
    32  ** Finding a particular key requires reading O(log(M)) pages from the 
    33  ** disk where M is the number of entries in the tree.
    34  **
    35  ** In this implementation, a single file can hold one or more separate 
    36  ** BTrees.  Each BTree is identified by the index of its root page.  The
    37  ** key and data for any entry are combined to form the "payload".  A
    38  ** fixed amount of payload can be carried directly on the database
    39  ** page.  If the payload is larger than the preset amount then surplus
    40  ** bytes are stored on overflow pages.  The payload for an entry
    41  ** and the preceding pointer are combined to form a "Cell".  Each 
    42  ** page has a small header which contains the Ptr(N) pointer and other
    43  ** information such as the size of key and data.
    44  **
    45  ** FORMAT DETAILS
    46  **
    47  ** The file is divided into pages.  The first page is called page 1,
    48  ** the second is page 2, and so forth.  A page number of zero indicates
    49  ** "no such page".  The page size can be any power of 2 between 512 and 65536.
    50  ** Each page can be either a btree page, a freelist page, an overflow
    51  ** page, or a pointer-map page.
    52  **
    53  ** The first page is always a btree page.  The first 100 bytes of the first
    54  ** page contain a special header (the "file header") that describes the file.
    55  ** The format of the file header is as follows:
    56  **
    57  **   OFFSET   SIZE    DESCRIPTION
    58  **      0      16     Header string: "SQLite format 3\000"
    59  **     16       2     Page size in bytes.  (1 means 65536)
    60  **     18       1     File format write version
    61  **     19       1     File format read version
    62  **     20       1     Bytes of unused space at the end of each page
    63  **     21       1     Max embedded payload fraction (must be 64)
    64  **     22       1     Min embedded payload fraction (must be 32)
    65  **     23       1     Min leaf payload fraction (must be 32)
    66  **     24       4     File change counter
    67  **     28       4     Reserved for future use
    68  **     32       4     First freelist page
    69  **     36       4     Number of freelist pages in the file
    70  **     40      60     15 4-byte meta values passed to higher layers
    71  **
    72  **     40       4     Schema cookie
    73  **     44       4     File format of schema layer
    74  **     48       4     Size of page cache
    75  **     52       4     Largest root-page (auto/incr_vacuum)
    76  **     56       4     1=UTF-8 2=UTF16le 3=UTF16be
    77  **     60       4     User version
    78  **     64       4     Incremental vacuum mode
    79  **     68       4     Application-ID
    80  **     72      20     unused
    81  **     92       4     The version-valid-for number
    82  **     96       4     SQLITE_VERSION_NUMBER
    83  **
    84  ** All of the integer values are big-endian (most significant byte first).
    85  **
    86  ** The file change counter is incremented when the database is changed.
    87  ** This counter allows other processes to know when the file has changed
    88  ** and thus when they need to flush their cache.
    89  **
    90  ** The max embedded payload fraction is the amount of the total usable
    91  ** space in a page that can be consumed by a single cell for standard
    92  ** B-tree (non-LEAFDATA) tables.  A value of 255 means 100%.  The default
    93  ** is to limit the maximum cell size so that at least 4 cells will fit
    94  ** on one page.  Thus the default max embedded payload fraction is 64.
    95  **
    96  ** If the payload for a cell is larger than the max payload, then extra
    97  ** payload is spilled to overflow pages.  Once an overflow page is allocated,
    98  ** as many bytes as possible are moved into the overflow pages without letting
    99  ** the cell size drop below the min embedded payload fraction.
   100  **
   101  ** The min leaf payload fraction is like the min embedded payload fraction
   102  ** except that it applies to leaf nodes in a LEAFDATA tree.  The maximum
   103  ** payload fraction for a LEAFDATA tree is always 100% (or 255) and it
   104  ** is not specified in the header.
   105  **
   106  ** Each btree page is divided into three sections:  The header, the
   107  ** cell pointer array, and the cell content area.  Page 1 also has a 100-byte
   108  ** file header that occurs before the page header.
   109  **
   110  **      |----------------|
   111  **      | file header    |   100 bytes.  Page 1 only.
   112  **      |----------------|
   113  **      | page header    |   8 bytes for leaves.  12 bytes for interior nodes
   114  **      |----------------|
   115  **      | cell pointer   |   |  2 bytes per cell.  Sorted order.
   116  **      | array          |   |  Grows downward
   117  **      |                |   v
   118  **      |----------------|
   119  **      | unallocated    |
   120  **      | space          |
   121  **      |----------------|   ^  Grows upwards
   122  **      | cell content   |   |  Arbitrary order interspersed with freeblocks
   123  **      | area           |   |  and free space fragments.
   124  **      |----------------|
   125  **
   126  ** The page header looks like this:
   127  **
   128  **   OFFSET   SIZE     DESCRIPTION
   129  **      0       1      Flags. 1: intkey, 2: zerodata, 4: leafdata, 8: leaf
   130  **      1       2      byte offset to the first freeblock
   131  **      3       2      number of cells on this page
   132  **      5       2      first byte of the cell content area
   133  **      7       1      number of fragmented free bytes
   134  **      8       4      Right child (the Ptr(N) value).  Omitted on leaves.
   135  **
   136  ** The flags define the format of this btree page.  The leaf flag means that
   137  ** this page has no children.  The zerodata flag means that this page carries
   138  ** only keys and no data.  The intkey flag means that the key is an integer
   139  ** which is stored in the key size entry of the cell header rather than in
   140  ** the payload area.
   141  **
   142  ** The cell pointer array begins on the first byte after the page header.
   143  ** The cell pointer array contains zero or more 2-byte numbers which are
   144  ** offsets from the beginning of the page to the cell content in the cell
   145  ** content area.  The cell pointers occur in sorted order.  The system strives
   146  ** to keep free space after the last cell pointer so that new cells can
   147  ** be easily added without having to defragment the page.
   148  **
   149  ** Cell content is stored at the very end of the page and grows toward the
   150  ** beginning of the page.
   151  **
   152  ** Unused space within the cell content area is collected into a linked list of
   153  ** freeblocks.  Each freeblock is at least 4 bytes in size.  The byte offset
   154  ** to the first freeblock is given in the header.  Freeblocks occur in
   155  ** increasing order.  Because a freeblock must be at least 4 bytes in size,
   156  ** any group of 3 or fewer unused bytes in the cell content area cannot
   157  ** exist on the freeblock chain.  A group of 3 or fewer free bytes is called
   158  ** a fragment.  The total number of bytes in all fragments is recorded
   159  ** in the page header at offset 7.
   160  **
   161  **    SIZE    DESCRIPTION
   162  **      2     Byte offset of the next freeblock
   163  **      2     Bytes in this freeblock
   164  **
   165  ** Cells are of variable length.  Cells are stored in the cell content area at
   166  ** the end of the page.  Pointers to the cells are in the cell pointer array
   167  ** that immediately follows the page header.  Cells are not necessarily
   168  ** contiguous or in order, but cell pointers are contiguous and in order.
   169  **
   170  ** Cell content makes use of variable length integers.  A variable
   171  ** length integer is 1 to 9 bytes where the lower 7 bits of each 
   172  ** byte are used.  The integer consists of all bytes that have bit 8 set and
   173  ** the first byte with bit 8 clear.  The most significant byte of the integer
   174  ** appears first.  A variable-length integer may not be more than 9 bytes long.
   175  ** As a special case, all 8 bits of the 9th byte are used as data.  This
   176  ** allows a 64-bit integer to be encoded in 9 bytes.
   177  **
   178  **    0x00                      becomes  0x00000000
   179  **    0x7f                      becomes  0x0000007f
   180  **    0x81 0x00                 becomes  0x00000080
   181  **    0x82 0x00                 becomes  0x00000100
   182  **    0x80 0x7f                 becomes  0x0000007f
   183  **    0x8a 0x91 0xd1 0xac 0x78  becomes  0x12345678
   184  **    0x81 0x81 0x81 0x81 0x01  becomes  0x10204081
   185  **
   186  ** Variable length integers are used for rowids and to hold the number of
   187  ** bytes of key and data in a btree cell.
   188  **
   189  ** The content of a cell looks like this:
   190  **
   191  **    SIZE    DESCRIPTION
   192  **      4     Page number of the left child. Omitted if leaf flag is set.
   193  **     var    Number of bytes of data. Omitted if the zerodata flag is set.
   194  **     var    Number of bytes of key. Or the key itself if intkey flag is set.
   195  **      *     Payload
   196  **      4     First page of the overflow chain.  Omitted if no overflow
   197  **
   198  ** Overflow pages form a linked list.  Each page except the last is completely
   199  ** filled with data (pagesize - 4 bytes).  The last page can have as little
   200  ** as 1 byte of data.
   201  **
   202  **    SIZE    DESCRIPTION
   203  **      4     Page number of next overflow page
   204  **      *     Data
   205  **
   206  ** Freelist pages come in two subtypes: trunk pages and leaf pages.  The
   207  ** file header points to the first in a linked list of trunk pages.  Each trunk
   208  ** page points to multiple leaf pages.  The content of a leaf page is
   209  ** unspecified.  A trunk page looks like this:
   210  **
   211  **    SIZE    DESCRIPTION
   212  **      4     Page number of next trunk page
   213  **      4     Number of leaf pointers on this page
   214  **      *     zero or more page numbers of leaves
   215  */
   216  #include "sqliteInt.h"
   217  
   218  
   219  /* The maximum size in bytes of a single cell: the page size recorded in
   220  ** the object that pBt points to, minus 8.  pBt is evaluated exactly once.
   221  */
   222  #define MX_CELL_SIZE(pBt)  ((int)((pBt)->pageSize-8))
   223  
   224  /* The maximum number of cells on a single page of the database.  This
   225  ** assumes a minimum cell size of 6 bytes  (4 bytes for the cell itself
   226  ** plus 2 bytes for its entry in the cell pointer array).  Such
   227  ** small cells will be rare, but they are possible.
   228  */
   229  #define MX_CELL(pBt) (((pBt)->pageSize-8)/6)
   230  
   231  /* Forward declarations of structures that are defined later in this file */
   232  typedef struct MemPage MemPage;
   233  typedef struct BtLock BtLock;
   234  typedef struct CellInfo CellInfo;
   235  
   236  /*
   237  ** This is a magic string that appears at the beginning of every
   238  ** SQLite database in order to identify the file as a real database.
   239  **
   240  ** You can change this value at compile-time by specifying a
   241  ** -DSQLITE_FILE_HEADER="..." on the compiler command-line.  The
   242  ** header must be exactly 16 bytes including the zero-terminator, so
   243  ** the string itself must be 15 characters long.  If you change
   244  ** the header, then your custom library will not be able to read
   245  ** databases generated by the standard tools, and the standard tools
   246  ** will not be able to read databases created by your custom library.
   247  */
   248  #ifndef SQLITE_FILE_HEADER /* 123456789 123456 */
   249  #  define SQLITE_FILE_HEADER "SQLite format 3"
   250  #endif
   251  
   252  /*
   253  ** Page type flags.  An ORed combination of these flags appears as the
   254  ** first byte of the on-disk image of every BTree page.
   255  */
   256  #define PTF_INTKEY    0x01
   257  #define PTF_ZERODATA  0x02
   258  #define PTF_LEAFDATA  0x04
   259  #define PTF_LEAF      0x08
   260  
   261  /*
   262  ** An instance of this object stores information about a single database
   263  ** page that has been loaded into memory.  The information in this object
   264  ** is derived from the raw on-disk page content.
   265  **
   266  ** As each database page is loaded into memory, the pager allocates an
   267  ** instance of this object and zeros the first 8 bytes.  (This is the
   268  ** "extra" information associated with each page of the pager.)
   269  **
   270  ** Access to all fields of this structure is controlled by the mutex
   271  ** stored in MemPage.pBt->mutex.
   272  */
   273  struct MemPage {
   274    u8 isInit;           /* True if previously initialized. MUST BE FIRST! */
   275    u8 bBusy;            /* Prevent endless loops on corrupt database files */
   276    u8 intKey;           /* True if table b-trees.  False for index b-trees */
   277    u8 intKeyLeaf;       /* True if the leaf of an intKey table */
   278    Pgno pgno;           /* Page number for this page */
   279    /* Only the first 8 bytes (above) are zeroed by pager.c when a new page
   280    ** is allocated. All fields that follow must be initialized before use */
   281    u8 leaf;             /* True if a leaf page */
   282    u8 hdrOffset;        /* 100 for page 1.  0 otherwise */
   283    u8 childPtrSize;     /* 0 if leaf==1.  4 if leaf==0 */
   284    u8 max1bytePayload;  /* min(maxLocal,127) */
   285    u8 nOverflow;        /* Number of overflow cell bodies in aCell[] */
   286    u16 maxLocal;        /* Copy of BtShared.maxLocal or BtShared.maxLeaf */
   287    u16 minLocal;        /* Copy of BtShared.minLocal or BtShared.minLeaf */
   288    u16 cellOffset;      /* Index in aData of first cell pointer */
   289    u16 nFree;           /* Number of free bytes on the page */
   290    u16 nCell;           /* Number of cells on this page, local and ovfl */
   291    u16 maskPage;        /* Mask for page offset */
   292    u16 aiOvfl[4];       /* Insert the i-th overflow cell before the aiOvfl[i]-th
   293                         ** non-overflow cell */
   294    u8 *apOvfl[4];       /* Pointers to the body of overflow cells */
   295    BtShared *pBt;       /* Pointer to BtShared that this page is part of */
   296    u8 *aData;           /* Pointer to disk image of the page data */
   297    u8 *aDataEnd;        /* One byte past the end of usable data */
   298    u8 *aCellIdx;        /* The cell index area */
   299    u8 *aDataOfst;       /* Same as aData for leaves.  aData+4 for interior */
   300    DbPage *pDbPage;     /* Pager page handle */
   301    u16 (*xCellSize)(MemPage*,u8*);             /* cellSizePtr method */
   302    void (*xParseCell)(MemPage*,u8*,CellInfo*); /* btreeParseCell method */
   303  };
   304  
   305  /*
   306  ** A linked list of the following structures is stored at BtShared.pLock.
   307  ** Locks are added (or upgraded from READ_LOCK to WRITE_LOCK) when a cursor
   308  ** is opened on the table with root page BtLock.iTable. Locks are removed
   309  ** from this list when a transaction is committed or rolled back, or when
   310  ** a btree handle is closed.
   311  */
   312  struct BtLock {
   313    Btree *pBtree;        /* Btree handle holding this lock */
   314    Pgno iTable;          /* Root page of table */
   315    u8 eLock;             /* READ_LOCK or WRITE_LOCK */
   316    BtLock *pNext;        /* Next in BtShared.pLock list */
   317  };
   318  
   319  /* Candidate values for BtLock.eLock; READ_LOCK can be upgraded to WRITE_LOCK */
   320  #define READ_LOCK     1
   321  #define WRITE_LOCK    2
   322  
   323  /* A Btree handle
   324  **
   325  ** A database connection contains a pointer to an instance of
   326  ** this object for every database file that it has open.  This structure
   327  ** is opaque to the database connection.  The database connection cannot
   328  ** see the internals of this structure and only deals with pointers to
   329  ** this structure.
   330  **
   331  ** For some database files, the same underlying database cache might be
   332  ** shared between multiple connections.  In that case, each connection
   333  ** has its own instance of this object.  But each instance of this object
   334  ** points to the same BtShared object.  The database cache and the
   335  ** schema associated with the database file are all contained within
   336  ** the BtShared object.
   337  **
   338  ** All fields in this structure are accessed under sqlite3.mutex.
   339  ** The pBt pointer itself may not be changed while there exist cursors
   340  ** in the referenced BtShared that point back to this Btree since those
   341  ** cursors have to go through this Btree to find their BtShared and
   342  ** they often do so without holding sqlite3.mutex.
   343  */
   344  struct Btree {
   345    sqlite3 *db;       /* The database connection holding this btree */
   346    BtShared *pBt;     /* Sharable content of this btree */
   347    u8 inTrans;        /* TRANS_NONE, TRANS_READ or TRANS_WRITE */
   348    u8 sharable;       /* True if we can share pBt with another db */
   349    u8 locked;         /* True if db currently has pBt locked */
   350    u8 hasIncrblobCur; /* True if there are one or more Incrblob cursors */
   351    int wantToLock;    /* Number of nested calls to sqlite3BtreeEnter() */
   352    int nBackup;       /* Number of backup operations reading this btree */
   353    u32 iDataVersion;  /* Combines with pBt->pPager->iDataVersion */
   354    Btree *pNext;      /* List of other sharable Btrees from the same db */
   355    Btree *pPrev;      /* Back pointer of the same list */
   356  #ifndef SQLITE_OMIT_SHARED_CACHE
   357    BtLock lock;       /* Object used to lock page 1 */
   358  #endif
   359  };
   360  
   361  /*
   362  ** Btree.inTrans (a u8 field) may take one of the following values.
   363  **
   364  ** If the shared-data extension is enabled, there may be multiple users
   365  ** of the Btree structure. At most one of these may open a write transaction,
   366  ** but any number may have active read transactions.
   367  */
   368  #define TRANS_NONE  0
   369  #define TRANS_READ  1
   370  #define TRANS_WRITE 2
   371  
   372  /*
   373  ** An instance of this object represents a single database file.
   374  **
   375  ** A single database file can be in use at the same time by two
   376  ** or more database connections.  When two or more connections are
   377  ** sharing the same database file, each connection has its own
   378  ** private Btree object for the file and each of those Btrees points
   379  ** to this one BtShared object.  BtShared.nRef is the number of
   380  ** connections currently sharing this database file.
   381  **
   382  ** Fields in this structure are accessed under the BtShared.mutex
   383  ** mutex, except for nRef and pNext which are accessed under the
   384  ** global SQLITE_MUTEX_STATIC_MASTER mutex.  The pPager field
   385  ** may not be modified once it is initially set as long as nRef>0.
   386  ** The pSchema field may be set once under BtShared.mutex and
   387  ** thereafter is unchanged as long as nRef>0.
   388  **
   389  ** isPending  (NOTE(review): BtShared has no field of this name;
   390  **             presumably the BTS_PENDING bit of btsFlags - confirm):
   391  **   If a BtShared client fails to obtain a write-lock on a database
   392  **   table (because there exists one or more read-locks on the table),
   393  **   the shared-cache enters 'pending-lock' state and isPending is
   394  **   set to true.
   395  **
   396  **   The shared-cache leaves the 'pending-lock' state when either of
   397  **   the following occur:
   398  **
   399  **     1) The current writer (BtShared.pWriter) concludes its transaction, OR
   400  **     2) The number of locks held by other connections drops to zero.
   401  **
   402  **   While in the 'pending-lock' state, no connection may start a new
   403  **   transaction.
   404  **
   405  **   This feature is included to help prevent writer-starvation.
   406  */
   407  struct BtShared {
   408    Pager *pPager;        /* The page cache */
   409    sqlite3 *db;          /* Database connection currently using this Btree */
   410    BtCursor *pCursor;    /* A list of all open cursors */
   411    MemPage *pPage1;      /* First page of the database */
   412    u8 openFlags;         /* Flags to sqlite3BtreeOpen() */
   413  #ifndef SQLITE_OMIT_AUTOVACUUM
   414    u8 autoVacuum;        /* True if auto-vacuum is enabled */
   415    u8 incrVacuum;        /* True if incr-vacuum is enabled */
   416    u8 bDoTruncate;       /* True to truncate db on commit */
   417  #endif
   418    u8 inTransaction;     /* Transaction state */
   419    u8 max1bytePayload;   /* Maximum first byte of cell for a 1-byte payload */
   420  #ifdef SQLITE_HAS_CODEC
   421    u8 optimalReserve;    /* Desired amount of reserved space per page */
   422  #endif
   423    u16 btsFlags;         /* Boolean parameters.  See BTS_* macros below */
   424    u16 maxLocal;         /* Maximum local payload in non-LEAFDATA tables */
   425    u16 minLocal;         /* Minimum local payload in non-LEAFDATA tables */
   426    u16 maxLeaf;          /* Maximum local payload in a LEAFDATA table */
   427    u16 minLeaf;          /* Minimum local payload in a LEAFDATA table */
   428    u32 pageSize;         /* Total number of bytes on a page */
   429    u32 usableSize;       /* Number of usable bytes on each page */
   430    int nTransaction;     /* Number of open transactions (read + write) */
   431    u32 nPage;            /* Number of pages in the database */
   432    void *pSchema;        /* Pointer to space allocated by sqlite3BtreeSchema() */
   433    void (*xFreeSchema)(void*);  /* Destructor for BtShared.pSchema */
   434    sqlite3_mutex *mutex; /* Non-recursive mutex required to access this object */
   435    Bitvec *pHasContent;  /* Set of pages moved to free-list this transaction */
   436  #ifndef SQLITE_OMIT_SHARED_CACHE
   437    int nRef;             /* Number of references to this structure */
   438    BtShared *pNext;      /* Next on a list of sharable BtShared structs */
   439    BtLock *pLock;        /* List of locks held on this shared-btree struct */
   440    Btree *pWriter;       /* Btree with currently open write transaction */
   441  #endif
   442    u8 *pTmpSpace;        /* Temp space sufficient to hold a single cell */
   443  };
   444  
   445  /*
   446  ** Allowed values for BtShared.btsFlags.  Each value is a bit mask.
   447  */
   448  #define BTS_READ_ONLY        0x0001   /* Underlying file is readonly */
   449  #define BTS_PAGESIZE_FIXED   0x0002   /* Page size can no longer be changed */
   450  #define BTS_SECURE_DELETE    0x0004   /* PRAGMA secure_delete is enabled */
   451  #define BTS_OVERWRITE        0x0008   /* Overwrite deleted content with zeros */
   452  #define BTS_FAST_SECURE      0x000c   /* BTS_SECURE_DELETE|BTS_OVERWRITE */
   453  #define BTS_INITIALLY_EMPTY  0x0010   /* Database was empty at trans start */
   454  #define BTS_NO_WAL           0x0020   /* Do not open write-ahead-log files */
   455  #define BTS_EXCLUSIVE        0x0040   /* pWriter has an exclusive lock */
   456  #define BTS_PENDING          0x0080   /* Waiting for read-locks to clear */
   457  
   458  /*
   459  ** An instance of the following structure is used to hold information
   460  ** about a cell.  The parseCellPtr() function fills in this structure
   461  ** based on information extracted from the raw disk page.
   462  */
   463  struct CellInfo {
   464    i64 nKey;      /* The key for INTKEY tables, or nPayload otherwise */
   465    u8 *pPayload;  /* Pointer to the start of payload */
   466    u32 nPayload;  /* Bytes of payload */
   467    u16 nLocal;    /* Amount of payload held locally, not on overflow */
   468    u16 nSize;     /* Size of the cell content on the main b-tree page */
   469  };
   470  
   471  /*
   472  ** Maximum depth of an SQLite B-Tree structure. Any B-Tree deeper than
   473  ** this will be declared corrupt. This value is calculated based on a
   474  ** maximum database size of 2^31 pages, and a minimum fanout of 2 for a
   475  ** root-node and 3 for all other internal nodes.
   476  **
   477  ** If a tree that appears to be taller than this is encountered, it is
   478  ** assumed that the database is corrupt.
   479  */
   480  #define BTCURSOR_MAX_DEPTH 20
   481  
   482  /*
   483  ** A cursor is a pointer to a particular entry within a particular
   484  ** b-tree within a database file.
   485  **
   486  ** The entry is identified by its MemPage and the index in
   487  ** MemPage.aCell[] of the entry.
   488  **
   489  ** A single database file can be shared by two or more database connections,
   490  ** but cursors cannot be shared.  Each cursor is associated with a
   491  ** particular database connection identified by BtCursor.pBtree.db.
   492  **
   493  ** Fields in this structure are accessed under the BtShared.mutex
   494  ** found at self->pBt->mutex.
   495  **
   496  ** skipNext meaning:
   497  **    eState==SKIPNEXT && skipNext>0:  Next sqlite3BtreeNext() is no-op.
   498  **    eState==SKIPNEXT && skipNext<0:  Next sqlite3BtreePrevious() is no-op.
   499  **    eState==FAULT:                   Cursor fault with skipNext as error code.
   500  */
   501  struct BtCursor {
   502    u8 eState;                /* One of the CURSOR_XXX constants (see below) */
   503    u8 curFlags;              /* zero or more BTCF_* flags defined below */
   504    u8 curPagerFlags;         /* Flags to send to sqlite3PagerGet() */
   505    u8 hints;                 /* As configured by CursorSetHints() */
   506    int nOvflAlloc;           /* Allocated size of aOverflow[] array */
   507    Btree *pBtree;            /* The Btree to which this cursor belongs */
   508    BtShared *pBt;            /* The BtShared this cursor points to */
   509    BtCursor *pNext;          /* Forms a linked list of all cursors */
   510    Pgno *aOverflow;          /* Cache of overflow page locations */
   511    CellInfo info;            /* A parse of the cell we are pointing at */
   512    i64 nKey;                 /* Size of pKey, or last integer key */
   513    void *pKey;               /* Saved key that was cursor last known position */
   514    Pgno pgnoRoot;            /* The root page of this tree */
   515    int skipNext;    /* Prev() is noop if negative. Next() is noop if positive.
   516                     ** Error code if eState==CURSOR_FAULT */
   517    /* All fields above are zeroed when the cursor is allocated.  See
   518    ** sqlite3BtreeCursorZero().  Fields that follow must be manually
   519    ** initialized. */
   520    i8 iPage;                 /* Index of current page in apPage */
   521    u8 curIntKey;             /* Value of apPage[0]->intKey */
   522    u16 ix;                   /* Current index for apPage[iPage] */
   523    u16 aiIdx[BTCURSOR_MAX_DEPTH-1];     /* Current index in apPage[i] */
   524    struct KeyInfo *pKeyInfo;            /* Arg passed to comparison function */
   525    MemPage *pPage;                        /* Current page */
   526    MemPage *apPage[BTCURSOR_MAX_DEPTH-1]; /* Stack of parents of current page */
   527  };
   528  
   529  /*
   530  ** Legal values for BtCursor.curFlags
   531  */
   532  #define BTCF_WriteFlag    0x01   /* True if a write cursor */
   533  #define BTCF_ValidNKey    0x02   /* True if info.nKey is valid */
   534  #define BTCF_ValidOvfl    0x04   /* True if aOverflow is valid */
   535  #define BTCF_AtLast       0x08   /* Cursor is pointing to the last entry */
   536  #define BTCF_Incrblob     0x10   /* True if an incremental I/O handle */
   537  #define BTCF_Multiple     0x20   /* Maybe another cursor on the same btree */
   538  
   539  /*
   540  ** Potential values for BtCursor.eState.
   541  **
   542  ** CURSOR_INVALID:
   543  **   Cursor does not point to a valid entry. This can happen (for example)
   544  **   because the table is empty or because BtreeCursorFirst() has not been
   545  **   called.
   546  **
   547  ** CURSOR_VALID:
   548  **   Cursor points to a valid entry. getPayload() etc. may be called.
   549  **
   550  ** CURSOR_SKIPNEXT:
   551  **   Cursor is valid except that the BtCursor.skipNext field is non-zero
   552  **   indicating that the next sqlite3BtreeNext() or sqlite3BtreePrevious()
   553  **   operation should be a no-op.
   554  **
   555  ** CURSOR_REQUIRESEEK:
   556  **   The table that this cursor was opened on still exists, but has been
   557  **   modified since the cursor was last used. The cursor position is saved
   558  **   in variables BtCursor.pKey and BtCursor.nKey. When a cursor is in
   559  **   this state, restoreCursorPosition() can be called to attempt to
   560  **   seek the cursor to the saved position.
   561  **
   562  ** CURSOR_FAULT:
   563  **   An unrecoverable error (an I/O error or a malloc failure) has occurred
   564  **   on a different connection that shares the BtShared cache with this
   565  **   cursor.  The error has left the cache in an inconsistent state.
   566  **   Do nothing else with this cursor.  Any attempt to use the cursor
   567  **   should return the error code stored in BtCursor.skipNext.
   568  */
   569  #define CURSOR_INVALID           0
   570  #define CURSOR_VALID             1
   571  #define CURSOR_SKIPNEXT          2
   572  #define CURSOR_REQUIRESEEK       3
   573  #define CURSOR_FAULT             4
   574  
   575  /*
   576  ** The database page the PENDING_BYTE occupies (PAGER_MJ_PGNO). Never used.
   577  */
   578  # define PENDING_BYTE_PAGE(pBt) PAGER_MJ_PGNO(pBt)
   579  
   580  /*
   581  ** These macros define the location of the pointer-map entry for a
   582  ** database page.
   583  **
   584  ** PTRMAP_PAGENO(pBt, pgno) returns the database page number of the
   585  ** pointer-map page that stores the entry for page pgno; the lookup is
   586  ** delegated to ptrmapPageno().  PTRMAP_PTROFFSET(pgptrmap, pgno) returns
   587  ** the offset of the 5-byte map entry for page pgno, computed relative to
   588  ** the pointer-map page numbered pgptrmap.
   589  **
   590  ** If the pgno argument passed to PTRMAP_PAGENO is a pointer-map page,
   591  ** then pgno is returned. So (pgno==PTRMAP_PAGENO(pBt, pgno)) can be
   592  ** used to test if pgno is a pointer-map page. PTRMAP_ISPAGE implements
   593  ** this test; it evaluates pgno twice, so pass a side-effect-free argument.
   594  */
   595  #define PTRMAP_PAGENO(pBt, pgno) ptrmapPageno((pBt), (pgno))
   596  #define PTRMAP_PTROFFSET(pgptrmap, pgno) (5*((pgno)-(pgptrmap)-1))
   597  #define PTRMAP_ISPAGE(pBt, pgno) (PTRMAP_PAGENO((pBt),(pgno))==(pgno))
   598  
   /*
   ** The pointer map is a lookup table that identifies the parent page for
   ** each child page in the database file.  The parent page is the page that
   ** contains a pointer to the child.  Every page in the database has 0 or 1
   ** parent pages.  (In this context 'database page' refers to any page that
   ** is not part of the pointer map itself.)  Each pointer map entry consists
   ** of a single byte 'type' and a 4 byte parent page number.  The PTRMAP_XXX
   ** identifiers below are the valid types.
   **
   ** The purpose of the pointer map is to facilitate moving pages from one
   ** position in the file to another as part of autovacuum.  When a page
   ** is moved, the pointer in its parent must be updated to point to the
   ** new location.  The pointer map is used to locate the parent page quickly.
   */

   /* The database page is a root-page.  The page-number is not used. */
   #define PTRMAP_ROOTPAGE   1

   /* The database page is an unused (free) page.  The page-number is not
   ** used. */
   #define PTRMAP_FREEPAGE   2

   /* The database page is the first page in a list of overflow pages.  The
   ** page number identifies the page that contains the cell with a pointer
   ** to this overflow page. */
   #define PTRMAP_OVERFLOW1  3

   /* The database page is the second or later page in a list of overflow
   ** pages.  The page-number identifies the previous page in the overflow
   ** page list. */
   #define PTRMAP_OVERFLOW2  4

   /* The database page is a non-root btree page.  The page number identifies
   ** the parent page in the btree. */
   #define PTRMAP_BTREE      5
   635  
   /* A bunch of assert() statements to check that the transaction state
   ** variables of handle p (type Btree*) are internally consistent:
   **
   **   * either p->pBt->inTransaction is not TRANS_NONE or
   **     p->pBt->nTransaction is zero, and
   **   * p->pBt->inTransaction is at least p->inTrans.
   **
   ** The expansion is a single do{...}while(0) statement, so the macro must
   ** be followed by a semicolon and is safe as the body of an unbraced
   ** if/else.  The argument is parenthesized but is evaluated more than
   ** once, so it should not have side effects.
   */
   #define btreeIntegrity(p) \
     do{ \
       assert( (p)->pBt->inTransaction!=TRANS_NONE \
               || (p)->pBt->nTransaction==0 ); \
       assert( (p)->pBt->inTransaction>=(p)->inTrans ); \
     }while(0)
   642  
   643  
   /*
   ** ISAUTOVACUUM is used within balance_nonroot() to determine whether or
   ** not the database supports auto-vacuum.  It appears inside an expression
   ** that is an argument to another macro (sqliteMallocRaw), where
   ** conditional compilation cannot be used, so the choice is made here.
   **
   ** When SQLITE_OMIT_AUTOVACUUM is defined the macro is the constant 0.
   ** Otherwise it reads pBt->autoVacuum, so a variable named pBt must be in
   ** scope wherever the macro is used.
   */
   #ifdef SQLITE_OMIT_AUTOVACUUM
   # define ISAUTOVACUUM 0
   #else
   # define ISAUTOVACUUM (pBt->autoVacuum)
   #endif
   656  
   657  
   658  /*
   659  ** This structure is passed around through all the sanity checking routines
   660  ** in order to keep track of some global state information.
   661  **
   662  ** The aRef[] array is allocated so that there is 1 bit for each page in
   663  ** the database. As the integrity-check proceeds, for each page used in
   664  ** the database the corresponding bit is set. This allows integrity-check to 
   665  ** detect pages that are used twice and orphaned pages (both of which 
   666  ** indicate corruption).
   667  */
   668  typedef struct IntegrityCk IntegrityCk;
   669  struct IntegrityCk {
   670    BtShared *pBt;    /* The tree being checked out */
   671    Pager *pPager;    /* The associated pager.  Also accessible by pBt->pPager */
   672    u8 *aPgRef;       /* 1 bit per page in the db (see above) */
   673    Pgno nPage;       /* Number of pages in the database */
   674    int mxErr;        /* Stop accumulating errors when this reaches zero */
   675    int nErr;         /* Number of messages written to zErrMsg so far */
   676    int mallocFailed; /* A memory allocation error has occurred */
   677    const char *zPfx; /* Error message prefix */
   678    int v1, v2;       /* Values for up to two %d fields in zPfx */
   679    StrAccum errMsg;  /* Accumulate the error message text here */
   680    u32 *heap;        /* Min-heap used for analyzing cell coverage */
   681  };
   682  
   /*
   ** Routines to read or write two- and four-byte big-endian integer values.
   **
   ** get2byte(x) combines x[0] (high byte) and x[1] (low byte).
   ** put2byte(p,v) stores the two low-order bytes of v, high byte first,
   ** into p[0] and p[1].  Both macros evaluate their pointer argument
   ** twice.  The four-byte forms are aliases for sqlite3Get4byte and
   ** sqlite3Put4byte.
   */
   #define get2byte(x)    (((x)[0]<<8) | (x)[1])
   #define put2byte(p,v)  ((p)[0]=(u8)((v)>>8), (p)[1]=(u8)(v))
   #define get4byte       sqlite3Get4byte
   #define put4byte       sqlite3Put4byte
   690  
   /*
   ** get2byteAligned(), unlike get2byte(), requires that its argument point
   ** to a two-byte aligned address.  get2byteAligned() is only used for
   ** accessing the cell addresses in a btree header.
   **
   ** Implementation selected at compile time:
   **   SQLITE_BYTEORDER==4321                          direct u16 load
   **   SQLITE_BYTEORDER==1234 && GCC_VERSION>=4008000  u16 load + __builtin_bswap16
   **   SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300    u16 load + _byteswap_ushort
   **   anything else                                   byte-wise, as get2byte()
   */
   #if SQLITE_BYTEORDER==4321
   #  define get2byteAligned(x)  (*(u16*)(x))
   #elif SQLITE_BYTEORDER==1234 && GCC_VERSION>=4008000
   #  define get2byteAligned(x)  __builtin_bswap16(*(u16*)(x))
   #elif SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300
   #  define get2byteAligned(x)  _byteswap_ushort(*(u16*)(x))
   #else
   #  define get2byteAligned(x)  (((x)[0]<<8) | (x)[1])
   #endif