github.com/tinygo-org/tinygo@v0.31.3-0.20240404173401-90b0bf646c27/targets/rp2040-boot-stage2.S (about)

     1  //
     2  // Implementation of RP2040 stage 2 boot loader.  This code is derived from the
     3  // Winbond W25Q080 implementation (as found in the Pico) in the official Pico SDK.
     4  //
     5  // This implementation has been made 'stand-alone' by including necessary code /
     6  // symbols from the included files in the reference implementation directly into
     7  // the source.  It has also been modified to include the conditional logic from
     8  // the CircuitPython implementation that supports additional flash chips.  The
     9  // CircuitPython source is here:
    10  //   https://github.com/adafruit/circuitpython/blob/main/ports/raspberrypi/stage2.c.jinja
    11  //
    12  // This file cannot be assembled directly, instead assemble the board-specific file
    13  // (such as pico-boot-stage2.S) which defines the parameters specific to the flash
    14  // chip included on that board.
    15  //
    16  // Care has been taken to preserve ordering and it has been verified the generated
    17  // binary is byte-for-byte identical to the reference code binary when assembled for
    18  // the Pico.
    19  //
    20  // Note: the stage 2 boot loader must be 256 bytes in length and have a checksum
    21  // present.  In TinyGo, the linker script is responsible for allocating 256 bytes
    22  // for the .boot2 section and the build logic patches the checksum into the
    23  // binary after linking, controlled by the '<target>.json' flag 'rp2040-boot-patch'.
    24  //
    25  // The stage 2 bootstrap section can be inspected in an elf file using this command:
    26  //     objdump -s -j .boot2 <binary>.elf
    27  //
    28  // Original Source:
    29  // https://github.com/raspberrypi/pico-sdk/blob/master/src/rp2_common/boot_stage2/boot2_w25q080.S
    30  //
    31  
    32  
    33  // ----------------------------------------------------------------------------
    34  // Second stage boot code
    35  // Copyright (c) 2019-2021 Raspberry Pi (Trading) Ltd.
    36  // SPDX-License-Identifier: BSD-3-Clause
    37  //
    38  // Device:      Winbond W25Q080
    39  //              Also supports W25Q16JV (which has some different SR instructions)
    40  //              Also supports AT25SF081
    41  //              Also supports S25FL132K0
    42  //
    43  // Description: Configures W25Q080 to run in Quad I/O continuous read XIP mode
    44  //
    45  // Details:     * Check status register 2 to determine if QSPI mode is enabled,
    46  //                and perform an SR2 programming cycle if necessary.
    47  //              * Use SSI to perform a dummy 0xEB read command, with the mode
    48  //                continuation bits set, so that the flash will not require
    49  //                0xEB instruction prefix on subsequent reads.
    50  //              * Configure SSI to write address, mode bits, but no instruction.
    51  //                SSI + flash are now jointly in a state where continuous reads
    52  //                can take place.
    53  //              * Jump to exit pointer passed in via lr. Bootrom passes null,
    54  //                in which case this code uses a default 256 byte flash offset
    55  //
    56  // Building:    * This code must be position-independent, and use stack only
    57  //              * The code will be padded to a size of 256 bytes, including a
    58  //                4-byte checksum. Therefore code size cannot exceed 252 bytes.
    59  // ----------------------------------------------------------------------------
    60  
    61  
    62  //
    63  // Expanded include files
    64  //
        // Constants taken from the files the reference implementation includes,
        // reproduced here so that this source is stand-alone.
        //
        // Flash commands used for status-register access:
    65  #define CMD_WRITE_ENABLE 0x06
    66  #define CMD_READ_STATUS1 0x05
    67  #define CMD_READ_STATUS2 0x35
    68  #define CMD_WRITE_STATUS1 0x01
    69  #define CMD_WRITE_STATUS2 0x31
    70  #define SREG_DATA 0x02  // Enable quad-SPI mode
    71  
        // Peripheral / memory-window base addresses:
    72  #define XIP_BASE       0x10000000
    73  #define XIP_SSI_BASE   0x18000000
    74  #define PADS_QSPI_BASE 0x40020000
    75  #define PPB_BASE       0xe0000000
    76  
    77  #define M0PLUS_VTOR_OFFSET 0x0000ed08  // added to PPB_BASE in vector_into_flash
    78  
        // QSPI pad control: register offsets from PADS_QSPI_BASE and field bits
    79  #define PADS_QSPI_GPIO_QSPI_SCLK_DRIVE_LSB      4
    80  #define PADS_QSPI_GPIO_QSPI_SCLK_SLEWFAST_BITS  0x00000001
    81  #define PADS_QSPI_GPIO_QSPI_SCLK_OFFSET         0x00000004
    82  #define PADS_QSPI_GPIO_QSPI_SD0_OFFSET          0x00000008
    83  #define PADS_QSPI_GPIO_QSPI_SD0_SCHMITT_BITS    0x00000002
    84  #define PADS_QSPI_GPIO_QSPI_SD1_OFFSET          0x0000000c
    85  #define PADS_QSPI_GPIO_QSPI_SD2_OFFSET          0x00000010
    86  #define PADS_QSPI_GPIO_QSPI_SD3_OFFSET          0x00000014
    87  
        // SSI register offsets from XIP_SSI_BASE
    88  #define SSI_CTRLR0_OFFSET        0x00000000
    89  #define SSI_CTRLR1_OFFSET        0x00000004
    90  #define SSI_SSIENR_OFFSET        0x00000008
    91  #define SSI_BAUDR_OFFSET         0x00000014
    92  #define SSI_SR_OFFSET            0x00000028
    93  #define SSI_DR0_OFFSET           0x00000060
    94  #define SSI_RX_SAMPLE_DLY_OFFSET 0x000000f0
    95  
        // SSI CTRLR0 fields
    96  #define SSI_CTRLR0_DFS_32_LSB 16
    97  
        // Frame-format values. _STD is what FRAME_FORMAT expands to when
        // BOARD_QUAD_OK != 1, so it has to be defined for such builds to assemble.
        #define SSI_CTRLR0_SPI_FRF_VALUE_STD  0x0
    98  #define SSI_CTRLR0_SPI_FRF_VALUE_QUAD 0x2
    99  #define SSI_CTRLR0_SPI_FRF_LSB        21
   100  
   101  #define SSI_CTRLR0_TMOD_VALUE_TX_AND_RX   0x0
   102  #define SSI_CTRLR0_TMOD_VALUE_EEPROM_READ 0x3
   103  #define SSI_CTRLR0_TMOD_LSB               8
   104  
        // SSI SPI_CTRLR0 fields
        // Transfer-type values. _1C1A is what TRANSACTION_TYPE expands to when
        // BOARD_QUAD_OK != 1, so it has to be defined for such builds to assemble.
        #define SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_1C1A 0x0
   105  #define SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_1C2A 0x1
   106  #define SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_2C2A 0x2
   107  
   108  #define SSI_SPI_CTRLR0_OFFSET 0x000000f4
   109  
   110  #define SSI_SPI_CTRLR0_INST_L_VALUE_NONE 0x0
   111  #define SSI_SPI_CTRLR0_INST_L_VALUE_8B   0x2
   112  
   113  #define SSI_SPI_CTRLR0_TRANS_TYPE_LSB  0
   114  #define SSI_SPI_CTRLR0_ADDR_L_LSB      2
   115  #define SSI_SPI_CTRLR0_INST_L_LSB      8
   116  #define SSI_SPI_CTRLR0_WAIT_CYCLES_LSB 11
   117  #define SSI_SPI_CTRLR0_XIP_CMD_LSB     24
   118  
        // SSI status register (SR) bits polled by wait_ssi_ready
   119  #define SSI_SR_BUSY_BITS  0x00000001
   120  #define SSI_SR_TFE_BITS   0x00000004
   121  
   122  
   123  // ----------------------------------------------------------------------------
   124  // Config section
   125  // ----------------------------------------------------------------------------
   126  // It should be possible to support most flash devices by modifying this section
   127  
   128  // The serial flash interface will run at clk_sys/PICO_FLASH_SPI_CLKDIV.
   129  // This must be a positive, even integer.
   130  // The bootrom is very conservative with SPI frequency, but here we should be
   131  // as aggressive as possible.
   132  
   133  #define PICO_FLASH_SPI_CLKDIV BOARD_PICO_FLASH_SPI_CLKDIV  // value comes from the board-specific file
   134  #if PICO_FLASH_SPI_CLKDIV & 1
   135  #error PICO_FLASH_SPI_CLKDIV must be even
   136  #endif
   137  
        // FRAME_FORMAT, TRANSACTION_TYPE, INSTRUCTION_LENGTH, READ_INSTRUCTION and
        // ADDR_L are consumed by the CTRLR0_ENTER_XIP and SPI_CTRLR0_XIP register
        // values built further down. BOARD_QUAD_OK selects between a quad I/O
        // continuous-read setup and a setup that sends an 8-bit read command on
        // every transfer.
   138  #if BOARD_QUAD_OK==1
   139  // Define interface width: single/dual/quad IO
   140  #define FRAME_FORMAT       SSI_CTRLR0_SPI_FRF_VALUE_QUAD
   141  #define TRANSACTION_TYPE   SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_2C2A
   142  // Note that the INST_L field is used to select what XIP data gets pushed into
   143  // the TX FIFO:
   144  //      INST_L_0_BITS   {ADDR[23:0],XIP_CMD[7:0]}       Load "mode bits" into XIP_CMD
   145  //      Anything else   {XIP_CMD[7:0],ADDR[23:0]}       Load SPI command into XIP_CMD
   146  #define INSTRUCTION_LENGTH SSI_SPI_CTRLR0_INST_L_VALUE_NONE
   147  #define READ_INSTRUCTION   MODE_CONTINUOUS_READ
   148  #define ADDR_L             8 // in 4-bit units: 6 for the 24-bit address, 2 for the 8 mode bits
   149  #else  // BOARD_QUAD_OK != 1: 8-bit read command (BOARD_CMD_READ) sent with each transfer
   150  #define FRAME_FORMAT       SSI_CTRLR0_SPI_FRF_VALUE_STD
   151  #define TRANSACTION_TYPE   SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_1C1A
   152  #define INSTRUCTION_LENGTH SSI_SPI_CTRLR0_INST_L_VALUE_8B
   153  #define READ_INSTRUCTION   BOARD_CMD_READ
   154  #define ADDR_L             6 // in 4-bit units: 6 * 4 = 24 address bits, no mode bits
   155  #endif
   156  
   157  // The flash-chip specific read instruction
   158  #define CMD_READ BOARD_CMD_READ
   159  
   160  // "Mode bits" are 8 special bits sent immediately after
   161  // the address bits in a "Read Data Fast Quad I/O" command sequence. 
   162  // On W25Q080, the four LSBs are don't care, and if MSBs == 0xa, the
   163  // next read does not require the 0xeb instruction prefix.
   164  #define MODE_CONTINUOUS_READ 0xa0
   165  
   166  // How many clocks of Hi-Z following the mode bits. For W25Q080, 4 dummy cycles
   167  // are required.
   168  #define WAIT_CYCLES BOARD_WAIT_CYCLES
   169  
   170  
   171  // If defined, we will read status reg, compare to SREG_DATA, and overwrite
   172  // with our value if the SR doesn't match.
   173  // We do a two-byte write to SR1 (01h cmd) rather than a one-byte write to
   174  // SR2 (31h cmd) as the latter command isn't supported by WX25Q080.
   175  // This isn't great because it will remove block protections.
   176  // A better solution is to use a volatile SR write if your device supports it.
   177  #define PROGRAM_STATUS_REG
   178  
   179  .syntax unified
   180  .cpu cortex-m0plus
   181  .thumb
   182  .section .boot2, "ax"  // allocatable + executable; the 256-byte .boot2 region is laid out by the linker script
   183  
   184  // The exit point is passed in lr and pushed on entry. check_return (below)
   185  // treats lr == 0 as "entered from the bootrom" and vectors through the table
   186  // at XIP_BASE + 0x100 (0x10000100), immediately following this second stage.
   187  // Any other lr is used as a return address -- second stage being called as a
   188  // function by user code, after copying out of XIP region.
        //
        // Register use: r3 holds the pads base for the pad setup, then the SSI base
        // for the rest of the code; r0...2 used as temporaries. Other GPRs not used.
   189  .global _stage2_boot
   190  .type _stage2_boot,%function
   191  .thumb_func
   192  _stage2_boot:
   193      push {lr}  // saved exit pointer; popped into r0 by check_return
   194  
   195      // Set pad configuration:
   196      // - SCLK 8mA drive, no slew limiting
   197      // - SDx disable input Schmitt to reduce delay
   198  
   199      ldr r3, =PADS_QSPI_BASE  // r3 = pads base (only until the SSI base is loaded below)
   200      movs r0, #(2 << PADS_QSPI_GPIO_QSPI_SCLK_DRIVE_LSB | PADS_QSPI_GPIO_QSPI_SCLK_SLEWFAST_BITS)
   201      str r0, [r3, #PADS_QSPI_GPIO_QSPI_SCLK_OFFSET]
   202      ldr r0, [r3, #PADS_QSPI_GPIO_QSPI_SD0_OFFSET]  // start from the current SD0 pad value...
   203      movs r1, #PADS_QSPI_GPIO_QSPI_SD0_SCHMITT_BITS
   204      bics r0, r1  // ...with the Schmitt bit cleared; the same value is stored to each SDx pad below
   205  #if BOARD_QUAD_OK==1
   206      str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD0_OFFSET]
   207  #endif
   208      str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD1_OFFSET]  // SD1 is written in both quad and non-quad builds
   209  #if BOARD_QUAD_OK==1
   210      str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD2_OFFSET]
   211      str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD3_OFFSET]
   212  #endif
   213  
   214      ldr r3, =XIP_SSI_BASE  // r3 = SSI base from here on; the helper routines rely on it
   215  
   216      // Disable SSI to allow further config
   217      movs r1, #0
   218      str r1, [r3, #SSI_SSIENR_OFFSET]
   219  
   220      // Set baud rate
   221      movs r1, #PICO_FLASH_SPI_CLKDIV
   222      str r1, [r3, #SSI_BAUDR_OFFSET]
   223  
   224      // Set 1-cycle sample delay. If PICO_FLASH_SPI_CLKDIV == 2 then this means,
   225      // if the flash launches data on SCLK posedge, we capture it at the time that
   226      // the next SCLK posedge is launched. This is shortly before that posedge
   227      // arrives at the flash, so data hold time should be ok. For
   228      // PICO_FLASH_SPI_CLKDIV > 2 this pretty much has no effect.
   229  
   230      movs r1, #1
   231      movs r2, #SSI_RX_SAMPLE_DLY_OFFSET  // == 0xf0 so need 8 bits of offset significance
   232      str r1, [r3, r2]  // register-offset store, with the offset held in r2
   233  
   234  // On QSPI parts we usually need a 01h SR-write command to enable QSPI mode
   235  // (i.e. turn WPn and HOLDn into IO2/IO3)
        //
        // Flow: put the SSI into 8-bit TX-and-RX mode, read the status register
        // selected by BOARD_QUAD_ENABLE_STATUS_BYTE, and if it does not equal
        // BOARD_QUAD_ENABLE_BIT_MASK send write-enable followed by a status write,
        // then poll status register 1 until its bit 0 clears. Every frame written
        // to DR0 is matched by one DR0 read after wait_ssi_ready, which empties the
        // RX side again. Expects r3 = XIP_SSI_BASE.
   236  #ifdef PROGRAM_STATUS_REG
   237  program_sregs:
   238  #define CTRL0_SPI_TXRX \
   239      (7 << SSI_CTRLR0_DFS_32_LSB) | /* 8 bits per data frame */ \
   240      (SSI_CTRLR0_TMOD_VALUE_TX_AND_RX << SSI_CTRLR0_TMOD_LSB)
   241  
   242      ldr r1, =(CTRL0_SPI_TXRX)
   243      str r1, [r3, #SSI_CTRLR0_OFFSET]
   244  
   245       // Enable SSI and select slave 0
   246      movs r1, #1
   247      str r1, [r3, #SSI_SSIENR_OFFSET]
   248  
   249      // Check whether SR needs updating
   250  #if BOARD_QUAD_OK==1
   251  # if BOARD_QUAD_ENABLE_STATUS_BYTE==1
   252      movs r0, #CMD_READ_STATUS1
   253  # elif BOARD_QUAD_ENABLE_STATUS_BYTE==2
   254      movs r0, #CMD_READ_STATUS2
   255  # endif
            // NOTE(review): for any other BOARD_QUAD_ENABLE_STATUS_BYTE value no read
            // command is loaded into r0 before the call below -- confirm that board
            // files only ever use 1 or 2.
   256  
   257      bl read_flash_sreg  // r0 = status byte
   258      movs r2, #BOARD_QUAD_ENABLE_BIT_MASK  // r2 stays live: it is also the data byte written below
   259      cmp r0, r2  // whole status byte is compared with the mask (equality, not a single-bit test)
   260      beq skip_sreg_programming
   261  
   262      // Send write enable command
   263      movs r1, #CMD_WRITE_ENABLE
   264      str r1, [r3, #SSI_DR0_OFFSET]
   265  
   266      // Poll for completion and discard RX
   267      bl wait_ssi_ready
   268      ldr r1, [r3, #SSI_DR0_OFFSET]
   269  
   270      // Send status write command followed by data bytes
   271  # if BOARD_SPLIT_STATUS_WRITE==1
            // Split write: a one-byte write to the status register holding the bit
   272  #  if BOARD_QUAD_ENABLE_STATUS_BYTE==1
   273      movs r1, #CMD_WRITE_STATUS1
   274  #  elif BOARD_QUAD_ENABLE_STATUS_BYTE==2
   275      movs r1, #CMD_WRITE_STATUS2
   276  #  endif
   277      str r1, [r3, #SSI_DR0_OFFSET]
   278      str r2, [r3, #SSI_DR0_OFFSET]  // r2 still holds the mask; wait_ssi_ready restores r0/r1 and does not touch r2
   279  
   280      bl wait_ssi_ready
            // Two frames were sent above, so two RX frames are read back and discarded
   281      //ldr r1, [r3, #SSI_DR0_OFFSET]
   282      ldr r1, [r3, #SSI_DR0_OFFSET]
   283      ldr r1, [r3, #SSI_DR0_OFFSET]
   284  
   285  # else  // BOARD_SPLIT_STATUS_WRITE != 1: single 01h write, one or two data bytes
   286      movs r1, #CMD_WRITE_STATUS1
   287      str r1, [r3, #SSI_DR0_OFFSET]
   288  #  if BOARD_QUAD_ENABLE_STATUS_BYTE==2
   289      movs r0, #0  // first data byte of the 01h write is 0, ahead of the mask byte
   290      str r0, [r3, #SSI_DR0_OFFSET]
   291  #  endif
   292      str r2, [r3, #SSI_DR0_OFFSET]
   293  
   294      bl wait_ssi_ready
            // Discard one RX frame per frame sent: two, or three when the extra 0 byte was sent
   295      ldr r1, [r3, #SSI_DR0_OFFSET]
   296      ldr r1, [r3, #SSI_DR0_OFFSET]
   297  #  if BOARD_QUAD_ENABLE_STATUS_BYTE==2
   298      ldr r1, [r3, #SSI_DR0_OFFSET]
   299  #  endif
   300  
   301  # endif  // BOARD_SPLIT_STATUS_WRITE
   302      // Poll status register for write completion
   303  1:
   304      movs r0, #CMD_READ_STATUS1
   305      bl read_flash_sreg
   306      movs r1, #1
   307      tst r0, r1
   308      bne 1b  // keep polling while bit 0 of status register 1 is set
   309  #endif  // BOARD_QUAD_OK==1
   310  
   311  skip_sreg_programming:
   312  
   313      // Disable SSI again so that it can be reconfigured
   314      movs r1, #0
   315      str r1, [r3, #SSI_SSIENR_OFFSET]
   316  #endif  // PROGRAM_STATUS_REG
   317  
   318  // Currently the flash expects an 8 bit serial command prefix on every
   319  // transfer, which is a waste of cycles. Perform a dummy Fast Read Quad I/O
   320  // command, with mode bits set such that the flash will not expect a serial
   321  // command prefix on *subsequent* transfers. We don't care about the results
   322  // of the read, the important part is the mode bits.
        //
        // CTRLR0 and CTRLR1 are written in every build; the dummy read itself is
        // only assembled when BOARD_QUAD_OK==1. Expects r3 = XIP_SSI_BASE and the
        // SSI disabled on entry; in quad builds the SSI is disabled again on exit.
   323  
   324  dummy_read:
   325  #define CTRLR0_ENTER_XIP \
   326      (FRAME_FORMAT                          /* Quad I/O mode */                \
   327          << SSI_CTRLR0_SPI_FRF_LSB) |                                          \
   328      (31 << SSI_CTRLR0_DFS_32_LSB)  |       /* 32 data bits */                 \
   329      (SSI_CTRLR0_TMOD_VALUE_EEPROM_READ     /* Send INST/ADDR, Receive Data */ \
   330          << SSI_CTRLR0_TMOD_LSB)
   331  
   332      ldr r1, =(CTRLR0_ENTER_XIP)
   333      str r1, [r3, #SSI_CTRLR0_OFFSET]
   334  
   335      movs r1, #0x0                    // NDF=0 (single 32b read)
   336      str r1, [r3, #SSI_CTRLR1_OFFSET]
   337  
   338  #if BOARD_QUAD_OK==1
   339  #define SPI_CTRLR0_ENTER_XIP \
   340      (ADDR_L << SSI_SPI_CTRLR0_ADDR_L_LSB) |     /* Address + mode bits */ \
   341      (WAIT_CYCLES << SSI_SPI_CTRLR0_WAIT_CYCLES_LSB) | /* Hi-Z dummy clocks following address + mode */ \
   342      (SSI_SPI_CTRLR0_INST_L_VALUE_8B \
   343          << SSI_SPI_CTRLR0_INST_L_LSB) |        /* 8-bit instruction */ \
   344      (SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_1C2A      /* Send Command in serial mode then address in Quad I/O mode */ \
   345          << SSI_SPI_CTRLR0_TRANS_TYPE_LSB)
   346  
   347      ldr r1, =(SPI_CTRLR0_ENTER_XIP)
            // SPI_CTRLR0 is at offset 0xf4, beyond the reach of a Thumb immediate-offset
            // str (word offsets up to 124), so its absolute address is loaded into r0.
   348      ldr r0, =(XIP_SSI_BASE + SSI_SPI_CTRLR0_OFFSET)  // SPI_CTRL0 Register
   349      str r1, [r0]
   350  
   351      movs r1, #1                      // Re-enable SSI
   352      str r1, [r3, #SSI_SSIENR_OFFSET]
   353  
   354      movs r1, #CMD_READ
   355      str r1, [r3, #SSI_DR0_OFFSET]   // Push SPI command into TX FIFO
   356      movs r1, #MODE_CONTINUOUS_READ   // 32-bit: 24 address bits (we don't care, so 0) and M[7:4]=1010
   357      str r1, [r3, #SSI_DR0_OFFSET]   // Push Address into TX FIFO - this will trigger the transaction
   358  
   359      // Poll for completion
   360      bl wait_ssi_ready
   361  
   362  // The flash is in a state where we can blast addresses in parallel, and get
   363  // parallel data back. Now configure the SSI to translate XIP bus accesses
   364  // into QSPI transfers of this form.
   365  
   366      movs r1, #0
   367      str r1, [r3, #SSI_SSIENR_OFFSET]   // Disable SSI (and clear FIFO) to allow further config
   368  #endif  // BOARD_QUAD_OK==1
   369  
   370  // Note that the INST_L field is used to select what XIP data gets pushed into
   371  // the TX FIFO:
   372  //      INST_L_0_BITS   {ADDR[23:0],XIP_CMD[7:0]}       Load "mode bits" into XIP_CMD
   373  //      Anything else   {XIP_CMD[7:0],ADDR[23:0]}       Load SPI command into XIP_CMD
        //
        // Writes the final SPI_CTRLR0 value and re-enables the SSI. The comments
        // inside the macro describe the BOARD_QUAD_OK==1 selection; in the other
        // configuration READ_INSTRUCTION is BOARD_CMD_READ, INSTRUCTION_LENGTH is
        // the 8-bit value and ADDR_L covers the 24 address bits only.
        // Expects r3 = XIP_SSI_BASE and the SSI disabled; uses r0 and r1.
   374  configure_ssi:
   375  #define SPI_CTRLR0_XIP \
   376      (READ_INSTRUCTION                          /* Mode bits to keep flash in continuous read mode */ \
   377          << SSI_SPI_CTRLR0_XIP_CMD_LSB) | \
   378      (ADDR_L << SSI_SPI_CTRLR0_ADDR_L_LSB) |    /* Total number of address + mode bits */ \
   379      (WAIT_CYCLES << SSI_SPI_CTRLR0_WAIT_CYCLES_LSB) |    /* Hi-Z dummy clocks following address + mode */ \
   380      (INSTRUCTION_LENGTH                        /* Do not send a command, instead send XIP_CMD as mode bits after address */ \
   381          << SSI_SPI_CTRLR0_INST_L_LSB) | \
   382      (TRANSACTION_TYPE                          /* Send Address in Quad I/O mode (and Command but that is zero bits long) */ \
   383          << SSI_SPI_CTRLR0_TRANS_TYPE_LSB)
   384  
   385      ldr r1, =(SPI_CTRLR0_XIP)
   386  
   387      ldr r0, =(XIP_SSI_BASE + SSI_SPI_CTRLR0_OFFSET)  // absolute address of SPI_CTRLR0 (offset 0xf4)
   388      str r1, [r0]
   389  
   390      movs r1, #1
   391      str r1, [r3, #SSI_SSIENR_OFFSET]   // Re-enable SSI
            // No branch here: execution continues into check_return.
   392  
   393  // Bus accesses to the XIP window will now be transparently serviced by the
   394  // external flash on cache miss. We are ready to run code from flash.
   395  
   396  
   397  //
   398  // Helper Includes
   399  //
   400  
   401  //
   402  // #include "boot2_helpers/exit_from_boot2.S"
   403  //
   404  
   405  // If entered from the bootrom, lr (which we earlier pushed) will be 0,
   406  // and we vector through the table at the start of the main flash image.
   407  // Any regular function call will have a nonzero value for lr.
        //
        // Pops the lr value pushed at _stage2_boot. Nonzero: branch to it. Zero:
        // point VTOR at XIP_BASE + 0x100, load the first two words found there,
        // set MSP from the first and branch to the second.
   408  check_return:
   409      pop {r0}  // r0 = lr as it was on entry to _stage2_boot
   410      cmp r0, #0
   411      beq vector_into_flash
   412      bx r0  // called as a function: return to the caller
   413  vector_into_flash:
   414      ldr r0, =(XIP_BASE + 0x100)  // address of the table that follows the 256-byte second stage
   415      ldr r1, =(PPB_BASE + M0PLUS_VTOR_OFFSET)
   416      str r0, [r1]  // VTOR = table address
   417      ldmia r0, {r0, r1}  // r0 = table word 0, r1 = table word 1
   418      msr msp, r0  // word 0 becomes the main stack pointer
   419      bx r1  // word 1 is the branch target
   420  
   421  //
   422  // #include "boot2_helpers/wait_ssi_ready.S"
   423  //
        // Spins until the SSI status register shows the TX FIFO empty (TFE set)
        // and the SSI not busy (BUSY clear).
        // In:    r3 = SSI base address
        // Out:   nothing; r0 and r1 are saved and restored
        // Clobb: flags
   424  wait_ssi_ready:
   425      push {r0, r1, lr}
   426  
   427      // Command is complete when there is nothing left to send
   428      // (TX FIFO empty) and SSI is no longer busy (CSn deasserted)
   429  1:
   430      ldr r1, [r3, #SSI_SR_OFFSET]  // one status read per iteration; both tests use this value
   431      movs r0, #SSI_SR_TFE_BITS
   432      tst r1, r0
   433      beq 1b  // TFE clear: still data to send
   434      movs r0, #SSI_SR_BUSY_BITS
   435      tst r1, r0
   436      bne 1b  // BUSY set: transfer still in progress
   437  
   438      pop {r0, r1, pc}  // restore r0/r1 and return
   439  
   440  
   441  #ifdef PROGRAM_STATUS_REG
   442  
   443  //
   444  // #include "boot2_helpers/read_flash_sreg.S"
   445  //
   446  
   447  // Pass status read cmd into r0.
   448  // Returns status value in r0.
        // Expects r3 = SSI base address, with the SSI enabled by the caller.
        // r1 is saved and restored; flags are clobbered via wait_ssi_ready.
   449  .global read_flash_sreg
   450  .type read_flash_sreg,%function
   451  .thumb_func
   452  read_flash_sreg:
   453      push {r1, lr}
   454      str r0, [r3, #SSI_DR0_OFFSET]  // frame 1: the status read command
   455      // Dummy byte:
   456      str r0, [r3, #SSI_DR0_OFFSET]  // frame 2: value is irrelevant, the command byte is simply reused
   457      
   458      bl wait_ssi_ready
   459      // Two frames were sent, so two are read back: the first is discarded
            // (overwritten in r0) and the second is returned as the status value.
   460      ldr r0, [r3, #SSI_DR0_OFFSET]
   461      ldr r0, [r3, #SSI_DR0_OFFSET]
   462  
   463      pop {r1, pc}
   464  
   465  #endif
   466  
   467  .global literals
   468  literals:
   469  .ltorg  // emit the literal pool for the `ldr rX, =value` loads above at this point
   470  
   471  .end