// github.com/tinygo-org/tinygo@v0.31.3-0.20240404173401-90b0bf646c27/targets/rp2040-boot-stage2.S
//
// Implementation of RP2040 stage 2 boot loader. This code is derived from the
// Winbond W25Q080 implementation (as found in the Pico) in the official Pico SDK.
//
// This implementation has been made 'stand-alone' by including necessary code /
// symbols from the included files in the reference implementation directly into
// the source.  It has also been modified to include the conditional logic from
// the CircuitPython implementation that supports additional flash chips.  The
// CircuitPython source is here:
//     https://github.com/adafruit/circuitpython/blob/main/ports/raspberrypi/stage2.c.jinja
//
// This file cannot be assembled directly; instead, assemble the board-specific
// file (such as pico-boot-stage2.S) which defines the parameters specific to the
// flash chip included on that board.
//
// Care has been taken to preserve ordering and it has been verified the generated
// binary is byte-for-byte identical to the reference code binary when assembled for
// the Pico.
//
// Note: the stage 2 boot loader must be 256 bytes in length and have a checksum
// present.  In TinyGo, the linker script is responsible for allocating 256 bytes
// for the .boot2 section and the build logic patches the checksum into the
// binary after linking, controlled by the '<target>.json' flag 'rp2040-boot-patch'.
//
// The stage 2 bootstrap section can be inspected in an elf file using this command:
//     objdump -s -j .boot2 <binary>.elf
//
// Original Source:
//     https://github.com/raspberrypi/pico-sdk/blob/master/src/rp2_common/boot_stage2/boot2_w25q080.S
//


// ----------------------------------------------------------------------------
// Second stage boot code
// Copyright (c) 2019-2021 Raspberry Pi (Trading) Ltd.
// SPDX-License-Identifier: BSD-3-Clause
//
// Device:      Winbond W25Q080
//              Also supports W25Q16JV (which has some different SR instructions)
//              Also supports AT25SF081
//              Also supports S25FL132K0
//
// Description: Configures W25Q080 to run in Quad I/O continuous read XIP mode
//
// Details:     * Check status register 2 to determine if QSPI mode is enabled,
//                and perform an SR2 programming cycle if necessary.
//              * Use SSI to perform a dummy 0xEB read command, with the mode
//                continuation bits set, so that the flash will not require
//                0xEB instruction prefix on subsequent reads.
//              * Configure SSI to write address, mode bits, but no instruction.
//                SSI + flash are now jointly in a state where continuous reads
//                can take place.
//              * Jump to exit pointer passed in via lr. Bootrom passes null,
//                in which case this code uses a default 256 byte flash offset
//
// Building:    * This code must be position-independent, and use stack only
//              * The code will be padded to a size of 256 bytes, including a
//                4-byte checksum. Therefore code size cannot exceed 252 bytes.
// ----------------------------------------------------------------------------


//
// Expanded include files
//
// Constants copied in from the Pico SDK headers so that this file has no
// external includes. Board-specific values (BOARD_*) are NOT defined here;
// they must be provided by the board file that includes this one.
//

// Serial flash commands
#define CMD_WRITE_ENABLE  0x06
#define CMD_READ_STATUS1  0x05
#define CMD_READ_STATUS2  0x35
#define CMD_WRITE_STATUS1 0x01
#define CMD_WRITE_STATUS2 0x31
#define SREG_DATA         0x02  // Enable quad-SPI mode

// Peripheral base addresses
#define XIP_BASE       0x10000000
#define XIP_SSI_BASE   0x18000000
#define PADS_QSPI_BASE 0x40020000
#define PPB_BASE       0xe0000000

#define M0PLUS_VTOR_OFFSET 0x0000ed08

// QSPI pad control
#define PADS_QSPI_GPIO_QSPI_SCLK_DRIVE_LSB     4
#define PADS_QSPI_GPIO_QSPI_SCLK_SLEWFAST_BITS 0x00000001
#define PADS_QSPI_GPIO_QSPI_SCLK_OFFSET        0x00000004
#define PADS_QSPI_GPIO_QSPI_SD0_OFFSET         0x00000008
#define PADS_QSPI_GPIO_QSPI_SD0_SCHMITT_BITS   0x00000002
#define PADS_QSPI_GPIO_QSPI_SD1_OFFSET         0x0000000c
#define PADS_QSPI_GPIO_QSPI_SD2_OFFSET         0x00000010
#define PADS_QSPI_GPIO_QSPI_SD3_OFFSET         0x00000014

// SSI register offsets
#define SSI_CTRLR0_OFFSET        0x00000000
#define SSI_CTRLR1_OFFSET        0x00000004
#define SSI_SSIENR_OFFSET        0x00000008
#define SSI_BAUDR_OFFSET         0x00000014
#define SSI_SR_OFFSET            0x00000028
#define SSI_DR0_OFFSET           0x00000060
#define SSI_RX_SAMPLE_DLY_OFFSET 0x000000f0

#define SSI_CTRLR0_DFS_32_LSB 16

// SSI_CTRLR0_SPI_FRF_VALUE_STD is required by the BOARD_QUAD_OK != 1
// configuration below (FRAME_FORMAT); without it that configuration refers to
// an undefined symbol.
#define SSI_CTRLR0_SPI_FRF_VALUE_STD  0x0
#define SSI_CTRLR0_SPI_FRF_VALUE_QUAD 0x2
#define SSI_CTRLR0_SPI_FRF_LSB        21

#define SSI_CTRLR0_TMOD_VALUE_TX_AND_RX   0x0
#define SSI_CTRLR0_TMOD_VALUE_EEPROM_READ 0x3
#define SSI_CTRLR0_TMOD_LSB               8

// SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_1C1A is required by the BOARD_QUAD_OK != 1
// configuration below (TRANSACTION_TYPE).
#define SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_1C1A 0x0
#define SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_1C2A 0x1
#define SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_2C2A 0x2

#define SSI_SPI_CTRLR0_OFFSET 0x000000f4

#define SSI_SPI_CTRLR0_INST_L_VALUE_NONE 0x0
#define SSI_SPI_CTRLR0_INST_L_VALUE_8B   0x2

#define SSI_SPI_CTRLR0_TRANS_TYPE_LSB  0
#define SSI_SPI_CTRLR0_ADDR_L_LSB      2
#define SSI_SPI_CTRLR0_INST_L_LSB      8
#define SSI_SPI_CTRLR0_WAIT_CYCLES_LSB 11
#define SSI_SPI_CTRLR0_XIP_CMD_LSB     24

#define SSI_SR_BUSY_BITS 0x00000001
#define SSI_SR_TFE_BITS  0x00000004


// ----------------------------------------------------------------------------
// Config section
// ----------------------------------------------------------------------------
// It should be possible to support most flash devices by modifying this section

// The serial flash interface will run at clk_sys/PICO_FLASH_SPI_CLKDIV.
// This must be a positive, even integer.
// The bootrom is very conservative with SPI frequency, but here we should be
// as aggressive as possible.

#define PICO_FLASH_SPI_CLKDIV BOARD_PICO_FLASH_SPI_CLKDIV
#if PICO_FLASH_SPI_CLKDIV & 1
#error PICO_FLASH_SPI_CLKDIV must be even
#endif
// An undefined BOARD_PICO_FLASH_SPI_CLKDIV evaluates to 0 in #if and would
// slip past the parity check above, so reject anything below 2 as well.
#if PICO_FLASH_SPI_CLKDIV < 2
#error PICO_FLASH_SPI_CLKDIV must be a positive, even integer (check BOARD_PICO_FLASH_SPI_CLKDIV)
#endif

#if BOARD_QUAD_OK==1
// Define interface width: single/dual/quad IO
#define FRAME_FORMAT     SSI_CTRLR0_SPI_FRF_VALUE_QUAD
#define TRANSACTION_TYPE SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_2C2A
// Note that the INST_L field is used to select what XIP data gets pushed into
// the TX FIFO:
//      INST_L_0_BITS   {ADDR[23:0],XIP_CMD[7:0]}       Load "mode bits" into XIP_CMD
//      Anything else   {XIP_CMD[7:0],ADDR[23:0]}       Load SPI command into XIP_CMD
#define INSTRUCTION_LENGTH SSI_SPI_CTRLR0_INST_L_VALUE_NONE
#define READ_INSTRUCTION   MODE_CONTINUOUS_READ
#define ADDR_L 8 // 6 for address, 2 for mode
#else
// Standard (single-lane) SPI: every XIP access sends the 8-bit read command
// followed by a 24-bit address.
#define FRAME_FORMAT       SSI_CTRLR0_SPI_FRF_VALUE_STD
#define TRANSACTION_TYPE   SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_1C1A
#define INSTRUCTION_LENGTH SSI_SPI_CTRLR0_INST_L_VALUE_8B
#define READ_INSTRUCTION   BOARD_CMD_READ
#define ADDR_L 6 // * 4 = 24
#endif

// The flash-chip specific read instruction
#define CMD_READ BOARD_CMD_READ

// "Mode bits" are 8 special bits sent immediately after
// the address bits in a "Read Data Fast Quad I/O" command sequence.
// On W25Q080, the four LSBs are don't care, and if MSBs == 0xa, the
// next read does not require the 0xeb instruction prefix.
#define MODE_CONTINUOUS_READ 0xa0

// How many clocks of Hi-Z following the mode bits. For W25Q080, 4 dummy cycles
// are required.
#define WAIT_CYCLES BOARD_WAIT_CYCLES


// If defined, we will read the status register, compare it to
// BOARD_QUAD_ENABLE_BIT_MASK, and overwrite it with that value if the SR
// doesn't match.
// Unless BOARD_SPLIT_STATUS_WRITE==1, we do a two-byte write to SR1 (01h cmd)
// rather than a one-byte write to SR2 (31h cmd) as the latter command isn't
// supported by WX25Q080.
// This isn't great because it will remove block protections.
// A better solution is to use a volatile SR write if your device supports it.
#define PROGRAM_STATUS_REG

.syntax unified
.cpu cortex-m0plus
.thumb
.section .boot2, "ax"

// The exit point is passed in lr. If entered from the bootrom, lr is null and
// we vector through the table at XIP_BASE + 0x100, i.e. the flash address
// immediately following this second stage (0x10000100).
// Otherwise it will be a return address -- second stage being called as a
// function by user code, after copying out of XIP region. r3 holds SSI base,
// r0...2 used as temporaries. Other GPRs not used.
// ----------------------------------------------------------------------------
// _stage2_boot
//
// In:    lr = exit pointer (pushed here, consumed at check_return)
// Regs:  r3 = peripheral base (PADS_QSPI first, then XIP_SSI)
//        r0-r2 = scratch
// Calls: wait_ssi_ready, read_flash_sreg
//
// The instruction order below is deliberate; the assembled image must stay
// within the 252-byte budget of the .boot2 section.
// ----------------------------------------------------------------------------
.global _stage2_boot
.type _stage2_boot,%function
.thumb_func
_stage2_boot:
    push    {lr}

    // ---- Pad configuration -------------------------------------------------
    //  * SCLK: 8mA drive, no slew limiting
    //  * SDx:  input Schmitt trigger disabled to reduce delay

    ldr     r3, =PADS_QSPI_BASE
    movs    r0, #((2 << PADS_QSPI_GPIO_QSPI_SCLK_DRIVE_LSB) | PADS_QSPI_GPIO_QSPI_SCLK_SLEWFAST_BITS)
    str     r0, [r3, #PADS_QSPI_GPIO_QSPI_SCLK_OFFSET]

    // Take the current SD0 pad setting, clear its Schmitt bit, and reuse the
    // result for the data pads.
    ldr     r0, [r3, #PADS_QSPI_GPIO_QSPI_SD0_OFFSET]
    movs    r1, #PADS_QSPI_GPIO_QSPI_SD0_SCHMITT_BITS
    bics    r0, r1
#if BOARD_QUAD_OK == 1
    str     r0, [r3, #PADS_QSPI_GPIO_QSPI_SD0_OFFSET]
#endif
    str     r0, [r3, #PADS_QSPI_GPIO_QSPI_SD1_OFFSET]
#if BOARD_QUAD_OK == 1
    str     r0, [r3, #PADS_QSPI_GPIO_QSPI_SD2_OFFSET]
    str     r0, [r3, #PADS_QSPI_GPIO_QSPI_SD3_OFFSET]
#endif

    // ---- Basic SSI setup ---------------------------------------------------
    // From here on r3 is the SSI base.
    ldr     r3, =XIP_SSI_BASE

    // The SSI has to be disabled while it is being configured.
    movs    r1, #0
    str     r1, [r3, #SSI_SSIENR_OFFSET]

    // Baud rate divider.
    movs    r1, #PICO_FLASH_SPI_CLKDIV
    str     r1, [r3, #SSI_BAUDR_OFFSET]

    // One cycle of RX sample delay. With PICO_FLASH_SPI_CLKDIV == 2 this
    // means that, if the flash launches data on an SCLK posedge, we capture
    // it at the time the next SCLK posedge is launched. That is shortly
    // before the posedge arrives at the flash, so data hold time should be
    // ok. For PICO_FLASH_SPI_CLKDIV > 2 this pretty much has no effect.
    //
    // The register offset (0xf0) does not fit an immediate-offset str, so it
    // goes through r2.
    movs    r1, #1
    movs    r2, #SSI_RX_SAMPLE_DLY_OFFSET
    str     r1, [r3, r2]

    // ---- Status register programming ---------------------------------------
    // On QSPI parts we usually need an SR-write command to enable QSPI mode
    // (i.e. turn WPn and HOLDn into IO2/IO3).
#ifdef PROGRAM_STATUS_REG
program_sregs:

// CTRLR0 for plain command exchange: 8 bits per data frame, transmit-and-
// receive transfer mode.
#define CTRL0_SPI_TXRX \
    ((7 << SSI_CTRLR0_DFS_32_LSB) | \
     (SSI_CTRLR0_TMOD_VALUE_TX_AND_RX << SSI_CTRLR0_TMOD_LSB))

    ldr     r1, =(CTRL0_SPI_TXRX)
    str     r1, [r3, #SSI_CTRLR0_OFFSET]

    // Enable the SSI.
    movs    r1, #1
    str     r1, [r3, #SSI_SSIENR_OFFSET]

#if BOARD_QUAD_OK == 1
    // Read the status byte that holds the quad-enable bit. If it already
    // equals BOARD_QUAD_ENABLE_BIT_MASK there is nothing to program.
    // NOTE(review): r0 is only loaded when BOARD_QUAD_ENABLE_STATUS_BYTE is 1
    // or 2; the board file is expected to define one of those values.
# if BOARD_QUAD_ENABLE_STATUS_BYTE == 1
    movs    r0, #CMD_READ_STATUS1
# elif BOARD_QUAD_ENABLE_STATUS_BYTE == 2
    movs    r0, #CMD_READ_STATUS2
# endif

    bl      read_flash_sreg
    movs    r2, #BOARD_QUAD_ENABLE_BIT_MASK
    cmp     r0, r2
    beq     skip_sreg_programming

    // Write-enable, then wait for the frame to go out and drain the one
    // RX entry it produced.
    movs    r1, #CMD_WRITE_ENABLE
    str     r1, [r3, #SSI_DR0_OFFSET]

    bl      wait_ssi_ready
    ldr     r1, [r3, #SSI_DR0_OFFSET]

    // Status write: command byte followed by the data byte(s). r2 still holds
    // BOARD_QUAD_ENABLE_BIT_MASK.
# if BOARD_SPLIT_STATUS_WRITE == 1
    // One-byte write addressed directly at the status byte in question.
#  if BOARD_QUAD_ENABLE_STATUS_BYTE == 1
    movs    r1, #CMD_WRITE_STATUS1
#  elif BOARD_QUAD_ENABLE_STATUS_BYTE == 2
    movs    r1, #CMD_WRITE_STATUS2
#  endif
    str     r1, [r3, #SSI_DR0_OFFSET]
    str     r2, [r3, #SSI_DR0_OFFSET]

    // Two frames were pushed, so two RX entries are drained.
    bl      wait_ssi_ready
    ldr     r1, [r3, #SSI_DR0_OFFSET]
    ldr     r1, [r3, #SSI_DR0_OFFSET]

# else
    // Combined write through the SR1 write command. When the quad-enable bit
    // is in status byte 2, a zero byte is sent first for status byte 1.
    movs    r1, #CMD_WRITE_STATUS1
    str     r1, [r3, #SSI_DR0_OFFSET]
#  if BOARD_QUAD_ENABLE_STATUS_BYTE == 2
    movs    r0, #0
    str     r0, [r3, #SSI_DR0_OFFSET]
#  endif
    str     r2, [r3, #SSI_DR0_OFFSET]

    // Drain one RX entry per frame pushed above (two, or three with the
    // extra zero byte).
    bl      wait_ssi_ready
    ldr     r1, [r3, #SSI_DR0_OFFSET]
    ldr     r1, [r3, #SSI_DR0_OFFSET]
#  if BOARD_QUAD_ENABLE_STATUS_BYTE == 2
    ldr     r1, [r3, #SSI_DR0_OFFSET]
#  endif

# endif
    // Spin on status register 1 until bit 0 reads back clear, i.e. the write
    // cycle has completed.
.Lsreg_write_busy:
    movs    r0, #CMD_READ_STATUS1
    bl      read_flash_sreg
    movs    r1, #1
    tst     r0, r1
    bne     .Lsreg_write_busy
#endif

skip_sreg_programming:

    // Disable the SSI again so that it can be reconfigured.
    movs    r1, #0
    str     r1, [r3, #SSI_SSIENR_OFFSET]
#endif

    // ---- Dummy read --------------------------------------------------------
    // Currently the flash expects an 8 bit serial command prefix on every
    // transfer, which is a waste of cycles. Perform a dummy Fast Read Quad I/O
    // command, with mode bits set such that the flash will not expect a serial
    // command prefix on *subsequent* transfers. We don't care about the
    // results of the read, the important part is the mode bits.

dummy_read:

// CTRLR0 for XIP: frame format as selected by FRAME_FORMAT, 32 data bits per
// frame, "EEPROM read" transfer mode (send INST/ADDR, receive data).
#define CTRLR0_ENTER_XIP \
    ((FRAME_FORMAT << SSI_CTRLR0_SPI_FRF_LSB) | \
     (31 << SSI_CTRLR0_DFS_32_LSB) | \
     (SSI_CTRLR0_TMOD_VALUE_EEPROM_READ << SSI_CTRLR0_TMOD_LSB))

    ldr     r1, =(CTRLR0_ENTER_XIP)
    str     r1, [r3, #SSI_CTRLR0_OFFSET]

    // NDF = 0: a single 32-bit read.
    movs    r1, #0
    str     r1, [r3, #SSI_CTRLR1_OFFSET]

#if BOARD_QUAD_OK == 1

// SPI_CTRLR0 for the dummy read: ADDR_L address + mode bits, WAIT_CYCLES Hi-Z
// dummy clocks after them, an 8-bit instruction, and the command sent in
// serial mode followed by the address in Quad I/O mode.
#define SPI_CTRLR0_ENTER_XIP \
    ((ADDR_L << SSI_SPI_CTRLR0_ADDR_L_LSB) | \
     (WAIT_CYCLES << SSI_SPI_CTRLR0_WAIT_CYCLES_LSB) | \
     (SSI_SPI_CTRLR0_INST_L_VALUE_8B << SSI_SPI_CTRLR0_INST_L_LSB) | \
     (SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_1C2A << SSI_SPI_CTRLR0_TRANS_TYPE_LSB))

    // SPI_CTRLR0 is addressed through its absolute address held in r0.
    ldr     r1, =(SPI_CTRLR0_ENTER_XIP)
    ldr     r0, =(XIP_SSI_BASE + SSI_SPI_CTRLR0_OFFSET)
    str     r1, [r0]

    // Re-enable the SSI.
    movs    r1, #1
    str     r1, [r3, #SSI_SSIENR_OFFSET]

    // First FIFO entry: the SPI read command.
    movs    r1, #CMD_READ
    str     r1, [r3, #SSI_DR0_OFFSET]
    // Second FIFO entry: 32 bits made of 24 address bits (we don't care, so
    // 0) and the mode bits with M[7:4]=1010. Pushing this triggers the
    // transaction.
    movs    r1, #MODE_CONTINUOUS_READ
    str     r1, [r3, #SSI_DR0_OFFSET]

    // Wait for the transfer to finish.
    bl      wait_ssi_ready

    // The flash is in a state where we can blast addresses in parallel, and
    // get parallel data back. Now configure the SSI to translate XIP bus
    // accesses into QSPI transfers of this form.

    // Disable the SSI (which also clears the FIFO) for the final setup.
    movs    r1, #0
    str     r1, [r3, #SSI_SSIENR_OFFSET]
#endif

    // ---- Final XIP configuration -------------------------------------------
    // Note that the INST_L field is used to select what XIP data gets pushed
    // into the TX FIFO:
    //      INST_L_0_BITS   {ADDR[23:0],XIP_CMD[7:0]}   Load "mode bits" into XIP_CMD
    //      Anything else   {XIP_CMD[7:0],ADDR[23:0]}   Load SPI command into XIP_CMD
configure_ssi:

// SPI_CTRLR0 for steady-state XIP. READ_INSTRUCTION lands in XIP_CMD: in the
// quad configuration it is the mode bits that keep the flash in continuous
// read mode and no command is sent (INSTRUCTION_LENGTH is "none"); otherwise
// it is the board's read command.
#define SPI_CTRLR0_XIP \
    ((READ_INSTRUCTION << SSI_SPI_CTRLR0_XIP_CMD_LSB) | \
     (ADDR_L << SSI_SPI_CTRLR0_ADDR_L_LSB) | \
     (WAIT_CYCLES << SSI_SPI_CTRLR0_WAIT_CYCLES_LSB) | \
     (INSTRUCTION_LENGTH << SSI_SPI_CTRLR0_INST_L_LSB) | \
     (TRANSACTION_TYPE << SSI_SPI_CTRLR0_TRANS_TYPE_LSB))

    ldr     r1, =(SPI_CTRLR0_XIP)

    ldr     r0, =(XIP_SSI_BASE + SSI_SPI_CTRLR0_OFFSET)
    str     r1, [r0]

    // Re-enable the SSI.
    movs    r1, #1
    str     r1, [r3, #SSI_SSIENR_OFFSET]

    // Bus accesses to the XIP window will now be transparently serviced by
    // the external flash on cache miss. We are ready to run code from flash.


//
// Helper Includes
//

//
// #include "boot2_helpers/exit_from_boot2.S"
//

// If entered from the bootrom, lr (which we earlier pushed) will be 0,
// and we vector through the table at the start of the main flash image.
// Any regular function call will have a nonzero value for lr.
// ----------------------------------------------------------------------------
// check_return / vector_into_flash
//
// Tail of _stage2_boot. Pops the lr value saved on entry; a nonzero value is
// branched to directly. A zero value means VTOR is pointed at XIP_BASE + 0x100
// and the first two words there are loaded as the stack pointer and the
// branch target.
// ----------------------------------------------------------------------------
check_return:
    pop     {r0}
    cmp     r0, #0
    beq     vector_into_flash
    bx      r0
vector_into_flash:
    ldr     r0, =(XIP_BASE + 0x100)
    ldr     r1, =(PPB_BASE + M0PLUS_VTOR_OFFSET)
    str     r0, [r1]                // VTOR = XIP_BASE + 0x100
    ldmia   r0, {r0, r1}            // r0 = word 0, r1 = word 1 of the table
    msr     msp, r0
    bx      r1

//
// #include "boot2_helpers/wait_ssi_ready.S"
//

// ----------------------------------------------------------------------------
// wait_ssi_ready
//
// In:    r3 = SSI base
// Out:   none; r0 and r1 are saved and restored
//
// Returns once the SSI status register shows TFE set and BUSY clear, i.e.
// there is nothing left to send (TX FIFO empty) and the SSI is no longer busy
// (CSn deasserted).
// ----------------------------------------------------------------------------
wait_ssi_ready:
    push    {r0, r1, lr}

.Lssi_not_ready:
    // Both conditions are evaluated against the same status snapshot.
    ldr     r1, [r3, #SSI_SR_OFFSET]
    movs    r0, #SSI_SR_TFE_BITS
    tst     r1, r0
    beq     .Lssi_not_ready         // TX FIFO not yet empty
    movs    r0, #SSI_SR_BUSY_BITS
    tst     r1, r0
    bne     .Lssi_not_ready         // still busy

    pop     {r0, r1, pc}


#ifdef PROGRAM_STATUS_REG

//
// #include "boot2_helpers/read_flash_sreg.S"
//

// ----------------------------------------------------------------------------
// read_flash_sreg
//
// In:    r0 = status read command
//        r3 = SSI base
// Out:   r0 = status value
//        r1 is saved and restored
// ----------------------------------------------------------------------------
.global read_flash_sreg
.type read_flash_sreg,%function
.thumb_func
read_flash_sreg:
    push    {r1, lr}

    // Push the command, then a second (dummy) frame. The same register is
    // written for both.
    str     r0, [r3, #SSI_DR0_OFFSET]
    str     r0, [r3, #SSI_DR0_OFFSET]

    bl      wait_ssi_ready

    // Two RX entries come back: the first is discarded by being overwritten,
    // the second is the value returned in r0.
    ldr     r0, [r3, #SSI_DR0_OFFSET]
    ldr     r0, [r3, #SSI_DR0_OFFSET]

    pop     {r1, pc}

#endif

// Literal pool for the `ldr rX, =value` loads above.
.global literals
literals:
.ltorg

.end