diff --git a/.github/workflows/test-configs.yml b/.github/workflows/test-configs.yml index e3a2910a6e..6bcb77042d 100644 --- a/.github/workflows/test-configs.yml +++ b/.github/workflows/test-configs.yml @@ -548,6 +548,12 @@ jobs: arch: aarch64 config-file: ./config/examples/zynqmp.config + versal_vmk180_test: + uses: ./.github/workflows/test-build-aarch64.yml + with: + arch: aarch64 + config-file: ./config/examples/versal_vmk180.config + sim_wolfhsm_test: uses: ./.github/workflows/test-build.yml with: diff --git a/arch.mk b/arch.mk index d4467bf886..6bd589a0e4 100644 --- a/arch.mk +++ b/arch.mk @@ -78,23 +78,34 @@ ifeq ($(ARCH),AARCH64) HASH_HAL=1 CFLAGS+=-DWOLFBOOT_ZYNQMP_CSU endif - else - ifeq ($(TARGET),nxp_ls1028a) - ARCH_FLAGS=-mcpu=cortex-a72+crypto -march=armv8-a+crypto -mtune=cortex-a72 - CFLAGS+=$(ARCH_FLAGS) -DCORTEX_A72 + endif - CFLAGS +=-ffunction-sections -fdata-sections - LDFLAGS+=-Wl,--gc-sections + ifeq ($(TARGET),versal) + # AMD Versal ACAP (VMK180) - Dual Cortex-A72 + ARCH_FLAGS=-mcpu=cortex-a72+crypto -march=armv8-a+crypto -mtune=cortex-a72 + CFLAGS+=$(ARCH_FLAGS) -DCORTEX_A72 + CFLAGS+=-DWOLFBOOT_DUALBOOT + # Support detection and skip of U-Boot legacy header + CFLAGS+=-DWOLFBOOT_UBOOT_LEGACY + endif - ifeq ($(DEBUG_UART),0) - CFLAGS+=-fno-builtin-printf - endif + ifeq ($(TARGET),nxp_ls1028a) + ARCH_FLAGS=-mcpu=cortex-a72+crypto -march=armv8-a+crypto -mtune=cortex-a72 + CFLAGS+=$(ARCH_FLAGS) -DCORTEX_A72 - SPI_TARGET=nxp - else - # By default disable ARM ASM for other targets - NO_ARM_ASM?=1 + CFLAGS +=-ffunction-sections -fdata-sections + LDFLAGS+=-Wl,--gc-sections + + ifeq ($(DEBUG_UART),0) + CFLAGS+=-fno-builtin-printf endif + + SPI_TARGET=nxp + endif + + # Default ARM ASM setting for unrecognized AARCH64 targets + ifeq ($(filter zynq versal nxp_ls1028a,$(TARGET)),) + NO_ARM_ASM?=1 endif ifeq ($(SPMATH),1) diff --git a/config/examples/versal_vmk180.config b/config/examples/versal_vmk180.config new file mode 100644 index 
0000000000..2d56e0f282 --- /dev/null +++ b/config/examples/versal_vmk180.config @@ -0,0 +1,113 @@ +# wolfBoot configuration for AMD Versal VMK180 - DDR Boot (U-Boot Replacement) +# Versal Prime VM1802 ACAP - Dual ARM Cortex-A72 +# +# This configuration replaces U-Boot in the Versal boot flow: +# PLM -> PSM -> BL31 (EL3) -> wolfBoot (EL2) -> Linux (EL1) +# +# wolfBoot entry point is 0x8000000 in DDR, running at EL2 (non-secure) +# All clock, MIO, and DDR initialization is done by PLM/PSM before wolfBoot starts + +ARCH?=AARCH64 +TARGET?=versal + +WOLFBOOT_VERSION?=1 + +# ECC-384 with SHA-384 (good balance of security and performance) +SIGN?=ECC384 +HASH?=SHA384 +IMAGE_HEADER_SIZE?=512 + +# RSA 4096-bit with SHA3-384 (alternative) +#SIGN?=RSA4096 +#HASH?=SHA3 +#IMAGE_HEADER_SIZE?=1024 + +# Debug options +DEBUG?=1 +DEBUG_SYMBOLS=1 +DEBUG_UART=1 + +# Boot Benchmarking (optional): +# Enables timing of boot operations (flash read, integrity, signature). +BOOT_BENCHMARK?=1 + +VTOR?=1 +CORTEX_M0?=0 +NO_ASM?=0 +ALLOW_DOWNGRADE?=0 +NVM_FLASH_WRITEONCE?=0 +V?=0 +SPMATH?=1 +RAM_CODE?=0 +DUALBANK_SWAP?=0 +PKA?=0 +WOLFTPM?=0 + +# Flash configuration +EXT_FLASH?=1 +NO_XIP=1 + +# ELF loading support +ELF?=1 + +# Toolchain +USE_GCC=1 +CROSS_COMPILE=aarch64-none-elf- + +# ============================================================================ +# Boot Memory Layout +# ============================================================================ +# wolfBoot runs from DDR at 0x8000000 (same address as U-Boot) +# This matches the partition header from PetaLinux BOOT.BIN: +# exec_addr: 0x08000000, load_addr: 0x08000000 +# EL: el-2, trustzone: non-secure, aarch-64 +WOLFBOOT_ORIGIN=0x8000000 + +# Optional debugging with OCRAM +# Versal Gen 1 (VMK180): OCM is 256KB at 0xFFFC0000 - 0xFFFFFFFF +# Versal Gen 2: OCM is 2MB at 0xFFE00000 - 0xFFFFFFFF +#WOLFBOOT_ORIGIN=0xFFFC0000 + +# Flash Sector Size (QSPI) +WOLFBOOT_SECTOR_SIZE=0x20000 + +# Application Partition Size (44MB) 
+WOLFBOOT_PARTITION_SIZE=0x2C00000 + +# Location in Flash for Primary Boot Partition +WOLFBOOT_PARTITION_BOOT_ADDRESS?=0x800000 + +# Load Partition to RAM Address (Linux kernel loads here) +WOLFBOOT_LOAD_ADDRESS?=0x10000000 + +# Location in Flash for Secondary Partition (update image) +WOLFBOOT_PARTITION_UPDATE_ADDRESS?=0x3400000 + +# Location to store wolfBoot state/swap +WOLFBOOT_PARTITION_SWAP_ADDRESS?=0x6000000 + +# DTS (Device Tree) - matches addresses from BOOT.BIN analysis +WOLFBOOT_LOAD_DTS_ADDRESS?=0x1000 +WOLFBOOT_DTS_BOOT_ADDRESS?=0x7B0000 +WOLFBOOT_DTS_UPDATE_ADDRESS?=0x39B0000 + +# Speed up reads by using larger blocks +CFLAGS_EXTRA+=-DWOLFBOOT_SHA_BLOCK_SIZE=4096 + +# UART Configuration - UART0 for APU console +CFLAGS_EXTRA+=-DDEBUG_UART_NUM=0 + +# QSPI Reference Clock: Ref (300MHz default for Versal) +#CFLAGS_EXTRA+=-DGQSPI_CLK_REF=300000000 + +# QSPI Bus Divisor: (2 << div) = BUS (0=div2, 1=div4, 2=div8) +# MT25QU01G max: 133MHz Quad Read (0x6C) with 8 dummy cycles +# div=0: 300MHz/2 = 150MHz (above spec but tested working) +# div=1: 300MHz/4 = 75MHz (within spec, default) +# div=2: 300MHz/8 = 37.5MHz (conservative) +#CFLAGS_EXTRA+=-DGQSPI_CLK_DIV=1 + +# QSPI flash options (uncomment to enable) +#CFLAGS_EXTRA+=-DDEBUG_QSPI # Enable QSPI debug logging +#CFLAGS_EXTRA+=-DGQSPI_MODE_IO # Use polling instead of DMA (slower) +#CFLAGS_EXTRA+=-DTEST_EXT_FLASH # Run flash erase/write/read test diff --git a/docs/Targets.md b/docs/Targets.md index 49d4c1e453..2cdda247b6 100644 --- a/docs/Targets.md +++ b/docs/Targets.md @@ -48,6 +48,7 @@ This README describes configuration of supported targets. 
* [TI Hercules TMS570LC435](#ti-hercules-tms570lc435) * [Vorago VA416x0](#vorago-va416x0) * [Xilinx Zynq UltraScale](#xilinx-zynq-ultrascale) +* [Versal Gen 1 VMK180](#versal-gen-1-vmk180) ## STM32F4 @@ -1859,6 +1860,184 @@ qemu-system-aarch64 -machine xlnx-zcu102 -cpu cortex-a53 -serial stdio -display `tools/keytools/sign --rsa4096 --sha3 /srv/linux-rpi4/vmlinux.bin wolfboot_signing_private_key.der 1` +## Versal Gen 1 VMK180 + +AMD Versal Prime Series VMK180 Evaluation Kit - Versal Prime XCVM1802-2MSEVSVA2197 Adaptive SoC - Dual ARM Cortex-A72 + +wolfBoot replaces U-Boot in the Versal boot flow: +``` +PLM -> PSM -> BL31 (EL3) -> wolfBoot (EL2) -> Linux (EL1) +``` + +wolfBoot runs from DDR at address `0x8000000` at EL2 (non-secure). All clock, MIO, and DDR initialization is handled by PLM/PSM before wolfBoot starts. + +See example configuration file at `config/examples/versal_vmk180.config`. + +### Prerequisites + +1. **Xilinx Vitis 2024.1 or 2024.2** (required for bootgen - 2025.1 or later has QSPI boot issues) + - Set `VITIS_PATH` environment variable: `export VITIS_PATH=/opt/Xilinx/Vitis/2024.1` + +2. 
**Toolchain**
+   - ARM GCC toolchain: `aarch64-none-elf-gcc`
+
+
+### Configuration Options
+
+Key configuration options in `config/examples/versal_vmk180.config`:
+
+- `ARCH=AARCH64` - ARM 64-bit architecture
+- `TARGET=versal` - Versal platform target
+- `WOLFBOOT_ORIGIN=0x8000000` - Entry point in DDR
+- `WOLFBOOT_SECTOR_SIZE=0x20000` - QSPI flash sector size (128KB)
+- `WOLFBOOT_PARTITION_SIZE=0x2C00000` - Application partition size (44MB)
+- `EXT_FLASH=1` - External flash support
+- `ELF=1` - ELF loading support
+
+### Memory Layout
+
+| Partition   | Size   | Address | Description |
+|-------------|--------|---------|-------------|
+| Bootloader  | -      | 0x8000000 | wolfBoot in DDR (loaded by BL31) |
+| Primary     | 44MB   | 0x800000 | Boot partition in QSPI |
+| Update      | 44MB   | 0x3400000 | Update partition in QSPI |
+| Swap        | -      | 0x6000000 | Swap area in QSPI |
+
+### Debugging
+
+For debugging with OCRAM (OCM), set `WOLFBOOT_ORIGIN=0xFFFC0000` in the config file. Versal Gen 1 OCM is 256KB at `0xFFFC0000 - 0xFFFFFFFF`.
+
+### Building wolfBoot
+
+Build wolfBoot from the wolfBoot root directory:
+
+```sh
+cp config/examples/versal_vmk180.config .config
+make clean
+make
+```
+
+### Building BOOT.BIN
+
+If you don't already have prebuilt firmware, clone the Xilinx prebuilt firmware repository next to the wolfBoot directory. Run these commands from the wolfBoot root directory so that `PREBUILT_DIR` points at the clone:
+
+```sh
+git clone --branch xlnx_rel_v2024.2 https://github.com/Xilinx/soc-prebuilt-firmware.git ../soc-prebuilt-firmware
+export PREBUILT_DIR=$(pwd)/../soc-prebuilt-firmware/vmk180-versal
+```
+
+Copy the required files into the wolfBoot root directory:
+
+```sh
+cp ${PREBUILT_DIR}/project_1.pdi .
+cp ${PREBUILT_DIR}/plm.elf .
+cp ${PREBUILT_DIR}/psmfw.elf .
+cp ${PREBUILT_DIR}/bl31.elf .
+cp ${PREBUILT_DIR}/system-default.dtb .
+```
+
+Source the Vitis environment and generate BOOT.BIN using bootgen:
+
+```sh
+source ${VITIS_PATH}/settings64.sh
+bootgen -arch versal -image ./tools/scripts/vmk180/boot_wolfboot.bif -w -o BOOT.BIN
+```
+
+The BIF file (`boot_wolfboot.bif`) references files using relative paths in the same directory. Because the output is given as `-o BOOT.BIN`, a successful run writes `BOOT.BIN` to the directory `bootgen` was started from (the wolfBoot root directory when following the steps above).
+
+### Flashing QSPI
+
+Flash `BOOT.BIN` to QSPI flash using your preferred method. For example:
+
+- **Vitis**: Use the Hardware Manager to program the QSPI flash via JTAG. Load `BOOT.BIN` and program to QSPI32 flash memory.
+
+- **Lauterbach**: Use Trace32 to program QSPI flash via JTAG. Load `BOOT.BIN` and write to QSPI flash memory addresses.
+
+- **U-Boot via SD Card**: Boot from SD card with U-Boot, then use TFTP to download `BOOT.BIN` and program QSPI flash:
+  ```sh
+  tftp ${loadaddr} BOOT.BIN
+  sf probe 0 0 0
+  sf erase 0 +${filesize}
+  sf write ${loadaddr} 0 ${filesize}
+  ```
+
+### QSPI Flash
+
+VMK180 uses dual parallel MT25QU01GBBB flash (128MB each, 256MB total). The QSPI driver supports:
+- DMA mode (default) or IO polling mode (`GQSPI_MODE_IO`)
+- Quad SPI (4-bit) for faster reads
+- 4-byte addressing for full flash access
+- Hardware striping for dual parallel operation
+- 75MHz default clock (configurable via `GQSPI_CLK_DIV`)
+
+### Building and Signing Test Application
+
+```sh
+# Build and sign the test application
+make test-app/image_v1_signed.bin
+```
+
+The signed test application will be at `test-app/image_v1_signed.bin`.
+ +**Test Application Details:** +- Uses generic `boot_arm64_start.S` startup code (shared with other AArch64 platforms) +- Uses generic `AARCH64.ld` linker script with `@WOLFBOOT_LOAD_ADDRESS@` placeholder +- Displays current exception level (EL) and firmware version +- Entry point: `_start` (in `boot_arm64_start.S`) which sets up stack, clears BSS, and calls `main()` + +### Firmware Update Testing + +wolfBoot supports firmware updates using the UPDATE partition. The bootloader automatically selects the image with the higher version number from either the BOOT or UPDATE partition. + +**Partition Layout:** +- BOOT partition: `0x800000` +- UPDATE partition: `0x3400000` +- For RAM-based boot (Versal), images are loaded to `WOLFBOOT_LOAD_ADDRESS` (`0x10000000`) + +**Update Behavior:** +- wolfBoot checks both BOOT and UPDATE partitions on boot +- Selects the partition with the higher version number +- Falls back to the other partition if verification fails +- The test application displays the firmware version it was signed with + +To test firmware updates, build and sign the test application with different version numbers, then flash them to the appropriate partitions using your preferred method. 
+ +### Example Boot Output + +``` +======================================== +wolfBoot Secure Boot - AMD Versal +======================================== +Current EL: 2 +Timer Freq: 99999904 Hz +QSPI: Lower ID: 20 BB 21 +QSPI: Upper ID: 20 BB 21 +QSPI: 75MHz, Quad mode, DMA +Versions: Boot 1, Update 0 +Trying Boot partition at 0x800000 +Loading header 512 bytes from 0x800000 to 0xFFFFE00 +Loading image 664 bytes from 0x800200 to 0x10000000...done +Boot partition: 0xFFFFE00 (sz 664, ver 0x1, type 0x601) +Checking integrity...done +Verifying signature...done +Successfully selected image in part: 0 +Firmware Valid +Loading elf at 0x10000000 +Invalid elf, falling back to raw binary +Loading DTB (size 24894) from 0x1000 to RAM at 0x1000 +Booting at 0x10000000 + +=========================================== + wolfBoot Test Application - AMD Versal +=========================================== +Current EL: 1 +Firmware Version: 2 (0x00000002) +Application running successfully! + +Entering idle loop... +``` + + ## Cypress PSoC-6 The Cypress PSoC 62S2 is a dual-core Cortex-M4 & Cortex-M0+ MCU. The secure boot process is managed by the M0+. diff --git a/hal/versal.c b/hal/versal.c new file mode 100644 index 0000000000..7293138e7a --- /dev/null +++ b/hal/versal.c @@ -0,0 +1,1509 @@ +/* versal.c + * + * Copyright (C) 2025 wolfSSL Inc. + * + * This file is part of wolfBoot. + * + * wolfBoot is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfBoot is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ *
+ * AMD Versal ACAP HAL implementation for wolfBoot
+ * Target: VMK180 Evaluation Board (VM1802 Versal Prime)
+ *
+ * Features:
+ *   - UART driver (Cadence UART / ARM PL011)
+ *   - ARM Generic Timer
+ *   - QSPI flash driver (GQSPI - dual parallel MT25QU01GBBB)
+ *
+ * QSPI Driver Notes:
+ *   This driver is a port of the ZynqMP GQSPI driver (hal/zynq.c) with the
+ *   following Versal-specific adaptations:
+ *
+ *   1. Different base address (0xF1030000 vs 0xFF0F0000)
+ *   2. Tap delay bypass register is in QSPI block (not IOU_SLCR)
+ *   3. Preserves PLM's QSPI configuration instead of full reset
+ *   4. UART init skips MIO/clock setup when EL2 (PLM already did it)
+ *
+ *   The register layout, GenFIFO format, and DMA interface are identical
+ *   to ZynqMP since both use the same Xilinx GQSPI IP block.
+ *
+ *   See hal/versal.h for detailed comparison with ZynqMP.
+ */
+
+#ifdef TARGET_versal
+
+#include <stdint.h>
+#include <string.h>
+
+#include "hal.h"
+#include "hal/versal.h"
+#include "image.h"
+#include "printf.h"
+
+#ifndef ARCH_AARCH64
+#   error "wolfBoot versal HAL: wrong architecture. Please compile with ARCH=AARCH64."
+#endif
+
+
+/* ============================================================================
+ * UART Driver
+ * ============================================================================
+ * ARM PL011 UART controller
+ * Note: In JTAG boot mode, the PLM doesn't run so UART may be inaccessible.
+ * Timeouts are added to prevent infinite loops.
+ */
+
+#ifdef DEBUG_UART
+
+/* Timeout to prevent infinite loops if UART is inaccessible (e.g., JTAG boot) */
+#define UART_TIMEOUT 10000
+
+/**
+ * Calculate baud rate divisors for ARM PL011 UART
+ * Formula: baud = UART_CLK / (16 * divisor)
+ *          divisor = IBRD + (FBRD / 64)
+ *          IBRD = integer part of (UART_CLK / (16 * baud))
+ *          FBRD = integer part of ((fractional * 64) + 0.5)
+ *
+ * ref_clk: UART reference clock in Hz
+ * baud:    requested baud rate in bits/s (must be non-zero)
+ * ibrd:    out - integer divisor
+ * fbrd:    out - 6-bit fractional divisor
+ */
+static void uart_calc_baud(uint32_t ref_clk, uint32_t baud,
+                           uint32_t *ibrd, uint32_t *fbrd)
+{
+    uint64_t divisor_x64;
+
+    /* divisor      = ref_clk / (16 * baud)
+     * divisor * 64 = (ref_clk * 64) / (16 * baud) = (ref_clk * 4) / baud
+     *
+     * Adding baud/2 before the division rounds to the nearest 1/64 step,
+     * which is the "+ 0.5" term of the FBRD formula above. A carry out of
+     * the fractional part moves into IBRD because both fields are taken
+     * from the same value. The product is formed in 64 bits so that
+     * ref_clk * 4 cannot wrap. */
+    divisor_x64 = (((uint64_t)ref_clk * 4U) + (baud / 2U)) / baud;
+
+    /* Integer part: divisor_x64 / 64 */
+    *ibrd = (uint32_t)(divisor_x64 >> 6);
+
+    /* Fractional part: divisor_x64 % 64 (already in register format) */
+    *fbrd = (uint32_t)(divisor_x64 & 0x3F);
+}
+
+void uart_init(void)
+{
+#if defined(EL2_HYPERVISOR) && EL2_HYPERVISOR == 1
+    /* When booting via PLM -> BL31 -> wolfBoot (EL2), UART is already
+     * fully configured by PLM. Do NOT reinitialize - just use it as-is.
+     * Any reconfiguration at EL2 may fail or corrupt the UART state.
*/ + (void)0; /* UART already configured by PLM - nothing to do */ +#else + /* Full UART initialization for JTAG boot mode or EL3 boot */ + uint32_t ibrd, fbrd; + uint32_t lcr; + volatile uint32_t timeout; + volatile uint32_t *uart_clk_ctrl; + volatile uint32_t *uart_rst_ctrl; + int rx_pin, tx_pin; + + /* Select which UART to use */ +#if defined(DEBUG_UART_NUM) && DEBUG_UART_NUM == 1 + uart_clk_ctrl = (volatile uint32_t*)&CRL_UART1_REF_CTRL; + uart_rst_ctrl = (volatile uint32_t*)&CRL_RST_UART1; + rx_pin = MIO_UART1_RX_PIN; + tx_pin = MIO_UART1_TX_PIN; +#else + uart_clk_ctrl = (volatile uint32_t*)&CRL_UART0_REF_CTRL; + uart_rst_ctrl = (volatile uint32_t*)&CRL_RST_UART0; + rx_pin = MIO_UART0_RX_PIN; + tx_pin = MIO_UART0_TX_PIN; +#endif + + /* Configure MIO pins for UART (required in JTAG boot mode) */ + PMC_IOU_SLCR_MIO_PIN(tx_pin) = MIO_UART_TX_CFG; + PMC_IOU_SLCR_MIO_PIN(rx_pin) = MIO_UART_RX_CFG; + + /* Ensure clock is enabled with proper divisor */ + *uart_clk_ctrl = 0x02000600; + + /* Clear UART reset */ + *uart_rst_ctrl = 0; + + /* Delay to let reset clear and clock stabilize */ + for (timeout = 1000; timeout > 0; timeout--) + __asm__ volatile("nop"); + + /* ===== Step 1: Disable UART before configuration (per TRM) ===== */ + UART_CR = 0; + + /* Wait for UART to finish any current TX (with timeout) */ + timeout = UART_TIMEOUT; + while ((UART_FR & UART_FR_BUSY) && --timeout) + ; + + /* ===== Step 2: Flush FIFOs by disabling FEN in LCR ===== */ + UART_LCR = 0; + + /* ===== Step 3: Clear all pending interrupts ===== */ + UART_IMSC = 0; /* Disable all interrupts */ + UART_ICR = UART_INT_ALL; /* Clear any pending */ + + /* ===== Step 4: Calculate and set baud rate divisors ===== */ + uart_calc_baud(UART_CLK_REF, DEBUG_UART_BAUD, &ibrd, &fbrd); + UART_IBRD = ibrd; + UART_FBRD = fbrd; + + /* ===== Step 5: Write LCR to latch baud rate (REQUIRED per TRM!) 
===== + * The TRM states: "do write of LCR after writing to baud rate registers" + * Configure: 8 data bits, 1 stop bit, no parity, FIFOs enabled */ + lcr = UART_LCR_WLEN_8 | UART_LCR_FEN; + UART_LCR = lcr; + + /* ===== Step 6: Set FIFO trigger levels ===== */ + UART_IFLS = UART_IFLS_RXIFLSEL_1_2 | UART_IFLS_TXIFLSEL_1_2; + + /* ===== Step 7: Enable UART with TX and RX ===== */ + UART_CR = UART_CR_UARTEN | UART_CR_TXE | UART_CR_RXE; + + /* Small delay to let UART stabilize */ + for (timeout = 100; timeout > 0; timeout--) + __asm__ volatile("nop"); +#endif /* EL2_HYPERVISOR */ +} + +static void uart_tx(uint8_t c) +{ + volatile uint32_t timeout = UART_TIMEOUT; + + /* Wait for TX FIFO to have space (not full) with timeout */ + while ((UART_FR & UART_FR_TXFF) && --timeout) + ; + + /* Write character to data register */ + UART_DR = c; +} + +void uart_write(const char *buf, uint32_t len) +{ + uint32_t i; + volatile uint32_t timeout; + + for (i = 0; i < len; i++) { + if (buf[i] == '\n') { + uart_tx('\r'); + } + uart_tx((uint8_t)buf[i]); + } + + /* Wait for transmit FIFO to empty (with timeout) */ + timeout = UART_TIMEOUT; + while (!(UART_FR & UART_FR_TXFE) && --timeout) + ; + + /* Wait for UART to finish transmitting (with timeout) */ + timeout = UART_TIMEOUT; + while ((UART_FR & UART_FR_BUSY) && --timeout) + ; +} + +#else +#define uart_init() do {} while(0) +#endif /* DEBUG_UART */ + + +/* ============================================================================ + * Timer Functions (ARM Generic Timer) + * ============================================================================ + */ + +/** + * Get current timer count (physical counter) + */ +static inline uint64_t timer_get_count(void) +{ + uint64_t cntpct; + __asm__ volatile("mrs %0, cntpct_el0" : "=r" (cntpct)); + return cntpct; +} + +/** + * Get timer frequency + */ +static inline uint64_t timer_get_freq(void) +{ + uint64_t cntfrq; + __asm__ volatile("mrs %0, cntfrq_el0" : "=r" (cntfrq)); + return cntfrq; +} + 
+/**
+ * Read the counter frequency, falling back to TIMER_CLK_FREQ when
+ * CNTFRQ_EL0 reads back as zero.
+ */
+static uint64_t timer_get_freq_or_default(void)
+{
+    uint64_t cntfrq = timer_get_freq();
+
+    if (cntfrq == 0)
+        cntfrq = TIMER_CLK_FREQ;
+    return cntfrq;
+}
+
+/**
+ * Convert a tick count to time units: (ticks * units_per_sec) / freq.
+ *
+ * The whole seconds and the sub-second remainder are scaled separately.
+ * The result is identical to the direct formula, but the intermediate
+ * product is bounded by freq * units_per_sec instead of
+ * ticks * units_per_sec, so it does not wrap 64 bits as uptime grows.
+ */
+static uint64_t timer_ticks_to_units(uint64_t ticks, uint64_t freq,
+                                     uint64_t units_per_sec)
+{
+    return ((ticks / freq) * units_per_sec) +
+           (((ticks % freq) * units_per_sec) / freq);
+}
+
+/**
+ * Get current time in milliseconds
+ */
+uint64_t hal_timer_ms(void)
+{
+    uint64_t cntpct = timer_get_count();
+
+    return timer_ticks_to_units(cntpct, timer_get_freq_or_default(), 1000ULL);
+}
+
+/**
+ * Delay for specified number of microseconds
+ */
+void hal_delay_us(uint32_t us)
+{
+    uint64_t cntfrq = timer_get_freq_or_default();
+    uint64_t start, ticks;
+
+    start = timer_get_count();
+    ticks = ((uint64_t)us * cntfrq) / 1000000ULL;
+
+    /* Compare elapsed ticks rather than an absolute target so the loop
+     * stays correct even if start + ticks would wrap. */
+    while ((timer_get_count() - start) < ticks)
+        ;
+}
+
+/**
+ * Get current time in microseconds (for benchmarking)
+ */
+uint64_t hal_get_timer_us(void)
+{
+    uint64_t cntpct = timer_get_count();
+
+    return timer_ticks_to_units(cntpct, timer_get_freq_or_default(),
+                                1000000ULL);
+}
+
+
+/* ============================================================================
+ * QSPI Flash Driver (GQSPI)
+ * ============================================================================
+ * Bare-metal QSPI driver for Versal VMK180.
+ * Hardware: Dual parallel MT25QU01GBBB (128MB each, 256MB total).
+ *
+ * This driver is adapted from the ZynqMP GQSPI driver (hal/zynq.c).
+ * Both platforms use the same Xilinx GQSPI IP block with identical: + * - Register offsets (GQSPI at +0x100, DMA at +0x800 from base) + * - GenFIFO entry format (command, address, data, stripe bits) + * - Interrupt status bits and DMA interface + * + * Versal-specific differences from ZynqMP: + * - Base address: 0xF1030000 (vs 0xFF0F0000 on ZynqMP) + * - Tap delay register: In QSPI block (vs IOU_SLCR on ZynqMP) + * - Initialization: Preserves PLM config (vs full reset on ZynqMP) + * + * Supported modes (same as ZynqMP): + * - DMA mode (default) or IO polling mode (GQSPI_MODE_IO) + * - Quad SPI (4-bit), Dual SPI (2-bit), or Standard SPI (1-bit) + * - 4-byte addressing for flash >16MB (GQPI_USE_4BYTE_ADDR) + * - Dual parallel with hardware striping (GQPI_USE_DUAL_PARALLEL) + * - EXP (exponent) length mode for large transfers + * + * Clock: 300MHz ref / (2 << DIV) = 75MHz default (DIV=1) + * MT25QU01GBBB supports up to 133MHz for Quad Output Read. + */ + +#ifdef EXT_FLASH + +/* Debug logging for QSPI driver */ +#ifdef DEBUG_QSPI + #define QSPI_DEBUG_PRINTF(...) wolfBoot_printf(__VA_ARGS__) +#else + #define QSPI_DEBUG_PRINTF(...) 
do {} while(0) +#endif + +/* QSPI device structure */ +typedef struct { + uint32_t mode; /* GQSPI_GEN_FIFO_MODE_SPI/DSPI/QSPI */ + uint32_t bus; /* GQSPI_GEN_FIFO_BUS_LOW/UP/BOTH */ + uint32_t cs; /* GQSPI_GEN_FIFO_CS_LOWER/UPPER/BOTH */ + uint32_t stripe; /* 0 or GQSPI_GEN_FIFO_STRIPE for dual parallel */ +} QspiDev_t; + +static QspiDev_t qspiDev; +static int qspi_initialized = 0; + +/* Forward declarations */ +static int qspi_transfer(QspiDev_t *dev, const uint8_t *txData, uint32_t txLen, + uint8_t *rxData, uint32_t rxLen, uint32_t dummyClocks, + const uint8_t *writeData, uint32_t writeLen); +static int qspi_wait_ready(QspiDev_t *dev); + +/* Wait for GenFIFO empty (all entries processed) with timeout */ +static int qspi_wait_genfifo_empty(void) +{ + uint32_t timeout = GQSPI_TIMEOUT_TRIES; + uint32_t isr; + + isr = GQSPI_ISR; + while (!(isr & GQSPI_IXR_GEN_FIFO_EMPTY) && --timeout) { + isr = GQSPI_ISR; + } + if (timeout == 0) { + QSPI_DEBUG_PRINTF("QSPI: GenFIFO empty timeout\n"); + return -1; + } + return 0; +} + +/* Wait for TX FIFO empty with timeout */ +static int qspi_wait_tx_empty(void) +{ + uint32_t timeout = GQSPI_TIMEOUT_TRIES; + while (!(GQSPI_ISR & GQSPI_IXR_TX_FIFO_EMPTY) && --timeout) + ; + if (timeout == 0) { + QSPI_DEBUG_PRINTF("QSPI: TX empty timeout\n"); + return -1; + } + return 0; +} + +/* Write to GenFIFO (without triggering - batch mode) */ +static int qspi_gen_fifo_push(uint32_t entry) +{ + uint32_t timeout = GQSPI_TIMEOUT_TRIES; + uint32_t isr; + + /* Wait for GenFIFO not full */ + isr = GQSPI_ISR; + while (!(isr & GQSPI_IXR_GEN_FIFO_NOT_FULL) && --timeout) { + isr = GQSPI_ISR; + } + if (timeout == 0) { + QSPI_DEBUG_PRINTF("QSPI: GenFIFO full timeout\n"); + return -1; + } + + /* Write the entry to GenFIFO */ + GQSPI_GEN_FIFO = entry; + + return 0; +} + +/* Trigger GenFIFO processing and wait for completion */ +static int qspi_gen_fifo_start_and_wait(void) +{ + uint32_t cfg; + uint32_t timeout = GQSPI_TIMEOUT_TRIES; + uint32_t isr; + + dsb(); 
/* Ensure all writes complete */ + + /* Trigger GenFIFO processing by setting START_GEN_FIFO */ + cfg = GQSPI_CFG; + cfg |= GQSPI_CFG_START_GEN_FIFO; + GQSPI_CFG = cfg; + dsb(); + + /* Wait for GenFIFO to empty (all entries processed) */ + isr = GQSPI_ISR; + while (!(isr & GQSPI_IXR_GEN_FIFO_EMPTY) && --timeout) { + isr = GQSPI_ISR; + } + if (timeout == 0) { + QSPI_DEBUG_PRINTF("QSPI: GenFIFO start timeout\n"); + return -1; + } + return 0; +} + +/* Legacy wrapper for compatibility */ +static int qspi_gen_fifo_write(uint32_t entry) +{ + int ret = qspi_gen_fifo_push(entry); + if (ret == 0) { + ret = qspi_gen_fifo_start_and_wait(); + } + return ret; +} + +/* Calculate EXP mode for large transfers (returns actual transfer size) + * For transfers > 255 bytes, use exponent mode where IMM = power of 2 + * Pattern from zynq.c qspi_calc_exp() */ +static uint32_t qspi_calc_exp(uint32_t xferSz, uint32_t *reg_genfifo) +{ + uint32_t expval; + + *reg_genfifo &= ~(GQSPI_GEN_FIFO_IMM_MASK | GQSPI_GEN_FIFO_EXP); + + if (xferSz > GQSPI_GEN_FIFO_IMM_MASK) { + /* Use exponent mode (max is 2^28 for DMA) */ + for (expval = 28; expval >= 8; expval--) { + /* Find highest power of 2 that fits */ + if (xferSz >= (1UL << expval)) { + *reg_genfifo |= GQSPI_GEN_FIFO_EXP; + *reg_genfifo |= GQSPI_GEN_FIFO_IMM(expval); + xferSz = (1UL << expval); + break; + } + } + } else { + /* Use immediate length mode */ + *reg_genfifo |= GQSPI_GEN_FIFO_IMM(xferSz); + } + return xferSz; +} + +/* Chip select control */ +static int qspi_cs(QspiDev_t *dev, int assert) +{ + uint32_t entry; + + entry = (dev->bus & GQSPI_GEN_FIFO_BUS_MASK) | GQSPI_GEN_FIFO_MODE_SPI; + if (assert) { + entry |= (dev->cs & GQSPI_GEN_FIFO_CS_MASK); + } + /* Idle clocks for CS setup/hold */ + entry |= GQSPI_GEN_FIFO_IMM(2); + + return qspi_gen_fifo_write(entry); +} + +/* DMA temporary buffer for unaligned transfers (DMA is default, IO is optional) */ +#ifndef GQSPI_MODE_IO +static uint8_t XALIGNED(GQSPI_DMA_ALIGN) 
dma_tmpbuf[GQSPI_DMA_TMPSZ]; + +/* Flush data cache for DMA coherency */ +static void flush_dcache_range(uintptr_t start, uintptr_t end) +{ + /* ARM64: Clean and invalidate by virtual address to PoC */ + uintptr_t addr; + for (addr = (start & ~(GQSPI_DMA_ALIGN - 1)); addr < end; + addr += GQSPI_DMA_ALIGN) { + __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory"); + } + __asm__ volatile("dsb sy" : : : "memory"); +} + +/* Wait for DMA completion + * Returns: 0 on success, -1 on timeout + */ +static int qspi_dma_wait(void) +{ + uint32_t timeout = GQSPIDMA_TIMEOUT_TRIES; + + while (!(GQSPIDMA_ISR & GQSPIDMA_ISR_DONE) && --timeout) + ; + + if (timeout == 0) { + QSPI_DEBUG_PRINTF("QSPI: DMA timeout\n"); + /* Clear any pending interrupts */ + GQSPIDMA_ISR = GQSPIDMA_ISR_ALL_MASK; + return -1; + } + + /* Clear DMA done interrupt */ + GQSPIDMA_ISR = GQSPIDMA_ISR_DONE; + return 0; +} +#endif /* !GQSPI_MODE_IO */ + +/* TX via FIFO (polling mode) */ +static int qspi_fifo_tx(const uint8_t *data, uint32_t len) +{ + uint32_t tmp32; + uint32_t timeout; + + while (len > 0) { + /* Wait for TX FIFO not full */ + timeout = GQSPI_TIMEOUT_TRIES; + while ((GQSPI_ISR & GQSPI_IXR_TX_FIFO_FULL) && --timeout) + ; + if (timeout == 0) { + QSPI_DEBUG_PRINTF("QSPI: TX FIFO full timeout\n"); + return -1; + } + + if (len >= 4) { + tmp32 = *((uint32_t*)data); + GQSPI_TXD = tmp32; + data += 4; + len -= 4; + } else { + tmp32 = 0; + memcpy(&tmp32, data, len); + GQSPI_TXD = tmp32; + len = 0; + } + } + return 0; +} + +/* RX via FIFO (polling mode) */ +static int qspi_fifo_rx(uint8_t *data, uint32_t len) +{ + uint32_t tmp32; + uint32_t timeout; + + while (len > 0) { + /* Wait for RX FIFO not empty */ + timeout = GQSPI_TIMEOUT_TRIES; + while (!(GQSPI_ISR & GQSPI_IXR_RX_FIFO_NOT_EMPTY) && --timeout) + ; + if (timeout == 0) { + QSPI_DEBUG_PRINTF("QSPI: RX FIFO empty timeout\n"); + return -1; + } + + tmp32 = GQSPI_RXD; + if (len >= 4) { + *((uint32_t*)data) = tmp32; + data += 4; + len -= 4; + } else { + 
memcpy(data, &tmp32, len); + len = 0; + } + } + return 0; +} + +/* Core QSPI transfer function using GenFIFO */ +static int qspi_transfer(QspiDev_t *dev, const uint8_t *txData, uint32_t txLen, + uint8_t *rxData, uint32_t rxLen, uint32_t dummyClocks, + const uint8_t *writeData, uint32_t writeLen) +{ + int ret = 0; + uint32_t entry; + uint32_t i; + uint32_t chunkLen; + uint32_t txEntry, chunkEntry; + const uint8_t *writePtr; + uint32_t remaining, offset, xferSz; + uint32_t rxEntry; + + /* Enable GQSPI controller */ + /* Set DMA mode only for Quad reads (when dummyClocks > 0) and not in IO mode */ + if (dummyClocks > 0 && rxLen > 0) { +#ifndef GQSPI_MODE_IO + GQSPI_CFG = (GQSPI_CFG & ~GQSPI_CFG_MODE_EN_MASK) | GQSPI_CFG_MODE_EN_DMA; +#endif + } + GQSPI_EN = 1; + dsb(); + + /* Base entry: bus + CS + SPI mode */ + entry = (dev->bus & GQSPI_GEN_FIFO_BUS_MASK) | + (dev->cs & GQSPI_GEN_FIFO_CS_MASK) | + GQSPI_GEN_FIFO_MODE_SPI; + + /* === CS Assert + TX Phase (batch all entries, then trigger) === */ + + /* CS assertion entry - just set CS with some idle clocks */ + ret = qspi_gen_fifo_push(entry | GQSPI_GEN_FIFO_IMM(1)); + + /* TX Phase - send command bytes via immediate data */ + for (i = 0; i < txLen && ret == 0; i++) { + uint32_t txEntry = entry | GQSPI_GEN_FIFO_TX | + GQSPI_GEN_FIFO_IMM(txData[i]); + ret = qspi_gen_fifo_push(txEntry); + } + + /* Trigger and wait for TX to complete */ + if (ret == 0) { + ret = qspi_gen_fifo_start_and_wait(); + } + + /* Dummy clocks phase (for fast read commands) + * Use QSPI mode if dummy clocks are present (indicates Quad Read) */ + if (ret == 0 && dummyClocks > 0) { + uint32_t dummyEntry = (dev->bus & GQSPI_GEN_FIFO_BUS_MASK) | + (dev->cs & GQSPI_GEN_FIFO_CS_MASK) | + GQSPI_QSPI_MODE | + GQSPI_GEN_FIFO_DATA_XFER | + GQSPI_GEN_FIFO_IMM(dummyClocks); + ret = qspi_gen_fifo_push(dummyEntry); + if (ret == 0) { + ret = qspi_gen_fifo_start_and_wait(); + } + } + + /* === TX Write Data Phase === */ + if (ret == 0 && writeLen > 0 && writeData 
!= NULL) { + txEntry = entry | GQSPI_GEN_FIFO_TX | GQSPI_GEN_FIFO_DATA_XFER | + (dev->stripe & GQSPI_GEN_FIFO_STRIPE); + writePtr = writeData; + chunkLen = writeLen; + + while (chunkLen > 0 && ret == 0) { + uint32_t chunk = (chunkLen > 255) ? 255 : chunkLen; + chunkEntry = txEntry | GQSPI_GEN_FIFO_IMM(chunk); + + ret = qspi_gen_fifo_push(chunkEntry); + if (ret != 0) break; + + /* Start GenFIFO processing so it drains TX FIFO as we fill it */ + GQSPI_CFG |= GQSPI_CFG_START_GEN_FIFO; + dsb(); + + /* Push data to TX FIFO */ + ret = qspi_fifo_tx(writePtr, chunk); + if (ret != 0) break; + + /* Wait for GenFIFO to complete */ + ret = qspi_wait_genfifo_empty(); + + writePtr += chunk; + chunkLen -= chunk; + } + } + + /* === RX Phase === */ + if (ret == 0 && rxLen > 0) { + /* Use QSPI mode for RX if dummy clocks were used (Quad Read) */ + if (dummyClocks > 0) { + rxEntry = (dev->bus & GQSPI_GEN_FIFO_BUS_MASK) | + (dev->cs & GQSPI_GEN_FIFO_CS_MASK) | + GQSPI_QSPI_MODE | + GQSPI_GEN_FIFO_RX | + GQSPI_GEN_FIFO_DATA_XFER | + (dev->stripe & GQSPI_GEN_FIFO_STRIPE); + +#ifndef GQSPI_MODE_IO + /* DMA mode: Use DMA for RX phase */ + if ((GQSPI_CFG & GQSPI_CFG_MODE_EN_MASK) == GQSPI_CFG_MODE_EN_DMA) { + uint8_t *dmaPtr; + uint32_t dmaLen; + int useTemp = 0; + + /* Check alignment - DMA requires cache-line aligned buffer. + * If unaligned or not a multiple of 4 bytes, use temp buffer. + * CRITICAL: GenFIFO transfer size must match DMA size! 
*/ + if (((uintptr_t)rxData & (GQSPI_DMA_ALIGN - 1)) || (rxLen & 3)) { + /* Use temp buffer for unaligned data */ + dmaPtr = dma_tmpbuf; + dmaLen = (rxLen + GQSPI_DMA_ALIGN - 1) & ~(GQSPI_DMA_ALIGN - 1); + if (dmaLen > sizeof(dma_tmpbuf)) { + dmaLen = sizeof(dma_tmpbuf); + } + useTemp = 1; + } else { + dmaPtr = rxData; + dmaLen = rxLen; + } + + /* GenFIFO must request the same number of bytes as DMA expects */ + remaining = dmaLen; + + /* Setup DMA destination */ + GQSPIDMA_DST = ((uintptr_t)dmaPtr & 0xFFFFFFFFUL); + GQSPIDMA_DST_MSB = ((uintptr_t)dmaPtr >> 32); + GQSPIDMA_SIZE = dmaLen; + + /* Enable DMA done interrupt */ + GQSPIDMA_IER = GQSPIDMA_ISR_DONE; + + /* Flush dcache for DMA coherency */ + flush_dcache_range((uintptr_t)dmaPtr, (uintptr_t)dmaPtr + dmaLen); + + /* Push all GenFIFO entries first (use EXP mode for large transfers) */ + while (ret == 0 && remaining > 0) { + xferSz = qspi_calc_exp(remaining, &rxEntry); + ret = qspi_gen_fifo_push(rxEntry); + remaining -= xferSz; + } + + /* Trigger GenFIFO */ + if (ret == 0) { + GQSPI_CFG |= GQSPI_CFG_START_GEN_FIFO; + dsb(); + } + + /* Wait for DMA completion */ + if (ret == 0) { + ret = qspi_dma_wait(); + } + + /* Invalidate cache after DMA */ + flush_dcache_range((uintptr_t)dmaPtr, (uintptr_t)dmaPtr + dmaLen); + + /* Copy from temp buffer if needed (only copy requested bytes) */ + if (ret == 0 && useTemp) { + memcpy(rxData, dmaPtr, rxLen); + } + } else { + /* IO mode: Use FIFO polling (fallback when DMA mode not enabled) */ + remaining = rxLen; + offset = 0; + while (ret == 0 && remaining > 0) { + xferSz = qspi_calc_exp(remaining, &rxEntry); + ret = qspi_gen_fifo_push(rxEntry); + if (ret == 0) { + ret = qspi_gen_fifo_start_and_wait(); + } + if (ret == 0) { + ret = qspi_fifo_rx(&rxData[offset], xferSz); + } + offset += xferSz; + remaining -= xferSz; + } + } +#else /* GQSPI_MODE_IO */ + /* IO mode: Use FIFO polling */ + remaining = rxLen; + offset = 0; + while (ret == 0 && remaining > 0) { + xferSz = 
qspi_calc_exp(remaining, &rxEntry);
+                ret = qspi_gen_fifo_push(rxEntry);
+                if (ret == 0) {
+                    ret = qspi_gen_fifo_start_and_wait();
+                }
+                if (ret == 0) {
+                    ret = qspi_fifo_rx(&rxData[offset], xferSz);
+                }
+                offset += xferSz;
+                remaining -= xferSz;
+            }
+#endif /* !GQSPI_MODE_IO */
+        } else {
+            /* SPI mode for simple reads */
+            rxEntry = entry | GQSPI_GEN_FIFO_RX |
+                      GQSPI_GEN_FIFO_DATA_XFER |
+                      (dev->stripe & GQSPI_GEN_FIFO_STRIPE) |
+                      GQSPI_GEN_FIFO_IMM(1);
+            uint32_t readSz = dev->stripe ? 2 : 1;
+
+            for (i = 0; i < rxLen && ret == 0; i += readSz) {
+                ret = qspi_gen_fifo_push(rxEntry);
+                if (ret == 0) {
+                    ret = qspi_gen_fifo_start_and_wait();
+                }
+                if (ret == 0) {
+                    ret = qspi_fifo_rx(&rxData[i], readSz);
+                }
+            }
+        }
+    }
+
+    /* === CS Deassert === */
+    /* Remove CS bits from entry for deassert */
+    entry = (dev->bus & GQSPI_GEN_FIFO_BUS_MASK) | GQSPI_GEN_FIFO_MODE_SPI;
+    qspi_gen_fifo_push(entry | GQSPI_GEN_FIFO_IMM(1));
+    qspi_gen_fifo_start_and_wait();
+
+    /* Switch back to IO mode if DMA was used and disable controller */
+#ifndef GQSPI_MODE_IO
+    if ((GQSPI_CFG & GQSPI_CFG_MODE_EN_MASK) == GQSPI_CFG_MODE_EN_DMA) {
+        GQSPI_CFG = (GQSPI_CFG & ~GQSPI_CFG_MODE_EN_MASK) | GQSPI_CFG_MODE_EN_IO;
+    }
+#endif
+    GQSPI_EN = 0;
+    dsb();
+
+    return ret;
+}
+
+/* Read flash ID
+ * Sends FLASH_CMD_READ_ID and reads len bytes into id.
+ * Returns the qspi_transfer() result (0 is treated as success by callers).
+ */
+static int qspi_read_id(QspiDev_t *dev, uint8_t *id, uint32_t len)
+{
+    uint8_t cmd[1];
+    int ret;
+
+    cmd[0] = FLASH_CMD_READ_ID;
+    ret = qspi_transfer(dev, cmd, 1, id, len, 0, NULL, 0);
+
+    return ret;
+}
+
+/* Read a one-byte flash register selected by the command byte regCmd.
+ * Shared by qspi_read_status() and qspi_read_flag_status(), which differ only
+ * in the command they send.
+ *
+ * Dual parallel (dev->stripe set): each chip is read on its own bus and chip
+ * select with striping turned off in a local copy of *dev, and the two values
+ * are ANDed, so a bit is only reported when both chips report it.
+ *
+ * Returns 0 on success, otherwise the qspi_transfer() error. *status is left
+ * untouched on error.
+ */
+static int qspi_read_status_cmd(QspiDev_t *dev, uint8_t regCmd, uint8_t *status)
+{
+    uint8_t cmd[1];
+    uint8_t data[4]; /* Space for 2 bytes from each chip */
+    int ret;
+    QspiDev_t tmpDev;
+
+    cmd[0] = regCmd;
+
+    /* For dual parallel, read status from each chip separately */
+    if (dev->stripe) {
+        /* Read from lower chip */
+        tmpDev = *dev;
+        tmpDev.bus = GQSPI_GEN_FIFO_BUS_LOW;
+        tmpDev.cs = GQSPI_GEN_FIFO_CS_LOWER;
+        tmpDev.stripe = 0;
+        ret = qspi_transfer(&tmpDev, cmd, 1, &data[0], 1, 0, NULL, 0);
+        if (ret != 0) return ret;
+
+        /* Read from upper chip */
+        tmpDev.bus = GQSPI_GEN_FIFO_BUS_UP;
+        tmpDev.cs = GQSPI_GEN_FIFO_CS_UPPER;
+        ret = qspi_transfer(&tmpDev, cmd, 1, &data[1], 1, 0, NULL, 0);
+        if (ret != 0) return ret;
+
+        /* AND the status from both chips */
+        *status = data[0] & data[1];
+        return 0;
+    }
+
+    ret = qspi_transfer(dev, cmd, 1, data, 1, 0, NULL, 0);
+    if (ret == 0) {
+        *status = data[0];
+    }
+    return ret;
+}
+
+/* Read flash status register (FLASH_CMD_READ_STATUS) into *status.
+ * Returns 0 on success, otherwise the transfer error.
+ */
+static int qspi_read_status(QspiDev_t *dev, uint8_t *status)
+{
+    return qspi_read_status_cmd(dev, FLASH_CMD_READ_STATUS, status);
+}
+
+/* Read flash flag status register (FLASH_CMD_READ_FLAG_STATUS) into *status.
+ * Returns 0 on success, otherwise the transfer error.
+ */
+static int qspi_read_flag_status(QspiDev_t *dev, uint8_t *status)
+{
+    return qspi_read_status_cmd(dev, FLASH_CMD_READ_FLAG_STATUS, status);
+}
+
+/* Wait for flash ready (not busy)
+ * Polls the flag status register up to GQSPI_FLASH_READY_TRIES times.
+ * A failed read counts as one try and polling continues.
+ * Returns 0 once FLASH_FSR_READY is seen, -1 when the tries run out.
+ */
+static int qspi_wait_ready(QspiDev_t *dev)
+{
+    uint8_t status = 0;
+    uint32_t timeout = GQSPI_FLASH_READY_TRIES;
+    int ret;
+
+    while (timeout-- > 0) {
+        ret = qspi_read_flag_status(dev, &status);
+        if (ret == 0 && (status & FLASH_FSR_READY)) {
+            return 0;
+        }
+    }
+    QSPI_DEBUG_PRINTF("QSPI: Flash ready timeout\n");
+    return -1;
+}
+
+/* Write Enable
+ * Sends FLASH_CMD_WRITE_ENABLE (to each chip in turn for dual parallel), then
+ * polls the status register until FLASH_SR_WEL is set.
+ * Returns 0 when WEL is seen, the transfer error if sending the command
+ * fails, or -1 after GQSPI_FLASH_READY_TRIES polls without WEL.
+ */
+static int qspi_write_enable(QspiDev_t *dev)
+{
+    uint8_t cmd[1];
+    uint8_t status = 0;
+    int ret;
+    uint32_t timeout = GQSPI_FLASH_READY_TRIES;
+    QspiDev_t tmpDev;
+
+    cmd[0] = FLASH_CMD_WRITE_ENABLE;
+
+    /* For dual parallel, send write enable to both chips separately */
+    if (dev->stripe) {
+        /* Send to lower chip */
+        tmpDev = *dev;
+        tmpDev.bus = GQSPI_GEN_FIFO_BUS_LOW;
+        tmpDev.cs = GQSPI_GEN_FIFO_CS_LOWER;
+        tmpDev.stripe = 0;
+        ret = qspi_transfer(&tmpDev, cmd, sizeof(cmd), NULL, 0, 0, NULL, 0);
+        if (ret != 0) return ret;
+
+        /* Send to upper chip */
+        tmpDev.bus = GQSPI_GEN_FIFO_BUS_UP;
+        tmpDev.cs = GQSPI_GEN_FIFO_CS_UPPER;
+        ret = qspi_transfer(&tmpDev, cmd, sizeof(cmd), NULL, 0, 0, NULL, 0);
+        if (ret != 0) return ret;
+    } else {
+        ret = qspi_transfer(dev, cmd, sizeof(cmd), NULL, 0, 0, NULL, 0);
+        if (ret != 0) return ret;
+    }
+
+    /* Wait for WEL bit to be set */
+    while (timeout-- > 0) {
+        ret = qspi_read_status(dev, &status);
+        if (ret == 0 && (status & FLASH_SR_WEL)) {
+            return 0;
+        }
+    }
+    QSPI_DEBUG_PRINTF("QSPI: Write enable timeout\n");
+    return -1;
+}
+
+/* Write Disable
+ * Sends FLASH_CMD_WRITE_DISABLE and returns the qspi_transfer() result.
+ */
+static int qspi_write_disable(QspiDev_t *dev)
+{
+    uint8_t cmd[1];
+
+    cmd[0] = FLASH_CMD_WRITE_DISABLE;
+    return qspi_transfer(dev, cmd, sizeof(cmd), NULL, 0, 0, NULL, 0);
+}
+
+#if GQPI_USE_4BYTE_ADDR == 1
+/* Enter 4-byte address mode
+ * Sequence: wait ready (result ignored), write enable, FLASH_CMD_ENTER_4B_MODE,
+ * wait ready, write disable (result ignored).
+ * Returns 0 on success or the first failing step's error.
+ */
+static int qspi_enter_4byte_addr(QspiDev_t *dev)
+{
+    uint8_t cmd[1];
+    int ret;
+
+    qspi_wait_ready(dev);
+    ret = qspi_write_enable(dev);
+    if (ret != 0) return ret;
+
+    cmd[0] = FLASH_CMD_ENTER_4B_MODE;
+    ret = qspi_transfer(dev, cmd, sizeof(cmd), NULL, 0, 0, NULL, 0);
+    QSPI_DEBUG_PRINTF("QSPI: Enter 4-byte mode: ret=%d\n", ret);
+
+    if (ret == 0) {
+        ret = qspi_wait_ready(dev);
+    }
+    qspi_write_disable(dev);
+    return ret;
+}
+
+/* Exit 4-byte address mode
+ * Sequence: write enable, FLASH_CMD_EXIT_4B_MODE, wait ready, write disable
+ * (result ignored).
+ * Returns 0 on success or the first failing step's error.
+ */
+static int qspi_exit_4byte_addr(QspiDev_t *dev)
+{
+    uint8_t cmd[1];
+    int ret;
+
+    ret = qspi_write_enable(dev);
+    if (ret != 0) return ret;
+
+    cmd[0] = FLASH_CMD_EXIT_4B_MODE;
+    ret = qspi_transfer(dev, cmd, sizeof(cmd), NULL, 0, 0, NULL, 0);
+    QSPI_DEBUG_PRINTF("QSPI: Exit 4-byte mode: ret=%d\n", ret);
+
+    if (ret == 0) {
+        ret = qspi_wait_ready(dev);
+    }
+    qspi_write_disable(dev);
+    return ret;
+}
+#endif
+
+#ifdef TEST_EXT_FLASH
+#ifndef TEST_EXT_ADDRESS
+#define TEST_EXT_ADDRESS 0x2800000 /* 40MB */
+#endif
+#ifndef TEST_EXT_SIZE
+#define TEST_EXT_SIZE (FLASH_PAGE_SIZE * 4)
+#endif
+
+/* Self-test of the external flash API at TEST_EXT_ADDRESS.
+ * Unless TEST_FLASH_READONLY is defined: erases one sector, writes an
+ * incrementing byte pattern of TEST_EXT_SIZE bytes, reads it back and
+ * compares. With TEST_FLASH_READONLY only the read and the hex dump of the
+ * first 32 bytes are performed.
+ * Returns the negative ext_flash_read() error, -1 on a pattern mismatch,
+ * otherwise the ext_flash_read() result.
+ */
+static int test_ext_flash(QspiDev_t* dev)
+{
+    int ret;
+    uint32_t i;
+    uint8_t pageData[TEST_EXT_SIZE];
+
+    (void)dev;
+    wolfBoot_printf("Testing ext flash at 0x%x...\n", TEST_EXT_ADDRESS);
+
+#ifndef TEST_FLASH_READONLY
+    /* Erase sector */
+    ret = ext_flash_erase(TEST_EXT_ADDRESS, WOLFBOOT_SECTOR_SIZE);
+    wolfBoot_printf("Erase Sector: Ret %d\n", ret);
+
+    /* Write Pages */
+    for (i = 0; i < sizeof(pageData); i++) {
+        pageData[i] = (i & 0xff);
+    }
+    ret = ext_flash_write(TEST_EXT_ADDRESS, pageData, sizeof(pageData));
+    wolfBoot_printf("Write Page: Ret %d\n", ret);
+#endif /* !TEST_FLASH_READONLY */
+
+    /* Read page */
+    memset(pageData, 0, sizeof(pageData));
+    ret = ext_flash_read(TEST_EXT_ADDRESS, pageData, sizeof(pageData));
+    wolfBoot_printf("Read Page: Ret %d\n", ret);
+    if (ret < 0) {
+        wolfBoot_printf("Flash read failed!\n");
+        return ret;
+    }
+
+    /* Print first 32 bytes of data */
+    wolfBoot_printf("Data: ");
+    for (i = 0; i < 32 && i < sizeof(pageData); i++) {
+        wolfBoot_printf("%02x ", pageData[i]);
+    }
+    wolfBoot_printf("...\n");
+
+#ifndef TEST_FLASH_READONLY
+    wolfBoot_printf("Checking pattern...\n");
+    /* Check data */
+    for (i = 0; i < sizeof(pageData); i++) {
+        if (pageData[i] != (i & 0xff)) {
+            wolfBoot_printf("Check Data @ %d failed: got 0x%02x, expected 0x%02x\n",
+                i, pageData[i], (i & 0xff));
+            return -1;
+        }
+    }
+    wolfBoot_printf("Flash Test Passed!\n");
+#else
+    wolfBoot_printf("Flash Read Test Complete (readonly mode)\n");
+#endif
+
+    return ret;
+}
+#endif /* TEST_EXT_FLASH */
+
+/* Initialize QSPI controller
+ * Reuses the configuration left in GQSPI_CFG, switching only the mode field
+ * to IO for the initial commands. Drains stale RX data, clears interrupt
+ * status, sets FIFO thresholds, initializes the DMA block (unless
+ * GQSPI_MODE_IO), reads the flash ID(s), optionally enters 4-byte addressing,
+ * and fills in the global qspiDev (dual parallel when GQPI_USE_DUAL_PARALLEL
+ * is 1). Sets qspi_initialized even when an ID read or mode change failed;
+ * such failures are only reported through QSPI_DEBUG_PRINTF.
+ */
+static void qspi_init(void)
+{
+    uint32_t cfg;
+    uint8_t id[4];
+    int ret;
+
+    QSPI_DEBUG_PRINTF("QSPI: Initializing (base=0x%lx)...\n",
+        (unsigned long)VERSAL_QSPI_BASE);
+
+    /* Read initial state left by PLM */
+    cfg = GQSPI_CFG;
+
+    /* Disable controller during reconfiguration */
+    GQSPI_EN = 0;
+    dsb();
+
+    /* Select GQSPI mode (not linear LQSPI) */
+    GQSPI_SEL = GQSPI_SEL_GQSPI;
+    dsb();
+
+    /* Don't reset FIFOs - just drain any stale data by reading RXD */
+    while (GQSPI_ISR & GQSPI_IXR_RX_FIFO_NOT_EMPTY) {
+        (void)GQSPI_RXD; /* Discard any stale RX data */
+    }
+
+    /* Clear all interrupt status bits */
+    GQSPI_ISR = GQSPI_IXR_ALL_MASK;
+    dsb();
+
+    /* Preserve PLM's CFG but set IO mode for initial commands (ID read, etc.)
+     * PLM: 0xA0080010 = DMA mode | manual start | WP_HOLD | CLK_POL
+     * Key: Keep manual start mode (bit 29) and clock settings
+     * Note: ext_flash_read() will switch to DMA mode for reads if not in IO mode */
+    cfg = (cfg & ~GQSPI_CFG_MODE_EN_MASK); /* Clear mode bits */
+    cfg |= GQSPI_CFG_MODE_EN_IO; /* Set IO mode for init */
+    GQSPI_CFG = cfg;
+    dsb();
+
+    /* Set thresholds */
+    GQSPI_TX_THRESH = 1;
+    GQSPI_RX_THRESH = 1;
+    GQSPI_GF_THRESH = 16;
+
+#ifndef GQSPI_MODE_IO
+    /* Initialize DMA controller - this was missing compared to zynq.c!
+     * Without this, DMA transfers can hang or timeout because the DMA
+     * controller is in an undefined state after PLM handoff.
+     */
+    GQSPIDMA_CTRL = GQSPIDMA_CTRL_DEF;
+    GQSPIDMA_CTRL2 = GQSPIDMA_CTRL2_DEF;
+    GQSPIDMA_ISR = GQSPIDMA_ISR_ALL_MASK; /* Clear all pending interrupts */
+    GQSPIDMA_IER = GQSPIDMA_ISR_ALL_MASK; /* Enable all interrupts */
+    dsb();
+#endif
+
+    /* Configure device for single flash (lower) first */
+    qspiDev.mode = GQSPI_GEN_FIFO_MODE_SPI;
+    qspiDev.bus = GQSPI_GEN_FIFO_BUS_LOW;
+    qspiDev.cs = GQSPI_GEN_FIFO_CS_LOWER;
+    qspiDev.stripe = 0;
+
+    memset(id, 0, sizeof(id));
+    ret = qspi_read_id(&qspiDev, id, 3);
+    if (ret != 0) {
+        /* The ID printed below is then just the zeroed buffer */
+        QSPI_DEBUG_PRINTF("QSPI: Read ID failed (lower): ret=%d\n", ret);
+    }
+    wolfBoot_printf("QSPI: Lower ID: %02x %02x %02x\n", id[0], id[1], id[2]);
+
+#if GQPI_USE_4BYTE_ADDR == 1
+    /* Enter 4-byte address mode for lower flash */
+    ret = qspi_enter_4byte_addr(&qspiDev);
+    if (ret != 0) {
+        QSPI_DEBUG_PRINTF("QSPI: 4-byte mode failed (lower)\n");
+    }
+#endif
+
+#if GQPI_USE_DUAL_PARALLEL == 1
+    /* Read ID from upper flash */
+    qspiDev.bus = GQSPI_GEN_FIFO_BUS_UP;
+    qspiDev.cs = GQSPI_GEN_FIFO_CS_UPPER;
+
+    memset(id, 0, sizeof(id));
+    ret = qspi_read_id(&qspiDev, id, 3);
+    if (ret != 0) {
+        /* The ID printed below is then just the zeroed buffer */
+        QSPI_DEBUG_PRINTF("QSPI: Read ID failed (upper): ret=%d\n", ret);
+    }
+    wolfBoot_printf("QSPI: Upper ID: %02x %02x %02x\n", id[0], id[1], id[2]);
+
+#if GQPI_USE_4BYTE_ADDR == 1
+    /* Enter 4-byte address mode for upper flash */
+    ret = qspi_enter_4byte_addr(&qspiDev);
+    if (ret != 0) {
+        QSPI_DEBUG_PRINTF("QSPI: 4-byte mode failed (upper)\n");
+    }
+#endif
+
+    /* Configure for dual parallel operation */
+    qspiDev.mode = GQSPI_GEN_FIFO_MODE_SPI;
+    qspiDev.bus = GQSPI_GEN_FIFO_BUS_BOTH;
+    qspiDev.cs = GQSPI_GEN_FIFO_CS_BOTH;
+    qspiDev.stripe = GQSPI_GEN_FIFO_STRIPE;
+#endif
+
+    /* QSPI bare-metal driver info */
+    wolfBoot_printf("QSPI: %dMHz, %s mode, %s\n",
+        (GQSPI_CLK_REF / (2 << GQSPI_CLK_DIV)) / 1000000,
+    #if GQSPI_QSPI_MODE == GQSPI_GEN_FIFO_MODE_QSPI
+        "Quad"
+    #elif GQSPI_QSPI_MODE == GQSPI_GEN_FIFO_MODE_DSPI
+        "Dual"
+    #else
+        "SPI"
+    #endif
+        ,
+    #ifdef GQSPI_MODE_IO
+        "Poll"
+    #else
+        "DMA"
+    #endif
+    );
+
+    qspi_initialized = 1;
+
+#ifdef TEST_EXT_FLASH
+    test_ext_flash(&qspiDev);
+#endif
+}
+
+/*
============================================================================
+ * HAL Public Interface
+ * ============================================================================
+ */
+
+/* NOTE(review): this #endif used to sit after hal_init(), which left
+ * hal_init() compiled out whenever EXT_FLASH is not defined, although
+ * hal_init() guards its qspi_init() call with its own #ifdef EXT_FLASH.
+ * The matching #ifdef is above this part of the file - confirm the pairing. */
+#endif /* EXT_FLASH */
+
+/* Board bring-up entry point: initializes the UART, prints the banner and
+ * current exception level when built as wolfBoot, then initializes the QSPI
+ * flash driver when EXT_FLASH is enabled. */
+void hal_init(void)
+{
+#if defined(__WOLFBOOT) && defined(DEBUG_UART)
+    const char *banner = "\n"
+        "========================================\n"
+        "wolfBoot Secure Boot - AMD Versal\n"
+        "========================================\n";
+#endif
+
+    uart_init();
+
+#ifdef __WOLFBOOT
+#ifdef DEBUG_UART
+    /* Same guard as the banner declaration above, so the identifier is never
+     * referenced in a build where it does not exist */
+    wolfBoot_printf("%s", banner);
+#endif
+    wolfBoot_printf("Current EL: %d\n", current_el());
+#endif
+
+#ifdef EXT_FLASH
+    qspi_init();
+#endif
+}
+
+/* Last HAL hook before jumping to the application: leaves 4-byte flash
+ * addressing (when enabled), drains the debug UART, cleans the D-cache over
+ * the application region and invalidates the I-cache. */
+void hal_prepare_boot(void)
+{
+#if defined(EXT_FLASH) && GQPI_USE_4BYTE_ADDR == 1
+    /* Exit 4-byte address mode before handing off to application */
+    qspi_exit_4byte_addr(&qspiDev);
+#endif
+
+    /* Flush any pending UART output (with timeout) */
+#ifdef DEBUG_UART
+    {
+        volatile uint32_t timeout = UART_TIMEOUT;
+        while (!(UART_FR & UART_FR_TXFE) && --timeout)
+            ;
+        timeout = UART_TIMEOUT;
+        while ((UART_FR & UART_FR_BUSY) && --timeout)
+            ;
+    }
+#endif
+
+    /* Clean and invalidate caches for the loaded application.
+     * The application was written to RAM via D-cache, but the CPU will
+     * fetch instructions via I-cache from main memory. We must:
+     * 1. Clean D-cache (flush dirty data to memory)
+     * 2. Invalidate I-cache (ensure fresh instruction fetch)
+     */
+
+    /* Barrier only (this does not clean anything by itself): make sure all
+     * earlier stores have completed before the cache maintenance below.
+     * The "memory" clobber keeps the compiler from moving memory accesses
+     * across the asm statement. */
+    __asm__ volatile("dsb sy" : : : "memory");
+
+    /* Clean D-cache for application region (0x10000000, 1MB should be enough)
+     * NOTE(review): base and size are fixed here; an image loaded elsewhere
+     * or larger than 1MB is not fully covered - confirm against the
+     * configured load address. */
+    {
+        uintptr_t addr;
+        const uintptr_t base = 0x10000000;
+        const uintptr_t end = base + (1 * 1024 * 1024);
+        for (addr = base; addr < end; addr += 64) {
+            /* DC CVAC - Clean data cache line by VA to PoC */
+            __asm__ volatile("dc cvac, %0" : : "r"(addr) : "memory");
+        }
+    }
+
+    /* Data synchronization barrier - ensure clean completes */
+    __asm__ volatile("dsb sy" : : : "memory");
+
+    /* Invalidate instruction cache to ensure fresh code is fetched */
+    __asm__ volatile("ic iallu" : : : "memory");
+
+    /* Ensure cache invalidation completes before jumping */
+    __asm__ volatile("dsb sy" : : : "memory");
+    __asm__ volatile("isb" : : : "memory");
+}
+
+#ifdef MMU
+/**
+ * Get the Device Tree address for the boot partition
+ * Returns WOLFBOOT_LOAD_DTS_ADDRESS when it is defined, otherwise NULL
+ */
+void* hal_get_dts_address(void)
+{
+#ifdef WOLFBOOT_LOAD_DTS_ADDRESS
+    return (void*)WOLFBOOT_LOAD_DTS_ADDRESS;
+#else
+    return NULL;
+#endif
+}
+
+/**
+ * Get the update Device Tree address
+ * Returns WOLFBOOT_DTS_UPDATE_ADDRESS when it is defined, otherwise NULL
+ */
+void* hal_get_dts_update_address(void)
+{
+#ifdef WOLFBOOT_DTS_UPDATE_ADDRESS
+    return (void*)WOLFBOOT_DTS_UPDATE_ADDRESS;
+#else
+    return NULL;
+#endif
+}
+#endif /* MMU */
+
+#ifdef WOLFBOOT_DUALBOOT
+/**
+ * Get the primary (boot) partition address in flash
+ * Returns the flash address where the boot partition starts
+ */
+void* hal_get_primary_address(void)
+{
+    return (void*)WOLFBOOT_PARTITION_BOOT_ADDRESS;
+}
+
+/**
+ * Get the update partition address in flash
+ * Returns the flash address where the update partition starts
+ */
+void* hal_get_update_address(void)
+{
+    return (void*)WOLFBOOT_PARTITION_UPDATE_ADDRESS;
+}
+#endif /* WOLFBOOT_DUALBOOT */
+
+/* ============================================================================
+ * Flash Functions (STUBS)
+ * ============================================================================
+ * There is no "internal flash" on the Versal,
so these are stubs.
+ */
+
+/* Internal flash unlock: nothing to do on this target */
+void RAMFUNCTION hal_flash_unlock(void)
+{
+    /* Stub - no-op for now */
+}
+
+/* Internal flash lock: nothing to do on this target */
+void RAMFUNCTION hal_flash_lock(void)
+{
+    /* Stub - no-op for now */
+}
+
+/* Internal flash write: not supported, always returns -1 */
+int RAMFUNCTION hal_flash_write(uintptr_t address, const uint8_t *data, int len)
+{
+    (void)address;
+    (void)data;
+    (void)len;
+    return -1;
+}
+
+/* Internal flash erase: not supported, always returns -1 */
+int RAMFUNCTION hal_flash_erase(uintptr_t address, int len)
+{
+    (void)address;
+    (void)len;
+    return -1;
+}
+
+
+/* ============================================================================
+ * External Flash Interface
+ * ============================================================================
+ */
+
+#ifdef EXT_FLASH
+
+void ext_flash_lock(void)
+{
+    /* No-op - flash protection handled elsewhere */
+}
+
+void ext_flash_unlock(void)
+{
+    /* No-op - flash protection handled elsewhere */
+}
+
+/* Program len bytes from data to external flash starting at address, one
+ * FLASH_PAGE_SIZE chunk per Page Program command.
+ * Page n is programmed at address + n * FLASH_PAGE_SIZE; the address is not
+ * re-aligned here.
+ * Returns 0 on success (including len == 0), -1 if the driver is not
+ * initialized or len is negative, otherwise the first failing step's error.
+ */
+int ext_flash_write(uintptr_t address, const uint8_t *data, int len)
+{
+    int ret = 0;
+    uint8_t cmd[5];
+    uint32_t xferSz, page, pages;
+    uintptr_t addr;
+    const uint8_t *pageData;
+
+    if (!qspi_initialized) {
+        return -1;
+    }
+    /* A negative length would turn into a huge unsigned page count and
+     * transfer size below */
+    if (len < 0) {
+        return -1;
+    }
+
+    QSPI_DEBUG_PRINTF("ext_flash_write: addr=0x%lx, len=%d\n",
+        (unsigned long)address, len);
+
+    /* Write by page */
+    pages = ((len + (FLASH_PAGE_SIZE - 1)) / FLASH_PAGE_SIZE);
+    for (page = 0; page < pages && ret == 0; page++) {
+        ret = qspi_write_enable(&qspiDev);
+        if (ret != 0) break;
+
+        xferSz = len;
+        if (xferSz > FLASH_PAGE_SIZE)
+            xferSz = FLASH_PAGE_SIZE;
+
+        addr = address + (page * FLASH_PAGE_SIZE);
+        if (qspiDev.stripe) {
+            /* For dual parallel the address is divided by 2 */
+            addr /= 2;
+        }
+
+        /* Page Program with 4-byte address */
+        cmd[0] = FLASH_CMD_PAGE_PROG_4B;
+        cmd[1] = (addr >> 24) & 0xFF;
+        cmd[2] = (addr >> 16) & 0xFF;
+        cmd[3] = (addr >> 8) & 0xFF;
+        cmd[4] = addr & 0xFF;
+
+        pageData = data + (page * FLASH_PAGE_SIZE);
+        ret = qspi_transfer(&qspiDev, cmd, sizeof(cmd), NULL, 0, 0, pageData, xferSz);
+
+        QSPI_DEBUG_PRINTF("Flash Page %d Write: Ret %d\n", page, ret);
+
+        if (ret == 0) {
+            ret = qspi_wait_ready(&qspiDev);
+        }
+        /* Send Write Disable even when the program step failed, the same
+         * way ext_flash_erase() does */
+        qspi_write_disable(&qspiDev);
+        len -= xferSz;
+    }
+
+    return ret;
+}
+
+/* Read len bytes of external flash starting at address into data, using the
+ * 4-byte-address quad read command with GQSPI_DUMMY_READ dummy clocks.
+ * Returns len on success, -1 if the driver is not initialized or len is
+ * negative, otherwise the qspi_transfer() error. On a transfer error the
+ * buffer is filled with 0xFF.
+ */
+int ext_flash_read(uintptr_t address, uint8_t *data, int len)
+{
+    uint8_t cmd[5];
+    int ret = 0;
+    uintptr_t addr = address;
+
+    if (!qspi_initialized) {
+        return -1;
+    }
+    /* A negative length would become a huge unsigned size for the transfer
+     * and for the memset() in the error path */
+    if (len < 0) {
+        return -1;
+    }
+
+    QSPI_DEBUG_PRINTF("ext_flash_read: addr=0x%lx len=%d\n",
+        (unsigned long)address, len);
+
+    if (qspiDev.stripe) {
+        /* For dual parallel the address is divided by 2 */
+        addr /= 2;
+    }
+
+    /* Use Quad Read command (0x6C) with 4-byte address */
+    cmd[0] = FLASH_CMD_QUAD_READ_4B;
+    cmd[1] = (addr >> 24) & 0xFF;
+    cmd[2] = (addr >> 16) & 0xFF;
+    cmd[3] = (addr >> 8) & 0xFF;
+    cmd[4] = addr & 0xFF;
+
+    ret = qspi_transfer(&qspiDev, cmd, sizeof(cmd), data, len, GQSPI_DUMMY_READ, NULL, 0);
+
+    /* On error, fill buffer with 0xFF to simulate unwritten flash */
+    if (ret != 0) {
+        memset(data, 0xFF, len);
+    }
+
+    QSPI_DEBUG_PRINTF("ext_flash_read: ret=%d\n", ret);
+    return (ret == 0) ? len : ret;
+}
+
+/* Erase external flash from address, one FLASH_SECTOR_SIZE sector per Sector
+ * Erase command, until len bytes are covered (a partial last sector is erased
+ * whole). A len of zero or less erases nothing.
+ * Returns 0 on success, -1 if the driver is not initialized, otherwise the
+ * first failing step's error.
+ */
+int ext_flash_erase(uintptr_t address, int len)
+{
+    int ret = 0;
+    uint8_t cmd[5];
+    uintptr_t addr;
+
+    if (!qspi_initialized) {
+        return -1;
+    }
+
+    QSPI_DEBUG_PRINTF("ext_flash_erase: addr=0x%lx, len=%d\n",
+        (unsigned long)address, len);
+
+    while (len > 0 && ret == 0) {
+        addr = address;
+        if (qspiDev.stripe) {
+            /* For dual parallel the address divide by 2 */
+            addr /= 2;
+        }
+
+        ret = qspi_write_enable(&qspiDev);
+        if (ret != 0) break;
+
+        /* Sector Erase with 4-byte address */
+        cmd[0] = FLASH_CMD_SECTOR_ERASE_4B;
+        cmd[1] = (addr >> 24) & 0xFF;
+        cmd[2] = (addr >> 16) & 0xFF;
+        cmd[3] = (addr >> 8) & 0xFF;
+        cmd[4] = addr & 0xFF;
+        ret = qspi_transfer(&qspiDev, cmd, sizeof(cmd), NULL, 0, 0, NULL, 0);
+
+        /* address is a uintptr_t: print it as unsigned long, like the other
+         * debug messages in this file */
+        QSPI_DEBUG_PRINTF(" Flash Erase: Ret %d, Address 0x%lx\n",
+            ret, (unsigned long)address);
+
+        if (ret == 0) {
+            ret = qspi_wait_ready(&qspiDev);
+        }
+        qspi_write_disable(&qspiDev);
+
+        address += FLASH_SECTOR_SIZE;
+        len -= FLASH_SECTOR_SIZE;
+    }
+
+    return ret;
+}
+
+#endif /* EXT_FLASH */
+
+
+#endif /*
TARGET_versal */
+
diff --git a/hal/versal.h b/hal/versal.h
new file mode 100644
index 0000000000..75b10409ee
--- /dev/null
+++ b/hal/versal.h
@@ -0,0 +1,577 @@
+/* versal.h
+ *
+ * Copyright (C) 2025 wolfSSL Inc.
+ *
+ * This file is part of wolfBoot.
+ *
+ * wolfBoot is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfBoot is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ *
+ * AMD Versal ACAP HAL definitions for wolfBoot
+ * Target: VMK180 Evaluation Board (VM1802 Versal Prime)
+ */
+
+#ifndef _VERSAL_H_
+#define _VERSAL_H_
+
+/* Only include C headers when compiling C code, not assembly (uint32_t is used by the register macros below) */
+#ifndef __ASSEMBLER__
+#include <stdint.h>
+#endif /* __ASSEMBLER__ */
+
+/* ============================================================================
+ * Exception Level Configuration
+ * ============================================================================
+ * Versal PLM (Platform Loader Manager) can hand off at EL3, EL2, or EL1
+ * depending on configuration. Default is EL2 (hypervisor mode).
+ */ +#ifndef USE_BUILTIN_STARTUP + +#ifndef EL3_SECURE +#define EL3_SECURE 0 +#endif +#ifndef EL2_HYPERVISOR +#define EL2_HYPERVISOR 1 +#endif +#ifndef EL1_NONSECURE +#define EL1_NONSECURE 1 +#endif + +#ifndef HYP_GUEST +#define HYP_GUEST 0 +#endif + +#ifndef FPU_TRAP +#define FPU_TRAP 0 +#endif + +/* ARM Errata */ +#define CONFIG_ARM_ERRATA_855873 1 + +/* BL31-applied errata/CVEs (preserve when modifying CPUACTLR): + * - Erratum 859971 + * - Erratum 1319367 + * - CVE-2017-5715 (Spectre V2) + * - CVE-2018-3639 (SSB) + * - CVE-2022-23960 + */ + +#endif /* USE_BUILTIN_STARTUP */ + +/* ============================================================================ + * Versal-specific Boot Defaults + * ============================================================================ + * SKIP_GIC_INIT: Versal uses GICv3 (not GICv2 like ZynqMP). + * BL31 handles GIC initialization, so skip gicv2_init_secure(). + * BOOT_EL1: wolfBoot runs at EL2, but applications (Linux, test-app) + * expect EL1. Transition from EL2 to EL1 before jumping to app. + */ +#ifndef SKIP_GIC_INIT +#define SKIP_GIC_INIT 1 +#endif +#ifndef BOOT_EL1 +#define BOOT_EL1 1 +#endif + + +/* ============================================================================ + * Memory Map + * ============================================================================ + * Versal memory map (simplified): + * 0x0000_0000 - 0x7FFF_FFFF : DDR Low (2GB) + * 0x8_0000_0000 - ... : DDR High (extended) + * 0xF000_0000 - 0xFFFF_FFFF : LPD/FPD Peripherals + */ + +/* DDR Memory */ +#define VERSAL_DDR_0_BASE 0x00000000UL +#define VERSAL_DDR_0_HIGH 0x7FFFFFFFUL +#define VERSAL_DDR_1_BASE 0x800000000ULL +#define VERSAL_DDR_1_HIGH 0x87FFFFFFFULL + +/* DDR defines for MMU table setup (used by boot_aarch64_start.S) + * These macros enable proper DDR mapping in the page tables. + * Without these, the MMU tables would have DDR_0_REG=0 and no DDR mapped! 
*/ +#define XPAR_PSU_DDR_0_S_AXI_BASEADDR VERSAL_DDR_0_BASE +#define XPAR_PSU_DDR_0_S_AXI_HIGHADDR VERSAL_DDR_0_HIGH + + +/* ============================================================================ + * UART (ARM PL011 UART - UARTPSV) + * ============================================================================ + * Versal uses ARM PL011 UART (different from ZynqMP Cadence UART!) + * Based on AMD/Xilinx xuartpsv_hw.h + */ + +#define VERSAL_UART0_BASE 0xFF000000UL +#define VERSAL_UART1_BASE 0xFF010000UL + +/* Select UART based on DEBUG_UART_NUM */ +#if defined(DEBUG_UART_NUM) && DEBUG_UART_NUM == 1 + #define DEBUG_UART_BASE VERSAL_UART1_BASE +#else + #define DEBUG_UART_BASE VERSAL_UART0_BASE +#endif + +/* UART Register Offsets (ARM PL011) */ +#define UART_DR_OFFSET 0x00 /* Data Register (TX/RX FIFO) */ +#define UART_RSR_OFFSET 0x04 /* Receive Status / Error Clear */ +#define UART_FR_OFFSET 0x18 /* Flag Register (Status) */ +#define UART_ILPR_OFFSET 0x20 /* IrDA Low-Power Counter */ +#define UART_IBRD_OFFSET 0x24 /* Integer Baud Rate Divisor */ +#define UART_FBRD_OFFSET 0x28 /* Fractional Baud Rate Divisor */ +#define UART_LCR_OFFSET 0x2C /* Line Control Register */ +#define UART_CR_OFFSET 0x30 /* Control Register */ +#define UART_IFLS_OFFSET 0x34 /* Interrupt FIFO Level Select */ +#define UART_IMSC_OFFSET 0x38 /* Interrupt Mask Set/Clear */ +#define UART_RIS_OFFSET 0x3C /* Raw Interrupt Status */ +#define UART_MIS_OFFSET 0x40 /* Masked Interrupt Status */ +#define UART_ICR_OFFSET 0x44 /* Interrupt Clear Register */ +#define UART_DMACR_OFFSET 0x48 /* DMA Control Register */ + +/* UART Register Access Macros */ +#define UART_REG(offset) (*((volatile uint32_t*)(DEBUG_UART_BASE + (offset)))) + +#define UART_DR UART_REG(UART_DR_OFFSET) +#define UART_RSR UART_REG(UART_RSR_OFFSET) +#define UART_FR UART_REG(UART_FR_OFFSET) +#define UART_IBRD UART_REG(UART_IBRD_OFFSET) +#define UART_FBRD UART_REG(UART_FBRD_OFFSET) +#define UART_LCR UART_REG(UART_LCR_OFFSET) +#define 
UART_CR UART_REG(UART_CR_OFFSET) +#define UART_IFLS UART_REG(UART_IFLS_OFFSET) +#define UART_IMSC UART_REG(UART_IMSC_OFFSET) +#define UART_ICR UART_REG(UART_ICR_OFFSET) + +/* Flag Register (UARTFR) bits - Status */ +#define UART_FR_RI (1UL << 8) /* Ring indicator */ +#define UART_FR_TXFE (1UL << 7) /* TX FIFO empty */ +#define UART_FR_RXFF (1UL << 6) /* RX FIFO full */ +#define UART_FR_TXFF (1UL << 5) /* TX FIFO full */ +#define UART_FR_RXFE (1UL << 4) /* RX FIFO empty */ +#define UART_FR_BUSY (1UL << 3) /* UART busy */ +#define UART_FR_DCD (1UL << 2) /* Data carrier detect */ +#define UART_FR_DSR (1UL << 1) /* Data set ready */ +#define UART_FR_CTS (1UL << 0) /* Clear to send */ + +/* Control Register (UARTCR) bits */ +#define UART_CR_CTSEN (1UL << 15) /* CTS hardware flow control */ +#define UART_CR_RTSEN (1UL << 14) /* RTS hardware flow control */ +#define UART_CR_RTS (1UL << 11) /* Request to send */ +#define UART_CR_DTR (1UL << 10) /* Data transmit ready */ +#define UART_CR_RXE (1UL << 9) /* Receive enable */ +#define UART_CR_TXE (1UL << 8) /* Transmit enable */ +#define UART_CR_LBE (1UL << 7) /* Loopback enable */ +#define UART_CR_UARTEN (1UL << 0) /* UART enable */ + +/* Line Control Register (UARTLCR) bits */ +#define UART_LCR_SPS (1UL << 7) /* Stick parity select */ +#define UART_LCR_WLEN_MASK (3UL << 5) /* Word length mask */ +#define UART_LCR_WLEN_8 (3UL << 5) /* 8 data bits */ +#define UART_LCR_WLEN_7 (2UL << 5) /* 7 data bits */ +#define UART_LCR_WLEN_6 (1UL << 5) /* 6 data bits */ +#define UART_LCR_WLEN_5 (0UL << 5) /* 5 data bits */ +#define UART_LCR_FEN (1UL << 4) /* FIFO enable */ +#define UART_LCR_STP2 (1UL << 3) /* Two stop bits */ +#define UART_LCR_EPS (1UL << 2) /* Even parity select */ +#define UART_LCR_PEN (1UL << 1) /* Parity enable */ +#define UART_LCR_BRK (1UL << 0) /* Send break */ + +/* Interrupt FIFO Level Select (UARTIFLS) */ +#define UART_IFLS_RXIFLSEL_1_8 (0UL << 3) /* RX FIFO 1/8 full */ +#define UART_IFLS_RXIFLSEL_1_4 (1UL << 3) /* 
RX FIFO 1/4 full */ +#define UART_IFLS_RXIFLSEL_1_2 (2UL << 3) /* RX FIFO 1/2 full */ +#define UART_IFLS_TXIFLSEL_1_8 (0UL << 0) /* TX FIFO 1/8 full */ +#define UART_IFLS_TXIFLSEL_1_4 (1UL << 0) /* TX FIFO 1/4 full */ +#define UART_IFLS_TXIFLSEL_1_2 (2UL << 0) /* TX FIFO 1/2 full */ + +/* Interrupt bits (for IMSC, RIS, MIS, ICR) */ +#define UART_INT_OE (1UL << 10) /* Overrun error */ +#define UART_INT_BE (1UL << 9) /* Break error */ +#define UART_INT_PE (1UL << 8) /* Parity error */ +#define UART_INT_FE (1UL << 7) /* Framing error */ +#define UART_INT_RT (1UL << 6) /* Receive timeout */ +#define UART_INT_TX (1UL << 5) /* Transmit */ +#define UART_INT_RX (1UL << 4) /* Receive */ +#define UART_INT_ALL 0x7FFU /* All interrupts */ + +/* UART Configuration */ +#ifndef UART_CLK_REF + #define UART_CLK_REF 100000000UL /* 100 MHz reference clock */ +#endif + +#ifndef DEBUG_UART_BAUD + #define DEBUG_UART_BAUD 115200 +#endif + +/* ============================================================================ + * PMC_IOU_SLCR - MIO Pin Configuration + * ============================================================================ + * Required for JTAG boot mode where PLM doesn't run + */ +#define PMC_IOU_SLCR_BASE 0xF1060000UL + +/* MIO Pin registers - each MIO pin has its own 4-byte register */ +#define PMC_IOU_SLCR_MIO_PIN(n) (*((volatile uint32_t*)(PMC_IOU_SLCR_BASE + 0x0 + ((n) * 4)))) + +/* MIO Pin register bits */ +#define MIO_PIN_L0_SEL_MASK (0x1UL << 0) /* Level 0 MUX select */ +#define MIO_PIN_L1_SEL_MASK (0x1UL << 1) /* Level 1 MUX select */ +#define MIO_PIN_L2_SEL_MASK (0x3UL << 2) /* Level 2 MUX select */ +#define MIO_PIN_L3_SEL_MASK (0x7UL << 4) /* Level 3 MUX select */ +#define MIO_PIN_TRI_ENABLE (0x1UL << 8) /* Tri-state enable (input) */ +#define MIO_PIN_PULLUP (0x1UL << 12) /* Pull-up enable */ +#define MIO_PIN_SCHMITT_ENABLE (0x1UL << 13) /* Schmitt trigger enable */ +#define MIO_PIN_SLOW_SLEW (0x0UL << 14) /* Slow slew rate */ +#define MIO_PIN_FAST_SLEW (0x1UL 
<< 14) /* Fast slew rate */ + +/* UART0 default MIO pins on VMK180: MIO 0 (RX), MIO 1 (TX) + * L3_SEL = 1 selects UART function */ +#define MIO_UART0_RX_PIN 0 /* MIO0 = UART0 RX */ +#define MIO_UART0_TX_PIN 1 /* MIO1 = UART0 TX */ +#define MIO_UART1_RX_PIN 4 /* MIO4 = UART1 RX */ +#define MIO_UART1_TX_PIN 5 /* MIO5 = UART1 TX */ + +/* MIO configuration for UART TX pin: Output, UART function (L3_SEL=1) */ +#define MIO_UART_TX_CFG (0x1UL << 4) /* L3_SEL = 1 for UART */ +/* MIO configuration for UART RX pin: Input, UART function (L3_SEL=1) */ +#define MIO_UART_RX_CFG ((0x1UL << 4) | MIO_PIN_TRI_ENABLE) + +/* ============================================================================ + * CRL (Clock Reset LPD) - For UART clock/reset control + * ============================================================================ + * Register offsets verified from Xilinx lpd_data.cdo + */ +#define VERSAL_CRL_BASE 0xFF5E0000UL + +/* UART Reference Clock Control - from lpd_data.cdo line 176 */ +#define CRL_UART0_REF_CTRL (*((volatile uint32_t*)(VERSAL_CRL_BASE + 0x0128))) +#define CRL_UART1_REF_CTRL (*((volatile uint32_t*)(VERSAL_CRL_BASE + 0x012C))) + +/* UART Reset Control - from lpd_data.cdo line 258 */ +#define CRL_RST_UART0 (*((volatile uint32_t*)(VERSAL_CRL_BASE + 0x0318))) +#define CRL_RST_UART1 (*((volatile uint32_t*)(VERSAL_CRL_BASE + 0x031C))) +#define CRL_RST_UART0_BIT (1UL << 0) +#define CRL_RST_UART1_BIT (1UL << 0) /* Each UART has its own register */ + +/* Backward compatibility alias */ +#define CRL_RST_UART CRL_RST_UART0 + +/* Clock Reference Control bits */ +#define CRL_CLK_CLKACT (1UL << 25) /* Clock active */ +#define CRL_CLK_DIVISOR_MASK (0x3FFUL << 8) /* Divisor field */ + + +/* ============================================================================ + * System Timer (ARM Generic Timer) + * ============================================================================ + * Versal uses ARM Generic Timer accessed via system registers + */ + +/* Timer frequency 
(typically configured by PLM) */ +#ifndef TIMER_CLK_FREQ +#define TIMER_CLK_FREQ 100000000UL /* 100 MHz default */ +#endif + + +/* ============================================================================ + * GIC (Generic Interrupt Controller) + * ============================================================================ + */ +#define VERSAL_GIC_BASE 0xF9000000UL +#define VERSAL_GICD_BASE (VERSAL_GIC_BASE + 0x00000) /* Distributor */ +#define VERSAL_GICC_BASE (VERSAL_GIC_BASE + 0x40000) /* CPU Interface */ +#define VERSAL_GICH_BASE (VERSAL_GIC_BASE + 0x60000) /* Virtual Interface Control */ +#define VERSAL_GICV_BASE (VERSAL_GIC_BASE + 0x80000) /* Virtual CPU Interface */ + + +/* ============================================================================ + * Clock and Reset (CRL/CRF) + * ============================================================================ + */ +#define VERSAL_CRL_BASE 0xFF5E0000UL /* Clock and Reset LPD */ +#define VERSAL_CRF_BASE 0xFD1A0000UL /* Clock and Reset FPD */ + + +/* ============================================================================ + * PMC (Platform Management Controller) + * ============================================================================ + * The PMC is the security controller in Versal (replaces CSU from ZynqMP) + */ +#define VERSAL_PMC_GLOBAL_BASE 0xF1110000UL +#define VERSAL_PMC_TAP_BASE 0xF11A0000UL + + +/* ============================================================================ + * QSPI (Quad SPI) Flash Controller - GQSPI + * ============================================================================ + * The Versal GQSPI controller is derived from the ZynqMP GQSPI IP block. + * VMK180 uses dual parallel MT25QU01GBBB (128MB each, 256MB total). + * + * Key differences from ZynqMP (see hal/zynq.c for comparison): + * + * 1. BASE ADDRESS: + * - ZynqMP: 0xFF0F0000 + * - Versal: 0xF1030000 + * + * 2. 
TAP DELAY BYPASS REGISTER: + * - ZynqMP: Located in IOU_SLCR block at 0xFF180390 + * - Versal: Integrated in QSPI block at BASE + 0x03C + * + * 3. INITIALIZATION: + * - ZynqMP: Full init including FIFO reset and loopback delay tuning + * - Versal: Preserves PLM configuration, only drains RX FIFO + * (PLM already configured clocks, MIO, and controller) + * + * 4. CLOCK CONFIGURATION: + * - Both use same divisor formula: QSPI_CLK = REF_CLK / (2 << DIV) + * - Default: 300MHz ref, DIV=1 -> 75MHz (within MT25QU01G 133MHz spec) + * + * 5. REGISTER LAYOUT: + * - Identical offsets from base (+0x100 for GQSPI, +0x800 for DMA) + * - Same GenFIFO format, interrupt bits, and DMA interface + * + * 6. BUILD OPTIONS (same as ZynqMP): + * - GQSPI_MODE_IO: Use polling instead of DMA (DMA is default) + * - GQPI_USE_DUAL_PARALLEL: Enable dual parallel flash striping + * - GQPI_USE_4BYTE_ADDR: Enable 4-byte addressing for >16MB flash + * - GQSPI_CLK_DIV: Clock divider (0-7) + * - DEBUG_QSPI: Enable verbose debug logging + */ +#define VERSAL_QSPI_BASE 0xF1030000UL + +/* QSPI Enable Register (at base, not +0x100) */ +#define QSPI_EN_REG (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x14))) + +/* GQSPI Registers (at offset 0x100 from QSPI base) */ +#define GQSPI_CFG (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x100))) +#define GQSPI_ISR (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x104))) +#define GQSPI_IER (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x108))) +#define GQSPI_IDR (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x10C))) +#define GQSPI_IMR (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x110))) +#define GQSPI_EN (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x114))) +#define GQSPI_TXD (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x11C))) +#define GQSPI_RXD (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x120))) +#define GQSPI_TX_THRESH (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x128))) +#define GQSPI_RX_THRESH (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x12C))) +#define GQSPI_GPIO (*((volatile 
uint32_t*)(VERSAL_QSPI_BASE + 0x130))) +#define GQSPI_LPBK_DLY_ADJ (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x138))) +#define GQSPI_GEN_FIFO (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x140))) +#define GQSPI_SEL (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x144))) +#define GQSPI_FIFO_CTRL (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x14C))) +#define GQSPI_GF_THRESH (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x150))) +#define GQSPI_POLL_CFG (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x154))) +#define GQSPI_P_TIMEOUT (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x158))) +#define GQSPI_XFER_STS (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x15C))) +#define GQSPI_DATA_DLY_ADJ (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x1F8))) +#define GQSPI_MOD_ID (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x1FC))) + +/* DMA Registers (at offset 0x800 from QSPI base) */ +#define GQSPIDMA_DST (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x800))) +#define GQSPIDMA_SIZE (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x804))) +#define GQSPIDMA_STS (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x808))) +#define GQSPIDMA_CTRL (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x80C))) +#define GQSPIDMA_ISR (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x814))) +#define GQSPIDMA_IER (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x818))) +#define GQSPIDMA_IDR (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x81C))) +#define GQSPIDMA_IMR (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x820))) +#define GQSPIDMA_CTRL2 (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x824))) +#define GQSPIDMA_DST_MSB (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x828))) + +/* Tap Delay Bypass Register - Versal specific location */ +#define IOU_TAPDLY_BYPASS (*((volatile uint32_t*)(VERSAL_QSPI_BASE + 0x03C))) +#define IOU_TAPDLY_BYPASS_LQSPI_RX (1UL << 2) + +/* GQSPI_CFG: Configuration register bits */ +#define GQSPI_CFG_CLK_POL (1UL << 1) +#define GQSPI_CFG_CLK_PH (1UL << 2) +#define GQSPI_CFG_BAUD_RATE_DIV_MASK (7UL << 3) +#define 
GQSPI_CFG_BAUD_RATE_DIV(d) (((d) << 3) & GQSPI_CFG_BAUD_RATE_DIV_MASK) +#define GQSPI_CFG_WP_HOLD (1UL << 19) +#define GQSPI_CFG_EN_POLL_TIMEOUT (1UL << 20) +#define GQSPI_CFG_ENDIAN (1UL << 26) +#define GQSPI_CFG_START_GEN_FIFO (1UL << 28) +#define GQSPI_CFG_GEN_FIFO_START_MODE (1UL << 29) +#define GQSPI_CFG_MODE_EN_MASK (3UL << 30) +#define GQSPI_CFG_MODE_EN_IO (0UL << 30) +#define GQSPI_CFG_MODE_EN_DMA (2UL << 30) + +/* GQSPI_ISR/IER/IDR: Interrupt bits */ +#define GQSPI_IXR_POLL_TIME_EXPIRE (1UL << 1) +#define GQSPI_IXR_TX_FIFO_NOT_FULL (1UL << 2) +#define GQSPI_IXR_TX_FIFO_FULL (1UL << 3) +#define GQSPI_IXR_RX_FIFO_NOT_EMPTY (1UL << 4) +#define GQSPI_IXR_RX_FIFO_FULL (1UL << 5) +#define GQSPI_IXR_GEN_FIFO_EMPTY (1UL << 7) +#define GQSPI_IXR_TX_FIFO_EMPTY (1UL << 8) +#define GQSPI_IXR_GEN_FIFO_NOT_FULL (1UL << 9) +#define GQSPI_IXR_GEN_FIFO_FULL (1UL << 10) +#define GQSPI_IXR_RX_FIFO_EMPTY (1UL << 11) +#define GQSPI_IXR_ALL_MASK 0x0FBEU +#define GQSPI_ISR_WR_TO_CLR_MASK 0x02U + +/* GenFIFO Entry bits */ +#define GQSPI_GEN_FIFO_IMM_MASK 0xFFU +#define GQSPI_GEN_FIFO_IMM(x) ((x) & GQSPI_GEN_FIFO_IMM_MASK) +#define GQSPI_GEN_FIFO_DATA_XFER (1UL << 8) +#define GQSPI_GEN_FIFO_EXP (1UL << 9) +#define GQSPI_GEN_FIFO_MODE_SPI (1UL << 10) +#define GQSPI_GEN_FIFO_MODE_DSPI (2UL << 10) +#define GQSPI_GEN_FIFO_MODE_QSPI (3UL << 10) +#define GQSPI_GEN_FIFO_MODE_MASK (3UL << 10) +#define GQSPI_GEN_FIFO_CS_LOWER (1UL << 12) +#define GQSPI_GEN_FIFO_CS_UPPER (1UL << 13) +#define GQSPI_GEN_FIFO_CS_MASK (3UL << 12) +#define GQSPI_GEN_FIFO_CS_BOTH (3UL << 12) +#define GQSPI_GEN_FIFO_BUS_LOW (1UL << 14) +#define GQSPI_GEN_FIFO_BUS_UP (1UL << 15) +#define GQSPI_GEN_FIFO_BUS_BOTH (3UL << 14) +#define GQSPI_GEN_FIFO_BUS_MASK (3UL << 14) +#define GQSPI_GEN_FIFO_TX (1UL << 16) +#define GQSPI_GEN_FIFO_RX (1UL << 17) +#define GQSPI_GEN_FIFO_STRIPE (1UL << 18) +#define GQSPI_GEN_FIFO_POLL (1UL << 19) + +/* DMA Control bits */ +#define GQSPIDMA_CTRL_DEF 0x403FFA00UL +#define 
GQSPIDMA_CTRL2_DEF 0x081BFFF8UL +#define GQSPIDMA_CTRL_ENDIANNESS (1UL << 23) + +/* DMA Status bits */ +#define GQSPIDMA_STS_BUSY (1UL << 0) +#define GQSPIDMA_STS_WTC (7UL << 13) + +/* DMA Interrupt bits */ +#define GQSPIDMA_ISR_DONE (1UL << 1) +#define GQSPIDMA_ISR_ALL_MASK 0xFEU + +/* FIFO Control bits */ +#define GQSPI_FIFO_CTRL_RST_GEN (1UL << 0) +#define GQSPI_FIFO_CTRL_RST_TX (1UL << 1) +#define GQSPI_FIFO_CTRL_RST_RX (1UL << 2) + +/* QSPI Select */ +#define GQSPI_SEL_GQSPI (1UL << 0) + +/* Flash Commands */ +#define FLASH_CMD_READ_ID 0x9F +#define FLASH_CMD_READ_STATUS 0x05 +#define FLASH_CMD_READ_FLAG_STATUS 0x70 +#define FLASH_CMD_WRITE_ENABLE 0x06 +#define FLASH_CMD_WRITE_DISABLE 0x04 +#define FLASH_CMD_READ 0x03 +#define FLASH_CMD_FAST_READ 0x0B +#define FLASH_CMD_QUAD_READ 0x6B +#define FLASH_CMD_READ_4B 0x13 +#define FLASH_CMD_FAST_READ_4B 0x0C +#define FLASH_CMD_QUAD_READ_4B 0x6C +#define FLASH_CMD_PAGE_PROG 0x02 +#define FLASH_CMD_PAGE_PROG_4B 0x12 +#define FLASH_CMD_SECTOR_ERASE 0xD8 +#define FLASH_CMD_SECTOR_ERASE_4B 0xDC +#define FLASH_CMD_ENTER_4B_MODE 0xB7 +#define FLASH_CMD_EXIT_4B_MODE 0xE9 + +/* Flash Status Register bits */ +#define FLASH_SR_WIP (1UL << 0) /* Write In Progress */ +#define FLASH_SR_WEL (1UL << 1) /* Write Enable Latch */ +#define FLASH_FSR_READY (1UL << 7) /* Flag Status Ready */ + +/* Flash Configuration for MT25QU01GBBB */ +#define FLASH_JEDEC_MICRON 0x20 +#define FLASH_JEDEC_MT25QU01G 0x20BB21 +#define FLASH_PAGE_SIZE 256 +#define FLASH_SECTOR_SIZE 0x10000 /* 64KB */ +#define FLASH_DEVICE_SIZE 0x8000000 /* 128MB per chip */ + +/* QSPI Configuration (bare-metal driver) */ +#ifndef GQSPI_CLK_REF + #define GQSPI_CLK_REF 300000000 /* 300 MHz */ +#endif +#ifndef GQSPI_CLK_DIV + #define GQSPI_CLK_DIV 1 /* Divide by 4 (300MHz / 4 = 75MHz) */ +#endif +#define GQSPI_CS_ASSERT_CLOCKS 5 /* CS Setup Time (tCSS) */ +#define GQSPI_CS_DEASSERT_CLOCKS 4 /* CS Hold Time */ +#define GQSPI_FIFO_WORD_SZ 4 +#define GQSPI_DMA_ALIGN 64 /* L1 
cache line size */ +#ifndef GQSPI_DMA_TMPSZ + #define GQSPI_DMA_TMPSZ 4096 +#endif +#define GQSPI_TIMEOUT_TRIES 100000 +#define GQSPIDMA_TIMEOUT_TRIES 100000000 +#define GQSPI_FLASH_READY_TRIES 1000000 /* Erase can take seconds */ + +/* QSPI Mode Configuration */ +#ifndef GQSPI_QSPI_MODE + #define GQSPI_QSPI_MODE GQSPI_GEN_FIFO_MODE_QSPI /* 4-bit data */ +#endif +#ifndef GQPI_USE_DUAL_PARALLEL + #define GQPI_USE_DUAL_PARALLEL 1 /* 0=single, 1=dual parallel (striped) */ +#endif +#ifndef GQPI_USE_4BYTE_ADDR + #define GQPI_USE_4BYTE_ADDR 1 /* 0=3-byte addr, 1=4-byte addr */ +#endif +#ifndef GQSPI_DUMMY_READ + #define GQSPI_DUMMY_READ 8 /* Dummy clocks for Fast/Quad Read */ +#endif + +#ifndef XALIGNED +#define XALIGNED(x) __attribute__((aligned(x))) +#endif + + +/* ============================================================================ + * SD/eMMC Controller (SDHCI) + * ============================================================================ + * Versal has 2 SD/eMMC controllers + */ +#define VERSAL_SD0_BASE 0xF1040000UL +#define VERSAL_SD1_BASE 0xF1050000UL + + +/* ============================================================================ + * Helper Functions (C code only) + * ============================================================================ + */ +#ifndef __ASSEMBLER__ + +/* Get current exception level */ +static inline unsigned int current_el(void) +{ + unsigned long el; + __asm__ volatile("mrs %0, CurrentEL" : "=r" (el)); + return (unsigned int)((el >> 2) & 0x3); +} + +/* Memory barrier */ +static inline void dmb(void) +{ + __asm__ volatile("dmb sy" ::: "memory"); +} + +static inline void dsb(void) +{ + __asm__ volatile("dsb sy" ::: "memory"); +} + +static inline void isb(void) +{ + __asm__ volatile("isb" ::: "memory"); +} + +#endif /* __ASSEMBLER__ */ + +#endif /* _VERSAL_H_ */ + diff --git a/hal/versal.ld b/hal/versal.ld new file mode 100644 index 0000000000..054bd26cb0 --- /dev/null +++ b/hal/versal.ld @@ -0,0 +1,232 @@ +/* versal.ld + *
Linker script for wolfBoot on AMD Versal ACAP - DDR Boot + * Target: VMK180 Evaluation Board (VM1802 Versal Prime) + * + * This script is for production boot where: + * - PLM/PSM have initialized DDR + * - BL31 runs at EL3 and transitions to EL2 + * - wolfBoot runs at EL2 from DDR at 0x8000000 (replacing U-Boot) + * + * Memory Map: + * 0x00000000 - 0x07FFFFFF : Reserved / Low DDR + * 0x08000000 - 0x081FFFFF : wolfBoot (2MB) + * 0x08200000 - 0x0FFFFFFF : Available for scratch/heap + * 0x10000000 : Linux kernel load address + * 0x11800000 : Device tree load address (optional) + * + * Copyright (C) 2025 wolfSSL Inc. + */ + +OUTPUT_FORMAT("elf64-littleaarch64") +OUTPUT_ARCH(aarch64) +ENTRY(_vector_table) + +/* Stack and heap sizes - larger for DDR boot */ +_STACK_SIZE = DEFINED(_STACK_SIZE) ? _STACK_SIZE : 0x10000; /* 64KB stack */ +_HEAP_SIZE = DEFINED(_HEAP_SIZE) ? _HEAP_SIZE : 0x20000; /* 128KB heap */ + +_EL0_STACK_SIZE = DEFINED(_EL0_STACK_SIZE) ? _EL0_STACK_SIZE : 0x1000; +_EL1_STACK_SIZE = DEFINED(_EL1_STACK_SIZE) ? _EL1_STACK_SIZE : 0x2000; +_EL2_STACK_SIZE = DEFINED(_EL2_STACK_SIZE) ? _EL2_STACK_SIZE : 0x8000; /* Primary stack at EL2 */ + +/* Memory regions + * wolfBoot at 0x8000000 with 2MB allocated + * This matches U-Boot's load address in PetaLinux BOOT.BIN + */ +MEMORY +{ + DDR (rwx) : ORIGIN = 0x8000000, LENGTH = 0x200000 +} + +/* Sections */ +SECTIONS +{ + /* Code section - must start at entry point */ + .text : { + _start_text = .; + + /* Vector table and startup code MUST be first */ + KEEP (*(.vectors)) + *(.boot) + *(.text) + *(.text.*) + *(.gnu.linkonce.t.*) + *(.plt) + *(.gnu_warning) + *(.gcc_execpt_table) + *(.glue_7) + *(.glue_7t) + *(.ARM.extab) + *(.gnu.linkonce.armextab.*) + + . = ALIGN(8); + _end_text = .; + } > DDR + + .init (ALIGN(64)) : { + KEEP (*(.init)) + } > DDR + + .fini (ALIGN(64)) : { + KEEP (*(.fini)) + } > DDR + + /* Read-only data */ + .rodata : { + . 
= ALIGN(64); + __rodata_start = .; + *(.rodata) + *(.rodata.*) + *(.gnu.linkonce.r.*) + __rodata_end = .; + } > DDR + + /* Keystore - public keys for image verification */ + .keystore : { + . = ALIGN(8); + *(.keystore) + . = ALIGN(8); + } > DDR + + /* Initialized data */ + .data : { + . = ALIGN(64); + _start_data = .; + __data_start = .; + *(.data) + *(.data.*) + *(.gnu.linkonce.d.*) + *(.jcr) + *(.got) + *(.got.plt) + _end_data = .; + __data_end = .; + } > DDR + + .got : { + *(.got) + } > DDR + + .got1 : { + *(.got1) + } > DDR + + .got2 : { + *(.got2) + } > DDR + + /* MMU tables - 4KB aligned for AArch64 */ + .mmu_tbl0 (ALIGN(4096)) : { + __mmu_tbl0_start = .; + *(.mmu_tbl0) + __mmu_tbl0_end = .; + } > DDR + + .mmu_tbl1 (ALIGN(4096)) : { + __mmu_tbl1_start = .; + *(.mmu_tbl1) + __mmu_tbl1_end = .; + } > DDR + + .mmu_tbl2 (ALIGN(4096)) : { + __mmu_tbl2_start = .; + *(.mmu_tbl2) + __mmu_tbl2_end = .; + } > DDR + + .ARM.exidx : { + __exidx_start = .; + *(.ARM.exidx*) + *(.gnu.linkonce.armexidix.*.*) + __exidx_end = .; + } > DDR + + /* Small data */ + .sdata : { + . = ALIGN(64); + __sdata_start = .; + *(.sdata) + *(.sdata.*) + *(.gnu.linkonce.s.*) + __sdata_end = .; + } > DDR + + /* RAM functions section */ + .ramcode : { + . = ALIGN(8); + _start_ramcode = .; + *(.ramcode) + *(.ramcode.*) + . = ALIGN(8); + _end_ramcode = .; + } > DDR + + /* Small BSS */ + .sbss (NOLOAD) : { + . = ALIGN(64); + __sbss_start = .; + *(.sbss) + *(.sbss.*) + *(.gnu.linkonce.sb.*) + . = ALIGN(64); + __sbss_end = .; + } > DDR + + /* Uninitialized data (BSS) */ + .bss (NOLOAD) : { + . = ALIGN(64); + __bss_start__ = .; + _start_bss = .; + *(.bss) + *(.bss.*) + *(.gnu.linkonce.b.*) + *(COMMON) + . = ALIGN(64); + __bss_end__ = .; + _end_bss = .; + } > DDR + + /* Heap */ + .heap (NOLOAD) : { + . = ALIGN(64); + _heap = .; + __heap_start = .; + . += _HEAP_SIZE; + __heap_end = .; + } > DDR + + /* Stack - EL2 is primary when booting from BL31 */ + .stack (NOLOAD) : { + . 
= ALIGN(64); + /* EL3 stack (not used when entering at EL2) */ + _el3_stack_end = .; + . += 0x1000; + __el3_stack = .; + + /* EL2 stack - primary execution level */ + _el2_stack_end = .; + . += _EL2_STACK_SIZE; + . = ALIGN(64); + __el2_stack = .; + + /* EL1 stack */ + _el1_stack_end = .; + . += _EL1_STACK_SIZE; + . = ALIGN(64); + __el1_stack = .; + + /* EL0 stack */ + _el0_stack_end = .; + . += _EL0_STACK_SIZE; + . = ALIGN(64); + __el0_stack = .; + } > DDR + + _end = .; +} + +/* Provide symbols for startup code */ +PROVIDE(_start_vector = _start_text); +PROVIDE(__stack = __el2_stack); /* Use EL2 stack as default when entering from BL31 */ +PROVIDE(_stack_base = .); diff --git a/include/hal.h b/include/hal.h index 1804e05465..3b1ff17ed0 100644 --- a/include/hal.h +++ b/include/hal.h @@ -53,11 +53,28 @@ void hal_deinit(); void hal_init(void); -#ifdef WOLFBOOT_UPDATE_DISK -/* Timer functions (platform-specific) */ +/* Timer functions (platform-specific, used for benchmarking) */ +#if defined(WOLFBOOT_UPDATE_DISK) || defined(BOOT_BENCHMARK) uint64_t hal_get_timer_us(void); #endif +/* Boot benchmarking macros + * Usage: Declare BENCHMARK_DECLARE() at function scope, + * then use BENCHMARK_START() and BENCHMARK_END(msg) to measure time. 
+ */ +#ifdef BOOT_BENCHMARK + #define BENCHMARK_DECLARE() uint64_t _boot_bench_start + #define BENCHMARK_START() (_boot_bench_start = hal_get_timer_us()) + #define BENCHMARK_END(msg) do { \ + uint64_t _elapsed_ms = (hal_get_timer_us() - _boot_bench_start) / 1000; \ + wolfBoot_printf(msg " (%lu ms)\r\n", (unsigned long)_elapsed_ms); \ + } while(0) +#else + #define BENCHMARK_DECLARE() struct _boot_bench_unused /* no-op that is still a declaration, so it may sit among other declarations */ + #define BENCHMARK_START() do {} while(0) + #define BENCHMARK_END(msg) wolfBoot_printf(msg "\r\n") +#endif + #ifdef ARCH_64BIT typedef uintptr_t haladdr_t; /* 64-bit platforms */ int hal_flash_write(uintptr_t address, const uint8_t *data, int len); diff --git a/options.mk b/options.mk index d55dd160c4..b933f76d5e 100644 --- a/options.mk +++ b/options.mk @@ -604,6 +604,15 @@ endif ifeq ($(NO_QNX),1) CFLAGS+=-D"NO_QNX" endif +ifeq ($(SKIP_GIC_INIT),1) + CFLAGS+=-D"SKIP_GIC_INIT" +endif +ifeq ($(BOOT_EL1),1) + CFLAGS+=-D"BOOT_EL1" +endif +ifeq ($(BOOT_BENCHMARK),1) + CFLAGS+=-D"BOOT_BENCHMARK" +endif ifeq ($(ALLOW_DOWNGRADE),1) CFLAGS+= -D"ALLOW_DOWNGRADE" diff --git a/src/boot_aarch64.c b/src/boot_aarch64.c index 9487f84a60..3fa21c9ba2 100644 --- a/src/boot_aarch64.c +++ b/src/boot_aarch64.c @@ -25,6 +25,11 @@ #include "loader.h" #include "wolfboot/wolfboot.h" +/* Include platform-specific header for EL configuration defines */ +#ifdef TARGET_versal +#include "hal/versal.h" +#endif + /* Linker exported variables */ extern unsigned int __bss_start__; extern unsigned int __bss_end__; @@ -37,12 +42,40 @@ extern unsigned int _end_data; extern void main(void); extern void gicv2_init_secure(void); +/* SKIP_GIC_INIT - Skip GIC initialization before booting app + * This is needed for: + * - Versal: Uses GICv3, not GICv2. BL31 handles GIC setup.
+ * - Systems where another bootloader stage handles GIC init + * NO_QNX also implies SKIP_GIC_INIT for backwards compatibility + */ +#if defined(NO_QNX) && !defined(SKIP_GIC_INIT) +#define SKIP_GIC_INIT +#endif + +#ifndef TARGET_versal +/* current_el() is defined in hal/versal.h for Versal */ unsigned int current_el(void) { unsigned long el; asm volatile("mrs %0, CurrentEL" : "=r" (el) : : "cc"); return (unsigned int)((el >> 2) & 0x3U); } +#endif + +#if defined(BOOT_EL1) && defined(EL2_HYPERVISOR) && EL2_HYPERVISOR == 1 +/** + * @brief Transition from EL2 to EL1 and jump to application + * + * This function configures the necessary system registers for EL1 operation + * and performs an exception return (ERET) to drop from EL2 to EL1. + * + * Based on ARM Architecture Reference Manual and U-Boot implementation. + * + * @param entry_point Address to jump to in EL1 + * @param dts_addr Device tree address (passed in x0 to application) + */ +extern void el2_to_el1_boot(uintptr_t entry_point, uintptr_t dts_addr); +#endif /* BOOT_EL1 && EL2_HYPERVISOR */ void boot_entry_C(void) { @@ -101,6 +134,32 @@ void RAMFUNCTION do_boot(const uint32_t *app_offset) hal_dts_fixup((uint32_t*)dts_offset); #endif +#ifndef SKIP_GIC_INIT + /* Initialize GICv2 for Kernel (ZynqMP and similar platforms) + * Skip this for: + * - Versal (uses GICv3, handled by BL31) + * - Platforms where BL31 or another stage handles GIC + */ + gicv2_init_secure(); +#endif + +#if defined(BOOT_EL1) && defined(EL2_HYPERVISOR) && EL2_HYPERVISOR == 1 + /* Transition from EL2 to EL1 before jumping to application. 
+ * This is needed when: + * - Application expects to run at EL1 (e.g., Linux kernel) + * - wolfBoot runs at EL2 (hypervisor mode) + */ + { + #ifdef MMU + uintptr_t dts = (uintptr_t)dts_offset; + #else + uintptr_t dts = 0; + #endif + el2_to_el1_boot((uintptr_t)app_offset, dts); + } +#else + /* Stay at current EL (EL2 or EL3) and jump directly to application */ + /* Set application address via x4 */ asm volatile("mov x4, %0" : : "r"(app_offset)); @@ -111,11 +170,6 @@ void RAMFUNCTION do_boot(const uint32_t *app_offset) asm volatile("mov x5, xzr"); #endif -#ifndef NO_QNX - /* Initialize GICv2 for Kernel */ - gicv2_init_secure(); -#endif - /* Zero registers x1, x2, x3 */ asm volatile("mov x3, xzr"); asm volatile("mov x2, xzr"); @@ -126,6 +180,7 @@ void RAMFUNCTION do_boot(const uint32_t *app_offset) /* Unconditionally jump to app_entry at x4 */ asm volatile("br x4"); +#endif /* BOOT_EL1 */ } #ifdef RAM_CODE diff --git a/src/boot_aarch64_start.S b/src/boot_aarch64_start.S index 705fa41692..169e4e92a4 100644 --- a/src/boot_aarch64_start.S +++ b/src/boot_aarch64_start.S @@ -26,6 +26,10 @@ #include "hal/zynq.h" #endif +#ifdef TARGET_versal +#include "hal/versal.h" +#endif + #ifdef TARGET_nxp_ls1028a #include "hal/nxp_ls1028a.h" #endif @@ -78,10 +82,16 @@ .set L1Table, MMUTableL1 .set L2Table, MMUTableL2 .set vector_base, _vector_table -.set rvbar_base, 0xFD5C0040 -# Cortex-A53 timestamp clock frequency +#ifdef TARGET_versal +/* Versal: RVBAR is handled by PLM, not accessible from APU in JTAG mode */ +/* Cortex-A72 timestamp clock frequency (from Versal HW) */ +.set counterfreq, 100000000 +#else +.set rvbar_base, 0xFD5C0040 +/* Cortex-A53 timestamp clock frequency */ .set counterfreq, 99990005 +#endif .set MODE_EL1, 0x5 .set DAIF_BIT, 0x1C0 @@ -142,6 +152,9 @@ InitEL3: msr VBAR_EL3,x1 /* Set reset vector address */ + /* Note: On Versal, RVBAR is handled by PLM and APU_DUAL_CSR is not + * accessible in JTAG boot mode. Skip RVBAR write for Versal. 
*/ +#if !defined(SKIP_RVBAR) || SKIP_RVBAR == 0 /* Get the cpu ID */ mrs x0, MPIDR_EL1 and x0, x0, #0xFF @@ -153,6 +166,7 @@ InitEL3: add w2, w2, w0 /* store vector base address to RVBAR */ str x1, [x2] +#endif /* Define stack pointer for current exception level */ ldr x2,=EL3_stack @@ -184,8 +198,11 @@ InitEL3: orr w1, w1, #(1 << 1) /* Set IRQ bit (IRQs routed to EL3) */ msr SCR_EL3, x1 - /* Configure cpu auxiliary control register EL1 */ - ldr x0,=0x80CA000 /* L1 Data prefetch control - 5, Enable device split throttle, 2 independent data prefetch streams */ + /* Configure CPUACTLR_EL1 - read-modify-write to preserve BL31 workarounds: + * 859971, 1319367, CVE-2017-5715, CVE-2018-3639, CVE-2022-23960 */ + mrs x0, S3_1_C15_C2_0 /* Read current CPUACTLR_EL1 */ + ldr x1,=0x80CA000 /* L1 Data prefetch control - 5, Enable device split throttle, 2 independent data prefetch streams */ + orr x0, x0, x1 /* Merge with existing value */ #if defined(CONFIG_ARM_ERRATA_855873) && CONFIG_ARM_ERRATA_855873 /* Set ENDCCASCI bit in CPUACTLR_EL1 register, to execute data * cache clean operations as data cache clean and invalidate @@ -210,6 +227,7 @@ InitEL3: dsb sy isb +#ifndef NO_MMU ldr x1, =L0Table /* Get address of level 0 for TTBR0_EL3 */ msr TTBR0_EL3, x1 /* Set TTBR0_EL3 */ @@ -227,14 +245,21 @@ InitEL3: /********************************************** * Set up TCR_EL3 - * Physical Address Size PS = 010 -> 40bits 1TB * Granual Size TG0 = 00 -> 4KB - * size offset of the memory region T0SZ = 24 -> (region size 2^(64-24) = 2^40) ***************************************************/ +#ifdef TARGET_versal + /* Versal: Physical Address Size PS = 100 -> 44bits 16TB + * T0SZ = 20 -> (region size 2^(64-20) = 2^44) */ + ldr x1,=0x80843514 +#else + /* ZynqMP: Physical Address Size PS = 010 -> 40bits 1TB + * T0SZ = 24 -> (region size 2^(64-24) = 2^40) */ ldr x1,=0x80823518 +#endif msr TCR_EL3, x1 isb +#endif /* !NO_MMU */ /* Enable SError Exception for asynchronous abort */ mrs x1,DAIF @@ 
-243,10 +268,14 @@ InitEL3: /* Configure SCTLR_EL3 */ mov x1, #0 /* Most of the SCTLR_EL3 bits are unknown at reset */ +#ifndef NO_MMU orr x1, x1, #(1 << 12) /* Enable I cache */ orr x1, x1, #(1 << 3) /* Enable SP alignment check */ orr x1, x1, #(1 << 2) /* Enable caches */ orr x1, x1, #(1 << 0) /* Enable MMU */ +#else + orr x1, x1, #(1 << 3) /* Enable SP alignment check */ +#endif msr SCTLR_EL3, x1 dsb sy isb @@ -280,6 +309,7 @@ InitEL2: dsb sy isb +#ifndef NO_MMU ldr x1, =L0Table /* Get address of level 0 for TTBR0_EL2 */ msr TTBR0_EL2, x1 /* Set TTBR0_EL2 */ @@ -297,21 +327,32 @@ InitEL2: /********************************************** * Set up TCR_EL2 - * Physical Address Size PS = 010 -> 40bits 1TB * Granual Size TG0 = 00 -> 4KB - * size offset of the memory region T0SZ = 24 -> (region size 2^(64-24) = 2^40) ***************************************************/ +#ifdef TARGET_versal + /* Versal: Physical Address Size PS = 100 -> 44bits 16TB + * T0SZ = 20 -> (region size 2^(64-20) = 2^44) */ + ldr x1,=0x80843514 +#else + /* ZynqMP: Physical Address Size PS = 010 -> 40bits 1TB + * T0SZ = 24 -> (region size 2^(64-24) = 2^40) */ ldr x1,=0x80823518 +#endif msr TCR_EL2, x1 isb - /* Enable ICache */ + /* Configure SCTLR_EL2 */ mrs x1, SCTLR_EL2 orr x1, x1, #(1 << 12) /* Enable ICache */ orr x1, x1, #(1 << 3) /* Enable SP alignment check */ orr x1, x1, #(1 << 2) /* Enable DCaches */ orr x1, x1, #(1 << 0) /* Enable MMU */ +#else + /* Configure SCTLR_EL2 - no MMU/cache */ + mov x1, #0 + orr x1, x1, #(1 << 3) /* Enable SP alignment check */ +#endif msr SCTLR_EL2, x1 dsb sy isb @@ -364,6 +405,7 @@ InitEL1: dsb sy isb +#ifndef NO_MMU ldr x1, =L0Table /* Get address of level 0 for TTBR0_EL1 */ msr TTBR0_EL1, x1 /* Set TTBR0_EL1 */ @@ -381,28 +423,37 @@ InitEL1: /********************************************** * Set up TCR_EL1 - * Physical Address Size PS = 010 -> 44bits 16TB * Granual Size TG0 = 00 -> 4KB - * size offset of the memory region T0SZ = 24 -> (region size 
2^(64-24) = 2^40) ***************************************************/ +#ifdef TARGET_versal + /* Versal: Physical Address Size PS = 100 -> 44bits 16TB + * T0SZ = 20 -> (region size 2^(64-20) = 2^44) */ + ldr x1,=0x485800514 +#else + /* ZynqMP: Physical Address Size PS = 010 -> 40bits 1TB + * T0SZ = 24 -> (region size 2^(64-24) = 2^40) */ ldr x1,=0x285800518 +#endif msr TCR_EL1, x1 isb +#endif /* !NO_MMU */ /* Enable SError Exception for asynchronous abort */ mrs x1,DAIF bic x1,x1,#(0x1<<8) msr DAIF,x1 - /* Enable MMU */ + /* Configure SCTLR_EL1 */ mov x1,#0x0 orr x1, x1, #(1 << 18) /* Set WFE non trapping */ orr x1, x1, #(1 << 17) /* Set WFI non trapping */ orr x1, x1, #(1 << 5) /* Set CP15 barrier enabled */ - orr x1, x1, #(1 << 12) /* Set I bit */ - orr x1, x1, #(1 << 2) /* Set C bit */ - orr x1, x1, #(1 << 0) /* Set M bit */ +#ifndef NO_MMU + orr x1, x1, #(1 << 12) /* Set I bit (ICache) */ + orr x1, x1, #(1 << 2) /* Set C bit (DCache) */ + orr x1, x1, #(1 << 0) /* Set M bit (MMU) */ +#endif msr SCTLR_EL1, x1 isb @@ -633,10 +684,19 @@ MMUTableL2: .set SECT, SECT+0x200000 .endr +#ifdef TARGET_versal +/* Versal: LPD/PMC peripherals at 0xF0000000 - 0xF7FFFFFF (includes QSPI @ 0xF1030000) */ +.rept 0x040 /* 0xF000_0000 - 0xF7FF_FFFF */ +.8byte SECT + Device /* 128MB LPD peripherals (QSPI, I2C, etc) */ +.set SECT, SECT+0x200000 +.endr +#else +/* ZynqMP: This region is reserved */ .rept 0x040 /* 0xF000_0000 - 0xF7FF_FFFF */ .8byte SECT + reserved /* 128MB Reserved */ .set SECT, SECT+0x200000 .endr +#endif .rept 0x8 /* 0xF800_0000 - 0xF8FF_FFFF */ .8byte SECT + Device /* 16MB coresight */ @@ -840,6 +900,8 @@ SynchronousInterruptHandler: /* Check if the Synchronous abort is occurred due to floating point access. 
*/ #if defined(EL3_SECURE) && EL3_SECURE == 1 mrs x0, ESR_EL3 +#elif defined(EL2_HYPERVISOR) && EL2_HYPERVISOR == 1 + mrs x0, ESR_EL2 #else mrs x0, ESR_EL1 #endif @@ -857,6 +919,10 @@ SynchronousInterruptHandler: mrs x1,CPTR_EL3 bic x1, x1, #(0x1<<10) msr CPTR_EL3, x1 +#elif defined(EL2_HYPERVISOR) && EL2_HYPERVISOR == 1 + mrs x1,CPTR_EL2 + bic x1, x1, #(0x1<<10) /* Clear TFP (bit 10, non-VHE CPTR_EL2 layout) to enable FP/SIMD */ + msr CPTR_EL2, x1 #else mrs x1,CPACR_EL1 orr x1, x1, #(0x1<<20) @@ -898,6 +964,10 @@ IRQInterruptHandler: mrs x0, CPTR_EL3 mrs x1, ELR_EL3 mrs x2, SPSR_EL3 +#elif defined(EL2_HYPERVISOR) && EL2_HYPERVISOR == 1 + mrs x0, CPTR_EL2 + mrs x1, ELR_EL2 + mrs x2, SPSR_EL2 #else mrs x0, CPACR_EL1 mrs x1, ELR_EL1 @@ -911,6 +981,10 @@ IRQInterruptHandler: mrs x1,CPTR_EL3 orr x1, x1, #(0x1<<10) msr CPTR_EL3, x1 +#elif defined(EL2_HYPERVISOR) && EL2_HYPERVISOR == 1 + mrs x1,CPTR_EL2 + orr x1, x1, #(0x1<<10) /* Set TFP (bit 10) to trap FP/SIMD */ + msr CPTR_EL2, x1 #else mrs x1,CPACR_EL1 bic x1, x1, #(0x1<<20) @@ -928,6 +1002,10 @@ IRQInterruptHandler: mrs x0, CPTR_EL3 ands x0, x0, #(0x1<<10) bne RestorePrevState +#elif defined(EL2_HYPERVISOR) && EL2_HYPERVISOR == 1 + mrs x0, CPTR_EL2 + ands x0, x0, #(0x1<<10) /* Test TFP (bit 10), same polarity as the EL3 path */ + bne RestorePrevState #else mrs x0,CPACR_EL1 ands x0, x0, #(0x1<<20) @@ -944,6 +1022,10 @@ RestorePrevState: msr CPTR_EL3, x0 msr ELR_EL3, x1 msr SPSR_EL3, x2 +#elif defined(EL2_HYPERVISOR) && EL2_HYPERVISOR == 1 + msr CPTR_EL2, x0 + msr ELR_EL2, x1 + msr SPSR_EL2, x2 #else msr CPACR_EL1, x0 msr ELR_EL1, x1 @@ -960,6 +1042,10 @@ FIQInterruptHandler: mrs x0, CPTR_EL3 mrs x1, ELR_EL3 mrs x2, SPSR_EL3 +#elif defined(EL2_HYPERVISOR) && EL2_HYPERVISOR == 1 + mrs x0, CPTR_EL2 + mrs x1, ELR_EL2 + mrs x2, SPSR_EL2 #else mrs x0, CPACR_EL1 mrs x1, ELR_EL1 @@ -973,6 +1059,10 @@ FIQInterruptHandler: mrs x1,CPTR_EL3 orr x1, x1, #(0x1<<10) msr CPTR_EL3, x1 +#elif defined(EL2_HYPERVISOR) && EL2_HYPERVISOR == 1 + mrs x1,CPTR_EL2 + orr x1, x1, #(0x1<<10) /* Set TFP (bit 10) to trap FP/SIMD */ + msr CPTR_EL2, x1 #else mrs
x1,CPACR_EL1 bic x1, x1, #(0x1<<20) @@ -989,6 +1079,10 @@ FIQInterruptHandler: mrs x0, CPTR_EL3 ands x0, x0, #(0x1<<10) bne RestorePrevStatefiq +#elif defined(EL2_HYPERVISOR) && EL2_HYPERVISOR == 1 + mrs x0, CPTR_EL2 + ands x0, x0, #(0x1<<10) /* Test TFP (bit 10), same polarity as the EL3 path */ + bne RestorePrevStatefiq #else mrs x0,CPACR_EL1 ands x0, x0, #(0x1<<20) @@ -1001,10 +1095,14 @@ FIQInterruptHandler: RestorePrevStatefiq: ldr x2,[sp],0x10 ldp x0, x1, [sp],0x10 - #ifdef EL3_SECURE +#if defined(EL3_SECURE) && EL3_SECURE == 1 msr CPTR_EL3, x0 msr ELR_EL3, x1 msr SPSR_EL3, x2 +#elif defined(EL2_HYPERVISOR) && EL2_HYPERVISOR == 1 + msr CPTR_EL2, x0 + msr ELR_EL2, x1 + msr SPSR_EL2, x2 #else msr CPACR_EL1, x0 msr ELR_EL1, x1 @@ -1136,4 +1234,101 @@ gicv2_init_secure: 1: ret +#if defined(BOOT_EL1) && defined(EL2_HYPERVISOR) && EL2_HYPERVISOR == 1 +/* + * Transition from EL2 to EL1 and jump to application + * + * Parameters: + * x0: entry_point - Address to jump to in EL1 + * x1: dts_addr - Device tree address (passed in x0 to application) + * + * This function configures the necessary system registers for EL1 operation + * and performs an exception return (ERET) to drop from EL2 to EL1. + * + * Based on ARM Architecture Reference Manual and U-Boot implementation. + */ +.global el2_to_el1_boot +el2_to_el1_boot: + /* Save parameters - x0=entry_point, x1=dts_addr */ + mov x19, x0 /* Save entry_point in x19 */ + mov x20, x1 /* Save dts_addr in x20 */ + + /* 1. Configure timer access for EL1 */ + mrs x0, CNTHCTL_EL2 + orr x0, x0, #3 /* EL1PCEN | EL1PCTEN - enable EL1 timer access */ + msr CNTHCTL_EL2, x0 + msr CNTVOFF_EL2, xzr /* Clear virtual timer offset */ + + /* 2. Configure virtual processor ID */ + mrs x0, MIDR_EL1 + msr VPIDR_EL2, x0 + mrs x0, MPIDR_EL1 + msr VMPIDR_EL2, x0 + + /* 3.
Disable coprocessor traps to EL2 */ + mov x0, #0x33ff /* CPTR_EL2: RES1 bits, no traps */ + msr CPTR_EL2, x0 + msr HSTR_EL2, xzr /* No traps to EL2 on system registers */ + mov x0, #(3 << 20) /* CPACR_EL1: Full FP/SIMD access */ + msr CPACR_EL1, x0 + + /* 4. Initialize SCTLR_EL1 with safe defaults (RES1 bits, MMU/cache off) */ + /* RES1 bits: 29,28,23,22,20,11 = 0x30d00800 */ + movz x0, #0x800 + movk x0, #0x30d0, lsl #16 + msr SCTLR_EL1, x0 + + /* 5. Migrate stack pointer and vector base to EL1 */ + /* SP_EL1 must be 16-byte aligned per ARM spec */ + mov x0, sp + bic x0, x0, #0xF /* Ensure 16-byte alignment */ + msr SP_EL1, x0 + mrs x0, VBAR_EL2 + msr VBAR_EL1, x0 + dsb sy /* Ensure SP_EL1 and VBAR_EL1 writes complete */ + isb /* Ensure writes take effect */ + + /* 6. Configure HCR_EL2 - EL1 is AArch64, no hypervisor calls */ + /* Check if PAuth (Pointer Authentication) is supported */ + mrs x0, ID_AA64ISAR1_EL1 /* Read ISA feature register */ + mov x1, #(0xF << 28) /* GPI mask */ + orr x1, x1, #(0xF << 24) /* GPA mask */ + orr x1, x1, #(0xF << 8) /* API mask */ + orr x1, x1, #(0xF << 4) /* APA mask */ + tst x0, x1 /* Test if PAuth supported (Z=1 if not supported) */ + mov x0, #(1 << 31) /* RW: EL1 is AArch64 */ + orr x0, x0, #(1 << 29) /* HCD: Disable HVC instruction */ + mov x1, x0 /* Copy base value */ + orr x1, x1, #(1 << 41) /* API=1: PAuth instructions are NOT trapped to EL2 */ + orr x1, x1, #(1 << 40) /* APK=1: PAuth key register accesses are NOT trapped to EL2 */ + csel x0, x0, x1, eq /* If PAuth not supported (eq), use x0 (base), else x1 (PAuth traps disabled) */ + msr HCR_EL2, x0 + dsb sy /* Ensure HCR_EL2 write completes */ + isb /* Ensure HCR_EL2 takes effect */ + + /* 7. Set up SPSR_EL2 for return to EL1h with all interrupts masked */ + /* M[3:0] = 0101 = EL1h (EL1 with SP_EL1) - NOT 0100 which is EL1t!
*/ + /* M[4] = 0 = AArch64 mode (bit 4 must be 0 for AArch64, 1 for AArch32) */ + /* DAIF = 0xF = all interrupts masked */ + /* Value: 0x3C5 = (0xF << 6) | 0x5 */ + movz x0, #0x3C5 /* DAIF=0xF (bits 9:6), M[3:0]=0x5 (EL1h) */ + msr SPSR_EL2, x0 + dsb sy /* Ensure SPSR_EL2 write completes */ + isb /* Ensure SPSR_EL2 takes effect */ + + /* 8. Set exception return address and DTB pointer, then ERET */ + /* Critical: All register writes must complete before eret */ + msr ELR_EL2, x19 /* Entry point in ELR_EL2 */ + mov x0, x20 /* DTB address in x0 (first arg) */ + mov x1, xzr /* Zero remaining argument registers */ + mov x2, xzr + mov x3, xzr + dsb sy /* Ensure all writes complete */ + isb /* Ensure all effects are visible */ + eret /* Exception return to EL1 */ + + /* Should never reach here */ + b . +#endif /* BOOT_EL1 && EL2_HYPERVISOR */ + .end diff --git a/src/libwolfboot.c b/src/libwolfboot.c index 6f573d8271..5a7fadfa0d 100644 --- a/src/libwolfboot.c +++ b/src/libwolfboot.c @@ -1288,11 +1288,12 @@ int wolfBoot_dualboot_candidate(void) } #else -static int wolfBoot_current_firmware_version() +static int wolfBoot_current_firmware_version(void) { return wolfBoot_get_blob_version(hal_get_primary_address()); } -static int wolfBoot_update_firmware_version() { +static int wolfBoot_update_firmware_version(void) +{ return wolfBoot_get_blob_version(hal_get_update_address()); } diff --git a/src/update_disk.c b/src/update_disk.c index 614eedf0e7..0055e365c9 100644 --- a/src/update_disk.c +++ b/src/update_disk.c @@ -266,7 +266,7 @@ void RAMFUNCTION wolfBoot_start(void) uint32_t dts_size = 0; #endif char part_name[4] = {'P', ':', 'X', '\0'}; - uint64_t start_us, elapsed_ms; + BENCHMARK_DECLARE(); #ifdef DISK_ENCRYPT /* Initialize encryption - this sets up the cipher with key from storage */ @@ -400,7 +400,7 @@ void RAMFUNCTION wolfBoot_start(void) /* Read the image into RAM */ wolfBoot_printf("Loading image from disk..."); - start_us = hal_get_timer_us(); + BENCHMARK_START(); 
load_off = 0; do { ret = disk_part_read(BOOT_DISK, cur_part, load_off, @@ -416,13 +416,12 @@ void RAMFUNCTION wolfBoot_start(void) selected ^= 1; continue; } - elapsed_ms = (hal_get_timer_us() - start_us) / 1000; - wolfBoot_printf("done. (%lu ms)\r\n", (unsigned long)elapsed_ms); + BENCHMARK_END("done"); #ifdef DISK_ENCRYPT /* Decrypt the image in RAM */ wolfBoot_printf("Decrypting image..."); - start_us = hal_get_timer_us(); + BENCHMARK_START(); ret = decrypt_image((uint8_t*)load_address, os_image.fw_size + IMAGE_HEADER_SIZE); if (ret != 0) { @@ -430,8 +429,7 @@ void RAMFUNCTION wolfBoot_start(void) selected ^= 1; continue; } - elapsed_ms = (hal_get_timer_us() - start_us) / 1000; - wolfBoot_printf("done. (%lu ms)\r\n", (unsigned long)elapsed_ms); + BENCHMARK_END("done"); #endif memset(&os_image, 0, sizeof(os_image)); @@ -443,25 +441,23 @@ void RAMFUNCTION wolfBoot_start(void) } wolfBoot_printf("Checking image integrity..."); - start_us = hal_get_timer_us(); + BENCHMARK_START(); if (wolfBoot_verify_integrity(&os_image) != 0) { wolfBoot_printf("Error validating integrity for %s\r\n", part_name); selected ^= 1; continue; } - elapsed_ms = (hal_get_timer_us() - start_us) / 1000; - wolfBoot_printf("done. (%lu ms)\r\n", (unsigned long)elapsed_ms); + BENCHMARK_END("done"); wolfBoot_printf("Verifying image signature..."); - start_us = hal_get_timer_us(); + BENCHMARK_START(); if (wolfBoot_verify_authenticity(&os_image) != 0) { wolfBoot_printf("Error validating authenticity for %s\r\n", part_name); selected ^= 1; continue; } else { - elapsed_ms = (hal_get_timer_us() - start_us) / 1000; - wolfBoot_printf("done. 
(%lu ms)\r\n", (unsigned long)elapsed_ms); + BENCHMARK_END("done"); failures = 0; break; /* Success case */ } diff --git a/src/update_ram.c b/src/update_ram.c index ece070fd57..82193aaf9c 100644 --- a/src/update_ram.c +++ b/src/update_ram.c @@ -29,6 +29,7 @@ #include "printf.h" #include "wolfboot/wolfboot.h" #include + #ifdef WOLFBOOT_TPM #include "tpm.h" #endif @@ -57,6 +58,7 @@ int wolfBoot_ramboot(struct wolfBoot_image *img, uint8_t *src, uint8_t *dst) { int ret; uint32_t img_size; + BENCHMARK_DECLARE(); /* read header into RAM */ wolfBoot_printf("Loading header %d bytes from %p to %p\n", @@ -82,8 +84,9 @@ int wolfBoot_ramboot(struct wolfBoot_image *img, uint8_t *src, uint8_t *dst) img_size = wolfBoot_image_size((uint8_t*)dst); /* Read the entire image into RAM */ - wolfBoot_printf("Loading image %d bytes from %p to %p\n", + wolfBoot_printf("Loading image %d bytes from %p to %p...", img_size, src + IMAGE_HEADER_SIZE, dst + IMAGE_HEADER_SIZE); + BENCHMARK_START(); #if defined(EXT_FLASH) && defined(NO_XIP) ret = ext_flash_read((uintptr_t)src + IMAGE_HEADER_SIZE, dst + IMAGE_HEADER_SIZE, img_size); @@ -94,6 +97,7 @@ int wolfBoot_ramboot(struct wolfBoot_image *img, uint8_t *src, uint8_t *dst) #else memcpy(dst + IMAGE_HEADER_SIZE, src + IMAGE_HEADER_SIZE, img_size); #endif + BENCHMARK_END("done"); /* mark image as no longer external */ img->not_ext = 1; @@ -106,6 +110,7 @@ void RAMFUNCTION wolfBoot_start(void) { int active = -1, ret = 0; struct wolfBoot_image os_image; + BENCHMARK_DECLARE(); #ifdef WOLFBOOT_UBOOT_LEGACY uint8_t *image_ptr; #endif @@ -168,12 +173,31 @@ void RAMFUNCTION wolfBoot_start(void) #else ret = wolfBoot_open_image(&os_image, active); #endif - if ( (ret < 0) || - ((ret = wolfBoot_verify_integrity(&os_image) < 0)) || - ((ret = wolfBoot_verify_authenticity(&os_image)) < 0)) { + if (ret < 0) { goto backup_on_failure; + } - } else { + /* Verify image integrity (hash check) */ + wolfBoot_printf("Checking integrity..."); + BENCHMARK_START(); + ret = 
wolfBoot_verify_integrity(&os_image); + if (ret < 0) { + wolfBoot_printf("FAILED\n"); + goto backup_on_failure; + } + BENCHMARK_END("done"); + + /* Verify image authenticity (signature check) */ + wolfBoot_printf("Verifying signature..."); + BENCHMARK_START(); + ret = wolfBoot_verify_authenticity(&os_image); + if (ret < 0) { + wolfBoot_printf("FAILED\n"); + goto backup_on_failure; + } + BENCHMARK_END("done"); + + { /* Success - integrity and signature valid */ #if !defined(WOLFBOOT_NO_LOAD_ADDRESS) && defined(WOLFBOOT_LOAD_ADDRESS) load_address = (uint32_t*)WOLFBOOT_LOAD_ADDRESS; diff --git a/test-app/AARCH64-ls1028a.ld b/test-app/AARCH64-ls1028a.ld deleted file mode 100644 index c96342bbb4..0000000000 --- a/test-app/AARCH64-ls1028a.ld +++ /dev/null @@ -1,55 +0,0 @@ -MEMORY -{ - FLASH (rx) : ORIGIN = @WOLFBOOT_TEST_APP_ADDRESS@, LENGTH = 256K - DRAM (rwx) : ORIGIN = 0x80001000 , LENGTH = 0xBFFFFFFF - OCRAM (rwx) : ORIGIN = 0x18020100, LENGTH = 128K -} - -ENTRY(main); - -SECTIONS -{ - .text : - { - _start_text = .; - KEEP(*(.boot*)) - *(.text*) - *(.rodata*) - *(.note.*) - . = ALIGN(4); - _end_text = .; - } > OCRAM - - .edidx : - { - . = ALIGN(4); - *(.ARM.exidx*) - } > OCRAM - - PROVIDE(_stored_data = .); - - .data : - { - _start_data = .; - KEEP(*(.data*)) - . = ALIGN(4); - KEEP(*(.ramcode)) - . = ALIGN(4); - _end_data = .; - } > OCRAM - - .bss (NOLOAD) : - { - _start_bss = .; - __bss_start__ = .; - *(.bss*) - *(COMMON) - . = ALIGN(4); - _end_bss = .; - __bss_end__ = .; - _end = .; - } > OCRAM - . 
= ALIGN(4); -} - -END_STACK = _start_text; diff --git a/test-app/AARCH64.ld b/test-app/AARCH64.ld index 18f423fdc6..0d08f193c6 100644 --- a/test-app/AARCH64.ld +++ b/test-app/AARCH64.ld @@ -1,42 +1,42 @@ +/* Memory region - address comes from WOLFBOOT_LOAD_ADDRESS, size from WOLFBOOT_TEST_APP_SIZE */ MEMORY { - DDR_MEM(rx) : ORIGIN = 0xa0000, LENGTH = 0x80000000 - 0xa0000 + FLASH (rx) : ORIGIN = @WOLFBOOT_LOAD_ADDRESS@, LENGTH = @WOLFBOOT_TEST_APP_SIZE@ } -ENTRY(main); + +/* Use _start if boot_arm64_start.S is linked, otherwise fall back to main */ +ENTRY(_start); SECTIONS { .text : { _start_text = .; - KEEP(*(.boot*)) + KEEP(*(.text.startup)) /* Startup code (_start) if boot_arm64_start.S is used */ + KEEP(*(.boot*)) /* Legacy boot section for compatibility */ *(.text*) *(.rodata*) *(.note.*) . = ALIGN(4); _end_text = .; - } > DDR_MEM - .edidx : - { - . = ALIGN(4); - *(.ARM.exidx*) - } > DDR_MEM + } > FLASH .edidx : { . = ALIGN(4); *(.ARM.exidx*) - } > DDR_MEM + } > FLASH .data : { + PROVIDE(_stored_data = .); /* For boot_arm64_start.S data copying */ _start_data = .; KEEP(*(.data*)) . = ALIGN(4); KEEP(*(.ramcode)) . = ALIGN(4); _end_data = .; - } > DDR_MEM + } > FLASH .bss (NOLOAD) : { @@ -48,9 +48,19 @@ SECTIONS _end_bss = .; __bss_end__ = .; _end = .; - } > DDR_MEM + } > FLASH + + .stack (NOLOAD) : + { + . = ALIGN(16); + . = . + 0x4000; /* 16KB stack */ + _stack = .; + } > FLASH + . 
= ALIGN(4); } -END_STACK = _start_text; +/* Stack pointer - use proper stack section if available, otherwise fall back to text start */ +PROVIDE(__stack = _stack); +PROVIDE(END_STACK = _start_text); /* Legacy compatibility for platforms that don't use stack section */ diff --git a/test-app/Makefile b/test-app/Makefile index 135dbfd5ac..ce0aaad675 100644 --- a/test-app/Makefile +++ b/test-app/Makefile @@ -132,6 +132,13 @@ ifeq ($(ARCH),RISCV) APP_OBJS+=startup_riscv.o vector_riscv.o endif +ifeq ($(ARCH),AARCH64) + APP_OBJS:=boot_arm64_start.o $(APP_OBJS) + # Prevent inclusion of standard C runtime startup files that conflict with boot_arm64_start.S + # Use -Wl, prefix to pass flags directly to linker when GCC is used as linker driver + LDFLAGS+=-nostartfiles -nostdlib -nodefaultlibs -Wl,--entry=_start +endif + ifeq ($(ARCH),RISCV64) APP_OBJS+=startup_riscv.o vector_riscv.o endif @@ -283,6 +290,9 @@ endif ifeq ($(EXT_FLASH),1) CFLAGS+=-D"EXT_FLASH=1" -D"PART_UPDATE_EXT=1" + ifeq ($(NO_XIP),1) + CFLAGS+=-D"PART_BOOT_EXT=1" + endif endif ifeq ($(SPI_FLASH),1) @@ -495,8 +505,16 @@ ifeq ($(TARGET),x86_fsp_qemu) LDFLAGS= endif -ifeq ($(TARGET),nxp_ls1028a) - LSCRIPT_TEMPLATE:=AARCH64-ls1028a.ld +ifeq ($(TARGET),versal) + LSCRIPT_TEMPLATE:=AARCH64.ld + LDFLAGS+=-nostdlib + # Enable DEBUG_UART for test-app to use wolfBoot_printf and hal functions + DEBUG_UART:=1 + CFLAGS+=-DDEBUG_UART +endif + +ifeq ($(TARGET),zynq) + LSCRIPT_TEMPLATE:=AARCH64.ld endif ifeq ($(TARGET),pic32ck) diff --git a/test-app/app_versal.c b/test-app/app_versal.c new file mode 100644 index 0000000000..c7bee8e1a8 --- /dev/null +++ b/test-app/app_versal.c @@ -0,0 +1,59 @@ +/* app_versal.c + * + * Test application for AMD Versal VMK180 + * + * Copyright (C) 2025 wolfSSL Inc. + * + * This file is part of wolfBoot. 
+ * + * wolfBoot is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfBoot is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#include + +#include "hal.h" +#include "hal/versal.h" +#include "wolfboot/wolfboot.h" +#include "printf.h" + +void main(void) +{ + uint32_t boot_version, update_version; + + hal_init(); + + /* Get versions from both partitions */ + boot_version = wolfBoot_get_image_version(PART_BOOT); + update_version = wolfBoot_get_image_version(PART_UPDATE); + + wolfBoot_printf("\n\n"); + wolfBoot_printf("===========================================\n"); + wolfBoot_printf(" wolfBoot Test Application - AMD Versal\n"); + wolfBoot_printf("===========================================\n\n"); + + wolfBoot_printf("Current EL: %d\n", current_el()); + + /* Print firmware versions */ + wolfBoot_printf("BOOT: Version: %d (0x%08x)\n", boot_version, boot_version); + wolfBoot_printf("UPDATE: Version: %d (0x%08x)\n", update_version, update_version); + + wolfBoot_printf("Application running successfully!\n"); + wolfBoot_printf("\nEntering idle loop...\n"); + + /* Idle loop */ + while (1) { + __asm__ volatile("wfi"); + } +} diff --git a/test-app/boot_arm64_start.S b/test-app/boot_arm64_start.S new file mode 100644 index 0000000000..7f7ca7e178 --- /dev/null +++ b/test-app/boot_arm64_start.S @@ -0,0 +1,80 @@ +/* boot_arm64_start.S + * + * AArch64 (64-bit ARM) boot startup code for 
test applications
 *
 * Copyright (C) 2025 wolfSSL Inc.
 *
 * This file is part of wolfBoot.
 *
 * wolfBoot is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * wolfBoot is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
 */

.section .text.startup, "ax"
.global _start
.type _start, @function

/*
 * _start - entry point of the test application.
 *
 * Sets SP to __stack, copies the initialised data image from _stored_data
 * to [_start_data, _end_data) when the two addresses differ, zeroes
 * [__bss_start__, __bss_end__), then calls main. If main returns, the core
 * parks in a wfi loop.
 *
 * Clobbers x0-x3. Both memory loops transfer 8 bytes at a time only while
 * at least 8 bytes remain and finish byte-wise, so nothing is written past
 * _end_data / __bss_end__ when a region length is not a multiple of 8.
 *
 * NOTE(review): the 8-byte accesses still rely on the region start being
 * suitably aligned if alignment checking applies - confirm against the
 * linker script in use.
 */
_start:
    /* Set up stack pointer */
    ldr x0, =__stack
    mov sp, x0

    /* Copy data section from flash to RAM if needed */
    ldr x0, =_stored_data       /* source (load address) */
    ldr x1, =_start_data        /* destination cursor */
    ldr x2, =_end_data          /* destination end (exclusive) */
    cmp x0, x1
    b.eq 3f                     /* Skip if data already in place */
1:
    /* Bulk copy: one 8-byte word while >= 8 bytes remain */
    sub x3, x2, x1              /* remaining bytes (<= 0 when region is empty) */
    cmp x3, #8
    b.lt 2f
    ldr x3, [x0], #8
    str x3, [x1], #8
    b 1b
2:
    /* Tail copy: remaining 0..7 bytes */
    cmp x1, x2
    b.ge 3f
    ldrb w3, [x0], #1
    strb w3, [x1], #1
    b 2b
3:

    /* Clear BSS */
    ldr x0, =__bss_start__      /* cursor */
    ldr x1, =__bss_end__        /* end (exclusive) */
4:
    /* Bulk clear: one 8-byte word while >= 8 bytes remain */
    sub x2, x1, x0              /* remaining bytes (<= 0 when region is empty) */
    cmp x2, #8
    b.lt 5f
    str xzr, [x0], #8
    b 4b
5:
    /* Tail clear: remaining 0..7 bytes */
    cmp x0, x1
    b.ge 6f
    strb wzr, [x0], #1
    b 5b
6:

    /* Jump to main - never returns */
    bl main

    /* If main returns, loop forever */
7:
    wfi
    b 7b

.size _start, . - _start

/* Provide _exit stub for bare-metal builds (required by some standard library code) */
.section .text, "ax"
.global _exit
.type _exit, @function

_exit:
    /* Loop forever - bare-metal applications don't exit */
8:
    wfi
    b 8b

.size _exit, .
- _exit diff --git a/tools/scripts/versal_boot.bif b/tools/scripts/versal_boot.bif new file mode 100644 index 0000000000..c4fdc192c9 --- /dev/null +++ b/tools/scripts/versal_boot.bif @@ -0,0 +1,14 @@ +the_ROM_image: +{ + image { + { type=bootimage, file=project_1.pdi } + { type=bootloader, file=plm.elf } + { core=psm, file=psmfw.elf } + } + image { + id = 0x1c000000, name=apu_subsystem + { type=raw, load=0x1000, file=system-default.dtb } + { core=a72-0, exception_level=el-3, trustzone, file=bl31.elf } + { core=a72-0, exception_level=el-2, file=wolfboot.elf } + } +} diff --git a/tools/scripts/versal_test.sh b/tools/scripts/versal_test.sh new file mode 100755 index 0000000000..ba24cfe6a8 --- /dev/null +++ b/tools/scripts/versal_test.sh @@ -0,0 +1,687 @@ +#!/bin/bash +# Build, flash QSPI, and boot VMK180 - all in one script +# +# Usage: +# ./versal_test.sh # Full build, flash, and boot wolfBoot +# ./versal_test.sh --test-app # Full build + flash test app to boot partition +# ./versal_test.sh --test-update # Full build + flash test app v2 to update partition +# ./versal_test.sh --boot-sdcard # Test SD card boot mode only +# ./versal_test.sh --boot-qspi # Test QSPI boot mode only +# +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +WOLFBOOT_ROOT="$(cd "${SCRIPT_DIR}/../.." 
&& pwd)"
cd "$WOLFBOOT_ROOT"

# Config - every value below can be overridden from the environment
UART_PORT="${UART_PORT:-/dev/ttyUSB2}"
UART_BAUD="${UART_BAUD:-115200}"
SERVER_IP="${SERVER_IP:-10.0.4.24}"
BOARD_IP="${BOARD_IP:-10.0.4.90}"
TFTP_DIR="${TFTP_DIR:-/srv/tftp}"
VITIS_PATH="${VITIS_PATH:-/opt/Xilinx/Vitis/2024.2}"
RELAY_PORT="${RELAY_PORT:-/dev/ttyACM2}"
UART_LOG="${UART_LOG:-${WOLFBOOT_ROOT}/uart_log.txt}"

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
NC='\033[0m'
log_info() { echo -e "${BLUE}[INFO]${NC} $*"; }
log_ok() { echo -e "${GREEN}[OK]${NC} $*"; }
log_error() { echo -e "${RED}[ERROR]${NC} $*"; }

# Check for required tools
for cmd in expect socat; do
    command -v "$cmd" &>/dev/null || { log_error "$cmd not found - install with: sudo apt install $cmd"; exit 1; }
done

# Load configuration from .config file
#
# Parses a Makefile-style config file directly (no make invocation) and sets
# one global shell variable per assignment found.
#   KEY=VALUE   always assigns
#   KEY?=VALUE  assigns only when KEY is currently unset or empty
# After parsing, derives and exports IMAGE_SIGNATURE_SIZE (from SIGN) and
# SIGN_OPTIONS (from SIGN and HASH) for the signing tool.
#
# Arguments: $1 - config file path (default: .config)
# Returns:   1 if the file does not exist, 0 otherwise
load_config() {
    local config_file="${1:-.config}"
    local line key conditional value

    if [ ! -f "$config_file" ]; then
        log_error "Config file not found: $config_file"
        return 1
    fi

    # Extract variables from .config file
    # Pattern: KEY?=VALUE or KEY=VALUE (ignores comments and blank lines)
    while IFS= read -r line; do
        # Skip comments and blank lines
        [[ "$line" =~ ^[[:space:]]*# ]] && continue
        [[ -z "${line// }" ]] && continue

        # Match: optional whitespace, key, optional ?, =, optional whitespace, value
        if [[ "$line" =~ ^[[:space:]]*([A-Za-z_][A-Za-z0-9_]*)[[:space:]]*(\?)?=[[:space:]]*(.*)$ ]]; then
            key="${BASH_REMATCH[1]}"
            conditional="${BASH_REMATCH[2]}"  # "?" if present
            value="${BASH_REMATCH[3]}"

            # Strip trailing whitespace BEFORE unquoting; otherwise a closing
            # quote followed by blanks is never recognised and stays in the value
            value="${value%"${value##*[![:space:]]}"}"

            # Remove surrounding quotes if present
            value="${value#\"}"
            value="${value%\"}"

            # Strip trailing whitespace that was inside the quotes
            value="${value%"${value##*[![:space:]]}"}"

            # For ?= syntax keep an existing non-empty value.
            # ${!key} expands to the value of the variable named by $key.
            if [ -n "$conditional" ] && [ -n "${!key:-}" ]; then
                continue
            fi
            declare -g "${key}=${value}"
        fi
    done < <(grep -E '^[[:space:]]*[A-Za-z_][A-Za-z0-9_]*[[:space:]]*(\?)?=' "$config_file" 2>/dev/null || true)

    # Export all config variables as globals
    export IMAGE_HEADER_SIZE SIGN HASH SECONDARY_SIGN_OPTIONS SECONDARY_PRIVATE_KEY

    # Calculate IMAGE_SIGNATURE_SIZE based on SIGN algorithm
    case "${SIGN:-}" in
        ECC256) IMAGE_SIGNATURE_SIZE=64 ;;
        ECC384) IMAGE_SIGNATURE_SIZE=96 ;;
        ECC521) IMAGE_SIGNATURE_SIZE=132 ;;
        ED25519) IMAGE_SIGNATURE_SIZE=64 ;;
        ED448) IMAGE_SIGNATURE_SIZE=114 ;;
        RSA2048) IMAGE_SIGNATURE_SIZE=256 ;;
        RSA3072) IMAGE_SIGNATURE_SIZE=384 ;;
        RSA4096) IMAGE_SIGNATURE_SIZE=512 ;;
        *) IMAGE_SIGNATURE_SIZE=96 ;;  # Default to ECC384
    esac
    export IMAGE_SIGNATURE_SIZE

    # Build SIGN_OPTIONS from SIGN and HASH
    SIGN_OPTIONS=""
    case "${SIGN:-}" in
        ECC256) SIGN_OPTIONS="--ecc256" ;;
        ECC384) SIGN_OPTIONS="--ecc384" ;;
        ECC521) SIGN_OPTIONS="--ecc521" ;;
        ED25519) SIGN_OPTIONS="--ed25519" ;;
        ED448) SIGN_OPTIONS="--ed448" ;;
        RSA2048) SIGN_OPTIONS="--rsa2048" ;;
        RSA3072) SIGN_OPTIONS="--rsa3072" ;;
        RSA4096) SIGN_OPTIONS="--rsa4096" ;;
    esac

    case "${HASH:-}" in
        SHA256) SIGN_OPTIONS="$SIGN_OPTIONS --sha256" ;;
        SHA384) SIGN_OPTIONS="$SIGN_OPTIONS --sha384" ;;
        SHA3) SIGN_OPTIONS="$SIGN_OPTIONS --sha3" ;;
    esac
    export SIGN_OPTIONS
}

# Initialize UART capture variables
UART_PIDS=()
UART_PTY=""
KEEP_UART_CAPTURE=false

# Relay control functions (integrated)
# Boot mode patterns (R4 R3 R2 R1): 0000=SDCard, 0011=QSPI, 0111=JTAG, 1000=Reset
#
# relay_set_mode PATTERN [PORT]
#   Drives the four relays of the serial relay board according to PATTERN,
#   a string of exactly four binary digits. One 4-byte frame
#   [0xA0, relay, state, checksum] is written per relay, where
#   checksum = (0xA0 + relay + state) & 0xFF.
#   PORT defaults to $RELAY_PORT. Returns 1 on a malformed pattern or when the
#   port cannot be configured or opened.
#
# NOTE(review): the header above lists the digits as "R4 R3 R2 R1", but the
# loop sends digit i to relay i+1, i.e. the leftmost digit drives relay 1 -
# confirm which description matches the wiring.
relay_set_mode() {
    local pattern=$1
    local port="${2:-$RELAY_PORT}"
    local i bit relay_num state checksum state_label checksum_hex frame

    if [ ${#pattern} -ne 4 ]; then
        log_error "Pattern must be 4 binary digits"
        return 1
    fi
    if [[ ! "$pattern" =~ ^[01]{4}$ ]]; then
        log_error "Pattern must contain only 0s and 1s"
        return 1
    fi

    # Put the relay board's serial port into raw 115200 8N1 mode
    if ! stty -F "$port" 115200 raw -echo -echoe -echok -echoctl -echoke cs8 -cstopb -parenb 2>/dev/null; then
        log_error "Failed to configure serial port $port"
        return 1
    fi

    # Allow port to stabilize
    sleep 0.1

    # Keep the port open on fd 3 for the whole sequence
    exec 3<>"$port" || { log_error "Failed to open serial port $port"; return 1; }

    log_info "Setting relay pattern: $pattern"

    # One frame per relay, in pattern order
    for i in 0 1 2 3; do
        bit="${pattern:$i:1}"
        relay_num=$((i + 1))  # pattern[0]=R1, pattern[1]=R2, pattern[2]=R3, pattern[3]=R4

        if [ "$bit" = "1" ]; then
            state=1
            state_label=ON
        else
            state=0
            state_label=OFF
        fi

        # Checksum: (0xA0 + relay + state) & 0xFF
        checksum=$(( (0xA0 + relay_num + state) & 0xFF ))
        checksum_hex=$(printf "%02x" $checksum)

        # Echo command being sent
        echo " Relay $relay_num: $state_label (pattern[$i]=$bit) -> [0xA0, $relay_num, $state, 0x$checksum_hex]"

        # Frame as \xHH escapes, expanded to raw bytes by printf %b
        frame=$(printf '\\x%02x\\x%02x\\x%02x\\x%02x' 0xA0 $relay_num $state $checksum)
        printf "%b" "$frame" >&3

        sync

        # Small delay for relay to respond
        sleep 0.05
    done

    # Close the port
    exec 3>&-
}

# _relay_boot_sequence BOOT_PATTERN LABEL
#   Shared reset sequence used by boot_sdcard / boot_qspi: assert reset
#   (pattern 1000), apply the boot-mode digits while reset is still asserted
#   (first digit forced to 1), then release reset by sending BOOT_PATTERN.
#   Returns 1 as soon as any relay_set_mode call fails.
_relay_boot_sequence() {
    local boot_pattern=$1
    local label=$2
    local reset_pattern="1000"
    local reset_with_mode="1${boot_pattern:1}"

    log_info "Booting from $label..."
    relay_set_mode "$reset_pattern" || return 1
    sleep 0.1
    relay_set_mode "$reset_with_mode" || return 1
    sleep 0.2
    relay_set_mode "$boot_pattern" || return 1
    sleep 0.1
    log_ok "$label boot mode set, reset released"
}

# Reset the board with the SD card boot mode (pattern 0000) selected
boot_sdcard() {
    _relay_boot_sequence "0000" "SD card"
}

# Reset the board with the QSPI boot mode (pattern 0011) selected
boot_qspi() {
    _relay_boot_sequence "0011" "QSPI"
}

# UART capture functions

# Kill every process lsof reports as holding $UART_PORT (SIGTERM, then SIGKILL)
kill_existing_uart_processes() {
    local pids pid

    pids=$(lsof -t "$UART_PORT" 2>/dev/null || true)
    if [ -n "$pids" ]; then
        log_info "Killing existing processes using $UART_PORT: $pids"
        for pid in $pids; do
            kill "$pid" 2>/dev/null || true
            sleep 0.2
            kill -9 "$pid" 2>/dev/null || true
        done
        sleep 0.5
    fi
}

# Bridge $UART_PORT to a freshly named PTY link with socat.
# Sets UART_PTY and appends the socat PID to UART_PIDS.
# Exits the script when the port is missing, inaccessible, cannot be freed or
# configured, or when the PTY link does not appear within ~2 seconds.
start_uart_capture() {
    local socat_err socat_pid attempt

    log_info "Starting UART capture: $UART_PORT -> $UART_LOG"
    UART_PTY=$(mktemp -u /tmp/vmk180_uart_XXXXXX)

    if [ ! -e "$UART_PORT" ]; then
        log_error "Serial port not found: $UART_PORT"
        exit 1
    fi
    if [ ! -r "$UART_PORT" ] || [ ! -w "$UART_PORT" ]; then
        log_error "No read/write access to $UART_PORT"
        log_info "Try: sudo chmod 666 $UART_PORT or add user to dialout group"
        exit 1
    fi

    if lsof "$UART_PORT" >/dev/null 2>&1; then
        log_info "Serial port in use, cleaning up..."
        kill_existing_uart_processes
        if lsof "$UART_PORT" >/dev/null 2>&1; then
            log_error "Failed to free serial port"
            exit 1
        fi
    fi

    if [ -e "$UART_PTY" ]; then
        rm -f "$UART_PTY"
    fi
    stty -F "$UART_PORT" "$UART_BAUD" raw -echo -echoe -echok -echoctl -echoke cs8 -cstopb -parenb 2>/dev/null || {
        log_error "Failed to configure serial port"; exit 1;
    }

    socat_err=$(mktemp /tmp/socat_err_XXXXXX)
    log_info "Creating PTY bridge: $UART_PTY <-> $UART_PORT @ ${UART_BAUD}bps"
    socat PTY,link="$UART_PTY",raw,echo=0 "GOPEN:$UART_PORT" >/dev/null 2>"$socat_err" &
    socat_pid=$!
    UART_PIDS+=("$socat_pid")

    # Poll up to 10 x 0.2s for the PTY link; bail out early if socat died
    for attempt in {1..10}; do
        sleep 0.2
        if [ -e "$UART_PTY" ]; then
            break
        fi
        if ! kill -0 "$socat_pid" 2>/dev/null; then
            log_error "socat process died:"
            cat "$socat_err" >&2
            rm -f "$socat_err"
            exit 1
        fi
    done

    if [ -s "$socat_err" ]; then
        log_error "socat errors:"
        cat "$socat_err" >&2
    fi
    rm -f "$socat_err"

    if [ ! -e "$UART_PTY" ]; then
        log_error "Failed to create PTY after 2 seconds"
        exit 1
    fi
    log_ok "UART PTY created: $UART_PTY"
}

# Terminate every PID recorded in UART_PIDS and remove the PTY link.
# Always returns 0 so it is safe to call from the EXIT trap under `set -e`.
stop_uart_capture() {
    local pid

    for pid in "${UART_PIDS[@]}"; do
        kill "$pid" 2>/dev/null || true
        sleep 0.1
        kill -9 "$pid" 2>/dev/null || true
    done
    if [ -n "$UART_PTY" ] && [ -e "$UART_PTY" ]; then
        rm -f "$UART_PTY"
    fi
    return 0
}

# EXIT handler: tear down the UART bridge unless KEEP_UART_CAPTURE was set.
# Always returns 0 so the handler never replaces the script's exit status.
cleanup() {
    if [ "$KEEP_UART_CAPTURE" = "false" ]; then
        log_info "Cleaning up..."
        stop_uart_capture
        kill_existing_uart_processes
    fi
    return 0
}
# Cleanup runs once, from the EXIT trap. INT/TERM must terminate the script
# (which then fires EXIT); a handler that merely returns would let execution
# continue after the UART bridge has already been torn down.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

# Start UART capture immediately
log_info "Starting UART capture..."
start_uart_capture || { log_error "Failed to start UART capture"; exit 1; }
log_info "UART capture active, PIDs: ${UART_PIDS[*]}"

# Test function helper
#   $1 - name of the boot function to run (boot_sdcard / boot_qspi)
#   $2 - label used in log messages
# Logs the PTY to $UART_LOG, runs the relay sequence, waits 30 seconds and
# exits the script with the UART capture left running.
test_boot() {
    local boot_func=$1
    local mode_name=$2

    log_info "=== Testing $mode_name relay sequence ==="
    if [ ${#UART_PIDS[@]} -eq 0 ]; then
        log_error "UART capture not running!"
        exit 1
    fi
    if [ -z "$UART_PTY" ] || [ ! -e "$UART_PTY" ]; then
        log_error "UART PTY not available: $UART_PTY"
        exit 1
    fi

    log_info "Starting UART logging from PTY..."
    cat "$UART_PTY" >> "$UART_LOG" 2>&1 &
    UART_PIDS+=($!)
    sleep 0.5

    $boot_func || { log_error "$mode_name failed"; exit 1; }

    log_info "Monitoring UART for 30 seconds (Ctrl+C to stop early)..."
    log_info "UART log: $UART_LOG (watch: tail -f $UART_LOG)"
    # Ctrl+C during the wait ends the test but keeps the capture alive
    trap 'log_info "Interrupted by user"; KEEP_UART_CAPTURE=true; exit 0' INT
    sleep 30 || true

    log_info "Test finished - UART capture still running (PID: ${UART_PIDS[*]})"
    log_info "To stop: kill ${UART_PIDS[*]}"
    KEEP_UART_CAPTURE=true
    exit 0
}

# Check for test modes
FLASH_TEST_APP=false
FLASH_UPDATE_APP=false
case "${1:-}" in
    test-boot|--boot-sdcard) test_boot boot_sdcard "boot-sdcard" ;;
    --boot-qspi) test_boot boot_qspi "boot-qspi" ;;
    --test-app) FLASH_TEST_APP=true ;;
    --test-update) FLASH_TEST_APP=true; FLASH_UPDATE_APP=true ;;
esac

# Build wolfBoot
log_info "Building wolfBoot..."
cp config/examples/versal_vmk180.config .config

make clean
make || { log_error "Failed to build wolfBoot"; exit 1; }
if [ ! -f "wolfboot.elf" ]; then
    log_error "wolfboot.elf not found after build"
    exit 1
fi

# Build test app if requested
if [ "$FLASH_TEST_APP" = "true" ]; then
    if [ "$FLASH_UPDATE_APP" = "true" ]; then
        log_info "Building and signing test application version 2..."
        make test-app/image.bin

        # Sign as version 2 for update testing
        # Load all config values from .config file
        load_config .config

        IMAGE_TRAILER_SIZE=0 # Usually 0 unless delta updates are used
        PRIVATE_KEY="${PRIVATE_KEY:-wolfboot_signing_private_key.der}"

        BOOT_IMG="test-app/image.bin"

        # Build sign command with environment variables
        # The sign tool needs IMAGE_HEADER_SIZE and IMAGE_SIGNATURE_SIZE as environment variables
        export IMAGE_HEADER_SIZE IMAGE_SIGNATURE_SIZE IMAGE_TRAILER_SIZE

        log_info "Signing test app as version 2..."
        log_info " IMAGE_HEADER_SIZE=$IMAGE_HEADER_SIZE"
        log_info " IMAGE_SIGNATURE_SIZE=$IMAGE_SIGNATURE_SIZE"
        log_info " SIGN=$SIGN"
        log_info " HASH=$HASH"
        log_info " SIGN_OPTIONS=$SIGN_OPTIONS"
        log_info " PRIVATE_KEY=$PRIVATE_KEY"

        # Sign the image as version 2
        # ($SIGN_OPTIONS / $SECONDARY_SIGN_OPTIONS are left unquoted on purpose:
        # each holds several space-separated flags)
        if [ "$SIGN" != "NONE" ] && [ -n "$SECONDARY_PRIVATE_KEY" ]; then
            ./tools/keytools/sign $SIGN_OPTIONS $SECONDARY_SIGN_OPTIONS "$BOOT_IMG" "$PRIVATE_KEY" "$SECONDARY_PRIVATE_KEY" 2 || {
                log_error "Signing failed with secondary key"
                exit 1
            }
        elif [ "$SIGN" != "NONE" ]; then
            ./tools/keytools/sign $SIGN_OPTIONS "$BOOT_IMG" "$PRIVATE_KEY" 2 || {
                log_error "Signing failed"
                exit 1
            }
        else
            ./tools/keytools/sign $SIGN_OPTIONS "$BOOT_IMG" 2 || {
                log_error "Signing failed (SIGN=NONE)"
                exit 1
            }
        fi

        testapp_size=$(stat -c%s "test-app/image_v2_signed.bin")
        log_info "Test app v2 size: $testapp_size bytes"
        cp test-app/image_v2_signed.bin "${TFTP_DIR}/"
        log_ok "Test app v2 copied to TFTP: ${TFTP_DIR}/image_v2_signed.bin"
    else
        log_info "Building and signing test application version 1..."
        make test-app/image.bin
        make test-app/image_v1_signed.bin

        testapp_size=$(stat -c%s "test-app/image_v1_signed.bin")
        log_info "Test app size: $testapp_size bytes"
        cp test-app/image_v1_signed.bin "${TFTP_DIR}/"
        log_ok "Test app copied to TFTP: ${TFTP_DIR}/image_v1_signed.bin"
    fi
fi

# Generate BOOT.BIN
log_info "Generating BOOT.BIN..."
if [ ! -f "wolfboot.elf" ]; then
    log_error "wolfboot.elf not found - cannot generate BOOT.BIN"
    exit 1
fi

# Set PREBUILT_DIR (relative to wolfBoot root)
export PREBUILT_DIR="${WOLFBOOT_ROOT}/../soc-prebuilt-firmware/vmk180-versal"

# Copy required files to wolfBoot root directory
log_info "Copying prebuilt firmware files..."
[ !
-d "${PREBUILT_DIR}" ] && { + log_error "Prebuilt firmware directory not found: ${PREBUILT_DIR}" + log_info "Clone with: git clone --branch xlnx_rel_v2024.2 https://github.com/Xilinx/soc-prebuilt-firmware.git" + exit 1 +} + +cp "${PREBUILT_DIR}/project_1.pdi" . +cp "${PREBUILT_DIR}/plm.elf" . +cp "${PREBUILT_DIR}/psmfw.elf" . +cp "${PREBUILT_DIR}/bl31.elf" . +cp "${PREBUILT_DIR}/system-default.dtb" . + +source "${VITIS_PATH}/settings64.sh" +rm -f BOOT.BIN +bootgen -arch versal -image ./tools/scripts/versal_boot.bif -w -o BOOT.BIN || { log_error "bootgen failed"; exit 1; } +cp BOOT.BIN "${TFTP_DIR}/" || { log_error "Failed to copy BOOT.BIN to TFTP directory"; exit 1; } +filesize=$(stat -c%s "${TFTP_DIR}/BOOT.BIN") +filesize_hex=$(printf "0x%x" $filesize) + +# Get test app size if flashing it +testapp_size_hex="0x0" +if [ "$FLASH_TEST_APP" = "true" ]; then + if [ "$FLASH_UPDATE_APP" = "true" ]; then + testapp_size=$(stat -c%s "${TFTP_DIR}/image_v2_signed.bin") + testapp_size_hex=$(printf "0x%x" $testapp_size) + log_info "Test app v2 size: $testapp_size bytes" + else + testapp_size=$(stat -c%s "${TFTP_DIR}/image_v1_signed.bin") + testapp_size_hex=$(printf "0x%x" $testapp_size) + log_info "Test app size: $testapp_size bytes" + fi +fi + +# Flash QSPI via U-Boot TFTP +log_info "Flashing QSPI..." +boot_sdcard + +expect <" { + puts "U-Boot prompt found" + } + timeout { + puts "Timeout waiting for U-Boot prompt" + exit 1 + } +} +expect "Versal>" +puts "At U-Boot prompt, configuring network..." 
+sleep 0.5 + +send "setenv ipaddr \$board_ip\r" +expect { + "Versal>" { + puts "IP address set" + } + timeout { + puts "Warning: Timeout waiting for prompt after setenv ipaddr" + } +} + +send "setenv serverip \$server_ip\r" +expect { + "Versal>" { + puts "Server IP set" + } + timeout { + puts "Warning: Timeout waiting for prompt after setenv serverip" + } +} + +send "setenv netmask 255.255.255.0\r" +expect { + "Versal>" { + puts "Netmask set" + } + timeout { + puts "Warning: Timeout waiting for prompt after setenv netmask" + } +} + +puts "Downloading BOOT.BIN via TFTP..." +send "tftpboot 0x10000000 BOOT.BIN\r" +expect { + "Bytes transferred" { puts "TFTP download successful" } + "Error" { puts "TFTP download failed"; exit 1 } + timeout { puts "TFTP timeout"; exit 1 } +} +expect "Versal>" + +puts "Probing SPI flash..." +send "sf probe 0\r" +expect { + "SF:" { puts "SPI flash detected" } + timeout { + puts "SPI probe failed, trying alternate..." + send "sf probe 0 0 0\r" + expect "Versal>" + } +} +expect "Versal>" + +puts "Updating flash with BOOT.BIN..." +send "sf update 0x10000000 0 \$filesize_hex\r" +expect { + -re ".*updated.*Versal>" { puts "Flash update successful" } + -re ".*Versal>" { puts "Flash update complete" } + timeout { + puts "Flash update timeout - checking for prompt..." + send "\r" + expect { "Versal>" { puts "Got prompt, continuing..." } timeout { puts "No prompt found"; exit 1 } } + } +} +puts "Flash update done, verifying..." + +set verify_addr "0x20000000" +puts "Reading flash back for verification..." +send "sf read \$verify_addr 0 \$filesize_hex\r" +expect { + "Versal>" { + puts "Flash read complete" + } + timeout { + puts "Flash read timeout" + exit 1 + } +} + +puts "Comparing flash contents..." 
+send "cmp.b 0x10000000 \$verify_addr \$filesize_hex\r" +set timeout 30 +expect { + -re ".*are equal.*" { + puts "Flash verification PASSED" + expect "Versal>" + } + -re ".*identical.*" { + puts "Flash verification PASSED" + expect "Versal>" + } + -re ".*differ.*" { + puts "Flash verification FAILED - data differs!" + exit 1 + } + "Versal>" { + puts "Verification completed (result unclear, assuming OK)" + } + timeout { + puts "Verification timeout - checking for prompt..." + send "\r" + expect { + "Versal>" { + puts "Got prompt after timeout, continuing..." + } + timeout { + puts "No prompt found, exiting" + exit 1 + } + } + } +} +set timeout 90 + +puts "BOOT.BIN flash and verification complete!" + +# Flash test app if requested +if { \$flash_test_app eq "true" } { + if { \$flash_update_app eq "true" } { + puts "" + puts "=== Flashing test app v2 to UPDATE partition at 0x3400000 ===" + + puts "Downloading test app v2 via TFTP..." + send "tftpboot 0x10000000 image_v2_signed.bin\r" + expect { + "Bytes transferred" { puts "TFTP download successful" } + "Error" { puts "TFTP download failed"; exit 1 } + timeout { puts "TFTP timeout"; exit 1 } + } + expect "Versal>" + + puts "Erasing UPDATE partition at 0x3400000 (128KB sector)..." + send "sf erase 0x3400000 0x20000\r" + expect { + "Versal>" { puts "Erase complete" } + timeout { puts "Erase timeout"; exit 1 } + } + + puts "Writing test app v2 to 0x3400000..." + send "sf write 0x10000000 0x3400000 \$testapp_size_hex\r" + expect { + "Versal>" { puts "Write complete" } + timeout { puts "Write timeout"; exit 1 } + } + + puts "Test app v2 flashed to UPDATE partition!" + } else { + puts "" + puts "=== Flashing test app to boot partition at 0x800000 ===" + + puts "Downloading test app via TFTP..." 
+ send "tftpboot 0x10000000 image_v1_signed.bin\r" + expect { + "Bytes transferred" { puts "TFTP download successful" } + "Error" { puts "TFTP download failed"; exit 1 } + timeout { puts "TFTP timeout"; exit 1 } + } + expect "Versal>" + + puts "Erasing boot partition at 0x800000 (128KB sector)..." + send "sf erase 0x800000 0x20000\r" + expect { + "Versal>" { puts "Erase complete" } + timeout { puts "Erase timeout"; exit 1 } + } + + puts "Erasing update partition at 0x3400000 (128KB sector)..." + send "sf erase 0x3400000 0x20000\r" + expect { + "Versal>" { puts "Erase complete" } + timeout { puts "Erase timeout"; exit 1 } + } + + puts "Writing test app to 0x800000..." + send "sf write 0x10000000 0x800000 \$testapp_size_hex\r" + expect { + "Versal>" { puts "Write complete" } + timeout { puts "Write timeout"; exit 1 } + } + + puts "Test app flashed to boot partition!" + } +} + +puts "" +puts "All flash operations complete!" +close +EXPECT_EOF + +log_ok "Flash operations complete!" + +# Restart continuous UART logging after expect exits +log_info "Restarting continuous UART logging..." +cat "$UART_PTY" >> "$UART_LOG" 2>&1 & +UART_PIDS+=($!) +sleep 1 + +# Boot from QSPI +log_info "Switching to QSPI boot mode..." +boot_qspi +log_ok "Board booting from QSPI" + +log_info "Capturing UART output for 30 seconds..." +log_info "Watch live: tail -f $UART_LOG" +sleep 30 + +log_ok "Capture complete" +log_info "UART log saved to: $UART_LOG" +KEEP_UART_CAPTURE=true +log_info "UART capture still active in background (PID: ${UART_PIDS[*]})" +log_info "To stop: kill ${UART_PIDS[*]}"