diff --git a/arch.mk b/arch.mk index d4467bf886..7fc39cf52d 100644 --- a/arch.mk +++ b/arch.mk @@ -563,11 +563,27 @@ endif ## RISCV64 (64-bit) ifeq ($(ARCH),RISCV64) CROSS_COMPILE?=riscv64-unknown-elf- + + # M-mode vs S-mode configuration + ifeq ($(RISCV_MMODE),1) + # Machine Mode: Running directly from eNVM/L2 SRAM + # Boots from SD card after initializing DDR + CFLAGS+=-DWOLFBOOT_RISCV_MMODE + # Use M-mode specific linker script + LSCRIPT_IN:=hal/$(TARGET)-m.ld + else + # Supervisor Mode (default): Running under HSS with DDR available + endif + CFLAGS+=-DMMU -DWOLFBOOT_DUALBOOT CFLAGS+=-DWOLFBOOT_UPDATE_DISK -DMAX_DISKS=1 + + # Disk boot support UPDATE_OBJS:=src/update_disk.o OBJS += src/gpt.o OBJS += src/disk.o + # Note: sdhci.o is added by options.mk when DISK_SDCARD=1 + ARCH_FLAGS=-march=rv64imafd -mabi=lp64d -mcmodel=medany CFLAGS+=-fno-builtin-printf -DUSE_M_TIME -g -nostartfiles -DARCH_RISCV -DARCH_RISCV64 CFLAGS+=$(ARCH_FLAGS) @@ -580,6 +596,7 @@ ifeq ($(ARCH),RISCV64) # Unified RISC-V boot code (32/64-bit via __riscv_xlen) OBJS+=src/boot_riscv_start.o src/boot_riscv.o src/vector_riscv.o + # FDT support required CFLAGS+=-DWOLFBOOT_FDT OBJS+=src/fdt.o diff --git a/config/examples/polarfire_mpfs250-m.config b/config/examples/polarfire_mpfs250-m.config new file mode 100644 index 0000000000..14e38eb5fa --- /dev/null +++ b/config/examples/polarfire_mpfs250-m.config @@ -0,0 +1,98 @@ +# PolarFire SoC MPFS250T M-Mode (Machine Mode) Configuration +# +# This configuration runs wolfBoot directly from eNVM in M-mode (Machine Mode), +# initializes DDR memory, and boots Linux from SD card. +# +# Boot flow: +# 1. eNVM (0x20220100) -> L2_SCRATCH (0x0A000000) - wolfBoot starts +# 2. DDR initialization (PLLs, controller, training) +# 3. Load signed OS image from SD card partition to DDR +# 4. 
Verify signature and boot +# +# Flash using mpfsBootmodeProgrammer (bootmode 1): +# java -jar mpfsBootmodeProgrammer.jar --bootmode 1 --die MPFS250T \ +# --package FCG1152 --workdir $PWD wolfboot.elf + +ARCH?=RISCV64 +TARGET?=mpfs250 +SIGN?=ECC384 +HASH?=SHA384 +IMAGE_HEADER_SIZE=512 +WOLFBOOT_VERSION?=1 +ARMORED?=0 +DEBUG?=0 +DEBUG_SYMBOLS?=1 +DEBUG_UART?=1 +VTOR?=1 +EXT_FLASH?=0 +SPI_FLASH?=0 +NO_XIP?=1 +NVM_FLASH_WRITEONCE?=0 +UART_FLASH?=0 +V?=0 +NO_MPU?=1 +RAM_CODE?=0 +SPMATH?=1 +DUALBANK_SWAP?=0 +PKA?=0 +ENCRYPT=0 +WOLFTPM?=0 +ELF?=1 +#DEBUG_ELF?=1 + +# M-Mode Configuration +# Runs on E51 core in Machine Mode from L2 SRAM +RISCV_MMODE?=1 + +# Stack size per hart (reduced for L2 SRAM constraints) +CFLAGS_EXTRA+=-DSTACK_SIZE_PER_HART=8192 + +# Use RISC-V assembly version of ECDSA and SHA +NO_ASM?=0 +NO_ARM_ASM?=0 + +# Enable SD card for loading application +DISK_SDCARD?=1 +DISK_EMMC?=0 +# L2 SRAM Address for wolfBoot (256KB available) +# Stack grows down from end of L2_SCRATCH +WOLFBOOT_ORIGIN?=0x0A000000 + +# Flash sector size (4KB typical) +WOLFBOOT_SECTOR_SIZE?=0x1000 + +# Load Partition to RAM Address +WOLFBOOT_LOAD_ADDRESS?=0x8E000000 + +# Partition layout for PolarFire SoC MPFS250T +# Using update_disk loader we just need to specify the partition number (A/B) +WOLFBOOT_NO_PARTITIONS=1 +CFLAGS_EXTRA+=-DBOOT_PART_A=1 +CFLAGS_EXTRA+=-DBOOT_PART_B=2 +# Speed up disk partition read (512KB chunks - max DMA size) +CFLAGS_EXTRA+=-DDISK_BLOCK_SIZE=0x80000 + +# DTS (Device Tree) load address +WOLFBOOT_LOAD_DTS_ADDRESS?=0x8A000000 + +# Optional Encryption (off by default, consistent with ENCRYPT=0 above; uncomment all four lines to enable) +#CUSTOM_ENCRYPT_KEY=1 +#ENCRYPT=1 +#ENCRYPT_WITH_AES256=1 +#OBJS_EXTRA=src/my_custom_encrypt_key.o + +# Debug options (useful for initial M-mode bring-up) +#CFLAGS_EXTRA+=-DDEBUG_BOOT + +# Optional EMMC_SD debugging logs +#CFLAGS_EXTRA+=-DDEBUG_SDHCI +# Optional disk debugging logs +#CFLAGS_EXTRA+=-DDEBUG_DISK +#CFLAGS_EXTRA+=-DDISK_TEST + +# Used by test-application for ELF +WOLFBOOT_PARTITION_BOOT_ADDRESS=0x80200000 
+WOLFBOOT_PARTITION_SIZE=0x4000000 + + + diff --git a/config/examples/polarfire_mpfs250.config b/config/examples/polarfire_mpfs250.config index 448191d789..7d244bfbfc 100644 --- a/config/examples/polarfire_mpfs250.config +++ b/config/examples/polarfire_mpfs250.config @@ -52,17 +52,17 @@ CFLAGS_EXTRA+=-DBOOT_PART_B=2 # Speed up disk partition read (512KB chunks - max DMA size) CFLAGS_EXTRA+=-DDISK_BLOCK_SIZE=0x80000 -# DTS (Device Tree) +# DTS (Device Tree) load address WOLFBOOT_LOAD_DTS_ADDRESS?=0x8A000000 # Optional Encryption -#CUSTOM_ENCRYPT_KEY=1 -#ENCRYPT=1 -#ENCRYPT_WITH_AES256=1 -#OBJS_EXTRA=src/my_custom_encrypt_key.o +CUSTOM_ENCRYPT_KEY=1 +ENCRYPT=1 +ENCRYPT_WITH_AES256=1 +OBJS_EXTRA=src/my_custom_encrypt_key.o # Optional EMMC_SD debugging logs -#CFLAGS_EXTRA+=-DDEBUG_MMC +#CFLAGS_EXTRA+=-DDEBUG_SDHCI # Optional disk debugging logs #CFLAGS_EXTRA+=-DDEBUG_DISK #CFLAGS_EXTRA+=-DDISK_TEST diff --git a/docs/PolarFire_SoC_DDR_Training.md b/docs/PolarFire_SoC_DDR_Training.md new file mode 100644 index 0000000000..85c48bce20 --- /dev/null +++ b/docs/PolarFire_SoC_DDR_Training.md @@ -0,0 +1,532 @@ +# PolarFire SoC DDR Training Investigation + +## Overview + +This document tracks the investigation and debugging of LPDDR4 training on the PolarFire SoC MPFS250T Video Kit board. The DDR controller uses a Training IP (TIP) block that performs automatic training phases, but requires specific initialization sequences and state transitions to progress correctly. + +**Target Hardware:** +- Board: PolarFire SoC Video Kit +- SoC: MPFS250T-FCG1152 +- Memory: Micron MT53D512M32D2DS-053 LPDDR4 (2GB, x32, 1600 Mbps) +- Memory Type: LPDDR4 (not DDR4 - board has both, but MSS uses LPDDR4) + +**Current Status:** TIP training gets stuck after BCLK_SCLK phase completes. Write Leveling (WRLVL), Read Gate Training (RDGATE), and Read Data Eye Training (DQ_DQS) phases do not start automatically. + +--- + +## DDR Controller Architecture + +### Key Components + +1. 
**DDR Controller (DDRCFG_BASE @ 0x20084000)** + - Main controller logic + - DFI (DDR PHY Interface) control + - Mode Register (MR) write interface + - Memory Test Controller (MTC) + +2. **DDR PHY (CFG_DDR_SGMII_PHY @ 0x20080000)** + - Physical layer interface + - Training IP (TIP) state machine + - Expert mode for manual control + - Per-lane training registers + +3. **Training IP (TIP)** + - Hardware state machine for automatic training + - Phases: BCLK_SCLK, ADDCMD, WRLVL, RDGATE, DQ_DQS + - Updates `training_status` register as phases complete + - Requires specific conditions to transition between phases + +### Critical Registers + +**PHY Registers:** +- `PHY_TRAINING_STATUS` (0x804): Bit flags for completed training phases + - Bit 0: BCLK_SCLK complete + - Bit 1: ADDCMD complete + - Bit 2: WRLVL complete + - Bit 3: RDGATE complete + - Bit 4: DQ_DQS complete +- `PHY_TRAINING_SKIP` (0x808): Which phases to skip (0x02 = skip ADDCMD) +- `PHY_TRAINING_RESET` (0x80C): Reset TIP state machine +- `PHY_TRAINING_START` (0x810): Start/stop TIP +- `PHY_GT_STATE` (0x82C): Gate training state (0xB is normal, not error) +- `PHY_WL_DELAY_0` (0x830): Write leveling delay for lane 0 +- `PHY_EXPERT_MODE_EN` (0x850): Enable expert mode for manual control +- `PHY_DPC_BITS` (0x5C0): DPC configuration (vrgen_h for WRLVL) + +**Controller Registers:** +- `MC_DFI_INIT_START` (0x00): Start DFI initialization +- `MC_DFI_INIT_COMPLETE` (0x04): DFI initialization complete flag +- `MC_CTRLR_INIT` (0x08): Controller initialization trigger +- `MC_INIT_MR_ADDR` (0x20): Mode register address for MR writes +- `MC_INIT_MR_WR_DATA` (0x24): Mode register write data +- `MC_INIT_MR_W_REQ` (0x28): Mode register write request + +--- + +## LPDDR4 Training Sequence + +### Standard Sequence (Per User Guide) + +1. **DFI Initialization** + - Release training reset + - Start DFI init (`DFI_INIT_START`) + - Wait for `DFI_INIT_COMPLETE` + +2. 
**DRAM Initialization (LPDDR4)** + - Device reset sequence + - PLL frequency doubling for MR writes + - Mode Register writes (MR1, MR2, MR3, MR4, MR11, MR12, MR13, MR14, MR16, MR17, MR22) + - PLL frequency restore + - CA VREF training (manual) + - ADDCMD training (manual) + - MR re-write after ADDCMD + - ZQ calibration + +3. **TIP Training Phases** (automatic) + - BCLK_SCLK: Clock training + - ADDCMD: Command/Address training (can be skipped if done manually) + - WRLVL: Write leveling + - RDGATE: Read DQS gate training + - DQ_DQS: Read data eye training + +4. **Post-Training** + - Write calibration (using MTC) + - Memory test + +### LPDDR4-Specific Requirements + +**Mode Register Values (MT53D512M32D2DS-053 @ 1600 Mbps):** +- MR1 = 0x56: nWR=16, RD preamble=toggle, WR preamble=2tCK, BL=16 +- MR2 = 0x2D: RL=14, WL=8, WLS=1 (normal mode) +- MR2 = 0xAD: RL=14, WL=8, WLS=1, **WRLVL enable (bit 7=1)** - Required for WRLVL phase +- MR3 = 0xF1: PDDS=RZQ/6 (40ohm), DBI-RD/WR disabled +- MR11 = 0x31: DQ_ODT=RZQ2, CA_ODT=RZQ4 +- MR12 = 0x32: CA VREF=50 +- MR13 = 0x00: FSP-OP=0, FSP-WR=0, DMI enabled +- MR14 = 0x0F: DQ VREF=15 +- MR22 = 0x06: SOC_ODT=RZQ6 (40ohm) + +**WRLVL Configuration:** +- `DPC_BITS` vrgen_h = 0x5 (bits 9:4) - Set before training reset release +- `RPC3_ODT` = 0x0 - ODT off during WRLVL +- MR2 bit 7 = 1 - Enable WRLVL mode in DRAM + +**IBUFMD Registers (LPDDR4 specific):** +- `PHY_RPC_IBUFMD_ADDCMD` = 0x3 +- `PHY_RPC_IBUFMD_CLK` = 0x4 +- `PHY_RPC_IBUFMD_DQ` = 0x3 +- `PHY_RPC_IBUFMD_DQS` = 0x4 + +--- + +## Our Implementation vs HSS + +### HSS (Hart Software Services) - Working Reference + +**Sequence:** +1. Configure PHY for WRLVL (vrgen_h=0x5, ODT=0) **before** training reset release +2. Release training reset +3. Start DFI init +4. Wait for DFI init complete +5. Call `lpddr4_manual_training()`: + - Device reset + - PLL freq double + - MR writes + - PLL freq restore + - CA VREF training + - ADDCMD training + - MR re-write + - ZQ calibration +6. 
**State machine transitions:** + - Check BCLK_SCLK complete → go to ADDCMD state + - Check ADDCMD skipped → immediately go to WRLVL state + - Poll `training_status` for WRLVL bit +7. TIP automatically completes WRLVL, RDGATE, DQ_DQS +8. Restore ODT and disable WRLVL in MR2 + +**Key Observation from HSS Logs:** +- Line 120: `END lpddr4_manual_training training_status: 00000001` (BCLK_SCLK only) +- Line 128: `POST_MANUAL_TRAINING training_status: 0000001D` (all phases done!) +- Lines 134-138: `wl_delay` values already populated + +**Conclusion:** TIP runs automatically between manual training end and POST check. No explicit restart needed. + +### Our Implementation - Current State + +**Sequence:** +1. ✅ Configure PHY for WRLVL (vrgen_h=0x5, ODT=0) before training reset release +2. ✅ Release training reset +3. ✅ Start DFI init +4. ✅ Wait for DFI init complete +5. ✅ Manual LPDDR4 training (matches HSS) +6. ✅ ZQ calibration +7. ✅ **State machine simulation:** + - Check BCLK_SCLK done + - Check ADDCMD skipped + - Enable MR2 WRLVL + - Set training_start=1 + - Add delays +8. ❌ **Wait for TIP - STUCK HERE** + - `training_status` stays at 0x1 (BCLK_SCLK only) + - `wl_delay` remains 0x0 on all lanes + - TIP does not progress to WRLVL phase + +--- + +## Failure Analysis + +### Current Failure Point + +**Symptom:** +- After manual training completes, TIP does not progress from BCLK_SCLK phase to WRLVL phase +- `training_status` = 0x1 (only BCLK_SCLK bit set) +- All lanes show `wl_delay = 0x0` (WRLVL not started) +- `gt_state = 0xB` (normal, not an error state) + +**Timing:** +- Manual training completes successfully +- State machine simulation executes +- MR2 WRLVL enabled (0xAD) +- TIP remains stuck at BCLK_SCLK phase + +### Root Cause Hypotheses + +#### Hypothesis 1: TIP Needs State Machine Acknowledgment +**Theory:** TIP may require the software state machine to explicitly acknowledge ADDCMD skip before it can start WRLVL. 
+ +**Evidence:** +- HSS uses a state machine that transitions: ADDCMD state → checks skip → WRLVL state +- Our code polls `training_status` but doesn't simulate state transitions +- TIP might be waiting for state machine to be in WRLVL state + +**Status:** ✅ Implemented state machine simulation, but still stuck + +#### Hypothesis 2: MR2 WRLVL Enable Timing +**Theory:** MR2 WRLVL must be enabled at a specific time relative to TIP state transitions. + +**Evidence:** +- HSS doesn't explicitly enable MR2 WRLVL in code (TIP may do it automatically) +- We enable MR2 WRLVL manually after state simulation +- Timing might be wrong + +**Status:** ⚠️ Timing adjusted, but still stuck + +#### Hypothesis 3: TIP Needs Explicit Signal +**Theory:** TIP requires a specific register write or signal to transition from ADDCMD (skipped) to WRLVL. + +**Evidence:** +- No explicit register found that triggers WRLVL start +- `training_start` register exists but may need specific sequence +- DFI signals might need toggling + +**Status:** ❓ Unknown - needs investigation + +#### Hypothesis 4: Missing Configuration +**Theory:** Some register or configuration is missing that TIP needs to start WRLVL. 
+ +**Evidence:** +- All known registers match HSS values +- DPC_BITS, ODT, training_skip all correct +- IBUFMD registers set correctly + +**Status:** ❓ Unknown - needs deeper investigation + +--- + +## What We've Tried + +### Attempt 1: Correct MR Values +- Updated LPDDR4 Mode Register values to match Libero config +- Result: ❌ No change - still stuck + +### Attempt 2: Manual Write Leveling +- Implemented manual DFI write leveling using expert mode +- Result: ❌ Removed - conflicts with TIP automatic training + +### Attempt 3: TIP Restart Sequence +- Added TIP restart after manual training (training_start toggle, DFI init toggle) +- Result: ❌ No change - still stuck + +### Attempt 4: WRLVL Configuration Before Reset +- Moved WRLVL config (vrgen_h, ODT) before training reset release +- Result: ✅ Correct sequence, but still stuck + +### Attempt 5: IBUFMD Initialization +- Added LPDDR4 IBUFMD register initialization +- Result: ✅ Correct, but still stuck + +### Attempt 6: MR2 WRLVL Enable Before TIP +- Enable MR2 WRLVL after manual training, before waiting for TIP +- Result: ✅ Correct sequence, but still stuck + +### Attempt 7: State Machine Simulation (Current) +- Simulate HSS state machine transitions (BCLKSCLK → ADDCMD → WRLVL) +- Add delays for state transitions +- Result: ❌ Still stuck - TIP doesn't progress + +--- + +## Key Findings from HSS Analysis + +### 1. TIP Runs Automatically +- No explicit restart needed after manual training +- TIP continues automatically from where it left off +- State machine just polls `training_status` - doesn't control TIP + +### 2. Timing is Critical +- HSS checks `training_status` immediately after manual training +- By POST_MANUAL_TRAINING check, TIP has already completed all phases +- Suggests TIP runs very quickly once conditions are met + +### 3. gt_state=0xB is Normal +- Successful HSS training shows `gt_state=0xB` throughout +- This is NOT an error state +- Don't use gt_state to detect completion + +### 4. 
Write Leveling Delays Populate Automatically +- `wl_delay` values appear automatically when WRLVL completes +- No manual intervention needed +- Primary indicator of WRLVL completion + +### 5. MR2 WRLVL Enable +- HSS code doesn't explicitly write MR2 with WRLVL enabled +- TIP likely enables it automatically when starting WRLVL phase +- Or controller enables it based on TIP state + +### 6. State Machine is Polling, Not Controlling +- HSS state machine polls `training_status` bits +- It doesn't send commands to TIP +- TIP runs independently based on hardware conditions + +--- + +## Register Values Comparison + +### Successful HSS Training (from logs) +``` +training_status: 0x1D (all phases complete) +training_skip: 0x02 (skip ADDCMD) +training_reset: 0x0 (released) +DPC_BITS: 0x47452 (after manual training) +rpc3_ODT: 0x3 (after training) +gt_state: 0xB (normal) +wl_delay: 0x19, 0x18, 0x1A, 0x19, 0x1F (lanes 0-4) +``` + +### Our Current State +``` +training_status: 0x1 (BCLK_SCLK only) +training_skip: 0x02 (skip ADDCMD) ✅ +training_reset: 0x0 (released) ✅ +DPC_BITS: 0x50452 (vrgen_h=0x5) ✅ +rpc3_ODT: 0x0 (during WRLVL) ✅ +gt_state: 0xB (normal) ✅ +wl_delay: 0x0, 0x0, 0x0, 0x0, 0x0 ❌ (not started) +``` + +**Difference:** DPC_BITS value (0x47452 vs 0x50452) - need to investigate what this means. 
+ +--- + +## Next Steps / Options to Try + +### Option A: Investigate DPC_BITS Difference +- HSS shows `DPC_BITS: 0x47452` after manual training +- We have `DPC_BITS: 0x50452` +- Difference: the two values differ only in bits 16:12 (`0x47452 ^ 0x50452 = 0x17000`); bits 9:4 (vrgen_h) are `0x5` in both +- **Action:** Identify the DPC_BITS fields in bits 16:12 and check whether they need to be restored before TIP starts WRLVL + +### Option B: Check Controller State +- Verify `DFI_INIT_COMPLETE` remains set +- Check if controller needs to be in specific state +- **Action:** Add debug output for controller state registers + +### Option C: TIP Internal State Machine +- TIP might have internal state that needs specific conditions +- May need to check TIP-specific registers not documented +- **Action:** Search register map for TIP state/control registers + +### Option D: Timing Adjustments +- Increase delays after state machine simulation +- Add delay after MR2 WRLVL enable +- **Action:** Try longer delays (1ms, 10ms) to see if TIP needs more time + +### Option E: Remove Manual MR2 WRLVL Enable +- Let TIP enable MR2 WRLVL automatically +- Remove our MR2 WRLVL enable code +- **Action:** Test if TIP handles MR2 automatically + +### Option F: DFI Signal Toggling +- Toggle `DFI_INIT_START` again after manual training +- May trigger TIP to continue +- **Action:** Try DFI init restart sequence + +### Option G: Training Start Register Sequence +- Toggle `training_start` register in specific sequence +- May need to clear then set +- **Action:** Try training_start = 0, delay, training_start = 1 + +### Option H: Contact Microchip Support +- This may be a known issue or require specific sequence +- Register map or documentation might be incomplete +- **Action:** Open support case with Microchip + +--- + +## Code Locations + +### Main Training Function +- **File:** `hal/mpfs250.c` +- **Function:** `run_training()` +- **Line:** ~912 + +### Key Sections: +1. **WRLVL Configuration:** Lines ~1028-1038 +2. **Training Reset Release:** Lines ~1040-1045 +3. **DFI Init:** Lines ~1047-1057 +4. 
**Manual LPDDR4 Training:** Lines ~1068-1690 +5. **State Machine Simulation:** Lines ~1692-1730 +6. **TIP Wait Loop:** Lines ~1732-1820 + +### Register Definitions +- **File:** `hal/mpfs250.h` +- **PHY Registers:** Lines ~400-500 +- **Controller Registers:** Lines ~200-300 + +--- + +## References + +### Documentation +1. **PolarFire Family Memory Controller User Guide VB** + - Path: `/home/davidgarske/Projects/TwoSixTech/PolarFire_FPGA_PolarFire_SoC_FPGA_Memory_Controller_User_Guide_VB.txt` + - Sections: 2.7.3.4 (Write Leveling), 3.4 (Training Sequence) + +2. **PolarFire SoC MSS Technical Reference Manual VC** + - Path: `/home/davidgarske/Projects/TwoSixTech/PolarFire_SoC_FPGA_MSS_Technical_Reference_Manual_VC.txt` + - Section: 3.11 (MSS DDR Controller) + +3. **Microchip Online Docs - LPDDR4 Troubleshooting** + - URL: https://onlinedocs.microchip.com/oxy/GUID-7F276F66-9418-456E-9FA3-8E7EE40C9E25-en-US-7/GUID-3C46F146-3BCB-4847-8790-263398D5F223.html + +### Reference Code +1. **HSS (Hart Software Services)** + - Path: `/home/davidgarske/GitHub/hart-software-services/` + - File: `baremetal/polarfire-soc-bare-metal-library/src/platform/mpfs_hal/common/nwc/mss_ddr.c` + - Function: `lpddr4_manual_training()` (line ~5102) + - Function: `ddr_setup()` state machine (line ~348) + +2. **Successful HSS Training Log** + - Path: `/home/davidgarske/Projects/TwoSixTech/ddr_training.txt` + - Shows complete training sequence with all phases + +### Configuration Files +1. **Libero Configuration** + - Path: `hal/mpfs250/MSS_VIDEO_KIT_H264.cfg` + - Contains DDR controller and PHY settings + +2. 
**Register Map** + - Path: `/home/davidgarske/Projects/TwoSixTech/PolarFireSoC_Register_Map/` + - HTML format register definitions + +--- + +## Test Logs + +``` +=== E51 (hart 0) Output - MMUART0 === +wolfBoot Version: 2.7.0 (Jan 12 2026 16:22:42) +Running on E51 (hart 0) in M-mode + +======================================== +MPFS DDR Init (Video Kit LPDDR4 2GB) +MT53D512M32D2DS-053 x32 @ 1600 Mbps +======================================== +DDR: NWC init... + MSSIO...done + STARTUP=0x3F1F00 DYN_CNTL=0x4FF + MSSIO_CR=0x3880 +DDR: Configuring SGMII/clock mux... + Soft reset CFM to load NV map...done + RFCKMUX after NV load = 0x5 + CLK_XCVR=0x2C30 +DDR: Configuring MSS PLL... + Initial MSS PLL CTRL=0x800010C6 + Using external refclk (RFCKMUX=0x5) + PLL_CKMUX=0x155 + BCLKMUX=0x208 + Powering up PLL (CTRL=0x100001F)... + After power up: CTRL=0x3300001F + Waiting for MSS PLL lock...locked (0x3300001F)�DDR: Configuring DDR PLL... + DDR bank controller reset...done + Waiting for DDR PLL lock...locked (0x3300003F) +DDR: Enable DDRC clock/reset...CLK before=0x21 after=0x800021 + RST=0x3F7FFFDE + Test MC_BASE2@0x20084000: SR=0x0 RAS=0x1C +done +DDR: Blocker@0x20005D1C before=0x0 after=0x1 +DDR: PHY setup... PVT calib...done +PHY PLL locked + SR before=0x1 + SR after 0=0x0 + SR after 1=0x1 +DDR: After rotation SR_N=0x1 +DDR: BCLK90 rotation...done +DDR: BCLK phase...0x5003 +DDR: Starting TIP training... + Configure PHY for WRLVL...DPC=0x50452 ODT=0x0...done + Training reset release...done + DFI init start...done + Wait DFI complete...OK + LPDDR4 manual training... + Device reset...done + PLL freq double...done + Second reset...done + Pre-MR: CKE=0 RST=0 CS=1 PLL=0x20010BE + DIV0_1=0x4000200 DIV2_3=0x2070205 + MR writes...ack=80 err=0...done + PLL freq restore...done + CA VREF training... +0x10...done + ADDCMD training...phase=6 dly=12...PLL_PHADJ=0x501B DPC=0x50452... 
MR re-write...ack=80 err=0...done + Post-manual training status: + train_stat=0x1 dfi_train_complete=0x0 + gt_state=0xB dqdqs_state=0x8 + ZQ cal...done + Simulate state machine transitions...BCLK_SCLK done ADDCMD skipped MR2 WRLVL enable...done + Post-state-machine: train_stat=0x1 + Wait for TIP WRLVL to start and complete... + Progress: train_stat=0x1 (iter=0) + Training status: 0x1 + training_skip=0x2 training_reset=0x0 + Per-lane status: + L0: gt_err=0x0 gt_state=0xB wl_dly=0x0 dqdqs_st=0x8 + L1: gt_err=0x0 gt_state=0xB wl_dly=0x0 dqdqs_st=0x8 + L2: gt_err=0x0 gt_state=0xB wl_dly=0x0 dqdqs_st=0x8 + L3: gt_err=0x0 gt_state=0xB wl_dly=0x0 dqdqs_st=0x8 + L4: gt_err=0x0 gt_state=0xB wl_dly=0x0 dqdqs_st=0x8 + TIP cfg: tip_cfg_params=0x7CFE02F + BCLK: pll_phadj=0x501B bclk_sclk=0x0 + RPC: rpc145=0x12 rpc147=0x13 rpc156=0x6 rpc166=0x2 + TIP training timeout or incomplete + all_lanes_trained=0 train_stat=0x1 + Restore ODT and disable WRLVL...done + Final train_stat=0x1 +Write calib...MTC timeout...MTC timeout...MTC timeout...MTC timeout...MTC timeout...MTC timeout...MTC timeout... +``` + +--- + +## Summary + +The DDR training sequence is correctly implemented up to the point where TIP should automatically progress from BCLK_SCLK to WRLVL phase. All manual training steps complete successfully, and the configuration matches HSS reference code. However, TIP does not transition to WRLVL phase automatically. + +**Key Insight:** HSS shows that TIP completes all phases automatically between manual training end and the POST check, suggesting TIP runs continuously once conditions are met. Our implementation may be missing a condition that TIP needs to detect before starting WRLVL. + +**Most Likely Issue:** TIP may need to see a specific state or signal that we're not providing, or there may be a timing/sequencing issue that prevents TIP from detecting it's ready to start WRLVL. 
+ +**Recommended Next Step:** Try Option E (remove manual MR2 WRLVL enable) to see if TIP handles it automatically, or Option H (contact Microchip support) if this is a known issue. + + +*Last Updated: 2026-01-12* +*Status: Investigation ongoing - TIP stuck at BCLK_SCLK phase* diff --git a/docs/Targets.md b/docs/Targets.md index f78c7095cf..b614035f0b 100644 --- a/docs/Targets.md +++ b/docs/Targets.md @@ -1198,12 +1198,111 @@ ECDSA [ SECP256R1] 256 verify 200 ops took 1.846 sec, avg 9.231 m Benchmark complete ``` +### PolarFire Machine Mode (M-Mode) Support + +wolfBoot supports running directly in Machine Mode (M-mode) on PolarFire SoC, replacing the Hart Software Services (HSS) as the first-stage bootloader. This provides a smaller, more secure boot solution with full control over all hardware initialization. + +#### M-Mode Features + +* Runs on E51 monitor core (hart 0) directly from eNVM +* Executes from L2 Scratchpad SRAM for performance +* Wakes and manages all four U54 application cores +* Per-hart UART output (each U54 uses its own MMUART) +* M-mode to S-mode transition for booting Linux +* Supports loading Linux kernel from eMMC or SD card + +#### M-Mode Files + +| File | Description | +|------|-------------| +| `config/examples/polarfire_mpfs250-m.config` | M-mode configuration | +| `hal/mpfs250-m.ld` | M-mode linker script (eNVM + L2 SRAM) | +| `src/update_mmode.c` | M-mode boot stub (bring-up) | +| `src/boot_riscv_start.S` | M-mode assembly startup | + +#### Building for M-Mode + +```sh +# Copy M-mode configuration +cp config/examples/polarfire_mpfs250-m.config .config + +# Build wolfBoot +make clean +make wolfboot.elf +``` + +#### Flashing via JTAG (Bootmode 1) + +M-mode wolfBoot is programmed directly to eNVM using the Microchip mpfsBootmodeProgrammer tool. This requires SoftConsole with the `SC_INSTALL_DIR` environment variable set. 
+ +```sh +# Set SoftConsole installation directory +export SC_INSTALL_DIR=/opt/Microchip/SoftConsole-v2022.2-RISC-V-747 + +# Flash wolfboot.elf to eNVM +$SC_INSTALL_DIR/eclipse/jre/bin/java -jar \ + $SC_INSTALL_DIR/extras/mpfs/mpfsBootmodeProgrammer.jar \ + --bootmode 1 --die MPFS250T --package FCG1152 --workdir $PWD wolfboot.elf +``` + +#### M-Mode Boot Flow + +1. **eNVM Reset Vector** (0x20220100): CPU starts here, copies code to L2 SRAM +2. **L2 SRAM Execution** (0x0A000000): Main wolfBoot code runs from SRAM +3. **Hardware Init**: L2 cache, clocks, DDR controller (when implemented) +4. **Hart Wake-Up**: E51 sends IPIs to wake U54 cores +5. **Application Load**: Load kernel/DTB from eMMC or SD card to DDR +6. **M-to-S Transition**: Configure PMP, delegate traps, MRET to S-mode + +#### M-Mode UART Mapping + +| Hart | Core | MMUART | USB Device | +|------|------|--------|------------| +| 0 | E51 | MMUART0 | /dev/ttyUSB0 | +| 1 | U54_1 | MMUART1 | /dev/ttyUSB1 | +| 2 | U54_2 | MMUART2 | N/A | +| 3 | U54_3 | MMUART3 | N/A | +| 4 | U54_4 | MMUART4 | N/A | + +#### M-Mode Progress + +**Completed:** +- L2 cache controller configuration +- NWC clock initialization +- Secondary hart (U54) wake-up via IPI +- M-mode to S-mode transition framework +- Per-hart UART for debug output + +**In Progress:** +- DDR controller initialization (requires porting ~6500 lines from MPFS HAL) + +**TODO:** +- Full DDR training sequence (port mss_ddr.c from mpfs_hal) +- Application loading from eMMC/SD card +- Full Linux boot with DTB + +#### DDR Initialization Status + +The MPFS250T DDR controller requires a complex training sequence to initialize +LPDDR4 memory. The training process includes: + +1. PVT (Process/Voltage/Temperature) calibration +2. VREF calibration +3. Write leveling +4. Read gate training +5. DQ/DQS eye training + +The full DDR driver is located in the MPFS HAL at: +`platform/mpfs_hal/common/nwc/mss_ddr.c` (~272KB, 6500+ lines) + +**Options for DDR support:** +1. 
**Port MPFS HAL DDR driver** - Import mss_ddr.c and dependencies +2. **Use HSS for DDR init** - Boot HSS first, then chainload wolfBoot +3. **FPGA fabric DDR** - Some Libero designs train DDR in fabric + ### PolarFire TODO * Add support for QSPI NOR flash -* Add support for full HSS replacement using wolfboot - - Machine level assembly startup - - DDR driver ## STM32F7 diff --git a/hal/mpfs250-m.ld b/hal/mpfs250-m.ld new file mode 100644 index 0000000000..16e252ca4c --- /dev/null +++ b/hal/mpfs250-m.ld @@ -0,0 +1,140 @@ +/* PolarFire SoC MPFS250 M-Mode Linker Script for wolfBoot + * + * This linker script is for running wolfBoot in Machine Mode (M-mode) + * directly from eNVM, executing from L2 SRAM. + * + * Boot flow: + * 1. CPU starts at eNVM reset vector (0x20220100) + * 2. Startup code in eNVM copies main code to L2_SCRATCH + * 3. Jumps to L2_SCRATCH for execution + * + * The first 0x100 bytes of eNVM are reserved for the boot ROM secure boot + * meta information added by mpfsBootmodeProgrammer. + * + * Memory regions: + * FLASH_ENVM - Embedded NVM (128KB - 0x100 for header) + * L2_SCRATCH - L2 Scratchpad SRAM (256KB) - execution and data + */ + +OUTPUT_ARCH( "riscv" ) + +ENTRY( _reset ) + +MEMORY +{ + /* The first 0x100 bytes of eNVM are used for boot ROM secure boot meta information + * This offset is added by mpfsBootmodeProgrammer (bootmode 1) */ + FLASH_ENVM (rx) : ORIGIN = 0x20220100, LENGTH = 128k - 0x100 + + /* L2 Scratchpad SRAM - 256KB available + * Used for code execution, data, and stack in M-mode + * Address range: 0x0A000000 - 0x0A03FFFF */ + L2_SCRATCH (rwx) : ORIGIN = @WOLFBOOT_ORIGIN@, LENGTH = 256k +} + +/* Stack size for the boot hart (E51 in M-mode) */ +PROVIDE(STACK_SIZE = 16k); + +SECTIONS +{ + /* + * Reset vector and early initialization code + * This section MUST be in eNVM (VMA = LMA) since CPU starts here. + * It copies the main code to L2_SCRATCH and jumps there. + */ + .init : ALIGN(0x10) + { + _start_text = .; + KEEP(*(.init)) + . 
= ALIGN(0x10); + } > FLASH_ENVM + + /* + * Main code section - runs from L2_SCRATCH, stored in FLASH_ENVM + * The .init code will copy this section to L2_SCRATCH before jumping here. + */ + .text : ALIGN(0x10) + { + _start_text_sram = .; + _start_vector = .; + KEEP(*(.isr_vector)) + KEEP(*(.trap_vector)) + . = ALIGN(0x10); + *(.text*) + *(.rodata*) + *(.srodata*) + . = ALIGN(4); + _end_text = .; + } > L2_SCRATCH AT > FLASH_ENVM + + /* Provide load address for copying from flash */ + _stored_text = LOADADDR(.text); + _stored_data = LOADADDR(.data); + + /* Initialized data section */ + .data : ALIGN(0x10) + { + _start_data = .; + KEEP(*(.ramcode*)) + . = ALIGN(4); + *(.data*) + . = ALIGN(4); + /* Global pointer is set to .sdata + 0x800 for efficient access + * to small data using gp-relative addressing (+/- 2KB range) */ + _global_pointer = . + 0x800; + *(.sdata*) + . = ALIGN(4); + _end_data = .; + } > L2_SCRATCH AT > FLASH_ENVM + + /* Uninitialized data section (cleared to zero on startup) */ + .bss (NOLOAD) : ALIGN(0x10) + { + _start_bss = .; + *(.bss*) + *(.sbss*) + *(COMMON) + . = ALIGN(4); + _end_bss = .; + _end = .; + } > L2_SCRATCH +} + +/* Heap starts after BSS (between _end and stack) */ +PROVIDE(_start_heap = _end); + +/* Stack configuration for multi-hart boot + * Memory layout at end of L2_SCRATCH: + * [code/data/bss/heap] ... 
[secondary stacks] [main stack] + * + * Stack sizes (defined in config or header): + * STACK_SIZE_PER_HART = 8192 (8KB per hart) + * STACK_SIZE = 16384 (16KB for main hart E51) + * + * Total stack area: STACK_SIZE + 4 * STACK_SIZE_PER_HART = 48KB + */ +PROVIDE(STACK_SIZE_PER_HART = 8192); + +/* End of L2 scratchpad */ +PROVIDE(_l2_scratch_end = ORIGIN(L2_SCRATCH) + LENGTH(L2_SCRATCH)); + +/* Main hart (E51) stack at very end, grows downward */ +PROVIDE(_end_stack = _l2_scratch_end); +PROVIDE(_main_hart_stack_top = _end_stack); +PROVIDE(_main_hart_stack_bottom = _main_hart_stack_top - STACK_SIZE); + +/* Main hart HLS location (at top of main stack minus 64 bytes) */ +PROVIDE(_main_hart_hls = _main_hart_stack_top - 64); + +/* Secondary hart stacks below main hart stack + * Hart 1 stack: _main_hart_stack_bottom - STACK_SIZE_PER_HART * 0 to - STACK_SIZE_PER_HART * 1 + * Hart 2 stack: _main_hart_stack_bottom - STACK_SIZE_PER_HART * 1 to - STACK_SIZE_PER_HART * 2 + * etc. + */ +PROVIDE(_secondary_hart_stack_base = _main_hart_stack_bottom - 4 * STACK_SIZE_PER_HART); + +/* Provide symbols for M-mode startup code */ +PROVIDE(__global_pointer$ = _global_pointer); + +/* Size of text section to copy (for startup code) */ +PROVIDE(_text_size = _end_text - _start_text_sram); diff --git a/hal/mpfs250.c b/hal/mpfs250.c index 2137cebf7f..2be1febe89 100644 --- a/hal/mpfs250.c +++ b/hal/mpfs250.c @@ -48,12 +48,2253 @@ #if defined(DISK_SDCARD) || defined(DISK_EMMC) #include "sdhci.h" + +/* Forward declaration of SDHCI IRQ handler */ +extern void sdhci_irq_handler(void); #endif +/* Video Kit DDR/Clock configuration is included in mpfs250.h */ + +/* ============================================================================ + * L2 Cache Controller Configuration + * + * The L2 cache controller must be properly configured before using the + * L2 scratchpad memory. At reset, only 1 cache way is enabled. + * + * This function: + * 1. 
Enables all cache ways (0-7) and scratchpad ways (8-11) + * 2. Configures way masks for each master (harts, DMA, AXI ports) + * 3. Disables L2 shutdown mode + * ============================================================================ */ +#ifdef WOLFBOOT_RISCV_MMODE +static void mpfs_config_l2_cache(void) +{ + uint64_t way_enable_before; + uint64_t way_enable_after; + + /* Read current way enable state */ + way_enable_before = L2_WAY_ENABLE; + + /* Enable all cache ways (0-7) plus scratchpad ways (8-11) + * Value 0x0B = ways 0-3 and 8-11 enabled (4 cache + 4 scratchpad) + * This matches the working DDR demo configuration */ + L2_WAY_ENABLE = 0x0B; + + /* Disable L2 shutdown */ + SYSREG_L2_SHUTDOWN_CR = 0; + + /* Configure way masks - allow all masters to use cache ways 0-7 */ + L2_WAY_MASK_DMA = L2_WAY_MASK_CACHE_ONLY; + L2_WAY_MASK_AXI4_PORT0 = L2_WAY_MASK_CACHE_ONLY; + L2_WAY_MASK_AXI4_PORT1 = L2_WAY_MASK_CACHE_ONLY; + L2_WAY_MASK_AXI4_PORT2 = L2_WAY_MASK_CACHE_ONLY; + L2_WAY_MASK_AXI4_PORT3 = L2_WAY_MASK_CACHE_ONLY; + + /* E51 cache masks */ + L2_WAY_MASK_E51_DCACHE = L2_WAY_MASK_CACHE_ONLY; + L2_WAY_MASK_E51_ICACHE = L2_WAY_MASK_CACHE_ONLY; + + /* U54 cache masks (configure even if not using U54s yet) */ + L2_WAY_MASK_U54_1_DCACHE = L2_WAY_MASK_CACHE_ONLY; + L2_WAY_MASK_U54_1_ICACHE = L2_WAY_MASK_CACHE_ONLY; + L2_WAY_MASK_U54_2_DCACHE = L2_WAY_MASK_CACHE_ONLY; + L2_WAY_MASK_U54_2_ICACHE = L2_WAY_MASK_CACHE_ONLY; + L2_WAY_MASK_U54_3_DCACHE = L2_WAY_MASK_CACHE_ONLY; + L2_WAY_MASK_U54_3_ICACHE = L2_WAY_MASK_CACHE_ONLY; + L2_WAY_MASK_U54_4_DCACHE = L2_WAY_MASK_CACHE_ONLY; + L2_WAY_MASK_U54_4_ICACHE = L2_WAY_MASK_CACHE_ONLY; + + /* Memory barrier to ensure all writes complete */ + __asm__ volatile("fence iorw, iorw" ::: "memory"); + + /* Read back to verify */ + way_enable_after = L2_WAY_ENABLE; + + /* Store for later reporting (can't print yet - UART not initialized) */ + (void)way_enable_before; + (void)way_enable_after; +} + +/* 
============================================================================ + * DDR Controller Driver + * + * Complete DDR initialization for PolarFire SoC MPFS250T Video Kit. + * Includes NWC/PLL initialization and DDR PHY/controller setup. + * + * Based on MPFS HAL (mss_nwc_init.c, mss_pll.c, mss_ddr.c) from Microchip HSS. + * ============================================================================ */ + +static inline void mb(void) +{ + __asm__ volatile("fence iorw, iorw" ::: "memory"); +} + +/* Simple busy-loop delay + * Approximately us microseconds at ~40MHz (early boot clock) + * This is used before the timer is fully reliable */ +static void ddr_delay(uint32_t us) +{ + volatile uint32_t i; + /* At ~40MHz, ~10 loop iterations per microsecond */ + for (i = 0; i < us * 10; i++) { + __asm__ volatile("nop"); + } +} + +/* Microsecond delay using hardware timer + * Uses RISC-V time CSR which is always available */ +static void udelay(uint32_t us) +{ + /* During DDR init, use busy-loop delay since timer may not be stable */ + ddr_delay(us); +} + +/* IOSCB Bank Controllers and DLL bases */ +#define IOSCB_BANK_CNTL_SGMII_BASE 0x3E400000UL +#define IOSCB_BANK_CNTL_DDR_BASE 0x3E020000UL +#define IOSCB_DLL_SGMII_BASE 0x3E100000UL + +/* SGMII Off Mode + * + * Configure SGMII for DDR-only mode (from HSS mss_sgmii.c sgmii_off_mode) + * Even when SGMII is not used, these registers must be configured with + * Libero-generated values for proper DDR operation. 
+ */ +static void sgmii_off_mode(void) +{ + volatile uint32_t *ioscb_dll_sgmii = (volatile uint32_t *)IOSCB_DLL_SGMII_BASE; + + /* Soft reset SGMII TIP with NV_MAP + peripheral bits, then just peripheral + * This matches HSS: SOFT_RESET_SGMII = (0x01 << 8U) | 1U; then = 1U; */ + DDRPHY_REG(0x040) = (0x01UL << 8) | 0x01UL; /* SOFT_RESET_SGMII - periph+nv_map */ + mb(); + udelay(1); + DDRPHY_REG(0x040) = 0x01UL; /* Just periph reset */ + mb(); + + /* Configure SGMII RPC registers with Libero-generated values + * From HSS setup_sgmii_rpc_per_config() - critical for clock routing! + * Note: REG_CDR_MOVE_STEP mask (0x0C000000) is cleared from SGMII_MODE + * Register offsets from mss_ddr_sgmii_phy_defs.h (NOT the same as soft reset!) */ + DDRPHY_REG(0xC04) = LIBERO_SETTING_SGMII_MODE & ~0x0C000000UL; /* SGMII_MODE */ + DDRPHY_REG(0xC08) = LIBERO_SETTING_SGMII_PLL_CNTL; /* PLL_CNTL */ + DDRPHY_REG(0xC0C) = LIBERO_SETTING_SGMII_CH0_CNTL; /* CH0_CNTL */ + DDRPHY_REG(0xC10) = LIBERO_SETTING_SGMII_CH1_CNTL; /* CH1_CNTL */ + DDRPHY_REG(0xC14) = LIBERO_SETTING_SGMII_RECAL_CNTL; /* RECAL_CNTL */ + DDRPHY_REG(0xC18) = LIBERO_SETTING_SGMII_CLK_CNTL; /* CLK_CNTL */ + DDRPHY_REG(0xC24) = LIBERO_SETTING_SGMII_SPARE_CNTL; /* SPARE_CNTL */ + mb(); + + /* Reset SGMII DLL via SCB - required for IO to be configured + * From HSS: "so we have to use scb register to reset as no APB register + * available to soft reset the IP" */ + ioscb_dll_sgmii[0] = 0x01UL; /* soft_reset at offset 0 */ + mb(); + udelay(10); +} + +/* SGMII/Clock Mux Configuration + * + * The RFCKMUX register at 0x3E200004 routes the external reference clock + * to both the DDR PLL and SGMII PLL. This MUST be configured before the + * PLLs can lock. 
+ * + * From HSS mss_pll.c: "0x05 => ref to SGMII and DDR" + */ +static void sgmii_mux_config(void) +{ + uint32_t rfckmux; + + wolfBoot_printf("DDR: Configuring SGMII/clock mux...\n"); + + /* First, put SGMII in off mode (from HSS sgmii_off_mode) */ + sgmii_off_mode(); + + /* Enable SGMII bank controller (bring out of reset) */ + volatile uint32_t *ioscb_bank_cntl_sgmii = (volatile uint32_t *)IOSCB_BANK_CNTL_SGMII_BASE; + ioscb_bank_cntl_sgmii[0] = 0x01UL; /* soft_reset - triggers NV map load */ + mb(); + udelay(10); + + /* Method 1: Try RPC soft reset on CFM to load NV map values from FPGA */ + wolfBoot_printf(" Soft reset CFM to load NV map..."); + CFM_SGMII_REG(CFM_SGMII_SOFT_RESET) = 0x01UL; + mb(); + udelay(100); + wolfBoot_printf("done\n"); + + rfckmux = CFM_SGMII_REG(CFM_SGMII_RFCKMUX); + wolfBoot_printf(" RFCKMUX after NV load = 0x%x\n", rfckmux); + + /* Method 2: If NV map didn't have the value, try direct SCB writes */ + if (rfckmux != LIBERO_SETTING_SGMII_REFCLKMUX) { + wolfBoot_printf(" Trying direct SCB writes...\n"); + + /* Configure clock receiver for external reference - CRITICAL for ref clock! 
*/ + CFM_SGMII_REG(CFM_SGMII_CLK_XCVR) = LIBERO_SETTING_SGMII_CLK_XCVR; + mb(); + + /* Route external reference clock to DDR and SGMII PLLs */ + CFM_SGMII_REG(CFM_SGMII_RFCKMUX) = LIBERO_SETTING_SGMII_REFCLKMUX; + mb(); + + /* SGMII clock mux */ + CFM_SGMII_REG(CFM_SGMII_SGMII_CLKMUX) = LIBERO_SETTING_SGMII_SGMII_CLKMUX; + mb(); + udelay(10); + + rfckmux = CFM_SGMII_REG(CFM_SGMII_RFCKMUX); + wolfBoot_printf(" RFCKMUX after SCB write = 0x%x\n", rfckmux); + } else { + /* NV map loaded the value, still need to configure clock receiver */ + CFM_SGMII_REG(CFM_SGMII_CLK_XCVR) = LIBERO_SETTING_SGMII_CLK_XCVR; + mb(); + } + + /* Debug: check clock receiver state */ + wolfBoot_printf(" CLK_XCVR=0x%x\n", CFM_SGMII_REG(CFM_SGMII_CLK_XCVR)); + + if (rfckmux != LIBERO_SETTING_SGMII_REFCLKMUX) { + wolfBoot_printf(" WARNING: RFCKMUX not set correctly!\n"); + } +} + +/* MSS PLL Mux Pre-Configuration + * + * Feed through required reference clocks to PLL before powering up + * From HSS mss_mux_pre_mss_pll_config() + * + * PLL RF clock mux selections (2 bits each): + * 00 = vss (ground) + * 01 = refclk_p,refclk_n (external reference - requires SGMII CFM RFCKMUX) + * 10 = scb_clk (80MHz internal oscillator) + * 11 = serdes_refclk + */ +static void mss_mux_pre_pll_config(void) +{ + uint32_t pll_ckmux; + uint32_t rfckmux; + + /* Check if RFCKMUX is configured - if not, use SCB_CLK instead */ + rfckmux = CFM_SGMII_REG(CFM_SGMII_RFCKMUX); + + if (rfckmux == LIBERO_SETTING_SGMII_REFCLKMUX) { + /* External refclk is available, use Libero settings */ + pll_ckmux = LIBERO_SETTING_MSS_PLL_CKMUX; + wolfBoot_printf(" Using external refclk (RFCKMUX=0x%x)\n", rfckmux); + } else { + /* External refclk not available, use SCB_CLK (80MHz internal) + * PLL0_RFCLK0_SEL = 10 (SCB_CLK), PLL0_RFCLK1_SEL = 10 (SCB_CLK) + * PLL1_RFCLK0_SEL = 10 (SCB_CLK), PLL1_RFCLK1_SEL = 10 (SCB_CLK) + * This gives: 0x02 | (0x02 << 2) | (0x02 << 4) | (0x02 << 6) | (0x02 << 8) = 0x2AA + */ + pll_ckmux = 0x000002AAUL; + 
wolfBoot_printf(" Using SCB_CLK (80MHz) as PLL ref (fallback)\n"); + } + + /* Configure PLL clock mux - select reference sources */ + CFM_MSS_REG(CFM_PLL_CKMUX) = pll_ckmux; + mb(); + + wolfBoot_printf(" PLL_CKMUX=0x%x\n", CFM_MSS_REG(CFM_PLL_CKMUX)); + + /* Configure BCLK mux for DDR PHY */ + CFM_MSS_REG(CFM_BCLKMUX) = LIBERO_SETTING_MSS_BCLKMUX; + mb(); + + /* Frequency meter (not critical but part of standard init) */ + CFM_MSS_REG(CFM_FMETER_ADDR) = LIBERO_SETTING_MSS_FMETER_ADDR; + CFM_MSS_REG(CFM_FMETER_DATAW) = LIBERO_SETTING_MSS_FMETER_DATAW; + mb(); + + /* Debug: verify writes */ + wolfBoot_printf(" BCLKMUX=0x%x\n", CFM_MSS_REG(CFM_BCLKMUX)); + + /* Delay for clock mux and reference clock to stabilize */ + udelay(1000); +} + +/* MSS PLL Initialization + * + * Configure MSS PLL following the HSS sequence from mss_pll_config() + */ +static int mss_pll_init(void) +{ + uint32_t pll_ctrl; + uint32_t timeout; + + wolfBoot_printf("DDR: Configuring MSS PLL...\n"); + + /* First check if PLL is already configured and locked by System Controller */ + pll_ctrl = MSS_PLL_REG(PLL_CTRL); + wolfBoot_printf(" Initial MSS PLL CTRL=0x%x\n", pll_ctrl); + + if (pll_ctrl & PLL_LOCK_BIT) { + wolfBoot_printf(" MSS PLL already locked!\n"); + return 0; + } + + /* Take PLLs out of reset (HSS: this is done before any configuration) */ + MSS_PLL_REG(PLL_SOFT_RESET) = PLL_INIT_OUT_RESET; + DDR_PLL_REG(PLL_SOFT_RESET) = PLL_INIT_OUT_RESET; + mb(); + + /* Power down PLL while configuring (HSS sequence: configure before mux) */ + MSS_PLL_REG(PLL_CTRL) = LIBERO_SETTING_MSS_PLL_CTRL & ~PLL_POWERDOWN_B; + mb(); + + /* Configure PLL parameters (while powered down) */ + MSS_PLL_REG(PLL_REF_FB) = LIBERO_SETTING_MSS_PLL_REF_FB; + MSS_PLL_REG(PLL_DIV_0_1) = LIBERO_SETTING_MSS_PLL_DIV_0_1; + MSS_PLL_REG(PLL_DIV_2_3) = LIBERO_SETTING_MSS_PLL_DIV_2_3; + MSS_PLL_REG(PLL_CTRL2) = LIBERO_SETTING_MSS_PLL_CTRL2; + MSS_PLL_REG(PLL_FRACN) = LIBERO_SETTING_MSS_PLL_FRACN; + MSS_PLL_REG(PLL_SSCG_0) = 
LIBERO_SETTING_MSS_SSCG_REG_0; + MSS_PLL_REG(PLL_SSCG_1) = LIBERO_SETTING_MSS_SSCG_REG_1; + MSS_PLL_REG(PLL_SSCG_2) = LIBERO_SETTING_MSS_SSCG_REG_2; + MSS_PLL_REG(PLL_SSCG_3) = LIBERO_SETTING_MSS_SSCG_REG_3; + MSS_PLL_REG(PLL_PHADJ) = LIBERO_SETTING_MSS_PLL_PHADJ; + mb(); + + /* Configure muxes AFTER PLL registers but BEFORE power-up (HSS sequence) */ + mss_mux_pre_pll_config(); + + /* Power up PLL */ + wolfBoot_printf(" Powering up PLL (CTRL=0x%x)...\n", + LIBERO_SETTING_MSS_PLL_CTRL | PLL_POWERDOWN_B); + MSS_PLL_REG(PLL_CTRL) = LIBERO_SETTING_MSS_PLL_CTRL | PLL_POWERDOWN_B; + mb(); + + /* Short delay for PLL to start */ + udelay(100); + + /* Debug: Show PLL state after power up */ + pll_ctrl = MSS_PLL_REG(PLL_CTRL); + wolfBoot_printf(" After power up: CTRL=0x%x\n", pll_ctrl); + + /* Wait for lock */ + wolfBoot_printf(" Waiting for MSS PLL lock..."); + timeout = 1000000; + while (timeout > 0) { + pll_ctrl = MSS_PLL_REG(PLL_CTRL); + if (pll_ctrl & PLL_LOCK_BIT) { + wolfBoot_printf("locked (0x%x)\n", pll_ctrl); + + /* Configure clock dividers before switching + * LIBERO_SETTING_MSS_CLOCK_CONFIG_CR = 0x24: + * CPU = /1 (600MHz), AXI = /2 (300MHz), APB = /4 (150MHz) + */ + SYSREG_REG(0x08) = 0x00000024UL; /* CLOCK_CONFIG_CR */ + mb(); + + /* Switch MSS to use PLL clock */ + CFM_MSS_REG(CFM_MSSCLKMUX) = LIBERO_SETTING_MSS_MSSCLKMUX; + mb(); + + /* Wait for clock switch to stabilize */ + { + volatile int i; + for (i = 0; i < 10000; i++) { /* ~1ms at new clock speed */ + __asm__ volatile("nop"); + } + } + + /* Reinitialize UART for new clock frequency */ + hal_uart_reinit(); + return 0; + } + /* Print progress every 100k iterations */ + if ((timeout % 100000) == 0) { + wolfBoot_printf("."); + } + timeout--; + udelay(1); + } + + wolfBoot_printf("TIMEOUT (0x%x)\n", pll_ctrl); + wolfBoot_printf(" REF_FB=0x%x DIV_0_1=0x%x DIV_2_3=0x%x\n", + MSS_PLL_REG(PLL_REF_FB), MSS_PLL_REG(PLL_DIV_0_1), MSS_PLL_REG(PLL_DIV_2_3)); + return -1; +} + +/* DDR PLL Initialization + * + * 
Configure DDR PLL following the HSS sequence from ddr_pll_config() + * This is called later, after DDR bank controller is reset and PVT calibration + */ +static int ddr_pll_init(void) +{ + volatile uint32_t *ioscb_bank_cntl_ddr = (volatile uint32_t *)IOSCB_BANK_CNTL_DDR_BASE; + uint32_t pll_ctrl; + uint32_t timeout; + + wolfBoot_printf("DDR: Configuring DDR PLL...\n"); + + /* Reset DDR bank controller to load NV map values (from HSS DDR_TRAINING_SOFT_RESET) */ + wolfBoot_printf(" DDR bank controller reset..."); + ioscb_bank_cntl_ddr[0] = 0x01UL; /* soft_reset */ + mb(); + udelay(100); + wolfBoot_printf("done\n"); + + /* DDR PLL soft reset */ + DDR_PLL_REG(PLL_SOFT_RESET) = PLL_INIT_OUT_RESET; + mb(); + + /* Power down PLL while configuring */ + DDR_PLL_REG(PLL_CTRL) = LIBERO_SETTING_DDR_PLL_CTRL & ~PLL_POWERDOWN_B; + mb(); + + /* Configure PLL parameters */ + DDR_PLL_REG(PLL_REF_FB) = LIBERO_SETTING_DDR_PLL_REF_FB; + DDR_PLL_REG(PLL_DIV_0_1) = LIBERO_SETTING_DDR_PLL_DIV_0_1; + DDR_PLL_REG(PLL_DIV_2_3) = LIBERO_SETTING_DDR_PLL_DIV_2_3; + DDR_PLL_REG(PLL_CTRL2) = LIBERO_SETTING_DDR_PLL_CTRL2; + DDR_PLL_REG(PLL_FRACN) = LIBERO_SETTING_DDR_PLL_FRACN; + DDR_PLL_REG(PLL_SSCG_0) = LIBERO_SETTING_DDR_SSCG_REG_0; + DDR_PLL_REG(PLL_SSCG_1) = LIBERO_SETTING_DDR_SSCG_REG_1; + DDR_PLL_REG(PLL_SSCG_2) = LIBERO_SETTING_DDR_SSCG_REG_2; + DDR_PLL_REG(PLL_SSCG_3) = LIBERO_SETTING_DDR_SSCG_REG_3; + DDR_PLL_REG(PLL_PHADJ) = LIBERO_SETTING_DDR_PLL_PHADJ; + mb(); + + /* Power up PLL */ + DDR_PLL_REG(PLL_CTRL) = LIBERO_SETTING_DDR_PLL_CTRL | PLL_POWERDOWN_B; + mb(); + + /* Wait for lock */ + wolfBoot_printf(" Waiting for DDR PLL lock..."); + timeout = 1000000; + while (timeout > 0) { + pll_ctrl = DDR_PLL_REG(PLL_CTRL); + if (pll_ctrl & PLL_LOCK_BIT) { + wolfBoot_printf("locked (0x%x)\n", pll_ctrl); + return 0; + } + timeout--; + udelay(1); + } + + wolfBoot_printf("TIMEOUT (0x%x)\n", pll_ctrl); + return -1; +} + +/* NWC Initialization (SCB, PLLs) + * + * Initialize the Network-on-Chip 
(NWC) clocking subsystem: + * 1. Configure SCB access + * 2. Enable DFI APB access for DDR PHY + * 3. Configure MSSIO for dynamic access + * 4. Configure SGMII mux to route reference clock to PLLs (CRITICAL!) + * 5. Initialize MSS PLL + * 6. Initialize DDR PLL + */ +static int nwc_init(void) +{ + int ret; + + wolfBoot_printf("DDR: NWC init...\n"); + + /* Configure SCB access timer (raw offset 0x08 in the SCBCFG block) */ + SCBCFG_REG(0x08) = MSS_SCB_ACCESS_CONFIG; + mb(); + + /* Enable DFI APB access - bit 0 = clock on (HSS uses 0x01) */ + SYSREG_REG(SYSREG_DFIAPB_CR_OFF) = 0x00000001UL; + mb(); + + /* Enable dynamic APB/SCB access to DDR PHY. + * The two expressions evaluate to 0x003F1F00 and 0x0000047F, the same + * constants setup_phy() writes to these two registers. */ + DDRPHY_REG(PHY_STARTUP) = (0x3FUL << 16) | (0x1FUL << 8); + DDRPHY_REG(PHY_DYN_CNTL) = (0x01UL << 10) | (0x7FUL << 0); + mb(); + + wolfBoot_printf(" MSSIO..."); + /* MSSIO control sequence for dynamic enable: four writes, the first three + * each followed by udelay(5) - bits 10:8 = 0x7 with bit 11 set, then + * bits 10:8 cleared, then bit 12 added, then bit 13 added. */ + SYSREGSCB_REG(MSSIO_CONTROL_CR_OFF) = (0x07UL << 8) | (0x01UL << 11); + mb(); + udelay(5); + SYSREGSCB_REG(MSSIO_CONTROL_CR_OFF) = (0x00UL << 8) | (0x01UL << 11); + mb(); + udelay(5); + SYSREGSCB_REG(MSSIO_CONTROL_CR_OFF) = (0x00UL << 8) | (0x01UL << 11) | (0x01UL << 12); + mb(); + udelay(5); + SYSREGSCB_REG(MSSIO_CONTROL_CR_OFF) = (0x00UL << 8) | (0x01UL << 11) | (0x01UL << 12) | (0x01UL << 13); + mb(); + wolfBoot_printf("done\n"); + + /* Debug: read back and print the dynamic enable registers */ + wolfBoot_printf(" STARTUP=0x%x DYN_CNTL=0x%x\n", + DDRPHY_REG(PHY_STARTUP), DDRPHY_REG(PHY_DYN_CNTL)); + wolfBoot_printf(" MSSIO_CR=0x%x\n", SYSREGSCB_REG(MSSIO_CONTROL_CR_OFF)); + + /* Configure SGMII mux to route external refclk to PLLs - MUST be done first! 
*/ + sgmii_mux_config(); + + /* Configure MSS PLL; any non-zero result is reported to the caller as -1 */ + ret = mss_pll_init(); + if (ret != 0) + return -1; + + /* Initialize DDR PLL; any non-zero result is reported to the caller as -2 */ + ret = ddr_pll_init(); + if (ret != 0) + return -2; + + return 0; +} + +/* DDR Segment Configuration + * + * Writes the Libero segment settings, each masked with 0x7FFF, to the + * SEG0_0..SEG0_6 and SEG1_0..SEG1_7 registers, issues a barrier, then + * writes 1 to SEG0_BLOCKER and prints that register before and after. + * Takes no arguments and returns nothing. */ +static void setup_segments(void) +{ + /* Cached access segments (SEG0_0..SEG0_6) */ + DDR_SEG_REG(SEG0_0) = LIBERO_SETTING_SEG0_0 & 0x7FFFUL; + DDR_SEG_REG(SEG0_1) = LIBERO_SETTING_SEG0_1 & 0x7FFFUL; + DDR_SEG_REG(SEG0_2) = LIBERO_SETTING_SEG0_2 & 0x7FFFUL; + DDR_SEG_REG(SEG0_3) = LIBERO_SETTING_SEG0_3 & 0x7FFFUL; + DDR_SEG_REG(SEG0_4) = LIBERO_SETTING_SEG0_4 & 0x7FFFUL; + DDR_SEG_REG(SEG0_5) = LIBERO_SETTING_SEG0_5 & 0x7FFFUL; + DDR_SEG_REG(SEG0_6) = LIBERO_SETTING_SEG0_6 & 0x7FFFUL; + + /* Non-cached access segments (SEG1_0..SEG1_7) */ + DDR_SEG_REG(SEG1_0) = LIBERO_SETTING_SEG1_0 & 0x7FFFUL; + DDR_SEG_REG(SEG1_1) = LIBERO_SETTING_SEG1_1 & 0x7FFFUL; + DDR_SEG_REG(SEG1_2) = LIBERO_SETTING_SEG1_2 & 0x7FFFUL; + DDR_SEG_REG(SEG1_3) = LIBERO_SETTING_SEG1_3 & 0x7FFFUL; + DDR_SEG_REG(SEG1_4) = LIBERO_SETTING_SEG1_4 & 0x7FFFUL; + DDR_SEG_REG(SEG1_5) = LIBERO_SETTING_SEG1_5 & 0x7FFFUL; + DDR_SEG_REG(SEG1_6) = LIBERO_SETTING_SEG1_6 & 0x7FFFUL; + DDR_SEG_REG(SEG1_7) = LIBERO_SETTING_SEG1_7 & 0x7FFFUL; + mb(); + + /* Disable DDR blocker - critical! 
+ * SEG0.CFG[7] = 1 allows L2 cache controller to access DDR + */ + wolfBoot_printf("DDR: Blocker@0x%lx ", DDR_SEG_BASE + SEG0_BLOCKER); + wolfBoot_printf("before=0x%x ", DDR_SEG_REG(SEG0_BLOCKER)); + DDR_SEG_REG(SEG0_BLOCKER) = 0x01UL; + mb(); + wolfBoot_printf("after=0x%x\n", DDR_SEG_REG(SEG0_BLOCKER)); +} + +/* DDR Controller Configuration + * + * Loads the memory-controller registers from the LIBERO_SETTING_* values in + * this order: soft reset / auto-init disable, timing, memory geometry, + * latency, refresh, additional timing, DFI interface timing. A single mb() + * is issued after the last write; nothing is read back or checked. + * Takes no arguments and returns nothing. */ +static void setup_controller(void) +{ + /* Controller soft reset - deassert */ + DDRCFG_REG(MC_CTRLR_SOFT_RESET) = LIBERO_SETTING_CTRLR_SOFT_RESET_N; + + /* Disable auto-init until PHY is ready */ + DDRCFG_REG(MC_AUTOINIT_DISABLE) = 0x01; + + /* Timing parameters */ + DDRCFG_REG(MC_CFG_BL) = LIBERO_SETTING_CFG_BL; + DDRCFG_REG(MC_CFG_RAS) = LIBERO_SETTING_CFG_RAS; + DDRCFG_REG(MC_CFG_RCD) = LIBERO_SETTING_CFG_RCD; + DDRCFG_REG(MC_CFG_RRD) = LIBERO_SETTING_CFG_RRD; + DDRCFG_REG(MC_CFG_RP) = LIBERO_SETTING_CFG_RP; + DDRCFG_REG(MC_CFG_RC) = LIBERO_SETTING_CFG_RC; + DDRCFG_REG(MC_CFG_FAW) = LIBERO_SETTING_CFG_FAW; + DDRCFG_REG(MC_CFG_RFC) = LIBERO_SETTING_CFG_RFC; + DDRCFG_REG(MC_CFG_RTP) = LIBERO_SETTING_CFG_RTP; + DDRCFG_REG(MC_CFG_WR) = LIBERO_SETTING_CFG_WR; + DDRCFG_REG(MC_CFG_WTR) = LIBERO_SETTING_CFG_WTR; + DDRCFG_REG(MC_CFG_STARTUP_DELAY) = LIBERO_SETTING_CFG_STARTUP_DELAY; + + /* Memory geometry */ + DDRCFG_REG(MC_CFG_MEM_COLBITS) = LIBERO_SETTING_CFG_MEM_COLBITS; + DDRCFG_REG(MC_CFG_MEM_ROWBITS) = LIBERO_SETTING_CFG_MEM_ROWBITS; + DDRCFG_REG(MC_CFG_MEM_BANKBITS) = LIBERO_SETTING_CFG_MEM_BANKBITS; + DDRCFG_REG(MC_CFG_NUM_RANKS) = LIBERO_SETTING_CFG_NUM_RANKS; + DDRCFG_REG(MC_CFG_MEMORY_TYPE) = LIBERO_SETTING_CFG_MEMORY_TYPE; + + /* Latency settings */ + DDRCFG_REG(MC_CFG_CL) = LIBERO_SETTING_CFG_CL; + DDRCFG_REG(MC_CFG_CWL) = LIBERO_SETTING_CFG_CWL; + DDRCFG_REG(MC_CFG_WL) = LIBERO_SETTING_CFG_WL; + DDRCFG_REG(MC_CFG_RL) = LIBERO_SETTING_CFG_RL; + + /* Refresh */ + DDRCFG_REG(MC_CFG_REF_PER) = LIBERO_SETTING_CFG_REF_PER; + DDRCFG_REG(MC_CFG_AUTO_REF_EN) = LIBERO_SETTING_CFG_AUTO_REF_EN; + + /* Additional timing */ + 
DDRCFG_REG(MC_CFG_XP) = LIBERO_SETTING_CFG_XP; + DDRCFG_REG(MC_CFG_XSR) = LIBERO_SETTING_CFG_XSR; + DDRCFG_REG(MC_CFG_MRD) = LIBERO_SETTING_CFG_MRD; + + /* DFI interface timing */ + DDRCFG_REG(MC_DFI_RDDATA_EN) = LIBERO_SETTING_CFG_DFI_T_RDDATA_EN; + DDRCFG_REG(MC_DFI_PHY_RDLAT) = LIBERO_SETTING_CFG_DFI_T_PHY_RDLAT; + DDRCFG_REG(MC_DFI_PHY_WRLAT) = LIBERO_SETTING_CFG_DFI_T_PHY_WRLAT; + DDRCFG_REG(MC_DFI_PHYUPD_EN) = LIBERO_SETTING_CFG_DFI_PHYUPD_EN; + mb(); +} + +/* DDR PHY Configuration + * + * Soft-resets the PHY, programs the mode / DPC / RPC / ODT settings, runs + * the PVT calibration and loads the training parameters. + * + * Returns 0 in every case. NOTE(review): an IOEN timeout is only printed, + * and the two calibration-status polls never check whether they timed out, + * so a failed calibration is not reported to the caller - confirm this + * best-effort behaviour is intended. */ +static int setup_phy(void) +{ + uint32_t pvt_stat, pll_ctrl, timeout; + + wolfBoot_printf("DDR: PHY setup..."); + + /* Soft reset DDR PHY: assert, wait, release, wait */ + DDRPHY_REG(PHY_SOFT_RESET) = 0x01; + mb(); + udelay(10); + DDRPHY_REG(PHY_SOFT_RESET) = 0x00; + mb(); + udelay(10); + + /* Check PHY PLL status: sampled once here and not re-read; this early + * value is what the lock message at the end of the function reports */ + pll_ctrl = DDRPHY_REG(PHY_PLL_CTRL_MAIN); + + /* Configure PHY mode (triggers state machine to copy default RPC values) */ + DDRPHY_REG(PHY_MODE) = LIBERO_SETTING_DDRPHY_MODE; + DDRPHY_REG(PHY_STARTUP) = 0x003F1F00UL; + DDRPHY_REG(PHY_DYN_CNTL) = 0x0000047FUL; + /* DPC_BITS - voltage reference settings from HSS: 0x00050422 */ + DDRPHY_REG(PHY_DPC_BITS) = LIBERO_SETTING_DPC_BITS; + mb(); + udelay(100); + + /* + * LPDDR4 WRLVL Preparation (from HSS DDR_TRAINING_INIT_DONE lines 619-624) + * Modify DPC_BITS vrgen_h for write leveling + * DDR_DPC_VRGEN_H_MASK = 0x3F0, DPC_VRGEN_H_LPDDR4_WR_LVL_VAL = 0x5 + * Formula: (dpc_bits & ~0x3F0) | (0x5 << 4) = (dpc_bits & 0xFFFFFC0F) | 0x50 + * + * Note: HSS sets rpc3_ODT=0 here but immediately overwrites it in + * set_ddr_rpc_regs() with LIBERO_SETTING_RPC_ODT_DQ (0x3). We skip + * the intermediate set to 0 since it has no effect. 
+ */ + { + uint32_t dpc_wrlvl = (LIBERO_SETTING_DPC_BITS & 0xFFFFFC0FUL) | 0x50UL; + DDRPHY_REG(PHY_DPC_BITS) = dpc_wrlvl; + /* rpc3_ODT will be set to 0x03 in RPC config below, matching HSS */ + mb(); + } + + /* + * Flash RPC registers to SCB (from HSS DDR_TRAINING_FLASH_REGS) + * Enable DDR IO decoders by triggering soft resets + * These offsets are from mss_ddr_sgmii_phy_defs.h + */ + DDRPHY_REG(0x300) = 0x01; /* SOFT_RESET_DECODER_DRIVER @ 0x300 */ + mb(); + DDRPHY_REG(0x380) = 0x01; /* SOFT_RESET_DECODER_ODT @ 0x380 */ + mb(); + DDRPHY_REG(0x400) = 0x01; /* SOFT_RESET_DECODER_IO @ 0x400 */ + mb(); + udelay(10); + + /* + * RPC Register Configuration (from HSS set_ddr_rpc_regs for LPDDR4) + * This is critical for proper DDR operation! + * Offsets from mss_ddr_sgmii_phy_defs.h structure layout + */ + + /* LPDDR4-specific configuration */ + DDRPHY_REG(0x588) = 0x04U; /* rpc98 @ 0x588 - ibufmd_dqs setting */ + DDRPHY_REG(0x5C8) = 0x14U; /* rpc226 @ 0x5C8 */ + /* SPARE0 = 0xA000 - common mode receiver for LPDDR4 */ + DDRPHY_REG(0x1FC) = 0xA000U; /* SPARE0 */ + + /* Common RPC settings */ + DDRPHY_REG(0x46C) = 0x02U; /* rpc27 @ 0x46C */ + DDRPHY_REG(0x72C) = 0x00U; /* rpc203 @ 0x72C */ + + /* ODT (On-Die Termination) Configuration + * From HSS hw_ddr_io_bank.h for Video Kit (offsets from structure): + * rpc1_ODT @ 0x384 = ODT_CA + * rpc2_ODT @ 0x388 = ODT_CLK + * rpc3_ODT @ 0x38C = ODT_DQ + * rpc4_ODT @ 0x390 = ODT_DQS + * + * CRITICAL: Despite earlier setting rpc3_ODT=0 for WRLVL prep, the HSS + * set_ddr_rpc_regs() restores it to LIBERO_SETTING_RPC_ODT_DQ (0x3) BEFORE + * HW training starts. The HW training IP handles WRLVL with ODT enabled. + * HSS DDR debug log confirms rpc3_ODT=0x3 at END of lpddr4_manual_training. 
+ */ + DDRPHY_REG(PHY_RPC1_ODT) = 0x02U; /* ODT_CA = LIBERO_SETTING_RPC_ODT_ADDCMD */ + DDRPHY_REG(PHY_RPC2_ODT) = 0x02U; /* ODT_CLK = LIBERO_SETTING_RPC_ODT_CLK */ + DDRPHY_REG(PHY_RPC3_ODT) = 0x03U; /* ODT_DQ = LIBERO_SETTING_RPC_ODT_DQ (0x3) */ + DDRPHY_REG(PHY_RPC4_ODT) = 0x06U; /* ODT_DQS = LIBERO_SETTING_RPC_ODT_DQS */ + + /* BCLK selection for training */ + DDRPHY_REG(0x44C) = 0x01U; /* rpc19 @ 0x44C - bclk_sel_clkn */ + DDRPHY_REG(0x450) = 0x00U; /* rpc20 @ 0x450 - bclk_sel_clkp */ + mb(); + + /* Bank controller soft reset to load RPC to SCB (from HSS DDR_TRAINING_SOFT_RESET) */ + DDR_BANKCONT_REG(0x00) = 0x01U; + mb(); + udelay(100); + + /* + * PVT Calibration (from HSS ddr_pvt_calibration in mss_sgmii.c) + * This calibrates DDR I/O using the hardware PVT calibrator + */ + wolfBoot_printf(" PVT calib..."); + + /* Wait for IOEN (IO enable) from power detectors */ + timeout = 100000; + while (timeout > 0) { + pvt_stat = DDRPHY_REG(PHY_IOC_REG1); + if (pvt_stat & PVT_IOEN_OUT) + break; + timeout--; + udelay(1); + } + if (timeout == 0) { + wolfBoot_printf("IOEN timeout\n"); + } + + /* Small delay for voltage ramp after IOEN */ + udelay(100); + + /* Set calibration clock divider and release reset + * IOC_REG6: bit 0 = calib_reset, bits 2:1 = calib_clkdiv + * Value 0x06 = clkdiv=3, reset=0 */ + DDRPHY_REG(PHY_IOC_REG6) = 0x00000006UL; + mb(); + + /* SCB PVT soft reset - load from RPC */ + IOSCB_IO_CALIB_DDR_REG(IOSCB_SOFT_RESET) = 0x01U; + mb(); + udelay(1); + IOSCB_IO_CALIB_DDR_REG(IOSCB_SOFT_RESET) = 0x00U; + mb(); + + /* Wait for calibration complete in SCB space */ + timeout = 100000; + while (timeout > 0) { + pvt_stat = IOSCB_IO_CALIB_DDR_REG(IOSCB_IOC_REG1); + if (pvt_stat & PVT_CALIB_STATUS) + break; + timeout--; + udelay(1); + } + + /* Wait for calibration complete in APB space */ + timeout = 100000; + while (timeout > 0) { + pvt_stat = DDRPHY_REG(PHY_IOC_REG1); + if (pvt_stat & PVT_CALIB_STATUS) + break; + timeout--; + udelay(1); + } + + /* Assert 
calibration lock in both APB and SCB registers */ + DDRPHY_REG(PHY_IOC_REG0) &= ~PVT_CALIB_LOCK; + IOSCB_IO_CALIB_DDR_REG(IOSCB_IOC_REG0) &= ~PVT_CALIB_LOCK; + mb(); + DDRPHY_REG(PHY_IOC_REG0) |= PVT_CALIB_LOCK; + IOSCB_IO_CALIB_DDR_REG(IOSCB_IOC_REG0) |= PVT_CALIB_LOCK; + mb(); + + wolfBoot_printf("done\n"); + + /* Configure training parameters - using HSS trained values (fixed + * constants, not values measured on this boot) */ + DDRPHY_REG(PHY_RPC145) = 0x00000008UL; /* Trained: 0x08 - ADDCMD delay */ + DDRPHY_REG(PHY_RPC147) = 0x00000009UL; /* Trained: 0x09 - DDR CLK loopback */ + DDRPHY_REG(PHY_RPC156) = 0x00000006UL; /* Trained: 0x06 */ + DDRPHY_REG(PHY_RPC166) = 0x00000002UL; /* Trained: 0x02 */ + DDRPHY_REG(PHY_RPC168) = 0x00000000UL; /* Trained: 0x00 */ + DDRPHY_REG(PHY_RPC220) = 0x0000000CUL; /* Trained: 0x0C */ + DDRPHY_REG(PHY_BCLK_SCLK) = LIBERO_SETTING_TIP_CONFIG_PARAMS_BCLK_VCOPHS_OFFSET; + + /* LPDDR4 Input Buffer Mode configuration (from Libero config) + * Critical for proper LPDDR4 signal capture */ + DDRPHY_REG(PHY_RPC95_IBUFMD_ADDCMD) = LIBERO_SETTING_RPC_IBUFMD_ADDCMD; + DDRPHY_REG(PHY_RPC96_IBUFMD_CLK) = LIBERO_SETTING_RPC_IBUFMD_CLK; + DDRPHY_REG(PHY_RPC97_IBUFMD_DQ) = LIBERO_SETTING_RPC_IBUFMD_DQ; + DDRPHY_REG(PHY_RPC98_IBUFMD_DQS) = LIBERO_SETTING_RPC_IBUFMD_DQS; + mb(); + + if (pll_ctrl & PLL_LOCK_BIT) { + wolfBoot_printf("PHY PLL locked\n"); + } else { + wolfBoot_printf("PHY PLL not locked (0x%x)\n", pll_ctrl); + } + + return 0; +} + +/* Training Reset and Clock Rotation + * + * Asserts the training reset, disables auto-init, writes 0 then 1 to the + * controller soft-reset register, then runs the expert-mode BCLK90 rotation + * and delay-line load sequence. Takes no arguments and returns nothing. + * + * NOTE(review): run_training() repeats the same expert-mode sequence but + * writes PHY_EXPERT_DFI_STATUS_TO_SHIM where this function writes + * PHY_EXPERT_DFI_STATUS - confirm which register is intended. */ +static void training_reset_and_rotate(void) +{ + uint32_t i; + + /* Assert training reset */ + DDRPHY_REG(PHY_TRAINING_RESET) = 0x00000002UL; + mb(); + + /* Disable auto-init */ + DDRCFG_REG(MC_AUTOINIT_DISABLE) = 0x01; + mb(); + + /* Controller soft reset sequence */ + wolfBoot_printf(" SR before=0x%x\n", DDRCFG_REG(MC_CTRLR_SOFT_RESET)); + DDRCFG_REG(MC_CTRLR_SOFT_RESET) = 0x00000000UL; + mb(); + wolfBoot_printf(" SR after 0=0x%x\n", DDRCFG_REG(MC_CTRLR_SOFT_RESET)); + udelay(1); + DDRCFG_REG(MC_CTRLR_SOFT_RESET) = 
0x00000001UL; + mb(); + wolfBoot_printf(" SR after 1=0x%x\n", DDRCFG_REG(MC_CTRLR_SOFT_RESET)); + udelay(1); + + /* Rotate BCLK90 using expert mode */ + DDRPHY_REG(PHY_EXPERT_PLLCNT) = 0x00000004UL; + DDRPHY_REG(PHY_EXPERT_MODE_EN) = 0x00000002UL; + mb(); + + /* PLL count sequence */ + DDRPHY_REG(PHY_EXPERT_PLLCNT) = 0x7CUL; + DDRPHY_REG(PHY_EXPERT_PLLCNT) = 0x78UL; + DDRPHY_REG(PHY_EXPERT_PLLCNT) = 0x78UL; + DDRPHY_REG(PHY_EXPERT_PLLCNT) = 0x7CUL; + DDRPHY_REG(PHY_EXPERT_PLLCNT) = 0x04UL; + DDRPHY_REG(PHY_EXPERT_PLLCNT) = 0x64UL; + DDRPHY_REG(PHY_EXPERT_PLLCNT) = 0x66UL; + + /* Apply BCLK VCO phase offset */ + for (i = 0; i < LIBERO_SETTING_TIP_CONFIG_PARAMS_BCLK_VCOPHS_OFFSET; i++) { + DDRPHY_REG(PHY_EXPERT_PLLCNT) = 0x67UL; + DDRPHY_REG(PHY_EXPERT_PLLCNT) = 0x66UL; + } + + DDRPHY_REG(PHY_EXPERT_PLLCNT) = 0x64UL; + DDRPHY_REG(PHY_EXPERT_PLLCNT) = 0x04UL; + mb(); + + /* Load delay lines */ + DDRPHY_REG(PHY_EXPERT_MV_RD_DLY) = 0x1FUL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0xFFFFFFFFUL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x00000000UL; + DDRPHY_REG(PHY_EXPERT_MV_RD_DLY) = 0x00UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0xFFFFFFFFUL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x00000000UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_PAUSE) = 0x0000003FUL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_PAUSE) = 0x00000000UL; + mb(); + + /* DQ/DQS output delays */ + DDRPHY_REG(PHY_EXPERT_DFI_STATUS) = 0x06UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD0) = 0xFFFFFFFFUL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x0FUL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD0) = 0x00000000UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x00000000UL; + + DDRPHY_REG(PHY_EXPERT_DFI_STATUS) = 0x04UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD0) = 0xFFFFFFFFUL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x0FUL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD0) = 0x00000000UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x00000000UL; + + DDRPHY_REG(PHY_EXPERT_DFI_STATUS) = 0x00UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_PAUSE) = 0x0000003FUL; + 
DDRPHY_REG(PHY_EXPERT_DLYCNT_PAUSE) = 0x00000000UL; + DDRPHY_REG(PHY_EXPERT_MODE_EN) = 0x00000000UL; + mb(); +} + +/* Training status bits (from HSS mss_ddr_defs.h) */ +#define BCLK_SCLK_BIT (0x1U << 0U) +#define ADDCMD_BIT (0x1U << 1U) +#define WRLVL_BIT (0x1U << 2U) +#define RDGATE_BIT (0x1U << 3U) +#define DQ_DQS_BIT (0x1U << 4U) +#define TRAINING_MASK (BCLK_SCLK_BIT | ADDCMD_BIT | WRLVL_BIT | RDGATE_BIT | DQ_DQS_BIT) + +/* DDR Training */ +static int run_training(void) +{ + uint32_t timeout, dfi_stat, ctrl_stat, train_stat; + + /* Configure training skip - skip ADDCMD only (we do it manually for LPDDR4) + * 0x02 = skip ADDCMD, TIP runs: BCLK_SCLK, WRLVL, RDGATE, DQ_DQS */ + DDRPHY_REG(PHY_TRAINING_SKIP) = LIBERO_SETTING_TRAINING_SKIP_SETTING; + mb(); + + /* Configure TIP parameters (from HSS debug: TIP_CFG_PARAMS:07CFE02F) */ + DDRPHY_REG(PHY_TIP_CFG_PARAMS) = 0x07CFE02FUL; + mb(); + + /* RPC168 - RX_MD_CLKN for LPDDR4 (from HSS) */ + DDRPHY_REG(PHY_RPC168) = 0x00000000UL; + mb(); + + /* + * BCLK90 Rotation (from HSS DDR_TRAINING_ROTATE_CLK) + * Rotate BCLK90 by 90 degrees using expert mode + */ + wolfBoot_printf("DDR: BCLK90 rotation..."); + { + uint32_t i; + + /* Expert mode setup for BCLK90 rotation */ + DDRPHY_REG(PHY_EXPERT_PLLCNT) = 0x04; + DDRPHY_REG(PHY_EXPERT_MODE_EN) = 0x02; /* Expert mode enable */ + + /* BCLK90 rotation sequence */ + DDRPHY_REG(PHY_EXPERT_PLLCNT) = 0x7C; + DDRPHY_REG(PHY_EXPERT_PLLCNT) = 0x78; + DDRPHY_REG(PHY_EXPERT_PLLCNT) = 0x78; + DDRPHY_REG(PHY_EXPERT_PLLCNT) = 0x7C; + DDRPHY_REG(PHY_EXPERT_PLLCNT) = 0x04; + DDRPHY_REG(PHY_EXPERT_PLLCNT) = 0x64; + DDRPHY_REG(PHY_EXPERT_PLLCNT) = 0x66; + + /* VCO phase offset increments (from TIP_CONFIG_PARAMS) */ + for (i = 0; i < LIBERO_SETTING_TIP_CONFIG_PARAMS_BCLK_VCOPHS_OFFSET; i++) { + DDRPHY_REG(PHY_EXPERT_PLLCNT) = 0x67; + DDRPHY_REG(PHY_EXPERT_PLLCNT) = 0x66; + } + DDRPHY_REG(PHY_EXPERT_PLLCNT) = 0x64; + DDRPHY_REG(PHY_EXPERT_PLLCNT) = 0x04; + + /* Load delay lines */ + 
DDRPHY_REG(PHY_EXPERT_MV_RD_DLY) = 0x1F; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0xFFFFFFFF; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x00; + DDRPHY_REG(PHY_EXPERT_MV_RD_DLY) = 0x00; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0xFFFFFFFF; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x00; + DDRPHY_REG(PHY_EXPERT_DLYCNT_PAUSE) = 0x3F; + DDRPHY_REG(PHY_EXPERT_DLYCNT_PAUSE) = 0x00; + + /* DQ output delays */ + DDRPHY_REG(PHY_EXPERT_DFI_STATUS_TO_SHIM) = 0x06; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD0) = 0xFFFFFFFF; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x0F; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD0) = 0x00; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x00; + + /* DQS output delays */ + DDRPHY_REG(PHY_EXPERT_DFI_STATUS_TO_SHIM) = 0x04; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD0) = 0xFFFFFFFF; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x0F; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD0) = 0x00; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x00; + + DDRPHY_REG(PHY_EXPERT_DFI_STATUS_TO_SHIM) = 0x00; + DDRPHY_REG(PHY_EXPERT_DLYCNT_PAUSE) = 0x3F; + DDRPHY_REG(PHY_EXPERT_DLYCNT_PAUSE) = 0x00; + + /* Exit expert mode */ + DDRPHY_REG(PHY_EXPERT_MODE_EN) = 0x00; + mb(); + } + wolfBoot_printf("done\n"); + + /* + * Apply BCLK phase from Libero settings + * The PHADJ register controls clock phase alignment: + * - Bits 2-4: REG_OUT0_PHSINIT (BCLK phase) + * - Bits 5-7: REG_OUT1_PHSINIT + * - Bits 8-10: REG_OUT2_PHSINIT + * - Bits 11-13: REG_OUT3_PHSINIT (BCLK90 phase) + * - Bit 14: REG_LOADPHS_B (load phase, must toggle) + */ + wolfBoot_printf("DDR: BCLK phase..."); + { + /* Use Libero-generated value, toggle LOADPHS to apply */ + uint32_t pll_phadj = LIBERO_SETTING_DDR_PLL_PHADJ | 0x4000UL; + DDR_PLL_REG(PLL_PHADJ) = pll_phadj; + mb(); + DDR_PLL_REG(PLL_PHADJ) = pll_phadj & ~0x4000UL; + mb(); + DDR_PLL_REG(PLL_PHADJ) = pll_phadj; + mb(); + wolfBoot_printf("0x%x\n", pll_phadj); + } + + ddr_delay(1000); + + /* + * LPDDR4 Training Sequence (corrected based on HSS) + * HSS sequence: Configure WRLVL -> DFI init -> wait for DFI 
complete -> lpddr4_manual_training -> wait for TIP + */ + wolfBoot_printf("DDR: Starting TIP training...\n"); + + /* + * CRITICAL: Configure PHY for WRLVL BEFORE training reset release + * Per HSS analysis: WRLVL config must be set before TIP starts + * 1. Configure PHY: DPC_BITS vrgen_h = 0x5, rpc3_ODT = 0x0 + * 2. MR2 WRLVL enable will be done after manual training, before TIP runs + */ + wolfBoot_printf(" Configure PHY for WRLVL..."); + { + /* Set vrgen_h = 0x5 in DPC_BITS (bits 9:4) */ + uint32_t dpc_bits = DDRPHY_REG(PHY_DPC_BITS); + uint32_t dpc_wrlvl = (dpc_bits & 0xFFFFFC0FUL) | (0x5UL << 4U); + DDRPHY_REG(PHY_DPC_BITS) = dpc_wrlvl; + DDRPHY_REG(PHY_RPC3_ODT) = 0x00U; /* ODT off for WRLVL */ + mb(); + wolfBoot_printf("DPC=0x%x ODT=0x%x...done\n", + DDRPHY_REG(PHY_DPC_BITS), DDRPHY_REG(PHY_RPC3_ODT)); + } + + /* Step 1: Release training reset */ + wolfBoot_printf(" Training reset release..."); + DDRPHY_REG(PHY_TRAINING_RESET) = 0x00000000UL; + mb(); + ddr_delay(1000); + wolfBoot_printf("done\n"); + + /* Step 2: Start DFI init */ + wolfBoot_printf(" DFI init start..."); + DDRCFG_REG(MC_DFI_INIT_START) = 0x00000000UL; + mb(); + DDRCFG_REG(MC_DFI_INIT_START) = 0x00000001UL; + mb(); + + /* Step 3: Start controller init */ + DDRCFG_REG(MC_CTRLR_INIT) = 0x00000000UL; + mb(); + DDRCFG_REG(MC_CTRLR_INIT) = 0x00000001UL; + mb(); + wolfBoot_printf("done\n"); + + /* Step 4: Wait for DFI init complete */ + wolfBoot_printf(" Wait DFI complete..."); + timeout = 100000; + while (timeout > 0) { + dfi_stat = DDRCFG_REG(MC_DFI_INIT_COMPLETE); + if (dfi_stat & 0x01) + break; + timeout--; + ddr_delay(10); + } + if (timeout == 0) { + wolfBoot_printf("TIMEOUT (0x%x)\n", dfi_stat); + return -1; + } + wolfBoot_printf("OK\n"); + + /* Lane alignment FIFO control (from HSS DDR_TRAINING_IP_SM_START_CHECK) */ + DDRPHY_REG(PHY_LANE_ALIGN_FIFO_CTRL) = 0x00; + DDRPHY_REG(PHY_LANE_ALIGN_FIFO_CTRL) = 0x02; + mb(); + + /* + * Step 5: LPDDR4 Manual Training (from HSS lpddr4_manual_training) + 
* This is called AFTER DFI init completes per HSS + */ + wolfBoot_printf(" LPDDR4 manual training...\n"); + + /* Device reset sequence (from HSS lpddr4_manual_training lines 5035-5053) */ + wolfBoot_printf(" Device reset..."); + DDRCFG_REG(MC_INIT_CS) = 0x01; + DDRCFG_REG(MC_INIT_DISABLE_CKE) = 0x01; + ddr_delay(50); /* 5us */ + DDRCFG_REG(MC_INIT_FORCE_RESET) = 0x01; + + DDRCFG_REG(MC_CTRLR_SOFT_RESET) = 0x01; /* Release soft reset */ + ddr_delay(25000); /* 250us */ + DDRCFG_REG(MC_INIT_FORCE_RESET) = 0x00; + ddr_delay(200000); /* 2ms minimum per LPDDR4 spec */ + DDRCFG_REG(MC_INIT_DISABLE_CKE) = 0x00; + ddr_delay(15000); /* 150us */ + DDRCFG_REG(MC_INIT_CS) = 0x01; + + DDRCFG_REG(MC_CFG_AUTO_ZQ_CAL_EN) = 0x00; + ddr_delay(50); + wolfBoot_printf("done\n"); + + /* + * DDR PLL frequency doubling for LPDDR4 training (from HSS lines 5057-5076) + * This is critical - mode register writes need slower frequency + * Save original dividers for restore after MR writes + */ + wolfBoot_printf(" PLL freq double..."); + DDRCFG_REG(MC_INIT_DISABLE_CKE) = 0x01; + ddr_delay(5000); /* 50us */ + + /* Read and save original PLL dividers */ + uint32_t div0_1_orig = DDR_PLL_REG(PLL_DIV_0_1); + uint32_t div2_3_orig = DDR_PLL_REG(PLL_DIV_2_3); + { + uint32_t div0 = div0_1_orig & 0x3F00UL; + uint32_t div1 = div0_1_orig & 0x3F000000UL; + uint32_t div2 = div2_3_orig & 0x3F00UL; + uint32_t div3 = div2_3_orig & 0x3F000000UL; + uint32_t mult = 2; + + /* Double the dividers for MR writes */ + DDR_PLL_REG(PLL_DIV_0_1) = (div0 | div1) * mult; + DDR_PLL_REG(PLL_DIV_2_3) = (div2 | div3) * mult; + + /* Wait for PHY PLL to lock */ + while ((DDRPHY_REG(PHY_PLL_CTRL_MAIN) & 0x2000000UL) == 0) {} + ddr_delay(5000); + + /* Reset delay lines after frequency change */ + DDRPHY_REG(PHY_PLL_CTRL_MAIN) &= ~0x0000003CUL; + DDRPHY_REG(PHY_PLL_CTRL_MAIN) |= 0x0000003CUL; + } + wolfBoot_printf("done\n"); + + /* Expert mode sequence after PLL doubling (from HSS lines 5067-5075) */ + DDRPHY_REG(PHY_EXPERT_MODE_EN) 
= 0x00000009UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_PAUSE) = 0x0000003FUL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_PAUSE) = 0x00000000UL; + DDRPHY_REG(PHY_EXPERT_MODE_EN) = 0x00000008UL; + ddr_delay(5000); /* 50us */ + + DDRCFG_REG(MC_INIT_DISABLE_CKE) = 0x00; + ddr_delay(50000); /* 500us */ + + /* + * SECOND RESET CYCLE (from HSS lpddr4_manual_training lines 5085-5095) + * This is critical - device must be reset before MR writes + */ + wolfBoot_printf(" Second reset..."); + DDRCFG_REG(MC_INIT_CS) = 0x01; + DDRCFG_REG(MC_INIT_DISABLE_CKE) = 0x01; + ddr_delay(50); /* 5us */ + DDRCFG_REG(MC_INIT_FORCE_RESET) = 0x01; + DDRCFG_REG(MC_CTRLR_SOFT_RESET) = 0x01; + ddr_delay(25000); /* 250us */ + DDRCFG_REG(MC_INIT_FORCE_RESET) = 0x00; + ddr_delay(200000); /* 2ms */ + DDRCFG_REG(MC_INIT_DISABLE_CKE) = 0x00; + ddr_delay(15000); /* 150us */ + wolfBoot_printf("done\n"); + + /* Debug: Check controller state before MR writes */ + wolfBoot_printf(" Pre-MR: CKE=%d RST=%d CS=%d PLL=0x%x\n", + DDRCFG_REG(MC_INIT_DISABLE_CKE), + DDRCFG_REG(MC_INIT_FORCE_RESET), + DDRCFG_REG(MC_INIT_CS), + DDRPHY_REG(PHY_PLL_CTRL_MAIN)); + wolfBoot_printf(" DIV0_1=0x%x DIV2_3=0x%x\n", + DDR_PLL_REG(PLL_DIV_0_1), + DDR_PLL_REG(PLL_DIV_2_3)); + + /* LPDDR4 Mode Register Initialization (MT53D512M32D2DS-053) + * + * Write proper MR values to the DRAM. + * Values based on LPDDR4 @ 1600 Mbps (800 MHz, WL=8, RL=14) + * Updated to match Libero MSS Configurator settings. 
+ * + * MR1 = 0x56 : nWR=16, RD preamble=toggle, WR preamble=2tCK, BL=16 + * MR2 = 0x2D : RL=14, WL=8, WLS=1 (set 1) + * MR3 = 0xF1 : PDDS=RZQ/6 (40ohm), DBI-RD/WR disabled + * MR11 = 0x31 : DQ_ODT=RZQ2 (bits 2:0=001), CA_ODT=RZQ4 (bits 6:4=011) + * MR12 = 0x32 : CA VREF=50 (from Libero LPDDR4_VREF_CA=50) + * MR13 = 0x00 : FSP-OP=0, FSP-WR=0, DMI enabled, VRCG normal + * MR14 = 0x0F : DQ VREF=15 (from Libero LPDDR4_VREF_DATA=15) + * MR22 = 0x06 : SOC_ODT=RZQ6 (40ohm, from Libero LPDDR4_SOC_ODT=RZQ6) + */ + wolfBoot_printf(" MR writes..."); + { + struct mr_write_s { + uint8_t mr; + uint8_t val; + }; + struct mr_write_s mr_writes[] = { + {1, 0x56}, {2, 0x2D}, {3, 0xF1}, {11, 0x31}, + {12, 0x32}, {13, 0x00}, {14, 0x0F}, {22, 0x06} + }; + int i, j; + uint32_t ack_cnt = 0, err_cnt = 0; + + for (i = 0; i < (int)(sizeof(mr_writes)/sizeof(mr_writes[0])); i++) { + for (j = 0; j < 10; j++) { /* 10 retries per MR */ + DDRCFG_REG(MC_INIT_CS) = 0x01; + DDRCFG_REG(MC_INIT_MR_WR_MASK) = 0xFF; /* Write all 8 bits */ + DDRCFG_REG(MC_INIT_MR_ADDR) = mr_writes[i].mr; + DDRCFG_REG(MC_INIT_MR_WR_DATA) = mr_writes[i].val; + DDRCFG_REG(MC_INIT_MR_W_REQ) = 0x01; + DDRCFG_REG(MC_INIT_MR_W_REQ) = 0x00; + mb(); + ddr_delay(500); /* 5us delay */ + if (DDRCFG_REG(MC_INIT_ACK) != 0) + ack_cnt++; + else + err_cnt++; + } + } + wolfBoot_printf("ack=%d err=%d...", ack_cnt, err_cnt); + } + wolfBoot_printf("done\n"); + + /* + * Restore PLL to normal speed after mode register writes + * (from HSS lines 5121-5136) + */ + wolfBoot_printf(" PLL freq restore..."); + DDRCFG_REG(MC_INIT_DISABLE_CKE) = 0x01; + ddr_delay(500); + + DDR_PLL_REG(PLL_DIV_0_1) = div0_1_orig; + DDR_PLL_REG(PLL_DIV_2_3) = div2_3_orig; + + /* Wait for PHY PLL to lock */ + while ((DDRPHY_REG(PHY_PLL_CTRL_MAIN) & 0x2000000UL) == 0) {} + ddr_delay(500); + + /* Reset delay lines after frequency change */ + DDRPHY_REG(PHY_PLL_CTRL_MAIN) &= ~0x0000003CUL; + DDRPHY_REG(PHY_PLL_CTRL_MAIN) |= 0x0000003CUL; + + DDRPHY_REG(PHY_EXPERT_MODE_EN) = 
0x00000009UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_PAUSE) = 0x0000003FUL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_PAUSE) = 0x00000000UL; + DDRPHY_REG(PHY_EXPERT_MODE_EN) = 0x00000008UL; + ddr_delay(500); + wolfBoot_printf("done\n"); + + /* + * CA VREF Training (from HSS lpddr4_manual_training lines 5140-5310) + * This calibrates the command/address bus voltage reference + * Must happen AFTER PLL restore at normal speed + */ + wolfBoot_printf(" CA VREF training...\n"); + DDRCFG_REG(MC_INIT_DISABLE_CKE) = 0x01; /* Disable CKE during training */ + ddr_delay(5000); /* 50us */ + { + uint32_t dpc_bits_new; + /* Force VREF to match HSS training result (0x10) instead of our sweep (0x0C) */ + uint32_t vref_answer = 0x10; /* Use HSS value directly */ +#if 0 /* Disable sweep temporarily */ + uint32_t transition_a5_min_last = 129; + uint32_t ca_indly; + uint32_t vref; + + /* Enable expert mode for delay control */ + DDRPHY_REG(PHY_EXPERT_MODE_EN) = 0x00000021UL; + DDRPHY_REG(PHY_EXPERT_DFI_STATUS_TO_SHIM) = 0x00000000UL; + + /* Reset delay lines to 0 before sweep (from HSS expert mode setup) */ + DDRPHY_REG(PHY_EXPERT_DLYCNT_PAUSE) = 0x0000003FUL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_PAUSE) = 0x00000000UL; + ddr_delay(100); + + /* Outer loop: sweep CA input delay */ + for (ca_indly = 0; ca_indly < 30; ca_indly += 5) { + DDRPHY_REG(PHY_RPC145) = ca_indly; /* A9 loopback delay */ + DDRPHY_REG(PHY_RPC147) = ca_indly; /* DDR clock loopback delay */ + + uint32_t break_loop = 1; + uint32_t in_window = 0; + vref_answer = 128; + + /* Inner loop: sweep VREF values */ + for (vref = 5; vref < 30; vref++) { + uint32_t transition_a5_max = 0; + uint32_t transition_a5_min = 128; + uint32_t j; + + if (transition_a5_min_last > 128) + transition_a5_min_last = 128; + + /* Reset DPC_BITS NV map */ + DDR_BANKCONT_REG(0x00) = 0U; + ddr_delay(50); + + /* Set new VREF value: bits[17:12] = vref, bit 18 = enable */ + dpc_bits_new = (DDRPHY_REG(PHY_DPC_BITS) & 0xFFFC0FFFUL) | + (vref << 12) | (0x1UL << 18); + 
DDRPHY_REG(PHY_DPC_BITS) = dpc_bits_new; + ddr_delay(50); + + /* Release NV map reset */ + DDR_BANKCONT_REG(0x00) = 1U; + ddr_delay(50); + + /* Sample transition_a5 multiple times */ + for (j = 0; j < 20; j++) { + uint32_t rx_a5_last = 0xF; + uint32_t rx_a5; + uint32_t transition_a5 = 0; + uint32_t i; + + /* Load INDLY - same sequence as HSS lines 5186-5195 */ + DDRPHY_REG(PHY_EXPERT_DLYCNT_DIR1) = 0x000000UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x000000UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x180000UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x000000UL; + + /* Load OUTDLY - same sequence as HSS lines 5197-5203 */ + DDRPHY_REG(PHY_EXPERT_DLYCNT_DIR1) = 0x180000UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x000000UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x180000UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x000000UL; + + ddr_delay(50); + + /* Sweep delay and look for transition in rx_a5 */ + for (i = 0; i < (128 - ca_indly); i++) { + /* Move delay counter */ + DDRPHY_REG(PHY_EXPERT_DLYCNT_MOVE1) = 0x0UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_MOVE1) = 0x180000UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_MOVE1) = 0x0UL; + ddr_delay(5); + + /* Read rx_a5 from bits 9:8 of readback register */ + rx_a5 = (DDRPHY_REG(PHY_EXPERT_ADDCMD_READBACK) & 0x0300UL) >> 8; + + /* If we found a transition, break after 8 more steps */ + if (transition_a5 != 0) { + if ((i - transition_a5) > 8) + break; + } + + /* Detect transition (rising edge in rx_a5) */ + if (transition_a5 == 0) { + if ((rx_a5 ^ rx_a5_last) & rx_a5) { + transition_a5 = i; + } else { + rx_a5_last = rx_a5; + } + } else { + /* Verify transition is stable after 4 steps */ + if ((i - transition_a5) == 4) { + if (!((rx_a5 ^ rx_a5_last) & rx_a5)) { + transition_a5 = 0; /* False transition */ + rx_a5_last = rx_a5; + } + } + } + } + + /* Track min/max transition point */ + if (transition_a5 != 0) { + if (transition_a5 > transition_a5_max) + transition_a5_max = transition_a5; + if (transition_a5 < transition_a5_min) + 
transition_a5_min = transition_a5; + } + } + + /* Calculate range and check if we're in a stable window */ + { + uint32_t range_a5 = transition_a5_max - transition_a5_min; + uint32_t deltat; + + if (transition_a5_min < 10) + break_loop = 0; + + if (range_a5 <= 5) { + if (transition_a5_min > transition_a5_min_last) + deltat = transition_a5_min - transition_a5_min_last; + else + deltat = transition_a5_min_last - transition_a5_min; + + if (deltat <= 5) + in_window = (in_window << 1) | 1; + } else { + in_window = (in_window << 1) | 0; + } + + /* Found answer if 2 consecutive good windows */ + if (vref_answer == 128) { + if ((in_window & 0x3) == 0x3) { + vref_answer = vref; + break; /* Found good VREF */ + } + } + + transition_a5_min_last = transition_a5_min; + } + } + + if (break_loop) + break; + } +#endif /* Skip VREF sweep - use HSS value directly */ + + /* Apply final VREF value */ + DDR_BANKCONT_REG(0x00) = 0U; + ddr_delay(50); + + if (vref_answer == 128) { + /* Training failed - use default 0x10 */ + vref_answer = 0x10; + wolfBoot_printf("FAIL(0x%x)...", vref_answer); + } else { + wolfBoot_printf("0x%x...", vref_answer); + } + + dpc_bits_new = (DDRPHY_REG(PHY_DPC_BITS) & 0xFFFC0FFFUL) | + (vref_answer << 12) | (0x1UL << 18); + DDRPHY_REG(PHY_DPC_BITS) = dpc_bits_new; + ddr_delay(50); + + DDR_BANKCONT_REG(0x00) = 1U; + ddr_delay(5000); + } + wolfBoot_printf("done\n"); + + /* + * MANUAL ADDCMD TRAINING (from HSS lpddr4_manual_training lines 5320-5600) + * Finds optimal refclk_phase and CA output delay + */ + wolfBoot_printf(" ADDCMD training..."); + { + uint32_t init_del_offset = 0x8; + uint32_t rpc147_offset = 0x1; + uint32_t rpc145_offset = 0x0; + uint32_t bclk_phase = DDR_PLL_REG(PLL_PHADJ) & 0x700; + uint32_t bclk90_phase = DDR_PLL_REG(PLL_PHADJ) & 0x3800; + uint32_t refclk_phase; + uint32_t a5_offset_status = 1; /* 1 = FAIL, 0 = PASS */ + uint32_t max_retries = 5; + + while (a5_offset_status != 0 && max_retries > 0) { + a5_offset_status = 0; /* Assume pass */ + 
max_retries--; + + /* Set loopback delay offsets */ + DDRPHY_REG(PHY_RPC147) = init_del_offset + rpc147_offset; + DDRPHY_REG(PHY_RPC145) = init_del_offset + rpc145_offset; + + /* Enable expert mode for delay and PLL control */ + DDRPHY_REG(PHY_EXPERT_MODE_EN) = 0x00000023UL; + + uint32_t j; + uint32_t difference[8] = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; + uint32_t transition_ck_array[8] = {0}; + uint32_t transition_a5_max = 0; + + /* Sweep 16 refclk phases (8 unique phases, sampled twice) */ + for (j = 0; j < 16; j++) { + uint32_t rx_a5, rx_a5_last = 0xF; + uint32_t rx_ck, rx_ck_last = 0x5; + uint32_t transition_a5 = 0; + uint32_t transition_ck = 0; + uint32_t i; + uint32_t transitions_found = 0; + + /* Load INDLY */ + DDRPHY_REG(PHY_EXPERT_DLYCNT_DIR1) = 0x000000UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x000000UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x180000UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x000000UL; + + /* Load OUTDLY */ + DDRPHY_REG(PHY_EXPERT_DLYCNT_DIR1) = 0x180000UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x000000UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x180000UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x000000UL; + + /* Set refclk phase */ + refclk_phase = (j % 8) << 2; + DDR_PLL_REG(PLL_PHADJ) = 0x00004003UL | bclk_phase | bclk90_phase | refclk_phase; + DDR_PLL_REG(PLL_PHADJ) = 0x00000003UL | bclk_phase | bclk90_phase | refclk_phase; + DDR_PLL_REG(PLL_PHADJ) = 0x00004003UL | bclk_phase | bclk90_phase | refclk_phase; + + ddr_delay(10); + + /* Sweep delay to find transitions */ + i = 0; + while (!transitions_found && i < 128) { + DDRPHY_REG(PHY_EXPERT_DLYCNT_MOVE1) = 0x0UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_MOVE1) = 0x180000UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_MOVE1) = 0x0UL; + ddr_delay(5); + + rx_a5 = (DDRPHY_REG(PHY_EXPERT_ADDCMD_READBACK) & 0x0300UL) >> 8; + rx_ck = DDRPHY_REG(PHY_EXPERT_ADDCMD_READBACK) & 0x000F; + + /* Check if both transitions found */ + if (transition_a5 != 0 && transition_ck != 0) { + if ((i - 
transition_a5) > 8 && (i - transition_ck) > 8) + transitions_found = 1; + } + + /* Detect CK transition (edge to 0x5) */ + if (transition_ck == 0) { + if (rx_ck_last != 0x5 && rx_ck == 0x5) + transition_ck = i; + rx_ck_last = rx_ck; + } else if ((i - transition_ck) == 4 && rx_ck != rx_ck_last) { + transition_ck = 0; + rx_ck_last = rx_ck; + } + + /* Detect A5 transition (rising edge) */ + if (transition_a5 == 0) { + if ((rx_a5 ^ rx_a5_last) & rx_a5) + transition_a5 = i; + else + rx_a5_last = rx_a5; + } else if ((i - transition_a5) == 4) { + if (!((rx_a5 ^ rx_a5_last) & rx_a5)) { + transition_a5 = 0; + rx_a5_last = rx_a5; + } + } + + i++; + } + + /* Track max transition_a5 */ + if (transition_a5 > transition_a5_max) + transition_a5_max = transition_a5; + + /* Store transition_ck for first 8 phases */ + if (transition_a5 != 0 && transition_ck != 0 && j < 8) + transition_ck_array[j] = transition_ck; + } + + /* Calculate differences and find minimum */ + uint32_t min_diff = 0xFF; + uint32_t min_diffp1 = 0xFF; + uint32_t min_refclk = 0; + + if (transition_a5_max < 0x20) { /* Threshold for LPDDR4 */ + a5_offset_status = 1; /* FAIL */ + } + + for (j = 0; j < 8; j++) { + if (transition_a5_max >= transition_ck_array[j]) + difference[j] = transition_a5_max - transition_ck_array[j]; + else + difference[j] = 0xFF; + } + + for (j = 0; j < 8; j++) { + if (difference[j] < min_diff) { + min_refclk = j; + min_diff = difference[j]; + min_diffp1 = difference[(j + 1) & 0x7]; + } + } + + if (min_diff == 0xFF) + a5_offset_status = 1; + + if (a5_offset_status == 0) { + /* Apply optimal phase and delay */ + refclk_phase = (min_refclk & 0x7) << 2; + DDR_PLL_REG(PLL_PHADJ) = 0x00004003UL | bclk_phase | bclk90_phase | refclk_phase; + DDR_PLL_REG(PLL_PHADJ) = 0x00000003UL | bclk_phase | bclk90_phase | refclk_phase; + DDR_PLL_REG(PLL_PHADJ) = 0x00004003UL | bclk_phase | bclk90_phase | refclk_phase; + + /* Load INDLY */ + DDRPHY_REG(PHY_EXPERT_DLYCNT_DIR1) = 0x000000UL; + 
DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x000000UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x180000UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x000000UL; + + /* Load OUTDLY */ + DDRPHY_REG(PHY_EXPERT_DLYCNT_DIR1) = 0x180000UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x000000UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x180000UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_LOAD1) = 0x000000UL; + + /* Move to optimal delay */ + for (j = 0; j < min_diffp1 && j < 128; j++) { + DDRPHY_REG(PHY_EXPERT_DLYCNT_MOVE1) = 0x0UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_MOVE1) = 0x180000UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_MOVE1) = 0x0UL; + } + + DDRPHY_REG(PHY_EXPERT_DLYCNT_DIR1) = 0x000000UL; + DDRPHY_REG(PHY_EXPERT_MODE_EN) = 0x00000000UL; + + wolfBoot_printf("phase=%d dly=%d...", min_refclk, min_diffp1); + } else { + /* Increase offset and retry */ + init_del_offset += transition_a5_max + 5; + if (init_del_offset > 0xFF) + break; + } + } + + if (a5_offset_status != 0) + wolfBoot_printf("FAIL..."); + } + + /* POST_INITIALIZATION after ADDCMD training */ + DDRPHY_REG(PHY_EXPERT_MODE_EN) = 0x00000008UL; + DDRPHY_REG(PHY_EXPERT_DFI_STATUS_TO_SHIM) = 0x00000000UL; + DDRPHY_REG(PHY_EXPERT_MODE_EN) = 0x00000009UL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_PAUSE) = 0x0000003FUL; + DDRPHY_REG(PHY_EXPERT_DLYCNT_PAUSE) = 0x00000000UL; + DDRPHY_REG(PHY_EXPERT_MODE_EN) = 0x00000008UL; + ddr_delay(50); + + /* Verify training values applied */ + wolfBoot_printf("PLL_PHADJ=0x%x DPC=0x%x...", + DDR_PLL_REG(PLL_PHADJ), + DDRPHY_REG(PHY_DPC_BITS)); + + /* Re-enable CKE */ + DDRCFG_REG(MC_INIT_DISABLE_CKE) = 0x00; + ddr_delay(5000); + + /* Post-ADDCMD: Re-write mode registers with corrected values */ + wolfBoot_printf(" MR re-write..."); + { + struct mr_write_s { + uint8_t mr; + uint8_t val; + }; + struct mr_write_s mr_writes[] = { + {1, 0x56}, {2, 0x2D}, {3, 0xF1}, {11, 0x31}, + {12, 0x32}, {13, 0x00}, {14, 0x0F}, {22, 0x06} + }; + int i, j; + uint32_t ack_cnt = 0, err_cnt = 0; + + for (i = 0; i < 
(int)(sizeof(mr_writes)/sizeof(mr_writes[0])); i++) { + for (j = 0; j < 10; j++) { + DDRCFG_REG(MC_INIT_CS) = 0x01; + DDRCFG_REG(MC_INIT_MR_WR_MASK) = 0xFF; + DDRCFG_REG(MC_INIT_MR_ADDR) = mr_writes[i].mr; + DDRCFG_REG(MC_INIT_MR_WR_DATA) = mr_writes[i].val; + DDRCFG_REG(MC_INIT_MR_W_REQ) = 0x01; + DDRCFG_REG(MC_INIT_MR_W_REQ) = 0x00; + mb(); + ddr_delay(500); + if (DDRCFG_REG(MC_INIT_ACK) != 0) + ack_cnt++; + else + err_cnt++; + } + } + wolfBoot_printf("ack=%d err=%d...", ack_cnt, err_cnt); + } + wolfBoot_printf("done\n"); + + ddr_delay(100); + + /* Debug: Check training status after manual training */ + wolfBoot_printf(" Post-manual training status:\n"); + wolfBoot_printf(" train_stat=0x%x dfi_train_complete=0x%x\n", + DDRPHY_REG(PHY_TRAINING_STATUS), + DDRCFG_REG(0x38)); /* STAT_DFI_TRAINING_COMPLETE */ + wolfBoot_printf(" gt_state=0x%x dqdqs_state=0x%x\n", + DDRPHY_REG(0x82C), DDRPHY_REG(0x83C)); + + /* ZQ calibration */ + wolfBoot_printf(" ZQ cal..."); + DDRCFG_REG(MC_INIT_ZQ_CAL_START) = 0x00000001UL; + DDRCFG_REG(MC_AUTOINIT_DISABLE) = 0x00000000UL; + + /* Wait for INIT_ACK */ + timeout = 0xFF; + while ((DDRCFG_REG(MC_INIT_ACK) == 0) && (timeout > 0)) { + ddr_delay(100); + timeout--; + } + DDRCFG_REG(MC_INIT_ZQ_CAL_START) = 0x00000000UL; + DDRCFG_REG(MC_CFG_AUTO_ZQ_CAL_EN) = 0x00000001UL; + mb(); + wolfBoot_printf("done\n"); + + /* + * Simulate HSS state machine transitions to trigger TIP progression + * HSS state machine: START_CHECK -> BCLKSCLK -> ADDCMD -> WRLVL + * TIP may need to see these state transitions before it can start WRLVL + */ + wolfBoot_printf(" Simulate state machine transitions..."); + { + uint32_t train_stat = DDRPHY_REG(PHY_TRAINING_STATUS); + + /* Step 1: Verify BCLK_SCLK is complete (should be bit 0 set) */ + if (train_stat & BCLK_SCLK_BIT) { + wolfBoot_printf("BCLK_SCLK done "); + } else { + wolfBoot_printf("BCLK_SCLK not done (0x%x) ", train_stat); + } + + /* Step 2: Simulate entering ADDCMD state + * Per HSS: Check if ADDCMD is 
skipped (training_skip bit 1) + * If skipped, immediately transition to WRLVL state + */ + uint32_t training_skip = DDRPHY_REG(PHY_TRAINING_SKIP); + if (training_skip & ADDCMD_BIT) { + wolfBoot_printf("ADDCMD skipped "); + /* Simulate transition to WRLVL state - add delay to let TIP detect */ + ddr_delay(50000); /* 500us delay for state transition */ + } else { + /* ADDCMD not skipped - wait for ADDCMD completion */ + wolfBoot_printf("ADDCMD not skipped, waiting... "); + uint32_t addcmd_timeout = 100000; /* 10 seconds */ + while ((addcmd_timeout > 0) && !(train_stat & ADDCMD_BIT)) { + train_stat = DDRPHY_REG(PHY_TRAINING_STATUS); + addcmd_timeout--; + ddr_delay(10); + } + if (train_stat & ADDCMD_BIT) { + wolfBoot_printf("ADDCMD complete "); + } else { + wolfBoot_printf("ADDCMD timeout "); + } + } + + /* Step 3: Enable WRLVL in MR2 (TIP may need this before starting WRLVL) + * Per User Guide Section 2.7.3.4: MR2 bit 7 must be set to enable WRLVL mode + */ + wolfBoot_printf("MR2 WRLVL enable..."); + DDRCFG_REG(MC_INIT_CS) = 0x01; + DDRCFG_REG(MC_INIT_MR_WR_MASK) = 0xFF; + DDRCFG_REG(MC_INIT_MR_ADDR) = 2; /* MR2 */ + DDRCFG_REG(MC_INIT_MR_WR_DATA) = 0xAD; /* MR2 = 0x2D | 0x80 (WRLVL enable) */ + DDRCFG_REG(MC_INIT_MR_W_REQ) = 0x01; + DDRCFG_REG(MC_INIT_MR_W_REQ) = 0x00; + mb(); + ddr_delay(10000); /* 100us delay for MR2 write to propagate */ + + /* Step 4: Ensure TIP is running and give it time to detect state transition */ + if ((DDRPHY_REG(PHY_TRAINING_START) & 0x01) == 0) { + DDRPHY_REG(PHY_TRAINING_START) = 0x00000001UL; + mb(); + } + ddr_delay(50000); /* 500us delay for TIP to detect WRLVL state transition */ + wolfBoot_printf("done\n"); + + /* Check initial status after state machine simulation */ + uint32_t init_stat = DDRPHY_REG(PHY_TRAINING_STATUS); + wolfBoot_printf(" Post-state-machine: train_stat=0x%x\n", init_stat); + } + + /* + * Wait for TIP to complete training phases automatically + * Per HSS analysis: After state machine transitions, TIP should start 
WRLVL automatically + * + * Training phases: + * - BCLK_SCLK (already done) + * - Write Leveling (WRLVL) - TIP runs automatically after state transition + * - DQS Gate Training (RDGATE) - TIP runs automatically + * - Read Data Eye Training (DQ_DQS) - TIP runs automatically + */ + wolfBoot_printf(" Wait for TIP WRLVL to start and complete...\n"); + { + uint32_t timeout = 1000000; /* 10 seconds max wait */ + uint32_t train_stat_check; + uint32_t lane; + uint32_t all_lanes_trained = 0; + uint32_t training_complete = 0; + + /* Per HSS successful training logs: training_status should show: + * bit 0 = BCLK_SCLK done + * bit 2 = WRLVL done + * bit 3 = RDGATE done + * bit 4 = DQ_DQS done + * So training_status = 0x1D indicates all phases complete + */ + uint32_t last_train_stat = 0; + uint32_t progress_count = 0; + while (timeout > 0 && !training_complete) { + /* Check training status register */ + train_stat_check = DDRPHY_REG(PHY_TRAINING_STATUS); + + /* Print progress every 100ms if status changes */ + if (train_stat_check != last_train_stat) { + wolfBoot_printf(" Progress: train_stat=0x%x (iter=%d)\n", + train_stat_check, 1000000 - timeout); + last_train_stat = train_stat_check; + progress_count++; + } + + /* Check if all lanes have non-zero write leveling delays (primary indicator) */ + all_lanes_trained = 1; + for (lane = 0; lane < 4; lane++) { + DDRPHY_REG(0x800) = lane; /* lane_select */ + ddr_delay(10); + if (DDRPHY_REG(0x830) == 0) { /* wl_delay_0 */ + all_lanes_trained = 0; + break; + } + } + + /* Training complete when: + * 1. All lanes have WL delays (WRLVL done) + * 2. 
Training status shows WRLVL+RDGATE+DQ_DQS bits set (0x1C or 0x1D) + * Note: gt_state=0xB is normal per HSS logs, not an error + */ + if (all_lanes_trained && (train_stat_check & (WRLVL_BIT | RDGATE_BIT | DQ_DQS_BIT))) { + training_complete = 1; + break; + } + + timeout--; + ddr_delay(10); /* 100us per iteration */ + + /* Print status every 1 second if no progress */ + if ((timeout % 10000) == 0 && progress_count == 0) { + DDRPHY_REG(0x800) = 0; /* Select lane 0 */ + ddr_delay(10); + wolfBoot_printf(" Waiting... train_stat=0x%x wl_dly=0x%x gt_state=0x%x\n", + train_stat_check, + DDRPHY_REG(0x830), /* wl_delay_0 */ + DDRPHY_REG(0x82C)); /* gt_state */ + } + } + + /* Debug: Print training status */ + wolfBoot_printf(" Training status: 0x%x\n", DDRPHY_REG(PHY_TRAINING_STATUS)); + wolfBoot_printf(" training_skip=0x%x training_reset=0x%x\n", + DDRPHY_REG(PHY_TRAINING_SKIP), DDRPHY_REG(PHY_TRAINING_RESET)); + + /* Print per-lane TIP status (from HSS tip_register_status) */ + wolfBoot_printf(" Per-lane status:\n"); + for (lane = 0; lane < 5; lane++) { + DDRPHY_REG(0x800) = lane; /* lane_select */ + ddr_delay(50); + wolfBoot_printf(" L%d: gt_err=0x%x gt_state=0x%x wl_dly=0x%x dqdqs_st=0x%x\n", + lane, + DDRPHY_REG(0x81C), /* gt_err_comb */ + DDRPHY_REG(0x82C), /* gt_state */ + DDRPHY_REG(0x830), /* wl_delay_0 */ + DDRPHY_REG(0x83C)); /* dqdqs_state */ + } + + /* Additional TIP debug info */ + wolfBoot_printf(" TIP cfg: tip_cfg_params=0x%x\n", DDRPHY_REG(PHY_TIP_CFG_PARAMS)); + wolfBoot_printf(" BCLK: pll_phadj=0x%x bclk_sclk=0x%x\n", + DDR_PLL_REG(PLL_PHADJ), DDRPHY_REG(PHY_BCLK_SCLK)); + wolfBoot_printf(" RPC: rpc145=0x%x rpc147=0x%x rpc156=0x%x rpc166=0x%x\n", + DDRPHY_REG(PHY_RPC145), DDRPHY_REG(PHY_RPC147), + DDRPHY_REG(PHY_RPC156), DDRPHY_REG(PHY_RPC166)); + + if (training_complete && all_lanes_trained) { + wolfBoot_printf(" TIP training complete!\n"); + } else { + wolfBoot_printf(" TIP training timeout or incomplete\n"); + wolfBoot_printf(" all_lanes_trained=%d 
train_stat=0x%x\n", + all_lanes_trained, train_stat_check); + } + } + + /* + * Restore ODT and disable WRLVL in MR2 after TIP completes + * Per User Guide: WRLVL mode must be disabled after training + */ + wolfBoot_printf(" Restore ODT and disable WRLVL..."); + { + /* Restore ODT (per HSS: rpc3_odt=0x3 after training) */ + DDRPHY_REG(PHY_RPC3_ODT) = 0x03U; + mb(); + + /* Disable WRLVL in MR2 (bit 7 = 0) */ + DDRCFG_REG(MC_INIT_CS) = 0x01; + DDRCFG_REG(MC_INIT_MR_WR_MASK) = 0xFF; + DDRCFG_REG(MC_INIT_MR_ADDR) = 2; /* MR2 */ + DDRCFG_REG(MC_INIT_MR_WR_DATA) = 0x2D; /* MR2 normal (WRLVL disabled) */ + DDRCFG_REG(MC_INIT_MR_W_REQ) = 0x01; + DDRCFG_REG(MC_INIT_MR_W_REQ) = 0x00; + mb(); + ddr_delay(1000); + wolfBoot_printf("done\n"); + } + + /* Check final training status */ + train_stat = DDRPHY_REG(PHY_TRAINING_STATUS); + wolfBoot_printf(" Final train_stat=0x%x\n", train_stat); + + /* Write calibration using MTC (Memory Test Controller) + * Based on HSS write_calibration_using_mtc() + */ + wolfBoot_printf("Write calib..."); + { + uint32_t cal_data; + uint32_t lane; + uint32_t result; + uint32_t lane_status = 0; + uint32_t lane_calib[5] = {0}; + const uint32_t num_lanes = 4; /* Video Kit has 4 data lanes */ + + /* Enable expert mode for write calibration */ + DDRPHY_REG(PHY_EXPERT_MODE_EN) = 0x00000008UL; + + /* Sweep write calibration offset from 0 to F */ + for (cal_data = 0x00000; cal_data < 0xFFFFF; cal_data += 0x11111) { + /* Set write calibration offset for all lanes */ + DDRPHY_REG(PHY_EXPERT_WRCALIB) = cal_data; + + for (lane = 0; lane < num_lanes; lane++) { + if (lane_status & (1 << lane)) + continue; /* Already calibrated this lane */ + + uint8_t mask = (1 << lane); + + /* Configure MTC for this lane */ + DDRCFG_REG(MT_STOP_ON_ERROR) = 0; + DDRCFG_REG(MT_EN_SINGLE) = 0; + DDRCFG_REG(MT_DATA_PATTERN) = 0; /* Counting pattern */ + DDRCFG_REG(MT_ADDR_PATTERN) = 0; /* Sequential */ + DDRCFG_REG(MT_START_ADDR_0) = 0; + DDRCFG_REG(MT_START_ADDR_1) = 0; + 
DDRCFG_REG(MT_ADDR_BITS) = 20; /* 1MB test size (2^20) */ + + /* Set error masks - unmask only the lane under test */ + DDRCFG_REG(MT_ERROR_MASK_0) = 0xFFFFFFFF; + DDRCFG_REG(MT_ERROR_MASK_1) = 0xFFFFFFFF; + DDRCFG_REG(MT_ERROR_MASK_2) = 0xFFFFFFFF; + DDRCFG_REG(MT_ERROR_MASK_3) = 0xFFFFFFFF; + DDRCFG_REG(MT_ERROR_MASK_4) = 0xFFFFFFFF; + + if (mask & 0x1) { + DDRCFG_REG(MT_ERROR_MASK_0) &= 0xFFFFFF00; + DDRCFG_REG(MT_ERROR_MASK_1) &= 0xFFFFF00F; + DDRCFG_REG(MT_ERROR_MASK_2) &= 0xFFFF00FF; + DDRCFG_REG(MT_ERROR_MASK_3) &= 0xFFF00FFF; + } + if (mask & 0x2) { + DDRCFG_REG(MT_ERROR_MASK_0) &= 0xFFFF00FF; + DDRCFG_REG(MT_ERROR_MASK_1) &= 0xFFF00FFF; + DDRCFG_REG(MT_ERROR_MASK_2) &= 0xFF00FFFF; + DDRCFG_REG(MT_ERROR_MASK_3) &= 0xF00FFFFF; + } + if (mask & 0x4) { + DDRCFG_REG(MT_ERROR_MASK_0) &= 0xFF00FFFF; + DDRCFG_REG(MT_ERROR_MASK_1) &= 0xF00FFFFF; + DDRCFG_REG(MT_ERROR_MASK_2) &= 0x00FFFFFF; + DDRCFG_REG(MT_ERROR_MASK_3) &= 0x0FFFFFFF; + DDRCFG_REG(MT_ERROR_MASK_4) &= 0xFFFFFFF0; + } + if (mask & 0x8) { + DDRCFG_REG(MT_ERROR_MASK_0) &= 0x00FFFFFF; + DDRCFG_REG(MT_ERROR_MASK_1) &= 0x0FFFFFFF; + DDRCFG_REG(MT_ERROR_MASK_2) &= 0xFFFFFFF0; + DDRCFG_REG(MT_ERROR_MASK_3) &= 0xFFFFFF00; + DDRCFG_REG(MT_ERROR_MASK_4) &= 0xFFFFF00F; + } + + /* Run MTC test */ + DDRCFG_REG(MT_EN) = 0; + DDRCFG_REG(MT_EN_SINGLE) = 0; + DDRCFG_REG(MT_EN_SINGLE) = 1; + + /* Wait for MTC completion */ + timeout = 0xFFFFFF; + while ((DDRCFG_REG(MT_DONE_ACK) & 0x01) == 0 && timeout > 0) + timeout--; + + if (timeout == 0) { + wolfBoot_printf("MTC timeout..."); + break; + } + + /* Check result */ + result = DDRCFG_REG(MT_ERROR_STS) & 0x01; + if (result == 0) { + /* Lane passed */ + lane_calib[lane] = cal_data & 0xF; + lane_status |= (1 << lane); + } + } + + /* Check if all lanes calibrated */ + if (lane_status == ((1 << num_lanes) - 1)) + break; + } + + if (lane_status == ((1 << num_lanes) - 1)) { + /* All lanes calibrated - set final calibration value */ + uint32_t final_calib = 0; + for (lane = 0; 
lane < num_lanes; lane++) + final_calib |= (lane_calib[lane] << (lane * 4)); + DDRPHY_REG(PHY_EXPERT_WRCALIB) = final_calib; + wolfBoot_printf("ok (0x%x)\n", final_calib); + } else { + wolfBoot_printf("FAIL (lanes=0x%x)\n", lane_status); + } + } + + train_stat = DDRPHY_REG(PHY_TRAINING_STATUS); + wolfBoot_printf(" Final status=0x%x\n", train_stat); + + /* Step 7: Check controller init done */ + ctrl_stat = DDRCFG_REG(MC_CTRLR_INIT_DONE); + wolfBoot_printf(" Controller INIT_DONE=0x%x\n", ctrl_stat); + + /* Enable auto-refresh */ + DDRCFG_REG(MC_CFG_AUTO_REF_EN) = 0x01; + mb(); + + return 0; +} + +/* DDR Memory Test: quick write/readback smoke test of the first four 32-bit words at 0x80000000. Prints the PHY training status, the segment 0 blocker value and the controller INIT_DONE value, then returns -1 without touching memory if bit 0 of the blocker value is clear. A zero INIT_DONE only produces a warning. Returns 0 when all four patterns read back unchanged, -1 otherwise. NOTE(review): the literal 4 in the loop bound and in the failure message must be kept in sync with the number of entries in patterns by hand. */ +static int memory_test(void) +{ + volatile uint32_t *ddr = (volatile uint32_t *)0x80000000UL; + uint32_t patterns[] = { + 0x55555555UL, + 0xAAAAAAAAUL, + 0x12345678UL, + 0xFEDCBA98UL + }; + uint32_t readback; + int i, errors = 0; + uint32_t train_stat, blocker; + + uint32_t ctrl_done; + + /* Check if training is complete enough: snapshot the three status values. Only blocker gates the test below; train_stat is printed but never checked and ctrl_done only triggers a warning. */ + train_stat = DDRPHY_REG(PHY_TRAINING_STATUS); + blocker = DDR_SEG_REG(SEG0_BLOCKER); + ctrl_done = DDRCFG_REG(MC_CTRLR_INIT_DONE); + wolfBoot_printf("DDR: Memory test @ 0x80000000...\n"); + wolfBoot_printf(" Training=0x%x Blocker=0x%x INIT_DONE=0x%x\n", + train_stat, blocker, ctrl_done); + + if (!(blocker & 0x01)) { + wolfBoot_printf(" ERROR: DDR blocker not disabled!\n"); + return -1; + } + + if (!ctrl_done) { + wolfBoot_printf(" WARNING: Controller INIT_DONE not set\n"); + /* Try memory test anyway to see if it works */ + } + + for (i = 0; i < 4; i++) { + wolfBoot_printf(" [%d] Write 0x%x...", i, patterns[i]); + ddr[i] = patterns[i]; /* pattern i is stored to word i, i.e. byte offset 4*i from 0x80000000 */ + mb(); /* presumably a memory barrier ordering the store before the readback - TODO confirm */ + readback = ddr[i]; /* NOTE(review): the word is read back right after it was written; confirm the read actually reaches DDR rather than a cache or write buffer */ + wolfBoot_printf("Read 0x%x ", readback); + if (readback != patterns[i]) { + wolfBoot_printf("FAIL\n"); + errors++; + } else { + wolfBoot_printf("OK\n"); + } + } + + if (errors == 0) { + wolfBoot_printf(" PASSED\n"); + return 0; + } + + wolfBoot_printf("FAILED (%d/4)\n", errors); + return -1; +} + +/* Main DDR Initialization Entry Point */ +int
mpfs_ddr_init(void)
+{
+    int ret; /* Status of the step just run. Exit codes of this function:
+              * 0 on success, -1 when nwc_init() fails, -2 when run_training()
+              * fails, -3 when memory_test() fails. */
+
+    wolfBoot_printf("\n========================================\n");
+    wolfBoot_printf("MPFS DDR Init (Video Kit LPDDR4 2GB)\n");
+    wolfBoot_printf("MT53D512M32D2DS-053 x32 @ 1600 Mbps\n");
+    wolfBoot_printf("========================================\n");
+
+    /* Step 1: NWC/PLL initialization (failure aborts with -1) */
+    ret = nwc_init();
+    if (ret != 0) {
+        wolfBoot_printf("DDR: NWC init FAILED\n");
+        return -1;
+    }
+
+    /* Step 2: Enable DDR controller clock (sets MSS_PERIPH_DDRC in
+     * SUBBLK_CLOCK_CR; the register is logged before and after) */
+    wolfBoot_printf("DDR: Enable DDRC clock/reset...");
+    wolfBoot_printf("CLK before=0x%x ", SYSREG_REG(SYSREG_SUBBLK_CLOCK_CR_OFF));
+    SYSREG_REG(SYSREG_SUBBLK_CLOCK_CR_OFF) |= MSS_PERIPH_DDRC;
+    mb();
+    wolfBoot_printf("after=0x%x\n", SYSREG_REG(SYSREG_SUBBLK_CLOCK_CR_OFF));
+
+    /* Step 3: Reset DDR controller (pulse the DDRC soft-reset bit: set,
+     * delay, clear, delay) */
+    SYSREG_REG(SYSREG_SOFT_RESET_CR_OFF) |= MSS_PERIPH_DDRC;
+    mb();
+    udelay(1);
+    SYSREG_REG(SYSREG_SOFT_RESET_CR_OFF) &= ~MSS_PERIPH_DDRC;
+    mb();
+    udelay(1);
+    wolfBoot_printf(" RST=0x%x\n", SYSREG_REG(SYSREG_SOFT_RESET_CR_OFF));
+    /* Debug: read back two MC_BASE2 registers (MC_BASE2 is now correctly at
+     * 0x20084000); nothing is written to the controller here */
+    wolfBoot_printf(" Test MC_BASE2@0x%lx: ", DDRCFG_BASE + MC_BASE2);
+    wolfBoot_printf("SR=0x%x ", DDRCFG_REG(MC_CTRLR_SOFT_RESET));
+    wolfBoot_printf("RAS=0x%x\n", DDRCFG_REG(MC_CFG_RAS));
+    wolfBoot_printf("done\n");
+
+    /* Step 4: Setup segments and blocker */
+    setup_segments();
+
+    /* Step 5: Configure controller timing */
+    setup_controller();
+
+    /* Step 6: Configure PHY (a non-zero return is only logged; the
+     * sequence continues) */
+    ret = setup_phy();
+    if (ret != 0)
+        wolfBoot_printf("DDR: PHY setup warning\n");
+
+    /* Step 7: Training reset and clock rotation */
+    training_reset_and_rotate();
+    wolfBoot_printf("DDR: After rotation SR_N=0x%x\n", DDRCFG_REG(MC_CTRLR_SOFT_RESET));
+
+    /* Step 8: TIP configuration (use correct register) */
+    DDRPHY_REG(PHY_TIP_CFG_PARAMS) = LIBERO_SETTING_TIP_CFG_PARAMS;
+    mb();
+
+    /* Step 9: Run training (failure aborts with -2) */
+    ret = run_training();
+    if (ret != 0) {
+        wolfBoot_printf("DDR: Training FAILED\n");
+        return -2;
+    }
+
+    /* Step 10: Memory test */
+    ret = memory_test();
+    if (ret != 0)
+        return -3;
+
+    wolfBoot_printf("DDR: Initialization COMPLETE\n");
+    wolfBoot_printf("========================================\n");
+
+    return 0;
+}
+
+#endif /* WOLFBOOT_RISCV_MMODE */
+
+
+/* ============================================================================
+ * Multi-Hart Support (M-Mode only)
+ *
+ * Helpers used by the E51 (main hart) to publish its state in the main
+ * hart's HLS block and to send software interrupts to the U54 harts.
+ * ============================================================================ */
+#ifdef WOLFBOOT_RISCV_MMODE
+
+/* Linker-provided symbol marking the main hart's HLS block */
+extern uint64_t _main_hart_hls;
+
+/* CLINT MSIP word for a hart: one 32-bit register per hart from CLINT_BASE */
+#define CLINT_MSIP_REG(hart) (*(volatile uint32_t*)(CLINT_BASE + (hart) * 4))
+
+/**
+ * mpfs_get_main_hls - Address of the main hart's HLS block
+ * Returns: _main_hart_hls viewed as an HLS_DATA structure
+ */
+static HLS_DATA* mpfs_get_main_hls(void)
+{
+    HLS_DATA* main_hls = (HLS_DATA*)&_main_hart_hls;
+
+    return main_hls;
+}
+
+/**
+ * mpfs_signal_main_hart_started - Publish "main hart started" in the HLS
+ *
+ * Stores HLS_MAIN_HART_STARTED and the main hart ID into the main hart's
+ * HLS block, then issues a full fence so the stores are ordered before
+ * anything that follows.
+ * NOTE(review): how the secondary harts observe this marker is not visible
+ * in this part of the file.
+ */
+static void mpfs_signal_main_hart_started(void)
+{
+    HLS_DATA* main_hls = mpfs_get_main_hls();
+
+    /* Status marker first, then the owner's hart ID */
+    main_hls->in_wfi_indicator = HLS_MAIN_HART_STARTED;
+    main_hls->my_hart_id = MPFS_FIRST_HART;
+
+    /* Order the stores above ahead of later accesses by this hart */
+    __asm__ volatile("fence iorw, iorw" ::: "memory");
+}
+
+/**
+ * mpfs_wake_secondary_harts - Send a software interrupt to every U54 hart
+ *
+ * Simplified implementation: for each hart from MPFS_FIRST_U54_HART to
+ * MPFS_LAST_U54_HART the MSIP bit is set, a fence is issued and the caller
+ * waits udelay(1000) before moving on. No HLS handshake is performed: the
+ * function neither waits for HLS_OTHER_HART_IN_WFI beforehand nor checks
+ * for an acknowledgement afterwards (per-hart HLS pointers are not
+ * available yet).
+ *
+ * Returns: Number of harts an IPI was sent to (not a confirmed wake count)
+ */
+int mpfs_wake_secondary_harts(void)
+{
+    int target = MPFS_FIRST_U54_HART;
+    int ipis_sent = 0;
+
+    wolfBoot_printf("Waking secondary harts...\n");
+
+    while (target <= MPFS_LAST_U54_HART) {
+        wolfBoot_printf(" Sending IPI to hart %d...", target);
+
+        /* Raise the software interrupt for this hart */
+        CLINT_MSIP_REG(target) = 0x01;
+
+        /* Make sure the MSIP store is issued before the delay */
+        __asm__ volatile("fence iorw, iorw" ::: "memory");
+
+        /* Give the hart time to respond before the next IPI */
+        udelay(1000);
+
+        wolfBoot_printf(" done\n");
+        ipis_sent++;
+        target++;
+    }
+
+    wolfBoot_printf("Woke %d secondary harts\n", ipis_sent);
+
+    return ipis_sent;
+}
+
+/**
+ * secondary_hart_entry - Entry point for secondary harts (U54 cores)
+ *
+ * Each U54 core uses its own MMUART:
+ * Hart 1 -> MMUART1 (/dev/ttyUSB1), Hart 2 -> MMUART2, etc.
+ */
+void secondary_hart_entry(unsigned long hartid, HLS_DATA* hls)
+{
+    /* Message template with placeholder for hart ID at position 5 */
+    char msg[] = "Hart X: Woken, waiting for Linux boot...\n";
+    (void)hls; /* not used by this implementation */
+
+    /* Initialize this hart's dedicated UART (uart_init_hart() returns
+     * without touching hardware for hartid 0 or hartid > 4) */
+    uart_init_hart(hartid);
+
+    /* Update hart ID in message (position 5); one character is written, so
+     * only hart IDs 0-9 render as a decimal digit */
+    msg[5] = '0' + (char)hartid;
+
+    /* Write to this hart's UART; sizeof(msg) - 1 leaves out the
+     * terminating NUL */
+    uart_write_hart(hartid, msg, sizeof(msg) - 1);
+
+    /* Wait for Linux to take over via SBI; this function never returns */
+    while (1) {
+        __asm__ volatile("wfi");
+    }
+}
+#endif /* WOLFBOOT_RISCV_MMODE */
+
 void hal_init(void)
 {
+#ifdef WOLFBOOT_RISCV_MMODE
+    int ddr_ret;
+
+    /* Configure L2 cache controller first (before using L2 scratchpad heavily) */
+    mpfs_config_l2_cache();
+
+    /* Signal to secondary harts that main hart is ready */
+    mpfs_signal_main_hart_started();
+#endif
+
+#ifdef DEBUG_UART
+    /* Enable clock and release from soft reset for debug UART; the
+     * MMUART0 bit is shifted by DEBUG_UART_PORT to select the port */
+    SYSREG_SUBBLK_CLOCK_CR |= (MSS_PERIPH_MMUART0 << DEBUG_UART_PORT);
+    SYSREG_SOFT_RESET_CR &= ~(MSS_PERIPH_MMUART0 << DEBUG_UART_PORT);
+    uart_init();
+#endif
+
     wolfBoot_printf("wolfBoot Version: %s (%s %s)\n",
-        LIBWOLFBOOT_VERSION_STRING,__DATE__, __TIME__);
+        LIBWOLFBOOT_VERSION_STRING, __DATE__, __TIME__);
+#ifdef WOLFBOOT_RISCV_MMODE
+    wolfBoot_printf("Running on E51 (hart 0) in M-mode\n");
+
+    /* Initialize DDR controller (includes NWC/PLL initialization); a
+     * non-zero result is fatal: it is logged and the hart parks in a WFI
+     * loop */
+    ddr_ret = mpfs_ddr_init();
+    if (ddr_ret != 0) {
+        wolfBoot_printf("DDR init failed (%d) - halting\n", ddr_ret);
+        while (1) __asm__ volatile("wfi");
+    }
+#endif
 }
 
 /* ============================================================================
@@ -329,23 +2570,6 @@ void* hal_get_dts_address(void)
 }
 #endif
 
-#if defined(DISK_SDCARD) || defined(DISK_EMMC)
-/* ============================================================================
- * SDHCI Platform HAL Implementation
- * ============================================================================ */
-
-/* Register access
functions for generic SDHCI driver */
-uint32_t sdhci_reg_read(uint32_t offset)
-{
-    return *((volatile uint32_t*)(EMMC_SD_BASE + offset));
-}
-
-void sdhci_reg_write(uint32_t offset, uint32_t val)
-{
-    *((volatile uint32_t*)(EMMC_SD_BASE + offset)) = val;
-}
-#endif /* DISK_SDCARD || DISK_EMMC */
-
 /* ============================================================================
  * PLIC - Platform-Level Interrupt Controller (MPFS250-specific)
  *
@@ -355,7 +2579,36 @@ void sdhci_reg_write(uint32_t offset, uint32_t val)
  * - plic_dispatch_irq(): Dispatch IRQ to appropriate handler
  * ============================================================================ */
 
-/* Get the PLIC context for the current hart in S-mode */
+/* Get the PLIC context for the current hart
+ *
+ * PLIC Context IDs for MPFS250:
+ * Hart 0 (E51): Context 0 = M-mode (E51 has no S-mode)
+ * Hart 1 (U54): Context 1 = M-mode, Context 2 = S-mode
+ * Hart 2 (U54): Context 3 = M-mode, Context 4 = S-mode
+ * Hart 3 (U54): Context 5 = M-mode, Context 6 = S-mode
+ * Hart 4 (U54): Context 7 = M-mode, Context 8 = S-mode
+ */
+#ifdef WOLFBOOT_RISCV_MMODE
+/* M-mode: Read hart ID directly from the mhartid CSR.
+ * The CSR read delivers a full XLEN-wide register value (64 bits in this
+ * RV64 build), so it is captured in an unsigned long and narrowed
+ * explicitly instead of binding the asm output to a 32-bit variable. */
+static uint32_t get_hartid_mmode(void)
+{
+    unsigned long hartid;
+
+    __asm__ volatile("csrr %0, mhartid" : "=r"(hartid));
+    return (uint32_t)hartid;
+}
+
+/* Returns the M-mode PLIC context of the calling hart, following the
+ * table above: 0 for hart 0, (hart_id * 2) - 1 for any other hart. */
+uint32_t plic_get_context(void)
+{
+    uint32_t hart_id = get_hartid_mmode();
+    /* E51 (hart 0): M-mode only, context 0
+     * U54 (harts 1-4): M-mode context = hart_id * 2 - 1 */
+    if (hart_id == 0) {
+        return 0; /* E51 M-mode context */
+    }
+    return (hart_id * 2) - 1; /* U54 M-mode context */
+}
+#else
+/* S-mode: Hart ID passed by boot stage, stored in tp register */
 extern unsigned long get_boot_hartid(void);
 uint32_t plic_get_context(void)
 {
@@ -363,10 +2616,6 @@ uint32_t plic_get_context(void)
     /* Get S-mode context for a given hart (1-4 for U54 cores) */
     return hart_id * 2;
 }
-
-/* Forward declaration of SDHCI IRQ handler */
-#if defined(DISK_SDCARD) || defined(DISK_EMMC)
-extern void sdhci_irq_handler(void);
 #endif
 
 /* Dispatch IRQ to appropriate platform handler */
@@ -393,7 +2642,7 @@ void plic_dispatch_irq(uint32_t irq)
 void sdhci_platform_init(void)
 {
     /* Release MMC controller from reset */
-    SYSREG_SOFT_RESET_CR &= ~SYSREG_SOFT_RESET_CR_MMC;
+    SYSREG_SOFT_RESET_CR &= ~MSS_PERIPH_MMC; /* NOTE(review): only the soft reset is released here; no SUBBLK_CLOCK_CR enable for MMC is visible in this hunk - confirm the clock is switched on elsewhere for M-mode */
 }
 
 /* Platform interrupt setup - called from sdhci_init() */
@@ -420,6 +2669,17 @@ void sdhci_platform_set_bus_mode(int is_emmc)
     (void)is_emmc;
     /* Nothing additional needed for MPFS - mode is set in generic driver */
 }
+
+/* Register access functions for generic SDHCI driver
+ *
+ * Both helpers perform one 32-bit volatile access at EMMC_SD_BASE + offset;
+ * offset is a byte offset and is used as given (no range or alignment
+ * check is made here).
+ */
+uint32_t sdhci_reg_read(uint32_t offset)
+{
+    return *((volatile uint32_t*)(EMMC_SD_BASE + offset));
+}
+
+void sdhci_reg_write(uint32_t offset, uint32_t val)
+{
+    *((volatile uint32_t*)(EMMC_SD_BASE + offset)) = val;
+}
 #endif /* DISK_SDCARD || DISK_EMMC */
 
 /* ============================================================================
@@ -428,10 +2688,6 @@ void sdhci_platform_set_bus_mode(int is_emmc)
 
 #ifdef DEBUG_UART
 
-#ifndef DEBUG_UART_BASE
-#define DEBUG_UART_BASE MSS_UART1_LO_BASE
-#endif
-
 /* Configure baud divisors with fractional baud rate support.
  *
  * UART baud rate divisor formula: divisor = PCLK / (baudrate * 16)
@@ -478,6 +2734,52 @@ static void uart_config_clk(uint32_t baudrate)
     }
 }
 
+/* New APB clock after MSS PLL lock
+ * This should match the configured MSS PLL output 2 (APB/AHB clock).
+ * From HSS: LIBERO_SETTING_MSS_APB_AHB_CLK = 150000000 (150 MHz)
+ */
+#define MSS_APB_AHB_CLK_PLL 150000000UL
+
+/* Program the debug UART (DEBUG_UART_BASE) baud divisor for a given clock.
+ *
+ * baudrate: target baud rate
+ * pclk:     UART input clock in Hz
+ *
+ * The divisor pclk / (16 * baudrate) is evaluated with seven fractional bits
+ * (8 * pclk / baudrate). The bits above them form the integer divisor
+ * (DMR:DLR); the fraction is expressed in 1/64 units and rounded up by the
+ * dropped 1/128 bit (DFR). Nothing is written when the integer divisor does
+ * not fit in 16 bits.
+ *
+ * NOTE(review): the rounded fraction reaches 64 when the low seven bits of
+ * the scaled divisor are all ones - confirm how DFR treats that value.
+ */
+static void uart_config_clk_with_freq(uint32_t baudrate, uint64_t pclk)
+{
+    /* Divisor scaled by 128 */
+    const uint32_t scaled128 = (uint32_t)((8UL * pclk) / baudrate);
+    /* Integer part: everything above the seven fractional bits */
+    const uint32_t whole = scaled128 >> 7;
+    /* Fraction in 1/64 units plus the rounding bit */
+    const uint32_t frac64 = ((scaled128 >> 1) & 0x3Fu) + (scaled128 & 1u);
+
+    if (whole <= (uint32_t)UINT16_MAX) {
+        /* Divisor latch: open with DLAB, write high then low byte, close */
+        MMUART_LCR(DEBUG_UART_BASE) |= DLAB_MASK;
+        MMUART_DMR(DEBUG_UART_BASE) = (uint8_t)(whole >> 8);
+        MMUART_DLR(DEBUG_UART_BASE) = (uint8_t)whole;
+        MMUART_LCR(DEBUG_UART_BASE) &= ~DLAB_MASK;
+
+        /* Fractional mode is used only for integer divisors above 1 */
+        if (whole <= 1u) {
+            MMUART_MM0(DEBUG_UART_BASE) &= ~EFBR_MASK;
+        }
+        else {
+            MMUART_MM0(DEBUG_UART_BASE) |= EFBR_MASK;
+            MMUART_DFR(DEBUG_UART_BASE) = (uint8_t)frac64;
+        }
+    }
+}
+
+/* Re-derive the debug UART divisor for the post-PLL APB clock
+ * (115200 baud at MSS_APB_AHB_CLK_PLL) */
+void hal_uart_reinit(void)
+{
+    const uint32_t baud = 115200;
+
+    uart_config_clk_with_freq(baud, MSS_APB_AHB_CLK_PLL);
+}
+
 void uart_init(void)
 {
     /* Disable special modes: LIN, IrDA, SmartCard */
@@ -533,3 +2835,193 @@ void uart_write(const char* buf, unsigned int sz)
     }
 }
 #endif /* DEBUG_UART */
+
+#ifdef WOLFBOOT_RISCV_MMODE
+/**
+ * uart_init_hart - Initialize UART for a specific hart
+ *
+ * Each U54 core uses its own MMUART:
+ * Hart 0 (E51) -> MMUART0 (already initialized by hal_init)
+ * Hart 1 (U54_1) -> MMUART1
+ * Hart 2 (U54_2) -> MMUART2
+ * Hart 3 (U54_3) -> MMUART3
+ * Hart 4 (U54_4) -> MMUART4
+ *
+ * @hartid: The hart ID (1-4 for U54 cores)
+ */
+void
uart_init_hart(unsigned long hartid)
+{
+    unsigned long base;
+
+    if (hartid == 0 || hartid > 4) {
+        return; /* Hart 0 uses main UART, invalid harts ignored */
+    }
+
+    base = UART_BASE_FOR_HART(hartid);
+
+    /* Enable clock and release from soft reset for this UART
+     * The peripheral bit positions are:
+     * MMUART0 = bit 5, MMUART1 = bit 6, MMUART2 = bit 7, etc.
+     * MSS_PERIPH_MMUART0 = (1 << 5), so shift by hartid
+     * NOTE(review): both SYSREG updates below are plain read-modify-write
+     * sequences on registers shared by all harts - confirm that concurrent
+     * callers cannot lose each other's bits. */
+    SYSREG_SUBBLK_CLOCK_CR |= (MSS_PERIPH_MMUART0 << hartid);
+
+    /* Memory barrier before modifying reset */
+    __asm__ volatile("fence iorw, iorw" ::: "memory");
+
+    /* Release from soft reset */
+    SYSREG_SOFT_RESET_CR &= ~(MSS_PERIPH_MMUART0 << hartid);
+
+    /* Memory barrier */
+    __asm__ volatile("fence iorw, iorw" ::: "memory");
+
+    /* Longer delay for clock to stabilize (critical for reliable UART) */
+    udelay(100);
+
+    /* Disable special modes: LIN, IrDA, SmartCard */
+    MMUART_MM0(base) &= ~ELIN_MASK;
+    MMUART_MM1(base) &= ~EIRD_MASK;
+    MMUART_MM2(base) &= ~EERR_MASK;
+
+    /* Disable interrupts */
+    MMUART_IER(base) = 0u;
+
+    /* Reset and configure FIFOs */
+    MMUART_FCR(base) = 0u;
+    MMUART_FCR(base) |= CLEAR_RX_FIFO_MASK | CLEAR_TX_FIFO_MASK;
+    MMUART_FCR(base) |= RXRDY_TXRDYN_EN_MASK;
+
+    /* Disable loopback */
+    MMUART_MCR(base) &= ~(LOOP_MASK | RLOOP_MASK);
+
+    /* Set LSB-first */
+    MMUART_MM1(base) &= ~(E_MSB_TX_MASK | E_MSB_RX_MASK);
+
+    /* Disable AFM, single wire mode */
+    MMUART_MM2(base) &= ~(EAFM_MASK | ESWM_MASK);
+
+    /* Disable TX time guard, RX timeout, fractional baud */
+    MMUART_MM0(base) &= ~(ETTG_MASK | ERTO_MASK | EFBR_MASK);
+
+    /* Clear timing registers */
+    MMUART_GFR(base) = 0u;
+    MMUART_TTG(base) = 0u;
+    MMUART_RTO(base) = 0u;
+
+    /* Configure baud rate (115200) with the same divisor arithmetic as the
+     * debug-UART path.
+     * NOTE(review): pclk is the compile-time MSS_APB_AHB_CLK, whereas
+     * hal_uart_reinit() reprograms the debug UART for MSS_APB_AHB_CLK_PLL
+     * after the MSS PLL locks - confirm which clock is live when the
+     * secondary harts get here. */
+    {
+        const uint64_t pclk = MSS_APB_AHB_CLK;
+        const uint32_t baudrate = 115200;
+
+        /* Scale up for precision: (PCLK * 128) / (baudrate * 16) */
+        uint32_t div_x128 = (uint32_t)((8UL * pclk) / baudrate);
+        uint32_t div_x64 = div_x128 / 2u;
+
+        /* Extract integer and fractional parts */
+        uint32_t div_int = div_x64 / 64u;
+        uint32_t div_frac = div_x64 - (div_int * 64u);
+
+        /* Apply rounding correction from x128 calculation */
+        div_frac += (div_x128 - (div_int * 128u)) - (div_frac * 2u);
+
+        /* Enable DLAB to access divisor registers */
+        MMUART_LCR(base) |= DLAB_MASK;
+
+        /* Write DMR before DLR */
+        MMUART_DMR(base) = (uint8_t)(div_int >> 8);
+        MMUART_DLR(base) = (uint8_t)div_int;
+
+        /* Clear DLAB */
+        MMUART_LCR(base) &= ~DLAB_MASK;
+
+        /* Use the fractional divisor whenever the integer divisor is above
+         * 1 - the same rule uart_config_clk_with_freq() applies - instead
+         * of keying the decision on the fraction being non-zero. */
+        if (div_int > 1u) {
+            MMUART_MM0(base) |= EFBR_MASK;
+            MMUART_DFR(base) = (uint8_t)div_frac;
+        } else {
+            MMUART_MM0(base) &= ~EFBR_MASK;
+        }
+    }
+
+    /* Set line config: 8N1 */
+    MMUART_LCR(base) = MSS_UART_DATA_8_BITS |
+                       MSS_UART_NO_PARITY |
+                       MSS_UART_ONE_STOP_BIT;
+
+    /* Small delay after configuration */
+    udelay(10);
+}
+
+/**
+ * uart_write_hart - Write string to a specific hart's UART
+ *
+ * Busy-waits on the THRE flag before every byte and sends '\r' ahead of
+ * each '\n'. Calls with hartid > 4 are ignored.
+ *
+ * @hartid: The hart ID (0-4)
+ * @buf: Buffer to write
+ * @sz: Number of bytes to write
+ */
+void uart_write_hart(unsigned long hartid, const char* buf, unsigned int sz)
+{
+    unsigned long base;
+    uint32_t pos = 0;
+
+    if (hartid > 4) {
+        return;
+    }
+
+    base = UART_BASE_FOR_HART(hartid);
+
+    while (sz-- > 0) {
+        char c = buf[pos++];
+        if (c == '\n') {
+            while ((MMUART_LSR(base) & MSS_UART_THRE) == 0);
+            MMUART_THR(base) = '\r';
+        }
+        while ((MMUART_LSR(base) & MSS_UART_THRE) == 0);
+        MMUART_THR(base) = c;
+    }
+}
+
+/**
+ * uart_printf_hart - Minimal printf-style writer for a specific hart's UART
+ *
+ * Placeholder formatter: the variadic arguments are never read. The format
+ * string is copied into a 128-byte stack buffer, recognised conversions are
+ * replaced by bracketed markers, and the result is sent with
+ * uart_write_hart().
+ */
+static void uart_printf_hart(unsigned long hartid, const char* fmt, ...)
+{
+    /* Stub formatter: no variadic argument is consumed. %d, %x and %lu are
+     * rendered as "[N]", %s as "[S]"; any other "%c" pair is copied through
+     * unchanged. Output is truncated to sizeof(buf) - 1 characters.
+     *
+     * Every store into buf is bounds-checked, so a placeholder that starts
+     * near the end of the buffer is cut short instead of running past it,
+     * and a '%' that is the last character of fmt is emitted literally
+     * without stepping beyond the terminator. */
+    char buf[128];
+    const unsigned int cap = (unsigned int)sizeof(buf) - 1u; /* keep room for NUL */
+    unsigned int len = 0;
+    const char* p = fmt;
+
+    if (fmt == 0)
+        return;
+
+    while (*p != '\0' && len < cap) {
+        char out[3];          /* characters produced by this step */
+        unsigned int n = 0;
+        unsigned int i;
+
+        if (*p != '%' || p[1] == '\0') {
+            /* Ordinary character, or a lone '%' ending the string */
+            out[n++] = *p++;
+        } else if (p[1] == 'l' && p[2] == 'u') {
+            /* %lu - unsigned long (placeholder only) */
+            out[n++] = '[';
+            out[n++] = 'N';
+            out[n++] = ']';
+            p += 3;
+        } else if (p[1] == 'd' || p[1] == 'x') {
+            /* %d / %x (placeholder only) */
+            out[n++] = '[';
+            out[n++] = 'N';
+            out[n++] = ']';
+            p += 2;
+        } else if (p[1] == 's') {
+            /* %s (placeholder only) */
+            out[n++] = '[';
+            out[n++] = 'S';
+            out[n++] = ']';
+            p += 2;
+        } else {
+            /* Unknown conversion: pass both characters through */
+            out[n++] = '%';
+            out[n++] = p[1];
+            p += 2;
+        }
+
+        for (i = 0; i < n && len < cap; i++)
+            buf[len++] = out[i];
+    }
+    buf[len] = '\0';
+
+    uart_write_hart(hartid, buf, len);
+}
+#endif /* WOLFBOOT_RISCV_MMODE */
diff --git a/hal/mpfs250.h b/hal/mpfs250.h
index 36c3bda239..2e11ec37d5 100644
--- a/hal/mpfs250.h
+++ b/hal/mpfs250.h
@@ -27,8 +27,17 @@
 
 /* PolarFire SoC MPFS250T board specific configuration */
 
-/* APB/AHB Clock Frequency */
-#define MSS_APB_AHB_CLK 150000000
+/* APB/AHB Clock Frequency
+ * M-mode (out of reset): 40 MHz
+ * S-mode (after HSS): 150 MHz
+ */
+#ifndef MSS_APB_AHB_CLK
+    #ifdef WOLFBOOT_RISCV_MMODE
+        #define MSS_APB_AHB_CLK 40000000
+    #else
+        #define MSS_APB_AHB_CLK 150000000
+    #endif
+#endif
 
 /* Hardware Base Address */
 #define SYSREG_BASE 0x20002000
@@ -36,23 +45,28 @@
 /* Write "0xDEAD" to cause a full MSS reset*/
 #define SYSREG_MSS_RESET_CR (*((volatile uint32_t*)(SYSREG_BASE + 0x18)))
 
+/* Sub-block Clock Control Register (enables peripheral clocks) */
+#define SYSREG_SUBBLK_CLOCK_CR (*((volatile uint32_t*)(SYSREG_BASE + 0x84)))
+
 /* Peripheral Soft Reset Control Register */
 #define SYSREG_SOFT_RESET_CR (*((volatile uint32_t*)(SYSREG_BASE + 0x88)))
-#define SYSREG_SOFT_RESET_CR_ENVM (1U << 0)
-#define SYSREG_SOFT_RESET_CR_MMC (1U << 3)
-#define SYSREG_SOFT_RESET_CR_MMUART0 (1U << 5)
-#define SYSREG_SOFT_RESET_CR_MMUART1 (1U << 6)
-#define SYSREG_SOFT_RESET_CR_MMUART2 (1U << 7)
-#define SYSREG_SOFT_RESET_CR_MMUART3 (1U << 8)
-#define
SYSREG_SOFT_RESET_CR_MMUART4 (1U << 9)
-#define SYSREG_SOFT_RESET_CR_SPI0 (1U << 10)
-#define SYSREG_SOFT_RESET_CR_SPI1 (1U << 11)
-#define SYSREG_SOFT_RESET_CR_QSPI (1U << 19)
-#define SYSREG_SOFT_RESET_CR_GPIO0 (1U << 20)
-#define SYSREG_SOFT_RESET_CR_GPIO1 (1U << 21)
-#define SYSREG_SOFT_RESET_CR_GPIO2 (1U << 22)
-#define SYSREG_SOFT_RESET_CR_DDRC (1U << 23)
-#define SYSREG_SOFT_RESET_CR_ATHENA (1U << 28) /* Crypto hardware accelerator */
+
+/* MSS Peripheral control bits (shared by SUBBLK_CLOCK_CR and SOFT_RESET_CR)
+ * The MMUART0..MMUART4 bits are consecutive (5..9), which is what allows
+ * callers to select a port with MSS_PERIPH_MMUART0 << port. */
+#define MSS_PERIPH_ENVM (1U << 0)
+#define MSS_PERIPH_MMC (1U << 3)
+#define MSS_PERIPH_MMUART0 (1U << 5)
+#define MSS_PERIPH_MMUART1 (1U << 6)
+#define MSS_PERIPH_MMUART2 (1U << 7)
+#define MSS_PERIPH_MMUART3 (1U << 8)
+#define MSS_PERIPH_MMUART4 (1U << 9)
+#define MSS_PERIPH_SPI0 (1U << 10)
+#define MSS_PERIPH_SPI1 (1U << 11)
+#define MSS_PERIPH_QSPI (1U << 19)
+#define MSS_PERIPH_GPIO0 (1U << 20)
+#define MSS_PERIPH_GPIO1 (1U << 21)
+#define MSS_PERIPH_GPIO2 (1U << 22)
+#define MSS_PERIPH_DDRC (1U << 23)
+#define MSS_PERIPH_ATHENA (1U << 28) /* Crypto hardware accelerator */
 
 /* UART */
@@ -68,6 +82,42 @@
 #define MSS_UART3_HI_BASE 0x28104000UL
 #define MSS_UART4_HI_BASE 0x28106000UL
 
+/* UART base address array for per-hart access (LO addresses for M-mode)
+ *
+ * UART_BASE_FOR_HART(hart) indexes this table and substitutes entry 0
+ * (MMUART0) for any hart value of 5 or more. The argument is evaluated
+ * twice, so pass an expression without side effects. The table is a static
+ * const object, so every translation unit including this header gets its
+ * own copy. */
+#ifndef __ASSEMBLER__
+static const unsigned long MSS_UART_BASE_ADDR[] = {
+    MSS_UART0_LO_BASE, /* Hart 0 (E51) -> MMUART0 */
+    MSS_UART1_LO_BASE, /* Hart 1 (U54_1) -> MMUART1 */
+    MSS_UART2_LO_BASE, /* Hart 2 (U54_2) -> MMUART2 */
+    MSS_UART3_LO_BASE, /* Hart 3 (U54_3) -> MMUART3 */
+    MSS_UART4_LO_BASE /* Hart 4 (U54_4) -> MMUART4 */
+};
+#define UART_BASE_FOR_HART(hart) (MSS_UART_BASE_ADDR[(hart) < 5 ? (hart) : 0])
+#endif /* __ASSEMBLER__ */
+
+/* Debug UART port selection (0-4): M-mode defaults to UART0, S-mode to UART1 */
+#ifndef DEBUG_UART_PORT
+    #ifdef WOLFBOOT_RISCV_MMODE
+        #define DEBUG_UART_PORT 0
+    #else
+        #define DEBUG_UART_PORT 1
+    #endif
+#endif
+
+/* Derive base address from port number (always the LO alias; a port
+ * outside 0-4 stops the build with #error) */
+#if DEBUG_UART_PORT == 0
+    #define DEBUG_UART_BASE MSS_UART0_LO_BASE
+#elif DEBUG_UART_PORT == 1
+    #define DEBUG_UART_BASE MSS_UART1_LO_BASE
+#elif DEBUG_UART_PORT == 2
+    #define DEBUG_UART_BASE MSS_UART2_LO_BASE
+#elif DEBUG_UART_PORT == 3
+    #define DEBUG_UART_BASE MSS_UART3_LO_BASE
+#elif DEBUG_UART_PORT == 4
+    #define DEBUG_UART_BASE MSS_UART4_LO_BASE
+#else
+    #error "Invalid DEBUG_UART_PORT (must be 0-4)"
+#endif
+
 #define MMUART_RBR(base) *((volatile uint8_t*)((base)) + 0x00) /* Receiver buffer register */
 #define MMUART_IER(base) *((volatile uint8_t*)((base)) + 0x04) /* Interrupt enable register */
 #define MMUART_IIR(base) *((volatile uint8_t*)((base)) + 0x08) /* Interrupt ID register */
@@ -169,6 +219,562 @@
 
 #define ATHENA_BASE (SYSREG_BASE + 0x125000)
 
+/* ============================================================================
+ * L2 Cache Controller (CACHE_CTRL @ 0x02010000)
+ * Controls cache ways, way masks, and scratchpad configuration
+ * ============================================================================ */
+#define L2_CACHE_BASE 0x02010000UL
+
+/* L2 Cache Control Registers */
+#define L2_CONFIG (*(volatile uint64_t*)(L2_CACHE_BASE + 0x000))
+#define L2_WAY_ENABLE (*(volatile uint64_t*)(L2_CACHE_BASE + 0x008))
+#define L2_FLUSH64 (*(volatile uint64_t*)(L2_CACHE_BASE + 0x200))
+#define L2_FLUSH32 (*(volatile uint32_t*)(L2_CACHE_BASE + 0x240))
+
+/* Way Mask Registers - control which cache ways each master can access
+ * Value 0xFF = access to ways 0-7 (cache ways)
+ * Scratchpad ways (8-11) require explicit enabling */
+#define L2_WAY_MASK_DMA (*(volatile uint64_t*)(L2_CACHE_BASE + 0x800))
+#define L2_WAY_MASK_AXI4_PORT0 (*(volatile
uint64_t*)(L2_CACHE_BASE + 0x808))
+#define L2_WAY_MASK_AXI4_PORT1 (*(volatile uint64_t*)(L2_CACHE_BASE + 0x810))
+#define L2_WAY_MASK_AXI4_PORT2 (*(volatile uint64_t*)(L2_CACHE_BASE + 0x818))
+#define L2_WAY_MASK_AXI4_PORT3 (*(volatile uint64_t*)(L2_CACHE_BASE + 0x820))
+#define L2_WAY_MASK_E51_DCACHE (*(volatile uint64_t*)(L2_CACHE_BASE + 0x828))
+#define L2_WAY_MASK_E51_ICACHE (*(volatile uint64_t*)(L2_CACHE_BASE + 0x830))
+#define L2_WAY_MASK_U54_1_DCACHE (*(volatile uint64_t*)(L2_CACHE_BASE + 0x838))
+#define L2_WAY_MASK_U54_1_ICACHE (*(volatile uint64_t*)(L2_CACHE_BASE + 0x840))
+#define L2_WAY_MASK_U54_2_DCACHE (*(volatile uint64_t*)(L2_CACHE_BASE + 0x848))
+#define L2_WAY_MASK_U54_2_ICACHE (*(volatile uint64_t*)(L2_CACHE_BASE + 0x850))
+#define L2_WAY_MASK_U54_3_DCACHE (*(volatile uint64_t*)(L2_CACHE_BASE + 0x858))
+#define L2_WAY_MASK_U54_3_ICACHE (*(volatile uint64_t*)(L2_CACHE_BASE + 0x860))
+#define L2_WAY_MASK_U54_4_DCACHE (*(volatile uint64_t*)(L2_CACHE_BASE + 0x868))
+#define L2_WAY_MASK_U54_4_ICACHE (*(volatile uint64_t*)(L2_CACHE_BASE + 0x870))
+
+/* L2 Shutdown Control Register */
+#define SYSREG_L2_SHUTDOWN_CR (*(volatile uint32_t*)(SYSREG_BASE + 0x174))
+
+/* L2 Cache/Scratchpad constants
+ * L2_SCRATCH_SIZE evaluates to 4 * 0x20000 = 0x80000 bytes. */
+#define L2_NUM_CACHE_WAYS 8 /* Ways 0-7 are cache */
+#define L2_NUM_SCRATCH_WAYS 4 /* Ways 8-11 are scratchpad */
+#define L2_WAY_BYTE_LENGTH 0x20000 /* 128KB per way */
+#define L2_SCRATCH_BASE 0x0A000000UL
+#define L2_SCRATCH_SIZE (L2_NUM_SCRATCH_WAYS * L2_WAY_BYTE_LENGTH) /* 512KB */
+
+/* Way enable values
+ * These are written here as bit masks, one bit per way.
+ * NOTE(review): SiFive-style L2 controllers document WayEnable as the index
+ * of the highest enabled way rather than a per-way mask - confirm the
+ * encoding against the PolarFire SoC register map before writing these
+ * values to L2_WAY_ENABLE. */
+#define L2_WAY_ENABLE_RESET 0x01 /* Only way 0 at reset */
+#define L2_WAY_ENABLE_ALL_CACHE 0xFF /* Ways 0-7 (all cache ways) */
+#define L2_WAY_ENABLE_WITH_SCRATCH 0x0FFF /* Ways 0-11 (cache + scratchpad) */
+
+/* Way mask for cache-only access (no scratchpad) */
+#define L2_WAY_MASK_CACHE_ONLY 0xFF
+
+
+/* ============================================================================
+ * NWC (Northwest Corner) - Clock and System Configuration
+ *
+ * The NWC contains clocks, PLLs, SGMII, and DDR PHY configuration. + * These registers must be configured for proper system operation. + * ============================================================================ */ + +/* SCB Configuration Block (SCBCFG @ 0x37080000) */ +#define SCBCFG_BASE 0x37080000UL +#define SCBCFG_TIMER (*(volatile uint32_t*)(SCBCFG_BASE + 0x08)) + +/* MSS_SCB_ACCESS_CONFIG value for proper SCB access timing */ +#define MSS_SCB_ACCESS_CONFIG 0x0008A080UL + +/* DDR SGMII PHY Configuration (CFG_DDR_SGMII_PHY @ 0x20007000) */ +#define CFG_DDR_SGMII_PHY_BASE 0x20007000UL +#define DDRPHY_STARTUP (*(volatile uint32_t*)(CFG_DDR_SGMII_PHY_BASE + 0x008)) +#define DDRPHY_DYN_CNTL (*(volatile uint32_t*)(CFG_DDR_SGMII_PHY_BASE + 0xC1C)) + +/* DDR PHY startup configuration value (from working DDR demo) */ +#define DDRPHY_STARTUP_CONFIG 0x003F1F00UL +#define DDRPHY_DYN_CNTL_CONFIG 0x0000047FUL + +/* DFI APB interface control (enables DDR PHY APB access) */ +#define SYSREG_DFIAPB_CR (*(volatile uint32_t*)(SYSREG_BASE + 0x98)) + +/* CLINT - Core Local Interruptor (for timer and software interrupts) + * Note: CLINT macros are defined in hal/riscv.h, only define base if not present */ +#ifndef CLINT_BASE +#define CLINT_BASE 0x02000000UL +#endif + +/* RTC Clock Frequency (1 MHz after divisor) */ +#define RTC_CLOCK_FREQ 1000000UL + + +/* ============================================================================ + * Hart Local Storage (HLS) - Per-hart communication structure + * + * Used for inter-hart communication during boot. + * Located at top of each hart's stack (sp - 64). 
+ * ============================================================================ */
+#define HLS_DEBUG_AREA_SIZE 64
+
+#ifndef __ASSEMBLER__
+typedef struct {
+    volatile uint32_t in_wfi_indicator; /* 0x00: Hart status indicator */
+    volatile uint32_t my_hart_id; /* 0x04: Hart ID */
+    volatile uint32_t shared_mem_marker; /* 0x08: Init marker */
+    volatile uint32_t shared_mem_status; /* 0x0C: Status */
+    volatile uint64_t* shared_mem; /* 0x10: Shared memory pointer */
+    volatile uint64_t reserved[2]; /* 0x18: Reserved/padding (ends at 0x28) */
+} HLS_DATA; /* Size: 40 bytes (0x28) with 8-byte pointers; this fits inside,
+             * but does not fill, the 64-byte HLS_DEBUG_AREA_SIZE region */
+#endif /* __ASSEMBLER__ */
+
+/* HLS status indicator values */
+#define HLS_MAIN_HART_STARTED 0x12344321UL
+#define HLS_OTHER_HART_IN_WFI 0x12345678UL
+#define HLS_OTHER_HART_PASSED_WFI 0x87654321UL
+#define HLS_MAIN_HART_FIN_INIT 0x55555555UL
+
+/* Number of harts on MPFS */
+#define MPFS_NUM_HARTS 5
+#define MPFS_FIRST_HART 0 /* E51 is hart 0 */
+#define MPFS_FIRST_U54_HART 1 /* First U54 is hart 1 */
+#define MPFS_LAST_U54_HART 4 /* Last U54 is hart 4 */
+
+/* Stack configuration per hart (default used when the build does not
+ * define STACK_SIZE_PER_HART) */
+#ifndef STACK_SIZE_PER_HART
+#define STACK_SIZE_PER_HART 8192
+#endif
+
+/* Multi-hart function declarations (C only, M-mode builds only) */
+#ifndef __ASSEMBLER__
+#ifdef WOLFBOOT_RISCV_MMODE
+int mpfs_wake_secondary_harts(void);
+void secondary_hart_entry(unsigned long hartid, HLS_DATA* hls);
+void uart_init_hart(unsigned long hartid);
+void uart_write_hart(unsigned long hartid, const char* buf, unsigned int sz);
+#endif
+#endif /* __ASSEMBLER__ */
+
+
+/* ============================================================================
+ * DDR Controller and PHY Configuration
+ *
+ * MPFS DDR subsystem consists of:
+ * - DDR Controller (DDRCFG_BASE @ 0x20080000) - timing, addressing, refresh
+ * - DDR PHY (CFG_DDR_SGMII_PHY @ 0x20007000) - physical interface, training
+ * - Segment registers for address translation
+ * - SCB PLLs for clock generation
+ *
+ * Video Kit: 4x MT40A512M16LY-075:E (LPDDR4, 2GB total)
+ * NOTE(review): the banner printed by mpfs_ddr_init() names the memory as
+ * MT53D512M32D2DS-053 x32 (LPDDR4); the part number above does not match
+ * it - confirm which device is fitted and align the two.
+ *
============================================================================ */ + +/* DDR Base Addresses */ +#define SYSREGSCB_BASE 0x20003000UL +#define DDRCFG_BASE 0x20080000UL /* DDR Controller CSR APB (from HSS) */ +#define DDR_SEG_BASE 0x20005D00UL /* From HSS mss_seg.h */ + +/* SCB PLL Bases */ +#define SCB_MSS_PLL_BASE 0x3E001000UL +#define SCB_DDR_PLL_BASE 0x3E010000UL + +/* Clock Fabric Mux bases */ +#define SCB_CFM_MSS_BASE 0x3E002000UL +#define SCB_CFM_SGMII_BASE 0x3E200000UL + +/* DDR Bank Controller (for NV map reset during VREF training) */ +#define SCB_BANKCONT_DDR_BASE 0x3E020000UL + +/* Register Access Macros */ +#define SYSREG_REG(off) (*(volatile uint32_t*)(SYSREG_BASE + (off))) +#define SYSREGSCB_REG(off) (*(volatile uint32_t*)(SYSREGSCB_BASE + (off))) +#define DDRCFG_REG(off) (*(volatile uint32_t*)(DDRCFG_BASE + (off))) +#define DDRPHY_REG(off) (*(volatile uint32_t*)(CFG_DDR_SGMII_PHY_BASE + (off))) +#define DDR_BANKCONT_REG(off) (*(volatile uint32_t*)(SCB_BANKCONT_DDR_BASE + (off))) +#define DDR_SEG_REG(off) (*(volatile uint32_t*)(DDR_SEG_BASE + (off))) +#define SCBCFG_REG(off) (*(volatile uint32_t*)(SCBCFG_BASE + (off))) +#define MSS_PLL_REG(off) (*(volatile uint32_t*)(SCB_MSS_PLL_BASE + (off))) +#define DDR_PLL_REG(off) (*(volatile uint32_t*)(SCB_DDR_PLL_BASE + (off))) +#define CFM_MSS_REG(off) (*(volatile uint32_t*)(SCB_CFM_MSS_BASE + (off))) +#define CFM_SGMII_REG(off) (*(volatile uint32_t*)(SCB_CFM_SGMII_BASE + (off))) + +/* SYSREG Offsets */ +#define SYSREG_SUBBLK_CLOCK_CR_OFF 0x84 +#define SYSREG_SOFT_RESET_CR_OFF 0x88 +#define SYSREG_DFIAPB_CR_OFF 0x98 +#define MSSIO_CONTROL_CR_OFF 0x1BC + +/* PLL Register Offsets */ +#define PLL_SOFT_RESET 0x000 +#define PLL_CTRL 0x004 +#define PLL_REF_FB 0x008 +#define PLL_FRACN 0x00C +#define PLL_DIV_0_1 0x010 +#define PLL_DIV_2_3 0x014 +#define PLL_CTRL2 0x018 +#define PLL_PHADJ 0x020 +#define PLL_SSCG_0 0x024 +#define PLL_SSCG_1 0x028 +#define PLL_SSCG_2 0x02C +#define PLL_SSCG_3 0x030 + +/* PLL 
Control Bits */ +#define PLL_POWERDOWN_B (1UL << 0) +#define PLL_LOCK_BIT (1UL << 25) +#define PLL_INIT_OUT_RESET 0x00000003UL + +/* CFM Register Offsets */ +#define CFM_BCLKMUX 0x004 +#define CFM_PLL_CKMUX 0x008 +#define CFM_MSSCLKMUX 0x00C +#define CFM_FMETER_ADDR 0x014 +#define CFM_FMETER_DATAW 0x018 + +/* SGMII CFM Register Offsets (at SCB_CFM_SGMII_BASE 0x3E200000) */ +#define CFM_SGMII_SOFT_RESET 0x000 /* Soft reset */ +#define CFM_SGMII_RFCKMUX 0x004 /* Reference clock mux - routes refclk to DDR/SGMII PLLs */ +#define CFM_SGMII_SGMII_CLKMUX 0x008 /* SGMII clock mux */ +#define CFM_SGMII_SPARE0 0x00C /* Spare register */ +#define CFM_SGMII_CLK_XCVR 0x010 /* Clock receiver config */ + +/* DDR PHY Register Offsets */ +#define PHY_SOFT_RESET 0x000 +#define PHY_MODE 0x004 +#define PHY_STARTUP 0x008 +#define PHY_PLL_CTRL_MAIN 0x084 +#define PHY_DPC_BITS 0x184 +#define PHY_BANK_STATUS 0x188 +#define PHY_IOC_REG0 0x204 +#define PHY_IOC_REG1 0x208 +#define PHY_IOC_REG2 0x20C +#define PHY_IOC_REG3 0x210 +#define PHY_IOC_REG6 0x21C /* Calibration reset/clock divider */ +#define PHY_DYN_CNTL 0xC1C /* Correct offset from HSS mss_ddr_sgmii_phy_defs.h */ +#define PHY_BCLK_SCLK 0x808 +#define PHY_TRAINING_SKIP 0x80C +#define PHY_TRAINING_START 0x810 +#define PHY_TRAINING_STATUS 0x814 +#define PHY_TRAINING_RESET 0x818 +#define PHY_TIP_CFG 0x828 +#define PHY_TIP_CFG_PARAMS 0x8D0 /* TIP configuration parameters */ +#define PHY_EXPERT_MODE_EN 0x878 +/* Expert delay control registers - corrected offsets from HSS */ +#define PHY_EXPERT_DLYCNT_MOVE0 0x87C /* Delay count move reg0 */ +#define PHY_EXPERT_DLYCNT_MOVE1 0x880 /* Delay count move reg1 (CA training) */ +#define PHY_EXPERT_DLYCNT_DIRECTION0 0x884 /* Delay direction reg0 */ +#define PHY_EXPERT_DLYCNT_DIR1 0x888 /* Delay direction reg1 */ +#define PHY_EXPERT_DLYCNT_LOAD0 0x88C /* Delay load reg0 */ +#define PHY_EXPERT_DLYCNT_LOAD1 0x890 /* Delay load reg1 */ +#define PHY_EXPERT_DFI_STATUS_TO_SHIM 0x8CC /* DFI status 
override to shim */ +#define PHY_LANE_ALIGN_FIFO_CTRL 0x8D8 /* Lane alignment FIFO control */ +#define PHY_EXPERT_MV_RD_DLY 0x89C +#define PHY_EXPERT_DLYCNT_PAUSE 0x8A0 +#define PHY_EXPERT_PLLCNT 0x8A4 +#define PHY_EXPERT_DQ_READBACK 0x8A8 +#define PHY_EXPERT_ADDCMD_READBACK 0x8AC /* Bits 13:12 = rx_bclksclk, 3:0 = rx_ck */ +#define PHY_EXPERT_DFI_STATUS 0x8B0 +#define PHY_RPC95_IBUFMD_ADDCMD 0x57C /* LPDDR4 Input Buffer Mode - ADDCMD */ +#define PHY_RPC96_IBUFMD_CLK 0x580 /* LPDDR4 Input Buffer Mode - CLK */ +#define PHY_RPC97_IBUFMD_DQ 0x584 /* LPDDR4 Input Buffer Mode - DQ */ +#define PHY_RPC98_IBUFMD_DQS 0x588 /* LPDDR4 Input Buffer Mode - DQS */ +#define PHY_RPC145 0x644 /* ADDCMD delay offset (A9 loopback) */ +#define PHY_RPC147 0x64C /* DDR clock loopback delay */ +#define PHY_RPC156 0x670 +#define PHY_RPC166 0x698 +#define PHY_RPC168 0x6A0 /* RX_MD_CLKN for LPDDR4 training */ +#define PHY_RPC220 0x770 + +/* LPDDR4 Input Buffer Mode settings (from Libero config) */ +#define LIBERO_SETTING_RPC_IBUFMD_ADDCMD 0x00000003UL +#define LIBERO_SETTING_RPC_IBUFMD_CLK 0x00000004UL +#define LIBERO_SETTING_RPC_IBUFMD_DQ 0x00000003UL +#define LIBERO_SETTING_RPC_IBUFMD_DQS 0x00000004UL + +/* ODT (On-Die Termination) RPC registers */ +#define PHY_RPC1_ODT 0x384 /* ODT_CA - Command/Address */ +#define PHY_RPC2_ODT 0x388 /* ODT_CLK - Clock */ +#define PHY_RPC3_ODT 0x38C /* ODT_DQ - Data (set to 0 for WRLVL, 3 normally) */ +#define PHY_RPC4_ODT 0x390 /* ODT_DQS - Data Strobe */ + +/* PVT calibration bits */ +#define PVT_CALIB_START (1U << 0) +#define PVT_CALIB_LOCK (1U << 14) /* Bit 14 for calib_lock */ +#define PVT_CALIB_STATUS (1U << 2) /* Bit 2 for sro_calib_status */ +#define PVT_IOEN_OUT (1U << 4) /* Bit 4 for sro_ioen_out */ + +/* IOSCB IO Calibration DDR base (SCB space for PVT calibration) */ +#define IOSCB_IO_CALIB_DDR_BASE 0x3E040000UL +#define IOSCB_IO_CALIB_DDR_REG(off) (*(volatile uint32_t*)(IOSCB_IO_CALIB_DDR_BASE + (off))) +#define IOSCB_SOFT_RESET 0x000 /* Soft 
reset register */ +#define IOSCB_IOC_REG0 0x004 /* IOC_REG0 in SCB space */ +#define IOSCB_IOC_REG1 0x008 /* IOC_REG1 in SCB space */ + +/* DDR Controller Register Offsets + * + * From HSS mss_ddr_sgmii_regs.h: + * - MC_BASE2 @ DDRCFG_BASE + 0x4000 = 0x20084000 (all controller registers) + * - DFI_BASE @ DDRCFG_BASE + 0x10000 = 0x20090000 (DFI interface) + * + * All timing and init registers are in MC_BASE2. + */ + +/* MC_BASE2 registers (DDRCFG_BASE + 0x4000) */ +#define MC_BASE2 0x4000 +#define MC_CTRLR_SOFT_RESET (MC_BASE2 + 0x00) +#define MC_AUTOINIT_DISABLE (MC_BASE2 + 0x10) +#define MC_INIT_FORCE_RESET (MC_BASE2 + 0x14) +#define MC_INIT_GEARDOWN_EN (MC_BASE2 + 0x18) +#define MC_INIT_DISABLE_CKE (MC_BASE2 + 0x1C) +#define MC_INIT_CS (MC_BASE2 + 0x20) +#define MC_INIT_PRECHARGE_ALL (MC_BASE2 + 0x24) +#define MC_INIT_REFRESH (MC_BASE2 + 0x28) +#define MC_INIT_ZQ_CAL_REQ (MC_BASE2 + 0x2C) +#define MC_INIT_ACK (MC_BASE2 + 0x30) +#define MC_CFG_BL (MC_BASE2 + 0x34) +#define MC_CTRLR_INIT (MC_BASE2 + 0x38) +#define MC_CTRLR_INIT_DONE (MC_BASE2 + 0x3C) +#define MC_CFG_AUTO_REF_EN (MC_BASE2 + 0x40) +#define MC_CFG_RAS (MC_BASE2 + 0x44) +#define MC_CFG_RCD (MC_BASE2 + 0x48) +#define MC_CFG_RRD (MC_BASE2 + 0x4C) +#define MC_CFG_RP (MC_BASE2 + 0x50) +#define MC_CFG_RC (MC_BASE2 + 0x54) +#define MC_CFG_FAW (MC_BASE2 + 0x58) +#define MC_CFG_RFC (MC_BASE2 + 0x5C) +#define MC_CFG_RTP (MC_BASE2 + 0x60) +#define MC_CFG_WR (MC_BASE2 + 0x64) +#define MC_CFG_WTR (MC_BASE2 + 0x68) +#define MC_CFG_CL (MC_BASE2 + 0x74) +#define MC_CFG_STARTUP_DELAY (MC_BASE2 + 0x80) +#define MC_CFG_MEM_COLBITS (MC_BASE2 + 0x84) +#define MC_CFG_MEM_ROWBITS (MC_BASE2 + 0x88) +#define MC_CFG_MEM_BANKBITS (MC_BASE2 + 0x8C) +#define MC_CFG_XP (MC_BASE2 + 0x9C) +#define MC_CFG_XSR (MC_BASE2 + 0xA0) +#define MC_CFG_MRD (MC_BASE2 + 0xA8) +#define MC_CFG_REF_PER (MC_BASE2 + 0xB0) +#define MC_INIT_MR_W_REQ (MC_BASE2 + 0x1F0) /* MR write request (from HSS mss_ddr_sgmii_regs.h) */ +#define MC_INIT_MR_ADDR 
(MC_BASE2 + 0x1F4) /* MR address */ +#define MC_INIT_MR_WR_DATA (MC_BASE2 + 0x1F8) /* MR write data */ +#define MC_INIT_MR_WR_MASK (MC_BASE2 + 0x1FC) /* MR write mask */ +#define MC_INIT_ZQ_CAL_START (MC_BASE2 + 0xDC) +#define MC_CFG_AUTO_ZQ_CAL_EN (MC_BASE2 + 0xE0) +#define MC_CFG_CWL (MC_BASE2 + 0xF4) +#define MC_CFG_MEMORY_TYPE (MC_BASE2 + 0x104) +#define MC_CFG_NUM_RANKS (MC_BASE2 + 0x10C) +#define MC_CFG_WL (MC_BASE2 + 0x188) +#define MC_CFG_RL (MC_BASE2 + 0x18C) + +/* DFI registers (DDRCFG_BASE + 0x10000) */ +#define DFI_BASE 0x10000 +#define MC_DFI_RDDATA_EN (DFI_BASE + 0x00) +#define MC_DFI_PHY_RDLAT (DFI_BASE + 0x04) +#define MC_DFI_PHY_WRLAT (DFI_BASE + 0x08) +#define MC_DFI_PHYUPD_EN (DFI_BASE + 0x0C) +#define MC_DFI_INIT_COMPLETE (DFI_BASE + 0x34) +#define MC_DFI_INIT_START (DFI_BASE + 0x50) + +/* Memory Test Controller (MTC) registers - at DDRCFG_BASE + 0x4400 */ +#define MTC_BASE 0x4400 +#define MT_EN (MTC_BASE + 0x00) +#define MT_EN_SINGLE (MTC_BASE + 0x04) +#define MT_STOP_ON_ERROR (MTC_BASE + 0x08) +#define MT_DATA_PATTERN (MTC_BASE + 0x14) +#define MT_ADDR_PATTERN (MTC_BASE + 0x18) +#define MT_ADDR_BITS (MTC_BASE + 0x20) +#define MT_ERROR_STS (MTC_BASE + 0x24) +#define MT_DONE_ACK (MTC_BASE + 0x28) +#define MT_START_ADDR_0 (MTC_BASE + 0xB4) +#define MT_START_ADDR_1 (MTC_BASE + 0xB8) +#define MT_ERROR_MASK_0 (MTC_BASE + 0xBC) +#define MT_ERROR_MASK_1 (MTC_BASE + 0xC0) +#define MT_ERROR_MASK_2 (MTC_BASE + 0xC4) +#define MT_ERROR_MASK_3 (MTC_BASE + 0xC8) +#define MT_ERROR_MASK_4 (MTC_BASE + 0xCC) + +/* PHY write calibration register */ +#define PHY_EXPERT_WRCALIB 0x8BC + +/* DDR Segment Register Offsets */ +#define SEG0_0 0x00 +#define SEG0_1 0x04 +#define SEG0_2 0x08 +#define SEG0_3 0x0C +#define SEG0_4 0x10 +#define SEG0_5 0x14 +#define SEG0_6 0x18 +#define SEG0_BLOCKER 0x1C +#define SEG1_0 0x20 +#define SEG1_1 0x24 +#define SEG1_2 0x28 +#define SEG1_3 0x2C +#define SEG1_4 0x30 +#define SEG1_5 0x34 +#define SEG1_6 0x38 +#define SEG1_7 0x3C + +/* 
DDR Memory Map */ +#define DDR_BASE_CACHED 0x80000000UL /* Cached access */ +#define DDR_BASE_NONCACHED 0xC0000000UL /* Non-cached access */ +#define DDR_BASE_NONCACHED_WCB 0xD0000000UL /* Non-cached with write-combining */ +#define DDR_SIZE 0x80000000UL /* 2GB (Video Kit) */ + +/* DDR Init return codes */ +#define DDR_INIT_SUCCESS 0 +#define DDR_INIT_TIMEOUT -1 +#define DDR_INIT_TRAINING_FAIL -2 +#define DDR_INIT_MEM_TEST_FAIL -3 + +/* ============================================================================ + * Video Kit Clock/DDR Configuration (Libero-generated settings) + * + * Reference: 125 MHz external oscillator + * DDR: LPDDR4, 2GB total (NOTE(review): this line previously named 4x MT40A512M16LY-075:E, which looks like a DDR4 part number; the controller timing section names MT53D512M32D2DS-053 - confirm the fitted part) + * ============================================================================ */ + +/* MSS PLL Configuration + * + * For 600 MHz CPU clock / 150 MHz APB clock: + * - Reference clock: 125 MHz (from external oscillator) + * - VCO: ~2.4 GHz + * - RFDIV = 5 per REF_FB = 0x00000500 below (must be non-zero!) + * - NOTE(review): this comment previously gave REFDIV = 1 and FBDIV = 20 (2500 MHz / 125 MHz), which does not match the register values below - confirm the intended dividers + */ +#define LIBERO_SETTING_MSS_PLL_CTRL 0x0100001FUL +#define LIBERO_SETTING_MSS_PLL_REF_FB 0x00000500UL /* RFDIV=5 (from HSS Video Kit config) */ +#define LIBERO_SETTING_MSS_PLL_FRACN 0x00000000UL +#define LIBERO_SETTING_MSS_PLL_DIV_0_1 0x03000100UL /* DIV0=3, DIV1=1 - NOTE(review): the DDR PLL entry decodes 0x02000100 as POST0DIV=1, POST1DIV=2, which by the same layout would make this POST0DIV=1, POST1DIV=3 - confirm */ +#define LIBERO_SETTING_MSS_PLL_DIV_2_3 0x01000300UL +#define LIBERO_SETTING_MSS_PLL_CTRL2 0x00001020UL +#define LIBERO_SETTING_MSS_PLL_PHADJ 0x00004003UL +#define LIBERO_SETTING_MSS_SSCG_REG_0 0x00000000UL +#define LIBERO_SETTING_MSS_SSCG_REG_1 0x00000000UL +#define LIBERO_SETTING_MSS_SSCG_REG_2 0x00000060UL +#define LIBERO_SETTING_MSS_SSCG_REG_3 0x00000001UL + +/* MSS Clock Fabric Mux Configuration */ +#define LIBERO_SETTING_MSS_BCLKMUX 0x00000208UL +#define LIBERO_SETTING_MSS_PLL_CKMUX 0x00000155UL +#define LIBERO_SETTING_MSS_MSSCLKMUX 0x00000003UL +#define LIBERO_SETTING_MSS_FMETER_ADDR 0x00000000UL +#define LIBERO_SETTING_MSS_FMETER_DATAW 0x00000000UL + +/* SGMII Configuration (from Video Kit Libero design
hw_sgmii_tip.h) + * These are used by sgmii_off_mode() to properly configure the SGMII + * RPC registers even when SGMII is not used (DDR-only mode). + */ +#define LIBERO_SETTING_SGMII_CLK_XCVR 0x00002C30UL /* Clock receiver */ +#define LIBERO_SETTING_SGMII_REFCLKMUX 0x00000005UL /* Route refclk to DDR/SGMII PLLs */ +#define LIBERO_SETTING_SGMII_SGMII_CLKMUX 0x00000000UL /* SGMII clock mux (not used) */ + +/* SGMII TIP RPC registers (from hw_sgmii_tip.h) */ +#define LIBERO_SETTING_SGMII_MODE 0x08C0F2FFUL /* SGMII mode config */ +#define LIBERO_SETTING_SGMII_PLL_CNTL 0x80140101UL /* SGMII PLL control */ +#define LIBERO_SETTING_SGMII_CH0_CNTL 0x37F07770UL /* Channel 0 control */ +#define LIBERO_SETTING_SGMII_CH1_CNTL 0x37F07770UL /* Channel 1 control */ +#define LIBERO_SETTING_SGMII_RECAL_CNTL 0x000020C8UL /* Recalibration control */ +#define LIBERO_SETTING_SGMII_CLK_CNTL 0xF00050CCUL /* Clock control */ +#define LIBERO_SETTING_SGMII_SPARE_CNTL 0xFF000000UL /* Spare control */ + +/* DDR PLL Configuration + * + * For LPDDR4 at 1600 MT/s (800 MHz DDR clock): + * - Reference clock: 125 MHz (from external oscillator via SGMII CFM) + * - VCO frequency determined by RFDIV and INTIN settings + * - RFDIV = 5 (bits 13:8 of REF_FB) + * + * Values from HSS Video Kit hw_clk_ddr_pll.h (Libero-generated) + */ +#define LIBERO_SETTING_DDR_PLL_CTRL 0x0100003FUL +#define LIBERO_SETTING_DDR_PLL_REF_FB 0x00000500UL /* RFDIV=5 */ +#define LIBERO_SETTING_DDR_PLL_FRACN 0x00000000UL +#define LIBERO_SETTING_DDR_PLL_DIV_0_1 0x02000100UL /* POST0DIV=1, POST1DIV=2 (from HSS) */ +#define LIBERO_SETTING_DDR_PLL_DIV_2_3 0x01000100UL /* POST2DIV=1, POST3DIV=1 (from HSS) */ +#define LIBERO_SETTING_DDR_PLL_CTRL2 0x00001020UL +#define LIBERO_SETTING_DDR_PLL_PHADJ 0x00005003UL /* Phase init from HSS Video Kit */ +#define LIBERO_SETTING_DDR_SSCG_REG_0 0x00000000UL +#define LIBERO_SETTING_DDR_SSCG_REG_1 0x00000000UL +#define LIBERO_SETTING_DDR_SSCG_REG_2 0x00000080UL /* INTIN=0x80 (128) */ +#define 
LIBERO_SETTING_DDR_SSCG_REG_3 0x00000001UL + +/* DDR PHY Mode: LPDDR4, 32-bit, no ECC */ +#define LIBERO_SETTING_DDRPHY_MODE 0x00014A24UL + +/* DDR Segment Configuration (address translation) */ +#define LIBERO_SETTING_SEG0_0 0x80007F80UL +#define LIBERO_SETTING_SEG0_1 0x80007000UL +#define LIBERO_SETTING_SEG0_2 0x00000000UL +#define LIBERO_SETTING_SEG0_3 0x00000000UL +#define LIBERO_SETTING_SEG0_4 0x00000000UL +#define LIBERO_SETTING_SEG0_5 0x00000000UL +#define LIBERO_SETTING_SEG0_6 0x00000000UL +#define LIBERO_SETTING_SEG1_0 0x00000000UL +#define LIBERO_SETTING_SEG1_1 0x00000000UL +#define LIBERO_SETTING_SEG1_2 0x80007F40UL +#define LIBERO_SETTING_SEG1_3 0x80006C00UL +#define LIBERO_SETTING_SEG1_4 0x80007F30UL +#define LIBERO_SETTING_SEG1_5 0x80006800UL +#define LIBERO_SETTING_SEG1_6 0x00000000UL +#define LIBERO_SETTING_SEG1_7 0x00000000UL + +/* DDR Training Options */ +/* Training skip bits: + * Bit 0 = skip BCLK_SCLK + * Bit 1 = skip ADDCMD + * Bit 2 = skip WRLVL + * Bit 3 = skip RDGATE + * Bit 4 = skip DQ_DQS + * + * 0x02 = skip ADDCMD only (we do it in software) + * 0x1F = skip ALL training (use pre-trained values from NV map) + */ +#define LIBERO_SETTING_TRAINING_SKIP_SETTING 0x00000002UL +#define LIBERO_SETTING_TRAINING_SKIP_ALL 0x0000001FUL +#define LIBERO_SETTING_TIP_CFG_PARAMS 0x07CFE02FUL +#define LIBERO_SETTING_TIP_CONFIG_PARAMS_BCLK_VCOPHS_OFFSET 0x00000002UL + +/* DPC Bits - voltage reference settings (from HSS hw_ddr_io_bank.h) */ +#define LIBERO_SETTING_DPC_BITS 0x00050422UL + +/* DDR Controller Timing (LPDDR4 @ 1600 Mbps - MT53D512M32D2DS-053) + * RL=14, WL=8 at 1600 Mbps (800 MHz) + * Timings for 16Gb x32 LPDDR4 device + */ +#define LIBERO_SETTING_CTRLR_SOFT_RESET_N 0x00000001UL +#define LIBERO_SETTING_CFG_BL 0x00000000UL +#define LIBERO_SETTING_CFG_AUTO_REF_EN 0x00000001UL +#define LIBERO_SETTING_CFG_RAS 0x00000022UL +#define LIBERO_SETTING_CFG_RCD 0x0000000FUL +#define LIBERO_SETTING_CFG_RRD 0x00000008UL +#define LIBERO_SETTING_CFG_RP 
0x00000011UL +#define LIBERO_SETTING_CFG_RC 0x00000033UL +#define LIBERO_SETTING_CFG_FAW 0x00000020UL +#define LIBERO_SETTING_CFG_RFC 0x000000E0UL +#define LIBERO_SETTING_CFG_RTP 0x00000008UL +#define LIBERO_SETTING_CFG_WR 0x00000010UL +#define LIBERO_SETTING_CFG_WTR 0x00000008UL +#define LIBERO_SETTING_CFG_STARTUP_DELAY 0x00027100UL +#define LIBERO_SETTING_CFG_MEM_COLBITS 0x0000000AUL +#define LIBERO_SETTING_CFG_MEM_ROWBITS 0x00000010UL +#define LIBERO_SETTING_CFG_MEM_BANKBITS 0x00000003UL +#define LIBERO_SETTING_CFG_NUM_RANKS 0x00000001UL +#define LIBERO_SETTING_CFG_MEMORY_TYPE 0x00000400UL +#define LIBERO_SETTING_CFG_CL 0x00000005UL +#define LIBERO_SETTING_CFG_CWL 0x00000005UL +#define LIBERO_SETTING_CFG_WL 0x00000008UL +#define LIBERO_SETTING_CFG_RL 0x0000000EUL +#define LIBERO_SETTING_CFG_REF_PER 0x00000C34UL +#define LIBERO_SETTING_CFG_XP 0x00000006UL +#define LIBERO_SETTING_CFG_XSR 0x0000001FUL +#define LIBERO_SETTING_CFG_MRD 0x0000000CUL +#define LIBERO_SETTING_CFG_DFI_T_RDDATA_EN 0x00000015UL +#define LIBERO_SETTING_CFG_DFI_T_PHY_RDLAT 0x00000006UL +#define LIBERO_SETTING_CFG_DFI_T_PHY_WRLAT 0x00000003UL +#define LIBERO_SETTING_CFG_DFI_PHYUPD_EN 0x00000001UL + +/* DDR function declarations */ +#ifndef __ASSEMBLER__ +#ifdef WOLFBOOT_RISCV_MMODE +int mpfs_ddr_init(void); +void hal_uart_reinit(void); +#endif +#endif /* __ASSEMBLER__ */ + /* ============================================================================ * PLIC - Platform-Level Interrupt Controller (MPFS250-specific configuration) diff --git a/hal/riscv.h b/hal/riscv.h index 4195645ce5..5052feac0a 100644 --- a/hal/riscv.h +++ b/hal/riscv.h @@ -22,21 +22,23 @@ #ifndef RISCV_H #define RISCV_H +/* ============================================================================ + * RISC-V Privilege Mode Selection + * + * - Machine mode (direct boot from eNVM) : WOLFBOOT_RISCV_MMODE + * - Supervisor mode (running under HSS/SBI) : default + * + * 
============================================================================ */ -/* TODO: Add support for machine mode wolfBoot */ -#if 1 -#define WOLFBOOT_RISCV_SMODE /* supervisor mode */ -#else -#define WOLFBOOT_RISCV_MMODE /* machine mode */ -#endif - -/* Initial stack pointer address (stack grows downward from here) */ + /* Initial stack pointer address (stack grows downward from here) */ #ifndef WOLFBOOT_STACK_TOP -#ifdef WOLFBOOT_RISCV_SMODE -#define WOLFBOOT_STACK_TOP 0x80200000 -#else -#define WOLFBOOT_STACK_TOP 0x80000000 -#endif + #ifdef WOLFBOOT_RISCV_MMODE + /* M-mode: Stack at end of L2 Scratchpad (256KB) */ + #define WOLFBOOT_STACK_TOP 0x0A040000 + #else + /* S-mode: Stack in DDR */ + #define WOLFBOOT_STACK_TOP 0x80200000 + #endif #endif /* ============================================================================ @@ -75,6 +77,13 @@ #define CSR_MIMPID 0xF13 /* Implementation ID */ #define CSR_MHARTID 0xF14 /* Hardware thread ID */ +#ifdef WOLFBOOT_RISCV_MMODE +#define MODE_PREFIX(__suffix) m##__suffix +#else +#define MODE_PREFIX(__suffix) s##__suffix +#endif + + /* ============================================================================ * CSR Access Macros * ============================================================================ */ @@ -124,10 +133,40 @@ /* ============================================================================ * Status Register Bits (mstatus/sstatus) * ============================================================================ */ -#define MSTATUS_MIE (1 << 3) /* Machine-mode global interrupt enable */ -#define MSTATUS_MPIE (1 << 7) /* Machine-mode previous interrupt enable */ -#define SSTATUS_SIE (1 << 1) /* Supervisor-mode global interrupt enable */ -#define SSTATUS_SPIE (1 << 5) /* Supervisor-mode previous interrupt enable */ +/* Privilege Levels */ +#define PRV_U 0 /* User mode */ +#define PRV_S 1 /* Supervisor mode */ +#define PRV_M 3 /* Machine mode */ + +/* MSTATUS Register Bits */ +#define MSTATUS_UIE (1UL << 
0) /* User interrupt enable */ +#define MSTATUS_SIE (1UL << 1) /* Supervisor interrupt enable */ +#define MSTATUS_MIE (1UL << 3) /* Machine interrupt enable */ +#define MSTATUS_UPIE (1UL << 4) /* User previous interrupt enable */ +#define MSTATUS_SPIE (1UL << 5) /* Supervisor previous interrupt enable */ +#define MSTATUS_MPIE (1UL << 7) /* Machine previous interrupt enable */ +#define MSTATUS_SPP (1UL << 8) /* Supervisor previous privilege (1 bit) */ +#define MSTATUS_MPP_SHIFT 11 +#define MSTATUS_MPP_MASK (3UL << MSTATUS_MPP_SHIFT) +#define MSTATUS_MPP_M (PRV_M << MSTATUS_MPP_SHIFT) /* MPP = Machine */ +#define MSTATUS_MPP_S (PRV_S << MSTATUS_MPP_SHIFT) /* MPP = Supervisor */ +#define MSTATUS_MPP_U (PRV_U << MSTATUS_MPP_SHIFT) /* MPP = User */ +#define MSTATUS_FS_SHIFT 13 +#define MSTATUS_FS_MASK (3UL << MSTATUS_FS_SHIFT) +#define MSTATUS_FS_OFF (0UL << MSTATUS_FS_SHIFT) /* FPU off */ +#define MSTATUS_FS_INIT (1UL << MSTATUS_FS_SHIFT) /* FPU initial */ +#define MSTATUS_FS_CLEAN (2UL << MSTATUS_FS_SHIFT) /* FPU clean */ +#define MSTATUS_FS_DIRTY (3UL << MSTATUS_FS_SHIFT) /* FPU dirty */ +#define MSTATUS_MPRV (1UL << 17) /* Modify privilege */ +#define MSTATUS_SUM (1UL << 18) /* Supervisor user memory access */ +#define MSTATUS_MXR (1UL << 19) /* Make executable readable */ +#define MSTATUS_TVM (1UL << 20) /* Trap virtual memory */ +#define MSTATUS_TW (1UL << 21) /* Timeout wait */ +#define MSTATUS_TSR (1UL << 22) /* Trap SRET */ + +/* SSTATUS Register Bits (subset visible to S-mode) */ +#define SSTATUS_SIE (1UL << 1) /* Supervisor-mode global interrupt enable */ +#define SSTATUS_SPIE (1UL << 5) /* Supervisor-mode previous interrupt enable */ /* ============================================================================ * Machine Interrupt Enable (MIE) Register Bits @@ -136,6 +175,14 @@ #define MIE_MTIE (1 << IRQ_M_TIMER) /* Machine timer interrupt enable */ #define MIE_MEIE (1 << IRQ_M_EXT) /* Machine external interrupt enable */ +/* 
============================================================================ + * Machine Interrupt Pending (MIP) Register Bits + * Same bit positions as MIE, used to check/set pending interrupts + * ============================================================================ */ +#define MIP_MSIP (1 << IRQ_M_SOFT) /* Machine software interrupt pending */ +#define MIP_MTIP (1 << IRQ_M_TIMER) /* Machine timer interrupt pending */ +#define MIP_MEIP (1 << IRQ_M_EXT) /* Machine external interrupt pending */ + /* ============================================================================ * Supervisor Interrupt Enable (SIE) Register Bits * ============================================================================ */ @@ -143,6 +190,13 @@ #define SIE_STIE (1 << IRQ_S_TIMER) /* Supervisor timer interrupt enable */ #define SIE_SEIE (1 << IRQ_S_EXT) /* Supervisor external interrupt enable */ +/* ============================================================================ + * Supervisor Interrupt Pending (SIP) Register Bits + * ============================================================================ */ +#define SIP_SSIP (1 << IRQ_S_SOFT) /* Supervisor software interrupt pending */ +#define SIP_STIP (1 << IRQ_S_TIMER) /* Supervisor timer interrupt pending */ +#define SIP_SEIP (1 << IRQ_S_EXT) /* Supervisor external interrupt pending */ + /* ============================================================================ * Exception Cause Register (MCAUSE/SCAUSE) Definitions * ============================================================================ */ @@ -269,5 +323,78 @@ extern void plic_dispatch_irq(uint32_t irq); #endif /* PLIC_BASE && !__ASSEMBLER__ */ +/* ============================================================================ + * CLINT - Core Local Interruptor (M-mode only) + * + * The CLINT provides software interrupts (IPI) and timer functionality + * for machine mode. Used for inter-hart communication and timer-based delays. 
+ * + * CLINT Memory Map (standard offsets from CLINT_BASE): + * 0x0000-0x3FFF: MSIP registers (1 word per hart, software interrupt pending) + * 0x4000-0xBFF7: MTIMECMP registers (8 bytes per hart, timer compare) + * 0xBFF8-0xBFFF: MTIME register (8 bytes, global timer counter) + * ============================================================================ */ +#ifdef WOLFBOOT_RISCV_MMODE + +#ifndef CLINT_BASE +#define CLINT_BASE 0x02000000UL +#endif + +#define CLINT_MSIP_OFFSET 0x0000UL +#define CLINT_MTIMECMP_OFFSET 0x4000UL +#define CLINT_MTIME_OFFSET 0xBFF8UL + +#ifndef __ASSEMBLER__ + +/* MSIP (Machine Software Interrupt Pending) - one per hart */ +#define CLINT_MSIP(hart) \ + (*((volatile uint32_t*)(CLINT_BASE + CLINT_MSIP_OFFSET + ((hart) * 4)))) + +/* MTIMECMP - 64-bit timer compare value, one per hart */ +#define CLINT_MTIMECMP_LO(hart) \ + (*((volatile uint32_t*)(CLINT_BASE + CLINT_MTIMECMP_OFFSET + ((hart) * 8)))) +#define CLINT_MTIMECMP_HI(hart) \ + (*((volatile uint32_t*)(CLINT_BASE + CLINT_MTIMECMP_OFFSET + ((hart) * 8) + 4))) + +/* MTIME - 64-bit global timer counter (shared across all harts) */ +#define CLINT_MTIME_LO \ + (*((volatile uint32_t*)(CLINT_BASE + CLINT_MTIME_OFFSET))) +#define CLINT_MTIME_HI \ + (*((volatile uint32_t*)(CLINT_BASE + CLINT_MTIME_OFFSET + 4))) + +#endif /* !__ASSEMBLER__ */ + +#endif /* WOLFBOOT_RISCV_MMODE */ + +/* ============================================================================ + * L2 Cache Controller (M-mode only) + * + * The L2 cache controller manages the shared L2 cache and LIM (Loosely + * Integrated Memory) / Scratchpad configuration. 
+ * ============================================================================ */ +#ifdef WOLFBOOT_RISCV_MMODE + +#ifndef L2_CACHE_CTRL_BASE +#define L2_CACHE_CTRL_BASE 0x02010000UL +#endif + +/* L2 Cache Controller register offsets */ +#define L2_CONFIG_OFFSET 0x000UL +#define L2_WAYENABLE_OFFSET 0x008UL +#define L2_FLUSH64_OFFSET 0x200UL + +#ifndef __ASSEMBLER__ + +#define L2_CONFIG_REG \ + (*((volatile uint32_t*)(L2_CACHE_CTRL_BASE + L2_CONFIG_OFFSET))) +#define L2_WAYENABLE_REG \ + (*((volatile uint32_t*)(L2_CACHE_CTRL_BASE + L2_WAYENABLE_OFFSET))) +#define L2_FLUSH64_REG \ + (*((volatile uint64_t*)(L2_CACHE_CTRL_BASE + L2_FLUSH64_OFFSET))) + +#endif /* !__ASSEMBLER__ */ + +#endif /* WOLFBOOT_RISCV_MMODE */ + #endif /* RISCV_H */ diff --git a/src/boot_riscv.c b/src/boot_riscv.c index a25e7aefbf..22849e5005 100644 --- a/src/boot_riscv.c +++ b/src/boot_riscv.c @@ -239,8 +239,152 @@ int WEAKFUNCTION hal_dts_fixup(void* dts_addr) } #endif +#ifdef WOLFBOOT_RISCV_MMODE +/* ============================================================================ + * M-mode to S-mode Transition Support + * + * When booting Linux from M-mode, we need to: + * 1. Configure PMP to allow S-mode full memory access + * 2. Delegate appropriate traps to S-mode + * 3. Set up MSTATUS.MPP = S-mode + * 4. Use MRET to atomically switch to S-mode + * ============================================================================ */ + +/** + * setup_pmp_for_smode - Configure PMP for S-mode full access + * + * Sets up PMP entry 0 to allow S-mode full read/write/execute access + * to all of physical memory (0x0 to 0xFFFFFFFFFFFFFFFF for RV64). + */ +static void setup_pmp_for_smode(void) +{ + /* PMP configuration: + * - pmpcfg0[7:0] controls pmpaddr0 + * - A = 3 (NAPOT - naturally aligned power-of-2) + * - R=1, W=1, X=1 (full access) + * + * For NAPOT with all 1s in pmpaddr, we get full address space coverage. 
+ * pmpaddr = (address >> 2) | ((size >> 3) - 1) + * For full 64-bit space: pmpaddr = 0x1FFFFFFFFFFFFFFF (all ones, shifted); the code below simply writes -1UL (every bit set) + */ + unsigned long pmpaddr_val = -1UL; /* All 1s = cover entire address space */ + unsigned long pmpcfg_val; + + /* A=NAPOT(3), R=1, W=1, X=1 = 0b00011111 = 0x1F */ + pmpcfg_val = 0x1F; + + /* Write pmpaddr0 first, then pmpcfg0 */ + csr_write(pmpaddr0, pmpaddr_val); + csr_write(pmpcfg0, pmpcfg_val); + + /* sfence.vma after the PMP CSR writes (the privileged spec calls for one after changing PMP settings); the memory clobber also makes it a compiler barrier */ + __asm__ volatile("sfence.vma" ::: "memory"); +} + +/** + * delegate_traps_to_smode - Delegate exceptions and interrupts to S-mode + * + * This allows S-mode (Linux) to handle its own traps without M-mode + * involvement for most cases. + */ +static void delegate_traps_to_smode(void) +{ + unsigned long medeleg_val; + unsigned long mideleg_val; + + /* Delegate these exceptions to S-mode: + * - Instruction misaligned (0) + * - Instruction access fault (1) + * - Illegal instruction (2) + * - Breakpoint (3) + * - Load address misaligned (4) + * - Load access fault (5) + * - Store address misaligned (6) + * - Store access fault (7) + * - Environment call from U-mode (8) + * - Environment call from S-mode (9) - NOT delegated (bit 9 is left out of medeleg_val below); stays in M-mode for SBI + * - Instruction page fault (12) + * - Load page fault (13) + * - Store page fault (15) + */ + medeleg_val = (1 << 0) | /* Instruction address misaligned */ + (1 << 1) | /* Instruction access fault */ + (1 << 2) | /* Illegal instruction */ + (1 << 3) | /* Breakpoint */ + (1 << 4) | /* Load address misaligned */ + (1 << 5) | /* Load access fault */ + (1 << 6) | /* Store address misaligned */ + (1 << 7) | /* Store access fault */ + (1 << 8) | /* Environment call from U-mode */ + (1 << 12) | /* Instruction page fault */ + (1 << 13) | /* Load page fault */ + (1 << 15); /* Store page fault */ + + /* Delegate these interrupts to S-mode: + * - S-mode software interrupt (1) + * - S-mode timer interrupt (5) + * - S-mode external interrupt (9) + */ + mideleg_val = (1 << IRQ_S_SOFT) | + (1 <<
IRQ_S_TIMER) | + (1 << IRQ_S_EXT); + + csr_write(medeleg, medeleg_val); + csr_write(mideleg, mideleg_val); +} + +/** + * enter_smode - Transition from M-mode to S-mode and jump to entry point + * + * @entry: Entry point address (will be loaded into MEPC) + * @hartid: Hart ID (passed to kernel in a0) + * @dtb: DTB address (passed to kernel in a1) + * + * This function never returns. It uses MRET to atomically: + * 1. Switch privilege level from M to S + * 2. Jump to the entry point + */ +static void __attribute__((noreturn)) enter_smode(unsigned long entry, + unsigned long hartid, + unsigned long dtb) +{ + unsigned long mstatus_val; + + /* Set up MEPC with entry point */ + csr_write(mepc, entry); + + /* Configure MSTATUS: + * - MPP = 01 (S-mode) - when MRET executes, we'll be in S-mode + * - MPIE = 1 - MRET copies MPIE into MIE, so MIE reads 1 after MRET + * - Clear MIE to disable interrupts during transition + */ + mstatus_val = csr_read(mstatus); + mstatus_val &= ~MSTATUS_MPP_MASK; /* Clear MPP field */ + mstatus_val |= MSTATUS_MPP_S; /* Set MPP = S-mode */ + mstatus_val |= MSTATUS_MPIE; /* Set MPIE */ + mstatus_val &= ~MSTATUS_MIE; /* Clear MIE */ + csr_write(mstatus, mstatus_val); + + /* Disable virtual memory (satp = 0) */ + csr_write(satp, 0); + + /* Execute MRET with a0=hartid, a1=dtb; a0/a1 are listed as clobbers so the compiler keeps the two inputs in other registers */ + __asm__ volatile( + "mv a0, %0\n" /* hartid in a0 */ + "mv a1, %1\n" /* dtb in a1 */ + "mret\n" + : : "r"(hartid), "r"(dtb) : "a0", "a1" + ); + + __builtin_unreachable(); +} +#endif /* WOLFBOOT_RISCV_MMODE */ + #if __riscv_xlen == 64 -/* Get the hartid saved by boot_riscv_start.S in the tp register */ +/* Get the hartid saved by boot_riscv_start.S in the tp register + * Note: In M-mode, hartid was read from mhartid CSR and stored in tp. + * In S-mode, hartid was passed by the boot stage in a0 and saved to tp.
+ */ unsigned long get_boot_hartid(void) { unsigned long hartid; @@ -260,6 +404,8 @@ void do_boot(const uint32_t *app_offset) #endif #ifdef MMU unsigned long dts_addr; +#else + unsigned long dts_addr = 0; #endif #ifdef MMU @@ -298,10 +444,35 @@ void do_boot(const uint32_t *app_offset) * enters the kernel. Secondary harts are started via SBI HSM extension. */ -#if __riscv_xlen == 64 -#ifdef MMU +#ifdef WOLFBOOT_RISCV_MMODE + /* + * M-mode to S-mode transition for booting Linux: + * 1. Set up PMP to allow S-mode full memory access + * 2. Delegate traps/interrupts to S-mode + * 3. Use MRET to switch to S-mode and jump to kernel + */ +#ifdef DEBUG_BOOT + wolfBoot_printf("Setting up M-mode to S-mode transition...\n"); + wolfBoot_printf(" PMP: Configuring for S-mode access\n"); +#endif + setup_pmp_for_smode(); + +#ifdef DEBUG_BOOT + wolfBoot_printf(" Delegating traps to S-mode\n"); +#endif + delegate_traps_to_smode(); + +#ifdef DEBUG_BOOT + wolfBoot_printf(" Entering S-mode: entry=0x%lx, hartid=%lu, dtb=0x%lx\n", + (unsigned long)app_offset, hartid, dts_addr); +#endif + /* This never returns */ + enter_smode((unsigned long)app_offset, hartid, dts_addr); + +#elif __riscv_xlen == 64 asm volatile( - #ifdef WOLFBOOT_RISCV_SMODE + #if defined(MMU) && !defined(WOLFBOOT_RISCV_MMODE) + /* S-mode boot (e.g., when running under HSS/OpenSBI) */ "csrw satp, zero\n" "sfence.vma\n" #endif @@ -310,14 +481,7 @@ void do_boot(const uint32_t *app_offset) "jr %2\n" : : "r"(hartid), "r"(dts_addr), "r"(app_offset) : "a0", "a1" ); -#else - asm volatile( - "mv a0, %0\n" - "mv a1, zero\n" - "jr %1\n" - : : "r"(hartid), "r"(app_offset) : "a0", "a1" - ); -#endif + #else /* RV32 */ /* RV32: typically bare-metal without Linux, simpler boot */ asm volatile("jr %0" : : "r"(app_offset)); @@ -360,8 +524,6 @@ void RAMFUNCTION arch_reboot(void) AON_WDOGKEY = AON_WDOGKEY_VALUE; AON_WDOGFEED = 1; - while(1) - ; wolfBoot_panic(); } @@ -373,8 +535,6 @@ void WEAKFUNCTION arch_reboot(void) SYSREG_MSS_RESET_CR = 
0xDEAD; #endif - while(1) - ; wolfBoot_panic(); } diff --git a/src/boot_riscv_start.S b/src/boot_riscv_start.S index 4ed11d5efc..48a0147c6f 100644 --- a/src/boot_riscv_start.S +++ b/src/boot_riscv_start.S @@ -25,22 +25,20 @@ #include "hal/mpfs250.h" #endif -#ifdef WOLFBOOT_RISCV_SMODE -#define MODE_PREFIX(__suffix) s##__suffix -#else -#define MODE_PREFIX(__suffix) m##__suffix -#endif - /* ============================================================================ * RISC-V Boot Entry Point * ============================================================================ * - * For RV64 (typically running under SBI): + * For RV64 S-mode (typically running under SBI): * Entry conditions (passed by prior boot stage / SBI): * a0 = hart ID (hardware thread identifier) * a1 = pointer to device tree blob (DTB) in memory * + * For RV64 M-mode (direct boot from eNVM): + * Runs as first code after reset, reads hart ID from CSR + * Must initialize all hardware from scratch + * * For RV32 (typically bare metal): * Starts fresh, reads hart ID from CSR * @@ -49,16 +47,425 @@ .globl _reset _reset: #if __riscv_xlen == 64 - /* ---------- RV64 Boot Sequence ---------- */ #ifdef WOLFBOOT_RISCV_MMODE + /* ======================================================================== + * RV64 Machine Mode Boot Sequence + * + * This path is for direct boot from eNVM on PolarFire SoC. + * Entry point (_reset) is in eNVM at 0x20220100. + * + * Boot flow: + * 1. Run early init from eNVM (this .init section) + * 2. Copy main code from eNVM to L2 SRAM + * 3. Jump to L2 SRAM to continue execution + * 4. Initialize .data and .bss sections + * 5. Handle multi-hart (E51 runs, U54s park in WFI) + * ======================================================================== */ + +#if 0 //def TARGET_mpfs250 + /* + * Early UART initialization for M-mode boot on MPFS. + * This enables debug output during the boot sequence. 
+ */ + li t0, 0x20000000 /* UART0 base */ + li t1, 0x20002000 /* SYSREG_BASE */ + /* Enable UART0 clock */ + lw t2, 0x84(t1) /* Read SUBBLK_CLOCK_CR */ + ori t2, t2, 0x20 /* Set bit 5 (MMUART0) */ + sw t2, 0x84(t1) + /* Release UART0 from reset */ + lw t2, 0x88(t1) /* Read SOFT_RESET_CR */ + li t3, 0xFFFFFFDF /* ~0x20 - Clear bit 5 */ + and t2, t2, t3 + sw t2, 0x88(t1) + /* Set up UART for 115200 @ 40MHz: divisor ~ 22 */ + lbu t2, 0x0c(t0) /* Read LCR */ + ori t2, t2, 0x80 /* Set DLAB */ + sb t2, 0x0c(t0) + li t2, 22 /* Divisor */ + sb t2, 0x00(t0) /* DLR */ + sb zero, 0x04(t0) /* DMR */ + lbu t2, 0x0c(t0) + andi t2, t2, 0x7f /* Clear DLAB */ + ori t2, t2, 0x03 /* 8N1 */ + sb t2, 0x0c(t0) +#endif + + /* + * Clear the Return Address Stack (RAS) by executing nested calls. + * This prevents stale return addresses from causing misprediction. + */ + call .L_clear_ras + + /* + * Read hart ID from CSR (we're the first code running on this core) + */ + csrr a0, mhartid + mv tp, a0 /* Save hart ID in tp for later use */ + + /* + * Disable and clear all interrupts during initialization + */ + li t0, MSTATUS_MIE + csrc mstatus, t0 /* Clear global interrupt enable */ + csrw mie, zero /* Disable all interrupt sources */ + csrw mip, zero /* Clear any pending interrupts */ + + /* + * Initialize M-mode CSRs + */ + csrw mscratch, zero + csrw mcause, zero + csrw mepc, zero + /* - * Machine Mode: Read hart ID from CSR since we're the first code - * running on this core. In Supervisor mode, the SBI passes it in a0. 
+ * Clear PMP configuration (allow all access initially) */ - csrr a0, CSR_MHARTID + csrw pmpcfg0, zero + csrw pmpcfg2, zero + + /* + * Check if this is the boot hart (hart 0 = E51 on MPFS) + * Other harts (U54 cores) should wait in eNVM (this .init section) + */ + mv a0, tp /* Get saved hart ID */ + bnez a0, .L_secondary_hart_wait_envm + +#ifdef TARGET_mpfs250 + /* + * Configure L2 Cache for LIM/Scratchpad usage BEFORE copying to L2 + * Must enable ways AND disable shutdown before accessing L2 scratchpad. + * L2_WAY_ENABLE (0x02010008) = 0x0B (enable ways 0-3 and 8-11; NOTE(review): SiFive L2 docs describe WayEnable as the index of the highest enabled way, which would make 0x0B mean ways 0-11 - confirm) + * SYSREG_L2_SHUTDOWN_CR (0x20002174) = 0 (disable shutdown) + */ + li t1, 0x02010000 /* L2_CTRL_BASE */ + li t2, 0x0B /* Ways 0-3 (cache) + 8-11 (scratchpad) */ + sd t2, 8(t1) /* L2_WAY_ENABLE */ + fence + + li t1, 0x20002000 /* SYSREG_BASE */ + sw zero, 0x174(t1) /* SYSREG_L2_SHUTDOWN_CR = 0 */ + fence #endif + /* + * Copy .text section from eNVM to L2 SRAM + * + * NOTE: We CANNOT use 'la' for L2 SRAM addresses because 'la' uses PC-relative + * addressing (auipc+addi) which fails when the offset exceeds 32-bit range. + * eNVM is at 0x20220000, L2 SRAM is at 0x0A000000 - the difference wraps around + * in 32-bit arithmetic causing incorrect addresses. (NOTE(review): 0x20220000 - 0x0A000000 is about 0x16220000, inside the +/-2 GiB reach of auipc+addi, and .L_sram_entry is loaded with 'la' further down - confirm the actual failure cause) + * + * Solution: Use absolute addressing for destination and size. + * - Source (_stored_text): Use 'la' - same address range as this code + * - Dest: absolute L2 SRAM address, stored as a .dword in _copy_params and loaded with 'ld' + * - Size: byte count from the linker, stored as a .dword in _copy_params and loaded with 'ld' + * + * The values are read from the data embedded right after this code.
+ */ + la t0, _stored_text /* Source: eNVM - la works, same address range */ + la t3, _copy_params /* Load parameters from eNVM */ + ld t1, 0(t3) /* t1 = dest addr (from linker) */ + ld t2, 8(t3) /* t2 = byte count (from linker) */ + add t2, t1, t2 /* t2 = end address */ + +.L_copy_text: + bgeu t1, t2, .L_copy_text_done /* if dest >= end, done */ + ld t3, 0(t0) + sd t3, 0(t1) + addi t0, t0, 8 + addi t1, t1, 8 + j .L_copy_text +.L_copy_text_done: + + /* Flush instruction cache to ensure copied code is visible */ + fence.i + + /* + * Jump to main initialization code in L2 SRAM + * From here on, we execute from L2 SRAM (faster than eNVM) + */ + la t0, .L_sram_entry + jr t0 + +/* + * Clear Return Address Stack helper (in .init section, runs from eNVM) + * Two-deep nested calls to flush RAS prediction state + */ +.L_clear_ras: + mv t0, ra + nop + call .L_clear_ras_inner + nop + mv ra, t0 + ret + +.L_clear_ras_inner: + nop + nop + ret + +/* + * Copy parameters: These values are used by the copy loop. + * They are stored as data in the .init section (eNVM) so they can be loaded + * with 'la' which works for addresses in the same region as the PC. + * The linker will fill these in at link time. + */ +.align 3 +_copy_params: + .dword _start_text_sram /* Destination address (L2 SRAM) */ + .dword _text_size /* Number of bytes to copy */ + +/* + * Secondary harts (U54 cores) initialization and WFI loop + * In M-mode on MPFS, the E51 (hart 0) is the boot hart. + * U54 cores (harts 1-4) wait here until explicitly woken by E51. + * + * The protocol uses Hart Local Storage (HLS) for communication: + * 1. Secondary hart signals HLS_OTHER_HART_IN_WFI + * 2. Main hart sends IPI via CLINT_MSIP + * 3. Secondary hart wakes, signals HLS_OTHER_HART_PASSED_WFI + * 4. 
Secondary hart jumps to secondary_hart_entry() + */ +.L_secondary_hart_wait_envm: + /* Disable all interrupts initially */ + li t0, MSTATUS_MIE + csrc mstatus, t0 + csrw mie, zero + csrw mip, zero + + /* Enable only machine software interrupt (for IPI wake) */ + li t0, MIP_MSIP + csrw mie, t0 + + /* Set up stack for this hart - each hart gets STACK_SIZE_PER_HART bytes + * Stack layout: hart0 stack, hart1 stack, ... hart4 stack + * We need to calculate: stack_base + (hartid + 1) * STACK_SIZE_PER_HART */ + csrr a0, mhartid + la t0, _secondary_hart_stack_base + li t1, STACK_SIZE_PER_HART + addi a1, a0, 1 /* hartid + 1 */ + mul t2, a1, t1 /* (hartid + 1) * STACK_SIZE_PER_HART */ + add sp, t0, t2 /* stack top for this hart */ + + /* Align stack to 16 bytes */ + li t0, -16 + and sp, sp, t0 + + /* Allocate HLS at top of stack */ + addi sp, sp, -64 /* HLS_DEBUG_AREA_SIZE = 64 */ + mv s11, sp /* Save HLS pointer in s11 (callee-saved) */ + + /* Clear HLS area */ + sd zero, 0(s11) + sd zero, 8(s11) + sd zero, 16(s11) + sd zero, 24(s11) + sd zero, 32(s11) + sd zero, 40(s11) + sd zero, 48(s11) + sd zero, 56(s11) + + /* Wait for main hart (E51) to signal it has started + * Main hart sets HLS_MAIN_HART_STARTED at its HLS location */ + li t3, 0x12344321 /* HLS_MAIN_HART_STARTED */ + la t1, _main_hart_hls /* E51's HLS location */ + +.L_wait_main_hart: + lwu t2, 0(t1) /* Read main hart's indicator */ + bne t3, t2, .L_wait_main_hart /* Wait until main hart started */ + + /* Signal that we're in WFI state */ + li t0, 0x12345678 /* HLS_OTHER_HART_IN_WFI */ + sw t0, 0(s11) /* Write to our HLS */ + + /* Memory barrier to ensure write is visible */ + fence iorw, iorw + +.L_secondary_wfi_loop: + wfi + + /* Check if it was a software interrupt (IPI) */ + csrr t0, mip + andi t0, t0, MIP_MSIP + beqz t0, .L_secondary_wfi_loop /* Keep waiting if not software interrupt */ + + /* Clear the software interrupt by writing 0 to CLINT_MSIP */ + csrr a0, mhartid + li t0, 0x02000000 /* CLINT_BASE */ + 
slli t1, a0, 2 /* hartid * 4 */ + add t0, t0, t1 /* CLINT_MSIP[hartid] */ + sw zero, 0(t0) /* Clear MSIP */ + + /* Signal that we've passed WFI */ + li t0, 0x87654321 /* HLS_OTHER_HART_PASSED_WFI */ + sw t0, 0(s11) + + /* Memory barrier */ + fence iorw, iorw + + /* Flush instruction cache */ + fence.i + + /* Jump to secondary hart C entry point + * a0 = hartid (already in a0 from mhartid read) + * s11 = HLS pointer + * Use indirect jump since secondary_hart_entry is in .text (L2 SRAM) + * and we're in .init (eNVM) - too far for direct jump */ + csrr a0, mhartid + mv a1, s11 /* Pass HLS pointer as second arg */ + la t0, secondary_hart_entry + jr t0 + +/* + * ============================================================================ + * Code below this point runs from L2 SRAM after being copied from eNVM + * ============================================================================ + */ +.section .text +.L_sram_entry: + /* + * Setup trap handler (now pointing to SRAM location) + */ + la t0, trap_vector_table + csrw mtvec, t0 + /* Ensure mtvec is updated before continuing */ +1: + csrr t1, mtvec + bne t0, t1, 1b + + /* + * Initialize all general-purpose registers to zero + * (tp/x4 already holds hart ID, skip it) + */ + li x1, 0 + li x2, 0 + li x3, 0 + /* x4 (tp) = hart ID, don't clear */ + li x5, 0 + li x6, 0 + li x7, 0 + li x8, 0 + li x9, 0 + li x10, 0 + li x11, 0 + li x12, 0 + li x13, 0 + li x14, 0 + li x15, 0 + li x16, 0 + li x17, 0 + li x18, 0 + li x19, 0 + li x20, 0 + li x21, 0 + li x22, 0 + li x23, 0 + li x24, 0 + li x25, 0 + li x26, 0 + li x27, 0 + li x28, 0 + li x29, 0 + li x30, 0 + li x31, 0 + + /* + * Verify XLEN matches compilation (64-bit) + * MSB of misa is 1 for RV32, 2 for RV64 + */ +.L_xlen_check: + csrr t0, misa + bltz t0, .L_xlen_ok /* RV64: MSB set means negative */ + j .L_xlen_check /* Loop if not RV64 */ +.L_xlen_ok: + + /* + * Initialize global pointer for efficient small data access + * The linker provides __global_pointer$ at .sdata + 
0x800 + */ + .option push + .option norelax + la gp, __global_pointer$ + .option pop + + /* + * Initialize stack pointer from linker symbol + * Stack is at end of L2_SCRATCH, grows downward + */ + la sp, _end_stack + li t0, -16 + and sp, sp, t0 /* Ensure 16-byte alignment */ + mv s0, sp /* Set frame pointer */ + + /* + * Primary hart (E51, hart 0) continues with initialization + * (Secondary harts already parked in .init section) + */ + + /* + * Copy .data section from flash to RAM + * Copies 8 bytes per iteration until the destination reaches _end_data. + */ + la t0, _stored_data /* Source: flash */ + la t1, _start_data /* Dest: RAM */ + la t2, _end_data + beq t0, t1, .L_data_copy_done /* Skip if already in place */ +.L_data_copy: + bgeu t1, t2, .L_data_copy_done /* bgeu (not beq): also terminates if the size is not a multiple of 8 */ + ld t3, 0(t0) + sd t3, 0(t1) + addi t0, t0, 8 + addi t1, t1, 8 + j .L_data_copy +.L_data_copy_done: + + /* + * Clear .bss section + * Stores 8 zero bytes per iteration until the pointer reaches _end_bss. + */ + la t0, _start_bss + la t1, _end_bss +.L_bss_clear: + bgeu t0, t1, .L_bss_clear_done /* bgeu (not beq): cannot run past _end_bss indefinitely */ + sd zero, 0(t0) + addi t0, t0, 8 + j .L_bss_clear +.L_bss_clear_done: + +#ifndef TARGET_mpfs250 + /* + * Clear bus error unit accrued register on start-up + * This is cleared by the first hart only + * NOTE: Only for SiFive cores - MPFS does not have BEU at these addresses + */ + la a4,0x01700020UL + sb x0, 0(a4) + la a4,0x01701020UL + sb x0, 0(a4) + la a4,0x01702020UL + sb x0, 0(a4) + la a4,0x01703020UL + sb x0, 0(a4) + la a4,0x01704020UL + sb x0, 0(a4) +#endif + + /* + * Jump to C entry point + * a0 = hart ID + */ + mv a0, tp + j main + +#else + /* ======================================================================== + * RV64 Supervisor Mode Boot Sequence + * + * Entry conditions (passed by prior boot stage / SBI): + * a0 = hart ID (hardware thread identifier) + * a1 = pointer to device tree blob (DTB) in memory + * ======================================================================== */ + /* * Preserve boot parameters in callee-saved registers: * tp (x4) = hart ID - Used for multi-hart coordination.
The RISC-V ABI @@ -79,52 +486,38 @@ _reset: /* * Configure trap/exception handler: * Load address of trap_vector_table into the trap-vector base-address - * register (mtvec in M-mode, stvec in S-mode). All synchronous exceptions + * register (stvec in S-mode). All synchronous exceptions * and interrupts will vector through this table. */ la t0, trap_vector_table - csrw MODE_PREFIX(tvec), t0 + csrw stvec, t0 /* * Disable all interrupt sources initially by clearing the - * interrupt-enable register (mie/sie). This prevents spurious + * interrupt-enable register (sie). This prevents spurious * interrupts during early initialization. */ - csrw MODE_PREFIX(ie), zero + csrw sie, zero /* * Enable interrupt sources: * - Software Interrupts (IPIs) for multi-hart boot coordination * - External Interrupts for PLIC-routed peripheral interrupts (e.g., MMC) - * - * M-mode: MSIE (Software) + MEIE (External) - * S-mode: SSIE (Software) + SEIE (External) */ -#ifdef WOLFBOOT_RISCV_SMODE li t0, (SIE_SSIE | SIE_SEIE) -#else - li t0, (MIE_MSIE | MIE_MEIE) -#endif - csrs MODE_PREFIX(ie), t0 + csrs sie, t0 /* - * Enable global interrupts by setting the SIE/MIE bit in sstatus/mstatus. + * Enable global interrupts by setting the SIE bit in sstatus. * Without this, the CPU will never take interrupts regardless of the - * per-source enables in sie/mie. - * - * M-mode: mstatus.MIE (bit 3) - * S-mode: sstatus.SIE (bit 1) + * per-source enables in sie. */ -#ifdef WOLFBOOT_RISCV_SMODE li t0, SSTATUS_SIE -#else - li t0, MSTATUS_MIE -#endif - csrs MODE_PREFIX(status), t0 + csrs sstatus, t0 /* * Initialize stack pointer: - * WOLFBOOT_STACK_TOP = 0x80000000 (M-mode) or 0x80200000 (S-mode) + * WOLFBOOT_STACK_TOP = 0x80200000 (S-mode, in DDR) * * The stack grows downward from this address. RISC-V calling convention * requires 16-byte stack alignment, enforced by AND with -16 (0xFFFF...FFF0). 
@@ -154,6 +547,8 @@ _reset: mv a0, tp j main +#endif /* WOLFBOOT_RISCV_MMODE */ + #else /* __riscv_xlen == 32 */ /* ---------- RV32 Boot Sequence ---------- */ diff --git a/src/my_custom_encrypt_key.c b/src/my_custom_encrypt_key.c new file mode 100644 index 0000000000..bf301726c7 --- /dev/null +++ b/src/my_custom_encrypt_key.c @@ -0,0 +1,19 @@ +#include "wolfboot/wolfboot.h" +#include "image.h" + +int RAMFUNCTION wolfBoot_get_encrypt_key(uint8_t *key, uint8_t *nonce) +{ + int i; + /* Test key: "0123456789abcdef0123456789abcdef" (32 bytes for AES-256) */ + const char test_key[] = "0123456789abcdef0123456789abcdef"; + /* Test nonce: "0123456789abcdef" (16 bytes) */ + const char test_nonce[] = "0123456789abcdef"; + + for (i = 0; i < ENCRYPT_KEY_SIZE && i < (int)sizeof(test_key); i++) { + key[i] = (uint8_t)test_key[i]; + } + for (i = 0; i < ENCRYPT_NONCE_SIZE && i < (int)sizeof(test_nonce); i++) { + nonce[i] = (uint8_t)test_nonce[i]; + } + return 0; +} \ No newline at end of file diff --git a/src/sdhci.c b/src/sdhci.c index 3c38617b11..6d182a6174 100644 --- a/src/sdhci.c +++ b/src/sdhci.c @@ -47,6 +47,13 @@ static uint32_t g_rca = 0; /* SD Card Relative Address */ static volatile uint32_t g_mmc_irq_status = 0; static volatile int g_mmc_irq_pending = 0; +/* Microsecond delay using hardware timer */ +static void udelay(uint32_t us) +{ + uint64_t start = hal_get_timer_us(); + while ((hal_get_timer_us() - start) < us); +} + /* ============================================================================ * Register Access Helpers * ============================================================================ */ @@ -150,7 +157,7 @@ void sdhci_irq_handler(void) /* Signal that interrupt was handled */ g_mmc_irq_pending = 1; -#ifdef DEBUG_SDHCI +#ifdef DEBUG_SDHCI_IRQ wolfBoot_printf("sdhci_irq_handler: status=0x%08X, flags=0x%02X\n", status, g_mmc_irq_status); #endif @@ -492,7 +499,7 @@ static int sdhci_wait_busy(int check_dat0) } /* Reset data and command lines to recover from 
errors */ -static void sdhci_reset_lines(void) +static inline void sdhci_reset_lines(void) { sdhci_reg_or(SDHCI_SRS11, SDHCI_SRS11_RESET_DAT_CMD); while (SDHCI_REG(SDHCI_SRS11) & SDHCI_SRS11_RESET_DAT_CMD); @@ -909,7 +916,7 @@ static int emmc_send_op_cond(uint32_t ocr_arg, uint32_t *ocr_reg) } /* Small delay between retries */ - for (volatile int i = 0; i < 1000; i++); + udelay(10); } while (--timeout > 0); @@ -972,6 +979,13 @@ static int emmc_card_full_init(void) int status; uint32_t ocr_reg; + /* Set power to 3.3v */ + status = sdhci_set_power(SDHCI_SRS10_BVS_3_3V); + if (status != 0) { + wolfBoot_printf("eMMC: Failed to set power\n"); + return status; + } + /* Send CMD0 (GO_IDLE) to reset eMMC */ status = sdhci_cmd(MMC_CMD0_GO_IDLE, 0, SDHCI_RESP_NONE); if (status != 0) { @@ -980,7 +994,7 @@ static int emmc_card_full_init(void) } /* Small delay after reset */ - for (volatile int i = 0; i < 10000; i++); + udelay(100); /* Send CMD1 with operating conditions (3.3V, sector mode) */ status = emmc_send_op_cond(MMC_DEVICE_3_3V_VOLT_SET, &ocr_reg); @@ -1402,17 +1416,6 @@ int sdhci_init(void) sdhci_set_clock(SDHCI_CLK_400KHZ); #ifdef DISK_EMMC - /* ========================================================================= - * eMMC Initialization Path - * ========================================================================= */ - - /* Set power to 3.3v */ - status = sdhci_set_power(SDHCI_SRS10_BVS_3_3V); - if (status != 0) { - wolfBoot_printf("eMMC: Failed to set power\n"); - return status; - } - /* Run full eMMC card initialization */ status = emmc_card_full_init(); if (status != 0) { @@ -1421,10 +1424,6 @@ int sdhci_init(void) } #else /* DISK_SDCARD */ - /* ========================================================================= - * SD Card Initialization Path - * ========================================================================= */ - /* Run full SD card initialization */ status = sdcard_card_full_init(); if (status != 0) { @@ -1439,6 +1438,16 @@ int 
sdhci_init(void) /* Set data timeout to 3000ms */ status = sdhci_set_timeout(SDHCI_DATA_TIMEOUT_US); } + + wolfBoot_printf("sdhci_init: %s status: %d\n", + #ifdef DISK_EMMC + "eMMC" + #else + "SD" + #endif + , status + ); + return status; } diff --git a/src/update_disk.c b/src/update_disk.c index 614eedf0e7..4302f7a1ff 100644 --- a/src/update_disk.c +++ b/src/update_disk.c @@ -263,7 +263,9 @@ void RAMFUNCTION wolfBoot_start(void) uint32_t load_off; #ifdef MMU uint8_t *dts_addr = NULL; + #ifdef WOLFBOOT_FDT uint32_t dts_size = 0; + #endif #endif char part_name[4] = {'P', ':', 'X', '\0'}; uint64_t start_us, elapsed_ms; diff --git a/src/vector_riscv.S b/src/vector_riscv.S index a79f89a900..88b80c4beb 100644 --- a/src/vector_riscv.S +++ b/src/vector_riscv.S @@ -65,22 +65,15 @@ STORE x29, 29 * REGBYTES(sp) STORE x30, 30 * REGBYTES(sp) STORE x31, 31 * REGBYTES(sp) -#ifdef WOLFBOOT_RISCV_SMODE - csrr a0, scause - csrr a1, sepc - csrr a2, stval -#else - csrr a0, mcause - csrr a1, mepc - csrr a2, mtval -#endif + + csrr a0, MODE_PREFIX(cause) + csrr a1, MODE_PREFIX(epc) + csrr a2, MODE_PREFIX(tval) + mv a3, sp jal handle_trap -#ifdef WOLFBOOT_RISCV_SMODE - csrw sepc, a0 -#else - csrw mepc, a0 -#endif + csrw MODE_PREFIX(epc), a0 + .endm .macro trap_exit @@ -116,11 +109,9 @@ LOAD x31, 31 * REGBYTES(sp) LOAD x2, 2 * REGBYTES(sp) addi sp, sp, 32 * REGBYTES -#ifdef WOLFBOOT_RISCV_SMODE - sret -#else - mret -#endif + + MODE_PREFIX(ret) + .endm #else /* __riscv_xlen == 32 */ diff --git a/tools/scripts/mpfs_program.sh b/tools/scripts/mpfs_program.sh new file mode 100755 index 0000000000..f85d33dcf5 --- /dev/null +++ b/tools/scripts/mpfs_program.sh @@ -0,0 +1,1245 @@ +#!/bin/bash +# +# wolfBoot PolarFire SoC Programming Script +# Automates building, flashing, and verifying wolfBoot on MPFS target +# +# Supports two modes: +# S-Mode: Traditional HSS-based boot via USB mass storage flashing +# M-Mode: Direct eNVM programming via JTAG using mpfsBootmodeProgrammer +# + +set -e + +# 
Configuration - S-Mode (HSS-based boot) +HSS_TTY="${HSS_TTY:-/dev/ttyUSB1}" +WOLFBOOT_TTY="${WOLFBOOT_TTY:-/dev/ttyUSB4}" +BLOCK_DEV="${BLOCK_DEV:-/dev/sda}" +PI_HOST="${PI_HOST:-pi@Pi4}" +GPIO_PIN="${GPIO_PIN:-26}" +BAUD_RATE="${BAUD_RATE:-115200}" +TIMEOUT_HSS="${TIMEOUT_HSS:-30}" +TIMEOUT_BLOCK="${TIMEOUT_BLOCK:-15}" +TIMEOUT_WOLFBOOT="${TIMEOUT_WOLFBOOT:-30}" +WOLFBOOT_BIN="${WOLFBOOT_BIN:-wolfboot.bin}" +CONFIG_FILE="${CONFIG_FILE:-./config/examples/polarfire_mpfs250.config}" +STORAGE_MODE="${STORAGE_MODE:-}" # Can be "emmc" or "sdcard" + +# Configuration - M-Mode (JTAG-based programming) +MMODE_TTY="${MMODE_TTY:-/dev/ttyUSB1}" +MMODE_TTY_U54="${MMODE_TTY_U54:-/dev/ttyUSB4}" # U54 hart 1 UART (MMUART1) +MMODE_CONFIG="${MMODE_CONFIG:-./config/examples/polarfire_mpfs250-m.config}" +MMODE_DIE="${MMODE_DIE:-MPFS250T}" +MMODE_PACKAGE="${MMODE_PACKAGE:-FCG1152}" +TIMEOUT_MMODE="${TIMEOUT_MMODE:-30}" + +# Configuration - HSS (Hart Software Services) +HSS_DIR="${HSS_DIR:-../hart-software-services}" +HSS_BOARD="${HSS_BOARD:-mpfs-video-kit}" +HSS_TTY_DEBUG="${HSS_TTY_DEBUG:-/dev/ttyUSB1}" # E51 UART for DDR debug output +HSS_CROSS_COMPILE="${HSS_CROSS_COMPILE:-/opt/Microchip/SoftConsole-v2022.2-RISC-V-747/riscv-unknown-elf-gcc/bin/riscv64-unknown-elf-}" +TIMEOUT_HSS_CAPTURE="${TIMEOUT_HSS_CAPTURE:-60}" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Logging functions +log_info() { + echo -e "${BLUE}[INFO]${NC} $1" +} + +log_success() { + echo -e "${GREEN}[SUCCESS]${NC} $1" +} + +log_warn() { + echo -e "${YELLOW}[WARN]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +# Check for required tools (S-Mode) +check_dependencies() { + local missing=0 + for tool in ssh dtc hss-payload-generator lsblk; do + if ! 
command -v "$tool" &>/dev/null; then + log_error "Required tool '$tool' not found" + missing=1 + fi + done + if [[ $missing -eq 1 ]]; then + exit 1 + fi +} + +# Check for required tools (M-Mode) +check_mmode_dependencies() { + local missing=0 + + # Check for ssh (needed for GPIO power control) + if ! command -v ssh &>/dev/null; then + log_error "Required tool 'ssh' not found" + missing=1 + fi + + # Check SC_INSTALL_DIR is set + if [[ -z "$SC_INSTALL_DIR" ]]; then + log_error "SC_INSTALL_DIR environment variable is not set" + log_error "Please set it to your SoftConsole installation directory" + missing=1 + fi + + # Check mpfsBootmodeProgrammer.jar exists + local programmer_jar="$SC_INSTALL_DIR/extras/mpfs/mpfsBootmodeProgrammer.jar" + if [[ -n "$SC_INSTALL_DIR" ]] && [[ ! -f "$programmer_jar" ]]; then + log_error "mpfsBootmodeProgrammer.jar not found at: $programmer_jar" + missing=1 + fi + + # Check Java exists in SoftConsole + local java_bin="$SC_INSTALL_DIR/eclipse/jre/bin/java" + if [[ -n "$SC_INSTALL_DIR" ]] && [[ ! -x "$java_bin" ]]; then + log_error "Java not found at: $java_bin" + missing=1 + fi + + if [[ $missing -eq 1 ]]; then + exit 1 + fi +} + +# Power control functions +power_off() { + log_info "Powering OFF target..." + ssh "$PI_HOST" "raspi-gpio set $GPIO_PIN op dl" +} + +power_on() { + log_info "Powering ON target..." + ssh "$PI_HOST" "raspi-gpio set $GPIO_PIN op dh" +} + +power_cycle() { + log_info "Power cycling target..." + ssh "$PI_HOST" "raspi-gpio set $GPIO_PIN op dl && sleep 1 && raspi-gpio set $GPIO_PIN op dh" +} + +# Build wolfBoot +build_wolfboot() { + log_info "Building wolfBoot..." + + log_info "Copying config: $CONFIG_FILE -> .config" + cp "$CONFIG_FILE" .config + + log_info "Running make clean..." + make clean + + # Build with storage-specific flags if specified + local make_opts="" + if [[ "$STORAGE_MODE" == "emmc" ]]; then + make_opts="DISK_EMMC=1" + log_info "Building for eMMC storage..." 
+ elif [[ "$STORAGE_MODE" == "sdcard" ]]; then + make_opts="DISK_SDCARD=1" + log_info "Building for SD card storage..." + fi + + log_info "Building wolfboot.elf..." + make $make_opts wolfboot.elf + + log_info "Size of wolfboot.elf:" + size wolfboot.elf + + log_info "Compiling device tree..." + dtc -I dts -O dtb hal/mpfs.dts -o hal/mpfs.dtb + + log_info "Generating HSS payload..." + hss-payload-generator -vvv -c ./hal/mpfs.yaml "$WOLFBOOT_BIN" + + log_success "Build completed successfully!" +} + +# Build wolfBoot for M-Mode +build_wolfboot_mmode() { + log_info "Building wolfBoot for M-Mode..." + + log_info "Copying config: $MMODE_CONFIG -> .config" + cp "$MMODE_CONFIG" .config + + log_info "Running make clean..." + make clean + + log_info "Building wolfboot.elf..." + make wolfboot.elf + + log_info "Size of wolfboot.elf:" + size wolfboot.elf + + log_success "M-Mode build completed successfully!" +} + +# Flash wolfboot.elf via JTAG using mpfsBootmodeProgrammer +flash_jtag() { + log_info "Flashing wolfboot.elf via JTAG (bootmode 1)..." + log_info "Die: $MMODE_DIE, Package: $MMODE_PACKAGE" + + local java_bin="$SC_INSTALL_DIR/eclipse/jre/bin/java" + local programmer_jar="$SC_INSTALL_DIR/extras/mpfs/mpfsBootmodeProgrammer.jar" + + # Run the bootmode programmer + "$java_bin" -jar "$programmer_jar" \ + --bootmode 1 \ + --die "$MMODE_DIE" \ + --package "$MMODE_PACKAGE" \ + --workdir "$PWD" \ + wolfboot.elf + + log_success "JTAG flash completed successfully!" +} + +# Capture M-Mode UART output from both E51 and U54 UARTs +capture_mmode_output() { + local output_file="${1:-mmode_output_$(date +%Y%m%d_%H%M%S).log}" + local e51_pid="" + local u54_pid="" + + log_info "Capturing M-Mode output..." 
+ log_info " E51 (hart 0): $MMODE_TTY" + log_info " U54 (hart 1): $MMODE_TTY_U54" + log_info "Capture will run for $TIMEOUT_MMODE seconds" + + # Configure serial ports + stty -F "$MMODE_TTY" "$BAUD_RATE" cs8 -cstopb -parenb -echo raw 2>/dev/null || true + + # Check if U54 UART exists + if [[ -e "$MMODE_TTY_U54" ]]; then + stty -F "$MMODE_TTY_U54" "$BAUD_RATE" cs8 -cstopb -parenb -echo raw 2>/dev/null || true + fi + + # Create temp files for each UART + local e51_tmp=$(mktemp) + local u54_tmp=$(mktemp) + + # Start capturing from E51 UART (MMUART0) in background + timeout "$TIMEOUT_MMODE" cat "$MMODE_TTY" 2>/dev/null > "$e51_tmp" & + e51_pid=$! + + # Start capturing from U54 UART (MMUART1) in background if available + if [[ -e "$MMODE_TTY_U54" ]]; then + timeout "$TIMEOUT_MMODE" cat "$MMODE_TTY_U54" 2>/dev/null > "$u54_tmp" & + u54_pid=$! + fi + + # Wait for captures to complete + wait $e51_pid 2>/dev/null || true + if [[ -n "$u54_pid" ]]; then + wait $u54_pid 2>/dev/null || true + fi + + # Combine outputs with prefixes + { + echo "=== E51 (hart 0) Output - MMUART0 ===" + cat "$e51_tmp" + echo "" + if [[ -e "$MMODE_TTY_U54" ]] && [[ -s "$u54_tmp" ]]; then + echo "=== U54 (hart 1) Output - MMUART1 ===" + cat "$u54_tmp" + fi + } | tee "$output_file" + + # Cleanup temp files + rm -f "$e51_tmp" "$u54_tmp" + + echo "" + log_success "Output captured to $output_file" +} + +# Wait for HSS "Press a key to enter CLI" prompt on M-Mode TTY +# This indicates the device is powered on and ready for JTAG programming +# Returns 0 once the prompt is seen, 1 if TIMEOUT_HSS seconds pass without it +wait_for_hss_ready_mmode() { + log_info "Waiting for HSS ready prompt on $MMODE_TTY..."
+ + # Configure serial port + stty -F "$MMODE_TTY" "$BAUD_RATE" cs8 -cstopb -parenb -echo raw -icanon min 1 time 0 + + # Open file descriptor for serial port + exec 4<>"$MMODE_TTY" + + # Clear any stale data + while read -r -t 0.1 -u 4 line 2>/dev/null; do + : # discard + done + + local buffer="" + local elapsed=0 + local last_progress=0 + local start_time + start_time=$(date +%s) + + log_info "Waiting for 'Press a key to enter CLI' (timeout: ${TIMEOUT_HSS}s)..." + # The timeout is measured in wall-clock seconds: counting loop iterations + # would advance once per received line and expire early on a chatty boot log + while [[ $elapsed -lt $TIMEOUT_HSS ]]; do + # Read available data with timeout + if read -r -t 1 -u 4 line 2>/dev/null; then + echo "$line" + buffer+="$line" + if [[ "$buffer" == *"Press a key to enter CLI"* ]]; then + exec 4>&- + log_success "HSS ready - device is powered and initialized" + return 0 + fi + else + # Show progress every 5 seconds (at most one dot per 5-second step) + if [[ $((elapsed / 5)) -gt $last_progress ]]; then + echo -n "." + last_progress=$((elapsed / 5)) + fi + fi + # Recompute from the clock so fast reads do not consume the timeout + elapsed=$(( $(date +%s) - start_time )) + done + + exec 4>&- + echo "" + log_error "Timeout waiting for HSS ready prompt" + return 1 +} + +# Wait for HSS CLI prompt and enter usbdmsc mode +# NOTE: Serial port must be opened BEFORE power cycling to catch the CLI prompt! +enter_usbdmsc_mode() { + local do_power_cycle="${1:-0}" # Optional: power cycle as part of this function + + log_info "Waiting for HSS CLI prompt on $HSS_TTY..." + + # Configure serial port FIRST (before power cycle if applicable) + stty -F "$HSS_TTY" "$BAUD_RATE" cs8 -cstopb -parenb -echo raw -icanon min 1 time 0 + + # Open file descriptor for serial port BEFORE power cycling + # This ensures we don't miss the CLI prompt window + exec 3<>"$HSS_TTY" + + # Clear any stale data from the serial buffer + log_info "Clearing stale serial data..." + while read -r -t 0.1 -u 3 line 2>/dev/null; do + : # discard stale data + done + + # Power cycle if requested (do this AFTER opening serial port) + if [[ "$do_power_cycle" -eq 1 ]]; then + log_info "Power cycling target..."
+ ssh "$PI_HOST" "raspi-gpio set $GPIO_PIN op dl && sleep 1 && raspi-gpio set $GPIO_PIN op dh" + log_info "Waiting for HSS to start..." + fi + + local buffer="" + local found_prompt=0 + local found_cli=0 + local start_time=$(date +%s) + local current_time + + # Wait for "Press a key to enter CLI, ESC to skip" + log_info "Waiting for HSS boot prompt (timeout: ${TIMEOUT_HSS}s)..." + while true; do + # Check actual elapsed time + current_time=$(date +%s) + if [[ $((current_time - start_time)) -ge $TIMEOUT_HSS ]]; then + break + fi + + # Read available data with short timeout (0.5s to stay responsive) + if read -r -t 0.5 -u 3 line 2>/dev/null; then + echo "$line" + buffer+="$line" + if [[ "$buffer" == *"Press a key to enter CLI"* ]]; then + found_prompt=1 + # Send key immediately to enter CLI before timeout + log_info "Found CLI prompt, sending key..." + echo -e "\r" >&3 + break + fi + fi + done + + if [[ $found_prompt -eq 0 ]]; then + exec 3>&- + log_error "Timeout waiting for HSS CLI prompt" + log_error "The HSS may have auto-booted before the script could catch the CLI prompt." + log_error "Check if HSS has CLI enabled and has sufficient timeout." + return 1 + fi + + # Wait for the >> prompt + buffer="" + start_time=$(date +%s) + log_info "Waiting for HSS command prompt..." + while true; do + current_time=$(date +%s) + if [[ $((current_time - start_time)) -ge 10 ]]; then + break + fi + if read -r -t 0.5 -u 3 line 2>/dev/null; then + echo "$line" + buffer+="$line" + if [[ "$buffer" == *">>"* ]]; then + found_cli=1 + break + fi + fi + done + + if [[ $found_cli -eq 0 ]]; then + exec 3>&- + log_error "Timeout waiting for HSS command prompt" + return 1 + fi + + # Select storage device first if specified, then enter USBDMSC mode + # The storage mode command (emmc/sdcard) selects the device, then usbdmsc + # activates USB mass storage mode for the selected device. 
+ if [[ -n "$STORAGE_MODE" ]]; then + log_info "Selecting storage device: $STORAGE_MODE" + echo "$STORAGE_MODE" >&3 + sleep 1 + # Read response from storage selection + while read -r -t 1 -u 3 line 2>/dev/null; do + echo "$line" + done + fi + + # Send usbdmsc command to enter USB mass storage mode + log_info "Sending usbdmsc command..." + echo "usbdmsc" >&3 + sleep 2 + + # Read any remaining output + while read -r -t 1 -u 3 line 2>/dev/null; do + echo "$line" + done + + # Close file descriptor + exec 3>&- + + log_success "USBDMSC mode activated" +} + +# Unmount any mounted partitions on the block device +unmount_block_device() { + log_info "Checking for mounted partitions on $BLOCK_DEV..." + + # Find all mounted partitions on this device + local mounted_parts + mounted_parts=$(mount | grep "^${BLOCK_DEV}" | awk '{print $1}' || true) + + if [[ -n "$mounted_parts" ]]; then + for part in $mounted_parts; do + log_info "Unmounting $part..." + if sudo umount "$part" 2>/dev/null; then + log_success "Unmounted $part" + else + log_warn "Failed to unmount $part (may already be unmounted)" + fi + done + # Give system time to fully release the device + sleep 1 + else + log_info "No mounted partitions found on $BLOCK_DEV" + fi +} + +# Wait for block device to become available +wait_for_block_device() { + local partition="${BLOCK_DEV}1" + local elapsed=0 + + log_info "Waiting for $partition to become available..." + + while [[ $elapsed -lt $TIMEOUT_BLOCK ]]; do + if lsblk "$partition" &>/dev/null; then + # Wait a bit more for the device to be fully ready + sleep 1 + if lsblk "$partition" &>/dev/null; then + log_success "Block device $partition is available" + lsblk "$partition" + return 0 + fi + fi + sleep 1 + elapsed=$((elapsed + 1)) + echo -n "." + done + + echo "" + log_error "Timeout waiting for $partition" + return 1 +} + +# Flash and verify wolfboot.bin +flash_and_verify() { + local partition="${BLOCK_DEV}1" + + log_info "Flashing $WOLFBOOT_BIN to $partition..." 
+ + # Flash the image + sudo dd if="$WOLFBOOT_BIN" of="$partition" bs=512 status=progress + sync + + log_info "Verifying flash..." + + # Verify - cmp should report "EOF on wolfboot.bin" if successful + local cmp_output + cmp_output=$(sudo cmp "$WOLFBOOT_BIN" "$partition" 2>&1) || true + + if echo "$cmp_output" | grep -q "EOF on $WOLFBOOT_BIN"; then + log_success "Verification successful: $cmp_output" + return 0 + elif [[ -z "$cmp_output" ]]; then + # No output from cmp means files are identical up to the size of the smaller one + log_success "Verification successful (files match)" + return 0 + else + log_error "Verification failed: $cmp_output" + return 1 + fi +} + +# Capture wolfBoot output +capture_wolfboot_output() { + local output_file="${1:-wolfboot_output_$(date +%Y%m%d_%H%M%S).log}" + + log_info "Capturing wolfBoot output from $WOLFBOOT_TTY to $output_file..." + log_info "Press Ctrl+C to stop capture" + + # Configure serial port + stty -F "$WOLFBOOT_TTY" "$BAUD_RATE" cs8 -cstopb -parenb -echo raw + + # Use timeout with cat to capture output, or just cat if user wants manual stop + if [[ -n "$TIMEOUT_WOLFBOOT" ]] && [[ "$TIMEOUT_WOLFBOOT" -gt 0 ]]; then + timeout "$TIMEOUT_WOLFBOOT" cat "$WOLFBOOT_TTY" | tee "$output_file" || true + else + cat "$WOLFBOOT_TTY" | tee "$output_file" + fi + + log_success "Output captured to $output_file" +} + +# Capture wolfBoot output using timeout and cat +capture_wolfboot_output_timed() { + local output_file="${1:-wolfboot_output_$(date +%Y%m%d_%H%M%S).log}" + + log_info "Capturing wolfBoot output from $WOLFBOOT_TTY..." 
+    log_info "Capture will run for $TIMEOUT_WOLFBOOT seconds"
+
+    # Configure serial port
+    stty -F "$WOLFBOOT_TTY" "$BAUD_RATE" cs8 -cstopb -parenb -echo raw
+
+    # Use timeout with cat to capture output
+    timeout "$TIMEOUT_WOLFBOOT" cat "$WOLFBOOT_TTY" 2>/dev/null | tee "$output_file" || true
+
+    echo ""
+    log_success "Output captured to $output_file"
+}
+
+# Set storage mode and boot (for final boot after flashing)
+# Starts wolfBoot capture immediately before sending boot command to avoid missing output
+#
+# Args:
+#   $1 - output log file (optional, defaults to a timestamped wolfboot_output_*.log)
+# Returns:
+#   0 on success or when STORAGE_MODE is empty (nothing to do),
+#   1 when the HSS boot prompt or the HSS command prompt is not seen in time.
+# Uses file descriptor 3 for the HSS serial port; it is closed on every exit path
+# after it has been opened.
+set_storage_and_boot() {
+    local output_file="${1:-wolfboot_output_$(date +%Y%m%d_%H%M%S).log}"
+
+    if [[ -z "$STORAGE_MODE" ]]; then
+        log_info "No storage mode specified, letting device boot normally"
+        return 0
+    fi
+
+    log_info "Waiting for HSS CLI prompt to set storage mode..."
+
+    # Configure serial port immediately so we don't miss early boot output
+    stty -F "$HSS_TTY" "$BAUD_RATE" cs8 -cstopb -parenb -echo raw -icanon min 1 time 0
+
+    # Open file descriptor for serial port
+    exec 3<>"$HSS_TTY"
+
+    # Keep the read buffer local so it does not leak into the caller's scope
+    local line=""
+
+    # Clear any stale data from before power cycle
+    while read -r -t 0.1 -u 3 line 2>/dev/null; do
+        : # discard
+    done
+
+    local buffer=""
+    local found_prompt=0
+    local found_cli=0
+    local start_time
+    local current_time
+    local last_progress=0
+
+    # Wait for "Press a key to enter CLI, ESC to skip"
+    # Use longer timeout for post-flash boot (device may need to initialize)
+    local boot_timeout=$((TIMEOUT_HSS * 2))
+    log_info "Waiting for HSS boot prompt (timeout: ${boot_timeout}s)..."
+    start_time=$(date +%s)
+    while true; do
+        current_time=$(date +%s)
+        local elapsed=$((current_time - start_time))
+        if [[ $elapsed -ge $boot_timeout ]]; then
+            break
+        fi
+
+        # Read available data with short timeout
+        if read -r -t 0.5 -u 3 line 2>/dev/null; then
+            echo "$line"
+            buffer+="$line"
+            if [[ "$buffer" == *"Press a key to enter CLI"* ]]; then
+                found_prompt=1
+                break
+            fi
+        else
+            # Show progress every 5 seconds
+            if [[ $((elapsed / 5)) -gt $last_progress ]] && [[ $elapsed -gt 0 ]]; then
+                echo -n "."
+                last_progress=$((elapsed / 5))
+            fi
+        fi
+    done
+
+    if [[ $found_prompt -eq 0 ]]; then
+        exec 3>&-
+        echo ""
+        log_error "Timeout waiting for HSS CLI prompt after ${boot_timeout}s"
+        log_error "Device may still be booting or HSS output not detected"
+        return 1
+    fi
+
+    # Send a key to enter CLI
+    sleep 0.5
+    echo -e "\r" >&3
+
+    # Wait for the >> prompt
+    buffer=""
+    start_time=$(date +%s)
+    log_info "Waiting for HSS command prompt..."
+    while true; do
+        current_time=$(date +%s)
+        if [[ $((current_time - start_time)) -ge 10 ]]; then
+            break
+        fi
+        if read -r -t 0.5 -u 3 line 2>/dev/null; then
+            echo "$line"
+            buffer+="$line"
+        elif [[ -n "$line" ]]; then
+            # read timed out in the middle of a line: bash still stores the
+            # partial input in $line. A prompt may not be newline-terminated,
+            # so keep the fragment instead of silently dropping it.
+            echo -n "$line"
+            buffer+="$line"
+        fi
+        if [[ "$buffer" == *">>"* ]]; then
+            found_cli=1
+            break
+        fi
+    done
+
+    if [[ $found_cli -eq 0 ]]; then
+        exec 3>&-
+        log_error "Timeout waiting for HSS command prompt"
+        return 1
+    fi
+
+    # Send storage mode command
+    log_info "Setting storage mode to: $STORAGE_MODE"
+    echo "$STORAGE_MODE" >&3
+    sleep 1
+
+    # Read response
+    while read -r -t 1 -u 3 line 2>/dev/null; do
+        echo "$line"
+    done
+
+    # Configure wolfBoot serial port BEFORE sending boot command
+    stty -F "$WOLFBOOT_TTY" "$BAUD_RATE" cs8 -cstopb -parenb -echo raw
+
+    # Start wolfBoot capture in background BEFORE sending boot command
+    # This ensures we don't miss any early boot output
+    log_info "Starting wolfBoot capture (output: $output_file)..."
+    timeout "$TIMEOUT_WOLFBOOT" cat "$WOLFBOOT_TTY" 2>/dev/null | tee "$output_file" &
+    local capture_pid=$!
+
+    # Give capture process a moment to start
+    sleep 0.2
+
+    # Send boot command to boot from the selected storage
+    log_info "Sending boot command..."
+    echo "boot" >&3
+
+    # Close HSS file descriptor immediately to avoid delays
+    exec 3>&-
+
+    log_success "Boot command sent, capturing wolfBoot output for ${TIMEOUT_WOLFBOOT}s..."
+
+    # Wait for capture to complete
+    wait $capture_pid || true
+
+    echo ""
+    log_success "Output captured to $output_file"
+}
+
+# Show usage
+# Prints the help text to stdout. The heredoc is unquoted, so the current
+# values of the configuration variables are shown as defaults.
+usage() {
+    cat << EOF
+Usage: $(basename "$0") [OPTIONS] [COMMAND]
+
+wolfBoot PolarFire SoC Programming Script
+
+Supports two modes:
+  S-Mode: Traditional HSS-based boot via USB mass storage flashing
+  M-Mode: Direct eNVM programming via JTAG using mpfsBootmodeProgrammer
+
+S-Mode Commands (HSS-based):
+  all             Run full S-Mode workflow (build, flash via USB, capture)
+  build           Build wolfBoot with HSS payload
+  flash           Flash and verify only (assumes device in USBDMSC mode)
+  capture         Capture wolfBoot output only (from $WOLFBOOT_TTY)
+
+M-Mode Commands (JTAG-based):
+  mmode           Run full M-Mode workflow (build, flash via JTAG, capture)
+  mmode-build     Build wolfBoot for M-Mode only
+  mmode-flash     Flash via JTAG only (assumes wolfboot.elf exists)
+  mmode-capture   Capture M-Mode output only (from $MMODE_TTY)
+
+HSS Commands (Hart Software Services):
+  hss             Run full HSS workflow (build, program via JTAG, capture DDR debug)
+  hss-build       Build HSS only
+  hss-program     Program HSS via JTAG only (assumes HSS is built)
+  hss-capture     Capture HSS DDR debug output only (from $HSS_TTY_DEBUG)
+
+Common Commands:
+  power-cycle     Power cycle the target only
+  power-on        Power on the target
+  power-off       Power off the target
+
+S-Mode Options:
+  -c, --config FILE       Config file (default: $CONFIG_FILE)
+  -d, --device DEV        Block device (default: $BLOCK_DEV)
+  -H, --hss-tty TTY       HSS serial port (default: $HSS_TTY)
+  -W, --wolfboot-tty TTY  wolfBoot serial port (default: $WOLFBOOT_TTY)
+  -s, --storage MODE      Storage mode: 'emmc' or 'sdcard' (default: none)
+
+M-Mode Options:
+  --mmode-config FILE     M-Mode config file (default: $MMODE_CONFIG)
+  --mmode-tty TTY         E51 serial port (default: $MMODE_TTY)
+  --mmode-tty-u54 TTY     U54 serial port (default: $MMODE_TTY_U54)
+  --die DIE               Device die (default: $MMODE_DIE)
+  --package PKG           Device package (default: $MMODE_PACKAGE)
+
+HSS Options:
+  --hss-dir DIR           HSS source directory (default: $HSS_DIR)
+  --hss-board BOARD       HSS board name (default: $HSS_BOARD)
+  --hss-debug-tty TTY     HSS debug serial port (default: $HSS_TTY_DEBUG)
+
+Common Options:
+  -h, --help              Show this help message
+  -p, --pi-host HOST      Pi host for power control (default: $PI_HOST)
+  -g, --gpio PIN          GPIO pin for power control (default: $GPIO_PIN)
+  -o, --output FILE       Output file for captured log
+  -t, --timeout SEC       Timeout for capture (default: $TIMEOUT_WOLFBOOT)
+  --skip-build            Skip the build step in 'all', 'mmode' or 'hss' command
+
+Environment Variables:
+  S-Mode:
+    HSS_TTY             HSS serial port
+    WOLFBOOT_TTY        wolfBoot serial port
+    BLOCK_DEV           Block device for flashing
+    STORAGE_MODE        Storage mode: 'emmc' or 'sdcard'
+    TIMEOUT_HSS         Timeout for HSS prompt (default: 30s)
+    TIMEOUT_BLOCK       Timeout for block device (default: 15s)
+    TIMEOUT_WOLFBOOT    Timeout for capture (default: 30s)
+
+  M-Mode:
+    SC_INSTALL_DIR      SoftConsole installation directory (REQUIRED for M-Mode)
+    MMODE_TTY           E51 serial port (default: /dev/ttyUSB1)
+    MMODE_TTY_U54       U54 serial port (default: /dev/ttyUSB4)
+    MMODE_CONFIG        M-Mode config file
+    MMODE_DIE           Device die (default: MPFS250T)
+    MMODE_PACKAGE       Device package (default: FCG1152)
+    TIMEOUT_MMODE       Timeout for M-Mode capture (default: 30s)
+
+  HSS:
+    HSS_DIR             HSS source directory (default: ../hart-software-services)
+    HSS_BOARD           HSS board name (default: mpfs-video-kit)
+    HSS_TTY_DEBUG       HSS debug serial port (default: /dev/ttyUSB1)
+    HSS_CROSS_COMPILE   Cross compiler prefix for HSS
+    TIMEOUT_HSS_CAPTURE Timeout for HSS capture (default: 60s)
+
+  Common:
+    PI_HOST             Pi host for power control
+    GPIO_PIN            GPIO pin number
+    BAUD_RATE           Serial baud rate (default: 115200)
+
+Examples:
+  # S-Mode (HSS-based boot)
+  $(basename "$0") all                      # Full workflow (default eMMC)
+  $(basename "$0") -s sdcard all            # Full workflow using SD card
+  $(basename "$0") --skip-build all         # Flash existing build
+  $(basename "$0") build                    # Build only
+
+  # M-Mode (JTAG programming)
+  $(basename "$0") mmode                    # Full M-Mode workflow
+  $(basename "$0") --skip-build mmode       # Flash existing wolfboot.elf
+  $(basename "$0") mmode-flash              # Just flash via JTAG
+  $(basename "$0") mmode-capture            # Just capture M-Mode output
+  $(basename "$0") -o test.log mmode        # Save output to specific file
+
+  # HSS (Hart Software Services - for DDR debugging)
+  $(basename "$0") hss                      # Full HSS workflow (build, program, capture)
+  $(basename "$0") --skip-build hss         # Program existing HSS build
+  $(basename "$0") hss-build                # Build HSS only
+  $(basename "$0") hss-program              # Program HSS via JTAG only
+  $(basename "$0") hss-capture              # Capture HSS DDR debug output
+  $(basename "$0") -o ddr.log hss           # Save DDR debug to specific file
+
+EOF
+}
+
+# Main workflow
+#
+# Args:
+#   $1 - skip_build: 0 (default) to build first, non-zero to reuse $WOLFBOOT_BIN
+#   $2 - output log file passed on to the capture step (optional)
+# Exits with status 1 when the build is skipped but $WOLFBOOT_BIN does not exist.
+run_all() {
+    local skip_build="${1:-0}"
+    local output_file="${2:-}"
+
+    log_info "Starting full wolfBoot programming workflow..."
+    echo ""
+
+    # Step 1: Build (if not skipped)
+    if [[ "$skip_build" -eq 0 ]]; then
+        log_info "=== Step 1: Building wolfBoot ==="
+        build_wolfboot
+        echo ""
+    else
+        log_info "=== Step 1: Skipping build ==="
+        if [[ ! -f "$WOLFBOOT_BIN" ]]; then
+            log_error "$WOLFBOOT_BIN not found. Cannot skip build."
+            exit 1
+        fi
+        echo ""
+    fi
+
+    # Step 2+3: Power cycle and enter USBDMSC mode
+    # NOTE: These are combined because serial port must be opened BEFORE power cycling
+    # to catch the HSS CLI prompt window (which has a short timeout)
+    log_info "=== Step 2: Power cycling and entering USBDMSC mode ==="
+    enter_usbdmsc_mode 1  # Pass 1 to trigger power cycle
+    echo ""
+
+    # Step 3: Wait for block device
+    log_info "=== Step 3: Waiting for block device ==="
+    wait_for_block_device
+    echo ""
+
+    # Step 4: Flash and verify
+    log_info "=== Step 4: Flashing and verifying ==="
+    flash_and_verify
+    echo ""
+
+    # Step 5: Unmount and power cycle again
+    log_info "=== Step 5: Unmounting and power cycling for boot ==="
+    unmount_block_device
+    sleep 1
+    power_cycle
+    echo ""
+
+    # Step 6: Set storage mode and boot, then capture wolfBoot output
+    if [[ -n "$STORAGE_MODE" ]]; then
+        # When storage mode is set, set_storage_and_boot handles both boot and capture
+        # to avoid missing early wolfBoot output
+        log_info "=== Step 6: Setting storage mode, booting, and capturing output ==="
+        set_storage_and_boot "$output_file"
+        echo ""
+    else
+        # No storage mode - just capture output (device boots automatically)
+        log_info "=== Step 6: Capturing wolfBoot output ==="
+        capture_wolfboot_output_timed "$output_file"
+        echo ""
+    fi
+
+    log_success "=== Workflow completed successfully! ==="
+}
+
+# Build HSS
+# Runs 'make clean' followed by a build for $HSS_BOARD inside $HSS_DIR,
+# using $HSS_CROSS_COMPILE as the cross compiler prefix.
+# Exits with status 1 when $HSS_DIR does not exist.
+build_hss() {
+    log_info "Building HSS..."
+    log_info "HSS directory: $HSS_DIR"
+    log_info "Board: $HSS_BOARD"
+
+    if [[ ! -d "$HSS_DIR" ]]; then
+        log_error "HSS directory not found: $HSS_DIR"
+        exit 1
+    fi
+
+    pushd "$HSS_DIR" > /dev/null
+
+    log_info "Running make clean..."
+    make clean
+
+    log_info "Building HSS for $HSS_BOARD..."
+    make BOARD="$HSS_BOARD" CROSS_COMPILE="$HSS_CROSS_COMPILE" -j8
+
+    popd > /dev/null
+
+    log_success "HSS build completed successfully!"
+}
+
+# Program HSS via JTAG
+# Invokes the 'program' make target for $HSS_BOARD inside $HSS_DIR.
+# Exits with status 1 when $HSS_DIR does not exist.
+program_hss() {
+    log_info "Programming HSS via JTAG..."
+    log_info "HSS directory: $HSS_DIR"
+    log_info "Board: $HSS_BOARD"
+
+    if [[ ! -d "$HSS_DIR" ]]; then
+        log_error "HSS directory not found: $HSS_DIR"
+        exit 1
+    fi
+
+    pushd "$HSS_DIR" > /dev/null
+
+    make BOARD="$HSS_BOARD" program
+
+    popd > /dev/null
+
+    log_success "HSS programming completed!"
+}
+
+# Capture HSS DDR debug output
+#
+# Args:
+#   $1 - output log file (optional, defaults to a timestamped hss_ddr_output_*.log)
+# Reads $HSS_TTY_DEBUG for $TIMEOUT_HSS_CAPTURE seconds, then reports whether
+# DDR_TRAINING_PASS or DDR_TRAINING_FAIL appears in the captured log.
+capture_hss_output() {
+    local output_file="${1:-hss_ddr_output_$(date +%Y%m%d_%H%M%S).log}"
+
+    log_info "Capturing HSS DDR debug output from $HSS_TTY_DEBUG..."
+    log_info "Capture will run for $TIMEOUT_HSS_CAPTURE seconds"
+
+    # Configure serial port
+    stty -F "$HSS_TTY_DEBUG" "$BAUD_RATE" cs8 -cstopb -parenb -echo raw 2>/dev/null || true
+
+    # Capture output with timeout
+    timeout "$TIMEOUT_HSS_CAPTURE" cat "$HSS_TTY_DEBUG" 2>/dev/null | tee "$output_file" || true
+
+    echo ""
+    log_success "HSS output captured to $output_file"
+
+    # Check for DDR training result
+    if grep -q "DDR_TRAINING_PASS" "$output_file" 2>/dev/null; then
+        log_success "DDR training PASSED!"
+    elif grep -q "DDR_TRAINING_FAIL" "$output_file" 2>/dev/null; then
+        log_error "DDR training FAILED!"
+    else
+        log_warn "DDR training result not detected in output"
+    fi
+}
+
+# HSS workflow: build, program, and capture DDR debug output
+#
+# Args:
+#   $1 - skip_build: 0 (default) to build HSS first, non-zero to skip the build
+#   $2 - output log file (optional, defaults to a timestamped hss_ddr_output_*.log)
+run_hss() {
+    local skip_build="${1:-0}"
+    local output_file="${2:-hss_ddr_output_$(date +%Y%m%d_%H%M%S).log}"
+
+    log_info "Starting HSS programming workflow..."
+    log_info "This will build HSS, program via JTAG, and capture DDR debug output"
+    echo ""
+
+    # Step 1: Build HSS (if not skipped)
+    if [[ "$skip_build" -eq 0 ]]; then
+        log_info "=== Step 1: Building HSS ==="
+        build_hss
+        echo ""
+    else
+        log_info "=== Step 1: Skipping HSS build ==="
+        echo ""
+    fi
+
+    # Step 2: Power on target
+    log_info "=== Step 2: Powering on target ==="
+    power_on
+    sleep 2  # Give device time to power up
+    echo ""
+
+    # Step 3: Start capture and program HSS
+    # We start capture BEFORE programming so we don't miss any output
+    # The JTAG programming will reset the device and it will boot with new HSS
+    log_info "=== Step 3: Starting capture and programming HSS ==="
+
+    # Configure serial port
+    stty -F "$HSS_TTY_DEBUG" "$BAUD_RATE" cs8 -cstopb -parenb -echo raw 2>/dev/null || true
+
+    # Start capture in background
+    log_info "Starting HSS output capture (output: $output_file)..."
+    timeout "$TIMEOUT_HSS_CAPTURE" cat "$HSS_TTY_DEBUG" 2>/dev/null > "$output_file" &
+    local capture_pid=$!
+
+    # Give capture a moment to start
+    sleep 1
+
+    # Program HSS (this will trigger a reset and boot)
+    log_info "Programming HSS..."
+    program_hss
+    echo ""
+
+    # Step 4: Wait for capture to complete
+    log_info "=== Step 4: Waiting for capture to complete ==="
+    log_info "Capturing for up to $TIMEOUT_HSS_CAPTURE seconds..."
+    wait $capture_pid 2>/dev/null || true
+
+    echo ""
+    log_success "Capture completed: $output_file"
+
+    # Show capture statistics
+    # Declared separately from the assignment so 'local' does not mask the
+    # exit status of the command substitution.
+    local line_count
+    local byte_count
+    line_count=$(wc -l < "$output_file" 2>/dev/null || echo "0")
+    byte_count=$(wc -c < "$output_file" 2>/dev/null || echo "0")
+    log_info "Captured $line_count lines ($byte_count bytes)"
+
+    # Check for DDR training result
+    if grep -q "DDR_TRAINING_PASS" "$output_file" 2>/dev/null; then
+        log_success "=== HSS DDR Training PASSED! ==="
+    elif grep -q "DDR_TRAINING_FAIL" "$output_file" 2>/dev/null; then
+        log_error "=== HSS DDR Training FAILED ==="
+    else
+        log_warn "=== DDR training result not detected ==="
+        log_info "Check $output_file for details"
+    fi
+}
+
+# M-Mode workflow
+#
+# Args:
+#   $1 - skip_build: 0 (default) to build first, non-zero to reuse wolfboot.elf
+#   $2 - output log file (optional, defaults to a timestamped mmode_output_*.log)
+# Exits with status 1 when the build is skipped but wolfboot.elf does not exist.
+run_mmode() {
+    local skip_build="${1:-0}"
+    local output_file="${2:-mmode_output_$(date +%Y%m%d_%H%M%S).log}"
+
+    log_info "Starting M-Mode wolfBoot programming workflow..."
+    log_info "Mode: JTAG programming to eNVM (bootmode 1)"
+    echo ""
+
+    # Step 1: Build (if not skipped)
+    if [[ "$skip_build" -eq 0 ]]; then
+        log_info "=== Step 1: Building wolfBoot (M-Mode) ==="
+        build_wolfboot_mmode
+        echo ""
+    else
+        log_info "=== Step 1: Skipping build ==="
+        if [[ ! -f "wolfboot.elf" ]]; then
+            log_error "wolfboot.elf not found. Cannot skip build."
+            exit 1
+        fi
+        echo ""
+    fi
+
+    # Step 2: Power on target (JTAG requires power)
+    log_info "=== Step 2: Powering on target ==="
+    power_on
+    sleep 2  # Give device time to power up
+    echo ""
+
+    # Step 3: Flash via JTAG
+    log_info "=== Step 3: Flashing via JTAG ==="
+    flash_jtag
+    echo ""
+
+    # Step 4: Power cycle to boot with new firmware
+    log_info "=== Step 4: Power cycling to boot new firmware ==="
+    power_off
+    sleep 1
+    power_on
+    echo ""
+
+    # Step 5: Capture M-Mode output (should show "wolfBoot Version: " if successful)
+    log_info "=== Step 5: Capturing M-Mode output ==="
+    log_info "Looking for 'wolfBoot Version:' in output..."
+    capture_mmode_output "$output_file"
+    echo ""
+
+    # Check if wolfBoot started successfully
+    if grep -q "wolfBoot" "$output_file" 2>/dev/null; then
+        log_success "=== M-Mode workflow completed successfully! ==="
+        log_success "wolfBoot output detected in $output_file"
+    else
+        log_warn "=== M-Mode workflow completed ==="
+        log_warn "No wolfBoot output detected - check $output_file for details"
+    fi
+}
+
+# Parse command line arguments
+SKIP_BUILD=0
+OUTPUT_FILE=""
+COMMAND=""
+
+while [[ $# -gt 0 ]]; do
+    # Options that take a value must be followed by one. Without this guard
+    # 'shift 2' fails when the option is the last argument and the positional
+    # parameters are left unchanged, so the loop would never advance.
+    case "$1" in
+        -c|--config|-d|--device|-H|--hss-tty|-W|--wolfboot-tty|-p|--pi-host|\
+        -g|--gpio|-o|--output|-t|--timeout|-s|--storage|--mmode-config|\
+        --mmode-tty|--mmode-tty-u54|--die|--package|--hss-dir|--hss-board|\
+        --hss-debug-tty)
+            if [[ $# -lt 2 ]]; then
+                log_error "Option $1 requires an argument"
+                exit 1
+            fi
+            ;;
+    esac
+
+    case "$1" in
+        -h|--help)
+            usage
+            exit 0
+            ;;
+        -c|--config)
+            CONFIG_FILE="$2"
+            shift 2
+            ;;
+        -d|--device)
+            BLOCK_DEV="$2"
+            shift 2
+            ;;
+        -H|--hss-tty)
+            HSS_TTY="$2"
+            shift 2
+            ;;
+        -W|--wolfboot-tty)
+            WOLFBOOT_TTY="$2"
+            shift 2
+            ;;
+        -p|--pi-host)
+            PI_HOST="$2"
+            shift 2
+            ;;
+        -g|--gpio)
+            GPIO_PIN="$2"
+            shift 2
+            ;;
+        -o|--output)
+            OUTPUT_FILE="$2"
+            shift 2
+            ;;
+        -t|--timeout)
+            TIMEOUT_WOLFBOOT="$2"
+            TIMEOUT_MMODE="$2"
+            shift 2
+            ;;
+        -s|--storage)
+            STORAGE_MODE="$2"
+            if [[ "$STORAGE_MODE" != "emmc" && "$STORAGE_MODE" != "sdcard" ]]; then
+                log_error "Invalid storage mode: $STORAGE_MODE (must be 'emmc' or 'sdcard')"
+                exit 1
+            fi
+            shift 2
+            ;;
+        --mmode-config)
+            MMODE_CONFIG="$2"
+            shift 2
+            ;;
+        --mmode-tty)
+            MMODE_TTY="$2"
+            shift 2
+            ;;
+        --mmode-tty-u54)
+            MMODE_TTY_U54="$2"
+            shift 2
+            ;;
+        --die)
+            MMODE_DIE="$2"
+            shift 2
+            ;;
+        --package)
+            MMODE_PACKAGE="$2"
+            shift 2
+            ;;
+        --skip-build)
+            SKIP_BUILD=1
+            shift
+            ;;
+        --hss-dir)
+            HSS_DIR="$2"
+            shift 2
+            ;;
+        --hss-board)
+            HSS_BOARD="$2"
+            shift 2
+            ;;
+        --hss-debug-tty)
+            # Distinct name: '--hss-tty' is already taken by the S-Mode HSS port
+            # above, so a second '--hss-tty' arm here could never be reached.
+            HSS_TTY_DEBUG="$2"
+            shift 2
+            ;;
+        all|build|flash|capture|power-cycle|power-on|power-off|mmode|mmode-build|mmode-flash|mmode-capture|hss|hss-build|hss-program|hss-capture)
+            COMMAND="$1"
+            shift
+            ;;
+        *)
+            log_error "Unknown option: $1"
+            usage
+            exit 1
+            ;;
+    esac
+done
+
+# Default to 'all' if no command specified
+COMMAND="${COMMAND:-all}"
+
+# Check dependencies based on command
+case "$COMMAND" in
+    mmode|mmode-build|mmode-flash|mmode-capture)
+        check_mmode_dependencies
+        ;;
+    hss|hss-build|hss-program|hss-capture)
+        # HSS commands need ssh for power control and make for building
+        if ! command -v ssh &>/dev/null; then
+            log_error "Required tool 'ssh' not found"
+            exit 1
+        fi
+        if ! command -v make &>/dev/null; then
+            log_error "Required tool 'make' not found"
+            exit 1
+        fi
+        ;;
+    power-cycle|power-on|power-off)
+        # Power commands only need ssh
+        if ! command -v ssh &>/dev/null; then
+            log_error "Required tool 'ssh' not found"
+            exit 1
+        fi
+        ;;
+    *)
+        check_dependencies
+        ;;
+esac
+
+# Execute command
+case "$COMMAND" in
+    all)
+        run_all "$SKIP_BUILD" "$OUTPUT_FILE"
+        ;;
+    build)
+        build_wolfboot
+        ;;
+    flash)
+        wait_for_block_device
+        flash_and_verify
+        unmount_block_device
+        ;;
+    capture)
+        capture_wolfboot_output_timed "$OUTPUT_FILE"
+        ;;
+    mmode)
+        run_mmode "$SKIP_BUILD" "$OUTPUT_FILE"
+        ;;
+    mmode-build)
+        build_wolfboot_mmode
+        ;;
+    mmode-flash)
+        if [[ ! -f "wolfboot.elf" ]]; then
+            log_error "wolfboot.elf not found. Run 'mmode-build' first."
+            exit 1
+        fi
+        flash_jtag
+        ;;
+    mmode-capture)
+        capture_mmode_output "$OUTPUT_FILE"
+        ;;
+    hss)
+        run_hss "$SKIP_BUILD" "$OUTPUT_FILE"
+        ;;
+    hss-build)
+        build_hss
+        ;;
+    hss-program)
+        power_on
+        sleep 2
+        program_hss
+        ;;
+    hss-capture)
+        capture_hss_output "$OUTPUT_FILE"
+        ;;
+    power-cycle)
+        power_cycle
+        ;;
+    power-on)
+        power_on
+        ;;
+    power-off)
+        power_off
+        ;;
+    *)
+        log_error "Unknown command: $COMMAND"
+        usage
+        exit 1
+        ;;
+esac
+