diff --git a/.github/workflows/stm32-test-suite.yml b/.github/workflows/stm32-test-suite.yml index 72480e4..a2f4e33 100644 --- a/.github/workflows/stm32-test-suite.yml +++ b/.github/workflows/stm32-test-suite.yml @@ -32,5 +32,8 @@ jobs: - name: Build smoke firmware (U5) run: make -C STM32Sim/firmware/smoke-test-u5 + - name: Build smoke firmware (MP135) + run: make -C STM32Sim/firmware/smoke-test-mp135 + - name: cargo test run: cargo test --manifest-path STM32Sim/stm32-sim/Cargo.toml --release diff --git a/.github/workflows/stm32-wolfcrypt-test-mp135.yml b/.github/workflows/stm32-wolfcrypt-test-mp135.yml new file mode 100644 index 0000000..25fd868 --- /dev/null +++ b/.github/workflows/stm32-wolfcrypt-test-mp135.yml @@ -0,0 +1,39 @@ +name: STM32Sim wolfCrypt test (MP135) + +on: + push: + branches: [main] + pull_request: + branches: ['**'] + workflow_dispatch: + +jobs: + wolfcrypt-test: + name: wolfCrypt on STM32Sim MP135 + runs-on: ubuntu-24.04 + steps: + - name: Checkout simulator-stm32 + uses: actions/checkout@v4 + with: + path: simulator-stm32 + + - name: Checkout wolfSSL + uses: actions/checkout@v4 + with: + repository: wolfSSL/wolfssl + ref: master + path: wolfssl + + - name: Build stm32sim-wolfcrypt image + run: | + docker build \ + -t stm32sim-wolfcrypt:ci \ + -f simulator-stm32/STM32Sim/Dockerfile.wolfcrypt \ + simulator-stm32/STM32Sim + + - name: Run MP135 wolfCrypt test on stm32-sim + run: | + docker run --rm \ + -v "${{ github.workspace }}/wolfssl:/opt/wolfssl:ro" \ + stm32sim-wolfcrypt:ci \ + run-wolfcrypt-mp135.sh diff --git a/README.md b/README.md index 52b1a29..ee93c79 100644 --- a/README.md +++ b/README.md @@ -36,11 +36,18 @@ its `hal/posix/tcp/` HAL. ## STM32Sim The [STM32Sim](STM32Sim/) is a Unicorn-Engine-based simulator for STM32 -microcontrollers focused on the on-chip cryptographic accelerators -(CRYP/AES, HASH, RNG, PKA) that wolfSSL uses. 
It is intended to replace -the Renode-based CI flow for wolfSSL on STM32 targets and to close the -gaps Renode has in hardware-crypto modelling (HASH peripheral, full AES -mode set, PKA). +microcontrollers and application processors focused on the on-chip +cryptographic accelerators (CRYP/AES, HASH, RNG, PKA) that wolfSSL +uses. It is intended to replace the Renode-based CI flow for wolfSSL +on STM32 targets and to close the gaps Renode has in hardware-crypto +modelling (HASH peripheral, full AES mode set, PKA). Currently +supported targets: + +- **STM32H753** (Cortex-M7, HAL v1) +- **STM32U575 / U585** (Cortex-M33 with TrustZone, HAL v2, PKA v2) +- **STM32MP135** (Cortex-A7 with MMU, runs out of DDR with no + internal flash; CRYP1/HASH1/RNG1/PKA modelled, including SHA3 and + SHAKE on HASH1) ## PIC32MZSim diff --git a/STM32Sim/Dockerfile.wolfcrypt b/STM32Sim/Dockerfile.wolfcrypt index 7741c19..931a53b 100644 --- a/STM32Sim/Dockerfile.wolfcrypt +++ b/STM32Sim/Dockerfile.wolfcrypt @@ -9,18 +9,20 @@ # the Free Software Foundation; either version 3 of the License, or # (at your option) any later version. -# Builds the wolfCrypt-on-STM32 firmwares (H753 and U585) that today -# run under Renode CI, then runs them through stm32-sim instead. The -# wolfSSL source tree is expected to be mounted at /opt/wolfssl at -# runtime (the GitHub workflow does `docker run -v $(pwd):/opt/wolfssl -# ...`). Default CMD runs the H7 firmware; override with -# `run-wolfcrypt-u5.sh` for U585. +# Builds the wolfCrypt-on-STM32 firmwares (H753, U585, MP135) that +# today run under Renode CI, then runs them through stm32-sim +# instead. The wolfSSL source tree is expected to be mounted at +# /opt/wolfssl at runtime (the GitHub workflow does +# `docker run -v $(pwd):/opt/wolfssl ...`). Default CMD runs the H7 +# firmware; override with `run-wolfcrypt-u5.sh` or +# `run-wolfcrypt-mp135.sh` for the other targets. 
# Image contents: # - arm-none-eabi-gcc cross toolchain # - CMSIS_5, cmsis-device-h7, STM32CubeH7 v1.11.2 (vendored under /opt) # - cmsis-device-u5, STM32CubeU5 (vendored under /opt) +# - STM32CubeMP13 (vendored under /opt for the MP135 build) # - stm32-sim runner binary (built from this same repo) -# - run-wolfcrypt-h7.sh and run-wolfcrypt-u5.sh entrypoints +# - run-wolfcrypt-{h7,u5,mp135}.sh entrypoints # ============================================================================= # Stage 1: build stm32-sim (Rust) @@ -76,7 +78,15 @@ RUN git clone --depth 1 \ /opt/STM32CubeU5 \ && cd /opt/STM32CubeU5 \ && git submodule update --init --recursive --depth 1)) \ - && find /opt/STM32CubeH7 /opt/STM32CubeU5 -name '.git' -prune -exec rm -rf {} + \ + && (git clone --depth 1 --recurse-submodules \ + https://github.com/STMicroelectronics/STM32CubeMP13.git \ + /opt/STM32CubeMP13 \ + || (git clone --depth 1 \ + https://github.com/STMicroelectronics/STM32CubeMP13.git \ + /opt/STM32CubeMP13 \ + && cd /opt/STM32CubeMP13 \ + && git submodule update --init --recursive --depth 1)) \ + && find /opt/STM32CubeH7 /opt/STM32CubeU5 /opt/STM32CubeMP13 -name '.git' -prune -exec rm -rf {} + \ && rm -rf /opt/cmsis-device-h7/.git /opt/cmsis-device-u5/.git /opt/CMSIS_5/.git COPY --from=sim-builder /app/stm32-sim/target/release/stm32-sim /usr/local/bin/stm32-sim @@ -88,13 +98,18 @@ COPY --from=sim-builder /app/stm32-sim/target/release/stm32-sim /usr/local/bin/s # disable because Renode could not model them. 
COPY firmware/wolfcrypt-test-h7/ /opt/firmware-h7/ COPY firmware/wolfcrypt-test-u5/ /opt/firmware-u5/ +COPY firmware/wolfcrypt-test-mp135/ /opt/firmware-mp135/ COPY scripts/run-wolfcrypt-h7.sh /usr/local/bin/run-wolfcrypt-h7.sh COPY scripts/run-wolfcrypt-u5.sh /usr/local/bin/run-wolfcrypt-u5.sh -RUN chmod +x /usr/local/bin/run-wolfcrypt-h7.sh /usr/local/bin/run-wolfcrypt-u5.sh +COPY scripts/run-wolfcrypt-mp135.sh /usr/local/bin/run-wolfcrypt-mp135.sh +RUN chmod +x /usr/local/bin/run-wolfcrypt-h7.sh \ + /usr/local/bin/run-wolfcrypt-u5.sh \ + /usr/local/bin/run-wolfcrypt-mp135.sh ENV WOLFSSL_ROOT=/opt/wolfssl # Default entrypoint runs the H7 wolfCrypt test. Override by passing -# `run-wolfcrypt-u5.sh` as the command for the U585 build. +# `run-wolfcrypt-u5.sh` (U585) or `run-wolfcrypt-mp135.sh` (MP135) as +# the command for the other targets. CMD ["run-wolfcrypt-h7.sh"] diff --git a/STM32Sim/README.md b/STM32Sim/README.md index dcf7aa3..7d22653 100644 --- a/STM32Sim/README.md +++ b/STM32Sim/README.md @@ -26,16 +26,17 @@ track on its own schedule. ## Architecture We use [Unicorn Engine](https://www.unicorn-engine.org/) (QEMU-derived) -for ARM Cortex-M CPU emulation, and provide our own MMIO peripheral -models in Rust. The repo is a Cargo workspace under -[`stm32-sim/`](stm32-sim): +for ARM CPU emulation, and provide our own MMIO peripheral models in +Rust. The Cortex-M targets boot in Thumb/MCLASS mode; the MP135 +target boots in ARM mode as a Cortex-A7 (with MMU). The repo is a +Cargo workspace under [`stm32-sim/`](stm32-sim): ``` stm32-sim/ core/ CPU + MMIO bus + ELF loader + Runner peripherals/ USART, RCC, RNG, CRYP, HASH, PKA - chips/ STM32H753 / STM32U575 / STM32U585 chip configurations - (memory map + peripheral wiring) + chips/ STM32H753 / STM32U575 / STM32U585 / STM32MP135 chip + configurations (memory map + peripheral wiring) runner-bin/ `stm32-sim` CLI binary ``` @@ -50,18 +51,26 @@ even though three chips might present three different DIN/HR layouts. 
## Status -Both **STM32H753** (Cortex-M7, HAL v1, no PKA) and **STM32U575** -(Cortex-M33, HAL v2, PKA v2) chip targets boot, run firmware, and -drive their on-chip cryptographic peripherals end-to-end: - -| Peripheral | H7 (v1) | U5 (v2) | -|------------|---------|---------| -| USART | OK | OK | -| RCC | stub | stub | -| RNG | OK | OK | -| CRYP/AES | ECB/CBC/CTR/GCM (HAL-driven) | ECB/CBC/CTR/GCM | -| HASH | SHA-1/224/256, MD5 (HAL-driven, hardware HMAC mode supported) | SHA-1/224/256, MD5 | -| PKA | n/a | ECC mul (P-256/P-384), RSA modexp, mod arithmetic | +**STM32H753** (Cortex-M7, HAL v1, no PKA), **STM32U575/U585** +(Cortex-M33, HAL v2, PKA v2), and **STM32MP135** (Cortex-A7, +HAL v2 with the H7-style CRYP block, PKA v2) chip targets all boot, +run firmware, and drive their on-chip cryptographic peripherals +end-to-end: + +| Peripheral | H7 (v1) | U5 (v2) | MP135 | +|------------|----------------------------------|--------------------|---------------------------------------| +| USART | OK | OK | OK (UART4) | +| RCC | stub | stub | stub | +| RNG | OK | OK | OK (RNG1) | +| CRYP/AES | ECB/CBC/CTR/GCM (HAL-driven) | ECB/CBC/CTR/GCM | ECB/CBC/CTR/GCM (CRYP1, aliased CRYP) | +| HASH | SHA-1/224/256, MD5 | SHA-1/224/256, MD5 | SHA-1/224/256, MD5, SHA-384/512, SHA3-224/256/384/512, SHAKE-128/256 (HASH1) | +| PKA | n/a | ECC mul (P-256/P-384), RSA modexp, mod arithmetic | same as U5 | + +The MP135 is bare-metal Cortex-A7 with no internal flash. The firmware +links at the DDR base (0xC0000000); the simulator maps DDR as plain +RAM and the ELF loader writes segments straight there, so no DDR_Init +helper is needed. The firmware enables a flat 1 MiB-section MMU map +during early boot to mirror the real-hardware path. 
The peripheral register adapters are split into `v1.rs` (H7 / HAL v1) and `v2.rs` (U5 / HAL v2) modules sharing the same cryptographic @@ -90,6 +99,7 @@ toolchain: ```sh make -C firmware/smoke-test-h7 make -C firmware/smoke-test-u5 +make -C firmware/smoke-test-mp135 ``` ## Running diff --git a/STM32Sim/firmware/smoke-test-mp135/Makefile b/STM32Sim/firmware/smoke-test-mp135/Makefile new file mode 100644 index 0000000..4bc4d3a --- /dev/null +++ b/STM32Sim/firmware/smoke-test-mp135/Makefile @@ -0,0 +1,36 @@ +# Makefile for the STM32Sim MP135 smoke-test firmware. +# +# Copyright (C) 2026 wolfSSL Inc. + +CROSS ?= arm-none-eabi- +CC := $(CROSS)gcc +OBJCOPY := $(CROSS)objcopy +SIZE := $(CROSS)size + +CFLAGS := -mcpu=cortex-a7 -marm -mfpu=neon-vfpv4 -mfloat-abi=hard \ + -O0 -g -ffreestanding -nostartfiles -Wall -Wextra \ + -fno-common -ffunction-sections -fdata-sections +ASFLAGS := -mcpu=cortex-a7 -marm -mfpu=neon-vfpv4 -mfloat-abi=hard \ + -g -ffreestanding +LDFLAGS := -mcpu=cortex-a7 -marm -mfpu=neon-vfpv4 -mfloat-abi=hard \ + -nostartfiles -Wl,--gc-sections -T smoke.ld -Wl,-Map,smoke.map + +OBJS := startup.o mmu.o main.o +TARGET := smoke.elf + +all: $(TARGET) + +$(TARGET): $(OBJS) smoke.ld + $(CC) $(LDFLAGS) -o $@ $(OBJS) + $(SIZE) $@ + +%.o: %.c + $(CC) $(CFLAGS) -c -o $@ $< + +%.o: %.S + $(CC) $(ASFLAGS) -c -o $@ $< + +clean: + rm -f $(OBJS) $(TARGET) smoke.map + +.PHONY: all clean diff --git a/STM32Sim/firmware/smoke-test-mp135/main.c b/STM32Sim/firmware/smoke-test-mp135/main.c new file mode 100644 index 0000000..5736ccb --- /dev/null +++ b/STM32Sim/firmware/smoke-test-mp135/main.c @@ -0,0 +1,221 @@ +/* main.c + * + * Copyright (C) 2026 wolfSSL Inc. + * + * Smoke-test firmware for the STM32MP135 chip target. Brings up the + * MMU (1 MiB section identity map), then drives UART4, RNG1, CRYP1, + * and HASH1 directly through MMIO. 
Sets test_complete/test_result + * the same way the H7/U5 smoke tests do so the simulator's exit + * polling and the cargo-test smoke harness can both observe the + * outcome. + */ + +#include <stdint.h> + +void mmu_enable(void); + +#define UART4_BASE 0x40010000u +#define UART4_CR1 (*(volatile uint32_t *)(UART4_BASE + 0x00)) +#define UART4_BRR (*(volatile uint32_t *)(UART4_BASE + 0x0C)) +#define UART4_ISR (*(volatile uint32_t *)(UART4_BASE + 0x1C)) +#define UART4_TDR (*(volatile uint32_t *)(UART4_BASE + 0x28)) + +#define USART_CR1_UE (1u << 0) +#define USART_CR1_TE (1u << 3) +#define USART_ISR_TXE (1u << 7) + +#define RNG1_BASE 0x54004000u +#define RNG_CR (*(volatile uint32_t *)(RNG1_BASE + 0x00)) +#define RNG_SR (*(volatile uint32_t *)(RNG1_BASE + 0x04)) +#define RNG_DR (*(volatile uint32_t *)(RNG1_BASE + 0x08)) +#define RNG_CR_RNGEN (1u << 2) +#define RNG_SR_DRDY (1u << 0) + +#define CRYP1_BASE 0x54002000u +#define CRYP_CR (*(volatile uint32_t *)(CRYP1_BASE + 0x00)) +#define CRYP_DIN (*(volatile uint32_t *)(CRYP1_BASE + 0x08)) +#define CRYP_DOUT (*(volatile uint32_t *)(CRYP1_BASE + 0x0C)) +#define CRYP_K2LR (*(volatile uint32_t *)(CRYP1_BASE + 0x30)) +#define CRYP_CR_CRYPEN (1u << 15) +#define CRYP_CR_ALGODIR (1u << 2) +#define CRYP_CR_ALGOMODE_AES_ECB (0b100u << 3) + +#define HASH1_BASE 0x54003000u +#define HASH_CR (*(volatile uint32_t *)(HASH1_BASE + 0x000)) +#define HASH_DIN (*(volatile uint32_t *)(HASH1_BASE + 0x004)) +#define HASH_STR (*(volatile uint32_t *)(HASH1_BASE + 0x008)) +#define HASH_HR_EXT_BASE (HASH1_BASE + 0x310u) +#define HASH_CR_INIT (1u << 2) +/* MP13 HASH ALGO is a 4-bit field at CR[20:17] (not the 2-bit + * {7,18} layout the H7 uses). Encoding: 0=SHA-1, 1=MD5, 2=SHA-224, + * 3=SHA-256, 4..7=SHA3-{224,256,384,512}, 8/9=SHAKE-{128,256}. 
*/ +#define HASH_CR_ALGO_SHA256 (3u << 17) +#define HASH_CR_ALGO_SHA3_256 (5u << 17) +#define HASH_STR_DCAL (1u << 8) + +volatile int test_result __attribute__((section(".data"))) = -1; +volatile int test_complete __attribute__((section(".data"))) = 0; + +static void uart_putc(char c) +{ + while (!(UART4_ISR & USART_ISR_TXE)) { + } + UART4_TDR = (uint32_t)c; +} + +static void uart_puts(const char *s) +{ + while (*s) { + if (*s == '\n') { + uart_putc('\r'); + } + uart_putc(*s++); + } +} + +static void uart_put_hex32(uint32_t v) +{ + static const char hex[] = "0123456789abcdef"; + char out[11]; + out[0] = '0'; out[1] = 'x'; + for (int i = 0; i < 8; i++) { + out[2 + i] = hex[(v >> ((7 - i) * 4)) & 0xF]; + } + out[10] = 0; + uart_puts(out); +} + +int main(void) +{ + mmu_enable(); + + UART4_BRR = 64000000u / 115200u; + UART4_CR1 = USART_CR1_UE | USART_CR1_TE; + + uart_puts("\n=== STM32Sim MP135 smoke test ===\n"); + + RNG_CR = RNG_CR_RNGEN; + for (int i = 0; i < 4; i++) { + while (!(RNG_SR & RNG_SR_DRDY)) { + } + uint32_t v = RNG_DR; + uart_puts("rng["); + uart_putc('0' + (char)i); + uart_puts("] = "); + uart_put_hex32(v); + uart_puts("\n"); + } + + /* AES-128 ECB round-trip through CRYP1. Same FIPS-197 Appendix B + * vector the H7 smoke test uses, so the simulator's shared engine + * is exercised identically. 
*/ + int aes_ok = 1; + { + volatile uint32_t *key = &CRYP_K2LR; + key[0] = 0x2b7e1516u; + key[1] = 0x28aed2a6u; + key[2] = 0xabf71588u; + key[3] = 0x09cf4f3cu; + + CRYP_CR = CRYP_CR_ALGOMODE_AES_ECB | CRYP_CR_CRYPEN; + CRYP_DIN = 0x3243f6a8u; + CRYP_DIN = 0x885a308du; + CRYP_DIN = 0x313198a2u; + CRYP_DIN = 0xe0370734u; + uint32_t c0 = CRYP_DOUT; + uint32_t c1 = CRYP_DOUT; + uint32_t c2 = CRYP_DOUT; + uint32_t c3 = CRYP_DOUT; + CRYP_CR = 0; + + if (c0 != 0x3925841du || c1 != 0x02dc09fbu || + c2 != 0xdc118597u || c3 != 0x196a0b32u) { + aes_ok = 0; + uart_puts("AES-128 ECB encrypt mismatch\n"); + } + + CRYP_CR = CRYP_CR_ALGOMODE_AES_ECB | CRYP_CR_ALGODIR | CRYP_CR_CRYPEN; + CRYP_DIN = c0; CRYP_DIN = c1; CRYP_DIN = c2; CRYP_DIN = c3; + uint32_t p0 = CRYP_DOUT, p1 = CRYP_DOUT, p2 = CRYP_DOUT, p3 = CRYP_DOUT; + CRYP_CR = 0; + + if (p0 != 0x3243f6a8u || p1 != 0x885a308du || + p2 != 0x313198a2u || p3 != 0xe0370734u) { + aes_ok = 0; + uart_puts("AES-128 ECB decrypt mismatch\n"); + } + + if (aes_ok) { + uart_puts("AES-128 ECB round-trip OK\n"); + } + } + + /* SHA-256 of "abc" through HASH1. */ + int hash_ok = 1; + { + HASH_CR = HASH_CR_ALGO_SHA256 | HASH_CR_INIT; + HASH_DIN = 0x61626300u; + HASH_STR = HASH_STR_DCAL | 24u; + volatile uint32_t *hr = (volatile uint32_t *)HASH_HR_EXT_BASE; + const uint32_t expected[8] = { + 0xba7816bfu, 0x8f01cfeau, 0x414140deu, 0x5dae2223u, + 0xb00361a3u, 0x96177a9cu, 0xb410ff61u, 0xf20015adu, + }; + for (int i = 0; i < 8; i++) { + if (hr[i] != expected[i]) { + hash_ok = 0; + uart_puts("SHA-256 mismatch\n"); + break; + } + } + if (hash_ok) { + uart_puts("SHA-256 \"abc\" OK\n"); + } + } + + /* SHA3-256 of "abc" through HASH1. Same KAT shape as SHA-256 + * but with ALGO=5 instead of 3, exercising the MP13-only SHA3 + * code path on the chip. 
*/ + int sha3_ok = 1; + { + HASH_CR = HASH_CR_ALGO_SHA3_256 | HASH_CR_INIT; + HASH_DIN = 0x61626300u; + HASH_STR = HASH_STR_DCAL | 24u; + volatile uint32_t *hr = (volatile uint32_t *)HASH_HR_EXT_BASE; + const uint32_t expected[8] = { + 0x3a985da7u, 0x4fe225b2u, 0x045c172du, 0x6bd390bdu, + 0x855f086eu, 0x3e9d525bu, 0x46bfe245u, 0x11431532u, + }; + for (int i = 0; i < 8; i++) { + if (hr[i] != expected[i]) { + sha3_ok = 0; + uart_puts("SHA3-256 mismatch\n"); + break; + } + } + if (sha3_ok) { + uart_puts("SHA3-256 \"abc\" OK\n"); + } + } + hash_ok = hash_ok && sha3_ok; + + if (!aes_ok || !hash_ok) { + test_result = 1; + test_complete = 1; + for (;;) { + __asm__ volatile (""); + } + } + + uart_puts("=== smoke test passed ===\n"); + + test_result = 0; + test_complete = 1; + + /* Spin until the simulator notices test_complete on its next + * slice. Plain branch-to-self - no wfe/wfi, those are decoded + * as invalid instructions by Unicorn's Cortex-A7 model. */ + for (;;) { + __asm__ volatile (""); + } +} diff --git a/STM32Sim/firmware/smoke-test-mp135/mmu.c b/STM32Sim/firmware/smoke-test-mp135/mmu.c new file mode 100644 index 0000000..f683fe5 --- /dev/null +++ b/STM32Sim/firmware/smoke-test-mp135/mmu.c @@ -0,0 +1,125 @@ +/* mmu.c + * + * Copyright (C) 2026 wolfSSL Inc. + * + * Minimal ARMv7-A MMU bring-up for the MP135 smoke-test firmware. + * Builds a flat first-level (1 MiB section) translation table that + * identity-maps everything the firmware touches, then enables the + * MMU. + * + * The wolfSSL example's system_stm32mp13xx_A7_freeRTOS.c does a much + * more elaborate setup (two-level tables, write-back regions vs + * Device regions split across many ranges). For the simulator we only + * need enough mapping to satisfy Unicorn's QEMU-style MMU walk: any + * page the firmware reads or writes must resolve to a valid VA->PA + * mapping. + */ + +#include <stdint.h> + +/* The linker script reserves a 16 KiB-aligned 16 KiB region for the + * first-level translation table. 
Declare it as an array of u32 so the + * compiler's bounds tracking can see the real size. */ +extern uint32_t __ttb_start__[4096]; + +/* AP[2:0] = 011 (full access at PL0/PL1) + * TEX[2:0] = 001 + B=1 + C=1 -> Normal Outer/Inner Write-Back, Write-Allocate + * S = 1 (shareable), nG = 0, NS = 0 + * Section bit pattern (PXN=0, NS=0, nG=0, S=1, AP[2]=0, TEX=001, + * AP[1:0]=11, IMP=0, Domain=0, XN=0, C=1, B=1, [1:0]=10) + * + * Bits (from MSB): + * 31:20 base address + * 18 nG = 0 + * 17 S = 1 + * 16 AP[2] = 0 + * 15 TEX[2] = 0 + * 14:12 TEX[1:0] (low two bits in 14:12 region's low bits; bit 12 is TEX[0]) + * 11:10 AP[1:0] = 11 + * 9 IMP = 0 + * 8:5 Domain = 0 + * 4 XN = 0 + * 3 C = 1 + * 2 B = 1 + * 1:0 = 10 (section descriptor) + */ +#define SECTION_NORMAL (0x00020C0E) /* S=1, AP=11, TEX=001, C=1, B=1, type=10 */ +#define SECTION_DEVICE (0x00020C06) /* S=1, AP=11, TEX=000, C=0, B=1, type=10 */ +#define SECTION_DEV_XN (0x00020C16) /* same as DEVICE plus XN=1 */ + +#define MB (1u << 20) + +static void map_section(uint32_t *ttb, uint32_t va, uint32_t attrs) +{ + uint32_t idx = va >> 20; + ttb[idx] = (va & 0xFFF00000u) | attrs; +} + +static void map_range(uint32_t *ttb, uint32_t start, uint32_t end, uint32_t attrs) +{ + for (uint32_t va = start; va < end; va += MB) { + map_section(ttb, va, attrs); + } +} + +void mmu_enable(void) +{ + uint32_t *ttb = __ttb_start__; + + for (int i = 0; i < 4096; i++) { + ttb[i] = 0; + } + + /* APB1 (UART4 lives here): Device, no-execute. Cover the whole + * 1 MiB section that contains 0x40010000. */ + map_section(ttb, 0x40000000u, SECTION_DEV_XN); + + /* RCC and friends at 0x50000000 (AHB4). */ + map_section(ttb, 0x50000000u, SECTION_DEV_XN); + + /* AHB5 crypto/RNG/PKA at 0x54000000. */ + map_section(ttb, 0x54000000u, SECTION_DEV_XN); + + /* SYSRAM + SRAMs around 0x2FF00000-0x30100000. Just cover both + * sections; the firmware does not use them but a stray access + * should not fault here. 
*/ + map_section(ttb, 0x2FF00000u, SECTION_NORMAL); + map_section(ttb, 0x30000000u, SECTION_NORMAL); + + /* DDR: cover the 16 MiB we actually link into. Anything beyond is + * unmapped, which is fine - Unicorn would fault us if we ran off. */ + map_range(ttb, 0xC0000000u, 0xC1000000u, SECTION_NORMAL); + + /* Drain any pending writes to the page table. */ + __asm__ volatile ("dsb sy" ::: "memory"); + + /* Domain access: client (01) for domain 0. */ + __asm__ volatile ("mcr p15, 0, %0, c3, c0, 0" :: "r"(0x55555555u)); + + /* TTBR0 = ttb; the low bits are RGN/IRGN/S which we leave 0 for + * a non-cached table walk. Good enough for Unicorn. */ + __asm__ volatile ("mcr p15, 0, %0, c2, c0, 0" :: "r"((uint32_t)ttb)); + + /* TTBCR = 0: use TTBR0 for the full 4 GiB address space. */ + __asm__ volatile ("mcr p15, 0, %0, c2, c0, 2" :: "r"(0u)); + + /* Invalidate TLB, branch predictor, and I-cache before turning + * the MMU on. */ + __asm__ volatile ("mcr p15, 0, %0, c8, c7, 0" :: "r"(0u)); /* TLBIALL */ + __asm__ volatile ("mcr p15, 0, %0, c7, c5, 6" :: "r"(0u)); /* BPIALL */ + __asm__ volatile ("mcr p15, 0, %0, c7, c5, 0" :: "r"(0u)); /* ICIALLU */ + __asm__ volatile ("dsb sy" ::: "memory"); + __asm__ volatile ("isb" ::: "memory"); + + /* SCTLR: set M=1 (MMU enable). Leave caches off for the smoke + * test - we do not care about performance and avoiding cache + * maintenance keeps the firmware simple. 
*/ + uint32_t sctlr; + __asm__ volatile ("mrc p15, 0, %0, c1, c0, 0" : "=r"(sctlr)); + sctlr |= 0x1u; /* M */ + sctlr &= ~(1u << 2); /* C off */ + sctlr &= ~(1u << 12); /* I off */ + __asm__ volatile ("mcr p15, 0, %0, c1, c0, 0" :: "r"(sctlr)); + __asm__ volatile ("dsb sy" ::: "memory"); + __asm__ volatile ("isb" ::: "memory"); +} diff --git a/STM32Sim/firmware/smoke-test-mp135/smoke.ld b/STM32Sim/firmware/smoke-test-mp135/smoke.ld new file mode 100644 index 0000000..22accb0 --- /dev/null +++ b/STM32Sim/firmware/smoke-test-mp135/smoke.ld @@ -0,0 +1,70 @@ +/* smoke.ld + * + * Copyright (C) 2026 wolfSSL Inc. + * + * Linker script for the STM32Sim MP135 smoke-test firmware. The MP135 + * has no internal flash; the firmware lives in DDR (0xC0000000) and + * Unicorn maps the whole DDR window as a writable RAM region. The + * loader writes the ELF segments straight there, so we do not need a + * .data-from-flash AT() trick. + */ + +ENTRY(_start) + +MEMORY +{ + DDR (rwx) : ORIGIN = 0xC0000000, LENGTH = 16M +} + +__stack_size__ = 0x4000; + +SECTIONS +{ + .text : ALIGN(4) + { + KEEP(*(.text._start)) + *(.text*) + *(.rodata*) + . = ALIGN(4); + } > DDR + + .data : ALIGN(4) + { + __data_start__ = .; + *(.data*) + . = ALIGN(4); + __data_end__ = .; + } > DDR + + .bss (NOLOAD) : ALIGN(4) + { + __bss_start__ = .; + *(.bss*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > DDR + + /* The ARMv7-A first-level translation table is 16 KiB and must be + * 16 KiB aligned. Place it after BSS, away from code. */ + .ttb (NOLOAD) : ALIGN(0x4000) + { + __ttb_start__ = .; + . = . + 0x4000; + __ttb_end__ = .; + } > DDR + + .stack (NOLOAD) : ALIGN(8) + { + . = . 
+ __stack_size__; + __stack_top__ = .; + } > DDR + + /DISCARD/ : + { + *(.ARM.exidx*) + *(.ARM.attributes) + *(.note*) + *(.comment*) + } +} diff --git a/STM32Sim/firmware/smoke-test-mp135/startup.S b/STM32Sim/firmware/smoke-test-mp135/startup.S new file mode 100644 index 0000000..ebc23e4 --- /dev/null +++ b/STM32Sim/firmware/smoke-test-mp135/startup.S @@ -0,0 +1,52 @@ +/* startup.S + * + * Copyright (C) 2026 wolfSSL Inc. + * + * Minimal Cortex-A7 ARM-mode reset path for the STM32Sim MP135 + * smoke-test firmware. Boots in SVC mode with the MMU off, sets the + * stack, zeroes BSS, and jumps to main(). + * + * Cortex-A7 does not boot from a vector table at address 0 the way a + * Cortex-M does. Unicorn drops the CPU into emu_start at the ELF + * entry point (the address of _start) in ARM mode, supervisor mode, + * with interrupts disabled. So this file is intentionally small: no + * vector table, no exception-mode stacks beyond SVC. main() enables + * the MMU before exercising any peripherals. + */ + + .section .text._start, "ax" + .global _start + .arm + .type _start, %function +_start: + /* Make sure we are in SVC mode with FIQ/IRQ masked. */ + cpsid if + mrs r0, cpsr + bic r0, r0, #0x1f + orr r0, r0, #0x13 /* SVC mode */ + msr cpsr_c, r0 + + /* Set the supervisor-mode stack. The linker reserves a 16 KiB + * region whose top is __stack_top__. */ + ldr sp, =__stack_top__ + + /* Zero BSS (unsigned compare: DDR addresses have bit 31 set). */ + ldr r0, =__bss_start__ + ldr r1, =__bss_end__ + mov r2, #0 +1: + cmp r0, r1 + bhs 2f + str r2, [r0], #4 + b 1b +2: + + /* Hand off to C. main() never returns; if it does, spin. We + * avoid wfe/wfi here because the Cortex-A7 model in Unicorn + * decodes them as invalid instructions when there is no event + * source. */ + bl main +hang: + b hang + + .size _start, . 
- _start diff --git a/STM32Sim/firmware/wolfcrypt-test-mp135/CMakeLists.txt b/STM32Sim/firmware/wolfcrypt-test-mp135/CMakeLists.txt new file mode 100644 index 0000000..4f6fc3b --- /dev/null +++ b/STM32Sim/firmware/wolfcrypt-test-mp135/CMakeLists.txt @@ -0,0 +1,108 @@ +cmake_minimum_required(VERSION 3.18) +project(wolfcrypt_stm32mp135 LANGUAGES C ASM) + +set(WOLFSSL_ROOT "/opt/wolfssl-build-tree" CACHE PATH "wolfSSL source") +set(STM32CUBE_MP13_ROOT "/opt/STM32CubeMP13" CACHE PATH "STM32CubeMP13 SDK") + +set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) +enable_language(ASM) + +include_directories(BEFORE + ${CMAKE_SOURCE_DIR} + ${STM32CUBE_MP13_ROOT}/Drivers/CMSIS/Core_A/Include + ${STM32CUBE_MP13_ROOT}/Drivers/CMSIS/Device/ST/STM32MP13xx/Include + ${STM32CUBE_MP13_ROOT}/Drivers/STM32MP13xx_HAL_Driver/Inc/Legacy + ${STM32CUBE_MP13_ROOT}/Drivers/STM32MP13xx_HAL_Driver/Inc +) + +set(HAL_SRC_DIR ${STM32CUBE_MP13_ROOT}/Drivers/STM32MP13xx_HAL_Driver/Src) + +if(EXISTS ${HAL_SRC_DIR}) + # Note: the MP13 HAL has no stm32mp13xx_hal_cortex.c - the Cortex + # helpers (NVIC, MPU) only exist on the M-class HALs. On A-class + # the equivalent functions live in CMSIS Core_A (irq_ctrl.h, GIC). + # wolfSSL's STM32 port does not call HAL_NVIC_* on this target. 
+ set(HAL_SOURCES + ${HAL_SRC_DIR}/stm32mp13xx_hal.c + ${HAL_SRC_DIR}/stm32mp13xx_hal_rcc.c + ${HAL_SRC_DIR}/stm32mp13xx_hal_rcc_ex.c + ${HAL_SRC_DIR}/stm32mp13xx_hal_dma.c + ${HAL_SRC_DIR}/stm32mp13xx_hal_dma_ex.c + ${HAL_SRC_DIR}/stm32mp13xx_hal_mdma.c + ${HAL_SRC_DIR}/stm32mp13xx_hal_rng.c + ${HAL_SRC_DIR}/stm32mp13xx_hal_rng_ex.c + ${HAL_SRC_DIR}/stm32mp13xx_hal_cryp.c + ${HAL_SRC_DIR}/stm32mp13xx_hal_cryp_ex.c + ${HAL_SRC_DIR}/stm32mp13xx_hal_hash.c + ${HAL_SRC_DIR}/stm32mp13xx_hal_hash_ex.c + ${HAL_SRC_DIR}/stm32mp13xx_hal_pka.c + ${HAL_SRC_DIR}/stm32mp13xx_hal_pwr.c + ${HAL_SRC_DIR}/stm32mp13xx_hal_pwr_ex.c + ${HAL_SRC_DIR}/stm32mp13xx_hal_gpio.c + ) +else() + message(WARNING "MP13 HAL source directory not found: ${HAL_SRC_DIR}") + set(HAL_SOURCES "") +endif() + +set(WOLFSSL_USER_SETTINGS ON CACHE BOOL "Use user_settings.h") +set(WOLFSSL_CRYPT_TESTS OFF CACHE BOOL "") +set(WOLFSSL_EXAMPLES OFF CACHE BOOL "") +set(BUILD_SHARED_LIBS OFF CACHE BOOL "") + +add_subdirectory(${WOLFSSL_ROOT} ${CMAKE_BINARY_DIR}/wolfssl-build EXCLUDE_FROM_ALL) +target_include_directories(wolfssl PRIVATE + ${STM32CUBE_MP13_ROOT}/Drivers/CMSIS/Core_A/Include + ${STM32CUBE_MP13_ROOT}/Drivers/CMSIS/Device/ST/STM32MP13xx/Include + ${STM32CUBE_MP13_ROOT}/Drivers/STM32MP13xx_HAL_Driver/Inc/Legacy + ${STM32CUBE_MP13_ROOT}/Drivers/STM32MP13xx_HAL_Driver/Inc + ${CMAKE_SOURCE_DIR} +) +target_compile_options(wolfssl PRIVATE -Wno-cpp) + +set(WOLFSSL_STM32_PORT_SRC ${WOLFSSL_ROOT}/wolfcrypt/src/port/st/stm32.c) + +add_executable(wolfcrypt_test.elf + startup.S + mmu.c + main.c + ${WOLFSSL_ROOT}/wolfcrypt/test/test.c + ${HAL_SOURCES} + ${WOLFSSL_STM32_PORT_SRC} +) + +target_include_directories(wolfcrypt_test.elf PRIVATE + ${CMAKE_SOURCE_DIR} + ${WOLFSSL_ROOT} + ${STM32CUBE_MP13_ROOT}/Drivers/STM32MP13xx_HAL_Driver/Inc + ${STM32CUBE_MP13_ROOT}/Drivers/STM32MP13xx_HAL_Driver/Inc/Legacy +) + +target_compile_definitions(wolfcrypt_test.elf PRIVATE + WOLFSSL_USER_SETTINGS + STM32MP135Fxx + CORE_CA7 + 
USE_HAL_DRIVER + USE_HAL_CONF +) + +# HAL needs CORE_CA7 (selects A-class CMSIS) and the same -mcpu / +# -mfpu we use everywhere else. +set_source_files_properties(${HAL_SOURCES} PROPERTIES + COMPILE_FLAGS "-mcpu=cortex-a7 -marm -mfpu=neon-vfpv4 -mfloat-abi=hard -ffunction-sections -fdata-sections -Os -include stdint.h -w -DCORE_CA7" +) + +target_compile_options(wolfcrypt_test.elf PRIVATE + -mcpu=cortex-a7 -marm -mfpu=neon-vfpv4 -mfloat-abi=hard + -ffunction-sections -fdata-sections -Os +) + +target_link_options(wolfcrypt_test.elf PRIVATE + -T${CMAKE_SOURCE_DIR}/stm32mp135.ld + -Wl,--gc-sections + -nostartfiles + -specs=nano.specs + -specs=nosys.specs +) + +target_link_libraries(wolfcrypt_test.elf PRIVATE wolfssl m c gcc nosys) diff --git a/STM32Sim/firmware/wolfcrypt-test-mp135/main.c b/STM32Sim/firmware/wolfcrypt-test-mp135/main.c new file mode 100644 index 0000000..ea1f079 --- /dev/null +++ b/STM32Sim/firmware/wolfcrypt-test-mp135/main.c @@ -0,0 +1,150 @@ +/* main.c - Entry point for wolfCrypt test on STM32MP135 under + * stm32-sim. Mirrors the H7/U5 wolfcrypt firmwares: bring up the + * MMU, configure UART4 via direct register pokes (no HAL_Init in + * sight), then call wolfcrypt_test() from wolfSSL's test suite. The + * simulator polls test_complete / test_result via ELF symbol + * lookup. */ + +#include <stdint.h> +#include <stddef.h> +#include <stdio.h> + +extern int wolfcrypt_test(void *args); +void mmu_enable(void); + +/* UART4 on MP135 is at APB1 + 0x10000 = 0x40010000. The H7/U5 USART + * register layout (CR1/BRR/ISR/TDR) is shared with all modern STM32 + * USARTs, so we drive it directly without going through HAL. 
*/ +#define UART4_BASE 0x40010000UL +#define UART4_CR1 (*(volatile uint32_t *)(UART4_BASE + 0x00)) +#define UART4_BRR (*(volatile uint32_t *)(UART4_BASE + 0x0C)) +#define UART4_ISR (*(volatile uint32_t *)(UART4_BASE + 0x1C)) +#define UART4_TDR (*(volatile uint32_t *)(UART4_BASE + 0x28)) + +#define USART_CR1_UE (1 << 0) +#define USART_CR1_TE (1 << 3) +#define USART_ISR_TXE (1 << 7) + +static void uart_init(void) +{ + UART4_BRR = 64000000UL / 115200UL; + UART4_CR1 = USART_CR1_UE | USART_CR1_TE; +} + +static void uart_putc(char c) +{ + while (!(UART4_ISR & USART_ISR_TXE)) + ; + UART4_TDR = (uint32_t)c; +} + +int _write(int fd, const char *buf, int len) +{ + (void)fd; + for (int i = 0; i < len; i++) { + if (buf[i] == '\n') + uart_putc('\r'); + uart_putc(buf[i]); + } + return len; +} + +extern char __heap_start__; +extern char __heap_end__; + +void *_sbrk(ptrdiff_t incr) +{ + static char *heap_ptr = NULL; + char *prev; + + if (heap_ptr == NULL) { + heap_ptr = &__heap_start__; + } + prev = heap_ptr; + if (heap_ptr + incr > &__heap_end__) { + return (void *)-1; + } + heap_ptr += incr; + return prev; +} + +/* A monotonically increasing tick counter. The MP13 HAL declares + * HAL_GetTick as __weak so we override it here and avoid having to + * configure a hardware timer. */ +static volatile uint32_t tick_counter; + +uint32_t HAL_GetTick(void) +{ + return ++tick_counter; +} + +/* newlib's __libc_init_array calls _init() / _fini() between the + * preinit_array and init_array walks. Without a real definition the + * linker falls back to `PROVIDE(_init = 0)` in stm32mp135.ld and the + * call lands at PC=0x00000000, which is unmapped on the simulator + * and the firmware silently spins until the wall-clock timeout. */ +void _init(void) { } +void _fini(void) { } + +/* HAL_Init / SystemInit / SystemCoreClock - the MP13 HAL expects all + * three to exist. We replace them with the minimum stubs the crypto + * drivers rely on. 
*/ +uint32_t SystemCoreClock = 64000000UL; + +void SystemInit(void) { } + +/* HAL_Init calls (among other things) HAL_InitTick. Provide a + * trivial override so the HAL does not try to start a timer. */ +int HAL_InitTick(uint32_t TickPriority) +{ + (void)TickPriority; + return 0; +} + +/* wolfSSL GENSEED_FORTEST fallback - returns the tick counter so we + * are not stuck on a constant seed. */ +#include <time.h> +time_t time(time_t *t) +{ + tick_counter += 12345; + time_t val = (time_t)tick_counter; + if (t) { + *t = val; + } + return val; +} + +volatile int test_result __attribute__((section(".data"))) = -1; +volatile int test_complete __attribute__((section(".data"))) = 0; + +int main(int argc, char **argv) +{ + (void)argc; + (void)argv; + + mmu_enable(); + + setvbuf(stdin, NULL, _IONBF, 0); + setvbuf(stdout, NULL, _IONBF, 0); + setvbuf(stderr, NULL, _IONBF, 0); + uart_init(); +#define PUTS(s) _write(0, (s), sizeof(s) - 1) + PUTS("\n\n=== Starting wolfCrypt test ===\n\n"); + + test_result = wolfcrypt_test(NULL); + + if (test_result == 0) { + PUTS("\n\n=== wolfCrypt test passed! ===\n"); + } else { + PUTS("\n\n=== wolfCrypt test FAILED ===\n"); + } +#undef PUTS + + test_complete = 1; + + /* Plain branch-to-self spin: wfe/wfi decode as invalid on + * Unicorn's Cortex-A7 model. */ + for (;;) { + __asm__ volatile (""); + } +} diff --git a/STM32Sim/firmware/wolfcrypt-test-mp135/mmu.c b/STM32Sim/firmware/wolfcrypt-test-mp135/mmu.c new file mode 100644 index 0000000..412c414 --- /dev/null +++ b/STM32Sim/firmware/wolfcrypt-test-mp135/mmu.c @@ -0,0 +1,86 @@ +/* mmu.c + * + * Copyright (C) 2026 wolfSSL Inc. + * + * 1 MiB-section identity-mapped MMU for the MP135 wolfCrypt firmware. + * Identical in spirit to the smoke-test mmu.c: cover DDR as normal + * memory, peripheral regions as Device, then enable the MMU. + */ + +#include <stdint.h> + +/* The linker script reserves a 16 KiB-aligned 16 KiB region for the + * first-level translation table. 
Declare it as an array of u32 so the + * compiler's bounds tracking can see the real size; with the bare + * `extern uint32_t __ttb_start__` form, GCC's -Wstringop-overflow + * (re-)inference treats `ttb[i] = 0` as a 16 KiB store into a 4-byte + * object. */ +extern uint32_t __ttb_start__[4096]; + +#define SECTION_NORMAL (0x00020C0Eu) +#define SECTION_DEV_XN (0x00020C16u) + +#define MB (1u << 20) + +static void map_section(uint32_t *ttb, uint32_t va, uint32_t attrs) +{ + uint32_t idx = va >> 20; + ttb[idx] = (va & 0xFFF00000u) | attrs; +} + +static void map_range(uint32_t *ttb, uint32_t start, uint32_t end, uint32_t attrs) +{ + for (uint32_t va = start; va < end; va += MB) { + map_section(ttb, va, attrs); + } +} + +void mmu_enable(void) +{ + uint32_t *ttb = __ttb_start__; + + for (int i = 0; i < 4096; i++) { + ttb[i] = 0; + } + + /* APB1 + nearby APB peripherals: UART4, USART3, I2C, etc. live + * in this 1 MiB window. */ + map_section(ttb, 0x40000000u, SECTION_DEV_XN); + + /* AHB4 (RCC + co.) at 0x50000000. */ + map_section(ttb, 0x50000000u, SECTION_DEV_XN); + + /* AHB5 crypto/RNG/PKA at 0x54000000. */ + map_section(ttb, 0x54000000u, SECTION_DEV_XN); + + /* SYSRAM + SRAMs at 0x2FF00000 / 0x30000000. */ + map_section(ttb, 0x2FF00000u, SECTION_NORMAL); + map_section(ttb, 0x30000000u, SECTION_NORMAL); + + /* DDR: 16 MiB matches the linker. */ + map_range(ttb, 0xC0000000u, 0xC1000000u, SECTION_NORMAL); + + __asm__ volatile ("dsb sy" ::: "memory"); + + /* DACR: client (01) for domain 0. */ + __asm__ volatile ("mcr p15, 0, %0, c3, c0, 0" :: "r"(0x55555555u)); + /* TTBR0 + TTBCR (use TTBR0 for whole 4 GiB). */ + __asm__ volatile ("mcr p15, 0, %0, c2, c0, 0" :: "r"((uint32_t)ttb)); + __asm__ volatile ("mcr p15, 0, %0, c2, c0, 2" :: "r"(0u)); + + /* Invalidate everything before turning the MMU on. 
*/ + __asm__ volatile ("mcr p15, 0, %0, c8, c7, 0" :: "r"(0u)); + __asm__ volatile ("mcr p15, 0, %0, c7, c5, 6" :: "r"(0u)); + __asm__ volatile ("mcr p15, 0, %0, c7, c5, 0" :: "r"(0u)); + __asm__ volatile ("dsb sy" ::: "memory"); + __asm__ volatile ("isb" ::: "memory"); + + uint32_t sctlr; + __asm__ volatile ("mrc p15, 0, %0, c1, c0, 0" : "=r"(sctlr)); + sctlr |= 0x1u; + sctlr &= ~(1u << 2); + sctlr &= ~(1u << 12); + __asm__ volatile ("mcr p15, 0, %0, c1, c0, 0" :: "r"(sctlr)); + __asm__ volatile ("dsb sy" ::: "memory"); + __asm__ volatile ("isb" ::: "memory"); +} diff --git a/STM32Sim/firmware/wolfcrypt-test-mp135/startup.S b/STM32Sim/firmware/wolfcrypt-test-mp135/startup.S new file mode 100644 index 0000000..91a8f1a --- /dev/null +++ b/STM32Sim/firmware/wolfcrypt-test-mp135/startup.S @@ -0,0 +1,60 @@ +/* startup.S + * + * Copyright (C) 2026 wolfSSL Inc. + * + * Cortex-A7 reset path for the STM32MP135 wolfCrypt firmware. Boots + * in SVC mode, sets the supervisor stack, zeroes BSS, calls + * __libc_init_array (so any C++/global ctors run), then main(). Same + * pattern as the smoke-test-mp135 startup, with libc init added for + * the wolfSSL build. + */ + + .section .text._start, "ax" + .global _start + .arm + .type _start, %function +_start: + cpsid if + mrs r0, cpsr + bic r0, r0, #0x1f + orr r0, r0, #0x13 /* SVC mode */ + msr cpsr_c, r0 + + /* Grant PL0/PL1 access to coprocessors 10 and 11 (VFP/NEON), then + * enable the FPU itself via FPEXC.EN. The firmware is compiled + * with -mfpu=neon-vfpv4 -mfloat-abi=hard, so newlib's memchr and + * other library routines emit NEON instructions; without this + * the first vdup/vld traps as an undefined instruction. */ + mrc p15, 0, r0, c1, c0, 2 + orr r0, r0, #(0xf << 20) + mcr p15, 0, r0, c1, c0, 2 + isb + mov r0, #0x40000000 + vmsr fpexc, r0 + + ldr sp, =__stack_top__ + + /* Zero .bss. 
*/ + ldr r0, =__bss_start__ + ldr r1, =__bss_end__ + mov r2, #0 +1: + cmp r0, r1 + bge 2f + str r2, [r0], #4 + b 1b +2: + + /* __libc_init_array runs preinit + init arrays. newlib's stubs + * need this so any __attribute__((constructor)) work runs before + * main(). */ + bl __libc_init_array + + mov r0, #0 + mov r1, #0 + bl main + +hang: + b hang + + .size _start, . - _start diff --git a/STM32Sim/firmware/wolfcrypt-test-mp135/stm32mp135.ld b/STM32Sim/firmware/wolfcrypt-test-mp135/stm32mp135.ld new file mode 100644 index 0000000..b6fda56 --- /dev/null +++ b/STM32Sim/firmware/wolfcrypt-test-mp135/stm32mp135.ld @@ -0,0 +1,119 @@ +/* stm32mp135.ld - Memory map for the STM32MP135 wolfCrypt firmware + * under stm32-sim. The MP135 has no internal flash, so the whole + * application lives in DDR. The simulator maps a 512 MiB DDR window + * at 0xC0000000 as plain RAM; we use the first 16 MiB for code, + * data, BSS, heap, MMU translation tables, and exception stacks. */ + +ENTRY(_start) + +MEMORY +{ + DDR (rwx) : ORIGIN = 0xC0000000, LENGTH = 16M +} + +_Min_Heap_Size = 2M; +_Min_Stack_Size = 256K; + +SECTIONS +{ + .text : + { + . = ALIGN(4); + KEEP(*(.text._start)) + *(.text*) + *(.rodata*) + *(.glue_7) + *(.glue_7t) + *(.eh_frame) + . = ALIGN(4); + _etext = .; + } > DDR + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > DDR + + .ARM.exidx : + { + __exidx_start = .; + *(.ARM.exidx*) + __exidx_end = .; + } > DDR + + .preinit_array : + { + PROVIDE_HIDDEN(__preinit_array_start = .); + KEEP(*(.preinit_array*)) + PROVIDE_HIDDEN(__preinit_array_end = .); + } > DDR + + .init_array : + { + PROVIDE_HIDDEN(__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array*)) + PROVIDE_HIDDEN(__init_array_end = .); + } > DDR + + .fini_array : + { + PROVIDE_HIDDEN(__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array*)) + PROVIDE_HIDDEN(__fini_array_end = .); + } > DDR + + .data : + { + . 
= ALIGN(4); + __data_start__ = .; + *(.data*) + . = ALIGN(4); + __data_end__ = .; + } > DDR + + .bss (NOLOAD) : + { + . = ALIGN(4); + __bss_start__ = .; + *(.bss*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > DDR + + /* ARMv7-A first-level translation table: 4096 word entries, + * 16 KiB aligned. */ + .ttb (NOLOAD) : ALIGN(0x4000) + { + __ttb_start__ = .; + . = . + 0x4000; + __ttb_end__ = .; + } > DDR + + .heap_stack (NOLOAD) : + { + . = ALIGN(8); + PROVIDE(__heap_start__ = .); + . = . + _Min_Heap_Size; + PROVIDE(__heap_end__ = .); + PROVIDE(end = __heap_end__); + + . = ALIGN(8); + PROVIDE(__stack_start__ = .); + . = . + _Min_Stack_Size; + . = ALIGN(8); + PROVIDE(__stack_top__ = .); + } > DDR + + /DISCARD/ : + { + *(.ARM.attributes) + *(.note*) + *(.comment*) + } +} + +PROVIDE(_init = 0); +PROVIDE(_fini = 0); diff --git a/STM32Sim/firmware/wolfcrypt-test-mp135/stm32mp13xx_hal_conf.h b/STM32Sim/firmware/wolfcrypt-test-mp135/stm32mp13xx_hal_conf.h new file mode 100644 index 0000000..f485b18 --- /dev/null +++ b/STM32Sim/firmware/wolfcrypt-test-mp135/stm32mp13xx_hal_conf.h @@ -0,0 +1,104 @@ +/* stm32mp13xx_hal_conf.h - HAL config for the MP135 wolfCrypt build + * under stm32-sim. Enable only the modules wolfSSL needs (RCC, RNG, + * CRYP, HASH, PKA) plus the core bits the HAL touches at startup. 
*/ + +#ifndef STM32MP13xx_HAL_CONF_H +#define STM32MP13xx_HAL_CONF_H + +#ifdef __cplusplus +extern "C" { +#endif + +#define HAL_MODULE_ENABLED +#define HAL_RCC_MODULE_ENABLED +#define HAL_GPIO_MODULE_ENABLED +#define HAL_RNG_MODULE_ENABLED +#define HAL_CRYP_MODULE_ENABLED +#define HAL_HASH_MODULE_ENABLED +#define HAL_PKA_MODULE_ENABLED +#define HAL_DMA_MODULE_ENABLED +#define HAL_MDMA_MODULE_ENABLED +#define HAL_PWR_MODULE_ENABLED +#define HAL_EXTI_MODULE_ENABLED + +#if !defined(HSE_VALUE) +#define HSE_VALUE 24000000UL +#endif +#if !defined(HSE_STARTUP_TIMEOUT) +#define HSE_STARTUP_TIMEOUT 100UL +#endif +#if !defined(CSI_VALUE) +#define CSI_VALUE 4000000UL +#endif +#if !defined(HSI_VALUE) +#define HSI_VALUE 64000000UL +#endif +#if !defined(LSE_VALUE) +#define LSE_VALUE 32768UL +#endif +#if !defined(LSE_STARTUP_TIMEOUT) +#define LSE_STARTUP_TIMEOUT 5000UL +#endif +#if !defined(LSI_VALUE) +#define LSI_VALUE 32000UL +#endif +#if !defined(EXTERNAL_CLOCK_VALUE) +#define EXTERNAL_CLOCK_VALUE 12288000UL +#endif +#if !defined(VDD_VALUE) +#define VDD_VALUE 3300UL +#endif +#if !defined(TICK_INT_PRIORITY) +#define TICK_INT_PRIORITY 0xFUL +#endif + +#define USE_RTOS 0U +#define USE_HAL_ADC_REGISTER_CALLBACKS 0U +#define USE_HAL_CRYP_REGISTER_CALLBACKS 0U +#define USE_HAL_HASH_REGISTER_CALLBACKS 0U +#define USE_HAL_PKA_REGISTER_CALLBACKS 0U +#define USE_HAL_RNG_REGISTER_CALLBACKS 0U + +#ifdef HAL_RCC_MODULE_ENABLED +#include "stm32mp13xx_hal_rcc.h" +#endif +#ifdef HAL_GPIO_MODULE_ENABLED +#include "stm32mp13xx_hal_gpio.h" +#endif +#ifdef HAL_DMA_MODULE_ENABLED +#include "stm32mp13xx_hal_dma.h" +#endif +#ifdef HAL_MDMA_MODULE_ENABLED +#include "stm32mp13xx_hal_mdma.h" +#endif +#ifdef HAL_PWR_MODULE_ENABLED +#include "stm32mp13xx_hal_pwr.h" +#endif +#ifdef HAL_RNG_MODULE_ENABLED +#include "stm32mp13xx_hal_rng.h" +#endif +#ifdef HAL_CRYP_MODULE_ENABLED +#include "stm32mp13xx_hal_cryp.h" +#endif +#ifdef HAL_HASH_MODULE_ENABLED +#include "stm32mp13xx_hal_hash.h" +#endif +#ifdef 
HAL_PKA_MODULE_ENABLED +#include "stm32mp13xx_hal_pka.h" +#endif +#ifdef HAL_EXTI_MODULE_ENABLED +#include "stm32mp13xx_hal_exti.h" +#endif + +#ifdef USE_FULL_ASSERT +#define assert_param(expr) ((expr) ? (void)0U : assert_failed((uint8_t *)__FILE__, __LINE__)) +void assert_failed(uint8_t *file, uint32_t line); +#else +#define assert_param(expr) ((void)0U) +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* STM32MP13xx_HAL_CONF_H */ diff --git a/STM32Sim/firmware/wolfcrypt-test-mp135/toolchain-arm-none-eabi.cmake b/STM32Sim/firmware/wolfcrypt-test-mp135/toolchain-arm-none-eabi.cmake new file mode 100644 index 0000000..cac33d4 --- /dev/null +++ b/STM32Sim/firmware/wolfcrypt-test-mp135/toolchain-arm-none-eabi.cmake @@ -0,0 +1,27 @@ +set(CMAKE_SYSTEM_NAME Generic) +set(CMAKE_SYSTEM_PROCESSOR arm) + +set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) + +set(CMAKE_C_COMPILER arm-none-eabi-gcc) +set(CMAKE_CXX_COMPILER arm-none-eabi-g++) +set(CMAKE_ASM_COMPILER arm-none-eabi-gcc) + +set(CMAKE_AR arm-none-eabi-ar) +set(CMAKE_RANLIB arm-none-eabi-ranlib) + +set(CMAKE_C_STANDARD 11) + +# Cortex-A7 in ARM mode (not Thumb). NEON/VFPv4 hardfloat matches the +# MP135 reference SDK build. -Os keeps the test binary down so it fits +# in the 16 MiB DDR window the simulator maps. +set(CPU_FLAGS "-mcpu=cortex-a7 -marm -mfpu=neon-vfpv4 -mfloat-abi=hard") +set(OPT_FLAGS "-Os -ffunction-sections -fdata-sections") +# CORE_CA7 is what gates the Cortex-A7 device-header selection in the +# MP13 CMSIS device header. Both this firmware and the wolfSSL static +# library need it set; passing via the toolchain catches both. 
+set(CMAKE_C_FLAGS_INIT "${CPU_FLAGS} ${OPT_FLAGS} -DSTM32MP135Fxx -DCORE_CA7") +set(CMAKE_CXX_FLAGS_INIT "${CPU_FLAGS} ${OPT_FLAGS} -DSTM32MP135Fxx -DCORE_CA7") +set(CMAKE_ASM_FLAGS_INIT "${CPU_FLAGS}") + +set(CMAKE_EXE_LINKER_FLAGS_INIT "-Wl,--gc-sections -static") diff --git a/STM32Sim/firmware/wolfcrypt-test-mp135/user_settings.h b/STM32Sim/firmware/wolfcrypt-test-mp135/user_settings.h new file mode 100644 index 0000000..64f6c3b --- /dev/null +++ b/STM32Sim/firmware/wolfcrypt-test-mp135/user_settings.h @@ -0,0 +1,84 @@ +/* user_settings.h - wolfSSL/wolfCrypt configuration for STM32MP135 + * under stm32-sim. The MP135 is a Cortex-A7 (ARMv7-A), not a + * Cortex-M, so we drop WOLFSSL_ARM_CORTEX_M and the M-asm flag. + * + * wolfSSL 5.8.4+ knows WOLFSSL_STM32MP13 natively: it aliases + * CRYP -> CRYP1 + * RNG -> RNG1 + * __HAL_RCC_HASH_CLK_ENABLE -> __HAL_RCC_HASH1_CLK_ENABLE + * __HAL_RCC_RNG_CLK_ENABLE -> __HAL_RCC_RNG1_CLK_ENABLE + * and selects STM32_HAL_V2 for the v2 crypto HAL flavour. + */ + +#ifndef USER_SETTINGS_STM32SIM_MP135_H +#define USER_SETTINGS_STM32SIM_MP135_H + +/* The MP13 HAL headers use uint32_t but don't include + * themselves. wolfSSL pulls in stm32mp13xx_hal.h from settings.h + * before its own stdint-using headers, so the integer types must be + * available beforehand. */ +#include +#include + +#define WOLFSSL_STM32MP13 +#define WOLFSSL_STM32_CUBEMX +#define STM32_HAL_V2 +#define WOLFSSL_STM32_PKA + +#define SIZEOF_LONG 4 +#define SIZEOF_LONG_LONG 8 + +#define SINGLE_THREADED + +#define WOLFSSL_NO_CURRDIR +#define NO_FILESYSTEM +#define NO_WRITEV + +#define WOLFCRYPT_ONLY +#define NO_DH +#define NO_DSA +#define NO_DES +#define NO_DES3 + +/* RNG via HAL */ +#define WOLFSSL_STM32_RNG_NOLIB +#define NO_DEV_RANDOM +#define HAVE_HASHDRBG + +/* Math: single-precision software paths for everything not on PKA. + * No M-asm here, this is an A-class target. 
*/ +#define WOLFSSL_SP_MATH_ALL +#define WOLFSSL_HAVE_SP_RSA +#define WOLFSSL_HAVE_SP_DH +#define WOLFSSL_HAVE_SP_ECC +#define SP_WORD_SIZE 32 + +#define WC_RSA_BLINDING +#define ECC_TIMING_RESISTANT +#define WOLFSSL_SMALL_STACK +#define BENCH_EMBEDDED +#define NO_MAIN_DRIVER + +/* The MP135 HASH peripheral implements SHA3, SHAKE, and the + * SHA-384/512 family in hardware. The simulator's HASH1 model + * decodes ALGO codes 4-11 as SHA3-{224,256,384,512} / SHAKE-{128,256} + * (RAWSHAKE collapses to SHAKE). + * + * SHAKE is currently disabled because wolfSSL master's + * wolfcrypt/src/sha3.c has a bug in the STM32_HASH_SHA3 branch: + * wc_Shake128_Update() / wc_Shake128_Final() call the in-file static + * helpers `Sha3Update` / `Sha3Final` / `InitSha3` (no `wc_` prefix), + * but those helpers are gated by + * #if !defined(STM32_HASH_SHA3) && !defined(PSOC6_HASH_SHA3) + * (sha3.c line 588) so they don't exist in our build. The build dies + * with implicit-declaration errors on Sha3Update / Sha3Final / + * InitSha3. Until that wolfSSL bug is fixed, we keep SHAKE off here; + * the simulator's HASH1 model still services SHAKE-128 / SHAKE-256 + * for firmware that drives the peripheral directly (e.g. the + * smoke-test KATs). SHA3 itself works through the wc_Sha3_* + * entry points and stays enabled. */ +#define WOLFSSL_SHA3 +#define WOLFSSL_SHA384 +#define WOLFSSL_SHA512 + +#endif /* USER_SETTINGS_STM32SIM_MP135_H */ diff --git a/STM32Sim/scripts/run-wolfcrypt-mp135.sh b/STM32Sim/scripts/run-wolfcrypt-mp135.sh new file mode 100755 index 0000000..7a1cc13 --- /dev/null +++ b/STM32Sim/scripts/run-wolfcrypt-mp135.sh @@ -0,0 +1,92 @@ +#!/bin/bash +# run-wolfcrypt-mp135.sh +# +# Copyright (C) 2026 wolfSSL Inc. +# +# Build the wolfCrypt-on-STM32MP135 firmware (sources baked into the +# image at /opt/firmware-mp135) against the user's mounted wolfSSL +# tree, then run the resulting ELF through stm32-sim --chip +# stm32mp135. +# +# The MP135 is a Cortex-A7 (ARMv7-A) part. 
The simulator boots into +# ARM mode and the firmware sets up a flat 1 MiB-section MMU map +# before exercising any peripherals. wolfSSL needs version 5.8.4+ for +# the WOLFSSL_STM32MP13 / CRYP1 alias support. +set -euo pipefail + +WOLFSSL_ROOT="${WOLFSSL_ROOT:-/opt/wolfssl}" +FIRMWARE_DIR="${FIRMWARE_DIR:-/opt/firmware-mp135}" +STM32CUBE_MP13_ROOT="${STM32CUBE_MP13_ROOT:-/opt/STM32CubeMP13}" +TIMEOUT="${TIMEOUT:-300}" + +if [[ ! -d "${WOLFSSL_ROOT}" ]]; then + echo "ERROR: wolfSSL source not mounted at ${WOLFSSL_ROOT}" >&2 + exit 2 +fi + +WOLFSSL_BUILD_TREE=/opt/wolfssl-build-tree +rm -rf "${WOLFSSL_BUILD_TREE}" +cp -r "${WOLFSSL_ROOT}" "${WOLFSSL_BUILD_TREE}" +rm -f "${WOLFSSL_BUILD_TREE}/config.h" + +# Drop the firmware's HAL config header next to the HAL sources so +# stm32mp13xx_hal.h finds it on the include path. +HAL_CONFIG_FILE="$(ls "${FIRMWARE_DIR}"/*hal_conf.h 2>/dev/null | head -1)" +if [[ -n "${HAL_CONFIG_FILE}" ]]; then + cp "${HAL_CONFIG_FILE}" \ + "${STM32CUBE_MP13_ROOT}/Drivers/STM32MP13xx_HAL_Driver/Inc/" || true +fi + +echo ">> Building MP135 wolfCrypt firmware against wolfSSL at ${WOLFSSL_ROOT} ..." +# With WOLFSSL_USER_SETTINGS=ON, wolfSSL's CMake throws away all the +# WOLFSSL_DEFINITIONS it would otherwise build up from -DWOLFSSL_SHA3 +# etc. (see lines 2300-2302 of wolfssl's CMakeLists.txt). Every +# algorithm choice flows through firmware/wolfcrypt-test-mp135/ +# user_settings.h instead - that is where WOLFSSL_SHA3 / SHA384 / +# SHA512 are turned on (SHAKE stays off there for now) so the MP13 +# HASH IP's wider digest set is exercised end-to-end. 
+cmake -G Ninja \ + -DWOLFSSL_USER_SETTINGS=ON \ + -DUSER_SETTINGS_FILE="${FIRMWARE_DIR}/user_settings.h" \ + -DCMAKE_TOOLCHAIN_FILE="${FIRMWARE_DIR}/toolchain-arm-none-eabi.cmake" \ + -DCMAKE_BUILD_TYPE=Release \ + -DWOLFSSL_CRYPT_TESTS=OFF \ + -DWOLFSSL_EXAMPLES=OFF \ + -DWOLFSSL_ROOT="${WOLFSSL_BUILD_TREE}" \ + -DSTM32CUBE_MP13_ROOT="${STM32CUBE_MP13_ROOT}" \ + -B "${FIRMWARE_DIR}/build" \ + -S "${FIRMWARE_DIR}" +cmake --build "${FIRMWARE_DIR}/build" + +ELF="${FIRMWARE_DIR}/build/wolfcrypt_test.elf" +if [[ ! -f "${ELF}" ]]; then + echo "ERROR: firmware build produced no ELF at ${ELF}" >&2 + find "${FIRMWARE_DIR}/build" -name "*.elf" 2>/dev/null || true + exit 1 +fi + +echo ">> Running ${ELF} on stm32-sim --chip stm32mp135 (timeout ${TIMEOUT}s) ..." +LOG="$(mktemp)" +set +e +stm32-sim \ + --chip stm32mp135 \ + --timeout "${TIMEOUT}" \ + --exit-on test_complete \ + --result-symbol test_result \ + "${ELF}" 2>&1 | tee "${LOG}" +SIM_EXIT=$? +set -e + +if grep -q "=== wolfCrypt test passed! ===" "${LOG}"; then + echo + echo "wolfCrypt tests completed successfully." + exit 0 +fi +if grep -q "=== wolfCrypt test FAILED ===" "${LOG}"; then + echo + echo "wolfCrypt tests FAILED." + exit 1 +fi +echo +echo "wolfCrypt tests did not report a result string. 
Simulator exit=${SIM_EXIT}" +exit "${SIM_EXIT}" diff --git a/STM32Sim/stm32-sim/chips/src/lib.rs b/STM32Sim/stm32-sim/chips/src/lib.rs index 853799a..51e097b 100644 --- a/STM32Sim/stm32-sim/chips/src/lib.rs +++ b/STM32Sim/stm32-sim/chips/src/lib.rs @@ -20,13 +20,15 @@ */ pub mod stm32h753; +pub mod stm32mp135; pub mod stm32u575; use anyhow::Result; -use stm32_sim_core::{Bus, MemoryRegion}; +use stm32_sim_core::{Bus, CpuKind, MemoryRegion}; pub struct Chip { pub name: &'static str, + pub cpu_kind: CpuKind, pub memory_regions: Vec<MemoryRegion>, pub bus: Bus, } @@ -40,10 +42,11 @@ pub fn build(name: &str) -> Result<Chip> { "stm32h753" => stm32h753::Stm32H753::build(), "stm32u575" => stm32u575::Stm32U575::build(), "stm32u585" => stm32u575::Stm32U585::build(), + "stm32mp135" => stm32mp135::Stm32Mp135::build(), other => anyhow::bail!("unknown chip: {other}"), } } pub fn list() -> &'static [&'static str] { - &["stm32h753", "stm32u575", "stm32u585"] + &["stm32h753", "stm32u575", "stm32u585", "stm32mp135"] } diff --git a/STM32Sim/stm32-sim/chips/src/stm32h753.rs b/STM32Sim/stm32-sim/chips/src/stm32h753.rs index d3a35ff..e742b66 100644 --- a/STM32Sim/stm32-sim/chips/src/stm32h753.rs +++ b/STM32Sim/stm32-sim/chips/src/stm32h753.rs @@ -21,7 +21,7 @@ use anyhow::Result; use stm32_sim_core::peripheral::wrap; -use stm32_sim_core::{Bus, MemoryRegion}; +use stm32_sim_core::{Bus, CpuKind, MemoryRegion}; use stm32_sim_peripherals::{ cryp::v1::CrypV1, hash::v1::HashV1, usart::StdoutSink, Dbgmcu, Rcc, Rng, Usart, }; @@ -91,6 +91,7 @@ impl crate::ChipBuilder for Stm32H753 { Ok(Chip { name: "stm32h753", + cpu_kind: CpuKind::CortexM, memory_regions: memory, bus, }) diff --git a/STM32Sim/stm32-sim/chips/src/stm32mp135.rs b/STM32Sim/stm32-sim/chips/src/stm32mp135.rs new file mode 100644 index 0000000..9037357 --- /dev/null +++ b/STM32Sim/stm32-sim/chips/src/stm32mp135.rs @@ -0,0 +1,139 @@ +/* stm32mp135.rs + * + * Copyright (C) 2026 wolfSSL Inc. + * + * This file is part of STM32Sim. 
+ * + * STM32Sim is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * STM32Sim is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +//! STM32MP135 - Cortex-A7 single core, no internal flash. Firmware +//! runs from external DDR after an ST-supplied DDR_Init helper has +//! trained the DDR controller. From the simulator's point of view +//! there is no DDR_Init: Unicorn just maps DDR as plain RAM and the +//! ELF loader writes the .text/.data segments straight into it. +//! +//! Memory map (from the MP135 reference manual / CMSIS device +//! headers): +//! SYSRAM 0x2FFE_0000 128 KiB - lives just below the SRAMs +//! SRAM1 0x3000_0000 16 KiB +//! SRAM2 0x3000_4000 8 KiB +//! SRAM3 0x3000_6000 8 KiB +//! DDR 0xC000_0000 512 MiB +//! +//! Crypto IP (AHB5 @ 0x5400_0000): +//! CRYP1 @ 0x5400_2000 - same register layout as the H7 CRYP block; +//! wolfSSL's STM32 port aliases CRYP1 -> CRYP. +//! HASH1 @ 0x5400_3000 - MP13 layout: 4-bit ALGO field at CR[20:17], +//! SHA3CFGR register at 0x28, SHA3-224/256/384/512 +//! plus SHA-384/512 in addition to the legacy +//! SHA-1/MD5/SHA-224/SHA-256. +//! RNG1 @ 0x5400_4000 - identical RNG to the H7/U5. +//! PKA @ 0x5400_6000 - U5-generation PKA v2 register file. +//! +//! Other peripherals modelled: +//! UART4 @ 0x4001_0000 - the F-DK's ST-Link console UART. +//! RCC @ 0x5000_0000 - stub; we ignore writes and return zero on +//! 
read because the firmware does not poll +//! ready bits in the bare-metal smoke +//! configuration. + +use anyhow::Result; +use stm32_sim_core::peripheral::wrap; +use stm32_sim_core::{Bus, CpuKind, MemoryRegion}; +use stm32_sim_peripherals::{ + cryp::v1::CrypV1, hash::v1::HashV1, pka::v2::PkaV2, usart::StdoutSink, Dbgmcu, Rcc, Rng, Usart, +}; + +use crate::Chip; + +pub struct Stm32Mp135; + +impl crate::ChipBuilder for Stm32Mp135 { + fn build() -> Result<Chip> { + let memory = vec![ + MemoryRegion { + base: 0x2FFE_0000, + size: 0x0002_0000, + name: "SYSRAM", + }, + MemoryRegion { + base: 0x3000_0000, + size: 0x0000_4000, + name: "SRAM1", + }, + MemoryRegion { + base: 0x3000_4000, + size: 0x0000_2000, + name: "SRAM2", + }, + MemoryRegion { + base: 0x3000_6000, + size: 0x0000_2000, + name: "SRAM3", + }, + MemoryRegion { + base: 0xC000_0000, + size: 0x2000_0000, + name: "DDR", + }, + ]; + + let mut bus = Bus::new(); + + // UART4 - ST-Link console on the MP135F-DK. The H7/U5 + // USART register layout (CR1/BRR/ISR/TDR at the same offsets) + // is shared by every modern STM32, so we reuse Usart directly. + let uart4 = wrap(Usart::new("uart4", Box::new(StdoutSink))); + bus.map(0x4001_0000, 0x0400, "uart4", uart4); + + // RCC stub. The MP135 RCC register layout differs from H7/U5 + // and the wolfcrypt smoke firmware does not poll ready bits + // (it pokes the crypto peripherals directly without HAL clock + // gating). A bare register file is enough; if a later HAL- + // driven firmware needs ready bits, add Rcc::mp13() with the + // right mask. 
+ let rcc = wrap(Rcc::raw("rcc-mp13")); + bus.map(0x5000_0000, 0x1000, "rcc", rcc); + + let cryp1 = wrap(CrypV1::new()); + bus.map(0x5400_2000, 0x0400, "cryp1", cryp1); + + let hash1 = wrap(HashV1::new_mp13()); + bus.map(0x5400_3000, 0x0400, "hash1", hash1); + + let rng1 = wrap(Rng::new()); + bus.map(0x5400_4000, 0x0400, "rng1", rng1); + + // PKA spans 0x2000 like on the U5, since the SRAM-style + // operand window lives inside the same page block. + let pka = wrap(PkaV2::new()); + bus.map(0x5400_6000, 0x2000, "pka", pka); + + // DBGMCU at 0x5008_1000. HAL_GetREVID / HAL_GetDEVID read + // DBGMCU->IDCODE; with the firmware now driving the MP13 HAL + // (HAL_RCC_OscConfig pokes this on init), the peripheral has + // to exist or the load faults as READ_UNMAPPED. + let dbgmcu = wrap(Dbgmcu::mp13()); + bus.map(0x5008_1000, 0x0400, "dbgmcu", dbgmcu); + + Ok(Chip { + name: "stm32mp135", + cpu_kind: CpuKind::CortexA, + memory_regions: memory, + bus, + }) + } +} diff --git a/STM32Sim/stm32-sim/chips/src/stm32u575.rs b/STM32Sim/stm32-sim/chips/src/stm32u575.rs index 73f988a..aa07c1c 100644 --- a/STM32Sim/stm32-sim/chips/src/stm32u575.rs +++ b/STM32Sim/stm32-sim/chips/src/stm32u575.rs @@ -33,7 +33,7 @@ use anyhow::Result; use stm32_sim_core::peripheral::wrap; -use stm32_sim_core::{Bus, MemoryRegion}; +use stm32_sim_core::{Bus, CpuKind, MemoryRegion}; use stm32_sim_peripherals::{ cryp::v2::CrypV2, hash::v1::HashV1, pka::v2::PkaV2, usart::StdoutSink, Dbgmcu, Rcc, Rng, Usart, }; @@ -104,6 +104,7 @@ impl crate::ChipBuilder for Stm32U575 { Ok(Chip { name: "stm32u575", + cpu_kind: CpuKind::CortexM, memory_regions: memory, bus, }) diff --git a/STM32Sim/stm32-sim/core/src/cpu.rs b/STM32Sim/stm32-sim/core/src/cpu.rs index b9ccdad..205475d 100644 --- a/STM32Sim/stm32-sim/core/src/cpu.rs +++ b/STM32Sim/stm32-sim/core/src/cpu.rs @@ -22,7 +22,7 @@ use anyhow::{anyhow, Result}; use std::sync::Arc; use unicorn_engine::unicorn_const::{Arch, Mode, Prot}; -use unicorn_engine::{RegisterARM, 
Unicorn}; +use unicorn_engine::{ArmCpuModel, RegisterARM, Unicorn}; use crate::bus::Bus; use crate::elf::{ElfImage, MemoryRegion}; @@ -35,14 +35,45 @@ pub enum CpuStop { Fault, } +/// CPU family selector. Determines the Unicorn `Mode` flags and whether +/// the runtime treats addresses as Thumb (M-class) or ARM (A-class). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CpuKind { + /// Cortex-M0/M3/M4/M7/M33 - Thumb-2 only, M-profile vector table. + CortexM, + /// Cortex-A series (A7, A15, ...) - ARMv7-A with MMU support. + CortexA, +} + pub struct Cpu { uc: Unicorn<'static, ()>, + kind: CpuKind, } impl Cpu { pub fn new(memory: &[MemoryRegion]) -> Result<Self> { - let mut uc = Unicorn::new(Arch::ARM, Mode::THUMB | Mode::MCLASS) + Self::new_with_kind(memory, CpuKind::CortexM) + } + + pub fn new_with_kind(memory: &[MemoryRegion], kind: CpuKind) -> Result<Self> { + let mode = match kind { + CpuKind::CortexM => Mode::THUMB | Mode::MCLASS, + // ARMv7-A starts in ARM (not Thumb). No MCLASS flag - that + // is the bit that switches Unicorn into M-profile exception + // semantics. Without it we get an A-class CPU with MMU, + // VFP/NEON, and SVC-mode boot. + CpuKind::CortexA => Mode::ARM, + }; + let mut uc = Unicorn::new(Arch::ARM, mode) .map_err(|e| anyhow!("Unicorn::new failed: {e:?}"))?; + if let CpuKind::CortexA = kind { + // The default A-class CPU in Unicorn does not advertise an + // MMU or VFPv4/NEON. Pin to Cortex-A7 so MP135 firmware can + // enable its translation tables and use VFP/NEON-compiled + // wolfSSL code. 
+ uc.ctl_set_cpu_model(ArmCpuModel::CORTEX_A7 as i32) + .map_err(|e| anyhow!("ctl_set_cpu_model(CORTEX_A7) failed: {e:?}"))?; + } for region in memory { uc.mem_map(region.base, region.size, Prot::ALL) .map_err(|e| { @@ -55,7 +86,7 @@ impl Cpu { ) })?; } - Ok(Self { uc }) + Ok(Self { uc, kind }) } /// Install a Bus: register one Unicorn MMIO callback per 4 KiB page @@ -102,7 +133,13 @@ impl Cpu { ) })?; } - let pc = image.entry_point & !1; // strip Thumb bit + // The Thumb bit (LSB=1) marks Cortex-M ELF entry points; on + // A-class the bit is already 0. Stripping it is safe in both + // cases. The SP slot for A-class is also unused: A-class + // firmware sets its own stacks from its startup code (one per + // exception mode), but writing a reasonable SP is harmless if + // the ELF has one. + let pc = image.entry_point & !1; self.uc .reg_write(RegisterARM::SP, image.initial_sp) .map_err(|e| anyhow!("reg_write SP: {e:?}"))?; @@ -112,16 +149,34 @@ impl Cpu { Ok(()) } - /// Run up to `max_instructions` Thumb instructions, then return. + /// Run up to `max_instructions` instructions, then return. pub fn run(&mut self, max_instructions: u64) -> Result<CpuStop> { let pc = self .uc .reg_read(RegisterARM::PC) .map_err(|e| anyhow!("reg_read PC: {e:?}"))?; - // emu_start expects (begin | 1) for Thumb; end=0 means run until count expires. + // emu_start expects (begin | 1) when the next instruction is a + // Thumb instruction. Cortex-M is always Thumb. Cortex-A toggles + // between ARM and Thumb at runtime, so probe CPSR.T (bit 5) for + // each slice - resuming with the wrong bit corrupts the decode + // and Unicorn reports it as INSN_INVALID at the resume address. 
+ let begin = match self.kind { + CpuKind::CortexM => pc | 1, + CpuKind::CortexA => { + let cpsr = self + .uc + .reg_read(RegisterARM::CPSR) + .map_err(|e| anyhow!("reg_read CPSR: {e:?}"))?; + if cpsr & (1 << 5) != 0 { + pc | 1 + } else { + pc & !1 + } + } + }; match self .uc - .emu_start(pc | 1, 0, 0, max_instructions as usize) + .emu_start(begin, 0, 0, max_instructions as usize) { Ok(()) => Ok(CpuStop::Halted), Err(e) => { diff --git a/STM32Sim/stm32-sim/core/src/elf.rs b/STM32Sim/stm32-sim/core/src/elf.rs index d4cc79a..9c1c91a 100644 --- a/STM32Sim/stm32-sim/core/src/elf.rs +++ b/STM32Sim/stm32-sim/core/src/elf.rs @@ -24,6 +24,8 @@ use goblin::elf::{program_header::PT_LOAD, Elf}; use std::collections::HashMap; use std::path::Path; +use crate::cpu::CpuKind; + #[derive(Debug, Clone)] pub struct LoadSegment { /// Load address (LMA, ELF `p_paddr`): where the segment's initial @@ -65,13 +67,21 @@ pub struct ElfImage { impl ElfImage { pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> { + Self::from_path_with_kind(path, CpuKind::CortexM) + } + + pub fn from_path_with_kind<P: AsRef<Path>>(path: P, kind: CpuKind) -> Result<Self> { let path = path.as_ref(); let bytes = std::fs::read(path) .with_context(|| format!("failed to read ELF file: {}", path.display()))?; - Self::from_bytes(&bytes) + Self::from_bytes_with_kind(&bytes, kind) } pub fn from_bytes(bytes: &[u8]) -> Result<Self> { + Self::from_bytes_with_kind(bytes, CpuKind::CortexM) + } + + pub fn from_bytes_with_kind(bytes: &[u8], kind: CpuKind) -> Result<Self> { let elf = Elf::parse(bytes).map_err(|e| anyhow!("failed to parse ELF: {e}"))?; let mut segments = Vec::new(); @@ -103,21 +113,39 @@ impl ElfImage { } } - // Cortex-M boot: vector table starts at the lowest-loaded address; - // word 0 = initial SP, word 1 = reset vector (Thumb bit set). 
- let mut initial_sp = 0u64; - let mut reset_vec = elf.entry; - if let Some(seg) = segments.iter().min_by_key(|s| s.load_address) { - if seg.data.len() >= 8 { - initial_sp = - u32::from_le_bytes([seg.data[0], seg.data[1], seg.data[2], seg.data[3]]) as u64; - reset_vec = - u32::from_le_bytes([seg.data[4], seg.data[5], seg.data[6], seg.data[7]]) as u64; + let (entry_point, initial_sp) = match kind { + // Cortex-M boot: vector table at the lowest-loaded address; + // word 0 = initial SP, word 1 = reset vector (Thumb bit set). + CpuKind::CortexM => { + let mut initial_sp = 0u64; + let mut reset_vec = elf.entry; + if let Some(seg) = segments.iter().min_by_key(|s| s.load_address) { + if seg.data.len() >= 8 { + initial_sp = u32::from_le_bytes([ + seg.data[0], + seg.data[1], + seg.data[2], + seg.data[3], + ]) as u64; + reset_vec = u32::from_le_bytes([ + seg.data[4], + seg.data[5], + seg.data[6], + seg.data[7], + ]) as u64; + } + } + (reset_vec, initial_sp) } - } + // Cortex-A: the firmware's startup code sets up its own + // exception-mode stacks; the linker's entry point is the + // ARM-mode reset path (no Thumb bit, no SP-from-vector + // convention). 
+ CpuKind::CortexA => (elf.entry, 0), + }; Ok(Self { - entry_point: reset_vec, + entry_point, initial_sp, segments, symbols, diff --git a/STM32Sim/stm32-sim/core/src/lib.rs b/STM32Sim/stm32-sim/core/src/lib.rs index 48b2d37..48ceaa3 100644 --- a/STM32Sim/stm32-sim/core/src/lib.rs +++ b/STM32Sim/stm32-sim/core/src/lib.rs @@ -26,7 +26,7 @@ pub mod cpu; pub mod runner; pub use bus::{Bus, MmioRegion}; -pub use cpu::{Cpu, CpuStop}; +pub use cpu::{Cpu, CpuKind, CpuStop}; pub use elf::{ElfImage, MemoryRegion}; pub use peripheral::{Peripheral, PeripheralRef}; pub use runner::{ExitCondition, RunOutcome, Runner}; diff --git a/STM32Sim/stm32-sim/peripherals/Cargo.toml b/STM32Sim/stm32-sim/peripherals/Cargo.toml index 25a71ef..4a06e80 100644 --- a/STM32Sim/stm32-sim/peripherals/Cargo.toml +++ b/STM32Sim/stm32-sim/peripherals/Cargo.toml @@ -18,6 +18,7 @@ aes.workspace = true cipher.workspace = true sha-1.workspace = true sha2.workspace = true +sha3.workspace = true md-5.workspace = true digest.workspace = true p256.workspace = true diff --git a/STM32Sim/stm32-sim/peripherals/src/cryp/v1.rs b/STM32Sim/stm32-sim/peripherals/src/cryp/v1.rs index ccab615..a9c282d 100644 --- a/STM32Sim/stm32-sim/peripherals/src/cryp/v1.rs +++ b/STM32Sim/stm32-sim/peripherals/src/cryp/v1.rs @@ -259,6 +259,14 @@ impl CrypV1 { if self.engine.output_full() { sr |= 1 << 3; } + // CRYP_SR.KEYVALID (bit 7) is MP13-only. The MP13 HAL polls it + // after writing the key registers (for every algorithm, but in + // practice the AES-192 path is the first to hit it because its + // KeyIVConfigSkip default forces a re-load every Encrypt call). + // Key validation is instantaneous in the simulator, so the + // bit is reported as set unconditionally, regardless of + // whether the key window has been written. 
+ sr |= 1 << 7; sr } } diff --git a/STM32Sim/stm32-sim/peripherals/src/dbgmcu.rs b/STM32Sim/stm32-sim/peripherals/src/dbgmcu.rs index ecd2281..b073df1 100644 --- a/STM32Sim/stm32-sim/peripherals/src/dbgmcu.rs +++ b/STM32Sim/stm32-sim/peripherals/src/dbgmcu.rs @@ -60,6 +60,17 @@ impl Dbgmcu { regs: [0; 256], } } + + /// STM32MP135: DEV_ID = 0x501, REV_ID = 0x2000 (rev Z). HAL_GetREVID + /// is read by HAL_RCC_OscConfig on this part, so the peripheral + /// must exist for the HAL to come up. + pub fn mp13() -> Self { + Self { + name: "dbgmcu", + idcode: (0x2000 << 16) | 0x501, + regs: [0; 256], + } + } } impl Peripheral for Dbgmcu { diff --git a/STM32Sim/stm32-sim/peripherals/src/hash/mod.rs b/STM32Sim/stm32-sim/peripherals/src/hash/mod.rs index 159326b..9ee3157 100644 --- a/STM32Sim/stm32-sim/peripherals/src/hash/mod.rs +++ b/STM32Sim/stm32-sim/peripherals/src/hash/mod.rs @@ -26,7 +26,8 @@ pub mod v1; -use digest::{Digest, DynDigest}; +use digest::{Digest, DynDigest, InvalidBufferSize}; +use sha3::digest::{ExtendableOutputReset, Reset}; /// `DynDigest` is the dyn-compatible hashing trait but its `box_clone` /// drops the `Send` bound. 
We need both `Send` (so the engine can sit @@ -51,7 +52,8 @@ where fn fresh_hasher(algo: Algo) -> Box { use md5::Md5; use sha1::Sha1; - use sha2::{Sha224, Sha256, Sha384, Sha512}; + use sha2::{Sha224, Sha256, Sha384, Sha512, Sha512_224, Sha512_256}; + use sha3::{Sha3_224, Sha3_256, Sha3_384, Sha3_512, Shake128, Shake256}; match algo { Algo::Sha1 => Box::new(Sha1::new()), Algo::Md5 => Box::new(Md5::new()), @@ -59,6 +61,77 @@ fn fresh_hasher(algo: Algo) -> Box { Algo::Sha256 => Box::new(Sha256::new()), Algo::Sha384 => Box::new(Sha384::new()), Algo::Sha512 => Box::new(Sha512::new()), + Algo::Sha512_224 => Box::new(Sha512_224::new()), + Algo::Sha512_256 => Box::new(Sha512_256::new()), + Algo::Sha3_224 => Box::new(Sha3_224::new()), + Algo::Sha3_256 => Box::new(Sha3_256::new()), + Algo::Sha3_384 => Box::new(Sha3_384::new()), + Algo::Sha3_512 => Box::new(Sha3_512::new()), + // SHAKE is variable-output. Use the same fixed length the MP13 + // HAL defaults to (16 bytes for SHAKE-128, 32 for SHAKE-256; + // see the HASH_DIGEST_SIZE switch in stm32mp13xx_hal_hash.h). + Algo::Shake128 => Box::new(ShakeWrapper::::new(16)), + Algo::Shake256 => Box::new(ShakeWrapper::::new(32)), + } +} + +/// Adapter that exposes a SHAKE-family XOF (variable-output extendable +/// hash) through the fixed-length `DynDigest` trait so the streaming +/// engine can drive it without caring about the SHAKE/SHA-3 +/// distinction. The output length is fixed at construction. 
+#[derive(Clone)] +struct ShakeWrapper { + inner: S, + output_len: usize, +} + +impl ShakeWrapper { + fn new(output_len: usize) -> Self { + Self { inner: S::default(), output_len } + } +} + +impl DynDigest for ShakeWrapper +where + S: Default + + sha3::digest::Update + + ExtendableOutputReset + + Reset + + Clone + + Send + + 'static, +{ + fn update(&mut self, data: &[u8]) { + sha3::digest::Update::update(&mut self.inner, data); + } + + fn finalize_into(self, buf: &mut [u8]) -> Result<(), InvalidBufferSize> { + if buf.len() != self.output_len { + return Err(InvalidBufferSize); + } + let mut inner = self.inner; + inner.finalize_xof_reset_into(buf); + Ok(()) + } + + fn finalize_into_reset(&mut self, out: &mut [u8]) -> Result<(), InvalidBufferSize> { + if out.len() != self.output_len { + return Err(InvalidBufferSize); + } + self.inner.finalize_xof_reset_into(out); + Ok(()) + } + + fn reset(&mut self) { + Reset::reset(&mut self.inner); + } + + fn output_size(&self) -> usize { + self.output_len + } + + fn box_clone(&self) -> Box { + Box::new(self.clone()) } } @@ -68,32 +141,56 @@ pub enum Algo { Md5, Sha224, Sha256, - /// U5/H5/H7S only. + /// U5/H5/H7S/MP13. Sha384, - /// U5/H5/H7S only. + /// U5/H5/H7S/MP13. Sha512, + /// MP13/H5/H7S/N6 - SHA-512 truncated to 224 bits with its own IV. + Sha512_224, + /// MP13/H5/H7S/N6 - SHA-512 truncated to 256 bits with its own IV. + Sha512_256, + /// MP13 only. + Sha3_224, + /// MP13 only. + Sha3_256, + /// MP13 only. + Sha3_384, + /// MP13 only. + Sha3_512, + /// MP13 only. Variable-length XOF; the engine fixes the output to + /// 16 bytes (the MP13 HAL default for `HASH_DIGEST_SIZE_SHAKE_128`). + Shake128, + /// MP13 only. Output fixed to 32 bytes. 
+ Shake256, } impl Algo { pub fn output_words(self) -> usize { match self { - Algo::Sha1 => 5, // 160 bit - Algo::Md5 => 4, // 128 bit - Algo::Sha224 => 7, // 224 bit - Algo::Sha256 => 8, // 256 bit - Algo::Sha384 => 12, // 384 bit - Algo::Sha512 => 16, // 512 bit + Algo::Sha1 => 5, // 160 bit + Algo::Md5 | Algo::Shake128 => 4, // 128 bit + Algo::Sha224 | Algo::Sha3_224 | Algo::Sha512_224 => 7, // 224 bit + Algo::Sha256 | Algo::Sha3_256 | Algo::Shake256 | Algo::Sha512_256 => 8, // 256 bit + Algo::Sha384 | Algo::Sha3_384 => 12, // 384 bit + Algo::Sha512 | Algo::Sha3_512 => 16, // 512 bit } } /// HMAC block size in bytes - the unit at which HMAC pads the /// key with `K_pad ⊕ ipad` / `K_pad ⊕ opad`. SHA-384/512 use a - /// 1024-bit (128-byte) compression block, the rest use 512-bit - /// (64-byte). + /// 1024-bit (128-byte) compression block, the SHA-1/2-256 family + /// use 512-bit (64-byte), and the SHA-3 / SHAKE rate depends on + /// the security level (NIST FIPS-202 Table 3). pub fn block_size(self) -> usize { match self { Algo::Md5 | Algo::Sha1 | Algo::Sha224 | Algo::Sha256 => 64, - Algo::Sha384 | Algo::Sha512 => 128, + Algo::Sha384 | Algo::Sha512 | Algo::Sha512_224 | Algo::Sha512_256 => 128, + Algo::Sha3_224 => 144, // rate r = 1152 bits + Algo::Sha3_256 => 136, // rate r = 1088 bits + Algo::Sha3_384 => 104, // rate r = 832 bits + Algo::Sha3_512 => 72, // rate r = 576 bits + Algo::Shake128 => 168, // rate r = 1344 bits + Algo::Shake256 => 136, // rate r = 1088 bits } } } diff --git a/STM32Sim/stm32-sim/peripherals/src/hash/v1.rs b/STM32Sim/stm32-sim/peripherals/src/hash/v1.rs index f1075da..fa30fbc 100644 --- a/STM32Sim/stm32-sim/peripherals/src/hash/v1.rs +++ b/STM32Sim/stm32-sim/peripherals/src/hash/v1.rs @@ -28,11 +28,19 @@ //! 0x00C HR0..HR4 (hash result, 5 words for SHA-1/MD5) //! 0x020 IMR //! 0x024 SR (BUSY/DCIS/DINIS) -//! 0x028.. digest size dependent regions, scratch +//! 0x028 SHA3CFGR (MP13 only: SHA3 padding / round config. The same +//! 
+//! offset is reserved/scratch on H7 and U5; firmware +//! on those parts never writes it.) +//! 0x02C..0xF4 reserved / scratch (CSR begins at 0x0F8) //! 0x310 HR0..HR7 (extended hash result, 8 words for SHA-256) //! -//! ALGO selection on H7: bits {18, 7} of CR — 00 SHA-1, 01 MD5, -//! 10 SHA-224, 11 SHA-256. +//! ALGO selection differs by chip family: +//! - H7 : 2-bit field at CR bits {18, 7} +//! 00 SHA-1, 01 MD5, 10 SHA-224, 11 SHA-256. +//! - U5 : 2-bit field at CR bits {18, 17} (same encoding). +//! - MP13: 4-bit field at CR bits [20:17] - covers SHA-1/MD5/SHA-224/ +//! SHA-256, SHA-384/512 + SHA-512/224/256, SHA3-224/256/384/512, +//! and SHAKE-128/256 (the RawSHAKE codes alias to SHAKE). use stm32_sim_core::peripheral::Peripheral; @@ -54,10 +62,21 @@ const HR_LEGACY_BASE: u32 = 0x00C; const HR_LEGACY_END: u32 = 0x01C; const IMR: u32 = 0x020; const SR: u32 = 0x024; +/// SHA3 padding / round configuration register. MP13 only - on H7/U5 +/// the offset is reserved. wolfSSL's SHA3 driver writes the padding +/// byte here before each DCAL; we store the value so reads return +/// what was written but the engine selects SHA3 purely from CR.ALGO. +const SHA3CFGR: u32 = 0x028; const CSR_BASE: u32 = 0x0F8; const CSR_END: u32 = 0x1CC; // CSR_BASE + 53 * 4 const HR_EXT_BASE: u32 = 0x310; -const HR_EXT_END: u32 = 0x32C; +/// H7/U5 stop the extended HR window at 0x32C (8 words, SHA-256 sized). +/// MP13's HASH_DIGEST aliases a 50-word HR2 region at 0x310-0x3D4, so +/// wolfSSL's loop reads `HASH_DIGEST->HR[5..15]` for SHA-384 / SHA-512 +/// at offsets that run past 0x32C. Cap at 0x34C (16 words) so SHA-512 +/// fits; reads of indices beyond `engine.result.len()` fall through to +/// the zero return in `read_hr`. +const HR_EXT_END: u32 = 0x34C; // HASH_CR layout: bit 2 INIT, bits[5:4] DATATYPE, bit 6 MODE.
// ALGO is a 2-bit field but the chip family decides where it lives: @@ -73,25 +92,23 @@ const CR_MODE_HMAC: u32 = 1 << 6; #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum AlgoLayout { - /// H7 family: ALGO[0] at bit 7, ALGO[1] at bit 18. + /// H7 family: 2-bit ALGO at CR bits {7, 18}. Encoding: 00 SHA-1, + /// 01 MD5, 10 SHA-224, 11 SHA-256. H7, - /// U5 family: ALGO[0] at bit 17, ALGO[1] at bit 18. + /// U5 family: 2-bit ALGO at CR bits {17, 18}. Same encoding as H7. U5, + /// MP13 family: 4-bit ALGO at CR bits [20:17]. The wider field + /// covers SHA-1/MD5/SHA-224/SHA-256 (codes 0..3), SHA3 (codes 4..7), + /// SHAKE/RawSHAKE (codes 8..11) and SHA-384/512 plus SHA-512/224/256 + /// (codes 12..15). The SHA3CFGR register at offset 0x28 supplies extra + /// padding / round metadata that we model as a write-back store. + Mp13, } impl AlgoLayout { - fn lo_bit(self) -> u32 { - match self { - AlgoLayout::H7 => 7, - AlgoLayout::U5 => 17, - } - } - fn hi_bit(self) -> u32 { - 18 - } - #[allow(dead_code)] - fn algo_mask(self) -> u32 { - (1u32 << self.lo_bit()) | (1u32 << self.hi_bit()) + /// True if ALGO is decoded from a 4-bit field (currently MP13). + fn is_wide(self) -> bool { + matches!(self, AlgoLayout::Mp13) } } @@ -113,6 +130,12 @@ pub struct HashV1 { str_reg: u32, imr: u32, sr: u32, + /// Mirror of the SHA3CFGR register on MP13. Unused on H7/U5 but + /// kept in the struct unconditionally so the write/read paths + /// can be unconditional. wolfSSL writes the SHA3 padding byte + /// here; the engine ignores the value because the digest type + /// is already fixed by CR.ALGO. + sha3cfgr: u32, /// One-word lookahead: each DIN write displaces this and commits /// the displaced word in full to the engine. STR.DCAL pulls this /// out and feeds it with `NBLW`-derived valid-byte count. This is @@ -157,6 +180,12 @@ impl HashV1 { Self::with_layout(AlgoLayout::U5) } + /// MP13 (STM32MP135) layout: 4-bit ALGO field, SHA3CFGR register, + /// SHA-384/512, SHA3 and SHAKE support.
+ pub fn new_mp13() -> Self { + Self::with_layout(AlgoLayout::Mp13) + } + fn with_layout(layout: AlgoLayout) -> Self { Self { layout, @@ -164,6 +193,7 @@ impl HashV1 { str_reg: 0, imr: 0, sr: SR_DINIS, + sha3cfgr: 0, pending: None, csr: [0; 54], saved_pending: None, @@ -173,16 +203,65 @@ impl HashV1 { } fn parse_algo(&self) -> Algo { - let lo_bit = 1u32 << self.layout.lo_bit(); - let hi_bit = 1u32 << self.layout.hi_bit(); - let lo = if self.cr & lo_bit != 0 { 1u32 } else { 0 }; - let hi = if self.cr & hi_bit != 0 { 1u32 } else { 0 }; - match (hi, lo) { - (0, 0) => Algo::Sha1, - (0, 1) => Algo::Md5, - (1, 0) => Algo::Sha224, - (1, 1) => Algo::Sha256, - _ => Algo::Sha1, + if self.layout.is_wide() { + // MP13: 4-bit ALGO at bits [20:17]. Encoding from + // stm32mp13xx_hal_hash.h HASH_ALGOSELECTION_*: + // 0 SHA-1 4 SHA3-224 8 SHAKE-128 + // 1 MD5 5 SHA3-256 9 SHAKE-256 + // 2 SHA-224 6 SHA3-384 10 RAWSHAKE-128 + // 3 SHA-256 7 SHA3-512 11 RAWSHAKE-256 + // 12 SHA-384 + // 13 SHA-512/224 + // 14 SHA-512/256 + // 15 SHA-512 + // SHA-512/224 and SHA-512/256 have their own IVs (FIPS-180 + // section 5.3) and produce different digests from a SHA-512 + // tail-truncation, so they each route to a dedicated hasher. + // RAWSHAKE collapses to SHAKE - the only difference is the + // suffix bits in the padding rule (omitted in RawSHAKE), and + // the sha3 crate exposes only the standard SHAKE + // construction. wolfSSL's STM32 port does not exercise + // RAWSHAKE, so the simpler aliasing is fine. 
+ let code = (self.cr >> 17) & 0xF; + match code { + 0 => Algo::Sha1, + 1 => Algo::Md5, + 2 => Algo::Sha224, + 3 => Algo::Sha256, + 4 => Algo::Sha3_224, + 5 => Algo::Sha3_256, + 6 => Algo::Sha3_384, + 7 => Algo::Sha3_512, + 8 | 10 => Algo::Shake128, + 9 | 11 => Algo::Shake256, + 12 => Algo::Sha384, + 13 => Algo::Sha512_224, + 14 => Algo::Sha512_256, + 15 => Algo::Sha512, + _ => Algo::Sha1, + } + } else { + let lo_bit = 1u32 << self.layout_lo_bit(); + let hi_bit = 1u32 << 18; + let lo = if self.cr & lo_bit != 0 { 1u32 } else { 0 }; + let hi = if self.cr & hi_bit != 0 { 1u32 } else { 0 }; + match (hi, lo) { + (0, 0) => Algo::Sha1, + (0, 1) => Algo::Md5, + (1, 0) => Algo::Sha224, + (1, 1) => Algo::Sha256, + _ => Algo::Sha1, + } + } + } + + fn layout_lo_bit(&self) -> u32 { + match self.layout { + AlgoLayout::H7 => 7, + AlgoLayout::U5 => 17, + // Unused on MP13 - that path takes the 4-bit branch in + // parse_algo. We still need a value to type-check. + AlgoLayout::Mp13 => 17, } } @@ -276,6 +355,7 @@ impl Peripheral for HashV1 { STR => self.str_reg, IMR => self.imr, SR => self.sr, + SHA3CFGR => self.sha3cfgr, o if (HR_LEGACY_BASE..=HR_LEGACY_END).contains(&o) => { let idx = ((o - HR_LEGACY_BASE) / 4) as usize; self.read_hr(idx) @@ -307,6 +387,7 @@ impl Peripheral for HashV1 { STR => self.write_str(value), IMR => self.imr = value, SR => self.sr &= !value, + SHA3CFGR => self.sha3cfgr = value, o if (CSR_BASE..=CSR_END).contains(&o) => { let idx = ((o - CSR_BASE) / 4) as usize; self.csr[idx] = value; @@ -480,6 +561,99 @@ mod tests { assert_eq!(hr.as_slice(), &expected, "SHA-256 56B mismatch"); } + /// MP13 SHA3-256("abc") = + /// 3a985da74fe225b2045c172d6bd390bd855f086e3e9d525b46bfe24511431532 + /// Exercises the 4-bit ALGO field at CR[20:17] (code 0b0101 for + /// SHA3-256) on the MP13 layout, plus the SHA3CFGR scratch + /// register that wolfSSL writes ahead of each DCAL. 
+ #[test] + fn sha3_256_abc_kat_mp13() { + let mut p = HashV1::new_mp13(); + // SHA3-256 ALGO code = 5 -> CR[20:17] = 0b0101. DATATYPE=byte + // (bits[5:4]=10) so the engine swap_bytes()es the input. INIT. + let cr_init = (5u32 << 17) | (2 << CR_DATATYPE_SHIFT) | CR_INIT; + // Mirror the wolfSSL flow: padding byte goes to SHA3CFGR first. + // Value is opaque to the simulator (engine picks SHA3 from CR). + write_word(&mut p, SHA3CFGR, 0x06); + write_word(&mut p, CR, cr_init); + // "abc" packed BE into one word, pre-swapped for DATATYPE=byte. + write_word(&mut p, DIN, 0x6162_6300u32.swap_bytes()); + // NBLW = 24 bits valid + DCAL. + write_word(&mut p, STR, STR_DCAL | 24); + + let hr: Vec = (0..8) + .map(|i| read_word(&mut p, HR_EXT_BASE + i * 4)) + .collect(); + let expected = [ + 0x3a985da7, 0x4fe225b2, 0x045c172d, 0x6bd390bd, 0x855f086e, 0x3e9d525b, 0x46bfe245, + 0x11431532, + ]; + assert_eq!(hr.as_slice(), &expected, "SHA3-256 abc mismatch"); + + // SHA3CFGR write/read round-trip. + assert_eq!(read_word(&mut p, SHA3CFGR), 0x06); + } + + /// MP13 SHA3-512("") = + /// a69f73cca23a9ac5c8b567dc185a756e97c982164fe25859e0d1dcc1475c80a6 + /// 15b2123af1f5f94c11e3e9402c3ac558f500199d95b6d3e301758586281dcd26 + #[test] + fn sha3_512_empty_kat_mp13() { + let mut p = HashV1::new_mp13(); + let cr_init = (7u32 << 17) | (2 << CR_DATATYPE_SHIFT) | CR_INIT; + write_word(&mut p, CR, cr_init); + write_word(&mut p, STR, STR_DCAL); + let hr: Vec = (0..16) + .map(|i| read_word(&mut p, HR_EXT_BASE + i * 4)) + .collect(); + // HR_EXT_END is 0x34C, so the extended HR window spans 16 + // words and all 16 reads above stay inside it. Only the + // first 8 words (the digest prefix) are asserted below; + // words 8..15 are read but not checked.
+ let expected_prefix = [ + 0xa69f73cc, 0xa23a9ac5, 0xc8b567dc, 0x185a756e, 0x97c98216, 0x4fe25859, 0xe0d1dcc1, + 0x475c80a6, + ]; + assert_eq!(&hr[..8], &expected_prefix, "SHA3-512 empty hash prefix mismatch"); + } + + /// MP13 SHAKE-128("") with 16-byte output = + /// 7f9c2ba4e88f827d616045507605853e + /// SHAKE is variable-length; the engine fixes the output to 16 + /// bytes (matching the MP13 HAL's HASH_DIGEST_SIZE_SHAKE_128 + /// default). Exercises ALGO code 0b1000 in the 4-bit MP13 field. + #[test] + fn shake128_empty_kat_mp13() { + let mut p = HashV1::new_mp13(); + let cr_init = (8u32 << 17) | (2 << CR_DATATYPE_SHIFT) | CR_INIT; + write_word(&mut p, CR, cr_init); + write_word(&mut p, STR, STR_DCAL); + let hr: Vec = (0..4) + .map(|i| read_word(&mut p, HR_EXT_BASE + i * 4)) + .collect(); + let expected = [0x7f9c2ba4, 0xe88f827d, 0x61604550, 0x7605853e]; + assert_eq!(hr.as_slice(), &expected, "SHAKE-128 empty mismatch"); + } + + /// MP13 SHAKE-256("") with 32-byte output = + /// 46b9dd2b0ba88d13233b3feb743eeb243fcd52ea62b81b82b50c27646ed5762f + /// ALGO code = 0b1001 (= 9). 
+ #[test] + fn shake256_empty_kat_mp13() { + let mut p = HashV1::new_mp13(); + let cr_init = (9u32 << 17) | (2 << CR_DATATYPE_SHIFT) | CR_INIT; + write_word(&mut p, CR, cr_init); + write_word(&mut p, STR, STR_DCAL); + let hr: Vec = (0..8) + .map(|i| read_word(&mut p, HR_EXT_BASE + i * 4)) + .collect(); + let expected = [ + 0x46b9dd2b, 0x0ba88d13, 0x233b3feb, 0x743eeb24, 0x3fcd52ea, 0x62b81b82, 0xb50c2764, + 0x6ed5762f, + ]; + assert_eq!(hr.as_slice(), &expected, "SHAKE-256 empty mismatch"); + } + /// MD5("") = d41d8cd98f00b204e9800998ecf8427e #[test] fn md5_empty_kat() { diff --git a/STM32Sim/stm32-sim/runner-bin/src/main.rs b/STM32Sim/stm32-sim/runner-bin/src/main.rs index 969a8d4..26b87d7 100644 --- a/STM32Sim/stm32-sim/runner-bin/src/main.rs +++ b/STM32Sim/stm32-sim/runner-bin/src/main.rs @@ -80,12 +80,13 @@ fn run(args: Args) -> Result { .elf .clone() .ok_or_else(|| anyhow!("ELF path required (or pass --list-chips)"))?; - let image = ElfImage::from_path(&elf_path) - .with_context(|| format!("loading {}", elf_path.display()))?; let chip = stm32_sim_chips::build(&args.chip)?; - let mut cpu = Cpu::new(&chip.memory_regions)?; + let image = ElfImage::from_path_with_kind(&elf_path, chip.cpu_kind) + .with_context(|| format!("loading {}", elf_path.display()))?; + + let mut cpu = Cpu::new_with_kind(&chip.memory_regions, chip.cpu_kind)?; cpu.ensure_segments_fit(&image, &chip.memory_regions)?; cpu.install_bus(chip.bus)?; cpu.load_elf(&image)?; diff --git a/STM32Sim/stm32-sim/runner-bin/tests/smoke.rs b/STM32Sim/stm32-sim/runner-bin/tests/smoke.rs index 73be863..63f0161 100644 --- a/STM32Sim/stm32-sim/runner-bin/tests/smoke.rs +++ b/STM32Sim/stm32-sim/runner-bin/tests/smoke.rs @@ -26,6 +26,10 @@ fn u5_smoke_dir() -> PathBuf { workspace_root().join("firmware").join("smoke-test-u5") } +fn mp135_smoke_dir() -> PathBuf { + workspace_root().join("firmware").join("smoke-test-mp135") +} + fn have_arm_gcc() -> bool { Command::new("arm-none-eabi-gcc") .arg("--version") @@ -141,3 
+145,67 @@ fn u5_smoke_firmware_passes() { "U5 pass marker missing:\n{stdout}" ); } + +#[test] +fn mp135_smoke_firmware_passes() { + if !have_arm_gcc() { + eprintln!("skipping: arm-none-eabi-gcc not on PATH"); + return; + } + + let dir = mp135_smoke_dir(); + let make = Command::new("make") + .current_dir(&dir) + .status() + .expect("failed to invoke make for mp135 firmware"); + assert!(make.success(), "mp135 firmware build failed"); + + let elf = dir.join("smoke.elf"); + let bin = env!("CARGO_BIN_EXE_stm32-sim"); + let out = Command::new(bin) + .args([ + "--chip", + "stm32mp135", + "--timeout", + "10", + "--exit-on", + "test_complete", + "--result-symbol", + "test_result", + ]) + .arg(&elf) + .output() + .expect("failed to invoke stm32-sim for mp135"); + + let stdout = String::from_utf8_lossy(&out.stdout); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + out.status.success(), + "mp135 stm32-sim exited {:?}\nstdout:\n{stdout}\nstderr:\n{stderr}", + out.status + ); + assert!( + stdout.contains("=== STM32Sim MP135 smoke test ==="), + "MP135 banner missing:\n{stdout}" + ); + assert!( + stdout.contains("rng[0] = 0x"), + "MP135 RNG output missing:\n{stdout}" + ); + assert!( + stdout.contains("AES-128 ECB round-trip OK"), + "MP135 CRYP AES round-trip missing:\n{stdout}" + ); + assert!( + stdout.contains("SHA-256 \"abc\" OK"), + "MP135 HASH SHA-256 missing:\n{stdout}" + ); + assert!( + stdout.contains("SHA3-256 \"abc\" OK"), + "MP135 HASH SHA3-256 missing:\n{stdout}" + ); + assert!( + stdout.contains("=== smoke test passed ==="), + "MP135 pass marker missing:\n{stdout}" + ); +}