ipxe/src/arch/riscv/prefix/libprefix.S
Michael Brown 5bda1727b4 [riscv] Allow for poisoning .bss section before early initialisation
On startup, we may be running from read-only memory, and therefore
cannot zero the .bss section (or write to the .data section) until we
have parsed the system memory map and relocated ourselves to somewhere
suitable in RAM.  The code that runs during this early initialisation
stage must be carefully written to avoid writing to the .data section
and to avoid reading from or writing to the .bss section.

Detecting code that erroneously writes to the .data or .bss sections
is relatively easy since running from read-only memory (e.g. via
QEMU's -pflash option) will immediately reveal the bug.  Detecting
code that erroneously reads from the .bss section is harder, since in
a freshly powered-on machine (or in a virtual machine) there is a high
probability that the contents of the memory will be zero even before
we explicitly zero out the section.

Add the ability to fill the .bss section with an invalid non-zero
value to expose bugs in early initialisation code that erroneously
relies upon variables in .bss before the section has been zeroed.  We
use the value 0xeb55eb55eb55eb55 ("EBSS") since this is immediately
recognisable as a value in a crash dump, and will trigger a page fault
if dereferenced since the address is in a non-canonical form.

Poisoning the .bss can be done only when the image is known to already
reside in writable memory.  It will overwrite the relocation records,
and so can be done only on a system where relocation is known to be
unnecessary (e.g. because paging is supported).  We therefore do not
enable this behaviour by default, but leave it as a configurable
option via the config/fault.h header.
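
For example (illustrative; the exact option name follows the
"#if POISON_BSS" test used in the code), a local build configuration
might contain:

    /* config/local/fault.h */
    #define POISON_BSS 1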

Signed-off-by: Michael Brown <mcb30@ipxe.org>
2025-07-30 12:31:15 +01:00


/*
* Copyright (C) 2025 Michael Brown <mbrown@fensystems.co.uk>.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*
* You can also choose to distribute this program under the terms of
* the Unmodified Binary Distribution Licence (as given in the file
* COPYING.UBDL), provided that you have satisfied its requirements.
*/
FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL )
/** @file
*
* RISC-V prefix library
*
*/
#include <config/serial.h>
#include <config/fault.h>
.section ".note.GNU-stack", "", @progbits
.text
/* Link-time base address of _prefix
*
* This will not be updated if runtime relocations are applied.
*/
.section ".rodata.prefix_link", "a", @progbits
.balign ( __riscv_xlen / 8 )
prefix_link:
.dword _base
.size prefix_link, . - prefix_link
/* Virtual address of _prefix
*
* This will be updated if runtime relocations are applied.
*/
.section ".rodata.prefix_virt", "a", @progbits
.balign ( __riscv_xlen / 8 )
prefix_virt:
.dword _prefix
.size prefix_virt, . - prefix_virt
/*****************************************************************************
*
* Print character via debug console extension
*
*****************************************************************************
*
* Print a single character via the SBI DBCN extension.
*
* Parameters:
*
* a0 - Character to print
*
* Returns:
*
* a0 - Zero if character printed successfully
* a1 - Overwritten
* a6 - Overwritten
* a7 - Overwritten
*
*/
/* SBI debug console extension */
#define SBI_DBCN ( ( 'D' << 24 ) | ( 'B' << 16 ) | ( 'C' << 8 ) | 'N' )
#define SBI_DBCN_WRITE_BYTE 0x02
.macro print_char_dbcn
li a7, SBI_DBCN
li a6, SBI_DBCN_WRITE_BYTE
ecall
.endm
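/* Example usage (illustrative): print a single 'A' via the debug
* console extension:
*
* li a0, 'A'
* print_char_dbcn
*/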
/*****************************************************************************
*
* Print character via legacy extension
*
*****************************************************************************
*
* Print a single character via the SBI putchar legacy extension.
*
* Parameters:
*
* a0 - Character to print
*
* Returns:
*
* a0 - Overwritten
* a7 - Overwritten
*
*/
/* SBI legacy console putchar */
#define SBI_LEGACY_PUTCHAR 0x01
.macro print_char_legacy
li a7, SBI_LEGACY_PUTCHAR
ecall
.endm
/*****************************************************************************
*
* Print character via early UART
*
*****************************************************************************
*
* Print a single character via a UART.
*
* For devices without a functional SBI console, a UART at a hardcoded
* address can be used as a last resort mechanism for obtaining debug
* output from the prefix.
*
* Parameters:
*
* a0 - Character to print
*
* Returns:
*
* a0 - Preserved
* a1 - May be overwritten
* a6 - May be overwritten
* a7 - May be overwritten
*
*/
/* Default to no UART, if not specified */
#ifndef EARLY_UART_MODEL
#define EARLY_UART_MODEL none
#endif
/* Default to a register shift of zero, if not specified */
#ifndef EARLY_UART_REG_SHIFT
#define EARLY_UART_REG_SHIFT 0
#endif
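/* Worked example (illustrative): a board whose UART registers are
* spaced 4 bytes apart would build with EARLY_UART_REG_SHIFT=2, so
* that e.g. the 8250 line status register (register 5) is accessed
* at byte offset ( 5 << 2 ) = 0x14 from EARLY_UART_REG_BASE.
*/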
#define print_char_uart _C2 ( print_char_uart_, EARLY_UART_MODEL )
#define early_uart_reg_base _C2 ( early_uart_reg_base_, __riscv_xlen )
/* Print character via nonexistent UART */
.macro print_char_uart_none
.endm
/*
* Get UART base address (64-bit addressing)
*/
.macro early_uart_reg_base_64 reg
csrr \reg, satp
beqz \reg, early_uart_reg_base_64_nonpaged_\@
LOADN \reg, early_uart_reg_base_64_virt
j early_uart_reg_base_64_done_\@
early_uart_reg_base_64_nonpaged_\@:
li \reg, EARLY_UART_REG_BASE
early_uart_reg_base_64_done_\@:
.endm
/*
* Get UART base address (32-bit addressing)
*/
.macro early_uart_reg_base_32 reg
li \reg, EARLY_UART_REG_BASE
sub \reg, \reg, tp
.endm
/*****************************************************************************
*
* Print character via 8250-compatible early UART
*
*****************************************************************************
*
* Print a single character via an 8250- or 16550-compatible UART.
*
* Parameters:
*
* a0 - Character to print
*
* Returns:
*
* a0 - Preserved
* a1 - Overwritten
* a7 - Overwritten
*
*/
/* 8250-compatible UART transmit registers */
#define EARLY_UART_8250_TX ( 0 << EARLY_UART_REG_SHIFT )
#define EARLY_UART_8250_LSR ( 5 << EARLY_UART_REG_SHIFT )
#define EARLY_UART_8250_LSR_THRE 0x20
.macro print_char_uart_8250
early_uart_reg_base a7
sb a0, EARLY_UART_8250_TX(a7)
fence
early_uart_8250_wait_\@:
lbu a1, EARLY_UART_8250_LSR(a7)
andi a1, a1, EARLY_UART_8250_LSR_THRE
beqz a1, early_uart_8250_wait_\@
.endm
/*****************************************************************************
*
* Print character via SiFive-compatible early UART
*
*****************************************************************************
*
* Print a single character via a SiFive-compatible UART.
*
* Parameters:
*
* a0 - Character to print
*
* Returns:
*
* a0 - Preserved
* a1 - Overwritten
* a7 - Overwritten
*
*/
/* SiFive-compatible UART transmit registers */
#define EARLY_UART_SIFIVE_TXFIFO ( 0 << EARLY_UART_REG_SHIFT )
.macro print_char_uart_sifive
early_uart_reg_base a7
sw a0, EARLY_UART_SIFIVE_TXFIFO(a7)
fence
early_uart_sifive_wait_\@:
lw a1, EARLY_UART_SIFIVE_TXFIFO(a7)
bltz a1, early_uart_sifive_wait_\@
.endm
/*****************************************************************************
*
* Print single character to early UART (from C code)
*
*****************************************************************************
*
* This function is called by the SBI console driver to output a
* character to the early UART (if enabled).
*
* The standard C ABI applies to this function.
*
* Parameters:
*
* a0 - Character to print
*
* Returns: none
*
*/
.section ".prefix.early_uart_putchar", "ax", @progbits
.globl early_uart_putchar
early_uart_putchar:
print_char_uart
ret
.size early_uart_putchar, . - early_uart_putchar
/*****************************************************************************
*
* Print message to debug console
*
*****************************************************************************
*
* Print a NUL-terminated string to the debug console.
*
* This function prints one character at a time via the "write byte"
* call (rather than using "write string"), since this avoids any need
* to know the current virtual-physical address translation. It does
* not require a valid stack.
*
* Note that the parameter is passed in register t1 (rather than a0)
* and all non-temporary registers are preserved.
*
* Parameters:
*
* t1 - Pointer to string
*
* Returns: none
*
*/
.section ".prefix.print_message", "ax", @progbits
.globl print_message
print_message:
/* Handle alternate link register */
mv t0, ra
print_message_alt:
/* Register usage:
*
* a0 - current character
* t0 - alternate link register
* t1 - character pointer
* t2 - preserved a0
* t3 - preserved a1
* t4 - preserved a6
* t5 - preserved a7
*/
mv t2, a0
mv t3, a1
mv t4, a6
mv t5, a7
1: /* Print each character in turn */
lbu a0, (t1)
addi t1, t1, 1
beqz a0, 2f
print_char_uart
print_char_dbcn
beqz a0, 1b
lbu a0, -1(t1)
print_char_legacy
j 1b
2:
/* Restore registers and return (via alternate link register) */
mv a7, t5
mv a6, t4
mv a1, t3
mv a0, t2
jr t0
.size print_message, . - print_message
/*
* Display progress message (if debugging is enabled)
*/
.macro progress message
#ifndef NDEBUG
.section ".rodata.progress_\@", "a", @progbits
progress_\@:
.asciz "\message"
.size progress_\@, . - progress_\@
.previous
la t1, progress_\@
jal t0, print_message_alt
#endif
.endm
/*****************************************************************************
*
* Print hexadecimal value to debug console
*
*****************************************************************************
*
* Print a register value in hexadecimal to the debug console.
*
* This function does not require a valid stack.
*
* Note that the parameters are passed in registers t1 and t2 (rather
* than a0) and all non-temporary registers are preserved.
*
* Parameters:
*
* t1 - Value to print
* t2 - Number of bits to print (must be a multiple of 4)
*
* Returns: none
*
*/
/*
* Convert a single nibble to an ASCII character
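*
* For example: 0x5 becomes -5 (negative, so only '0'+10 is added
* back), yielding '5'; 0xc becomes 2, to which both adjustments are
* applied, yielding 'c'.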
*/
.macro nibble_to_ascii reg
addi \reg, \reg, -10
bltz \reg, dec_\@
addi \reg, \reg, ( 'a' - ( '0' + 10 ) )
dec_\@: addi \reg, \reg, ( '0' + 10 )
.endm
.section ".prefix.print_hex_value", "ax", @progbits
.globl print_hex_value
print_hex_value:
/* Handle alternate link register */
mv t0, ra
print_hex_value_alt:
/* Register usage:
*
* a0 - current digit / general temporary
* t0 - alternate link register
* t1 - current value
* t2 - digit counter
* t3 - preserved a0
* t4 - preserved a1
* t5 - preserved a6
* t6 - preserved a7
*/
mv t3, a0
mv t4, a1
mv t5, a6
mv t6, a7
/* Skip any unprinted digits */
li a0, __riscv_xlen
sub a0, a0, t2
sll t1, t1, a0
1: /* Print each digit in turn */
srli a0, t1, ( __riscv_xlen - 4 )
nibble_to_ascii a0
print_char_uart
print_char_dbcn
beqz a0, 2f
srli a0, t1, ( __riscv_xlen - 4 )
nibble_to_ascii a0
print_char_legacy
2: slli t1, t1, 4
addi t2, t2, -4
bgtz t2, 1b
/* Restore registers and return (via alternate link register) */
mv a7, t6
mv a6, t5
mv a1, t4
mv a0, t3
jr t0
.size print_hex_value, . - print_hex_value
/*
* Display hexadecimal register value (if debugging is enabled)
*/
.macro print_hex_reg reg, bits=__riscv_xlen
#ifndef NDEBUG
mv t1, \reg
li t2, \bits
jal t0, print_hex_value_alt
#endif
.endm
/*
* Display hexadecimal symbol address (if debugging is enabled)
*/
.macro print_hex_addr sym
#ifndef NDEBUG
la t1, \sym
li t2, __riscv_xlen
jal t0, print_hex_value_alt
#endif
.endm
/*
* Display hexadecimal data value (if debugging is enabled)
*/
.macro print_hex_data sym
#ifndef NDEBUG
LOADN t1, \sym
li t2, __riscv_xlen
jal t0, print_hex_value_alt
#endif
.endm
/*****************************************************************************
*
* Apply compressed relocation records
*
*****************************************************************************
*
* Apply compressed relocation records to fix up iPXE to run at its
* current virtual address.
*
* This function must run before .bss is zeroed (since the relocation
* records are overlaid with .bss). It does not require a valid stack
* pointer.
*
* Parameters:
*
* a0 - Relocation records
*
* Returns: none
*
*/
/** Number of bits in a skip value */
#define ZREL_SKIP_BITS 19
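/* Record layout (as consumed by the loop below), taking 64-bit
* records as the example:
*
* bit 63 - format flag (0 = skip field present, 1 = bitmap only)
* bits 62-44 - 19-bit skip count, in machine words (when bit 63 = 0)
* bits 43-0 - relocation bitmap (bits 62-0 when bit 63 = 1)
*
* Each bitmap bit covers one machine word, LSB first; a set bit
* means "add the relocation addend to the word at this address".
*/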
.section ".prefix.apply_relocs", "ax", @progbits
.globl apply_relocs
apply_relocs:
/* Register usage:
*
* a0 - current relocation record pointer
* a1 - current relocation target address
* a2 - relocation addend
* a3 - current relocation record value
* a4 - number of bits remaining in current relocation record
*/
la a1, _prefix
/* Calculate relocation addend */
LOADN a2, prefix_virt
sub a2, a1, a2
/* Skip applying relocations if addend is zero */
beqz a2, apply_relocs_done
progress " reloc"
/* Test writability
*
* We do this to avoid accidentally sending an undefined
* sequence of commands to a flash device, if we are started
* from read-only memory with no paging support.
*
* We attempt to write an all-ones pattern, on the basis that
* this pattern will harmlessly cause any flash device
* conforming to the CFI01 specification to enter the default
* "read array" state.
*/
la t0, apply_relocs_test
li t1, -1
STOREN t1, (t0)
LOADN t2, (t0)
bne t1, t2, apply_relocs_failed
apply_relocs_loop:
/* Read new relocation record */
LOADN a3, (a0)
addi a0, a0, ( __riscv_xlen / 8 )
li a4, ( __riscv_xlen - 1 )
/* Consume and apply skip, if present (i.e. if MSB=0) */
bltz a3, 1f
addi a4, a4, -ZREL_SKIP_BITS
srli t0, a3, ( __riscv_xlen - ( ZREL_SKIP_BITS + 1 ) )
slli t0, t0, ( ( __riscv_xlen / 32 ) + 1 )
add a1, a1, t0
1:
/* Apply relocations corresponding to set bits in record */
1: andi t0, a3, 1
beqz t0, 2f
LOADN t1, (a1)
add t1, t1, a2
STOREN t1, (a1)
2: addi a1, a1, ( __riscv_xlen / 8 )
srli a3, a3, 1
addi a4, a4, -1
bnez a4, 1b
/* Loop until we have reached a terminator record (MSB=0, offset=0) */
bnez a3, apply_relocs_loop
/* Check that relocations were applied successfully */
la t0, _prefix
LOADN t1, prefix_virt
bne t0, t1, apply_relocs_failed
apply_relocs_done:
/* Return to caller */
progress " ok\r\n"
ret
apply_relocs_failed:
/* Failure to apply relocations (if relocations were needed)
* is a fatal error.
*/
progress " failed\r\n"
j reset_system
.size apply_relocs, . - apply_relocs
/* Writability test
*
* Placed within .data rather than .bss, since we need this to
* be within the range of the stored iPXE image.
*/
.section ".data.apply_relocs_test", "aw", @progbits
.balign ( __riscv_xlen / 8 )
apply_relocs_test:
.space ( __riscv_xlen / 8 )
.size apply_relocs_test, . - apply_relocs_test
/*****************************************************************************
*
* Enable paging
*
*****************************************************************************
*
* This function must be called with flat physical addressing. It
* does not require a valid stack pointer.
*
* Parameters:
*
* a0 - Page table to fill in (4kB, must be aligned to a 4kB boundary)
*
* Returns:
*
* a0 - Size of accessible physical address space (or zero for no limit)
* tp - Virtual address offset
* pc - Updated to a virtual address if paging enabled
*
*/
/** Number of bits in a page offset */
#define PAGE_SHIFT 12
/** Page size */
#define PAGE_SIZE ( 1 << PAGE_SHIFT )
/** Size of a page table entry (log2) */
#define PTE_SIZE_LOG2 ( ( __riscv_xlen / 32 ) + 1 )
/** Size of a page table entry */
#define PTE_SIZE ( 1 << PTE_SIZE_LOG2 )
/** Number of page table entries (log2) */
#define PTE_COUNT_LOG2 ( PAGE_SHIFT - PTE_SIZE_LOG2 )
/** Number of page table entries */
#define PTE_COUNT ( 1 << PTE_COUNT_LOG2 )
/** Number of bits in a virtual or physical page number */
#define VPPN_SHIFT PTE_COUNT_LOG2
/* Page table entry flags */
#define PTE_V 0x00000001 /**< Page table entry is valid */
#define PTE_R 0x00000002 /**< Page is readable */
#define PTE_W 0x00000004 /**< Page is writable */
#define PTE_X 0x00000008 /**< Page is executable */
#define PTE_A 0x00000040 /**< Page has been accessed */
#define PTE_D 0x00000080 /**< Page is dirty */
/* Page table entry flags for our leaf pages */
#define PTE_LEAF ( PTE_D | PTE_A | PTE_X | PTE_W | PTE_R | PTE_V )
/** Physical page number LSB in PTE */
#define PTE_PPN_LSB(x) ( 10 + (x) * VPPN_SHIFT )
#define PTE_PPN4_LSB PTE_PPN_LSB(4) /**< PPN[4] LSB (Sv57) */
#define PTE_PPN3_LSB PTE_PPN_LSB(3) /**< PPN[3] LSB (Sv57 & Sv48) */
#define PTE_PPN2_LSB PTE_PPN_LSB(2) /**< PPN[2] LSB (Sv57, Sv48, & Sv39) */
#define PTE_PPN1_LSB PTE_PPN_LSB(1) /**< PPN[1] LSB (all levels) */
#define PTE_PPN0_LSB PTE_PPN_LSB(0) /**< PPN[0] LSB (all levels) */
/** Page table entry physical page address shift */
#define PTE_PPN_SHIFT ( PAGE_SHIFT - PTE_PPN0_LSB )
/** Virtual page number LSB */
#define VPN_LSB(x) ( PAGE_SHIFT + (x) * VPPN_SHIFT )
#define VPN4_LSB VPN_LSB(4) /**< VPN[4] LSB (Sv57) */
#define VPN3_LSB VPN_LSB(3) /**< VPN[3] LSB (Sv57 & Sv48) */
#define VPN2_LSB VPN_LSB(2) /**< VPN[2] LSB (Sv57, Sv48, & Sv39) */
#define VPN1_LSB VPN_LSB(1) /**< VPN[1] LSB (all levels) */
#define VPN0_LSB VPN_LSB(0) /**< VPN[0] LSB (all levels) */
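/* Worked example for __riscv_xlen == 64: PTE_SIZE = 8, PTE_COUNT =
* 512, VPPN_SHIFT = 9, VPN1_LSB = 21 (so a level-1 "megapage" covers
* 2MB), and PTE_PPN_SHIFT = 2 (a physical address becomes a PTE by
* shifting right two bits and OR-ing in the flags).
*/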
/* Paging modes */
#define SATP_MODE_SV57 10 /**< Five-level paging (Sv57) */
#define SATP_MODE_SV48 9 /**< Four-level paging (Sv48) */
#define SATP_MODE_SV39 8 /**< Three-level paging (Sv39) */
#define SATP_MODE_SV32 1 /**< Two-level paging (Sv32) */
/** Paging mode shift */
#if __riscv_xlen == 64
#define SATP_MODE_SHIFT 60
#else
#define SATP_MODE_SHIFT 31
#endif
.globl enable_paging
.equ enable_paging, _C2 ( enable_paging_, __riscv_xlen )
/* Paging mode names (for debug messages) */
.section ".rodata.paging_mode_names", "a", @progbits
paging_mode_names:
.asciz "none"
.org ( paging_mode_names + 5 * SATP_MODE_SV32 )
.asciz "Sv32"
.org ( paging_mode_names + 5 * SATP_MODE_SV39 )
.asciz "Sv39"
.org ( paging_mode_names + 5 * SATP_MODE_SV48 )
.asciz "Sv48"
.org ( paging_mode_names + 5 * SATP_MODE_SV57 )
.asciz "Sv57"
.size paging_mode_names, . - paging_mode_names
/*
* Display paging mode name (if debugging is enabled)
*/
.macro paging_mode_name reg
#ifndef NDEBUG
slli t0, \reg, 2
add t0, t0, \reg
la t1, paging_mode_names
add t1, t1, t0
jal t0, print_message_alt
#endif
.endm
/* Maximum physical alignment
*
* We align to a "megapage" boundary to simplify the task of
* setting up page table mappings.
*/
.globl _max_align
.equ _max_align, ( 1 << VPN1_LSB )
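/* With the definitions above, this is 2MB for 64-bit builds
* (VPN1_LSB = 21) and 4MB for 32-bit builds (VPN1_LSB = 22).
*/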
/* Space for page table
*
* This can be used only once .bss is known to be writable.
*/
.section ".bss.page_table", "a", @nobits
.globl page_table
.balign PAGE_SIZE
page_table:
.space PAGE_SIZE
.size page_table, . - page_table
/* Convert physical address to virtual address */
.macro phys_to_virt rd, rs:vararg
_C2 ( phys_to_virt_, __riscv_xlen ) \rd, \rs
.endm
/*****************************************************************************
*
* Disable paging
*
*****************************************************************************
*
* This function may be called with either virtual or flat physical
* addressing. It does not require a valid stack pointer.
*
* Parameters:
*
* tp - Virtual address offset
*
* Returns:
*
* tp - Virtual address offset (zeroed)
* pc - Updated to a physical address
*
*/
.globl disable_paging
.equ disable_paging, _C2 ( disable_paging_, __riscv_xlen )
/*****************************************************************************
*
* Enable 64-bit paging
*
*****************************************************************************
*
* Construct a 64-bit page table to identity-map the whole of the
* mappable physical address space, and to map iPXE itself at its
* link-time address (which must be 2MB-aligned and be within the
* upper half of the kernel address space).
*
* This function must be called with flat physical addressing. It
* does not require a valid stack pointer.
*
* Parameters:
*
* a0 - Page table to fill in (4kB, must be aligned to a 4kB boundary)
*
* Returns:
*
* a0 - Size of accessible physical address space (or zero for no limit)
* tp - Virtual address offset
* pc - Updated to a virtual address if paging enabled
*
* A 4kB 64-bit page table contains 512 8-byte PTEs. We choose to use
* these as:
*
* - PTE[0-255] : Identity map for the physical address space.
*
* This conveniently requires exactly 256 PTEs, regardless of the
* paging level. Higher paging levels are able to identity-map a
* larger physical address space:
*
* Sv57 : 256 x 256TB "petapages" (56-bit physical address space)
* Sv48 : 256 x 512GB "terapages" (47-bit physical address space)
* Sv39 : 256 x 1GB "gigapages" (38-bit physical address space)
*
* Note that Sv48 and Sv39 cannot identity-map the whole of the
* available physical address space, since the virtual address
* space is not large enough (and is halved by the constraint
* that virtual addresses with bit 47/38 set must also have all
* higher bits set, and so cannot identity-map a full 56-bit
* physical address space).
*
* - PTE[x-y] : Virtual address map for iPXE
*
* These are 2MB "megapages" used to map the link-time virtual
* address range used by iPXE itself. We can use any 2MB-aligned
* range within 0xffffffffe0800000-0xffffffffffc00000, which
* breaks down as:
*
* VPN[4] = 511 (in Sv57, must be all-ones in Sv48 and Sv39)
* VPN[3] = 511 (in Sv57 and Sv48, must be all-ones in Sv39)
* VPN[2] = 511 (in all paging levels)
* VPN[1] = 260-510 (in all paging levels)
* VPN[0] = 0 (in all paging levels)
*
* In most builds, only a single 2MB "megapage" will be needed.
* We choose a link-time starting address of 0xffffffffeb000000
* within the permitted range, since the "eb" pattern is fairly
* distinctive and so makes it easy to visually identify any
* addresses originating from within iPXE's virtual address
* space.
*
* - PTE[511] : Recursive next level page table pointer
*
* This is a non-leaf PTE that points back to the page table
* itself. It acts as the next level page table pointer for:
*
* VPN[4] = 511 (in Sv57)
* VPN[3] = 511 (in Sv57 and Sv48)
* VPN[2] = 511 (in Sv57, Sv48, and Sv39)
*
* This recursive usage creates some duplicate mappings within
* unused portions of the virtual address space, but allows us to
* use only a single physical 4kB page table.
*/
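/* Worked example (illustrative): for the link-time base address
* 0xffffffffeb000000, VPN[1] = ( 0xffffffffeb000000 >> 21 ) & 511
* = 344, so the iPXE virtual address map begins at PTE[344].
*/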
/** SBI base extension */
#define SBI_BASE 0x10
#define SBI_BASE_MVENDORID 0x04
/** Non-standard T-Head page table entry additional flags
*
* T-Head processors such as the C910 use the high bits of the PTE in
* a very non-standard way that is incompatible with the RISC-V
* specification.
*
* As per the "Memory Attribute Extension (XTheadMae)", bits 62 and 61
* represent cacheability and "bufferability" (i.e. write-back
* cacheability) respectively. If we do not enable these bits, then
* the processor gets incredibly confused at the point that paging is
* enabled. The symptom is that cache lines will occasionally fail to
* fill, and so reads from any address may return unrelated data from
* a previously read cache line for a different address.
*/
#define THEAD_PTE_MAEE ( 0x60 << ( __riscv_xlen - 8 ) )
/** T-Head vendor ID */
#define THEAD_MVENDORID 0x5b7
/** T-Head "sxstatus" CSR */
#define THEAD_CSR_SXSTATUS 0x5c0
#define THEAD_CSR_SXSTATUS_MAEE 0x00200000 /**< XTheadMae enabled */
.section ".prefix.enable_paging_64", "ax", @progbits
enable_paging_64:
/* Register usage:
*
* tp - return value (virtual address offset)
* a0 - page table base address
* a1 - currently attempted paging level
* a2 - enabled paging level
* a3 - PTE pointer
* a4 - PTE stride
* a5 - size of accessible physical address space
*/
progress " paging:"
/* Calculate virtual address offset */
LOADN t0, prefix_link
la t1, _prefix
sub tp, t1, t0
/* Zero PTE[0-511] */
li t0, PTE_COUNT
mv a3, a0
1: STOREN zero, (a3)
addi a3, a3, PTE_SIZE
addi t0, t0, -1
bgtz t0, 1b
/* Construct PTE[511] as next level page table pointer */
srli t0, a0, PTE_PPN_SHIFT
ori t0, t0, PTE_V
STOREN t0, -PTE_SIZE(a3)
/* Construct base page table entry for address zero */
li t0, PTE_LEAF
STOREN t0, (a0)
/* Check for broken T-Head paging extensions */
mv a3, a0
li a7, SBI_BASE
li a6, SBI_BASE_MVENDORID
ecall
bnez a0, 1f
li t0, THEAD_MVENDORID
bne a1, t0, 1f
progress "thead-"
csrr t0, THEAD_CSR_SXSTATUS
li t1, THEAD_CSR_SXSTATUS_MAEE
and t0, t0, t1
beqz t0, 1f
progress "mae-"
LOADN t0, (a3)
li t1, THEAD_PTE_MAEE
or t0, t0, t1
STOREN t0, (a3)
1: mv a0, a3
/* Calculate PTE[x] address for iPXE virtual address map */
LOADN t0, prefix_link
srli t0, t0, VPN1_LSB
andi t0, t0, ( PTE_COUNT - 1 )
slli t0, t0, PTE_SIZE_LOG2
add a3, a0, t0
/* Calculate PTE stride for iPXE virtual address map
*
* PPN[1] LSB is PTE bit 19 in all paging modes, and so the
* stride is always ( 1 << 19 )
*/
li a4, 1
slli a4, a4, PTE_PPN1_LSB
/* Construct PTE[x-1] for early UART, if applicable */
#ifdef EARLY_UART_REG_BASE
li t0, ( EARLY_UART_REG_BASE & ~( ( 1 << VPN1_LSB ) - 1 ) )
srli t0, t0, PTE_PPN_SHIFT
ori t0, t0, ( PTE_LEAF & ~PTE_X )
STOREN t0, -PTE_SIZE(a3)
#endif
/* Construct PTE[x-y] for iPXE virtual address map */
la t0, _prefix
srli t0, t0, PTE_PPN_SHIFT
LOADN t1, (a0)
or t0, t0, t1
la t2, _ebss
srli t2, t2, PTE_PPN_SHIFT
1: STOREN t0, (a3)
addi a3, a3, PTE_SIZE
add t0, t0, a4
ble t0, t2, 1b
/* Find highest supported paging level */
li a1, SATP_MODE_SV57
enable_paging_64_loop:
/* Calculate PTE stride for identity map at this paging level
*
* a1 == 10 == Sv57: PPN[4] LSB is PTE bit 46 => stride := 1 << 46
* a1 == 9 == Sv48: PPN[3] LSB is PTE bit 37 => stride := 1 << 37
* a1 == 8 == Sv39: PPN[2] LSB is PTE bit 28 => stride := 1 << 28
*
* and so we calculate stride a4 := ( 1 << ( 9 * a1 - 44 ) )
*/
slli a4, a1, 3
add a4, a4, a1
addi a4, a4, -44
li t0, 1
sll a4, t0, a4
/* Calculate size of accessible physical address space
*
* The identity map comprises only the lower half of the PTEs,
* since virtual addresses for the higher half must have all
* high bits set, and so cannot form part of an identity map.
*/
slli a5, a4, ( PTE_PPN_SHIFT + ( PTE_COUNT_LOG2 - 1 ) )
/* Construct PTE[0-255] for identity map at this paging level */
mv a3, a0
li t0, ( PTE_COUNT / 2 )
LOADN t1, (a0)
1: STOREN t1, (a3)
addi a3, a3, PTE_SIZE
add t1, t1, a4
addi t0, t0, -1
bgtz t0, 1b
/* Attempt to enable paging, and read back active paging level */
slli t0, a1, SATP_MODE_SHIFT
srli t1, a0, PAGE_SHIFT
or t0, t0, t1
csrw satp, t0
sfence.vma
csrr a2, satp
srli a2, a2, SATP_MODE_SHIFT
/* Loop until we successfully enable paging, or run out of levels */
beq a2, a1, 1f
csrw satp, zero
addi a1, a1, -1
li t0, SATP_MODE_SV39
bge a1, t0, enable_paging_64_loop
mv tp, zero
mv a5, zero
1:
/* Adjust return address to a virtual address */
sub ra, ra, tp
/* Return, with or without paging enabled */
paging_mode_name a2
mv a0, a5
ret
.size enable_paging_64, . - enable_paging_64
/* Convert 64-bit physical address to virtual address */
.macro phys_to_virt_64 rd, rs:vararg
.ifnb \rs
mv \rd, \rs
.endif
.endm
/* Early UART base address when 64-bit paging is enabled
*
* When an early UART is in use, we choose to use the 2MB
* "megapage" immediately below iPXE itself to map the UART.
*/
#ifdef EARLY_UART_REG_BASE
.section ".rodata.early_uart_reg_base_64_virt", "a", @progbits
.balign 8
early_uart_reg_base_64_virt:
.dword ( _base - ( 1 << VPN1_LSB ) + \
( EARLY_UART_REG_BASE & ( ( 1 << VPN1_LSB ) - 1 ) ) )
.size early_uart_reg_base_64_virt, . - early_uart_reg_base_64_virt
#endif
/*****************************************************************************
*
* Disable 64-bit paging
*
*****************************************************************************
*
* This function may be called with either virtual or flat physical
* addressing. It does not require a valid stack pointer.
*
* Parameters:
*
* tp - Virtual address offset
*
* Returns:
*
* tp - Virtual address offset (zeroed)
* pc - Updated to a physical address
*
*/
.section ".prefix.disable_paging_64", "ax", @progbits
disable_paging_64:
/* Register usage:
*
* tp - virtual address offset
*/
/* Jump to physical address */
la t0, 1f
bgez t0, 1f
add t0, t0, tp
jr t0
1:
/* Disable paging */
csrw satp, zero
sfence.vma
/* Update return address to a physical address */
bgez ra, 1f
add ra, ra, tp
1:
/* Return with paging disabled and virtual offset zeroed */
mv tp, zero
ret
.size disable_paging_64, . - disable_paging_64
/*****************************************************************************
*
* Enable 32-bit paging
*
*****************************************************************************
*
* Construct a 32-bit page table to map the whole of the 32-bit
* address space with a fixed offset selected to map iPXE itself at
* its link-time address (which must be 4MB-aligned).
*
* This function must be called with flat physical addressing. It
* does not require a valid stack pointer.
*
* Parameters:
*
* a0 - Page table to fill in (4kB, must be aligned to a 4kB boundary)
*
* Returns:
*
* a0 - Size of accessible physical address space (or zero for no limit)
* tp - Virtual address offset
* pc - Updated to a virtual address if paging enabled
*
* A 4kB 32-bit page table contains 1024 4-byte PTEs. We choose to
* use these to produce a circular map of the 32-bit address space
* using 4MB "megapages", with a fixed offset to align the virtual and
* link-time addresses.
*
* To handle the transition from physical to virtual addresses, we
* temporarily adjust the PTE covering the current program counter to
* be a direct physical map (so that the program counter remains valid
* at the moment when paging is enabled), then jump to a virtual
* address, then restore the temporarily modified PTE.
*/
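/* Worked example (illustrative): an image linked at 0x00400000 and
* loaded at physical 0x80400000 gives tp = 0x80000000; PTE[i] then
* maps virtual address ( i << 22 ) to physical address
* ( i << 22 ) + tp, wrapping modulo 4GB.
*/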
.equ enable_paging_32_xalign, 32
.section ".prefix.enable_paging_32", "ax", @progbits
enable_paging_32:
/* Register usage:
*
* tp - return value (virtual address offset)
* a0 - page table base address
* a1 - enabled paging level
* a2 - PTE pointer
* a3 - saved content of temporarily modified PTE
*/
progress " paging:"
/* Calculate virtual address offset */
LOADN t0, prefix_link
la t1, _prefix
sub tp, t1, t0
/* Construct PTEs for circular map */
mv a2, a0
li t0, PTE_COUNT
mv t1, tp
ori t1, t1, ( PTE_LEAF << PTE_PPN_SHIFT )
li t2, ( 1 << ( PTE_PPN1_LSB + PTE_PPN_SHIFT ) )
1: srli t3, t1, PTE_PPN_SHIFT
STOREN t3, (a2)
addi a2, a2, PTE_SIZE
add t1, t1, t2
addi t0, t0, -1
bgtz t0, 1b
/* Temporarily modify PTE for transition code to be an identity map */
la t0, enable_paging_32_xstart
srli t0, t0, VPN1_LSB
slli t1, t0, PTE_SIZE_LOG2
add a2, a0, t1
LOADN a3, (a2)
slli t0, t0, PTE_PPN1_LSB
ori t0, t0, PTE_LEAF
STOREN t0, (a2)
/* Adjust PTE pointer to a virtual address */
sub a2, a2, tp
/* Attempt to enable paging, and read back active paging level */
la t0, 1f
sub t0, t0, tp
li t1, ( SATP_MODE_SV32 << SATP_MODE_SHIFT )
srli t2, a0, PAGE_SHIFT
or t1, t1, t2
.balign enable_paging_32_xalign
/* Start of transition code */
enable_paging_32_xstart:
csrw satp, t1
sfence.vma
csrr a1, satp
beqz a1, 2f
jr t0
1: /* Restore temporarily modified PTE */
STOREN a3, (a2)
sfence.vma
/* End of transition code */
.equ enable_paging_32_xlen, . - enable_paging_32_xstart
2: srli a1, a1, SATP_MODE_SHIFT
/* Zero SATP and virtual address offset if paging is not enabled */
bnez a1, 1f
csrw satp, zero
mv tp, zero
1:
/* Adjust return address to a virtual address */
sub ra, ra, tp
/* Return, with or without paging enabled */
paging_mode_name a1
mv a0, zero
ret
.size enable_paging_32, . - enable_paging_32
/* Ensure that transition code did not cross an alignment boundary */
.section ".bss.enable_paging_32_xcheck", "aw", @nobits
.org . + enable_paging_32_xalign - enable_paging_32_xlen
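/* ( If the transition code were longer than its alignment, the .org
* expression above would move the location counter backwards and
* the build would fail. )
*/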
/* Convert 32-bit physical address to virtual address */
.macro phys_to_virt_32 rd, rs:vararg
.ifnb \rs
sub \rd, \rs, tp
.else
sub \rd, \rd, tp
.endif
.endm
/*****************************************************************************
*
* Disable 32-bit paging
*
*****************************************************************************
*
* This function may be called with either virtual or flat physical
* addressing. It does not require a valid stack pointer.
*
* Parameters:
*
* tp - Virtual address offset
*
* Returns:
*
* tp - Virtual address offset (zeroed)
* pc - Updated to a physical address
*
*/
.equ disable_paging_32_xalign, 16
.section ".prefix.disable_paging_32", "ax", @progbits
disable_paging_32:
/* Register usage:
*
* tp - virtual address offset
* a0 - page table address
* a1 - transition PTE pointer
* a2 - transition PTE content
*/
/* Get page table address, and exit if paging is already disabled */
csrr a0, satp
beqz a0, 99f
slli a0, a0, PAGE_SHIFT
sub a0, a0, tp
/* Prepare for modifying transition PTE */
la t0, disable_paging_32_xstart
add t0, t0, tp
srli t0, t0, VPN1_LSB
slli a1, t0, PTE_SIZE_LOG2
add a1, a1, a0
slli a2, t0, PTE_PPN1_LSB
ori a2, a2, PTE_LEAF
/* Jump to physical address in transition PTE, and disable paging */
la t0, 1f
add t0, t0, tp
.balign disable_paging_32_xalign
/* Start of transition code */
disable_paging_32_xstart:
STOREN a2, (a1)
sfence.vma
jr t0
1: csrw satp, zero
sfence.vma
/* End of transition code */
.equ disable_paging_32_xlen, . - disable_paging_32_xstart
/* Update return address to a physical address */
add ra, ra, tp
99: /* Return with paging disabled and virtual offset zeroed */
mv tp, zero
ret
.size disable_paging_32, . - disable_paging_32
/* Ensure that transition code did not cross an alignment boundary */
.section ".bss.disable_paging_32_xcheck", "aw", @nobits
.org . + disable_paging_32_xalign - disable_paging_32_xlen
/*****************************************************************************
*
* Poison .bss section
*
*****************************************************************************
*
* Fill the .bss section with an invalid non-zero value to expose bugs
* in early initialisation code that erroneously relies upon variables
* in .bss before the section has been zeroed.
*
* We use the value 0xeb55eb55eb55eb55 ("EBSS") since this is
* immediately recognisable as a value in a crash dump, and will
* trigger a page fault if dereferenced since the address is in a
* non-canonical form.
*
* Poisoning the .bss will overwrite the relocation records, and so
* can be done only as a debugging step on a system where relocation
* is known to be unnecessary (e.g. because paging is supported).
*
* This function does not require a valid stack pointer, but will
* destroy any existing stack contents if the stack happens to be
* placed within the original .bss section.
*
* Parameters: none
*
* Returns: none
*
*/
.equ poison_bss_value_32, 0xeb55eb55
.equ poison_bss_value_64, 0xeb55eb55eb55eb55
.equ poison_bss_value, _C2 ( poison_bss_value_, __riscv_xlen )
.section ".prefix.poison_bss", "ax", @progbits
poison_bss:
/* Fill .bss section */
la t0, _bss
la t1, _ebss
li t2, poison_bss_value
1: STOREN t2, (t0)
addi t0, t0, ( __riscv_xlen / 8 )
blt t0, t1, 1b
ret
.size poison_bss, . - poison_bss
/*****************************************************************************
*
* Install iPXE to a suitable runtime address
*
*****************************************************************************
*
* Identify a suitable runtime address for iPXE, relocate there, and
* set up for running normal C code.
*
* A valid temporary stack pointer is required. A 4kB space for a
* temporary page table may be provided, and must be provided if the
* iPXE image is running from read-only memory.
*
* Note that this function does not preserve the callee-save registers.
*
* Parameters:
*
* a0 - Boot hart ID
* a1 - Device tree physical address
* a2 - Optional temporary page table space (4kB, aligned to a 4kB boundary)
* sp - Valid temporary stack pointer
*
* Returns:
*
* pc - Updated to be within the relocated iPXE
* sp - Top of internal stack
* tp - Virtual address offset
*
*/
.section ".prefix.install", "ax", @progbits
.globl install
install:
/* Register usage:
*
* s0 - boot hart ID
* s1 - device tree physical address
* s2 - saved return address
* s3 - relocation records physical address
* s4 - maximum accessible physical address
* s5 - relocation physical address
* s6 - relocation offset
* tp - virtual address offset
*/
mv tp, zero
progress "\r\nSBI->iPXE hart:"
print_hex_reg a0
progress " temp:"
print_hex_reg a2
progress " fdt:"
print_hex_reg a1
progress "\r\nSBI->iPXE phys:"
print_hex_addr _prefix
progress " virt:"
print_hex_data prefix_virt
mv s0, a0
mv s1, a1
mv s2, ra
la s3, _edata
/* Poison .bss if configured to do so */
#if POISON_BSS
call poison_bss
#endif
/* Attempt to enable paging, if we have temporary page table space */
mv a0, a2
beqz a2, 1f
call enable_paging
1: addi s4, a0, -1
/* Apply relocations, if still needed after enabling paging */
mv a0, s3
call apply_relocs
/* Find a suitable address for relocation (using temporary stack) */
phys_to_virt a0, s1
mv a1, s4
phys_to_virt sp
call fdtmem_relocate
mv s5, a0
progress "SBI->iPXE dest:"
print_hex_reg a0
/* Disable paging */
call disable_paging
/* Determine relocation offset */
la s6, _prefix
sub s6, s5, s6
/* Copy iPXE image to new location and zero .bss */
mv t0, s5
la t1, _prefix
la t2, _edata
1: LOADN t3, (t1)
STOREN t3, (t0)
addi t0, t0, ( __riscv_xlen / 8 )
addi t1, t1, ( __riscv_xlen / 8 )
blt t1, t2, 1b
la t1, _ebss
add t1, t1, s6
2: STOREN zero, (t0)
addi t0, t0, ( __riscv_xlen / 8 )
blt t0, t1, 2b
/* Jump to relocated copy */
la t0, 1f
add t0, t0, s6
jr t0
1:
/* Attempt to re-enable paging */
la a0, page_table
call enable_paging
/* Reapply relocations, if still needed after enabling paging */
phys_to_virt a0, s3
call apply_relocs
/* Load stack pointer */
la sp, _estack
/* Store boot hart */
STOREN s0, boot_hart, t0
/* Copy and register system device tree */
phys_to_virt a0, s1
mv a1, s4
call fdtmem_register
/* Return to a virtual address in the relocated copy */
add ra, s2, s6
sub ra, ra, tp
progress "\r\n"
ret
.size install, . - install
/*****************************************************************************
*
* Reset (or lock up) system
*
*****************************************************************************
*
* Reset the system via SBI, as a means of exiting from a prefix that
* has no other defined exit path. If the reset fails, lock up the
* system since there is nothing else that can sensibly be done.
*
* This function does not require a valid stack pointer.
*
* Parameters: none
*
* Returns: n/a (does not return)
*
*/
/* SBI system reset extension */
#define SBI_SRST ( ( 'S' << 24 ) | ( 'R' << 16 ) | ( 'S' << 8 ) | 'T' )
#define SBI_SRST_SYSTEM_RESET 0x00
#define SBI_RESET_COLD 0x00000001
/* SBI legacy shutdown */
#define SBI_LEGACY_SHUTDOWN 0x08
.section ".prefix.reset_system", "ax", @progbits
.globl reset_system
reset_system:
/* Register usage: irrelevant (does not return) */
progress "\r\niPXE->SBI reset\r\n"
/* Attempt reset */
li a7, SBI_SRST
li a6, SBI_SRST_SYSTEM_RESET
li a0, SBI_RESET_COLD
mv a1, zero
ecall
progress "(reset failed)\r\n"
/* Attempt legacy shutdown */
li a7, SBI_LEGACY_SHUTDOWN
ecall
progress "(legacy shutdown failed)\r\n"
/* If reset failed, lock the system */
1: wfi
j 1b
.size reset_system, . - reset_system
/*****************************************************************************
*
* File split information for the compressor
*
*****************************************************************************
*/
/* ELF machine type */
#define EM_RISCV 243
.section ".zinfo", "a", @progbits
.org 0
/* Copy initialised-data portion of image */
.ascii "COPY"
.word 0
.word _filesz
.word 1
/* Notify compressor of link-time base address */
.ascii "BASE"
.word 0
.dword _base
/* Construct compressed relocation records */
.ascii "ZREL"
.word _reloc_offset
.word _reloc_filesz
.word EM_RISCV