Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
afcea24
arch/aarch64: scaffold a full backend that satisfies the shared arch …
squirelboy360 Jun 8, 2026
f4e806f
arch: route x86-specific code in shared kernel through the arch facade
squirelboy360 Jun 8, 2026
038ad3b
aarch64: boot to EL1 with PL011 serial (milestone 1)
squirelboy360 Jun 8, 2026
68a1148
aarch64: enable the MMU with identity-mapped translation tables (mile…
squirelboy360 Jun 8, 2026
f3e9f57
aarch64: EL1 exception vectors + trap frame round-trip (milestone 3)
squirelboy360 Jun 8, 2026
f3147f9
aarch64: GICv2 + generic-timer periodic tick (milestones 4+5)
squirelboy360 Jun 8, 2026
d6d2c52
aarch64: SVC syscall entry + EL0 user process servicing (milestones 6+7)
squirelboy360 Jun 8, 2026
a070d53
aarch64: wire SVC capture into the shared per-CPU user-GPR snapshot
squirelboy360 Jun 8, 2026
25fcebf
aarch64: wake a secondary core via PSCI CPU_ON (milestone 8)
squirelboy360 Jun 8, 2026
3bb0af3
aarch64: add run-aarch64.sh to build + boot the ARM bring-up under QEMU
squirelboy360 Jun 8, 2026
4629503
aarch64: route -kernel boot into the shared production kernel_main
squirelboy360 Jun 8, 2026
834e145
aarch64: spawn the init process at EL0 and service its syscalls
squirelboy360 Jun 9, 2026
2f4b601
aarch64: robust DTB discovery — x0, RAM probe, then arch default
squirelboy360 Jun 9, 2026
4fc41b9
aarch64: document the production boot path in run-aarch64.sh
squirelboy360 Jun 9, 2026
078d089
mm: widen BootMemMap capacity to 128 regions
squirelboy360 Jun 9, 2026
d2de014
aarch64: ramfb framebuffer via fw_cfg — display-capable on -M virt
tahiru Jun 9, 2026
85c8034
aarch64: timer-driven preemption — shared cooperative scheduler hand-off
tahiru Jun 9, 2026
fbb82d6
aarch64: port the Flutter embedder host to the ARM syscall ABI
Jun 9, 2026
ff31c11
aarch64: dynamic-loader relocations + EL0 demand-paging for the engine
Jun 9, 2026
b77afc3
aarch64 shell: stage engine+host+snapshot into initramfs, spawn host …
sqrldev-404 Jun 9, 2026
779466b
aarch64: fix user-page mapping over seeded identity blocks + serve en…
sqrldev-404 Jun 9, 2026
efe6c25
aarch64: native syscall trampolines + fix EL0 data-abort EC so demand…
sqrldev-404 Jun 9, 2026
44be8f7
aarch64: correct EL0 data-abort EC to 0x24 + route EL1 user-VA aborts…
squirelboy360 Jun 9, 2026
568faf6
aarch64: implement R_AARCH64_TLSDESC (+TPREL/DTPREL/DTPMOD) relocatio…
squirelboy360 Jun 9, 2026
f3c03ad
aarch64: preserve user x30 (LR) across cooperative syscall yields
squirelboy360 Jun 9, 2026
5458708
aarch64: arch-correct syscall re-exec rewind, drop nested thread-ente…
squirelboy360 Jun 9, 2026
e6b7d58
aarch64: harden FB text console against geometry overflow; keep neste…
squirelboy360 Jun 9, 2026
d6d4045
wip(aarch64): timer-ISR timerfd wakes + pending-wake delivery; cpu-id…
squirelboy360 Jun 9, 2026
fd120d7
wip(aarch64): walk EL0 FP chain in unhandled-exception report to symb…
squirelboy360 Jun 9, 2026
6d680d3
wip(aarch64): expand scheduler diagnostics — heartbeat ticks with ful…
squirelboy360 Jun 9, 2026
de9f8c0
wip(aarch64): dump raw PTABLE slot pid/state for idx 1-12 to find the…
squirelboy360 Jun 9, 2026
a9d6fe0
aarch64: unmask IRQs during SVC syscall handling so the generic-timer…
squirelboy360 Jun 9, 2026
bef4942
aarch64: make timer-ISR cond-expiry use try-lock for FUTEX_WAITERS (f…
squirelboy360 Jun 9, 2026
57f3023
mm: make PAGE_TABLE_LOCK reentrant on a single core (lock_page_table)…
squirelboy360 Jun 9, 2026
d19241a
dl: cfg-gate TLSDESC_RESOLVER_VA reference so the x86_64 kernel build…
squirelboy360 Jun 9, 2026
b98ff19
aarch64: eager user-GPR capture + IRQ-masked page-table lock — advanc…
squirelboy360 Jun 9, 2026
4cc1716
Merge remote-tracking branch 'origin/develop' into feat/arch-aarch64-…
squirelboy360 Jun 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
204 changes: 203 additions & 1 deletion kernel/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,22 @@ fn main() {
generate_liboscortex_embedder_shim(),
));

// aarch64 port: provide a native EL0 `/init` so the production kernel_main can
// spawn a userspace process + service its first syscalls on ARM.
//
// When `scripts/build-aarch64-shell.sh` has staged the real Flutter host as
// `initramfs/init` (it is then already in `entries`), we KEEP it — that path
// boots the actual shell. Only when no `init` was collected do we inject the
// self-contained preemption self-test (one banner `write`, a fixed budget of
// compute-bound tick `write`s, then `exit`) so a bare `cargo build` +
// `run-aarch64.sh` still has something to spawn and exercises the
// spawn → SVC dispatch → exit path.
if arch == "aarch64" {
let has_real_init = entries.iter().any(|(n, _)| n == "init");
if !has_real_init {
entries.push(("init".to_string(), generate_aarch64_init_elf()));
}
}

// Write the USTAR tar to OUT_DIR.
let tar_bytes = build_ustar_tar(&entries);
std::fs::write(&out_tar, &tar_bytes).unwrap();
Expand Down Expand Up @@ -87,7 +103,15 @@ fn collect_dir(
&& name != "system/flutter/flutter_assets/kernel_blob.bin"
&& !(name.starts_with("Applications/") && name.ends_with(".app/flutter_assets/kernel_blob.bin"))
{ continue; }
if fname == "libflutter_engine.so" || fname.ends_with(".bak") { continue; }
// The Flutter engine .so is delivered as a Limine MODULE on x86
// (iso_root/boot/libflutter_engine.so), so it is excluded from the
// initramfs there. On aarch64 there is NO Limine — the kernel boots
// via `-kernel` and the engine MUST travel in the initramfs, so we
// KEEP it. (collect_dir runs in build.rs; CARGO_CFG_TARGET_ARCH is
// the kernel's build target.)
let target_arch = std::env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default();
if fname == "libflutter_engine.so" && target_arch != "aarch64" { continue; }
if fname.ends_with(".bak") { continue; }
println!("cargo:rerun-if-changed={}", path.display());
let data = match std::fs::read(&path) {
Ok(b) => b,
Expand Down Expand Up @@ -440,6 +464,184 @@ fn generate_liboscortex_embedder_shim() -> Vec<u8> {
generate_ret_stub_elf(SYMS)
}

// ── aarch64 native `/init` EL0 program ───────────────────────────────────────

/// Build a minimal statically-linked AArch64 ELF64 (ET_EXEC, EM_AARCH64) that
/// runs at EL0 and issues syscalls through the OSCortex shared dispatcher.
///
/// Syscall ABI used by the generated program (OSCortex / x86-numbered):
///   nr in x8, args in x0..x2, trap via `svc #0`.
///     1  = write(fd, buf, len)   → the program always writes to fd 1
///     60 = exit(code)            → terminate the process
///
/// Program behaviour: pick a message from the value found in x0 at entry
/// (0 → message A, anything else → message B), print a banner once, then
/// `TICK_BUDGET` times: busy-spin `SPIN_ITERS` iterations and print the chosen
/// message. Finally call `exit(0)`.
///
/// Memory layout: a single RWX PT_LOAD of one 4 KiB page at vaddr `VBASE`
/// (64 GiB). Code starts at the entry point (page offset 0); the three
/// messages live at fixed offsets 0x200 / 0x280 / 0x300 in the same page.
///
/// File layout: ELF header + one program header, zero padding, then the page
/// image at file offset 0x1000, so that `p_offset ≡ p_vaddr (mod p_align)` as
/// the ELF specification requires for loadable segments.
///
/// # Panics
/// Panics (failing the build) if the assembled code or any message no longer
/// fits in its fixed slot of the page.
fn generate_aarch64_init_elf() -> Vec<u8> {
    // Append `b.ne <target_idx>` where `target_idx` is an instruction index in
    // `code`. The branch offset is relative to the b.ne instruction itself and
    // is encoded as a signed 19-bit word offset in bits [23:5].
    fn push_b_ne(code: &mut Vec<u32>, target_idx: usize) {
        let off = (target_idx as i64 - code.len() as i64) as i32; // negative for back-edges
        let imm19 = (off as u32) & 0x7FFFF;
        code.push(0x5400_0001 | (imm19 << 5));
    }

    // Link the init program at 64 GiB (L1 index 64), well clear of the kernel's
    // low identity map. The bring-up kernel runs from the TTBR0 low half, so each
    // per-process root is seeded with the kernel's 1 GiB block descriptors for
    // the device region (index 0) and RAM (indices 1..2). A user page at a low VA
    // would collide with those blocks; placing the program at a high, otherwise-
    // empty L1 index lets the page-table walker create fresh L2/L3 tables there.
    const VBASE: u64 = 0x10_0000_0000; // 64 GiB
    const PAGE_SIZE: usize = 0x1000;

    // Iterations of the pure-compute spin between two ticks.
    const SPIN_ITERS: u64 = 4_000_000;
    // Each thread prints this many ticks, then exits — enough to demonstrate the
    // timer round-robining the two compute-bound threads, after which the system
    // settles into the kernel idle/cortex loop (rather than spamming forever).
    const TICK_BUDGET: u64 = 12;

    // Two messages selected by the bootstrap value the kernel seeds in x0 at
    // entry: process A (x0==0) prints msg_a, any other value prints msg_b. This
    // lets two instances of the SAME program emit distinguishable output so an
    // observer can see the timer round-robining between them.
    let msg_a: &[u8] = b"[arm-A] tick (EL0 compute loop A)\n";
    let msg_b: &[u8] = b"[arm-B] tick (EL0 compute loop B)\n";
    let hello: &[u8] = b"[arm-init] EL0 up; entering compute loop (timer preemption test)\n";
    let msg_a_off: u64 = 0x200;
    let msg_b_off: u64 = 0x280;
    let hello_off: u64 = 0x300;
    let msg_a_va = VBASE + msg_a_off;
    let msg_b_va = VBASE + msg_b_off;
    let hello_va = VBASE + hello_off;

    // ── Assemble the code (offset 0) ────────────────────────────────────────
    //
    // Pseudocode:
    //   x19 = (x0 == 0) ? msg_a_va : msg_b_va     // pick this instance's msg
    //   x20 = (x0 == 0) ? len_a    : len_b
    //   x22 = TICK_BUDGET
    //   write(1, hello, hello.len())              // announce once
    // loop:
    //   x21 = SPIN_ITERS                          // pure-compute busy spin —
    // spin: subs x21,x21,#1; b.ne spin            //   NO syscall, so only the
    //                                             //   timer can switch us out
    //   write(1, x19, x20)                        // print this instance's tick
    //   subs x22,x22,#1; b.ne loop
    //   exit(0)
    //
    // Because the spin does no syscalls, the kernel cannot cooperatively yield
    // here; if both A and B make progress (interleaved ticks on serial) the
    // generic-timer ISR MUST be preempting and switching between them.
    let mut code: Vec<u32> = Vec::new();

    // Select message VA (x19) and length (x20) from x0.
    a64_load_imm(&mut code, 19, msg_a_va);
    a64_load_imm(&mut code, 20, msg_a.len() as u64);
    a64_load_imm(&mut code, 9, msg_b_va);
    a64_load_imm(&mut code, 10, msg_b.len() as u64);
    code.push(0xF100_001F); // subs xzr, x0, #0   (cmp x0,#0)
    code.push(0x9A89_0273); // csel x19, x19, x9, eq
    code.push(0x9A8A_0294); // csel x20, x20, x10, eq

    // x22 = TICK_BUDGET (remaining ticks before this thread exits)
    a64_load_imm(&mut code, 22, TICK_BUDGET);

    // write(1, hello, hello.len())
    a64_load_imm(&mut code, 0, 1);
    a64_load_imm(&mut code, 1, hello_va);
    a64_load_imm(&mut code, 2, hello.len() as u64);
    a64_load_imm(&mut code, 8, 1);
    code.push(0xD400_0001); // svc #0

    // loop: x21 = SPIN_ITERS
    let loop_idx = code.len();
    a64_load_imm(&mut code, 21, SPIN_ITERS);

    // spin: subs x21, x21, #1 ; b.ne spin   (pure compute — no syscall)
    let spin_idx = code.len();
    code.push(0xF100_06B5); // subs x21, x21, #1
    push_b_ne(&mut code, spin_idx);

    // write(1, x19, x20)
    a64_load_imm(&mut code, 0, 1);
    code.push(0xAA13_03E1); // mov x1, x19
    code.push(0xAA14_03E2); // mov x2, x20
    a64_load_imm(&mut code, 8, 1);
    code.push(0xD400_0001); // svc #0

    // subs x22, x22, #1 ; b.ne loop   (loop until the tick budget is spent)
    code.push(0xF100_06D6); // subs x22, x22, #1
    push_b_ne(&mut code, loop_idx);

    // exit(0) — lets the system settle into the kernel idle/cortex loop after
    // the preemption demonstration rather than spinning forever.
    a64_load_imm(&mut code, 0, 0);
    a64_load_imm(&mut code, 8, 60);
    code.push(0xD400_0001); // svc #0
    // safety net: spin if exit ever returns to EL0.
    code.push(0x1400_0000); // b .

    // ── Compose the single loadable page image ──────────────────────────────
    //
    // The slots are fixed offsets, so make sure nothing silently overlaps if
    // the code or a message grows.
    assert!(
        code.len() * 4 <= msg_a_off as usize,
        "aarch64 init: code ({} bytes) overlaps msg_a at {:#x}",
        code.len() * 4,
        msg_a_off
    );
    assert!(
        msg_a.len() <= (msg_b_off - msg_a_off) as usize,
        "aarch64 init: msg_a overlaps msg_b"
    );
    assert!(
        msg_b.len() <= (hello_off - msg_b_off) as usize,
        "aarch64 init: msg_b overlaps hello"
    );
    assert!(
        hello_off as usize + hello.len() <= PAGE_SIZE,
        "aarch64 init: hello message runs past the end of the page"
    );

    let mut img = vec![0u8; PAGE_SIZE];
    for (slot, word) in img.chunks_exact_mut(4).zip(code.iter()) {
        slot.copy_from_slice(&word.to_le_bytes());
    }
    for (off, bytes) in [(msg_a_off, msg_a), (msg_b_off, msg_b), (hello_off, hello)] {
        let start = off as usize;
        img[start..start + bytes.len()].copy_from_slice(bytes);
    }

    // ── ELF header + one program header ─────────────────────────────────────
    let ehsize = 64usize;
    let phentsize = 56usize;
    let phoff = ehsize;
    // Place the image on a page boundary in the file: a PT_LOAD segment must
    // satisfy p_offset ≡ p_vaddr (mod p_align), and VBASE is page-aligned.
    let data_off = PAGE_SIZE;
    assert!(phoff + phentsize <= data_off, "aarch64 init: headers overlap the image");

    let file_sz = data_off + img.len();
    let mut elf = vec![0u8; file_sz];

    // e_ident
    elf[0..4].copy_from_slice(&[0x7F, b'E', b'L', b'F']);
    elf[4] = 2; // ELFCLASS64
    elf[5] = 1; // ELFDATA2LSB
    elf[6] = 1; // EV_CURRENT
    write_u16(&mut elf, 16, 2); // e_type    = ET_EXEC
    write_u16(&mut elf, 18, 183); // e_machine = EM_AARCH64
    write_u32(&mut elf, 20, 1); // e_version
    write_u64(&mut elf, 24, VBASE); // e_entry
    write_u64(&mut elf, 32, phoff as u64); // e_phoff
    write_u16(&mut elf, 52, ehsize as u16); // e_ehsize
    write_u16(&mut elf, 54, phentsize as u16); // e_phentsize
    write_u16(&mut elf, 56, 1); // e_phnum

    // PT_LOAD program header (RWX).
    let p = phoff;
    write_u32(&mut elf, p, 1); // p_type  = PT_LOAD
    write_u32(&mut elf, p + 4, 7); // p_flags = R|W|X
    write_u64(&mut elf, p + 8, data_off as u64); // p_offset
    write_u64(&mut elf, p + 16, VBASE); // p_vaddr
    write_u64(&mut elf, p + 24, VBASE); // p_paddr
    write_u64(&mut elf, p + 32, img.len() as u64); // p_filesz
    write_u64(&mut elf, p + 40, img.len() as u64); // p_memsz
    write_u64(&mut elf, p + 48, PAGE_SIZE as u64); // p_align

    elf[data_off..data_off + img.len()].copy_from_slice(&img);
    elf
}

/// Emit a MOVZ/MOVK sequence loading the 64-bit immediate `imm` into x`reg`.
///
/// The low halfword is always materialised with `MOVZ Xreg, #imm16` (which
/// also clears bits 16..63), even when it is zero. Every higher halfword that
/// is non-zero is then patched in with `MOVK Xreg, #imm16, LSL #(16 * hw)`.
/// The sequence is therefore 1 to 4 instructions long, and `imm == 0` yields
/// the single instruction `MOVZ Xreg, #0`.
///
/// * `code` — instruction stream to append to (one `u32` per A64 instruction).
/// * `reg`  — destination register number; must be in `0..32`.
/// * `imm`  — value to load.
fn a64_load_imm(code: &mut Vec<u32>, reg: u32, imm: u64) {
    const MOVZ_X: u32 = 0xD280_0000; // MOVZ Xd, #imm16, LSL #0
    const MOVK_X: u32 = 0xF280_0000; // MOVK Xd, #imm16, LSL #(16 * hw)

    // Rd is a 5-bit field; a larger value would bleed into the imm16 bits.
    debug_assert!(reg < 32, "a64_load_imm: register x{reg} out of range");

    let low16 = (imm & 0xFFFF) as u32;
    code.push(MOVZ_X | (low16 << 5) | reg);

    for hw in 1..4u32 {
        let imm16 = ((imm >> (hw * 16)) & 0xFFFF) as u32;
        if imm16 != 0 {
            code.push(MOVK_X | (hw << 21) | (imm16 << 5) | reg);
        }
    }
}

/// Generate a minimal ELF64 ET_DYN where every exported symbol is a
/// 4-byte `xor eax,eax; ret; nop; nop` stub. Used for both
/// `libflutter_engine.so` (existing) and `liboscortex_embedder.so`.
Expand Down
59 changes: 50 additions & 9 deletions kernel/linker/aarch64.ld
Original file line number Diff line number Diff line change
@@ -1,13 +1,54 @@
/* aarch64 linker script stub */
/* aarch64 direct-boot linker script (QEMU -M virt -kernel).
*
* QEMU `-M virt` places RAM at 0x4000_0000 and, for an ELF kernel, loads each
* PT_LOAD segment at its physical address and jumps to e_entry with the MMU
* OFF (running on physical == virtual identity at this point). We therefore
* link the kernel to run from a low physical address and enable the MMU later
* with an identity map plus the kernel's higher-half window.
*
* Entry is the assembly `_start` (boot.rs) which sets up the stack, zeroes BSS,
* brings up the PL011 UART for serial, then calls into the Rust bring-up path.
*/
OUTPUT_FORMAT("elf64-littleaarch64")
ENTRY(kernel_main)
OUTPUT_ARCH(aarch64)
ENTRY(_start)

/* Kernel load address: QEMU -M virt RAM base (0x4000_0000) + 0x80000. The kernel is loaded here by -kernel. */
KERNEL_PHYS_BASE = 0x40080000;

SECTIONS {
. = 0xFFFF000000080000;
.text : { *(.text .text.*) }
. = KERNEL_PHYS_BASE;

.text : ALIGN(4096) {
__text_start = .;
KEEP(*(.text.boot)) /* _start must be first */
*(.text .text.*)
__text_end = .;
}

.rodata : ALIGN(4096) {
__rodata_start = .;
*(.rodata .rodata.*)
__rodata_end = .;
}

.data : ALIGN(4096) {
__data_start = .;
*(.data .data.*)
*(.got .got.*)
__data_end = .;
}

.bss (NOLOAD) : ALIGN(4096) {
__bss_start = .;
*(.bss .bss.*)
*(COMMON)
. = ALIGN(16);
__bss_end = .;
}

. = ALIGN(4096);
.rodata : { *(.rodata .rodata.*) }
. = ALIGN(4096);
.data : { *(.data .data.*) }
.bss : { __bss_start = .; *(.bss .bss.*) *(COMMON) __bss_end = .; }
/DISCARD/ : { *(.eh_frame) *(.note .note.*) }
__kernel_end = .;

/DISCARD/ : { *(.eh_frame) *(.eh_frame_hdr) *(.note .note.*) *(.comment) }
}
29 changes: 29 additions & 0 deletions kernel/src/arch/aarch64/acpi.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
//! Firmware-table lookup scaffolding — aarch64.
//!
//! On aarch64 platforms hardware topology comes from either ACPI (server-class
//! machines) or a flattened device tree (the QEMU `virt` bring-up target). The
//! shared kernel does not call these on the non-x86 path yet; they exist for
//! parity so the real port has named entry points to fill in.

/// Report the physical address of the ACPI RSDP.
///
/// A return value of 0 means "unavailable"; this stub currently always
/// returns that sentinel.
///
/// TODO(arm): use the Limine RSDP request (ACPI) or parse the DTB.
pub fn rsdp_address() -> u64 {
    // No firmware lookup is wired up yet.
    const RSDP_UNAVAILABLE: u64 = 0;
    RSDP_UNAVAILABLE
}

/// Locate the MADT/GICC table and report its physical address.
///
/// A return value of 0 means "not found"; this stub ignores `_rsdp_phys` and
/// currently always returns that sentinel.
///
/// TODO(arm): walk XSDT for the MADT, or parse `/cpus` + `intc` from the DTB.
pub fn find_madt(_rsdp_phys: u64) -> u64 {
    // No table walk is implemented yet, whatever RSDP address is supplied.
    const MADT_NOT_FOUND: u64 = 0;
    MADT_NOT_FOUND
}

/// Power off the machine.
///
/// NOTE: this stub does not actually power anything off yet — it parks the
/// calling core in an endless `wfi` loop and never returns.
///
/// TODO(arm): issue a PSCI `SYSTEM_OFF` via SMC/HVC. For now, park.
pub fn shutdown() -> ! {
// Re-enter the wait after every wake-up so the function can never return.
loop {
// SAFETY: the asm is a single `wfi` and is declared `nomem, nostack`, so it
// is not expected to read or write memory or touch the stack.
// NOTE(review): assumes `wfi` is permitted at the exception level this runs
// at — confirm against the boot configuration.
unsafe { core::arch::asm!("wfi", options(nomem, nostack)) };
}
}
Loading
Loading