diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 4f20c1a8e3084d4fa6b8c22b0523a024db779709..55c8d31ff3cdf8da145942d7e0dff413cdba9df7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -72,7 +72,7 @@ jobs:
       - name: Check docs
         run: |
           cargo doc --package libhermit-rs --no-deps --document-private-items --target x86_64-unknown-none
-          cargo doc --package libhermit-rs --no-deps --document-private-items --target aarch64-unknown-none-softfloat
+          cargo doc --package libhermit-rs --no-deps --document-private-items --target aarch64-unknown-none-softfloat --no-default-features
 
   build:
     name: Build
diff --git a/src/arch/aarch64/kernel/interrupts.rs b/src/arch/aarch64/kernel/interrupts.rs
index bec9ec918f5cfc509659481a0504bcd461c92699..48794923cb727d5587eb623dbb866908247dffb7 100644
--- a/src/arch/aarch64/kernel/interrupts.rs
+++ b/src/arch/aarch64/kernel/interrupts.rs
@@ -1,10 +1,79 @@
 use core::arch::asm;
 
-const IRQ_FLAG_F: usize = 1 << 6;
-const IRQ_FLAG_I: usize = 1 << 7;
-const IRQ_FLAG_A: usize = 1 << 8;
+use aarch64::regs::*;
+use hermit_dtb::Dtb;
+use hermit_sync::{InterruptTicketMutex, OnceCell};
+use tock_registers::interfaces::Readable;
 
-/// Enable Interrupts
+use crate::arch::aarch64::kernel::boot_info;
+use crate::arch::aarch64::kernel::scheduler::State;
+use crate::arch::aarch64::mm::paging::{
+	self, virt_to_phys, BasePageSize, PageSize, PageTableEntryFlags,
+};
+use crate::arch::aarch64::mm::{virtualmem, PhysAddr, VirtAddr};
+use crate::errno::EFAULT;
+use crate::scheduler::CoreId;
+use crate::sys_exit;
+
+/// Size in bytes of the interrupt stack: eight base pages.
+pub const IST_SIZE: usize = 8 * BasePageSize::SIZE as usize;
+
+/*
+ * GIC Distributor interface register offsets that are common to GICv3 & GICv2
+ * (byte offsets added to `GICD_ADDRESS` by `gicd_read`/`gicd_write`)
+ */
+
+const GICD_CTLR: usize = 0x0;
+const GICD_TYPER: usize = 0x4;
+const GICD_IIDR: usize = 0x8;
+const GICD_IGROUPR: usize = 0x80;
+const GICD_ISENABLER: usize = 0x100;
+const GICD_ICENABLER: usize = 0x180;
+const GICD_ISPENDR: usize = 0x200;
+const GICD_ICPENDR: usize = 0x280;
+const GICD_ISACTIVER: usize = 0x300;
+const GICD_ICACTIVER: usize = 0x380;
+const GICD_IPRIORITYR: usize = 0x400;
+const GICD_ITARGETSR: usize = 0x800;
+const GICD_ICFGR: usize = 0xC00;
+const GICD_NSACR: usize = 0xE00;
+const GICD_SGIR: usize = 0xF00;
+
+/* Bits written to GICD_CTLR by `gicd_enable` */
+const GICD_CTLR_ENABLEGRP0: u32 = 1 << 0;
+const GICD_CTLR_ENABLEGRP1: u32 = 1 << 1;
+
+/* Physical CPU Interface registers (byte offsets added to `GICC_ADDRESS`) */
+const GICC_CTLR: usize = 0x0;
+const GICC_PMR: usize = 0x4;
+const GICC_BPR: usize = 0x8;
+const GICC_IAR: usize = 0xC;
+const GICC_EOIR: usize = 0x10;
+const GICC_RPR: usize = 0x14;
+const GICC_HPPIR: usize = 0x18;
+const GICC_AHPPIR: usize = 0x28;
+const GICC_IIDR: usize = 0xFC;
+const GICC_DIR: usize = 0x1000;
+// Alias: same offset as GICC_EOIR
+const GICC_PRIODROP: usize = GICC_EOIR;
+
+/* Bits written to GICC_CTLR by `gicc_enable` */
+const GICC_CTLR_ENABLEGRP0: u32 = 1 << 0;
+const GICC_CTLR_ENABLEGRP1: u32 = 1 << 1;
+const GICC_CTLR_FIQEN: u32 = 1 << 3;
+const GICC_CTLR_ACKCTL: u32 = 1 << 2;
+
+/// Maximum number of interrupt handlers, i.e. the length of `INTERRUPT_HANDLERS`
+const MAX_HANDLERS: usize = 256;
+
+/// Virtual address the GIC CPU interface is mapped to; set once by `init`.
+static GICC_ADDRESS: OnceCell<VirtAddr> = OnceCell::new();
+/// Virtual address the GIC distributor is mapped to; set once by `init`.
+static GICD_ADDRESS: OnceCell<VirtAddr> = OnceCell::new();
+
+/// Number of interrupts supported by the distributor, derived from `GICD_TYPER` in `init`
+static NR_IRQS: OnceCell<u32> = OnceCell::new();
+/// Handler table indexed by interrupt number; every slot starts out as
+/// `default_interrupt_handler`.
+static mut INTERRUPT_HANDLERS: [fn(state: &State); MAX_HANDLERS] =
+	[default_interrupt_handler; MAX_HANDLERS];
+
+/// Placeholder handler that fills every slot of `INTERRUPT_HANDLERS` until a
+/// real handler is installed; it only logs a warning.
+fn default_interrupt_handler(_state: &State) {
+	warn!("Entering default interrupt handler");
+}
+
+/// Enable all interrupts
 #[inline]
 pub fn enable() {
 	unsafe {
@@ -31,7 +100,7 @@ pub fn enable_and_wait() {
 	}
 }
 
-/// Disable Interrupts
+/// Disable all interrupts
 #[inline]
 pub fn disable() {
 	unsafe {
@@ -43,53 +112,294 @@ pub fn disable() {
 	}
 }
 
-#[no_mangle]
-pub extern "C" fn irq_install_handler(irq_number: u32, handler: usize) {
+/// Registers `handler` as the handler for interrupt `irq_number`.
+///
+/// # Panics
+///
+/// Panics if `irq_number` is not below `MAX_HANDLERS`, because the handler
+/// table is indexed without a prior range check.
+pub fn irq_install_handler(irq_number: u32, handler: fn(state: &State)) {
 	info!("Install handler for interrupt {}", irq_number);
-	// TODO
+	// NOTE(review): the table is a `static mut` written without a lock —
+	// confirm that callers never install handlers concurrently.
+	unsafe {
+		INTERRUPT_HANDLERS[irq_number as usize] = handler;
+	}
 }
 
+/// Fast-interrupt entry point: reads `GICC_IAR`, dispatches to the installed
+/// handler if the interrupt number is below `MAX_HANDLERS`, and then writes
+/// the value read from `GICC_IAR` back to `GICC_EOIR`.
 #[no_mangle]
-pub extern "C" fn do_fiq(_: *const u8) {
-	debug!("Receive fast interrupt\n");
+pub extern "C" fn do_fiq(state: &State) {
+	info!("fiq");
+	let iar = gicc_read(GICC_IAR);
+	// The low ten bits of the value read from GICC_IAR are used as the vector number
+	let vector: usize = iar as usize & 0x3ff;
 
-	loop {
-		crate::arch::processor::halt()
+	info!("Receive fiq {}", vector);
+
+	// Vectors outside the handler table are not dispatched, only acknowledged below
+	if vector < MAX_HANDLERS {
+		unsafe {
+			INTERRUPT_HANDLERS[vector](state);
+		}
 	}
+
+	// `iar` is already a u32, so this conversion cannot fail
+	gicc_write(GICC_EOIR, iar.try_into().unwrap());
 }
 
+/// Interrupt entry point: reads `GICC_IAR`, logs the interrupt number and
+/// writes the value back to `GICC_EOIR`.
+///
+/// NOTE(review): unlike `do_fiq`, no entry of `INTERRUPT_HANDLERS` is called
+/// here — confirm whether that is intended.
 #[no_mangle]
-pub extern "C" fn do_irq(_: *const u8) {
-	debug!("Receive interrupt\n");
+pub extern "C" fn do_irq(_state: &State) {
+	let iar = gicc_read(GICC_IAR);
+	// The low ten bits of the value read from GICC_IAR are used as the vector number
+	let vector = iar & 0x3ff;
 
-	loop {
-		crate::arch::processor::halt()
-	}
+	info!("Receive interrupt {}", vector);
+
+	gicc_write(GICC_EOIR, iar);
 }
 
+/// Synchronous-exception entry point.
+///
+/// Logs the saved register state, then decodes `ESR_EL1`. A data abort whose
+/// ISS bit 10 is clear is reported as an unhandled page fault and ends in
+/// `sys_exit(-EFAULT)`. Every other case only logs an error and returns to
+/// the caller.
 #[no_mangle]
-pub extern "C" fn do_sync(_: *const u8) {
-	debug!("Receive synchronous exception\n");
+pub extern "C" fn do_sync(state: &State) {
+	info!("{:#012x?}", state);
+	// NOTE(review): GICC_IAR is read on every synchronous exception but only
+	// written back to GICC_EOIR in the page-fault path — confirm this is intended.
+	let iar = gicc_read(GICC_IAR);
+	let esr = ESR_EL1.get();
+	// Exception class: the ESR_EL1 bits from bit 26 upwards
+	let ec = esr >> 26;
+	// Instruction specific syndrome: the low 24 bits of ESR_EL1
+	let iss = esr & 0xFFFFFF;
+	let pc = ELR_EL1.get();
+
+	/* data abort from lower or current level */
+	if (ec == 0b100100) || (ec == 0b100101) {
+		/* check if value in far_el1 is valid */
+		if (iss & (1 << 10)) == 0 {
+			/* read far_el1 register, which holds the faulting virtual address */
+			let far = FAR_EL1.get();
+
+			// add page fault handler
 
-	loop {
-		crate::arch::processor::halt()
+			error!("Unable to handle page fault at {:#x}", far);
+			error!("Exception return address {:#x}", ELR_EL1.get());
+			error!("Thread ID register {:#x}", TPIDR_EL0.get());
+			error!("Table Base Register {:#x}", TTBR0_EL1.get());
+			error!("Exception Syndrome Register {:#x}", esr);
+
+			// send EOI
+			gicc_write(GICC_EOIR, iar);
+			sys_exit(-EFAULT);
+		} else {
+			error!("Unknown exception");
+		}
+	} else if ec == 0x3c {
+		error!("Trap to debugger, PC={:#x}", pc);
+	} else {
+		error!("Unsupported exception class: {:#x}, PC={:#x}", ec, pc);
 	}
 }
 
+/// Entry point for exceptions that have no dedicated handler: logs `reason`
+/// and terminates via `sys_exit(-EFAULT)`; never returns.
 #[no_mangle]
-pub extern "C" fn do_bad_mode(_: *const u8, reason: u32) {
+pub extern "C" fn do_bad_mode(_state: &State, reason: u32) -> ! {
 	error!("Receive unhandled exception: {}\n", reason);
 
-	loop {
-		crate::arch::processor::halt()
-	}
+	sys_exit(-EFAULT);
 }
 
+/// Entry point for error interrupts: logs the event and terminates via
+/// `sys_exit(-EFAULT)`; never returns.
 #[no_mangle]
-pub extern "C" fn do_error(_: *const u8) {
+pub extern "C" fn do_error(_state: &State) -> ! {
 	error!("Receive error interrupt\n");
 
-	loop {
-		crate::arch::processor::halt()
+	sys_exit(-EFAULT);
+}
+
+/// Reads the 32-bit distributor register at byte offset `off`.
+///
+/// Panics if `GICD_ADDRESS` has not been set yet.
+#[inline]
+fn gicd_read(off: usize) -> u32 {
+	let addr = GICD_ADDRESS.get().unwrap().as_usize() + off;
+	let value: u32;
+
+	// inline assembly guarantees a 32bit memory access
+	unsafe {
+		asm!(
+			"ldar {value:w}, [{addr}]",
+			value = out(reg) value,
+			addr = in(reg) addr,
+			options(nostack, readonly),
+		);
+	}
+
+	value
+}
+
+/// Writes `value` to the 32-bit distributor register at byte offset `off`.
+///
+/// Panics if `GICD_ADDRESS` has not been set yet.
+#[inline]
+fn gicd_write(off: usize, value: u32) {
+	let addr = GICD_ADDRESS.get().unwrap().as_usize() + off;
+
+	// inline assembly guarantees a 32bit memory access
+	unsafe {
+		asm!(
+			"str {value:w}, [{addr}]",
+			value = in(reg) value,
+			addr = in(reg) addr,
+			options(nostack),
+		);
+	}
+}
+
+/// Reads the 32-bit CPU interface register at byte offset `off`.
+///
+/// Panics if `GICC_ADDRESS` has not been set yet.
+#[inline]
+fn gicc_read(off: usize) -> u32 {
+	let addr = GICC_ADDRESS.get().unwrap().as_usize() + off;
+	let value: u32;
+
+	// inline assembly guarantees a 32bit memory access
+	unsafe {
+		asm!(
+			"ldar {value:w}, [{addr}]",
+			value = out(reg) value,
+			addr = in(reg) addr,
+			options(nostack, readonly),
+		);
 	}
+
+	value
+}
+
+/// Writes `value` to the 32-bit CPU interface register at byte offset `off`.
+///
+/// Panics if `GICC_ADDRESS` has not been set yet.
+#[inline]
+fn gicc_write(off: usize, value: u32) {
+	let addr = GICC_ADDRESS.get().unwrap().as_usize() + off;
+
+	// inline assembly guarantees a 32bit memory access
+	unsafe {
+		asm!(
+			"str {value:w}, [{addr}]",
+			value = in(reg) value,
+			addr = in(reg) addr,
+			options(nostack),
+		);
+	}
+}
+
+/// Globally enables forwarding of interrupts from the distributor to the CPU
+/// interface by setting both group-enable bits in `GICD_CTLR`.
+fn gicd_enable() {
+	let ctlr = GICD_CTLR_ENABLEGRP0 | GICD_CTLR_ENABLEGRP1;
+	gicd_write(GICD_CTLR, ctlr);
+}
+
+/// Globally disables forwarding of interrupts from the distributor to the CPU
+/// interface by writing zero to `GICD_CTLR`.
+fn gicd_disable() {
+	gicd_write(GICD_CTLR, 0);
+}
+
+/// Globally enables signalling of interrupts by the CPU interface: sets both
+/// group-enable bits plus `FIQEN` and `ACKCTL` in `GICC_CTLR`.
+fn gicc_enable() {
+	let ctlr =
+		GICC_CTLR_ENABLEGRP0 | GICC_CTLR_ENABLEGRP1 | GICC_CTLR_FIQEN | GICC_CTLR_ACKCTL;
+	gicc_write(GICC_CTLR, ctlr);
+}
+
+/// Globally disables signalling of interrupts by the CPU interface by writing
+/// zero to `GICC_CTLR`.
+fn gicc_disable() {
+	gicc_write(GICC_CTLR, 0);
+}
+
+/// Writes the low eight bits of `priority` to the CPU interface register `GICC_PMR`.
+fn gicc_set_priority(priority: u32) {
+	let masked = priority & 0xFF;
+	gicc_write(GICC_PMR, masked);
+}
+
+/// Lock held by `mask_interrupt` and `unmask_interrupt` around their distributor writes.
+static MASK_LOCK: InterruptTicketMutex<()> = InterruptTicketMutex::new(());
+
+/// Disables interrupt `vector` by setting its bit in the matching
+/// `GICD_ICENABLER` register.
+///
+/// Returns `Err(())` if `vector` is not below both the number of supported
+/// interrupts and `MAX_HANDLERS`. Panics if `NR_IRQS` has not been set yet.
+pub fn mask_interrupt(vector: u32) -> Result<(), ()> {
+	let in_range =
+		vector < *NR_IRQS.get().unwrap() && vector < MAX_HANDLERS.try_into().unwrap();
+	if !in_range {
+		return Err(());
+	}
+
+	let _guard = MASK_LOCK.lock();
+
+	// One 32-bit register covers 32 interrupts
+	let word = vector as usize / 32;
+	let bit: u32 = 1 << (vector % 32);
+	gicd_write(GICD_ICENABLER + 4 * word, bit);
+
+	Ok(())
+}
+
+/// Enables interrupt `vector` by setting its bit in the matching
+/// `GICD_ISENABLER` register.
+///
+/// Returns `Err(())` if `vector` is not below both the number of supported
+/// interrupts and `MAX_HANDLERS`. Panics if `NR_IRQS` has not been set yet.
+pub fn unmask_interrupt(vector: u32) -> Result<(), ()> {
+	let in_range =
+		vector < *NR_IRQS.get().unwrap() && vector < MAX_HANDLERS.try_into().unwrap();
+	if !in_range {
+		return Err(());
+	}
+
+	let _guard = MASK_LOCK.lock();
+
+	// One 32-bit register covers 32 interrupts
+	let word = vector as usize / 32;
+	let bit: u32 = 1 << (vector % 32);
+	gicd_write(GICD_ISENABLER + 4 * word, bit);
+
+	Ok(())
+}
+
+/// Stub: not implemented yet; every call panics via `todo!` and
+/// `wakeup_time` is ignored.
+pub fn set_oneshot_timer(wakeup_time: Option<u64>) {
+	todo!("set_oneshot_timer stub");
+}
+
+/// Stub: not implemented yet; every call panics via `todo!` and
+/// `core_to_wakeup` is ignored.
+pub fn wakeup_core(core_to_wakeup: CoreId) {
+	todo!("wakeup_core stub");
+}
+
+/// Initializes the generic interrupt controller.
+///
+/// Reads the distributor and CPU interface regions from the `reg` property of
+/// the device tree node `/intc`, maps both as device memory, resets the
+/// distributor configuration and finally enables distributor and CPU interface.
+///
+/// # Panics
+///
+/// Panics if the device tree is missing or invalid, if `/intc` has no `reg`
+/// property or it is too short, if virtual memory cannot be allocated, or if
+/// this function is called a second time (the address cells are already set).
+pub fn init() {
+	info!("Intialize generic interrupt controller");
+
+	let dtb = unsafe {
+		Dtb::from_raw(boot_info().hardware_info.device_tree.unwrap().get() as *const u8)
+			.expect(".dtb file has invalid header")
+	};
+
+	// `reg` is consumed as four consecutive big-endian u64 values:
+	// distributor base, distributor size, CPU interface base, CPU interface size
+	let reg = dtb.get_property("/intc", "reg").unwrap();
+	let (slice, residual_slice) = reg.split_at(core::mem::size_of::<u64>());
+	let gicd_start = PhysAddr(u64::from_be_bytes(slice.try_into().unwrap()));
+	let (slice, residual_slice) = residual_slice.split_at(core::mem::size_of::<u64>());
+	let gicd_size = u64::from_be_bytes(slice.try_into().unwrap());
+	let (slice, residual_slice) = residual_slice.split_at(core::mem::size_of::<u64>());
+	let gicc_start = PhysAddr(u64::from_be_bytes(slice.try_into().unwrap()));
+	let (slice, _residual_slice) = residual_slice.split_at(core::mem::size_of::<u64>());
+	let gicc_size = u64::from_be_bytes(slice.try_into().unwrap());
+
+	info!(
+		"Found GIC Distributor interface at {:#X} (size {:#X})",
+		gicd_start, gicd_size
+	);
+	info!(
+		"Found generic interrupt controller at {:#X} (size {:#X})",
+		gicc_start, gicc_size
+	);
+
+	let gicd_address =
+		virtualmem::allocate_aligned(gicd_size.try_into().unwrap(), 0x10000).unwrap();
+	GICD_ADDRESS.set(gicd_address).unwrap();
+	debug!("Mapping GIC Distributor interface to virtual address {gicd_address:p}",);
+
+	// NOTE(review): the page count is `size / BasePageSize::SIZE`, which rounds
+	// down — assumes both region sizes are multiples of the base page size.
+	let mut flags = PageTableEntryFlags::empty();
+	flags.device().writable().execute_disable();
+	paging::map::<BasePageSize>(
+		gicd_address,
+		gicd_start,
+		(gicd_size / BasePageSize::SIZE).try_into().unwrap(),
+		flags,
+	);
+
+	let gicc_address =
+		virtualmem::allocate_aligned(gicc_size.try_into().unwrap(), 0x10000).unwrap();
+	GICC_ADDRESS.set(gicc_address).unwrap();
+	debug!("Mapping generic interrupt controller to virtual address {gicc_address:p}",);
+	paging::map::<BasePageSize>(
+		gicc_address,
+		gicc_start,
+		(gicc_size / BasePageSize::SIZE).try_into().unwrap(),
+		flags,
+	);
+
+	// Switch both interfaces off while the distributor is reprogrammed
+	gicc_disable();
+	gicd_disable();
+
+	// Interrupt count = (low five bits of GICD_TYPER + 1) * 32
+	let nr_irqs = ((gicd_read(GICD_TYPER) & 0x1f) + 1) * 32;
+	info!("Number of supported interrupts {}", nr_irqs);
+	NR_IRQS.set(nr_irqs).unwrap();
+
+	// First 32 interrupts: disable 16..31, enable 0..15, clear all pending
+	// bits and put all of them into group 0
+	gicd_write(GICD_ICENABLER, 0xffff0000);
+	gicd_write(GICD_ISENABLER, 0x0000ffff);
+	gicd_write(GICD_ICPENDR, 0xffffffff);
+	gicd_write(GICD_IGROUPR, 0);
+
+	// Priority registers cover four interrupts each: set 0x80 for interrupts 0..31
+	for i in 0..32 / 4 {
+		gicd_write(GICD_IPRIORITYR + i * 4, 0x80808080);
+	}
+
+	// GICD_NSACR registers cover sixteen interrupts each
+	for i in 32 / 16..nr_irqs / 16 {
+		gicd_write(GICD_NSACR + i as usize * 4, 0xffffffff);
+	}
+
+	// Remaining interrupts: disable, clear pending bits, group 0
+	for i in 32 / 32..nr_irqs / 32 {
+		gicd_write(GICD_ICENABLER + i as usize * 4, 0xffffffff);
+		gicd_write(GICD_ICPENDR + i as usize * 4, 0xffffffff);
+		gicd_write(GICD_IGROUPR + i as usize * 4, 0);
+	}
+
+	// Remaining interrupts: target register cleared, priority 0x80
+	for i in 32 / 4..nr_irqs / 4 {
+		gicd_write(GICD_ITARGETSR + i as usize * 4, 0);
+		gicd_write(GICD_IPRIORITYR + i as usize * 4, 0x80808080);
+	}
+
+	gicd_enable();
+
+	gicc_set_priority(0xF0);
+	gicc_enable();
 }
diff --git a/src/arch/aarch64/kernel/mod.rs b/src/arch/aarch64/kernel/mod.rs
index 9c9c3ef19915f2102a6b582a7e3de15ca7ab3b2e..870342bcae2616dc91ff0401ff762114946c2335 100644
--- a/src/arch/aarch64/kernel/mod.rs
+++ b/src/arch/aarch64/kernel/mod.rs
@@ -1,11 +1,9 @@
 pub mod core_local;
 pub mod interrupts;
-pub mod pci;
 pub mod processor;
 pub mod scheduler;
 pub mod serial;
 mod start;
-pub mod stubs;
 pub mod switch;
 pub mod systemtime;
 
@@ -17,7 +15,6 @@ use hermit_sync::TicketMutex;
 
 use crate::arch::aarch64::kernel::core_local::*;
 use crate::arch::aarch64::kernel::serial::SerialPort;
-pub use crate::arch::aarch64::kernel::stubs::*;
 pub use crate::arch::aarch64::kernel::systemtime::get_boot_time;
 use crate::arch::aarch64::mm::{PhysAddr, VirtAddr};
 use crate::config::*;
@@ -104,12 +101,12 @@ pub fn get_tls_align() -> usize {
 
 #[cfg(feature = "smp")]
 pub fn get_possible_cpus() -> u32 {
-	todo!()
+	// Hard-coded to a single CPU even when the `smp` feature is enabled
+	1
 }
 
 #[cfg(feature = "smp")]
 pub fn get_processor_count() -> u32 {
-	todo!()
+	// Hard-coded to a single processor even when the `smp` feature is enabled
+	1
 }
 
 #[cfg(not(feature = "smp"))]
@@ -117,17 +114,30 @@ pub fn get_processor_count() -> u32 {
 	1
 }
 
-/// Whether HermitCore is running under the "uhyve" hypervisor.
-pub fn is_uhyve() -> bool {
-	matches!(boot_info().platform_info, PlatformInfo::Uhyve { .. })
-}
-
 pub fn get_cmdsize() -> usize {
-	todo!()
+	// Length in bytes of the `bootargs` property of the device tree node
+	// `/chosen`, or 0 if the property does not exist.
+	let dtb_ptr = boot_info().hardware_info.device_tree.unwrap().get() as *const u8;
+	let dtb = unsafe { hermit_dtb::Dtb::from_raw(dtb_ptr) }.expect(".dtb file has invalid header");
+
+	dtb.get_property("/chosen", "bootargs")
+		.map_or(0, |cmd| cmd.len())
 }
 
 pub fn get_cmdline() -> VirtAddr {
-	todo!()
+	// Address of the `bootargs` property of the device tree node `/chosen`,
+	// or the zero address if the property does not exist.
+	let dtb_ptr = boot_info().hardware_info.device_tree.unwrap().get() as *const u8;
+	let dtb = unsafe { hermit_dtb::Dtb::from_raw(dtb_ptr) }.expect(".dtb file has invalid header");
+
+	match dtb.get_property("/chosen", "bootargs") {
+		Some(cmd) => VirtAddr(cmd.as_ptr() as u64),
+		None => VirtAddr::zero(),
+	}
 }
 
 /// Earliest initialization function called by the Boot Processor.
@@ -166,50 +176,19 @@ pub fn output_message_buf(buf: &[u8]) {
 
 /// Real Boot Processor initialization as soon as we have put the first Welcome message on the screen.
 pub fn boot_processor_init() {
-	//processor::configure();
+	processor::configure();
 
 	crate::mm::init();
 	crate::mm::print_information();
-
-	return;
-	/*processor::detect_features();
-	processor::configure();
-
-	::mm::init();
-	::mm::print_information();
 	env::init();
-	gdt::init();
-	gdt::add_current_core();
-	idt::install();
-
-	if !env::is_uhyve() {
-		pic::init();
-	}
-
-	interrupts::install();
+	// Set up the interrupt controller before interrupts are enabled
+	interrupts::init();
 	interrupts::enable();
 	processor::detect_frequency();
 	processor::print_information();
-	systemtime::init();
 
-	if !env::is_uhyve() {
-		pci::init();
-		pci::print_information();
-		acpi::init();
-	}
-
-	apic::init();
-	scheduler::install_timer_handler();*/
-
-	// Read out PMCCNTR_EL0 in an infinite loop.
-	// TODO: This currently stays at zero on uhyve. Fix uhyve! :)
-	loop {
-		unsafe {
-			let pmccntr: u64;
-			asm!("mrs {}, pmccntr_el0", out(reg) pmccntr, options(nomem, nostack));
-			println!("Count: {}", pmccntr);
-		}
-	}
+	// NOTE(review): system time initialization is still commented out —
+	// confirm whether it has to be re-enabled.
+	/*
+	systemtime::init();
+	*/
 
 	finish_processor_init();
 }
@@ -223,30 +202,12 @@ pub fn boot_application_processors() {
 /// Application Processor initialization
+///
+/// Initializes the core-local state and then runs the common
+/// `finish_processor_init` step.
 pub fn application_processor_init() {
 	core_local::init();
-	/*processor::configure();
-	gdt::add_current_core();
-	idt::install();
-	apic::init_x2apic();
-	apic::init_local_apic();
-	interrupts::enable();*/
 	finish_processor_init();
 }
 
 fn finish_processor_init() {
 	debug!("Initialized Processor");
 
-	/*if env::is_uhyve() {
-		// uhyve does not use apic::detect_from_acpi and therefore does not know the number of processors and
-		// their APIC IDs in advance.
-		// Therefore, we have to add each booted processor into the CPU_LOCAL_APIC_IDS vector ourselves.
-		// Fortunately, the Core IDs are guaranteed to be sequential and match the Local APIC IDs.
-		apic::add_local_apic_id(core_id() as u8);
-
-		// uhyve also boots each processor into _start itself and does not use apic::boot_application_processors.
-		// Therefore, the current processor already needs to prepare the processor variables for a possible next processor.
-		apic::init_next_processor_variables(core_id() + 1);
-	}*/
-
 	// This triggers apic::boot_application_processors (bare-metal/QEMU) or uhyve
 	// to initialize the next processor.
 	*CPU_ONLINE.lock() += 1;
diff --git a/src/arch/aarch64/kernel/pci.rs b/src/arch/aarch64/kernel/pci.rs
deleted file mode 100644
index e2b1f105c381701249b17ce55a022af6d77be72c..0000000000000000000000000000000000000000
--- a/src/arch/aarch64/kernel/pci.rs
+++ /dev/null
@@ -1,45 +0,0 @@
-use alloc::rc::Rc;
-use core::cell::RefCell;
-
-use hermit_sync::InterruptTicketMutex;
-
-// Currently, onbly a dummy implementation
-pub struct VirtioNetDriver;
-
-impl VirtioNetDriver {
-	pub fn init_vqs(&mut self) {}
-
-	pub fn set_polling_mode(&mut self, value: bool) {
-		//(self.vqueues.as_deref_mut().unwrap())[VIRTIO_NET_RX_QUEUE].set_polling_mode(value);
-	}
-
-	pub fn get_mac_address(&self) -> [u8; 6] {
-		[0; 6]
-	}
-
-	pub fn get_mtu(&self) -> u16 {
-		1500 //self.device_cfg.mtu
-	}
-
-	pub fn get_tx_buffer(&mut self, len: usize) -> Result<(*mut u8, usize), ()> {
-		Err(())
-	}
-
-	pub fn send_tx_buffer(&mut self, index: usize, len: usize) -> Result<(), ()> {
-		Err(())
-	}
-
-	pub fn has_packet(&self) -> bool {
-		false
-	}
-
-	pub fn receive_rx_buffer(&self) -> Result<&'static [u8], ()> {
-		Err(())
-	}
-
-	pub fn rx_buffer_consumed(&mut self) {}
-}
-
-pub fn get_network_driver() -> Option<&'static InterruptTicketMutex<VirtioNetDriver>> {
-	None
-}
diff --git a/src/arch/aarch64/kernel/processor.rs b/src/arch/aarch64/kernel/processor.rs
index 680ff4e16335d3c5e719e1d3f57a613361274a68..f94bdffd73f4b6aeed0bff8a22ae77eda5ee2f7e 100644
--- a/src/arch/aarch64/kernel/processor.rs
+++ b/src/arch/aarch64/kernel/processor.rs
@@ -1,26 +1,108 @@
 use core::arch::asm;
 use core::hint::spin_loop;
+use core::{fmt, str};
 
-extern "C" {
-	static mut cpu_freq: u32;
+use aarch64::regs::CNTFRQ_EL0;
+use hermit_dtb::Dtb;
+use hermit_sync::Lazy;
+use qemu_exit::QEMUExit;
+use tock_registers::interfaces::Readable;
+
+use crate::arch::aarch64::kernel::boot_info;
+use crate::env;
+
+/// Detected CPU frequency; detection runs on first access.
+static CPU_FREQUENCY: Lazy<CpuFrequency> = Lazy::new(|| {
+	let mut detected = CpuFrequency::new();
+	unsafe { detected.detect() };
+	detected
+});
+
+/// Where the value stored in a `CpuFrequency` came from.
+enum CpuFrequencySources {
+	/// No frequency has been detected yet
+	Invalid,
+	/// Taken from `env::freq()`
+	CommandLine,
+	/// Derived from the `CNTFRQ_EL0` register
+	Register,
 }
 
-pub struct FPUState {
-	// TODO
+impl fmt::Display for CpuFrequencySources {
+	/// Writes a human-readable name of the source.
+	///
+	/// Panics when called on `CpuFrequencySources::Invalid`.
+	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+		let label = match self {
+			Self::CommandLine => "Command Line",
+			Self::Register => "CNTFRQ_EL0",
+			Self::Invalid => panic!("Attempted to print an invalid CPU Frequency Source"),
+		};
+		f.write_str(label)
+	}
+}
+
+/// CPU frequency together with the source it was detected from.
+struct CpuFrequency {
+	/// Frequency in MHz; 0 until a detection succeeded
+	mhz: u16,
+	/// Origin of `mhz`
+	source: CpuFrequencySources,
 }
 
+impl CpuFrequency {
+	/// Creates a record without a detected frequency (0 MHz, invalid source).
+	const fn new() -> Self {
+		CpuFrequency {
+			mhz: 0,
+			source: CpuFrequencySources::Invalid,
+		}
+	}
+
+	/// Stores `mhz` together with its `source`.
+	///
+	/// Returns `Err(())` and leaves `self` unchanged if `mhz` is zero.
+	fn set_detected_cpu_frequency(
+		&mut self,
+		mhz: u16,
+		source: CpuFrequencySources,
+	) -> Result<(), ()> {
+		// The clock frequency must never be set to zero, otherwise a division by zero will
+		// occur during runtime
+		if mhz > 0 {
+			self.mhz = mhz;
+			self.source = source;
+			Ok(())
+		} else {
+			Err(())
+		}
+	}
+
+	/// Takes the frequency from `env::freq()`; fails if none was given or it is zero.
+	unsafe fn detect_from_cmdline(&mut self) -> Result<(), ()> {
+		let mhz = env::freq().ok_or(())?;
+		self.set_detected_cpu_frequency(mhz, CpuFrequencySources::CommandLine)
+	}
+
+	/// Derives the frequency in MHz from `CNTFRQ_EL0`.
+	///
+	/// Fails if the result is zero or does not fit into a `u16`, so that
+	/// `detect` can fall back to the command line instead of panicking.
+	unsafe fn detect_from_register(&mut self) -> Result<(), ()> {
+		let mhz: u16 = (CNTFRQ_EL0.get() / 1000000)
+			.try_into()
+			.map_err(|_| ())?;
+		self.set_detected_cpu_frequency(mhz, CpuFrequencySources::Register)
+	}
+
+	/// Detects the frequency, preferring the register over the command line.
+	///
+	/// Panics if neither source yields a usable value.
+	unsafe fn detect(&mut self) {
+		unsafe {
+			self.detect_from_register()
+				.or_else(|_e| self.detect_from_cmdline())
+				.unwrap();
+		}
+	}
+
+	/// Returns the stored frequency in MHz.
+	fn get(&self) -> u16 {
+		self.mhz
+	}
+}
+
+impl fmt::Display for CpuFrequency {
+	/// Formats the frequency as `<mhz> MHz (from <source>)`.
+	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+		let Self { mhz, source } = self;
+		write!(f, "{} MHz (from {})", mhz, source)
+	}
+}
+
+/// Placeholder for the floating-point state of a task; it currently holds no data.
+pub struct FPUState;
+
 impl FPUState {
 	pub fn new() -> Self {
 		Self {}
 	}
 
-	pub fn restore(&self) {
-		// TODO
-	}
+	/// Currently a no-op.
+	pub fn restore(&self) {}
 
-	pub fn save(&self) {
-		// TODO
-	}
+	/// Currently a no-op.
+	pub fn save(&self) {}
 }
 
 pub fn seed_entropy() -> Option<[u8; 32]> {
@@ -64,19 +146,18 @@ pub fn halt() {
 pub fn shutdown() -> ! {
 	info!("Shutting down system");
 
-	loop {
-		halt();
-	}
+	// Leave through the `qemu_exit` AArch64 handler, reporting success.
+	// NOTE(review): presumably only effective when running under QEMU — confirm
+	// the behavior on other platforms.
+	let exit_handler = qemu_exit::AArch64::new();
+	exit_handler.exit_success();
 }
 
 pub fn get_timer_ticks() -> u64 {
 	// We simulate a timer with a 1 microsecond resolution by taking the CPU timestamp
 	// and dividing it by the CPU frequency in MHz.
-	0
+	// The divisor cannot be zero: a frequency of 0 MHz is rejected during detection.
+	get_timestamp() / u64::from(get_frequency())
 }
 
 pub fn get_frequency() -> u16 {
-	0
+	// Frequency in MHz; the first access to CPU_FREQUENCY triggers the detection
+	CPU_FREQUENCY.get()
 }
 
 #[inline]
@@ -117,7 +198,6 @@ pub fn configure() {
 			in(reg) pmuserenr_el0,
 			options(nostack, nomem),
 		);
-		debug!("pmuserenr_el0");
 
 		// TODO: Setting PMCNTENSET_EL0 is probably not required, but find out about that
 		// when reading PMCCNTR_EL0 works at all.
@@ -127,7 +207,6 @@ pub fn configure() {
 			in(reg) pmcntenset_el0,
 			options(nostack, nomem),
 		);
-		debug!("pmcntenset_el0");
 
 		// Enable PMCCNTR_EL0 using PMCR_EL0.
 		let mut pmcr_el0: u32 = 0;
@@ -148,3 +227,23 @@ pub fn configure() {
 		);
 	}
 }
+
+/// Forces the lazy `CPU_FREQUENCY` detection to run now.
+pub fn detect_frequency() {
+	Lazy::force(&CPU_FREQUENCY);
+}
+
+/// Prints the `compatible` property of the device tree node `/cpus/cpu@0`
+/// and the detected system frequency.
+///
+/// # Panics
+///
+/// Panics if the device tree is missing or has an invalid header.
+pub fn print_information() {
+	let dtb = unsafe {
+		Dtb::from_raw(boot_info().hardware_info.device_tree.unwrap().get() as *const u8)
+			.expect(".dtb file has invalid header")
+	};
+
+	// A missing or non-UTF-8 property is reported as "unknown" instead of
+	// panicking inside a purely informational printout.
+	let compatible = dtb
+		.get_property("/cpus/cpu@0", "compatible")
+		.and_then(|raw| str::from_utf8(raw).ok())
+		.unwrap_or("unknown");
+
+	infoheader!(" CPU INFORMATION ");
+	infoentry!("Processor compatiblity", compatible);
+	infoentry!("System frequency", *CPU_FREQUENCY);
+	infofooter!();
+}
diff --git a/src/arch/aarch64/kernel/scheduler.rs b/src/arch/aarch64/kernel/scheduler.rs
index 96590844863be22f9926ce6a5f7a286b5c8defbb..05d7b4fed972703eac65f79b5ff9505bbe398056 100644
--- a/src/arch/aarch64/kernel/scheduler.rs
+++ b/src/arch/aarch64/kernel/scheduler.rs
@@ -1,6 +1,8 @@
 //! Architecture dependent interface to initialize a task
 
+use alloc::boxed::Box;
 use alloc::rc::Rc;
+use core::arch::asm;
 use core::cell::RefCell;
 use core::{mem, ptr};
 
@@ -10,12 +12,81 @@ use crate::arch::aarch64::kernel::core_local::*;
 use crate::arch::aarch64::kernel::processor;
 use crate::arch::aarch64::mm::paging::{BasePageSize, PageSize, PageTableEntryFlags};
 use crate::arch::aarch64::mm::{PhysAddr, VirtAddr};
+use crate::interrupts::IST_SIZE;
 use crate::scheduler::task::{Task, TaskFrame};
 use crate::{env, DEFAULT_STACK_SIZE, KERNEL_STACK_SIZE};
 
-extern "C" {
-	static tls_start: u8;
-	static tls_end: u8;
+/// Saved register state that is handed to the exception handlers and that
+/// `create_stack_frame` places on the stack of a new task.
+///
+/// The layout is fixed by `repr(C, packed)`.
+/// NOTE(review): the field order presumably has to match the order in which
+/// the assembly entry/switch code stores the registers — confirm before
+/// reordering fields.
+#[derive(Debug)]
+#[repr(C, packed)]
+pub struct State {
+	/// Exception Link Register
+	elr_el1: u64,
+	/// Program Status Register
+	spsr_el1: u64,
+	/// Thread ID Register
+	tpidr_el0: u64,
+	/// X0 register
+	x0: u64,
+	/// X1 register
+	x1: u64,
+	/// X2 register
+	x2: u64,
+	/// X3 register
+	x3: u64,
+	/// X4 register
+	x4: u64,
+	/// X5 register
+	x5: u64,
+	/// X6 register
+	x6: u64,
+	/// X7 register
+	x7: u64,
+	/// X8 register
+	x8: u64,
+	/// X9 register
+	x9: u64,
+	/// X10 register
+	x10: u64,
+	/// X11 register
+	x11: u64,
+	/// X12 register
+	x12: u64,
+	/// X13 register
+	x13: u64,
+	/// X14 register
+	x14: u64,
+	/// X15 register
+	x15: u64,
+	/// X16 register
+	x16: u64,
+	/// X17 register
+	x17: u64,
+	/// X18 register
+	x18: u64,
+	/// X19 register
+	x19: u64,
+	/// X20 register
+	x20: u64,
+	/// X21 register
+	x21: u64,
+	/// X22 register
+	x22: u64,
+	/// X23 register
+	x23: u64,
+	/// X24 register
+	x24: u64,
+	/// X25 register
+	x25: u64,
+	/// X26 register
+	x26: u64,
+	/// X27 register
+	x27: u64,
+	/// X28 register
+	x28: u64,
+	/// X29 register
+	x29: u64,
+	/// X30 register
+	x30: u64,
 }
 
 pub struct BootStack {
@@ -51,7 +122,7 @@ impl TaskStacks {
 		} else {
 			size.align_up(BasePageSize::SIZE as usize)
 		};
-		let total_size = user_stack_size + DEFAULT_STACK_SIZE + KERNEL_STACK_SIZE;
+		let total_size = user_stack_size + DEFAULT_STACK_SIZE + IST_SIZE;
 		let virt_addr =
 			crate::arch::mm::virtualmem::allocate(total_size + 4 * BasePageSize::SIZE as usize)
 				.expect("Failed to allocate Virtual Memory for TaskStacks");
@@ -71,22 +142,22 @@ impl TaskStacks {
 		crate::arch::mm::paging::map::<BasePageSize>(
 			virt_addr + BasePageSize::SIZE,
 			phys_addr,
-			KERNEL_STACK_SIZE / BasePageSize::SIZE as usize,
+			IST_SIZE / BasePageSize::SIZE as usize,
 			flags,
 		);
 
 		// map kernel stack into the address space
 		crate::arch::mm::paging::map::<BasePageSize>(
-			virt_addr + KERNEL_STACK_SIZE + 2 * BasePageSize::SIZE,
-			phys_addr + KERNEL_STACK_SIZE,
+			virt_addr + IST_SIZE + 2 * BasePageSize::SIZE,
+			phys_addr + IST_SIZE,
 			DEFAULT_STACK_SIZE / BasePageSize::SIZE as usize,
 			flags,
 		);
 
 		// map user stack into the address space
 		crate::arch::mm::paging::map::<BasePageSize>(
-			virt_addr + KERNEL_STACK_SIZE + DEFAULT_STACK_SIZE + 3 * BasePageSize::SIZE,
-			phys_addr + KERNEL_STACK_SIZE + DEFAULT_STACK_SIZE,
+			virt_addr + IST_SIZE + DEFAULT_STACK_SIZE + 3 * BasePageSize::SIZE,
+			phys_addr + IST_SIZE + DEFAULT_STACK_SIZE,
 			user_stack_size / BasePageSize::SIZE as usize,
 			flags,
 		);
@@ -94,9 +165,7 @@ impl TaskStacks {
 		// clear user stack
 		unsafe {
 			ptr::write_bytes(
-				(virt_addr
-					+ KERNEL_STACK_SIZE + DEFAULT_STACK_SIZE
-					+ 3 * BasePageSize::SIZE as usize)
+				(virt_addr + IST_SIZE + DEFAULT_STACK_SIZE + 3 * BasePageSize::SIZE as usize)
 					.as_mut_ptr::<u8>(),
 				0xAC,
 				user_stack_size,
@@ -125,9 +194,7 @@ impl TaskStacks {
+	/// Size of the user stack in bytes: 0 for boot stacks, otherwise the
+	/// total size minus the kernel stack and the interrupt stack.
 	pub fn get_user_stack_size(&self) -> usize {
 		match self {
 			TaskStacks::Boot(_) => 0,
-			TaskStacks::Common(stacks) => {
-				stacks.total_size - DEFAULT_STACK_SIZE - KERNEL_STACK_SIZE
-			}
+			TaskStacks::Common(stacks) => stacks.total_size - DEFAULT_STACK_SIZE - IST_SIZE,
 		}
 	}
 
@@ -135,9 +202,7 @@ impl TaskStacks {
 		match self {
 			TaskStacks::Boot(_) => VirtAddr::zero(),
 			TaskStacks::Common(stacks) => {
-				stacks.virt_addr
-					+ KERNEL_STACK_SIZE + DEFAULT_STACK_SIZE
-					+ 3 * BasePageSize::SIZE as usize
+				stacks.virt_addr + IST_SIZE + DEFAULT_STACK_SIZE + 3 * BasePageSize::SIZE as usize
 			}
 		}
 	}
@@ -146,7 +211,7 @@ impl TaskStacks {
 		match self {
 			TaskStacks::Boot(stacks) => stacks.stack,
 			TaskStacks::Common(stacks) => {
-				stacks.virt_addr + KERNEL_STACK_SIZE + 2 * BasePageSize::SIZE as usize
+				stacks.virt_addr + IST_SIZE + 2 * BasePageSize::SIZE as usize
 			}
 		}
 	}
@@ -166,7 +231,7 @@ impl TaskStacks {
 	}
 
+	/// Size of the interrupt stack in bytes; always `IST_SIZE`.
 	pub fn get_interrupt_stack_size(&self) -> usize {
-		KERNEL_STACK_SIZE
+		IST_SIZE
 	}
 }
 
@@ -197,29 +262,46 @@ impl Drop for TaskStacks {
 }
 
 pub struct TaskTLS {
-	address: VirtAddr,
-	//fs: VirtAddr,
-	//layout: Layout,
+	/// Heap cell holding the pointer to the start of `_block`
+	thread_ptr: Box<*mut ()>,
+	/// Backing memory of the TLS block; kept only so that it stays allocated
+	_block: Box<[u8]>,
 }
 
 impl TaskTLS {
-	fn from_environment() -> Self {
-		Self {
-			address: VirtAddr::zero(),
+	/// Builds the TLS block described by the environment.
+	///
+	/// Returns `None` if the TLS memory size is zero. Otherwise allocates a
+	/// zeroed block of that size and copies the TLS initialization image to
+	/// its beginning.
+	///
+	/// Panics if the initialization image is larger than the TLS block.
+	fn from_environment() -> Option<Self> {
+		let tls_len = env::get_tls_memsz();
+
+		// Reuse the size queried above instead of asking the environment again
+		if tls_len == 0 {
+			return None;
 		}
-	}
-}
 
-impl Drop for TaskTLS {
-	fn drop(&mut self) {
-		/*debug!(
-				"Deallocate TLS at {:#x} (layout {:?})",
-				self.address, self.layout,
-		);
+		// Get TLS initialization image
+		let tls_init_image = {
+			let tls_init_data = env::get_tls_start().as_ptr::<u8>();
+			let tls_init_len = env::get_tls_filesz();
 
-		unsafe {
-				dealloc(self.address.as_mut_ptr::<u8>(), self.layout);
-		}*/
+			// SAFETY: We will have to trust the environment here.
+			unsafe { core::slice::from_raw_parts(tls_init_data, tls_init_len) }
+		};
+
+		// Allocate TLS block
+		let mut block = vec![0; tls_len].into_boxed_slice();
+
+		// Initialize beginning of the TLS block with TLS initialization image
+		block[..tls_init_image.len()].copy_from_slice(tls_init_image);
+
+		let thread_ptr = block.as_mut_ptr_range().start.cast::<()>();
+		// Put thread pointer on heap, so it does not move and its address can
+		// be stored in tpidr_el0
+		let thread_ptr = Box::new(thread_ptr);
+
+		let this = Self {
+			thread_ptr,
+			_block: block,
+		};
+		Some(this)
+	}
+
+	/// Returns a reference to the heap cell that holds the thread pointer.
+	fn thread_ptr(&self) -> &*mut () {
+		&self.thread_ptr
 	}
 }
 
@@ -227,36 +309,76 @@ extern "C" fn leave_task() -> ! {
 	core_scheduler().exit(0)
 }
 
-extern "C" fn task_entry(func: extern "C" fn(usize), arg: usize) {
-	// Check if the task (process or thread) uses Thread-Local-Storage.
-	/*let tls_size = unsafe { &tls_end as *const u8 as usize - &tls_start as *const u8 as usize };
-	if tls_size > 0 {
-		// Yes, it does, so we have to allocate TLS memory.
-		// Allocate enough space for the given size and one more variable of type usize, which holds the tls_pointer.
-		let tls_allocation_size = tls_size + mem::size_of::<usize>();
-		let tls = TaskTLS::from_environment();
-
-		// The tls_pointer is the address to the end of the TLS area requested by the task.
-		let tls_pointer = tls.address + tls_size;
-
-		// TODO: Implement AArch64 TLS
-
-		// Associate the TLS memory to the current task.
-		let mut current_task_borrowed = core_scheduler().current_task.borrow_mut();
-		debug!(
-			"Set up TLS for task {} at address {:#X}",
-			current_task_borrowed.id,
-			tls.address
-		);
-		current_task_borrowed.tls = Some(tls);
-	}*/
+/// First code executed by a new task.
+///
+/// Switches the stack pointer to the value passed in `x2` and branches to
+/// `task_entry`. `x0` and `x1` are not touched, so they reach `task_entry`
+/// as its `func` and `arg` parameters.
+#[cfg(target_os = "none")]
+#[naked]
+extern "C" fn task_start(_f: extern "C" fn(usize), _arg: usize, _user_stack: u64) -> ! {
+	// `f` is in the `x0` register
+	// `arg` is in the `x1` register
+	// `user_stack` is in the `x2` register
+
+	unsafe {
+		asm!(
+			"mov sp, x2",
+			"adrp x4, {task_entry}",
+			"add  x4, x4, #:lo12:{task_entry}",
+			"br x4",
+			task_entry = sym task_entry,
+			options(noreturn)
+		)
+	}
+}
 
+/// Runs the task function `func` with `arg` and exits the thread with
+/// status 0 once it returns; never returns to its caller.
+#[inline(never)]
+extern "C" fn task_entry(func: extern "C" fn(usize), arg: usize) -> ! {
 	// Call the actual entry point of the task.
 	func(arg);
+
+	// Exit task
+	crate::sys_thread_exit(0)
 }
 
 impl TaskFrame for Task {
+	/// Prepares the initial stack of the task so that the first switch to it
+	/// starts in `task_start`, which in turn calls `func(arg)` on the user stack.
 	fn create_stack_frame(&mut self, func: extern "C" fn(usize), arg: usize) {
-		// TODO: Implement AArch64 stack frame
+		// Check if TLS is allocated already and if the task uses thread-local storage.
+		if self.tls.is_none() {
+			self.tls = TaskTLS::from_environment();
+		}
+
+		unsafe {
+			// Set a marker for debugging at the very top.
+			let mut stack = self.stacks.get_kernel_stack() + self.stacks.get_kernel_stack_size()
+				- TaskStacks::MARKER_SIZE;
+			*stack.as_mut_ptr::<u64>() = 0xDEAD_BEEFu64;
+
+			// Put the State structure expected by the ASM switch() function on the stack.
+			stack = stack - mem::size_of::<State>();
+
+			// Start from an all-zero register state
+			let state = stack.as_mut_ptr::<State>();
+			ptr::write_bytes(stack.as_mut_ptr::<u8>(), 0, mem::size_of::<State>());
+
+			// tpidr_el0 receives the address of the heap cell that holds the thread pointer
+			if let Some(tls) = &self.tls {
+				(*state).tpidr_el0 = tls.thread_ptr() as *const _ as u64;
+			}
+
+			/*
+			 * The x30 needs to hold the address of the
+			 * first function to be called when returning from switch_context.
+			 */
+			(*state).x30 = task_start as usize as u64;
+			(*state).x0 = func as usize as u64; // x0 carries the entry point to task_start
+			(*state).x1 = arg as u64;
+
+			/* Zero the condition flags. */
+			// NOTE(review): 0x3E5 also appears to set the interrupt mask bits and
+			// select the EL1 mode — confirm against the SPSR_EL1 layout.
+			(*state).spsr_el1 = 0x3E5;
+
+			// Set the task's stack pointer entry to the stack we have just crafted.
+			self.last_stack_pointer = stack;
+			self.user_stack_pointer = self.stacks.get_user_stack()
+				+ self.stacks.get_user_stack_size()
+				- TaskStacks::MARKER_SIZE;
+
+			// x2 is required to initialize the stack
+			(*state).x2 = self.user_stack_pointer.as_u64() - mem::size_of::<u64>() as u64;
+		}
 	}
 }
diff --git a/src/arch/aarch64/kernel/start.rs b/src/arch/aarch64/kernel/start.rs
index 0de42f8d7677d475102a478fc73c23d0908b6c51..c6e5bb399d8702af9afa45de94b93c474a402bfb 100644
--- a/src/arch/aarch64/kernel/start.rs
+++ b/src/arch/aarch64/kernel/start.rs
@@ -12,34 +12,6 @@ extern "C" {
 	static vector_table: u8;
 }
 
-// TCR flags
-const TCR_IRGN_WBWA: u64 = ((1) << 8) | ((1) << 24);
-const TCR_ORGN_WBWA: u64 = ((1) << 10) | ((1) << 26);
-const TCR_SHARED: u64 = ((3) << 12) | ((3) << 28);
-const TCR_TBI0: u64 = 1 << 37;
-const TCR_TBI1: u64 = 1 << 38;
-const TCR_ASID16: u64 = 1 << 36;
-const TCR_TG1_64K: u64 = 3 << 30;
-const TCR_TG1_16K: u64 = 1 << 30;
-const TCR_TG1_4K: u64 = 0 << 30;
-const TCR_FLAGS: u64 = TCR_IRGN_WBWA | TCR_ORGN_WBWA | TCR_SHARED;
-
-/// Number of virtual address bits for 4KB page
-const VA_BITS: u64 = 48;
-
-// Available memory types
-#[allow(non_upper_case_globals)]
-const MT_DEVICE_nGnRnE: u64 = 0;
-#[allow(non_upper_case_globals)]
-const MT_DEVICE_nGnRE: u64 = 1;
-const MT_DEVICE_GRE: u64 = 2;
-const MT_NORMAL_NC: u64 = 3;
-const MT_NORMAL: u64 = 4;
-
-const fn mair(attr: u64, mt: u64) -> u64 {
-	attr << (mt * 8)
-}
-
 /// Entrypoint - Initialize Stack pointer and Exception Table
 #[no_mangle]
 #[naked]
@@ -77,7 +49,7 @@ unsafe extern "C" fn pre_init(boot_info: &'static RawBootInfo, cpu_id: u32) -> !
 		"msr vbar_el1, x4",
 		vector_table = sym vector_table,
 		out("x4") _,
-		options(nostack, nomem),
+		options(nostack),
 	);
 
 	// Memory barrier
diff --git a/src/arch/aarch64/kernel/start.s b/src/arch/aarch64/kernel/start.s
index 533c1afeb1799717714769d4b605088e4e5fd587..1ce1ef111a36710767d21d0d800d9f1bea4a466b 100644
--- a/src/arch/aarch64/kernel/start.s
+++ b/src/arch/aarch64/kernel/start.s
@@ -5,7 +5,7 @@
 .extern do_sync
 .extern do_error
 
-.macro trap_entry, el
+.macro trap_entry
      stp x29, x30, [sp, #-16]!
      stp x27, x28, [sp, #-16]!
      stp x25, x26, [sp, #-16]!
@@ -30,7 +30,7 @@
      stp x22, x23, [sp, #-16]!
 .endm
 
-.macro trap_exit, el
+.macro trap_exit
      ldp x22, x23, [sp], #16
      msr elr_el1, x22
      msr spsr_el1, x23
@@ -74,10 +74,10 @@ b       do_bad_mode
  */
 .align 6
 el1_sync:
-      trap_entry 1
+      trap_entry
       mov     x0, sp
       bl      do_sync
-      trap_exit 1
+      trap_exit
       eret
 .size el1_sync, .-el1_sync
 .type el1_sync, @function
@@ -87,21 +87,10 @@ el1_sync:
  */
 .align 6
 el1_irq:
-      trap_entry 1
+      trap_entry
       mov     x0, sp
       bl      do_irq
-      cmp     x0, 0
-      b.eq    1f
-
-      //mov x1, sp
-      //str x1, [x0]                   /* store old sp */
-      // bl get_current_stack          /* get new sp   */
-      // mov sp, x0
-
-      /* call cleanup code */
-      // bl finish_task_switch
-
-1:    trap_exit 1
+      trap_exit
       eret
 .size el1_irq, .-el1_irq
 .type el1_irq, @function
@@ -111,31 +100,20 @@ el1_irq:
  */
 .align 6
 el1_fiq:
-      trap_entry 1
+      trap_entry
       mov     x0, sp
       bl      do_fiq
-      cmp     x0, 0
-      b.eq    1f
-
-      //mov x1, sp
-      //str x1, [x0]                  /* store old sp */
-      //bl get_current_stack          /* get new sp   */
-      //mov sp, x0
-
-      /* call cleanup code */
-      //bl finish_task_switch
-
-1:    trap_exit 1
+      trap_exit
       eret
 .size el1_fiq, .-el1_fiq
 .type el1_fiq, @function
 
 .align 6
 el1_error:
-      trap_entry 1
+      trap_entry
       mov     x0, sp
       bl      do_error
-      trap_exit 1
+      trap_exit
       eret
 .size el1_error, .-el1_error
 .type el1_error, @function
@@ -200,5 +178,4 @@ ventry el0_sync_invalid         // Synchronous 32-bit EL0
 ventry el0_irq_invalid          // IRQ 32-bit EL0
 ventry el0_fiq_invalid          // FIQ 32-bit EL0
 ventry el0_error_invalid        // Error 32-bit EL0
-.size vector_table, .-vector_table
-
+.size vector_table, .-vector_table
\ No newline at end of file
diff --git a/src/arch/aarch64/kernel/stubs.rs b/src/arch/aarch64/kernel/stubs.rs
deleted file mode 100644
index 89f16c6e8c35c3b87734a958b42fd65a5c737a4e..0000000000000000000000000000000000000000
--- a/src/arch/aarch64/kernel/stubs.rs
+++ /dev/null
@@ -1,29 +0,0 @@
-use crate::scheduler::CoreId;
-
-pub fn set_oneshot_timer(wakeup_time: Option<u64>) {
-	// TODO
-	debug!("set_oneshot_timer stub");
-}
-
-pub fn wakeup_core(core_to_wakeup: CoreId) {
-	// TODO
-	debug!("wakeup_core stub");
-}
-
-#[no_mangle]
-pub extern "C" fn eoi() {}
-
-#[no_mangle]
-pub extern "C" fn finish_task_switch() {}
-
-#[no_mangle]
-pub extern "C" fn getcontext() {}
-
-#[no_mangle]
-pub extern "C" fn get_current_stack() {}
-
-#[no_mangle]
-pub extern "C" fn makecontext() {}
-
-#[no_mangle]
-pub extern "C" fn setcontext() {}
diff --git a/src/arch/aarch64/kernel/switch.rs b/src/arch/aarch64/kernel/switch.rs
index f9d94ee29af2903ea365f58ac3c9e1e38a7dda07..4d717620a5b3acadf2844d9e9d3273bcfe49810c 100644
--- a/src/arch/aarch64/kernel/switch.rs
+++ b/src/arch/aarch64/kernel/switch.rs
@@ -1,5 +1,63 @@
-#[no_mangle]
-pub extern "C" fn switch_to_fpu_owner(_old_stack: *mut usize, _new_stack: usize) {}
+use core::arch::asm;
 
-#[no_mangle]
-pub extern "C" fn switch_to_task(_old_stack: *mut usize, _new_stack: usize) {}
+#[inline]
+pub unsafe extern "C" fn switch_to_fpu_owner(old_stack: *mut usize, new_stack: usize) {
+	switch_to_task(old_stack, new_stack);
+}
+
+#[naked]
+pub unsafe extern "C" fn switch_to_task(_old_stack: *mut usize, _new_stack: usize) {
+	asm!(
+		// save general purpose registers
+		"stp x29, x30, [sp, #-16]!",
+		"stp x27, x28, [sp, #-16]!",
+		"stp x25, x26, [sp, #-16]!",
+		"stp x23, x24, [sp, #-16]!",
+		"stp x21, x22, [sp, #-16]!",
+		"stp x19, x20, [sp, #-16]!",
+		"stp x17, x18, [sp, #-16]!",
+		"stp x15, x16, [sp, #-16]!",
+		"stp x13, x14, [sp, #-16]!",
+		"stp x11, x12, [sp, #-16]!",
+		"stp x9, x10, [sp, #-16]!",
+		"stp x7, x8, [sp, #-16]!",
+		"stp x5, x6, [sp, #-16]!",
+		"stp x3, x4, [sp, #-16]!",
+		"stp x1, x2, [sp, #-16]!",
+		// save thread id register and process state
+		"mrs x22, tpidr_el0",
+		"stp x22, x0, [sp, #-16]!",
+		"mrs x22, elr_el1",
+		"mrs x23, spsr_el1",
+		"stp x22, x23, [sp, #-16]!",
+		// Store the old `sp` behind `old_stack`
+		"mov x24, sp",
+		"str x24, [x0]",
+		// Set `sp` to `new_stack`
+		"mov sp, x1",
+		// restore thread id register and process state
+		"ldp x22, x23, [sp], #16",
+		"msr elr_el1, x22",
+		"msr spsr_el1, x23",
+		"ldp x22, x0, [sp], #16",
+		"msr tpidr_el0, x22",
+		// restore general purpose registers
+		"ldp x1, x2, [sp], #16",
+		"ldp x3, x4, [sp], #16",
+		"ldp x5, x6, [sp], #16",
+		"ldp x7, x8, [sp], #16",
+		"ldp x9, x10, [sp], #16",
+		"ldp x11, x12, [sp], #16",
+		"ldp x13, x14, [sp], #16",
+		"ldp x15, x16, [sp], #16",
+		"ldp x17, x18, [sp], #16",
+		"ldp x19, x20, [sp], #16",
+		"ldp x21, x22, [sp], #16",
+		"ldp x23, x24, [sp], #16",
+		"ldp x25, x26, [sp], #16",
+		"ldp x27, x28, [sp], #16",
+		"ldp x29, x30, [sp], #16",
+		"ret",
+		options(noreturn),
+	);
+}
diff --git a/src/arch/aarch64/mm/paging.rs b/src/arch/aarch64/mm/paging.rs
index ee9b9d23549ab31435532863438316c082881195..fb3b81364767d598090871830a274f46cb1d5bcc 100644
--- a/src/arch/aarch64/mm/paging.rs
+++ b/src/arch/aarch64/mm/paging.rs
@@ -6,9 +6,10 @@ use align_address::Align;
 
 use crate::arch::aarch64::kernel::core_local::*;
 use crate::arch::aarch64::kernel::{
-	get_base_address, get_boot_info_address, get_image_size, get_ram_address, is_uhyve, processor,
+	get_base_address, get_boot_info_address, get_image_size, get_ram_address, processor,
 };
 use crate::arch::aarch64::mm::{physicalmem, virtualmem, PhysAddr, VirtAddr};
+use crate::env::is_uhyve;
 use crate::{mm, scheduler, KERNEL_STACK_SIZE};
 
 /// Pointer to the root page table (called "Level 0" in ARM terminology).
@@ -78,12 +79,24 @@ impl PageTableEntryFlags {
 	/// Needed as long as empty() is no const function.
 	const BLANK: PageTableEntryFlags = PageTableEntryFlags::empty();
 
+	pub fn present(&mut self) -> &mut Self {
+		self.insert(PageTableEntryFlags::PRESENT);
+		self
+	}
+
 	pub fn device(&mut self) -> &mut Self {
-		self.insert(PageTableEntryFlags::DEVICE_NGNRE);
+		self.remove(PageTableEntryFlags::NORMAL);
+		self.remove(PageTableEntryFlags::NORMAL_NC);
+		self.remove(PageTableEntryFlags::DEVICE_NGNRE);
+		self.remove(PageTableEntryFlags::DEVICE_GRE);
+		self.insert(PageTableEntryFlags::DEVICE_NGNRNE);
 		self
 	}
 
 	pub fn normal(&mut self) -> &mut Self {
+		self.remove(PageTableEntryFlags::NORMAL_NC);
+		self.remove(PageTableEntryFlags::DEVICE_NGNRE);
+		self.remove(PageTableEntryFlags::DEVICE_GRE);
 		self.insert(PageTableEntryFlags::NORMAL);
 		self
 	}
@@ -106,7 +119,7 @@ impl PageTableEntryFlags {
 }
 
 /// An entry in either table
-#[derive(Clone, Copy)]
+#[derive(Clone, Copy, Debug)]
 pub struct PageTableEntry {
 	/// Physical memory address this entry refers, combined with flags from PageTableEntryFlags.
 	physical_address_and_flags: PhysAddr,
diff --git a/src/arch/aarch64/mm/virtualmem.rs b/src/arch/aarch64/mm/virtualmem.rs
index 26ce84b43807d370ad83d3171f8e11a7b81002e9..78c96395ce7472671df19fa161c1a4dd9aa7abdb 100644
--- a/src/arch/aarch64/mm/virtualmem.rs
+++ b/src/arch/aarch64/mm/virtualmem.rs
@@ -21,6 +21,7 @@ const KERNEL_VIRTUAL_MEMORY_END: VirtAddr = VirtAddr(0x1_0000_0000);
 const TASK_VIRTUAL_MEMORY_END: VirtAddr = VirtAddr(0x8000_0000_0000);
 
 pub fn init() {
+	// don't use the first 8 KiB (addresses below 0x2000)
 	if get_ram_address() > PhysAddr(0x2000) {
 		let entry = FreeListEntry {
 			start: 0x2000,
diff --git a/src/arch/mod.rs b/src/arch/mod.rs
index 063fca43c018314bc76231959d5b1a31dfe72576..0e14d4f60d275daab68f66f65dd437fcd808a827 100644
--- a/src/arch/mod.rs
+++ b/src/arch/mod.rs
@@ -13,12 +13,12 @@ use crate::arch::aarch64::kernel::core_local::core_scheduler;
 #[cfg(target_arch = "aarch64")]
 pub use crate::arch::aarch64::kernel::interrupts;
 #[cfg(target_arch = "aarch64")]
+pub use crate::arch::aarch64::kernel::interrupts::{set_oneshot_timer, wakeup_core};
+#[cfg(target_arch = "aarch64")]
 pub use crate::arch::aarch64::kernel::processor;
 #[cfg(target_arch = "aarch64")]
 pub use crate::arch::aarch64::kernel::scheduler;
 #[cfg(target_arch = "aarch64")]
-pub use crate::arch::aarch64::kernel::stubs::{set_oneshot_timer, wakeup_core};
-#[cfg(target_arch = "aarch64")]
 pub use crate::arch::aarch64::kernel::switch;
 #[cfg(target_arch = "aarch64")]
 pub use crate::arch::aarch64::kernel::systemtime::get_boot_time;
diff --git a/src/lib.rs b/src/lib.rs
index 582daa54965fafdd91b74cdaf4961fa2eacfaa86..9d72767459360a09baccd4bd7e0dc454c95e4bfa 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -318,22 +318,6 @@ fn boot_processor_main() -> ! {
 	);
 
 	arch::boot_processor_init();
-	#[cfg(target_arch = "aarch64")]
-	{
-		info!("The current hermit-kernel is only implemented up to this point on aarch64.");
-		if env::is_uhyve() {
-			syscalls::init();
-			syscalls::__sys_shutdown(0);
-		} else {
-			info!("Attempting to exit via QEMU.");
-			info!("This requires that you passed the `-semihosting` option to QEMU.");
-			let exit_handler = qemu_exit::AArch64::new();
-			exit_handler.exit_success();
-		}
-
-		// Compiles up to here - loop prevents linker errors
-		loop {}
-	}
 	scheduler::add_current_core();
 
 	if !env::is_uhyve() {