From af7312edb0c7f3815bb365c09e871ac7c1930c3b Mon Sep 17 00:00:00 2001
From: Stefan Lankes <slankes@eonerc.rwth-aachen.de>
Date: Mon, 1 May 2023 23:11:42 +0200
Subject: [PATCH] revise TLS handling

- ARM uses variant 1 of the TLS handling (see
  https://uclibc.org/docs/tls.pdf)
- the previous version did not work correctly
- use an unsized struct to implement TLS, as a C kernel would
---
 src/arch/aarch64/kernel/scheduler.rs | 78 +++++++++++++++++++++-------
 src/arch/x86_64/kernel/scheduler.rs  |  4 +-
 src/scheduler/task.rs                |  3 +-
 3 files changed, 62 insertions(+), 23 deletions(-)

diff --git a/src/arch/aarch64/kernel/scheduler.rs b/src/arch/aarch64/kernel/scheduler.rs
index 05d7b4fed..7ac05c0ac 100644
--- a/src/arch/aarch64/kernel/scheduler.rs
+++ b/src/arch/aarch64/kernel/scheduler.rs
@@ -1,10 +1,11 @@
 //! Architecture dependent interface to initialize a task
 
+use alloc::alloc::{alloc_zeroed, Layout};
 use alloc::boxed::Box;
 use alloc::rc::Rc;
 use core::arch::asm;
 use core::cell::RefCell;
-use core::{mem, ptr};
+use core::{mem, ptr, slice};
 
 use align_address::Align;
 
@@ -261,13 +262,34 @@ impl Drop for TaskStacks {
 	}
 }
 
+/*
+ * https://fuchsia.dev/fuchsia-src/development/kernel/threads/tls and
+ * https://uclibc.org/docs/tls.pdf were used to understand variant 1
+ * of the TLS implementations.
+ */
+
+#[derive(Copy, Clone)]
+#[repr(C)]
+struct DtvPointer {
+	val: *const (),
+	to_free: *const (),
+}
+
+#[repr(C)]
+union Dtv {
+	counter: usize,
+	pointer: DtvPointer,
+}
+
+#[repr(C)]
 pub struct TaskTLS {
-	thread_ptr: Box<*mut ()>,
-	_block: Box<[u8]>,
+	dtv: mem::MaybeUninit<Box<[Dtv; 2]>>,
+	_private: usize,
+	block: [u8],
 }
 
 impl TaskTLS {
-	fn from_environment() -> Option<Self> {
+	fn from_environment() -> Option<Box<Self>> {
 		let tls_len = env::get_tls_memsz();
 
 		if env::get_tls_memsz() == 0 {
@@ -280,28 +302,39 @@ impl TaskTLS {
 			let tls_init_len = env::get_tls_filesz();
 
 			// SAFETY: We will have to trust the environment here.
-			unsafe { core::slice::from_raw_parts(tls_init_data, tls_init_len) }
+			unsafe { slice::from_raw_parts(tls_init_data, tls_init_len) }
 		};
 
-		// Allocate TLS block
-		let mut block = vec![0; tls_len].into_boxed_slice();
-
-		// Initialize beginning of the TLS block with TLS initialization image
-		block[..tls_init_image.len()].copy_from_slice(tls_init_image);
+		let off = core::cmp::max(16, env::get_tls_align()) - 16;
+		let block_len = env::get_tls_memsz() + off;
+		let len = block_len + mem::size_of::<Box<[Dtv; 2]>>();
+
+		let layout = Layout::from_size_align(len, 16).unwrap();
+		let mut this = unsafe {
+			let data = alloc_zeroed(layout);
+			let raw = ptr::slice_from_raw_parts_mut(data, block_len) as *mut TaskTLS;
+
+			let addr = (*raw).block.as_ptr().offset(off as isize).cast::<()>();
+			(*raw).dtv.as_mut_ptr().write(Box::new([
+				Dtv { counter: 1 },
+				Dtv {
+					pointer: DtvPointer {
+						val: addr,
+						to_free: ptr::null(),
+					},
+				},
+			]));
+
+			Box::from_raw(raw)
+		};
 
-		let thread_ptr = block.as_mut_ptr_range().start.cast::<()>();
-		// Put thread pointer on heap, so it does not move and can be referenced in fs:0
-		let thread_ptr = Box::new(thread_ptr);
+		this.block[off..off + tls_init_image.len()].copy_from_slice(tls_init_image);
 
-		let this = Self {
-			thread_ptr,
-			_block: block,
-		};
 		Some(this)
 	}
 
-	fn thread_ptr(&self) -> &*mut () {
-		&self.thread_ptr
+	fn thread_ptr(&self) -> *const Box<[Dtv; 2]> {
+		self.dtv.as_ptr()
 	}
 }
 
@@ -309,6 +342,11 @@ extern "C" fn leave_task() -> ! {
 	core_scheduler().exit(0)
 }
 
+#[cfg(not(target_os = "none"))]
+extern "C" fn task_start(_f: extern "C" fn(usize), _arg: usize, _user_stack: u64) -> ! {
+	unimplemented!()
+}
+
 #[cfg(target_os = "none")]
 #[naked]
 extern "C" fn task_start(_f: extern "C" fn(usize), _arg: usize, _user_stack: u64) -> ! {
@@ -357,7 +395,7 @@ impl TaskFrame for Task {
 			ptr::write_bytes(stack.as_mut_ptr::<u8>(), 0, mem::size_of::<State>());
 
 			if let Some(tls) = &self.tls {
-				(*state).tpidr_el0 = tls.thread_ptr() as *const _ as u64;
+				(*state).tpidr_el0 = tls.thread_ptr() as u64;
 			}
 
 			/*
diff --git a/src/arch/x86_64/kernel/scheduler.rs b/src/arch/x86_64/kernel/scheduler.rs
index cb55af6c8..ec8b3544d 100644
--- a/src/arch/x86_64/kernel/scheduler.rs
+++ b/src/arch/x86_64/kernel/scheduler.rs
@@ -236,7 +236,7 @@ pub struct TaskTLS {
 }
 
 impl TaskTLS {
-	fn from_environment() -> Option<Self> {
+	fn from_environment() -> Option<Box<Self>> {
 		// For details on thread-local storage data structures see
 		//
 		// “ELF Handling For Thread-Local Storage” Section 3.4.6: x86-64 Specific Definitions for Run-Time Handling of TLS
@@ -287,7 +287,7 @@ impl TaskTLS {
 			_block: block,
 			thread_ptr,
 		};
-		Some(this)
+		Some(Box::new(this))
 	}
 
 	fn thread_ptr(&self) -> &*mut () {
diff --git a/src/scheduler/task.rs b/src/scheduler/task.rs
index 88ad9c863..c8a6a0ac5 100644
--- a/src/scheduler/task.rs
+++ b/src/scheduler/task.rs
@@ -1,3 +1,4 @@
+use alloc::boxed::Box;
 use alloc::collections::{LinkedList, VecDeque};
 use alloc::rc::Rc;
 use core::cell::RefCell;
@@ -372,7 +373,7 @@ pub struct Task {
 	/// Stack of the task
 	pub stacks: TaskStacks,
 	/// Task Thread-Local-Storage (TLS)
-	pub tls: Option<TaskTLS>,
+	pub tls: Option<Box<TaskTLS>>,
 	/// lwIP error code for this task
 	#[cfg(feature = "newlib")]
 	pub lwip_errno: i32,
-- 
GitLab