I am working on implementing a strict priority scheduler for pintos threads, but I encountered a strange issue. Here's my code:
bool thread_higher_prio(const struct list_elem* a, const struct list_elem* b, void* aux){
struct thread *thread_a = list_entry(a,struct thread,elem);
struct thread *thread_b = list_entry(b,struct thread,elem);
return thread_a->priority>thread_b->priority;
}
/* Places thread T on the ready queue appropriate to the active
   scheduling policy.  Must be called with interrupts disabled,
   since the ready list is shared with the interrupt-driven
   scheduler. */
static void thread_enqueue(struct thread* t) {
  ASSERT(intr_get_level() == INTR_OFF);
  ASSERT(is_thread(t));

  switch (active_sched_policy) {
    case SCHED_FIFO:
      /* Plain round-robin: append to the tail. */
      list_push_back(&fifo_ready_list, &t->elem);
      break;
    case SCHED_PRIO:
      /* Keep the list sorted by descending priority. */
      list_insert_ordered(&fifo_ready_list, &t->elem, thread_higher_prio, NULL);
      break;
    default:
      PANIC("Unimplemented scheduling policy value: %d", active_sched_policy);
  }
}
/* Strict-priority scheduler hook: picks the next thread to run.
   The ready list is maintained in descending priority order by
   thread_enqueue(), so the front element is always the
   highest-priority runnable thread.  Returns the idle thread when
   nothing is runnable. */
static struct thread* thread_schedule_prio(void) {
  if (list_empty(&fifo_ready_list))
    return idle_thread;

  return list_entry(list_pop_front(&fifo_ready_list), struct thread, elem);
}
The test I am attempting to run is priority-fifo. In this test, the following function simple_thread_func is executed:
/* Worker body for the priority-fifo test.  Each iteration appends
   this thread's id to the shared output buffer (under the shared
   lock) and then yields, so equal-priority threads should record
   their ids in the same round-robin order every pass. */
static void simple_thread_func(void* data_) {
  struct simple_thread_data* d = data_;
  int round;

  for (round = 0; round < ITER_CNT; round++) {
    lock_acquire(d->lock);
    /* *d->op is the shared write cursor into the output buffer;
       store our id and advance it. */
    *(*d->op)++ = d->id;
    lock_release(d->lock);
    thread_yield();
  }
}
For some reason, the lock in the data_ parameter gets changed to a strange value (currently, I have only made the above modifications for this test, and I have not yet modified the synchronization primitives). Here's the backtrace I get:
// ... output truncated ...
vagrant@development [09:19:48] build $ pintos-test
pintos -v -k -T 60 --bochs --filesys-size=2 -- -q -sched=prio -f rtkt priority-fifo < /dev/null 2> tests/threads/priority-fifo.errors > tests/threads/priority-fifo.output
perl -I../.. ../../tests/threads/priority-fifo.ck tests/threads/priority-fifo tests/threads/priority-fifo.result
FAIL tests/threads/priority-fifo
Kernel panic in run: PANIC at ../../lib/kernel/list.c:216 in list_remove(): assertion `is_interior(elem)' failed.
Call stack: 0xc002ad1d 0xc002b40e 0xc002b450 0xc00233d8 0xc002371e 0xc00334a5 0xc0021693
Translation of call stack:
0xc002ad1d: debug_panic (/home/vagrant/code/personal/proj-pregame/src/lib/kernel/debug.c:35)
0xc002b40e: list_remove (/home/vagrant/code/personal/proj-pregame/src/lib/kernel/list.c:217)
0xc002b450: list_pop_front (/home/vagrant/code/personal/proj-pregame/src/lib/kernel/list.c:227)
0xc00233d8: sema_up (/home/vagrant/code/personal/proj-pregame/src/threads/synch.c:113)
0xc002371e: lock_release (/home/vagrant/code/personal/proj-pregame/src/threads/synch.c:217)
0xc00334a5: simple_thread_func (/home/vagrant/code/personal/proj-pregame/src/tests/threads/priority-fifo.c:88)
0xc0021693: kernel_thread (/home/vagrant/code/personal/proj-pregame/src/threads/thread.c:492)
The parameter passing seems fine until here, aux is still normal.
/* Creates a new kernel thread named NAME at PRIORITY that runs
   FUNCTION(AUX).  (Body partially elided in this excerpt.)
   FUNCTION and AUX are stashed in a fake kernel_thread() stack
   frame on the new thread's stack, so the new thread begins
   execution by calling kernel_thread(function, aux). */
tid_t thread_create(const char* name, int priority, thread_func* function, void* aux) {
//.....
/* Build the stack frame that kernel_thread() will see when the
   new thread is first switched to. */
kf = alloc_frame(t, sizeof *kf);
kf->eip = NULL;          /* Fake return address; kernel_thread never returns. */
kf->function = function; /* Thread body to execute. */
kf->aux = aux;           /* Argument handed to FUNCTION. */
//.....
return tid;
}
Even though the panic occurs when releasing the lock, the lock was actually changed before the function was even executed. I finally tracked it down to find that the value of aux was already changed when entering the kernel_thread function:
/* First function every kernel thread runs.  Executes FUNCTION on
   AUX and destroys the thread if FUNCTION ever returns. */
static void kernel_thread(thread_func* function, void* aux) {
  ASSERT(function != NULL);

  /* We are entered from the scheduler, which runs with interrupts
     off; re-enable them before running ordinary thread code. */
  intr_enable();
  function(aux);

  /* Only reached if FUNCTION returns: tear the thread down. */
  thread_exit();
}
At this point, the lock in the semaphore has already been changed to an unusual value. I am not sure what code modified it, and the call stack looks like this:
kernel_thread(thread_func * function, void * aux) (\home\vagrant\code\personal\proj-pregame\src\threads\thread.c:488)
[Unknown/Just-In-Time compiled code]
Can anyone help me figure out why the lock value is being changed and how to fix it? Thanks!
Here is the test function:
/* Per-worker bookkeeping for the priority-fifo test.  One instance
   per thread; all instances share the same lock and the same output
   cursor, and live on the main thread's stack frame. */
struct simple_thread_data {
int id; /* Sleeper ID. */
int iterations; /* Iterations so far. */
struct lock* lock; /* Lock on output. */
int** op; /* Output buffer position. */
};
/* Number of worker threads and iterations per worker. */
#define THREAD_CNT 16
#define ITER_CNT 16
static thread_func simple_thread_func;
void test_priority_fifo(void) {
struct simple_thread_data data[THREAD_CNT];
struct lock lock;
int *output, *op;
int i, cnt;
/* This test does not work with the MLFQS. */
ASSERT(active_sched_policy == SCHED_PRIO);
/* Make sure our priority is the default. */
ASSERT(thread_get_priority() == PRI_DEFAULT);
msg("%d threads will iterate %d times in the same order each time.", THREAD_CNT, ITER_CNT);
msg("If the order varies then there is a bug.");
output = op = malloc(sizeof *output * THREAD_CNT * ITER_CNT * 2);
ASSERT(output != NULL);
lock_init(&lock);
thread_set_priority(PRI_DEFAULT + 2);
for (i = 0; i < THREAD_CNT; i++) {
char name[16];
struct simple_thread_data* d = data + i;
snprintf(name, sizeof name, "%d", i);
d->id = i;
d->iterations = 0;
d->lock = &lock;
d->op = &op;
thread_create(name, PRI_DEFAULT + 1, simple_thread_func, d);
}
thread_set_priority(PRI_DEFAULT);
/* All the other threads now run to termination here. */
ASSERT(lock.holder == NULL);
cnt = 0;
for (; output < op; output++) {
struct simple_thread_data* d;
ASSERT(*output >= 0 && *output < THREAD_CNT);
d = data + *output;
if (cnt % THREAD_CNT == 0)
printf("(priority-fifo) iteration:");
printf(" %d", d->id);
if (++cnt % THREAD_CNT == 0)
printf("\n");
d->iterations++;
}
}
Before the first call to the simple_thread_func function, the semaphore value in the lock turns into an unusual number: 3221286840 (0xC002B1B8, a kernel address). Plus, there's an unexplained holder appearing: 0xc000efb0.
I believe the changes I made should pass this test. Furthermore, I encountered another peculiar issue. If I change the return thread_a->priority>thread_b->priority; in
/* Ready-queue comparator (repeated here for reference): true iff the
   thread containing A has strictly higher priority than the one
   containing B. */
bool thread_higher_prio(const struct list_elem* a, const struct list_elem* b, void* aux){
struct thread *thread_a = list_entry(a,struct thread,elem);
struct thread *thread_b = list_entry(b,struct thread,elem);
return thread_a->priority>thread_b->priority;
}
to return thread_a->priority<thread_b->priority;, the problem does not occur. But this doesn't align with the requirement of higher-priority threads being prioritized. Also, the test results do not meet the requirements:
// ... output truncated ...
(priority-fifo) iteration: 14 14 14 14 14 14 14