Index: linux-2.6.12-rc4-ps5/include/linux/sched_drv.h
===================================================================
--- linux-2.6.12-rc4-ps5.orig/include/linux/sched_drv.h	2005-05-26 12:17:59.000000000 +1000
+++ linux-2.6.12-rc4-ps5/include/linux/sched_drv.h	2005-05-26 13:13:12.000000000 +1000
@@ -27,6 +27,7 @@ struct sched_drv {
 	int (*move_tasks)(runqueue_t *, int, runqueue_t *, unsigned long,
 		 struct sched_domain *, enum idle_type);
 #endif
+	void (*systime_hook)(runqueue_t *, cputime_t);
 	void (*tick)(struct task_struct*, struct runqueue *, unsigned long long);
 #ifdef CONFIG_SCHED_SMT
 	struct task_struct *(*head_of_queue)(union runqueue_queue *);
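The new slot follows the existing plugsched convention: each CPU scheduler publishes a struct sched_drv table of function pointers and the core dispatches through sched_drvp, the currently selected driver. A rough, self-contained illustration of that dispatch style follows; apart from blank_hook, the names in it are invented for the sketch and are not the kernel's.

#include <stdio.h>

typedef unsigned long cputime_t;
struct runqueue;				/* opaque in this sketch */

/* Illustrative per-driver ops table; only the systime_hook slot mirrors
 * the one added to struct sched_drv above. */
struct sched_drv_sketch {
	void (*systime_hook)(struct runqueue *rq, cputime_t cputime);
};

/* Drivers with no per-runqueue system-time accounting install a stub,
 * as the blank_hook() additions to ingosched/nicksched/sched_spa do. */
static void blank_hook(struct runqueue *rq, cputime_t cputime)
{
	(void)rq;
	(void)cputime;
}

static void counting_hook(struct runqueue *rq, cputime_t cputime)
{
	(void)rq;
	printf("charging %lu tick(s) of system time to this runqueue\n",
	       cputime);
}

static const struct sched_drv_sketch ingo_like = {
	.systime_hook = blank_hook,
};

static const struct sched_drv_sketch staircase_like = {
	.systime_hook = counting_hook,
};

int main(void)
{
	const struct sched_drv_sketch *sched_drvp = &staircase_like;

	/* account_system_time() calls the hook unconditionally, so every
	 * driver must provide one, even if it is a no-op. */
	sched_drvp->systime_hook(NULL, 1);

	sched_drvp = &ingo_like;
	sched_drvp->systime_hook(NULL, 1);	/* does nothing */
	return 0;
}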
Index: linux-2.6.12-rc4-ps5/include/linux/sched_runq.h
===================================================================
--- linux-2.6.12-rc4-ps5.orig/include/linux/sched_runq.h	2005-05-26 12:17:53.000000000 +1000
+++ linux-2.6.12-rc4-ps5/include/linux/sched_runq.h	2005-05-26 12:22:42.000000000 +1000
@@ -41,6 +41,7 @@ struct staircase_runqueue_queue {
 	struct list_head queue[STAIRCASE_NUM_PRIO_SLOTS - 1];
 	unsigned int cache_ticks;
 	unsigned int preempted;
+	unsigned long systime_centile;
 };
 #endif
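systime_centile is the bookkeeping behind the new hook. In the staircase.c hunks further down, staircase_systime_hook() adds cputime * 100 for every chunk of system time that account_system_time() charges to the runqueue (normally one jiffy at a time from the timer interrupt), and staircase_tick() multiplies the accumulated value by 99/100 once per tick, so the field behaves as a decaying average that settles near 100 times the recent system-time percentage; dividing by 100, as activate_task() later does, reads back an approximate percentage. A standalone sketch of that integer arithmetic, assuming a 25% system-time load:

#include <stdio.h>

int main(void)
{
	unsigned long systime_centile = 0;
	int tick;

	for (tick = 0; tick < 10000; tick++) {
		/* assume every fourth tick is accounted as system time */
		if (tick % 4 == 0)
			systime_centile += 1 * 100;	/* staircase_systime_hook() */

		/* per-tick decay done in staircase_tick() */
		systime_centile = systime_centile * 99 / 100;
	}

	/* settles a little below 25 * 99, so /100 reads back roughly
	 * the 25% system time fed in above */
	printf("systime_centile=%lu, ~%lu%% system time\n",
	       systime_centile, systime_centile / 100);
	return 0;
}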
 
Index: linux-2.6.12-rc4-ps5/kernel/ingosched.c
===================================================================
--- linux-2.6.12-rc4-ps5.orig/kernel/ingosched.c	2005-05-26 11:46:34.000000000 +1000
+++ linux-2.6.12-rc4-ps5/kernel/ingosched.c	2005-05-26 13:14:35.000000000 +1000
@@ -675,6 +675,10 @@ out:
 			STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \
 			((rq)->curr->static_prio > (rq)->qu.ingosched.best_expired_prio))
 
+static void blank_hook(runqueue_t *rq, cputime_t cputime)
+{
+}
+
 /*
  * This function gets called by the timer code, with HZ frequency.
  * We call it with interrupts disabled.
@@ -1148,6 +1152,7 @@ const struct sched_drv ingo_sched_drv = 
 #ifdef CONFIG_SMP
 	.move_tasks = ingo_move_tasks,
 #endif
+	.systime_hook = blank_hook,
 	.tick = ingo_tick,
 #ifdef CONFIG_SCHED_SMT
 	.head_of_queue = ingo_head_of_queue,
Index: linux-2.6.12-rc4-ps5/kernel/nicksched.c
===================================================================
--- linux-2.6.12-rc4-ps5.orig/kernel/nicksched.c	2005-05-26 11:46:34.000000000 +1000
+++ linux-2.6.12-rc4-ps5/kernel/nicksched.c	2005-05-26 13:14:18.000000000 +1000
@@ -589,6 +589,10 @@ out:
 }
 #endif
 
+static void blank_hook(runqueue_t *rq, cputime_t cputime)
+{
+}
+
 /*
  * This function gets called by the timer code, with HZ frequency.
  * We call it with interrupts disabled.
@@ -965,6 +969,7 @@ const struct sched_drv nick_sched_drv = 
 #ifdef CONFIG_SMP
 	.move_tasks = nick_move_tasks,
 #endif
+	.systime_hook = blank_hook,
 	.tick = nick_tick,
 #ifdef CONFIG_SCHED_SMT
 	.head_of_queue = nick_head_of_queue,
Index: linux-2.6.12-rc4-ps5/kernel/sched.c
===================================================================
--- linux-2.6.12-rc4-ps5.orig/kernel/sched.c	2005-05-26 12:17:28.000000000 +1000
+++ linux-2.6.12-rc4-ps5/kernel/sched.c	2005-05-26 13:13:43.000000000 +1000
@@ -1393,6 +1393,7 @@ void account_system_time(struct task_str
 	acct_update_integrals(p);
 	/* Update rss highwater mark */
 	update_mem_hiwater(p);
+	sched_drvp->systime_hook(rq, cputime);
 }
 
 /*
Index: linux-2.6.12-rc4-ps5/kernel/sched_spa.c
===================================================================
--- linux-2.6.12-rc4-ps5.orig/kernel/sched_spa.c	2005-05-26 11:46:34.000000000 +1000
+++ linux-2.6.12-rc4-ps5/kernel/sched_spa.c	2005-05-26 13:14:46.000000000 +1000
@@ -593,6 +593,10 @@ static inline void spa_reassess_at_end_o
 #define spa_runq_data_tick(p, numr) zaphod_runq_data_tick(p, numr)
 #endif
 
+static void blank_hook(runqueue_t *rq, cputime_t cputime)
+{
+}
+
 /*
  * This function gets called by the timer code, with HZ frequency.
  * We call it with interrupts disabled.
@@ -1472,6 +1476,7 @@ const struct sched_drv spa_nf_sched_drv 
 #ifdef CONFIG_SMP
 	.move_tasks = spa_move_tasks,
 #endif
+	.systime_hook = blank_hook,
 	.tick = spa_tick,
 #ifdef CONFIG_SCHED_SMT
 	.head_of_queue = spa_head_of_queue,
@@ -1513,6 +1518,7 @@ const struct sched_drv zaphod_sched_drv 
 #ifdef CONFIG_SMP
 	.move_tasks = spa_move_tasks,
 #endif
+	.systime_hook = blank_hook,
 	.tick = spa_tick,
 #ifdef CONFIG_SCHED_SMT
 	.head_of_queue = spa_head_of_queue,
Index: linux-2.6.12-rc4-ps5/kernel/staircase.c
===================================================================
--- linux-2.6.12-rc4-ps5.orig/kernel/staircase.c	2005-05-26 12:17:28.000000000 +1000
+++ linux-2.6.12-rc4-ps5/kernel/staircase.c	2005-05-26 13:07:49.000000000 +1000
@@ -2,8 +2,8 @@
  *  kernel/staircase.c
  *  Copyright (C) 1991-2005  Linus Torvalds
  *
- * 2005-02-13 Staircase scheduler by Con Kolivas
- *            Staircase v10.7
+ * 2005-05-26 Staircase scheduler by Con Kolivas
+ *            Staircase v11.2
  */
 #include <linux/sched.h>
 #include <linux/init.h>
@@ -38,7 +38,8 @@ static void staircase_init_runqueue_queu
 	__set_bit(STAIRCASE_MAX_PRIO, qup->staircase.bitmap);
 }
 
-static void staircase_set_oom_time_slice(struct task_struct *p, unsigned long t)
+static void staircase_set_oom_time_slice(struct task_struct *p,
+	unsigned long t)
 {
 	p->sdu.staircase.slice = p->sdu.staircase.time_slice = t;
 }
@@ -73,26 +74,28 @@ int sched_compute = 0;
 /*
  * Get nanosecond clock difference without overflowing unsigned long.
  */
-static inline unsigned long ns_diff(unsigned long long v1, unsigned long long v2)
+static inline unsigned long ns_diff(unsigned long long v1,
+	unsigned long long v2)
 {
 	unsigned long long vdiff;
-	if (unlikely(v1 < v2))
+	if (likely(v1 > v2)) {
+		vdiff = v1 - v2;
+		if (vdiff > (1 << 31))
+			vdiff = 1 << 31;
+	} else
 		/*
-		 * Rarely the clock goes backwards. There should always be
-		 * a positive difference so return 1.
+		 * Rarely the clock appears to go backwards. There should
+		 * always be a positive difference, so return 1.
 		 */
 		vdiff = 1;
-	else
-		vdiff = v1 - v2;
-	if (vdiff > (1 << 31))
-		vdiff = 1 << 31;
 	return (unsigned long)vdiff;
 }
 
 /*
  * Adding/removing a task to/from a priority array:
  */
-static inline void dequeue_task(struct task_struct *p, struct staircase_runqueue_queue *rqq)
+static inline void dequeue_task(struct task_struct *p,
+	struct staircase_runqueue_queue *rqq)
 {
 	list_del_init(&p->run_list);
 	if (list_empty(rqq->queue + p->prio))
@@ -100,14 +103,16 @@ static inline void dequeue_task(struct t
 	p->sdu.staircase.ns_debit = 0;
 }
 
-static void enqueue_task(struct task_struct *p, struct staircase_runqueue_queue *rqq)
+static void enqueue_task(struct task_struct *p,
+	struct staircase_runqueue_queue *rqq)
 {
 	sched_info_queued(p);
 	list_add_tail(&p->run_list, rqq->queue + p->prio);
 	__set_bit(p->prio, rqq->bitmap);
 }
 
-static void requeue_task(struct task_struct *p, struct staircase_runqueue_queue *rq)
+static inline void requeue_task(struct task_struct *p,
+	struct staircase_runqueue_queue *rq)
 {
 	list_move_tail(&p->run_list, rq->queue + p->prio);
 }
@@ -117,7 +122,8 @@ static void requeue_task(struct task_str
  * remote queue so we want these tasks to show up at the head of the
  * local queue:
  */
-static inline void enqueue_task_head(struct task_struct *p, struct staircase_runqueue_queue *rqq)
+static inline void enqueue_task_head(struct task_struct *p,
+	struct staircase_runqueue_queue *rqq)
 {
 	list_add(&p->run_list, rqq->queue + p->prio);
 	__set_bit(p->prio, rqq->bitmap);
@@ -256,17 +262,24 @@ static void continue_slice(task_t *p)
  * or have just forked a thread/process and make them continue their old
  * slice instead of starting a new one at high priority.
  */
-static void recalc_task_prio(task_t *p, unsigned long long now, unsigned long rq_load)
+static inline void recalc_task_prio(task_t *p, unsigned long long now,
+	unsigned long rq_systime, unsigned long rq_running)
 {
-	unsigned long sleep_time;
+	unsigned long sleep_time = ns_diff(now, p->timestamp);
 
-	if (rq_load > 31)
-		rq_load = 31;
-	sleep_time = ns_diff(now, p->timestamp) / (1 << rq_load);
+	/*
+	 * Priority is elevated back towards best priority in proportion
+	 * to sleep_time. sleep_time is scaled down by the recent in-kernel
+	 * system time and by the number of tasks currently running.
+	 */
+	sleep_time /= rq_running + 1;
+	if (rq_systime)
+		sleep_time = sleep_time / 200 * (100 - rq_systime);
 
 	p->sdu.staircase.totalrun += p->sdu.staircase.runtime;
-	if (NS_TO_JIFFIES(p->sdu.staircase.totalrun) >= p->sdu.staircase.slice &&
-		NS_TO_JIFFIES(sleep_time) < p->sdu.staircase.slice) {
+	if (NS_TO_JIFFIES(p->sdu.staircase.totalrun) >=
+		p->sdu.staircase.slice && NS_TO_JIFFIES(sleep_time) <
+		p->sdu.staircase.slice) {
 			p->sdu.staircase.sflags &= ~SF_FORKED;
 			dec_burst(p);
 			goto new_slice;
@@ -317,7 +330,8 @@ static void activate_task(task_t *p, run
 #endif
 	p->sdu.staircase.slice = slice(p);
 	p->sdu.staircase.time_slice = rr_interval(p);
-	recalc_task_prio(p, now, rq->nr_running);
+	recalc_task_prio(p, now, rq->qu.staircase.systime_centile / 100,
+		rq->nr_running);
 	p->sdu.staircase.sflags &= ~SF_UISLEEP;
 	p->prio = effective_prio(p);
 	p->timestamp = now;
@@ -348,10 +362,8 @@ static void preempt(task_t *p, struct ru
 	if (!TASK_PREEMPTS_CURR(p, rq))
 		return;
 
-	if (p->prio == rq->curr->prio &&
-		((p->sdu.staircase.totalrun || p->sdu.staircase.slice != slice(p)) ||
-		rt_task(rq->curr)))
-			return;
+	if (p->prio >= rq->curr->prio)
+		return;
 
 	if (!sched_compute || rq->qu.staircase.cache_ticks >= cache_delay ||
 		!p->mm || rt_task(p))
@@ -366,7 +378,8 @@ static void preempt(task_t *p, struct ru
  * @sync: do a synchronous wakeup?
  * @rq: The run queue on which the task is to be placed (already locked)
  */
-static void staircase_wake_up_task(struct task_struct *p, struct runqueue *rq, unsigned int old_state, int sync)
+static void staircase_wake_up_task(struct task_struct *p, struct runqueue *rq,
+	unsigned int old_state, int sync)
 {
 	int same_cpu = (rq == this_rq());
 
@@ -420,29 +433,19 @@ static void staircase_wake_up_new_task(t
 
 	if (likely(cpu == this_cpu)) {
 		current->sdu.staircase.sflags |= SF_FORKED;
-
-		if (!(clone_flags & CLONE_VM)) {
+		activate_task(p, rq, 1);
+		if (!(clone_flags & CLONE_VM))
 			/*
 			 * The VM isn't cloned, so we're in a good position to
 			 * do child-runs-first in anticipation of an exec. This
 			 * usually avoids a lot of COW overhead.
 			 */
-			if (unlikely(!task_is_queued(current))) {
-				p->prio = effective_prio(p);
-				__activate_task(p, rq);
-			} else {
-				p->prio = current->prio;
-				list_add_tail(&p->run_list, &current->run_list);
-				inc_nr_running(p, rq);
-			}
 			set_need_resched();
-		} else {
-			p->prio = effective_prio(p);
-			/* Run child last */
-			__activate_task(p, rq);
-		}
 		/*
 		 * We skip the following code due to cpu == this_cpu
+		 *
+		 *   task_rq_unlock(rq, &flags);
+		 *   this_rq = task_rq_lock(current, &flags);
 		 */
 		this_rq = rq;
 	} else {
@@ -459,8 +462,8 @@ static void staircase_wake_up_new_task(t
 		preempt(p, rq);
 
 		/*
-		 * Parent and child are on different CPUs, now get the
-		 * parent runqueue to update the parent's ->sdu.staircase.sleep_avg:
+		 * Parent and child are on different CPUs, now get the parent
+		 * runqueue to update the parent's ->sdu.staircase.sleep_avg:
 		 */
 		task_rq_unlock(rq, &flags);
 		this_rq = task_rq_lock(current, &flags);
@@ -487,8 +490,8 @@ static void staircase_exit(task_t * p)
  * pull_task - move a task from a remote runqueue to the local runqueue.
  * Both runqueues must be locked.
  */
-static inline
-void pull_task(runqueue_t *src_rq, task_t *p, runqueue_t *this_rq, int this_cpu)
+static inline void pull_task(runqueue_t *src_rq, task_t *p,
+	runqueue_t *this_rq, int this_cpu)
 {
 	dequeue_task(p, &src_rq->qu.staircase);
 	dec_nr_running(p, src_rq);
@@ -498,8 +501,8 @@ void pull_task(runqueue_t *src_rq, task_
 	p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
 				+ this_rq->timestamp_last_tick;
 	/*
-	 * Note that idle threads have a prio of STAIRCASE_MAX_PRIO, for this test
-	 * to be always true for them.
+	 * Note that idle threads have a prio of STAIRCASE_MAX_PRIO, for this
+	 * test to be always true for them.
 	 */
 	preempt(p, this_rq);
 }
@@ -512,9 +515,9 @@ void pull_task(runqueue_t *src_rq, task_
  *
  * Called with both runqueues locked.
  */
-static int staircase_move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest,
-		      unsigned long max_nr_move, struct sched_domain *sd,
-		      enum idle_type idle)
+static int staircase_move_tasks(runqueue_t *this_rq, int this_cpu,
+	runqueue_t *busiest, unsigned long max_nr_move,
+	struct sched_domain *sd, enum idle_type idle)
 {
 	struct list_head *head, *curr;
 	int idx, pulled = 0;
@@ -529,7 +532,8 @@ skip_bitmap:
 	if (!idx)
 		idx = sched_find_first_bit(busiest->qu.staircase.bitmap);
 	else
-		idx = find_next_bit(busiest->qu.staircase.bitmap, STAIRCASE_MAX_PRIO, idx);
+		idx = find_next_bit(busiest->qu.staircase.bitmap,
+			STAIRCASE_MAX_PRIO, idx);
 	if (idx >= STAIRCASE_MAX_PRIO)
 		goto out;
 
@@ -578,14 +582,25 @@ static void time_slice_expired(task_t *p
 	enqueue_task(p, rqq);
 }
 
+static void staircase_systime_hook(runqueue_t *rq, cputime_t cputime)
+{
+	/* For calculating rolling percentage of sys time per runqueue */
+	rq->qu.staircase.systime_centile += cputime * 100;
+}
+
 /*
  * This function gets called by the timer code, with HZ frequency.
  * We call it with interrupts disabled.
  */
-static void staircase_tick(struct task_struct *p, struct runqueue *rq, unsigned long long now)
+static void staircase_tick(struct task_struct *p, struct runqueue *rq,
+	unsigned long long now)
 {
 	int cpu = smp_processor_id();
-	unsigned long debit;
+	unsigned long debit, expired_balance = rq->nr_running;
+
+	/* Decay the rolling percentage of system time for this runqueue */
+	rq->qu.staircase.systime_centile = rq->qu.staircase.systime_centile *
+		99 / 100;
 
 	if (p == rq->idle) {
 		if (wake_priority_sleeper(rq))
@@ -603,8 +618,10 @@ static void staircase_tick(struct task_s
 	/*
 	 * SCHED_FIFO tasks never run out of timeslice.
 	 */
-	if (unlikely(p->policy == SCHED_FIFO))
+	if (unlikely(p->policy == SCHED_FIFO)) {
+		expired_balance = 0;
 		goto out;
+	}
 
 	spin_lock(&rq->lock);
 	debit = ns_diff(rq->timestamp_last_tick, p->timestamp);
@@ -631,12 +648,17 @@ static void staircase_tick(struct task_s
 		goto out_unlock;
 	}
 	rq->qu.staircase.cache_ticks++;
-	if (rq->qu.staircase.preempted && rq->qu.staircase.cache_ticks >= cache_delay)
+	if (rq->qu.staircase.preempted &&
+		rq->qu.staircase.cache_ticks >= cache_delay) {
 		set_tsk_need_resched(p);
+		goto out_unlock;
+	}
+	expired_balance = 0;
 out_unlock:
 	spin_unlock(&rq->lock);
 out:
-	rebalance_tick(cpu, rq, NOT_IDLE);
+	if (expired_balance > 1)
+		rebalance_tick(cpu, rq, NOT_IDLE);
 }
 
 #ifdef CONFIG_SCHED_SMT
@@ -649,9 +671,9 @@ static struct task_struct *staircase_hea
 static int staircase_dependent_sleeper_trumps(const struct task_struct *p1,
 	const struct task_struct * p2, struct sched_domain *sd)
 {
-	return ((p1->sdu.staircase.time_slice * (100 - sd->per_cpu_gain) / 100) >
-			slice(p2) || rt_task(p1)) &&
-			p2->mm && p1->mm && !rt_task(p2);
+	return ((p1->sdu.staircase.time_slice * (100 - sd->per_cpu_gain) /
+		100) > slice(p2) || rt_task(p1)) && p2->mm && p1->mm &&
+		!rt_task(p2);
 }
 #endif
 
@@ -743,7 +765,8 @@ switch_tasks:
 		int newprio = effective_prio(next);
 		next->sdu.staircase.sflags &= ~SF_YIELDED;
 		if (newprio != next->prio) {
-			struct staircase_runqueue_queue *rqq = &rq->qu.staircase;
+			struct staircase_runqueue_queue *rqq =
+				&rq->qu.staircase;
 
 			dequeue_task(next, rqq);
 			next->prio = newprio;
@@ -930,7 +953,8 @@ static void staircase_migrate_dead_tasks
 	for (i = 0; i < STAIRCASE_MAX_PRIO; i++) {
 		struct list_head *list = &rq->qu.staircase.queue[i];
 		while (!list_empty(list))
-			migrate_dead(dead_cpu, list_entry(list->next, task_t, run_list));
+			migrate_dead(dead_cpu, list_entry(list->next, task_t,
+				run_list));
 	}
 }
 #endif
@@ -990,6 +1014,7 @@ const struct sched_drv staircase_sched_d
 #ifdef CONFIG_SMP
 	.move_tasks = staircase_move_tasks,
 #endif
+	.systime_hook = staircase_systime_hook,
 	.tick = staircase_tick,
 #ifdef CONFIG_SCHED_SMT
 	.head_of_queue = staircase_head_of_queue,
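
The consumer of the rolling figure is recalc_task_prio(): activate_task() hands it systime_centile / 100 (a rough percentage) together with nr_running; the slept nanoseconds are first divided by rq_running + 1, and any non-zero system-time percentage then scales the remainder by (100 - rq_systime) / 200. A worked example with invented numbers, to make the integer arithmetic concrete:

#include <stdio.h>

int main(void)
{
	/* all inputs invented for illustration */
	unsigned long sleep_time = 8000000;	/* 8 ms slept, in ns */
	unsigned long rq_running = 3;		/* runnable tasks on this runqueue */
	unsigned long rq_systime = 20;		/* ~20% recent system time */

	sleep_time /= rq_running + 1;		/* 8 ms -> 2 ms */
	if (rq_systime)
		sleep_time = sleep_time / 200 * (100 - rq_systime);
						/* 2 ms -> 0.8 ms */

	printf("sleep credited towards priority: %lu ns\n", sleep_time);
	return 0;
}

Note the step at zero: when no system time has been recorded the scaling is skipped entirely, while any non-zero percentage reduces the credit to less than half (the divide by 200), on top of the division by the number of runnable tasks.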
