#include <linux/nmi.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/prefetch.h>
#include "rcutree.h"
+#include <trace/events/rcu.h>
+
+#include "rcu.h"
/* Data structures. */
static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
#define RCU_STATE_INITIALIZER(structname) { \
- .level = { &structname.node[0] }, \
+ .level = { &structname##_state.node[0] }, \
.levelcnt = { \
NUM_RCU_LVL_0, /* root of hierarchy. */ \
NUM_RCU_LVL_1, \
NUM_RCU_LVL_3, \
NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \
}, \
- .signaled = RCU_GP_IDLE, \
+ .fqs_state = RCU_GP_IDLE, \
.gpnum = -300, \
.completed = -300, \
- .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \
- .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \
+ .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \
+ .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \
.n_force_qs = 0, \
.n_force_qs_ngp = 0, \
.name = #structname, \
}
-struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched_state);
+struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched);
DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
-struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
+struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh);
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
static struct rcu_state *rcu_state;
+/*
+ * The rcu_scheduler_active variable transitions from zero to one just
+ * before the first task is spawned. So when this variable is zero, RCU
+ * can assume that there is but one task, allowing RCU to (for example)
+ * optimized synchronize_sched() to a simple barrier(). When this variable
+ * is one, RCU must actually do all the hard work required to detect real
+ * grace periods. This variable is also used to suppress boot-time false
+ * positives from lockdep-RCU error checking.
+ */
int rcu_scheduler_active __read_mostly;
EXPORT_SYMBOL_GPL(rcu_scheduler_active);
/*
+ * The rcu_scheduler_fully_active variable transitions from zero to one
+ * during the early_initcall() processing, which is after the scheduler
+ * is capable of creating new tasks. So RCU processing (for example,
+ * creating tasks for RCU priority boosting) must be delayed until after
+ * rcu_scheduler_fully_active transitions from zero to one. We also
+ * currently delay invocation of any RCU callbacks until after this point.
+ *
+ * It might later prove better for people registering RCU callbacks during
+ * early boot to take responsibility for these callbacks, but one step at
+ * a time.
+ */
+static int rcu_scheduler_fully_active __read_mostly;
+
+#ifdef CONFIG_RCU_BOOST
+
+/*
* Control variables for per-CPU and per-rcu_node kthreads. These
* handle all flavors of RCU.
*/
DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu);
DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
DEFINE_PER_CPU(char, rcu_cpu_has_work);
-static char rcu_kthreads_spawnable;
-static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
-static void invoke_rcu_cpu_kthread(void);
+#endif /* #ifdef CONFIG_RCU_BOOST */
-#define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */
+static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
+static void invoke_rcu_core(void);
+static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
/*
* Track the rcutorture test sequence number and the update version
* Note a quiescent state. Because we do not need to know
* how many quiescent states passed, just if there was at least
* one since the start of the grace period, this just sets a flag.
+ * The caller must have disabled preemption.
*/
void rcu_sched_qs(int cpu)
{
struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);
- rdp->passed_quiesc_completed = rdp->gpnum - 1;
+ rdp->passed_quiesce_gpnum = rdp->gpnum;
barrier();
- rdp->passed_quiesc = 1;
+ if (rdp->passed_quiesce == 0)
+ trace_rcu_grace_period("rcu_sched", rdp->gpnum, "cpuqs");
+ rdp->passed_quiesce = 1;
}
void rcu_bh_qs(int cpu)
{
struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
- rdp->passed_quiesc_completed = rdp->gpnum - 1;
+ rdp->passed_quiesce_gpnum = rdp->gpnum;
barrier();
- rdp->passed_quiesc = 1;
+ if (rdp->passed_quiesce == 0)
+ trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs");
+ rdp->passed_quiesce = 1;
}
/*
* Note a context switch. This is a quiescent state for RCU-sched,
* and requires special handling for preemptible RCU.
+ * The caller must have disabled preemption.
*/
void rcu_note_context_switch(int cpu)
{
+ trace_rcu_utilization("Start context switch");
rcu_sched_qs(cpu);
rcu_preempt_note_context_switch(cpu);
+ trace_rcu_utilization("End context switch");
}
EXPORT_SYMBOL_GPL(rcu_note_context_switch);
-#ifdef CONFIG_NO_HZ
DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
- .dynticks_nesting = 1,
+ .dynticks_nesting = DYNTICK_TASK_NESTING,
.dynticks = ATOMIC_INIT(1),
};
-#endif /* #ifdef CONFIG_NO_HZ */
-static int blimit = 10; /* Maximum callbacks per softirq. */
+static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */
static int qhimark = 10000; /* If this many pending, ignore blimit. */
static int qlowmark = 100; /* Once only this many pending, use blimit. */
* trust its state not to change because interrupts are disabled.
*/
if (cpu_is_offline(rdp->cpu)) {
+ trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl");
rdp->offline_fqs++;
return 1;
}
- /* If preemptible RCU, no point in sending reschedule IPI. */
- if (rdp->preemptible)
- return 0;
-
- /* The CPU is online, so send it a reschedule IPI. */
+ /*
+ * The CPU is online, so send it a reschedule IPI. This forces
+ * it through the scheduler, and (inefficiently) also handles cases
+ * where idle loops fail to inform RCU about the CPU being idle.
+ */
if (rdp->cpu != smp_processor_id())
smp_send_reschedule(rdp->cpu);
else
#endif /* #ifdef CONFIG_SMP */
-#ifdef CONFIG_NO_HZ
+/*
+ * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle
+ *
+ * If the new value of the ->dynticks_nesting counter now is zero,
+ * we really have entered idle, and must do the appropriate accounting.
+ * The caller must have disabled interrupts.
+ */
+static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
+{
+ trace_rcu_dyntick("Start", oldval, 0);
+ if (!is_idle_task(current)) {
+ struct task_struct *idle = idle_task(smp_processor_id());
+
+ trace_rcu_dyntick("Error on entry: not idle task", oldval, 0);
+ ftrace_dump(DUMP_ALL);
+ WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
+ current->pid, current->comm,
+ idle->pid, idle->comm); /* must be idle task! */
+ }
+ rcu_prepare_for_idle(smp_processor_id());
+ /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
+ smp_mb__before_atomic_inc(); /* See above. */
+ atomic_inc(&rdtp->dynticks);
+ smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */
+ WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
+}
/**
- * rcu_enter_nohz - inform RCU that current CPU is entering nohz
+ * rcu_idle_enter - inform RCU that current CPU is entering idle
*
- * Enter nohz mode, in other words, -leave- the mode in which RCU
+ * Enter idle mode, in other words, -leave- the mode in which RCU
* read-side critical sections can occur. (Though RCU read-side
- * critical sections can occur in irq handlers in nohz mode, a possibility
- * handled by rcu_irq_enter() and rcu_irq_exit()).
+ * critical sections can occur in irq handlers in idle, a possibility
+ * handled by irq_enter() and irq_exit().)
+ *
+ * We crowbar the ->dynticks_nesting field to zero to allow for
+ * the possibility of usermode upcalls having messed up our count
+ * of interrupt nesting level during the prior busy period.
*/
-void rcu_enter_nohz(void)
+void rcu_idle_enter(void)
{
unsigned long flags;
+ long long oldval;
struct rcu_dynticks *rdtp;
local_irq_save(flags);
rdtp = &__get_cpu_var(rcu_dynticks);
- if (--rdtp->dynticks_nesting) {
- local_irq_restore(flags);
- return;
- }
- /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
- smp_mb__before_atomic_inc(); /* See above. */
- atomic_inc(&rdtp->dynticks);
- smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */
- WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
+ oldval = rdtp->dynticks_nesting;
+ rdtp->dynticks_nesting = 0;
+ rcu_idle_enter_common(rdtp, oldval);
local_irq_restore(flags);
-
- /* If the interrupt queued a callback, get out of dyntick mode. */
- if (in_irq() &&
- (__get_cpu_var(rcu_sched_data).nxtlist ||
- __get_cpu_var(rcu_bh_data).nxtlist ||
- rcu_preempt_needs_cpu(smp_processor_id())))
- set_need_resched();
}
-/*
- * rcu_exit_nohz - inform RCU that current CPU is leaving nohz
+/**
+ * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
+ *
+ * Exit from an interrupt handler, which might possibly result in entering
+ * idle mode, in other words, leaving the mode in which read-side critical
+ * sections can occur.
+ *
+ * This code assumes that the idle loop never does anything that might
+ * result in unbalanced calls to irq_enter() and irq_exit(). If your
+ * architecture violates this assumption, RCU will give you what you
+ * deserve, good and hard. But very infrequently and irreproducibly.
+ *
+ * Use things like work queues to work around this limitation.
*
- * Exit nohz mode, in other words, -enter- the mode in which RCU
- * read-side critical sections normally occur.
+ * You have been warned.
*/
-void rcu_exit_nohz(void)
+void rcu_irq_exit(void)
{
unsigned long flags;
+ long long oldval;
struct rcu_dynticks *rdtp;
local_irq_save(flags);
rdtp = &__get_cpu_var(rcu_dynticks);
- if (rdtp->dynticks_nesting++) {
- local_irq_restore(flags);
- return;
- }
+ oldval = rdtp->dynticks_nesting;
+ rdtp->dynticks_nesting--;
+ WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
+ if (rdtp->dynticks_nesting)
+ trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting);
+ else
+ rcu_idle_enter_common(rdtp, oldval);
+ local_irq_restore(flags);
+}
+
+/*
+ * rcu_idle_exit_common - inform RCU that current CPU is moving away from idle
+ *
+ * If the new value of the ->dynticks_nesting counter was previously zero,
+ * we really have exited idle, and must do the appropriate accounting.
+ * The caller must have disabled interrupts.
+ */
+static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
+{
smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */
atomic_inc(&rdtp->dynticks);
/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
smp_mb__after_atomic_inc(); /* See above. */
WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
+ rcu_cleanup_after_idle(smp_processor_id());
+ trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting);
+ if (!is_idle_task(current)) {
+ struct task_struct *idle = idle_task(smp_processor_id());
+
+ trace_rcu_dyntick("Error on exit: not idle task",
+ oldval, rdtp->dynticks_nesting);
+ ftrace_dump(DUMP_ALL);
+ WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
+ current->pid, current->comm,
+ idle->pid, idle->comm); /* must be idle task! */
+ }
+}
+
+/**
+ * rcu_idle_exit - inform RCU that current CPU is leaving idle
+ *
+ * Exit idle mode, in other words, -enter- the mode in which RCU
+ * read-side critical sections can occur.
+ *
+ * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NESTING to
+ * allow for the possibility of usermode upcalls messing up our count
+ * of interrupt nesting level during the busy period that is just
+ * now starting.
+ */
+void rcu_idle_exit(void)
+{
+ unsigned long flags;
+ struct rcu_dynticks *rdtp;
+ long long oldval;
+
+ local_irq_save(flags);
+ rdtp = &__get_cpu_var(rcu_dynticks);
+ oldval = rdtp->dynticks_nesting;
+ WARN_ON_ONCE(oldval != 0);
+ rdtp->dynticks_nesting = DYNTICK_TASK_NESTING;
+ rcu_idle_exit_common(rdtp, oldval);
+ local_irq_restore(flags);
+}
+
+/**
+ * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
+ *
+ * Enter an interrupt handler, which might possibly result in exiting
+ * idle mode, in other words, entering the mode in which read-side critical
+ * sections can occur.
+ *
+ * Note that the Linux kernel is fully capable of entering an interrupt
+ * handler that it never exits, for example when doing upcalls to
+ * user mode! This code assumes that the idle loop never does upcalls to
+ * user mode. If your architecture does do upcalls from the idle loop (or
+ * does anything else that results in unbalanced calls to the irq_enter()
+ * and irq_exit() functions), RCU will give you what you deserve, good
+ * and hard. But very infrequently and irreproducibly.
+ *
+ * Use things like work queues to work around this limitation.
+ *
+ * You have been warned.
+ */
+void rcu_irq_enter(void)
+{
+ unsigned long flags;
+ struct rcu_dynticks *rdtp;
+ long long oldval;
+
+ local_irq_save(flags);
+ rdtp = &__get_cpu_var(rcu_dynticks);
+ oldval = rdtp->dynticks_nesting;
+ rdtp->dynticks_nesting++;
+ WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
+ if (oldval)
+ trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting);
+ else
+ rcu_idle_exit_common(rdtp, oldval);
local_irq_restore(flags);
}
WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
}
+#ifdef CONFIG_PROVE_RCU
+
/**
- * rcu_irq_enter - inform RCU of entry to hard irq context
+ * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle
*
- * If the CPU was idle with dynamic ticks active, this updates the
- * rdtp->dynticks to let the RCU handling know that the CPU is active.
+ * If the current CPU is in its idle loop and is neither in an interrupt
+ * or NMI handler, return true.
*/
-void rcu_irq_enter(void)
+int rcu_is_cpu_idle(void)
{
- rcu_exit_nohz();
+ int ret;
+
+ preempt_disable();
+ ret = (atomic_read(&__get_cpu_var(rcu_dynticks).dynticks) & 0x1) == 0;
+ preempt_enable();
+ return ret;
}
+EXPORT_SYMBOL(rcu_is_cpu_idle);
+
+#endif /* #ifdef CONFIG_PROVE_RCU */
/**
- * rcu_irq_exit - inform RCU of exit from hard irq context
+ * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
*
- * If the CPU was idle with dynamic ticks active, update the rdp->dynticks
- * to put let the RCU handling be aware that the CPU is going back to idle
- * with no ticks.
+ * If the current CPU is idle or running at a first-level (not nested)
+ * interrupt from idle, return true. The caller must have at least
+ * disabled preemption.
*/
-void rcu_irq_exit(void)
+int rcu_is_cpu_rrupt_from_idle(void)
{
- rcu_enter_nohz();
+ return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1;
}
#ifdef CONFIG_SMP
static int dyntick_save_progress_counter(struct rcu_data *rdp)
{
rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
- return 0;
+ return (rdp->dynticks_snap & 0x1) == 0;
}
/*
*/
static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
{
- unsigned long curr;
- unsigned long snap;
+ unsigned int curr;
+ unsigned int snap;
- curr = (unsigned long)atomic_add_return(0, &rdp->dynticks->dynticks);
- snap = (unsigned long)rdp->dynticks_snap;
+ curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks);
+ snap = (unsigned int)rdp->dynticks_snap;
/*
* If the CPU passed through or entered a dynticks idle phase with
* read-side critical section that started before the beginning
* of the current RCU grace period.
*/
- if ((curr & 0x1) == 0 || ULONG_CMP_GE(curr, snap + 2)) {
+ if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) {
+ trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "dti");
rdp->dynticks_fqs++;
return 1;
}
#endif /* #ifdef CONFIG_SMP */
-#else /* #ifdef CONFIG_NO_HZ */
-
-#ifdef CONFIG_SMP
-
-static int dyntick_save_progress_counter(struct rcu_data *rdp)
-{
- return 0;
-}
-
-static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
-{
- return rcu_implicit_offline_qs(rdp);
-}
-
-#endif /* #ifdef CONFIG_SMP */
-
-#endif /* #else #ifdef CONFIG_NO_HZ */
-
-int rcu_cpu_stall_suppress __read_mostly;
-
static void record_gp_stall_check_time(struct rcu_state *rsp)
{
rsp->gp_start = jiffies;
int cpu;
long delta;
unsigned long flags;
+ int ndetected;
struct rcu_node *rnp = rcu_get_root(rsp);
/* Only let one CPU complain about others per time interval. */
* Now rat on any tasks that got kicked up to the root rcu_node
* due to CPU offlining.
*/
- rcu_print_task_stall(rnp);
+ ndetected = rcu_print_task_stall(rnp);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
/*
rsp->name);
rcu_for_each_leaf_node(rsp, rnp) {
raw_spin_lock_irqsave(&rnp->lock, flags);
- rcu_print_task_stall(rnp);
+ ndetected += rcu_print_task_stall(rnp);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
if (rnp->qsmask == 0)
continue;
for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
- if (rnp->qsmask & (1UL << cpu))
+ if (rnp->qsmask & (1UL << cpu)) {
printk(" %d", rnp->grplo + cpu);
+ ndetected++;
+ }
}
printk("} (detected by %d, t=%ld jiffies)\n",
smp_processor_id(), (long)(jiffies - rsp->gp_start));
- trigger_all_cpu_backtrace();
+ if (ndetected == 0)
+ printk(KERN_ERR "INFO: Stall ended before state dump start\n");
+ else if (!trigger_all_cpu_backtrace())
+ dump_stack();
/* If so configured, complain about tasks blocking the grace period. */
*/
printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n",
rsp->name, smp_processor_id(), jiffies - rsp->gp_start);
- trigger_all_cpu_backtrace();
+ if (!trigger_all_cpu_backtrace())
+ dump_stack();
raw_spin_lock_irqsave(&rnp->lock, flags);
if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
* go looking for one.
*/
rdp->gpnum = rnp->gpnum;
+ trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart");
if (rnp->qsmask & rdp->grpmask) {
rdp->qs_pending = 1;
- rdp->passed_quiesc = 0;
+ rdp->passed_quiesce = 0;
} else
rdp->qs_pending = 0;
}
/* Remember that we saw this grace-period completion. */
rdp->completed = rnp->completed;
+ trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuend");
/*
* If we were in an extended quiescent state, we may have
struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
struct rcu_node *rnp = rcu_get_root(rsp);
- if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) {
- if (cpu_needs_another_gp(rsp, rdp))
- rsp->fqs_need_gp = 1;
- if (rnp->completed == rsp->completed) {
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
- return;
- }
- raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+ if (!rcu_scheduler_fully_active ||
+ !cpu_needs_another_gp(rsp, rdp)) {
+ /*
+ * Either the scheduler hasn't yet spawned the first
+ * non-idle task or this CPU does not need another
+ * grace period. Either way, don't start a new grace
+ * period.
+ */
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ return;
+ }
+ if (rsp->fqs_active) {
/*
- * Propagate new ->completed value to rcu_node structures
- * so that other CPUs don't have to wait until the start
- * of the next grace period to process their callbacks.
+ * This CPU needs a grace period, but force_quiescent_state()
+ * is running. Tell it to start one on this CPU's behalf.
*/
- rcu_for_each_node_breadth_first(rsp, rnp) {
- raw_spin_lock(&rnp->lock); /* irqs already disabled. */
- rnp->completed = rsp->completed;
- raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
- }
- local_irq_restore(flags);
+ rsp->fqs_need_gp = 1;
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
/* Advance to a new grace period and initialize state. */
rsp->gpnum++;
- WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT);
- rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */
+ trace_rcu_grace_period(rsp->name, rsp->gpnum, "start");
+ WARN_ON_ONCE(rsp->fqs_state == RCU_GP_INIT);
+ rsp->fqs_state = RCU_GP_INIT; /* Hold off force_quiescent_state. */
rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
record_gp_stall_check_time(rsp);
rnp->qsmask = rnp->qsmaskinit;
rnp->gpnum = rsp->gpnum;
rnp->completed = rsp->completed;
- rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */
+ rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state OK */
rcu_start_gp_per_cpu(rsp, rnp, rdp);
rcu_preempt_boost_start_gp(rnp);
+ trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
+ rnp->level, rnp->grplo,
+ rnp->grphi, rnp->qsmask);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
if (rnp == rdp->mynode)
rcu_start_gp_per_cpu(rsp, rnp, rdp);
rcu_preempt_boost_start_gp(rnp);
+ trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
+ rnp->level, rnp->grplo,
+ rnp->grphi, rnp->qsmask);
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
}
rnp = rcu_get_root(rsp);
raw_spin_lock(&rnp->lock); /* irqs already disabled. */
- rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */
+ rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
}
__releases(rcu_get_root(rsp)->lock)
{
unsigned long gp_duration;
+ struct rcu_node *rnp = rcu_get_root(rsp);
+ struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
gp_duration = jiffies - rsp->gp_start;
if (gp_duration > rsp->gp_max)
rsp->gp_max = gp_duration;
- rsp->completed = rsp->gpnum;
- rsp->signaled = RCU_GP_IDLE;
+
+ /*
+ * We know the grace period is complete, but to everyone else
+ * it appears to still be ongoing. But it is also the case
+ * that to everyone else it looks like there is nothing that
+ * they can do to advance the grace period. It is therefore
+ * safe for us to drop the lock in order to mark the grace
+ * period as completed in all of the rcu_node structures.
+ *
+ * But if this CPU needs another grace period, it will take
+ * care of this while initializing the next grace period.
+ * We use RCU_WAIT_TAIL instead of the usual RCU_DONE_TAIL
+ * because the callbacks have not yet been advanced: Those
+ * callbacks are waiting on the grace period that just now
+ * completed.
+ */
+ if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) {
+ raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+
+ /*
+ * Propagate new ->completed value to rcu_node structures
+ * so that other CPUs don't have to wait until the start
+ * of the next grace period to process their callbacks.
+ */
+ rcu_for_each_node_breadth_first(rsp, rnp) {
+ raw_spin_lock(&rnp->lock); /* irqs already disabled. */
+ rnp->completed = rsp->gpnum;
+ raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+ }
+ rnp = rcu_get_root(rsp);
+ raw_spin_lock(&rnp->lock); /* irqs already disabled. */
+ }
+
+ rsp->completed = rsp->gpnum; /* Declare the grace period complete. */
+ trace_rcu_grace_period(rsp->name, rsp->completed, "end");
+ rsp->fqs_state = RCU_GP_IDLE;
rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */
}
return;
}
rnp->qsmask &= ~mask;
+ trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum,
+ mask, rnp->qsmask, rnp->level,
+ rnp->grplo, rnp->grphi,
+ !!rnp->gp_tasks);
if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
/* Other bits still set at this level, so done. */
* based on quiescent states detected in an earlier grace period!
*/
static void
-rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp)
+rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastgp)
{
unsigned long flags;
unsigned long mask;
rnp = rdp->mynode;
raw_spin_lock_irqsave(&rnp->lock, flags);
- if (lastcomp != rnp->completed) {
+ if (lastgp != rnp->gpnum || rnp->completed == rnp->gpnum) {
/*
- * Someone beat us to it for this grace period, so leave.
- * The race with GP start is resolved by the fact that we
- * hold the leaf rcu_node lock, so that the per-CPU bits
- * cannot yet be initialized -- so we would simply find our
- * CPU's bit already cleared in rcu_report_qs_rnp() if this
- * race occurred.
+ * The grace period in which this quiescent state was
+ * recorded has ended, so don't report it upwards.
+ * We will instead need a new quiescent state that lies
+ * within the current grace period.
*/
- rdp->passed_quiesc = 0; /* try again later! */
+ rdp->passed_quiesce = 0; /* need qs for new gp. */
raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
* Was there a quiescent state since the beginning of the grace
* period? If no, then exit and wait for the next call.
*/
- if (!rdp->passed_quiesc)
+ if (!rdp->passed_quiesce)
return;
/*
* Tell RCU we are done (but rcu_report_qs_rdp() will be the
* judge of that).
*/
- rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed);
+ rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesce_gpnum);
}
#ifdef CONFIG_HOTPLUG_CPU
int need_report = 0;
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
struct rcu_node *rnp;
- struct task_struct *t;
- /* Stop the CPU's kthread. */
- t = per_cpu(rcu_cpu_kthread_task, cpu);
- if (t != NULL) {
- per_cpu(rcu_cpu_kthread_task, cpu) = NULL;
- kthread_stop(t);
- }
+ rcu_stop_cpu_kthread(cpu);
/* Exclude any attempts to start a new grace period. */
raw_spin_lock_irqsave(&rsp->onofflock, flags);
if (rnp->qsmaskinit != 0) {
if (rnp != rdp->mynode)
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+ else
+ trace_rcu_grace_period(rsp->name,
+ rnp->gpnum + 1 -
+ !!(rnp->qsmask & mask),
+ "cpuofl");
break;
}
- if (rnp == rdp->mynode)
+ if (rnp == rdp->mynode) {
+ trace_rcu_grace_period(rsp->name,
+ rnp->gpnum + 1 -
+ !!(rnp->qsmask & mask),
+ "cpuofl");
need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
- else
+ } else
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
mask = rnp->grpmask;
rnp = rnp->parent;
else
raw_spin_unlock_irqrestore(&rnp->lock, flags);
if (need_report & RCU_OFL_TASKS_EXP_GP)
- rcu_report_exp_rnp(rsp, rnp);
+ rcu_report_exp_rnp(rsp, rnp, true);
rcu_node_kthread_setaffinity(rnp, -1);
}
{
unsigned long flags;
struct rcu_head *next, *list, **tail;
- int count;
+ int bl, count;
/* If no callbacks are ready, just return.*/
- if (!cpu_has_callbacks_ready_to_invoke(rdp))
+ if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
+ trace_rcu_batch_start(rsp->name, 0, 0);
+ trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist),
+ need_resched(), is_idle_task(current),
+ rcu_is_callbacks_kthread());
return;
+ }
/*
* Extract the list of ready callbacks, disabling to prevent
* races with call_rcu() from interrupt handlers.
*/
local_irq_save(flags);
+ bl = rdp->blimit;
+ trace_rcu_batch_start(rsp->name, rdp->qlen, bl);
list = rdp->nxtlist;
rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
*rdp->nxttail[RCU_DONE_TAIL] = NULL;
next = list->next;
prefetch(next);
debug_rcu_head_unqueue(list);
- __rcu_reclaim(list);
+ __rcu_reclaim(rsp->name, list);
list = next;
- if (++count >= rdp->blimit)
+ /* Stop only if limit reached and CPU has something to do. */
+ if (++count >= bl &&
+ (need_resched() ||
+ (!is_idle_task(current) && !rcu_is_callbacks_kthread())))
break;
}
local_irq_save(flags);
+ trace_rcu_batch_end(rsp->name, count, !!list, need_resched(),
+ is_idle_task(current),
+ rcu_is_callbacks_kthread());
/* Update count, and requeue any remaining callbacks. */
rdp->qlen -= count;
local_irq_restore(flags);
- /* Re-raise the RCU softirq if there are callbacks remaining. */
+ /* Re-invoke RCU core processing if there are callbacks remaining. */
if (cpu_has_callbacks_ready_to_invoke(rdp))
- invoke_rcu_cpu_kthread();
+ invoke_rcu_core();
}
/*
* Check to see if this CPU is in a non-context-switch quiescent state
* (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
- * Also schedule the RCU softirq handler.
+ * Also schedule RCU core processing.
*
- * This function must be called with hardirqs disabled. It is normally
+ * This function must be called from hardirq context. It is normally
* invoked from the scheduling-clock interrupt. If rcu_pending returns
* false, there is no point in invoking rcu_check_callbacks().
*/
void rcu_check_callbacks(int cpu, int user)
{
- if (user ||
- (idle_cpu(cpu) && rcu_scheduler_active &&
- !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
+ trace_rcu_utilization("Start scheduler-tick");
+ if (user || rcu_is_cpu_rrupt_from_idle()) {
/*
* Get here if this CPU took its interrupt from user
}
rcu_preempt_check_callbacks(cpu);
if (rcu_pending(cpu))
- invoke_rcu_cpu_kthread();
+ invoke_rcu_core();
+ trace_rcu_utilization("End scheduler-tick");
}
#ifdef CONFIG_SMP
unsigned long flags;
struct rcu_node *rnp = rcu_get_root(rsp);
- if (!rcu_gp_in_progress(rsp))
+ trace_rcu_utilization("Start fqs");
+ if (!rcu_gp_in_progress(rsp)) {
+ trace_rcu_utilization("End fqs");
return; /* No grace period in progress, nothing to force. */
+ }
if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) {
rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */
+ trace_rcu_utilization("End fqs");
return; /* Someone else is already on the job. */
}
if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies))
goto unlock_fqs_ret; /* no GP in progress, time updated. */
}
rsp->fqs_active = 1;
- switch (rsp->signaled) {
+ switch (rsp->fqs_state) {
case RCU_GP_IDLE:
case RCU_GP_INIT:
force_qs_rnp(rsp, dyntick_save_progress_counter);
raw_spin_lock(&rnp->lock); /* irqs already disabled */
if (rcu_gp_in_progress(rsp))
- rsp->signaled = RCU_FORCE_QS;
+ rsp->fqs_state = RCU_FORCE_QS;
break;
case RCU_FORCE_QS:
raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */
rsp->fqs_need_gp = 0;
rcu_start_gp(rsp, flags); /* releases rnp->lock */
+ trace_rcu_utilization("End fqs");
return;
}
raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
unlock_fqs_ret:
raw_spin_unlock_irqrestore(&rsp->fqslock, flags);
+ trace_rcu_utilization("End fqs");
}
#else /* #ifdef CONFIG_SMP */
#endif /* #else #ifdef CONFIG_SMP */
/*
- * This does the RCU processing work from softirq context for the
- * specified rcu_state and rcu_data structures. This may be called
- * only from the CPU to whom the rdp belongs.
+ * This does the RCU core processing work for the specified rcu_state
+ * and rcu_data structures. This may be called only from the CPU to
+ * whom the rdp belongs.
*/
static void
__rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
}
/* If there are callbacks ready, invoke them. */
- rcu_do_batch(rsp, rdp);
+ if (cpu_has_callbacks_ready_to_invoke(rdp))
+ invoke_rcu_callbacks(rsp, rdp);
}
/*
- * Do softirq processing for the current CPU.
+ * Do RCU core processing for the current CPU.
*/
-static void rcu_process_callbacks(void)
+static void rcu_process_callbacks(struct softirq_action *unused)
{
+ trace_rcu_utilization("Start RCU core");
__rcu_process_callbacks(&rcu_sched_state,
&__get_cpu_var(rcu_sched_data));
__rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
rcu_preempt_process_callbacks();
-
- /* If we are last CPU on way to dyntick-idle mode, accelerate it. */
- rcu_needs_cpu_flush();
+ trace_rcu_utilization("End RCU core");
}
/*
- * Wake up the current CPU's kthread. This replaces raise_softirq()
- * in earlier versions of RCU. Note that because we are running on
- * the current CPU with interrupts disabled, the rcu_cpu_kthread_task
- * cannot disappear out from under us.
+ * Schedule RCU callback invocation. If the specified type of RCU
+ * does not support RCU priority boosting, just do a direct call,
+ * otherwise wake up the per-CPU kernel kthread. Note that because we
+ * are running on the current CPU with interrupts disabled, the
+ * rcu_cpu_kthread_task cannot disappear out from under us.
*/
-static void invoke_rcu_cpu_kthread(void)
+static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
{
- unsigned long flags;
-
- local_irq_save(flags);
- __this_cpu_write(rcu_cpu_has_work, 1);
- if (__this_cpu_read(rcu_cpu_kthread_task) == NULL) {
- local_irq_restore(flags);
+ if (unlikely(!ACCESS_ONCE(rcu_scheduler_fully_active)))
return;
- }
- wake_up_process(__this_cpu_read(rcu_cpu_kthread_task));
- local_irq_restore(flags);
-}
-
-/*
- * Wake up the specified per-rcu_node-structure kthread.
- * Because the per-rcu_node kthreads are immortal, we don't need
- * to do anything to keep them alive.
- */
-static void invoke_rcu_node_kthread(struct rcu_node *rnp)
-{
- struct task_struct *t;
-
- t = rnp->node_kthread_task;
- if (t != NULL)
- wake_up_process(t);
-}
-
-/*
- * Set the specified CPU's kthread to run RT or not, as specified by
- * the to_rt argument. The CPU-hotplug locks are held, so the task
- * is not going away.
- */
-static void rcu_cpu_kthread_setrt(int cpu, int to_rt)
-{
- int policy;
- struct sched_param sp;
- struct task_struct *t;
-
- t = per_cpu(rcu_cpu_kthread_task, cpu);
- if (t == NULL)
+ if (likely(!rsp->boost)) {
+ rcu_do_batch(rsp, rdp);
return;
- if (to_rt) {
- policy = SCHED_FIFO;
- sp.sched_priority = RCU_KTHREAD_PRIO;
- } else {
- policy = SCHED_NORMAL;
- sp.sched_priority = 0;
}
- sched_setscheduler_nocheck(t, policy, &sp);
+ invoke_rcu_callbacks_kthread();
}
-/*
- * Timer handler to initiate the waking up of per-CPU kthreads that
- * have yielded the CPU due to excess numbers of RCU callbacks.
- * We wake up the per-rcu_node kthread, which in turn will wake up
- * the booster kthread.
- */
-static void rcu_cpu_kthread_timer(unsigned long arg)
+static void invoke_rcu_core(void)
{
- struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg);
- struct rcu_node *rnp = rdp->mynode;
-
- atomic_or(rdp->grpmask, &rnp->wakemask);
- invoke_rcu_node_kthread(rnp);
+ raise_softirq(RCU_SOFTIRQ);
}
-/*
- * Drop to non-real-time priority and yield, but only after posting a
- * timer that will cause us to regain our real-time priority if we
- * remain preempted. Either way, we restore our real-time priority
- * before returning.
- */
-static void rcu_yield(void (*f)(unsigned long), unsigned long arg)
-{
- struct sched_param sp;
- struct timer_list yield_timer;
-
- setup_timer_on_stack(&yield_timer, f, arg);
- mod_timer(&yield_timer, jiffies + 2);
- sp.sched_priority = 0;
- sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp);
- set_user_nice(current, 19);
- schedule();
- sp.sched_priority = RCU_KTHREAD_PRIO;
- sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
- del_timer(&yield_timer);
-}
-
-/*
- * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU.
- * This can happen while the corresponding CPU is either coming online
- * or going offline. We cannot wait until the CPU is fully online
- * before starting the kthread, because the various notifier functions
- * can wait for RCU grace periods. So we park rcu_cpu_kthread() until
- * the corresponding CPU is online.
- *
- * Return 1 if the kthread needs to stop, 0 otherwise.
- *
- * Caller must disable bh. This function can momentarily enable it.
- */
-static int rcu_cpu_kthread_should_stop(int cpu)
-{
- while (cpu_is_offline(cpu) ||
- !cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu)) ||
- smp_processor_id() != cpu) {
- if (kthread_should_stop())
- return 1;
- per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
- per_cpu(rcu_cpu_kthread_cpu, cpu) = raw_smp_processor_id();
- local_bh_enable();
- schedule_timeout_uninterruptible(1);
- if (!cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu)))
- set_cpus_allowed_ptr(current, cpumask_of(cpu));
- local_bh_disable();
- }
- per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
- return 0;
-}
-
-/*
- * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
- * earlier RCU softirq.
- */
-static int rcu_cpu_kthread(void *arg)
-{
- int cpu = (int)(long)arg;
- unsigned long flags;
- int spincnt = 0;
- unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu);
- char work;
- char *workp = &per_cpu(rcu_cpu_has_work, cpu);
-
- for (;;) {
- *statusp = RCU_KTHREAD_WAITING;
- rcu_wait(*workp != 0 || kthread_should_stop());
- local_bh_disable();
- if (rcu_cpu_kthread_should_stop(cpu)) {
- local_bh_enable();
- break;
- }
- *statusp = RCU_KTHREAD_RUNNING;
- per_cpu(rcu_cpu_kthread_loops, cpu)++;
- local_irq_save(flags);
- work = *workp;
- *workp = 0;
- local_irq_restore(flags);
- if (work)
- rcu_process_callbacks();
- local_bh_enable();
- if (*workp != 0)
- spincnt++;
- else
- spincnt = 0;
- if (spincnt > 10) {
- *statusp = RCU_KTHREAD_YIELDING;
- rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu);
- spincnt = 0;
- }
- }
- *statusp = RCU_KTHREAD_STOPPED;
- return 0;
-}
-
-/*
- * Spawn a per-CPU kthread, setting up affinity and priority.
- * Because the CPU hotplug lock is held, no other CPU will be attempting
- * to manipulate rcu_cpu_kthread_task. There might be another CPU
- * attempting to access it during boot, but the locking in kthread_bind()
- * will enforce sufficient ordering.
- */
-static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu)
-{
- struct sched_param sp;
- struct task_struct *t;
-
- if (!rcu_kthreads_spawnable ||
- per_cpu(rcu_cpu_kthread_task, cpu) != NULL)
- return 0;
- t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu);
- if (IS_ERR(t))
- return PTR_ERR(t);
- kthread_bind(t, cpu);
- per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
- WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL);
- per_cpu(rcu_cpu_kthread_task, cpu) = t;
- sp.sched_priority = RCU_KTHREAD_PRIO;
- sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
- return 0;
-}
-
-/*
- * Per-rcu_node kthread, which is in charge of waking up the per-CPU
- * kthreads when needed. We ignore requests to wake up kthreads
- * for offline CPUs, which is OK because force_quiescent_state()
- * takes care of this case.
- */
-static int rcu_node_kthread(void *arg)
-{
- int cpu;
- unsigned long flags;
- unsigned long mask;
- struct rcu_node *rnp = (struct rcu_node *)arg;
- struct sched_param sp;
- struct task_struct *t;
-
- for (;;) {
- rnp->node_kthread_status = RCU_KTHREAD_WAITING;
- rcu_wait(atomic_read(&rnp->wakemask) != 0);
- rnp->node_kthread_status = RCU_KTHREAD_RUNNING;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- mask = atomic_xchg(&rnp->wakemask, 0);
- rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
- for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) {
- if ((mask & 0x1) == 0)
- continue;
- preempt_disable();
- t = per_cpu(rcu_cpu_kthread_task, cpu);
- if (!cpu_online(cpu) || t == NULL) {
- preempt_enable();
- continue;
- }
- per_cpu(rcu_cpu_has_work, cpu) = 1;
- sp.sched_priority = RCU_KTHREAD_PRIO;
- sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
- preempt_enable();
- }
- }
- /* NOTREACHED */
- rnp->node_kthread_status = RCU_KTHREAD_STOPPED;
- return 0;
-}
-
-/*
- * Set the per-rcu_node kthread's affinity to cover all CPUs that are
- * served by the rcu_node in question. The CPU hotplug lock is still
- * held, so the value of rnp->qsmaskinit will be stable.
- *
- * We don't include outgoingcpu in the affinity set, use -1 if there is
- * no outgoing CPU. If there are no CPUs left in the affinity set,
- * this function allows the kthread to execute on any CPU.
- */
-static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
-{
- cpumask_var_t cm;
- int cpu;
- unsigned long mask = rnp->qsmaskinit;
-
- if (rnp->node_kthread_task == NULL)
- return;
- if (!alloc_cpumask_var(&cm, GFP_KERNEL))
- return;
- cpumask_clear(cm);
- for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1)
- if ((mask & 0x1) && cpu != outgoingcpu)
- cpumask_set_cpu(cpu, cm);
- if (cpumask_weight(cm) == 0) {
- cpumask_setall(cm);
- for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++)
- cpumask_clear_cpu(cpu, cm);
- WARN_ON_ONCE(cpumask_weight(cm) == 0);
- }
- set_cpus_allowed_ptr(rnp->node_kthread_task, cm);
- rcu_boost_kthread_setaffinity(rnp, cm);
- free_cpumask_var(cm);
-}
-
-/*
- * Spawn a per-rcu_node kthread, setting priority and affinity.
- * Called during boot before online/offline can happen, or, if
- * during runtime, with the main CPU-hotplug locks held. So only
- * one of these can be executing at a time.
- */
-static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
- struct rcu_node *rnp)
-{
- unsigned long flags;
- int rnp_index = rnp - &rsp->node[0];
- struct sched_param sp;
- struct task_struct *t;
-
- if (!rcu_kthreads_spawnable ||
- rnp->qsmaskinit == 0)
- return 0;
- if (rnp->node_kthread_task == NULL) {
- t = kthread_create(rcu_node_kthread, (void *)rnp,
- "rcun%d", rnp_index);
- if (IS_ERR(t))
- return PTR_ERR(t);
- raw_spin_lock_irqsave(&rnp->lock, flags);
- rnp->node_kthread_task = t;
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
- sp.sched_priority = 99;
- sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
- }
- return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index);
-}
-
-static void rcu_wake_one_boost_kthread(struct rcu_node *rnp);
-
-/*
- * Spawn all kthreads -- called as soon as the scheduler is running.
- */
-static int __init rcu_spawn_kthreads(void)
-{
- int cpu;
- struct rcu_node *rnp;
- struct task_struct *t;
-
- rcu_kthreads_spawnable = 1;
- for_each_possible_cpu(cpu) {
- per_cpu(rcu_cpu_has_work, cpu) = 0;
- if (cpu_online(cpu)) {
- (void)rcu_spawn_one_cpu_kthread(cpu);
- t = per_cpu(rcu_cpu_kthread_task, cpu);
- if (t)
- wake_up_process(t);
- }
- }
- rnp = rcu_get_root(rcu_state);
- (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
- if (rnp->node_kthread_task)
- wake_up_process(rnp->node_kthread_task);
- if (NUM_RCU_NODES > 1) {
- rcu_for_each_leaf_node(rcu_state, rnp) {
- (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
- t = rnp->node_kthread_task;
- if (t)
- wake_up_process(t);
- rcu_wake_one_boost_kthread(rnp);
- }
- }
- return 0;
-}
-early_initcall(rcu_spawn_kthreads);
-
static void
__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
struct rcu_state *rsp)
rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
rdp->qlen++;
+ if (__is_kfree_rcu_offset((unsigned long)func))
+ trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func,
+ rdp->qlen);
+ else
+ trace_rcu_callback(rsp->name, head, rdp->qlen);
+
/* If interrupts were disabled, don't dive into RCU core. */
if (irqs_disabled_flags(flags)) {
local_irq_restore(flags);
*/
void synchronize_sched(void)
{
- struct rcu_synchronize rcu;
-
if (rcu_blocking_is_gp())
return;
-
- init_rcu_head_on_stack(&rcu.head);
- init_completion(&rcu.completion);
- /* Will wake me after RCU finished. */
- call_rcu_sched(&rcu.head, wakeme_after_rcu);
- /* Wait for it. */
- wait_for_completion(&rcu.completion);
- destroy_rcu_head_on_stack(&rcu.head);
+ wait_rcu_gp(call_rcu_sched);
}
EXPORT_SYMBOL_GPL(synchronize_sched);
*/
void synchronize_rcu_bh(void)
{
- struct rcu_synchronize rcu;
-
if (rcu_blocking_is_gp())
return;
-
- init_rcu_head_on_stack(&rcu.head);
- init_completion(&rcu.completion);
- /* Will wake me after RCU finished. */
- call_rcu_bh(&rcu.head, wakeme_after_rcu);
- /* Wait for it. */
- wait_for_completion(&rcu.completion);
- destroy_rcu_head_on_stack(&rcu.head);
+ wait_rcu_gp(call_rcu_bh);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
check_cpu_stall(rsp, rdp);
/* Is the RCU core waiting for a quiescent state from this CPU? */
- if (rdp->qs_pending && !rdp->passed_quiesc) {
+ if (rcu_scheduler_fully_active &&
+ rdp->qs_pending && !rdp->passed_quiesce) {
/*
* If force_quiescent_state() coming soon and this CPU
ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1,
jiffies))
set_need_resched();
- } else if (rdp->qs_pending && rdp->passed_quiesc) {
+ } else if (rdp->qs_pending && rdp->passed_quiesce) {
rdp->n_rp_report_qs++;
return 1;
}
* by the current CPU, even if none need be done immediately, returning
* 1 if so.
*/
-static int rcu_needs_cpu_quick_check(int cpu)
+static int rcu_cpu_has_callbacks(int cpu)
{
/* RCU callbacks either ready or pending? */
return per_cpu(rcu_sched_data, cpu).nxtlist ||
for (i = 0; i < RCU_NEXT_SIZE; i++)
rdp->nxttail[i] = &rdp->nxtlist;
rdp->qlen = 0;
-#ifdef CONFIG_NO_HZ
rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
-#endif /* #ifdef CONFIG_NO_HZ */
+ WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_NESTING);
+ WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
rdp->cpu = cpu;
+ rdp->rsp = rsp;
raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
/* Set up local state, ensuring consistent view of global state. */
raw_spin_lock_irqsave(&rnp->lock, flags);
- rdp->passed_quiesc = 0; /* We could be racing with new GP, */
- rdp->qs_pending = 1; /* so set up to respond to current GP. */
rdp->beenonline = 1; /* We have now been online. */
rdp->preemptible = preemptible;
rdp->qlen_last_fqs_check = 0;
rdp->n_force_qs_snap = rsp->n_force_qs;
rdp->blimit = blimit;
+ rdp->dynticks->dynticks_nesting = DYNTICK_TASK_NESTING;
+ atomic_set(&rdp->dynticks->dynticks,
+ (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
+ rcu_prepare_for_idle_init(cpu);
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
/*
rnp->qsmaskinit |= mask;
mask = rnp->grpmask;
if (rnp == rdp->mynode) {
- rdp->gpnum = rnp->completed; /* if GP in progress... */
+ /*
+ * If there is a grace period in progress, we will
+ * set up to wait for it next time we run the
+ * RCU core code.
+ */
+ rdp->gpnum = rnp->completed;
rdp->completed = rnp->completed;
- rdp->passed_quiesc_completed = rnp->completed - 1;
+ rdp->passed_quiesce = 0;
+ rdp->qs_pending = 0;
+ rdp->passed_quiesce_gpnum = rnp->gpnum - 1;
+ trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl");
}
raw_spin_unlock(&rnp->lock); /* irqs already disabled. */
rnp = rnp->parent;
rcu_preempt_init_percpu_data(cpu);
}
-static void __cpuinit rcu_prepare_kthreads(int cpu)
-{
- struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
- struct rcu_node *rnp = rdp->mynode;
-
- /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
- if (rcu_kthreads_spawnable) {
- (void)rcu_spawn_one_cpu_kthread(cpu);
- if (rnp->node_kthread_task == NULL)
- (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
- }
-}
-
-/*
- * kthread_create() creates threads in TASK_UNINTERRUPTIBLE state,
- * but the RCU threads are woken on demand, and if demand is low this
- * could be a while triggering the hung task watchdog.
- *
- * In order to avoid this, poke all tasks once the CPU is fully
- * up and running.
- */
-static void __cpuinit rcu_online_kthreads(int cpu)
-{
- struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
- struct rcu_node *rnp = rdp->mynode;
- struct task_struct *t;
-
- t = per_cpu(rcu_cpu_kthread_task, cpu);
- if (t)
- wake_up_process(t);
-
- t = rnp->node_kthread_task;
- if (t)
- wake_up_process(t);
-
- rcu_wake_one_boost_kthread(rnp);
-}
-
/*
* Handle CPU online/offline notification events.
*/
struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
struct rcu_node *rnp = rdp->mynode;
+ trace_rcu_utilization("Start CPU hotplug");
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
rcu_prepare_kthreads(cpu);
break;
case CPU_ONLINE:
- rcu_online_kthreads(cpu);
case CPU_DOWN_FAILED:
rcu_node_kthread_setaffinity(rnp, -1);
rcu_cpu_kthread_setrt(cpu, 1);
rcu_send_cbs_to_online(&rcu_bh_state);
rcu_send_cbs_to_online(&rcu_sched_state);
rcu_preempt_send_cbs_to_online();
+ rcu_cleanup_after_idle(cpu);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
default:
break;
}
+ trace_rcu_utilization("End CPU hotplug");
return NOTIFY_OK;
}
rcu_init_one(&rcu_sched_state, &rcu_sched_data);
rcu_init_one(&rcu_bh_state, &rcu_bh_data);
__rcu_init_preempt();
+ open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
/*
* We don't need protection against CPU-hotplug here because