#include <asm/time.h>
#include <asm/timer.h>
-#include <xen/evtchn.h>
+#include <xen/clock.h>
#include <xen/sysctl.h>
#include <xen/interface/vcpu.h>
static struct timespec shadow_tv;
static u32 shadow_tv_version;
-/* Keep track of last time we did processing/updating of jiffies and xtime. */
-static u64 processed_system_time; /* System time (ns) at last processing. */
-
-struct local_time_info {
- u64 processed_system;
- u64 accounted_system;
- /* How much CPU time was spent blocked and how much was 'stolen'? */
- u64 accounted_stolen;
- u64 accounted_blocked;
-};
-static DEFINE_PER_CPU(struct local_time_info, local_time);
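+/*
+ * Relate the global jiffy count to Xen system time:
+ * system time (ns) = (jiffies_64 - jiffies_bias) * NS_PER_TICK + system_time_bias.
+ */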
+static u64 jiffies_bias, system_time_bias;
/* Current runstate of each CPU (updated automatically by the hypervisor). */
DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
/* Must be signed, as it's compared with s64 quantities which can be -ve. */
#define NS_PER_TICK (1000000000LL/HZ)
-static struct vcpu_set_periodic_timer xen_set_periodic_tick = {
- .period_ns = NS_PER_TICK
-};
-
static void __clock_was_set(struct work_struct *unused)
{
clock_was_set();
__setup("permitted_clock_jitter=", __permitted_clock_jitter);
/*
- * Limit on the number of CPUs that may concurrently attempt to acquire
- * xtime_lock in timer_interrupt() (reducing contention potentially leading
- * to a live lock on systems with many CPUs.
- */
-static unsigned int __read_mostly duty_limit = -2;
-static int __init set_duty_limit(char *str)
-{
- duty_limit = simple_strtoul(str, NULL, 0) - 1;
- return 1;
-}
-__setup("timer_duty_limit=", set_duty_limit);
-
-/*
* Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
* yielding a 64-bit result.
*/
return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
}
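+/* Xen system time (ns) corresponding to the current jiffy count. */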
+static inline u64 processed_system_time(void)
+{
+ return (jiffies_64 - jiffies_bias) * NS_PER_TICK + system_time_bias;
+}
+
static void __update_wallclock(time_t sec, long nsec)
{
long wtm_nsec, xtime_nsec;
u64 tmp, wc_nsec;
/* Adjust wall-clock time base. */
- wc_nsec = processed_system_time;
+ wc_nsec = processed_system_time();
wc_nsec += sec * (u64)NSEC_PER_SEC;
wc_nsec += nsec;
__update_wallclock(shadow_tv.tv_sec, shadow_tv.tv_nsec);
}
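+/*
+ * Called from the timer interrupt: if Xen has published a new
+ * wallclock, fold it into xtime and kick the clock_was_set() work.
+ */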
+void xen_check_wallclock_update(void)
+{
+ if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) {
+ write_seqlock(&xtime_lock);
+ update_wallclock();
+ write_sequnlock(&xtime_lock);
+ if (keventd_up())
+ schedule_work(&clock_was_set_work);
+ }
+}
+
/*
* Reads a consistent set of time-base values from Xen, into a shadow data
* area.
op.cmd = XENPF_settime;
op.u.settime.secs = sec;
op.u.settime.nsecs = nsec;
- op.u.settime.system_time = processed_system_time;
+ op.u.settime.system_time = processed_system_time();
WARN_ON(HYPERVISOR_platform_op(&op));
update_wallclock();
mod_timer(&sync_xen_wallclock_timer, jiffies + 60*HZ);
}
-static unsigned long long local_clock(void)
+unsigned long long xen_local_clock(void)
{
unsigned int cpu = get_cpu();
struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
/*
* Runstate accounting
*/
-static void get_runstate_snapshot(struct vcpu_runstate_info *res)
+void get_runstate_snapshot(struct vcpu_runstate_info *res)
{
u64 state_time;
struct vcpu_runstate_info *state;
*/
preempt_disable();
- now = local_clock();
+ now = xen_local_clock();
get_runstate_snapshot(&runstate);
}
EXPORT_SYMBOL(profile_pc);
-/*
- * Default timer interrupt handler
- */
-static irqreturn_t timer_interrupt(int irq, void *dev_id)
-{
- static unsigned int contention_count;
- s64 delta, delta_cpu, stolen, blocked;
- unsigned int i, cpu = smp_processor_id();
- struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
- struct local_time_info *local = &per_cpu(local_time, cpu);
- bool duty = false;
- struct vcpu_runstate_info runstate;
-
- /* Keep nmi watchdog up to date */
- inc_irq_stat(irq0_irqs);
-
- /*
- * Here we are in the timer irq handler. We just have irqs locally
- * disabled but we don't know if the timer_bh is running on the other
- * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
- * the irq version of write_lock because as just said we have irq
- * locally disabled. -arca
- */
- asm (LOCK_PREFIX "xaddl %1, %0"
- : "+m" (contention_count), "=r" (i) : "1" (1));
- if (i <= duty_limit) {
- write_seqlock(&xtime_lock);
- duty = true;
- }
- asm (LOCK_PREFIX "decl %0" : "+m" (contention_count));
-
- do {
- get_time_values_from_xen(cpu);
-
- /* Obtain a consistent snapshot of elapsed wallclock cycles. */
- delta = delta_cpu =
- shadow->system_timestamp + get_nsec_offset(shadow);
- delta -= processed_system_time;
- delta_cpu -= local->processed_system;
-
- get_runstate_snapshot(&runstate);
- } while (!time_values_up_to_date());
-
- if (duty && unlikely(delta < -(s64)permitted_clock_jitter)) {
- blocked = processed_system_time;
- write_sequnlock(&xtime_lock);
- if (printk_ratelimit()) {
- printk("Timer ISR/%u: Time went backwards: "
- "delta=%Ld/%Ld shadow=%Lx off=%Lx "
- "processed=%Lx/%Lx\n",
- cpu, delta, delta_cpu, shadow->system_timestamp,
- get_nsec_offset(shadow), blocked,
- local->processed_system);
- for_each_cpu_and(i, cpu_online_mask, cpumask_of(cpu))
- printk(" %u: %Lx\n", i,
- per_cpu(local_time.processed_system, i));
- }
- } else if (unlikely(delta_cpu < -(s64)permitted_clock_jitter)) {
- blocked = processed_system_time;
- if (duty)
- write_sequnlock(&xtime_lock);
- if (printk_ratelimit()) {
- printk("Timer ISR/%u: Time went backwards: delta=%Ld"
- " shadow=%Lx off=%Lx processed=%Lx/%Lx\n",
- cpu, delta_cpu, shadow->system_timestamp,
- get_nsec_offset(shadow), blocked,
- local->processed_system);
- for_each_cpu_and(i, cpu_online_mask, cpumask_of(cpu))
- printk(" %u: %Lx\n", i,
- per_cpu(local_time.processed_system, i));
- }
- } else if (duty) {
- /* System-wide jiffy work. */
- if (delta >= NS_PER_TICK) {
- do_div(delta, NS_PER_TICK);
- processed_system_time += delta * NS_PER_TICK;
- while (delta > HZ) {
- clobber_induction_variable(delta);
- do_timer(HZ);
- delta -= HZ;
- }
- do_timer(delta);
- }
-
- if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) {
- update_wallclock();
- if (keventd_up())
- schedule_work(&clock_was_set_work);
- }
-
- write_sequnlock(&xtime_lock);
- }
-
- delta = delta_cpu;
- delta_cpu += local->processed_system - local->accounted_system;
- if (delta >= NS_PER_TICK) {
- do_div(delta, NS_PER_TICK);
- local->processed_system += delta * NS_PER_TICK;
- }
-
- /*
- * Account stolen ticks.
- * ensures that the ticks are accounted as stolen.
- */
- stolen = runstate.time[RUNSTATE_runnable]
- + runstate.time[RUNSTATE_offline]
- - local->accounted_stolen;
- if ((stolen > 0) && (delta_cpu > 0)) {
- delta_cpu -= stolen;
- if (unlikely(delta_cpu < 0))
- stolen += delta_cpu; /* clamp local-time progress */
- do_div(stolen, NS_PER_TICK);
- local->accounted_stolen += stolen * NS_PER_TICK;
- local->accounted_system += stolen * NS_PER_TICK;
- account_steal_time((cputime_t)stolen);
- }
-
- /*
- * Account blocked ticks.
- * ensures that the ticks are accounted as idle/wait.
- */
- blocked = runstate.time[RUNSTATE_blocked]
- - local->accounted_blocked;
- if ((blocked > 0) && (delta_cpu > 0)) {
- delta_cpu -= blocked;
- if (unlikely(delta_cpu < 0))
- blocked += delta_cpu; /* clamp local-time progress */
- do_div(blocked, NS_PER_TICK);
- local->accounted_blocked += blocked * NS_PER_TICK;
- local->accounted_system += blocked * NS_PER_TICK;
- account_idle_time((cputime_t)blocked);
- }
-
- /* Account user/system ticks. */
- if (delta_cpu > 0) {
- do_div(delta_cpu, NS_PER_TICK);
- local->accounted_system += delta_cpu * NS_PER_TICK;
- if (user_mode_vm(get_irq_regs()))
- account_user_time(current, (cputime_t)delta_cpu,
- (cputime_t)delta_cpu);
- else if (current != idle_task(cpu))
- account_system_time(current, HARDIRQ_OFFSET,
- (cputime_t)delta_cpu,
- (cputime_t)delta_cpu);
- else
- account_idle_time((cputime_t)delta_cpu);
- }
-
- /* Offlined for more than a few seconds? Avoid lockup warnings. */
- if (stolen > 5*HZ)
- touch_softlockup_watchdog();
-
- /* Local timer processing (see update_process_times()). */
- run_local_timers();
- rcu_check_callbacks(cpu, user_mode_vm(get_irq_regs()));
- printk_tick();
- scheduler_tick();
- run_posix_cpu_timers(current);
- profile_tick(CPU_PROFILING);
-
- return IRQ_HANDLED;
-}
-
void mark_tsc_unstable(char *reason)
{
#ifndef CONFIG_XEN /* XXX Should tell the hypervisor about this fact. */
}
EXPORT_SYMBOL_GPL(mark_tsc_unstable);
-static void init_missing_ticks_accounting(unsigned int cpu)
-{
- struct vcpu_runstate_info *runstate = setup_runstate_area(cpu);
-
- per_cpu(local_time.accounted_blocked, cpu) =
- runstate->time[RUNSTATE_blocked];
- per_cpu(local_time.accounted_stolen, cpu) =
- runstate->time[RUNSTATE_runnable] +
- runstate->time[RUNSTATE_offline];
-}
-
static cycle_t cs_last;
static cycle_t xen_clocksource_read(struct clocksource *cs)
{
#ifdef CONFIG_SMP
cycle_t last = get64(&cs_last);
- cycle_t ret = local_clock();
+ cycle_t ret = xen_local_clock();
if (unlikely((s64)(ret - last) < 0)) {
if (last - ret > permitted_clock_jitter
last = cur;
}
#else
- return local_clock();
+ return xen_local_clock();
#endif
}
/* No locking required. Interrupts are disabled on all CPUs. */
static void xen_clocksource_resume(void)
{
+ unsigned long seq;
unsigned int cpu;
init_cpu_khz();
- for_each_online_cpu(cpu) {
- switch (HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, cpu,
- &xen_set_periodic_tick)) {
- case 0:
-#if CONFIG_XEN_COMPAT <= 0x030004
- case -ENOSYS:
-#endif
- break;
- default:
- BUG();
- }
+ for_each_online_cpu(cpu)
get_time_values_from_xen(cpu);
- per_cpu(local_time.accounted_system, cpu) =
- per_cpu(local_time.processed_system, cpu) =
- per_cpu(shadow_time, 0).system_timestamp;
- init_missing_ticks_accounting(cpu);
- }
- processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
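+ /* Rebase the jiffies/system-time mapping across the suspend gap. */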
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ jiffies_bias = jiffies_64;
+ } while (read_seqretry(&xtime_lock, seq));
+ system_time_bias = per_cpu(shadow_time, 0).system_timestamp;
- cs_last = local_clock();
+ cs_last = xen_local_clock();
}
static struct clocksource clocksource_xen = {
rmb();
} while ((s->wc_version & 1) | (version ^ s->wc_version));
- delta = local_clock() + (u64)sec * NSEC_PER_SEC + nsec;
+ delta = xen_local_clock() + (u64)sec * NSEC_PER_SEC + nsec;
do_div(delta, NSEC_PER_SEC);
ts->tv_sec = delta;
return 0;
}
-/* Dynamically-mapped IRQ. */
-static int __read_mostly timer_irq = -1;
-static struct irqaction timer_action = {
- .handler = timer_interrupt,
- .flags = IRQF_DISABLED|IRQF_TIMER,
- .name = "timer"
-};
-
-static void __init setup_cpu0_timer_irq(void)
-{
- timer_irq = bind_virq_to_irqaction(VIRQ_TIMER, 0, &timer_action);
- BUG_ON(timer_irq < 0);
-}
-
void __init time_init(void)
{
init_cpu_khz();
printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n",
cpu_khz / 1000, cpu_khz % 1000);
- switch (HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, 0,
- &xen_set_periodic_tick)) {
- case 0:
-#if CONFIG_XEN_COMPAT <= 0x030004
- case -ENOSYS:
-#endif
- break;
- default:
- BUG();
- }
-
+ setup_runstate_area(0);
get_time_values_from_xen(0);
- processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
- per_cpu(local_time.processed_system, 0) = processed_system_time;
- per_cpu(local_time.accounted_system, 0) = processed_system_time;
- init_missing_ticks_accounting(0);
+ jiffies_bias = jiffies_64;
+ system_time_bias = per_cpu(shadow_time, 0).system_timestamp;
clocksource_register(&clocksource_xen);
use_tsc_delay();
/* Cannot request_irq() until kmem is initialised. */
- late_time_init = setup_cpu0_timer_irq;
-
- if (!(duty_limit + 2))
- duty_limit = __fls(nr_cpu_ids);
+ late_time_init = xen_clockevents_init;
}
/* Convert jiffies to system time. */
if (delta < 1) {
/* Triggers in some wrap-around cases, but that's okay:
* we just end up with a shorter timeout. */
- st = processed_system_time + NS_PER_TICK;
+ st = processed_system_time() + NS_PER_TICK;
} else if (((unsigned long)delta >> (BITS_PER_LONG-3)) != 0) {
/* Very long timeout means there is no pending timer.
* We indicate this to Xen by passing zero timeout. */
st = 0;
} else {
- st = processed_system_time + delta * (u64)NS_PER_TICK;
+ st = processed_system_time() + delta * (u64)NS_PER_TICK;
}
} while (read_seqretry(&xtime_lock, seq));
}
EXPORT_SYMBOL(jiffies_to_st);
-/*
- * stop_hz_timer / start_hz_timer - enter/exit 'tickless mode' on an idle cpu
- * These functions are based on implementations from arch/s390/kernel/time.c
- */
-static void stop_hz_timer(void)
-{
- struct vcpu_set_singleshot_timer singleshot;
- unsigned int cpu = smp_processor_id();
- unsigned long j;
- u64 local;
- int rc;
-
- cpumask_set_cpu(cpu, nohz_cpu_mask);
-
- /* See matching smp_mb in rcu_start_batch in rcupdate.c. These mbs */
- /* ensure that if __rcu_pending (nested in rcu_needs_cpu) fetches a */
- /* value of rcp->cur that matches rdp->quiescbatch and allows us to */
- /* stop the hz timer then the cpumasks created for subsequent values */
- /* of cur in rcu_start_batch are guaranteed to pick up the updated */
- /* nohz_cpu_mask and so will not depend on this cpu. */
-
- smp_mb();
-
- /* Leave ourselves in tick mode if rcu or softirq or timer pending. */
- if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
- local_softirq_pending() ||
- (j = get_next_timer_interrupt(jiffies),
- time_before_eq(j, jiffies))) {
- cpumask_clear_cpu(cpu, nohz_cpu_mask);
- j = jiffies + 1;
- }
-
- singleshot.timeout_abs_ns = jiffies_to_st(j);
- if (!singleshot.timeout_abs_ns)
- return;
- local = per_cpu(local_time.processed_system, cpu);
- if ((s64)(singleshot.timeout_abs_ns - local) <= NS_PER_TICK) {
- cpumask_clear_cpu(cpu, nohz_cpu_mask);
- singleshot.timeout_abs_ns = local + NS_PER_TICK;
- }
- singleshot.timeout_abs_ns += NS_PER_TICK / 2;
- singleshot.flags = 0;
- rc = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &singleshot);
-#if CONFIG_XEN_COMPAT <= 0x030004
- if (rc) {
- BUG_ON(rc != -ENOSYS);
- rc = HYPERVISOR_set_timer_op(singleshot.timeout_abs_ns);
- }
-#endif
- BUG_ON(rc);
-}
-
-static void start_hz_timer(void)
-{
- unsigned int cpu = smp_processor_id();
- int rc = HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, cpu, NULL);
-
-#if CONFIG_XEN_COMPAT <= 0x030004
- if (rc) {
- BUG_ON(rc != -ENOSYS);
- rc = HYPERVISOR_set_timer_op(0);
- }
-#endif
- BUG_ON(rc);
- cpumask_clear_cpu(cpu, nohz_cpu_mask);
-}
-
void xen_safe_halt(void)
{
- stop_hz_timer();
/* Blocking includes an implicit local_irq_enable(). */
HYPERVISOR_block();
- start_hz_timer();
}
EXPORT_SYMBOL(xen_safe_halt);
}
EXPORT_SYMBOL(xen_halt);
-#ifdef CONFIG_SMP
-int __cpuinit local_setup_timer(unsigned int cpu)
-{
- int seq, irq;
-
- BUG_ON(cpu == 0);
-
- switch (HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, cpu,
- &xen_set_periodic_tick)) {
- case 0:
-#if CONFIG_XEN_COMPAT <= 0x030004
- case -ENOSYS:
-#endif
- break;
- default:
- BUG();
- }
-
- do {
- seq = read_seqbegin(&xtime_lock);
- /* Use cpu0 timestamp: cpu's shadow is not initialised yet. */
- per_cpu(local_time.accounted_system, cpu) =
- per_cpu(local_time.processed_system, cpu) =
- per_cpu(shadow_time, 0).system_timestamp;
- init_missing_ticks_accounting(cpu);
- } while (read_seqretry(&xtime_lock, seq));
-
- irq = bind_virq_to_irqaction(VIRQ_TIMER, cpu, &timer_action);
- if (irq < 0)
- return irq;
- BUG_ON(timer_irq != irq);
-
- return 0;
-}
-
-void __cpuinit local_teardown_timer(unsigned int cpu)
-{
- BUG_ON(cpu == 0);
- unbind_from_per_cpu_irq(timer_irq, cpu, &timer_action);
-}
-#endif
-
#ifdef CONFIG_CPU_FREQ
static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
void *data)
--- /dev/null
+/*
+ * Xen clockevent functions
+ *
+ * See arch/x86/xen/time.c for copyright and credits for derived
+ * portions of this file.
+ *
+ * Xen clockevent implementation
+ *
+ * Xen has two clockevent implementations:
+ *
+ * The old timer_op one works with all released versions of Xen prior
+ * to version 3.0.4. This version of the hypervisor provides a
+ * single-shot timer with nanosecond resolution. However, a 100Hz tick
+ * sharing the same event channel is delivered while the vcpu is
+ * running. We don't care about or use this tick, but it will
+ * cause the core time code to think the timer fired too soon, and
+ * will end up resetting it each time. It could be filtered, but
+ * doing so has complications when the ktime clocksource is not yet
+ * the xen clocksource (ie, at boot time).
+ *
+ * The new vcpu_op-based timer interface allows the tick timer period
+ * to be changed or turned off. The tick timer is not useful as a
+ * periodic timer because events are only delivered to running vcpus.
+ * The one-shot timer can report when a timeout is in the past, so
+ * set_next_event is capable of returning -ETIME when appropriate.
+ * This interface is used when available.
+ */
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/math64.h>
+#include <asm/xen/hypervisor.h>
+#include <xen/clock.h>
+#include <xen/evtchn.h>
+#include <xen/interface/vcpu.h>
+
+#define XEN_SHIFT 22
+
+/* Xen may fire a timer up to this many ns early */
+#define TIMER_SLOP 100000
+#define NS_PER_TICK (1000000000LL / HZ)
+
+/*
+ * Get a hypervisor absolute time. In theory we could maintain an
+ * offset between the kernel's time and the hypervisor's time, and
+ * apply that to a kernel's absolute timeout. Unfortunately the
+ * hypervisor and kernel times can drift even if the kernel is using
+ * the Xen clocksource, because ntp can warp the kernel's clocksource.
+ */
+static u64 get_abs_timeout(unsigned long delta)
+{
+ return xen_local_clock() + delta;
+}
+
+#if CONFIG_XEN_COMPAT <= 0x030004
+static void timerop_set_mode(enum clock_event_mode mode,
+ struct clock_event_device *evt)
+{
+ switch (mode) {
+ case CLOCK_EVT_MODE_PERIODIC:
+ WARN_ON(1); /* unsupported */
+ break;
+
+ case CLOCK_EVT_MODE_ONESHOT:
+ case CLOCK_EVT_MODE_RESUME:
+ break;
+
+ case CLOCK_EVT_MODE_UNUSED:
+ case CLOCK_EVT_MODE_SHUTDOWN:
+ if (HYPERVISOR_set_timer_op(0)) /* cancel timeout */
+ BUG();
+ break;
+ }
+}
+
+static int timerop_set_next_event(unsigned long delta,
+ struct clock_event_device *evt)
+{
+ WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);
+
+ if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0)
+ BUG();
+
+ /*
+ * We may have missed the deadline, but there's no real way of
+ * knowing for sure. If the event was in the past, then we'll
+ * get an immediate interrupt.
+ */
+
+ return 0;
+}
+#endif
+
+static void vcpuop_set_mode(enum clock_event_mode mode,
+ struct clock_event_device *evt)
+{
+ switch (mode) {
+ case CLOCK_EVT_MODE_PERIODIC:
+ WARN_ON(1); /* unsupported */
+ break;
+
+ case CLOCK_EVT_MODE_UNUSED:
+ case CLOCK_EVT_MODE_SHUTDOWN:
+ if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer,
+ smp_processor_id(), NULL))
+ BUG();
+ /* fall through */
+ case CLOCK_EVT_MODE_ONESHOT:
+ if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer,
+ smp_processor_id(), NULL))
+ BUG();
+ break;
+
+ case CLOCK_EVT_MODE_RESUME:
+ break;
+ }
+}
+
+static int vcpuop_set_next_event(unsigned long delta,
+ struct clock_event_device *evt)
+{
+ struct vcpu_set_singleshot_timer single;
+ int ret;
+
+ WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);
+
+ single.timeout_abs_ns = get_abs_timeout(delta);
+ single.flags = VCPU_SSHOTTMR_future;
+
+ ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer,
+ smp_processor_id(), &single);
+
+ BUG_ON(ret != 0 && ret != -ETIME);
+
+ return ret;
+}
+
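+/*
+ * With mult = 1 and shift = 0 the clockevent core hands
+ * set_next_event() its deltas in plain nanoseconds, which is exactly
+ * what the hypervisor timer ops expect.
+ */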
+static DEFINE_PER_CPU(struct clock_event_device, xen_clock_event) = {
+ .name = "xen",
+ .features = CLOCK_EVT_FEAT_ONESHOT,
+
+ .max_delta_ns = 0xffffffff,
+ .min_delta_ns = TIMER_SLOP,
+
+ .mult = 1,
+ .shift = 0,
+ .rating = 500,
+
+ .irq = -1,
+};
+
+/* snapshots of runstate info */
+static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
+
+/* unused ns of stolen and blocked time */
+static DEFINE_PER_CPU(unsigned int, xen_residual_stolen);
+static DEFINE_PER_CPU(unsigned int, xen_residual_blocked);
+
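+/* Reset the runstate baseline so stolen/blocked time is measured from now. */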
+static void init_missing_ticks_accounting(unsigned int cpu)
+{
+ per_cpu(xen_runstate_snapshot, cpu) = *setup_runstate_area(cpu);
+ if (cpu == smp_processor_id())
+ get_runstate_snapshot(&__get_cpu_var(xen_runstate_snapshot));
+ per_cpu(xen_residual_stolen, cpu) = 0;
+ per_cpu(xen_residual_blocked, cpu) = 0;
+}
+
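+/*
+ * Per-VCPU timer interrupt: run the clockevent handler, pick up any
+ * pending wallclock update, and account stolen and blocked time
+ * since the last runstate snapshot.
+ */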
+static irqreturn_t timer_interrupt(int irq, void *dev_id)
+{
+ struct clock_event_device *evt = &__get_cpu_var(xen_clock_event);
+ struct vcpu_runstate_info state, *snap;
+ s64 blocked, stolen;
+ irqreturn_t ret = IRQ_NONE;
+
+ if (evt->event_handler) {
+ evt->event_handler(evt);
+ ret = IRQ_HANDLED;
+ }
+
+ xen_check_wallclock_update();
+
+ get_runstate_snapshot(&state);
+ snap = &__get_cpu_var(xen_runstate_snapshot);
+
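+ /*
+ * Runnable + offline time since the last snapshot was stolen from
+ * us; blocked time was idle. Sub-tick remainders are carried over
+ * in the xen_residual_* counters.
+ */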
+ stolen = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable]
+ + state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline]
+ + percpu_read(xen_residual_stolen);
+
+ if (stolen >= NS_PER_TICK)
+ account_steal_ticks(div_u64_rem(stolen, NS_PER_TICK,
+ &__get_cpu_var(xen_residual_stolen)));
+ else
+ percpu_write(xen_residual_stolen, stolen > 0 ? stolen : 0);
+
+ blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked]
+ + percpu_read(xen_residual_blocked);
+
+ if (blocked >= NS_PER_TICK)
+ account_idle_ticks(div_u64_rem(blocked, NS_PER_TICK,
+ &__get_cpu_var(xen_residual_blocked)));
+ else
+ percpu_write(xen_residual_blocked, blocked > 0 ? blocked : 0);
+
+ *snap = state;
+
+ return ret;
+}
+
+static struct irqaction timer_action = {
+ .handler = timer_interrupt,
+ .flags = IRQF_DISABLED|IRQF_TIMER,
+ .name = "timer"
+};
+
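+/* Register the clockevent device of the CPU this is invoked on. */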
+void __cpuinit xen_setup_cpu_clockevents(void)
+{
+ unsigned int cpu = smp_processor_id();
+ struct clock_event_device *evt = &per_cpu(xen_clock_event, cpu);
+
+ init_missing_ticks_accounting(cpu);
+
+ evt->cpumask = cpumask_of(cpu);
+ clockevents_register_device(evt);
+}
+
+#ifdef CONFIG_SMP
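+/* Bind VIRQ_TIMER for a secondary CPU, reusing the ops chosen at init. */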
+int __cpuinit local_setup_timer(unsigned int cpu)
+{
+ struct clock_event_device *evt = &per_cpu(xen_clock_event, cpu);
+
+ BUG_ON(cpu == smp_processor_id());
+
+ evt->irq = bind_virq_to_irqaction(VIRQ_TIMER, cpu, &timer_action);
+ if (evt->irq < 0)
+ return evt->irq;
+ BUG_ON(per_cpu(xen_clock_event.irq, 0) != evt->irq);
+
+ evt->set_mode = percpu_read(xen_clock_event.set_mode);
+ evt->set_next_event = percpu_read(xen_clock_event.set_next_event);
+
+ return 0;
+}
+
+void __cpuinit local_teardown_timer(unsigned int cpu)
+{
+ struct clock_event_device *evt = &per_cpu(xen_clock_event, cpu);
+
+ BUG_ON(cpu == 0);
+ unbind_from_per_cpu_irq(evt->irq, cpu, &timer_action);
+}
+#endif
+
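+/*
+ * On resume, reset the runstate baselines and keep the legacy
+ * periodic tick disabled (only needed for the vcpu_op interface).
+ */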
+void xen_clockevents_resume(void)
+{
+ unsigned int cpu;
+
+ if (percpu_read(xen_clock_event.set_mode) != vcpuop_set_mode)
+ return;
+
+ for_each_online_cpu(cpu) {
+ init_missing_ticks_accounting(cpu);
+ if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
+ BUG();
+ }
+}
+
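+/*
+ * Probe which timer interface the hypervisor provides, then bind and
+ * register the boot CPU's timer.
+ */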
+void __init xen_clockevents_init(void)
+{
+ unsigned int cpu = smp_processor_id();
+ struct clock_event_device *evt = &__get_cpu_var(xen_clock_event);
+
+ switch (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer,
+ cpu, NULL)) {
+ case 0:
+ /*
+ * Successfully turned off 100Hz tick, so we have the
+ * vcpuop-based timer interface
+ */
+ evt->set_mode = vcpuop_set_mode;
+ evt->set_next_event = vcpuop_set_next_event;
+ break;
+#if CONFIG_XEN_COMPAT <= 0x030004
+ case -ENOSYS:
+ printk(KERN_DEBUG "Xen: using timerop interface\n");
+ evt->set_mode = timerop_set_mode;
+ evt->set_next_event = timerop_set_next_event;
+ break;
+#endif
+ default:
+ BUG();
+ }
+
+ evt->irq = bind_virq_to_irqaction(VIRQ_TIMER, cpu, &timer_action);
+ BUG_ON(evt->irq < 0);
+
+ xen_setup_cpu_clockevents();
+}