KVM: Ensure all vcpus are consistent with in-kernel irqchip settings
[linux-flexiantxendom0.git] / arch / ia64 / kvm / kvm-ia64.c
index 28af6a7..7073185 100644 (file)
@@ -23,8 +23,8 @@
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/percpu.h>
-#include <linux/gfp.h>
 #include <linux/fs.h>
+#include <linux/slab.h>
 #include <linux/smp.h>
 #include <linux/kvm_host.h>
 #include <linux/kvm.h>
@@ -33,6 +33,7 @@
 #include <linux/uaccess.h>
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
+#include <linux/pci.h>
 
 #include <asm/pgtable.h>
 #include <asm/gcc_intrin.h>
@@ -41,6 +42,9 @@
 #include <asm/div64.h>
 #include <asm/tlb.h>
 #include <asm/elf.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/clksupport.h>
+#include <asm/sn/shub_mmr.h>
 
 #include "misc.h"
 #include "vti.h"
@@ -65,6 +69,16 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { NULL }
 };
 
+static unsigned long kvm_get_itc(struct kvm_vcpu *vcpu)
+{
+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+       if (vcpu->kvm->arch.is_sn2)
+               return rtc_time();
+       else
+#endif
+               return ia64_getreg(_IA64_REG_AR_ITC);
+}
+
 static void kvm_flush_icache(unsigned long start, unsigned long len)
 {
        int l;
@@ -111,7 +125,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
 
 static  DEFINE_SPINLOCK(vp_lock);
 
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
 {
        long  status;
        long  tmp_base;
@@ -119,21 +133,21 @@ void kvm_arch_hardware_enable(void *garbage)
        unsigned long saved_psr;
        int slot;
 
-       pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base),
-                               PAGE_KERNEL));
+       pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
        local_irq_save(saved_psr);
        slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
        local_irq_restore(saved_psr);
        if (slot < 0)
-               return;
+               return -EINVAL;
 
        spin_lock(&vp_lock);
        status = ia64_pal_vp_init_env(kvm_vsa_base ?
                                VP_INIT_ENV : VP_INIT_ENV_INITALIZE,
                        __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
        if (status != 0) {
+               spin_unlock(&vp_lock);
                printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
-               return ;
+               return -EINVAL;
        }
 
        if (!kvm_vsa_base) {
@@ -142,6 +156,8 @@ void kvm_arch_hardware_enable(void *garbage)
        }
        spin_unlock(&vp_lock);
        ia64_ptr_entry(0x3, slot);
+
+       return 0;
 }
 
 void kvm_arch_hardware_disable(void *garbage)
@@ -189,7 +205,7 @@ int kvm_dev_ioctl_check_extension(long ext)
                r = KVM_COALESCED_MMIO_PAGE_OFFSET;
                break;
        case KVM_CAP_IOMMU:
-               r = iommu_found();
+               r = iommu_present(&pci_bus_type);
                break;
        default:
                r = 0;
@@ -198,16 +214,6 @@ int kvm_dev_ioctl_check_extension(long ext)
 
 }
 
-static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
-                                       gpa_t addr, int len, int is_write)
-{
-       struct kvm_io_device *dev;
-
-       dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len, is_write);
-
-       return dev;
-}
-
 static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
        kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
@@ -219,6 +225,7 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
        struct kvm_mmio_req *p;
        struct kvm_io_device *mmio_dev;
+       int r;
 
        p = kvm_get_vcpu_ioreq(vcpu);
 
@@ -235,16 +242,13 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        kvm_run->exit_reason = KVM_EXIT_MMIO;
        return 0;
 mmio:
-       mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr, p->size, !p->dir);
-       if (mmio_dev) {
-               if (!p->dir)
-                       kvm_iodevice_write(mmio_dev, p->addr, p->size,
-                                               &p->data);
-               else
-                       kvm_iodevice_read(mmio_dev, p->addr, p->size,
-                                               &p->data);
-
-       } else
+       if (p->dir)
+               r = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, p->addr,
+                                   p->size, &p->data);
+       else
+               r = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, p->addr,
+                                    p->size, &p->data);
+       if (r)
                printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
        p->state = STATE_IORESP_READY;
 
@@ -283,6 +287,18 @@ static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 }
 
+static int __apic_accept_irq(struct kvm_vcpu *vcpu, uint64_t vector)
+{
+       struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+
+       if (!test_and_set_bit(vector, &vpd->irr[0])) {
+               vcpu->arch.irq_new_pending = 1;
+               kvm_vcpu_kick(vcpu);
+               return 1;
+       }
+       return 0;
+}
+
 /*
  *  offset: address offset to IPI space.
  *  value:  deliver value.
@@ -292,20 +308,20 @@ static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm,
 {
        switch (dm) {
        case SAPIC_FIXED:
-               kvm_apic_set_irq(vcpu, vector, 0);
                break;
        case SAPIC_NMI:
-               kvm_apic_set_irq(vcpu, 2, 0);
+               vector = 2;
                break;
        case SAPIC_EXTINT:
-               kvm_apic_set_irq(vcpu, 0, 0);
+               vector = 0;
                break;
        case SAPIC_INIT:
        case SAPIC_PMI:
        default:
                printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n");
-               break;
+               return;
        }
+       __apic_accept_irq(vcpu, vector);
 }
 
 static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
@@ -313,13 +329,12 @@ static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
 {
        union ia64_lid lid;
        int i;
+       struct kvm_vcpu *vcpu;
 
-       for (i = 0; i < kvm->arch.online_vcpus; i++) {
-               if (kvm->vcpus[i]) {
-                       lid.val = VCPU_LID(kvm->vcpus[i]);
-                       if (lid.id == id && lid.eid == eid)
-                               return kvm->vcpus[i];
-               }
+       kvm_for_each_vcpu(i, vcpu, kvm) {
+               lid.val = VCPU_LID(vcpu);
+               if (lid.id == id && lid.eid == eid)
+                       return vcpu;
        }
 
        return NULL;
@@ -385,21 +400,21 @@ static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        struct kvm *kvm = vcpu->kvm;
        struct call_data call_data;
        int i;
+       struct kvm_vcpu *vcpui;
 
        call_data.ptc_g_data = p->u.ptc_g_data;
 
-       for (i = 0; i < kvm->arch.online_vcpus; i++) {
-               if (!kvm->vcpus[i] || kvm->vcpus[i]->arch.mp_state ==
-                                               KVM_MP_STATE_UNINITIALIZED ||
-                                       vcpu == kvm->vcpus[i])
+       kvm_for_each_vcpu(i, vcpui, kvm) {
+               if (vcpui->arch.mp_state == KVM_MP_STATE_UNINITIALIZED ||
+                               vcpu == vcpui)
                        continue;
 
-               if (waitqueue_active(&kvm->vcpus[i]->wq))
-                       wake_up_interruptible(&kvm->vcpus[i]->wq);
+               if (waitqueue_active(&vcpui->wq))
+                       wake_up_interruptible(&vcpui->wq);
 
-               if (kvm->vcpus[i]->cpu != -1) {
-                       call_data.vcpu = kvm->vcpus[i];
-                       smp_call_function_single(kvm->vcpus[i]->cpu,
+               if (vcpui->cpu != -1) {
+                       call_data.vcpu = vcpui;
+                       smp_call_function_single(vcpui->cpu,
                                        vcpu_global_purge, &call_data, 1);
                } else
                        printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n");
@@ -413,6 +428,23 @@ static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        return 1;
 }
 
+static int kvm_sn2_setup_mappings(struct kvm_vcpu *vcpu)
+{
+       unsigned long pte, rtc_phys_addr, map_addr;
+       int slot;
+
+       map_addr = KVM_VMM_BASE + (1UL << KVM_VMM_SHIFT);
+       rtc_phys_addr = LOCAL_MMR_OFFSET | SH_RTC;
+       pte = pte_val(mk_pte_phys(rtc_phys_addr, PAGE_KERNEL_UC));
+       slot = ia64_itr_entry(0x3, map_addr, pte, PAGE_SHIFT);
+       vcpu->arch.sn_rtc_tr_slot = slot;
+       if (slot < 0) {
+               printk(KERN_ERR "Mayday mayday! RTC mapping failed!\n");
+               slot = 0;
+       }
+       return slot;
+}
+
 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 {
 
@@ -426,7 +458,7 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 
        if (irqchip_in_kernel(vcpu->kvm)) {
 
-               vcpu_now_itc = ia64_getreg(_IA64_REG_AR_ITC) + vcpu->arch.itc_offset;
+               vcpu_now_itc = kvm_get_itc(vcpu) + vcpu->arch.itc_offset;
 
                if (time_after(vcpu_now_itc, vpd->itm)) {
                        vcpu->arch.timer_check = 1;
@@ -447,10 +479,10 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
                hrtimer_cancel(p_ht);
                vcpu->arch.ht_active = 0;
 
-               if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
+               if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests) ||
+                               kvm_cpu_has_pending_timer(vcpu))
                        if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
-                               vcpu->arch.mp_state =
-                                       KVM_MP_STATE_RUNNABLE;
+                               vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 
                if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
                        return -EINTR;
@@ -551,22 +583,35 @@ static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu)
        if (r < 0)
                goto out;
        vcpu->arch.vm_tr_slot = r;
+
+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+       if (kvm->arch.is_sn2) {
+               r = kvm_sn2_setup_mappings(vcpu);
+               if (r < 0)
+                       goto out;
+       }
+#endif
+
        r = 0;
 out:
        return r;
-
 }
 
 static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu)
 {
-
+       struct kvm *kvm = vcpu->kvm;
        ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot);
        ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot);
-
+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+       if (kvm->arch.is_sn2)
+               ia64_ptr_entry(0x3, vcpu->arch.sn_rtc_tr_slot);
+#endif
 }
 
 static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu)
 {
+       unsigned long psr;
+       int r;
        int cpu = smp_processor_id();
 
        if (vcpu->arch.last_run_cpu != cpu ||
@@ -578,62 +623,58 @@ static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu)
 
        vcpu->arch.host_rr6 = ia64_get_rr(RR6);
        vti_set_rr6(vcpu->arch.vmm_rr);
-       return kvm_insert_vmm_mapping(vcpu);
+       local_irq_save(psr);
+       r = kvm_insert_vmm_mapping(vcpu);
+       local_irq_restore(psr);
+       return r;
 }
+
 static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu)
 {
        kvm_purge_vmm_mapping(vcpu);
        vti_set_rr6(vcpu->arch.host_rr6);
 }
 
-static int  vti_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
        union context *host_ctx, *guest_ctx;
-       int r;
+       int r, idx;
 
-       /*Get host and guest context with guest address space.*/
-       host_ctx = kvm_get_host_context(vcpu);
-       guest_ctx = kvm_get_guest_context(vcpu);
-
-       r = kvm_vcpu_pre_transition(vcpu);
-       if (r < 0)
-               goto out;
-       kvm_vmm_info->tramp_entry(host_ctx, guest_ctx);
-       kvm_vcpu_post_transition(vcpu);
-       r = 0;
-out:
-       return r;
-}
-
-static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       int r;
+       idx = srcu_read_lock(&vcpu->kvm->srcu);
 
 again:
-       preempt_disable();
-       local_irq_disable();
-
        if (signal_pending(current)) {
-               local_irq_enable();
-               preempt_enable();
                r = -EINTR;
                kvm_run->exit_reason = KVM_EXIT_INTR;
                goto out;
        }
 
-       vcpu->guest_mode = 1;
+       preempt_disable();
+       local_irq_disable();
+
+       /*Get host and guest context with guest address space.*/
+       host_ctx = kvm_get_host_context(vcpu);
+       guest_ctx = kvm_get_guest_context(vcpu);
+
+       clear_bit(KVM_REQ_KICK, &vcpu->requests);
+
+       r = kvm_vcpu_pre_transition(vcpu);
+       if (r < 0)
+               goto vcpu_run_fail;
+
+       srcu_read_unlock(&vcpu->kvm->srcu, idx);
+       vcpu->mode = IN_GUEST_MODE;
        kvm_guest_enter();
-       down_read(&vcpu->kvm->slots_lock);
-       r = vti_vcpu_run(vcpu, kvm_run);
-       if (r < 0) {
-               local_irq_enable();
-               preempt_enable();
-               kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
-               goto out;
-       }
+
+       /*
+        * Transition to the guest
+        */
+       kvm_vmm_info->tramp_entry(host_ctx, guest_ctx);
+
+       kvm_vcpu_post_transition(vcpu);
 
        vcpu->arch.launched = 1;
-       vcpu->guest_mode = 0;
+       set_bit(KVM_REQ_KICK, &vcpu->requests);
        local_irq_enable();
 
        /*
@@ -644,9 +685,11 @@ again:
         */
        barrier();
        kvm_guest_exit();
-       up_read(&vcpu->kvm->slots_lock);
+       vcpu->mode = OUTSIDE_GUEST_MODE;
        preempt_enable();
 
+       idx = srcu_read_lock(&vcpu->kvm->srcu);
+
        r = kvm_handle_exit(kvm_run, vcpu);
 
        if (r > 0) {
@@ -655,12 +698,20 @@ again:
        }
 
 out:
+       srcu_read_unlock(&vcpu->kvm->srcu, idx);
        if (r > 0) {
                kvm_resched(vcpu);
+               idx = srcu_read_lock(&vcpu->kvm->srcu);
                goto again;
        }
 
        return r;
+
+vcpu_run_fail:
+       local_irq_enable();
+       preempt_enable();
+       kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+       goto out;
 }
 
 static void kvm_set_mmio_data(struct kvm_vcpu *vcpu)
@@ -677,8 +728,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        int r;
        sigset_t sigsaved;
 
-       vcpu_load(vcpu);
-
        if (vcpu->sigset_active)
                sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
 
@@ -700,11 +749,10 @@ out:
        if (vcpu->sigset_active)
                sigprocmask(SIG_SETMASK, &sigsaved, NULL);
 
-       vcpu_put(vcpu);
        return r;
 }
 
-static struct kvm *kvm_alloc_kvm(void)
+struct kvm *kvm_arch_alloc_vm(void)
 {
 
        struct kvm *kvm;
@@ -715,7 +763,7 @@ static struct kvm *kvm_alloc_kvm(void)
        vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
 
        if (!vm_base)
-               return ERR_PTR(-ENOMEM);
+               return NULL;
 
        memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
        kvm = (struct kvm *)(vm_base +
@@ -761,10 +809,12 @@ static void kvm_build_io_pmt(struct kvm *kvm)
 #define GUEST_PHYSICAL_RR4     0x2739
 #define VMM_INIT_RR            0x1660
 
-static void kvm_init_vm(struct kvm *kvm)
+int kvm_arch_init_vm(struct kvm *kvm)
 {
        BUG_ON(!kvm);
 
+       kvm->arch.is_sn2 = ia64_platform_is("sn2");
+
        kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
        kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
        kvm->arch.vmm_init_rr = VMM_INIT_RR;
@@ -778,20 +828,8 @@ static void kvm_init_vm(struct kvm *kvm)
 
        /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
        set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
-}
-
-struct  kvm *kvm_arch_create_vm(void)
-{
-       struct kvm *kvm = kvm_alloc_kvm();
-
-       if (IS_ERR(kvm))
-               return ERR_PTR(-ENOMEM);
-       kvm_init_vm(kvm);
-
-       kvm->arch.online_vcpus = 0;
-
-       return kvm;
 
+       return 0;
 }
 
 static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
@@ -802,8 +840,7 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
        r = 0;
        switch (chip->chip_id) {
        case KVM_IRQCHIP_IOAPIC:
-               memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm),
-                               sizeof(struct kvm_ioapic_state));
+               r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
                break;
        default:
                r = -EINVAL;
@@ -819,9 +856,7 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
        r = 0;
        switch (chip->chip_id) {
        case KVM_IRQCHIP_IOAPIC:
-               memcpy(ioapic_irqchip(kvm),
-                               &chip->chip.ioapic,
-                               sizeof(struct kvm_ioapic_state));
+               r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
                break;
        default:
                r = -EINVAL;
@@ -837,8 +872,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
        int i;
 
-       vcpu_load(vcpu);
-
        for (i = 0; i < 16; i++) {
                vpd->vgr[i] = regs->vpd.vgr[i];
                vpd->vbgr[i] = regs->vpd.vbgr[i];
@@ -882,11 +915,9 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        RESTORE_REGS(saved_gp);
 
        vcpu->arch.irq_new_pending = 1;
-       vcpu->arch.itc_offset = regs->saved_itc - ia64_getreg(_IA64_REG_AR_ITC);
+       vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu);
        set_bit(KVM_REQ_RESUME, &vcpu->requests);
 
-       vcpu_put(vcpu);
-
        return 0;
 }
 
@@ -895,7 +926,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 {
        struct kvm *kvm = filp->private_data;
        void __user *argp = (void __user *)arg;
-       int r = -EINVAL;
+       int r = -ENOTTY;
 
        switch (ioctl) {
        case KVM_SET_MEMORY_REGION: {
@@ -923,7 +954,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
                        goto out;
                r = kvm_setup_default_irq_routing(kvm);
                if (r) {
-                       kfree(kvm->arch.vioapic);
+                       mutex_lock(&kvm->slots_lock);
+                       kvm_ioapic_destroy(kvm);
+                       mutex_unlock(&kvm->slots_lock);
                        goto out;
                }
                break;
@@ -934,13 +967,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
                r = -EFAULT;
                if (copy_from_user(&irq_event, argp, sizeof irq_event))
                        goto out;
+               r = -ENXIO;
                if (irqchip_in_kernel(kvm)) {
                        __s32 status;
-                       mutex_lock(&kvm->lock);
                        status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
                                    irq_event.irq, irq_event.level);
-                       mutex_unlock(&kvm->lock);
                        if (ioctl == KVM_IRQ_LINE_STATUS) {
+                               r = -EFAULT;
                                irq_event.status = status;
                                if (copy_to_user(argp, &irq_event,
                                                        sizeof irq_event))
@@ -1041,10 +1074,6 @@ static void kvm_free_vmm_area(void)
        }
 }
 
-static void vti_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
-{
-}
-
 static int vti_init_vpd(struct kvm_vcpu *vcpu)
 {
        int i;
@@ -1140,6 +1169,11 @@ out:
 
 #define PALE_RESET_ENTRY    0x80000000ffffffb0UL
 
+bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
+{
+       return irqchip_in_kernel(vcpu->kcm) == (vcpu->arch.apic != NULL);
+}
+
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
        struct kvm_vcpu *v;
@@ -1156,15 +1190,15 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
        if (IS_ERR(vmm_vcpu))
                return PTR_ERR(vmm_vcpu);
 
-       if (vcpu->vcpu_id == 0) {
+       if (kvm_vcpu_is_bsp(vcpu)) {
                vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 
                /*Set entry address for first run.*/
                regs->cr_iip = PALE_RESET_ENTRY;
 
                /*Initialize itc offset for vcpus*/
-               itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC);
-               for (i = 0; i < kvm->arch.online_vcpus; i++) {
+               itc_offset = 0UL - kvm_get_itc(vcpu);
+               for (i = 0; i < KVM_MAX_VCPUS; i++) {
                        v = (struct kvm_vcpu *)((char *)vcpu +
                                        sizeof(struct kvm_vcpu_data) * i);
                        v->arch.itc_offset = itc_offset;
@@ -1195,7 +1229,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
        p_ctx->cr[2] = (unsigned long)kvm_vmm_info->vmm_ivt;
        p_ctx->cr[8] = 0x3c;
 
-       /*Initilize region register*/
+       /*Initialize region register*/
        p_ctx->rr[0] = 0x30;
        p_ctx->rr[1] = 0x30;
        p_ctx->rr[2] = 0x30;
@@ -1204,7 +1238,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
        p_ctx->rr[5] = 0x30;
        p_ctx->rr[7] = 0x30;
 
-       /*Initilize branch register 0*/
+       /*Initialize branch register 0*/
        p_ctx->br[0] = *(unsigned long *)kvm_vmm_info->vmm_entry;
 
        vcpu->arch.vmm_rr = kvm->arch.vmm_init_rr;
@@ -1235,6 +1269,7 @@ static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id)
 
        local_irq_save(psr);
        r = kvm_insert_vmm_mapping(vcpu);
+       local_irq_restore(psr);
        if (r)
                goto fail;
        r = kvm_vcpu_init(vcpu, vcpu->kvm, id);
@@ -1252,13 +1287,11 @@ static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id)
                goto uninit;
 
        kvm_purge_vmm_mapping(vcpu);
-       local_irq_restore(psr);
 
        return 0;
 uninit:
        kvm_vcpu_uninit(vcpu);
 fail:
-       local_irq_restore(psr);
        return r;
 }
 
@@ -1289,7 +1322,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
        vcpu->kvm = kvm;
 
        cpu = get_cpu();
-       vti_vcpu_load(vcpu, cpu);
        r = vti_vcpu_setup(vcpu, id);
        put_cpu();
 
@@ -1298,8 +1330,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
                goto fail;
        }
 
-       kvm->arch.online_vcpus++;
-
        return vcpu;
 fail:
        return ERR_PTR(r);
@@ -1326,7 +1356,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
        return -EINVAL;
 }
 
-static void free_kvm(struct kvm *kvm)
+void kvm_arch_free_vm(struct kvm *kvm)
 {
        unsigned long vm_base = kvm->arch.vm_base;
 
@@ -1339,12 +1369,14 @@ static void free_kvm(struct kvm *kvm)
 
 static void kvm_release_vm_pages(struct kvm *kvm)
 {
+       struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
        int i, j;
        unsigned long base_gfn;
 
-       for (i = 0; i < kvm->nmemslots; i++) {
-               memslot = &kvm->memslots[i];
+       slots = kvm_memslots(kvm);
+       for (i = 0; i < slots->nmemslots; i++) {
+               memslot = &slots->memslots[i];
                base_gfn = memslot->base_gfn;
 
                for (j = 0; j < memslot->npages; j++) {
@@ -1366,8 +1398,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 #endif
        kfree(kvm->arch.vioapic);
        kvm_release_vm_pages(kvm);
-       kvm_free_physmem(kvm);
-       free_kvm(kvm);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -1425,7 +1455,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        }
        for (i = 0; i < 4; i++)
                regs->insvc[i] = vcpu->arch.insvc[i];
-       regs->saved_itc = vcpu->arch.itc_offset + ia64_getreg(_IA64_REG_AR_ITC);
+       regs->saved_itc = vcpu->arch.itc_offset + kvm_get_itc(vcpu);
        SAVE_REGS(xtp);
        SAVE_REGS(metaphysical_rr0);
        SAVE_REGS(metaphysical_rr4);
@@ -1497,8 +1527,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                        goto out;
 
                if (copy_to_user(user_stack, stack,
-                                sizeof(struct kvm_ia64_vcpu_stack)))
+                                sizeof(struct kvm_ia64_vcpu_stack))) {
+                       r = -EFAULT;
                        goto out;
+               }
 
                break;
        }
@@ -1538,15 +1570,15 @@ out:
        return r;
 }
 
-int kvm_arch_set_memory_region(struct kvm *kvm,
-               struct kvm_userspace_memory_region *mem,
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+               struct kvm_memory_slot *memslot,
                struct kvm_memory_slot old,
+               struct kvm_userspace_memory_region *mem,
                int user_alloc)
 {
        unsigned long i;
        unsigned long pfn;
-       int npages = mem->memory_size >> PAGE_SHIFT;
-       struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
+       int npages = memslot->npages;
        unsigned long base_gfn = memslot->base_gfn;
 
        if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
@@ -1570,8 +1602,17 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
        return 0;
 }
 
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+               struct kvm_userspace_memory_region *mem,
+               struct kvm_memory_slot old,
+               int user_alloc)
+{
+       return;
+}
+
 void kvm_arch_flush_shadow(struct kvm *kvm)
 {
+       kvm_flush_remote_tlbs(kvm);
 }
 
 long kvm_arch_dev_ioctl(struct file *filp,
@@ -1614,8 +1655,37 @@ out:
        return 0;
 }
 
+
+/*
+ * On SN2, the ITC isn't stable, so copy in fast path code to use the
+ * SN2 RTC, replacing the ITC based default verion.
+ */
+static void kvm_patch_vmm(struct kvm_vmm_info *vmm_info,
+                         struct module *module)
+{
+       unsigned long new_ar, new_ar_sn2;
+       unsigned long module_base;
+
+       if (!ia64_platform_is("sn2"))
+               return;
+
+       module_base = (unsigned long)module->module_core;
+
+       new_ar = kvm_vmm_base + vmm_info->patch_mov_ar - module_base;
+       new_ar_sn2 = kvm_vmm_base + vmm_info->patch_mov_ar_sn2 - module_base;
+
+       printk(KERN_INFO "kvm: Patching ITC emulation to use SGI SN2 RTC "
+              "as source\n");
+
+       /*
+        * Copy the SN2 version of mov_ar into place. They are both
+        * the same size, so 6 bundles is sufficient (6 * 0x10).
+        */
+       memcpy((void *)new_ar, (void *)new_ar_sn2, 0x60);
+}
+
 static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info,
-                                               struct module *module)
+                           struct module *module)
 {
        unsigned long module_base;
        unsigned long vmm_size;
@@ -1626,7 +1696,7 @@ static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info,
        BUG_ON(!module);
 
        if (!kvm_vmm_base) {
-               printk("kvm: kvm area hasn't been initilized yet!!\n");
+               printk("kvm: kvm area hasn't been initialized yet!!\n");
                return -EFAULT;
        }
 
@@ -1637,6 +1707,7 @@ static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info,
                return -EFAULT;
 
        memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size);
+       kvm_patch_vmm(vmm_info, module);
        kvm_flush_icache(kvm_vmm_base, vmm_size);
 
        /*Recalculate kvm_vmm_info based on new VMM*/
@@ -1720,50 +1791,46 @@ void kvm_arch_exit(void)
        kvm_vmm_info = NULL;
 }
 
-static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
-               struct kvm_dirty_log *log)
+static void kvm_ia64_sync_dirty_log(struct kvm *kvm,
+                                   struct kvm_memory_slot *memslot)
 {
-       struct kvm_memory_slot *memslot;
-       int r, i;
-       long n, base;
+       int i;
+       long base;
+       unsigned long n;
        unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
                        offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
 
-       r = -EINVAL;
-       if (log->slot >= KVM_MEMORY_SLOTS)
-               goto out;
-
-       memslot = &kvm->memslots[log->slot];
-       r = -ENOENT;
-       if (!memslot->dirty_bitmap)
-               goto out;
-
-       n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+       n = kvm_dirty_bitmap_bytes(memslot);
        base = memslot->base_gfn / BITS_PER_LONG;
 
+       spin_lock(&kvm->arch.dirty_log_lock);
        for (i = 0; i < n/sizeof(long); ++i) {
                memslot->dirty_bitmap[i] = dirty_bitmap[base + i];
                dirty_bitmap[base + i] = 0;
        }
-       r = 0;
-out:
-       return r;
+       spin_unlock(&kvm->arch.dirty_log_lock);
 }
 
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                struct kvm_dirty_log *log)
 {
        int r;
-       int n;
+       unsigned long n;
        struct kvm_memory_slot *memslot;
        int is_dirty = 0;
 
-       spin_lock(&kvm->arch.dirty_log_lock);
+       mutex_lock(&kvm->slots_lock);
 
-       r = kvm_ia64_sync_dirty_log(kvm, log);
-       if (r)
+       r = -EINVAL;
+       if (log->slot >= KVM_MEMORY_SLOTS)
                goto out;
 
+       memslot = &kvm->memslots->memslots[log->slot];
+       r = -ENOENT;
+       if (!memslot->dirty_bitmap)
+               goto out;
+
+       kvm_ia64_sync_dirty_log(kvm, memslot);
        r = kvm_get_dirty_log(kvm, log, &is_dirty);
        if (r)
                goto out;
@@ -1771,13 +1838,12 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
        /* If nothing is dirty, don't bother messing with page tables. */
        if (is_dirty) {
                kvm_flush_remote_tlbs(kvm);
-               memslot = &kvm->memslots[log->slot];
-               n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+               n = kvm_dirty_bitmap_bytes(memslot);
                memset(memslot->dirty_bitmap, 0, n);
        }
        r = 0;
 out:
-       spin_unlock(&kvm->arch.dirty_log_lock);
+       mutex_unlock(&kvm->slots_lock);
        return r;
 }
 
@@ -1790,38 +1856,24 @@ void kvm_arch_hardware_unsetup(void)
 {
 }
 
-static void vcpu_kick_intr(void *info)
-{
-#ifdef DEBUG
-       struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
-       printk(KERN_DEBUG"vcpu_kick_intr %p \n", vcpu);
-#endif
-}
-
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 {
-       int ipi_pcpu = vcpu->cpu;
-       int cpu = get_cpu();
+       int me;
+       int cpu = vcpu->cpu;
 
        if (waitqueue_active(&vcpu->wq))
                wake_up_interruptible(&vcpu->wq);
 
-       if (vcpu->guest_mode && cpu != ipi_pcpu)
-               smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0);
+       me = get_cpu();
+       if (cpu != me && (unsigned) cpu < nr_cpu_ids && cpu_online(cpu))
+               if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
+                       smp_send_reschedule(cpu);
        put_cpu();
 }
 
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig)
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
 {
-
-       struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-
-       if (!test_and_set_bit(vec, &vpd->irr[0])) {
-               vcpu->arch.irq_new_pending = 1;
-               kvm_vcpu_kick(vcpu);
-               return 1;
-       }
-       return 0;
+       return __apic_accept_irq(vcpu, irq->vector);
 }
 
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
@@ -1834,20 +1886,18 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
        return 0;
 }
 
-struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
-                                      unsigned long bitmap)
+int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
 {
-       struct kvm_vcpu *lvcpu = kvm->vcpus[0];
-       int i;
-
-       for (i = 1; i < kvm->arch.online_vcpus; i++) {
-               if (!kvm->vcpus[i])
-                       continue;
-               if (lvcpu->arch.xtp > kvm->vcpus[i]->arch.xtp)
-                       lvcpu = kvm->vcpus[i];
-       }
+       return vcpu1->arch.xtp - vcpu2->arch.xtp;
+}
 
-       return lvcpu;
+int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+               int short_hand, int dest, int dest_mode)
+{
+       struct kvm_lapic *target = vcpu->arch.apic;
+       return (dest_mode == 0) ?
+               kvm_apic_match_physical_addr(target, dest) :
+               kvm_apic_match_logical_addr(target, dest);
 }
 
 static int find_highest_bits(int *dat)
@@ -1879,34 +1929,21 @@ int kvm_highest_pending_irq(struct kvm_vcpu *vcpu)
     return find_highest_bits((int *)&vpd->irr[0]);
 }
 
-int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
-{
-       if (kvm_highest_pending_irq(vcpu) != -1)
-               return 1;
-       return 0;
-}
-
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
        return vcpu->arch.timer_fired;
 }
 
-gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
-{
-       return gfn;
-}
-
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
-       return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE;
+       return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) ||
+               (kvm_highest_pending_irq(vcpu) != -1);
 }
 
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
                                    struct kvm_mp_state *mp_state)
 {
-       vcpu_load(vcpu);
        mp_state->mp_state = vcpu->arch.mp_state;
-       vcpu_put(vcpu);
        return 0;
 }
 
@@ -1916,6 +1953,7 @@ static int vcpu_reset(struct kvm_vcpu *vcpu)
        long psr;
        local_irq_save(psr);
        r = kvm_insert_vmm_mapping(vcpu);
+       local_irq_restore(psr);
        if (r)
                goto fail;
 
@@ -1928,7 +1966,6 @@ static int vcpu_reset(struct kvm_vcpu *vcpu)
        kvm_purge_vmm_mapping(vcpu);
        r = 0;
 fail:
-       local_irq_restore(psr);
        return r;
 }
 
@@ -1937,10 +1974,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 {
        int r = 0;
 
-       vcpu_load(vcpu);
        vcpu->arch.mp_state = mp_state->mp_state;
        if (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)
                r = vcpu_reset(vcpu);
-       vcpu_put(vcpu);
        return r;
 }