x86: Add NumaChip support
author Steffen Persvold <sp@numascale.com>
Mon, 5 Dec 2011 16:07:26 +0000 (00:07 +0800)
committer Leann Ogasawara <leann.ogasawara@canonical.com>
Mon, 2 Apr 2012 20:21:26 +0000 (13:21 -0700)
Adds support for Numascale NumaChip large-SMP systems. It is
needed to enable the booting of more than ~168 cores.

v2:
 - [Steffen] enumerate only accessible northbridges
 - [Daniel] rediffed and validated against 3.1-rc10

v3:
 - [Daniel] use x86_init core numbering override
 - [Daniel] cleanups as per feedback

v4:
 - [Daniel] use updated x86_cpuinit override

v5:
 - drop disabling interrupts locally, as ISR write is atomic; drop delay
 - added read-mostly annotations where appropriate
 - require CONFIG_SMP, so drop conditional path

Workload tested on 96 cores/16 sockets.

Signed-off-by: Steffen Persvold <sp@numascale.com>
Signed-off-by: Daniel J Blueman <daniel@numascale-asia.com>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Link: http://lkml.kernel.org/r/1323101246-2400-1-git-send-email-daniel@numascale-asia.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
(cherry picked from commit 44b111b519160e33fdc41eadb39af86a24707edf)

Signed-off-by: Tim Gardner <tim.gardner@canonical.com>

arch/x86/Kconfig
arch/x86/include/asm/numachip/numachip_csr.h [new file with mode: 0644]
arch/x86/kernel/apic/Makefile
arch/x86/kernel/apic/apic_numachip.c [new file with mode: 0644]

index 5d55dcb..d8fbbb0 100644 (file)
@@ -343,6 +343,7 @@ config X86_EXTENDED_PLATFORM
 
          If you enable this option then you'll be able to select support
          for the following (non-PC) 64 bit x86 platforms:
+               Numascale NumaChip
                ScaleMP vSMP
                SGI Ultraviolet
 
@@ -351,6 +352,18 @@ config X86_EXTENDED_PLATFORM
 endif
 # This is an alphabetically sorted list of 64 bit extended platforms
 # Please maintain the alphabetic order if and when there are additions
+config X86_NUMACHIP
+       bool "Numascale NumaChip"
+       depends on X86_64
+       depends on X86_EXTENDED_PLATFORM
+       depends on NUMA
+       depends on SMP
+       depends on X86_X2APIC
+       depends on !EDAC_AMD64
+       ---help---
+         Adds support for Numascale NumaChip large-SMP systems. Needed to
+         enable more than ~168 cores.
+         If you don't have one of these, you should say N here.
 
 config X86_VSMP
        bool "ScaleMP vSMP"
diff --git a/arch/x86/include/asm/numachip/numachip_csr.h b/arch/x86/include/asm/numachip/numachip_csr.h
new file mode 100644 (file)
index 0000000..660f843
--- /dev/null
@@ -0,0 +1,167 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Numascale NumaConnect-Specific Header file
+ *
+ * Copyright (C) 2011 Numascale AS. All rights reserved.
+ *
+ * Send feedback to <support@numascale.com>
+ *
+ */
+
+#ifndef _ASM_X86_NUMACHIP_NUMACHIP_CSR_H
+#define _ASM_X86_NUMACHIP_NUMACHIP_CSR_H
+
+#include <linux/numa.h>
+#include <linux/percpu.h>
+#include <linux/io.h>
+#include <linux/swab.h>
+#include <asm/types.h>
+#include <asm/processor.h>
+
+#define CSR_NODE_SHIFT         16
+#define CSR_NODE_BITS(p)       (((unsigned long)(p)) << CSR_NODE_SHIFT)
+#define CSR_NODE_MASK          0x0fff          /* 4K nodes */
+
+/* 32K CSR space, b15 indicates geo/non-geo */
+#define CSR_OFFSET_MASK        0x7fffUL
+
+/* Global CSR space covers all 4K possible nodes with 64K CSR space per node */
+#define NUMACHIP_GCSR_BASE     0x3fff00000000ULL
+#define NUMACHIP_GCSR_LIM      0x3fff0fffffffULL
+#define NUMACHIP_GCSR_SIZE     (NUMACHIP_GCSR_LIM - NUMACHIP_GCSR_BASE + 1)
+
+/*
+ * Local CSR space starts in global CSR space with "nodeid" = 0xfff0, however
+ * when using the direct mapping on x86_64, both start and size need to be
+ * aligned with PMD_SIZE which is 2M
+ */
+#define NUMACHIP_LCSR_BASE     0x3ffffe000000ULL
+#define NUMACHIP_LCSR_LIM      0x3fffffffffffULL
+#define NUMACHIP_LCSR_SIZE     (NUMACHIP_LCSR_LIM - NUMACHIP_LCSR_BASE + 1)
+
+static inline void *gcsr_address(int node, unsigned long offset)
+{
+       unsigned long csr = (offset & CSR_OFFSET_MASK) | (1UL << 15); /* b15 always set */
+       return __va(NUMACHIP_GCSR_BASE | CSR_NODE_BITS(node & CSR_NODE_MASK) | csr);
+}
+
+static inline void *lcsr_address(unsigned long offset)
+{
+       unsigned long csr = (offset & CSR_OFFSET_MASK) | (1UL << 15); /* b15 always set */
+       return __va(NUMACHIP_LCSR_BASE | CSR_NODE_BITS(0xfff0) | csr);
+}
+
+static inline unsigned int read_gcsr(int node, unsigned long offset)
+{
+       return swab32(readl(gcsr_address(node, offset))); /* 32-bit read of node's CSR, then byte-swapped */
+}
+
+static inline void write_gcsr(int node, unsigned long offset, unsigned int val)
+{
+       writel(swab32(val), gcsr_address(node, offset)); /* val is byte-swapped before the 32-bit write */
+}
+
+static inline unsigned int read_lcsr(unsigned long offset)
+{
+       return swab32(readl(lcsr_address(offset))); /* 32-bit read of a local CSR, then byte-swapped */
+}
+
+static inline void write_lcsr(unsigned long offset, unsigned int val)
+{
+       writel(swab32(val), lcsr_address(offset)); /* val is byte-swapped before the 32-bit write */
+}
+
+/* ========================================================================= */
+/*                   CSR_G0_STATE_CLEAR                                      */
+/* ========================================================================= */
+
+#define CSR_G0_STATE_CLEAR (0x000 + (0 << 12)) /* register 0x000; group number 0 shifted to bit 12 */
+union numachip_csr_g0_state_clear {
+       unsigned int v; /* the whole register as one 32-bit word */
+       struct numachip_csr_g0_state_clear_s {
+               unsigned int _state:2;
+               unsigned int _rsvd_2_6:5; /* reserved; name indicates bits 2..6 */
+               unsigned int _lost:1;
+               unsigned int _rsvd_8_31:24; /* reserved; name indicates bits 8..31 */
+       } s; /* field view of v; the widths add up to 32 */
+};
+
+/* ========================================================================= */
+/*                   CSR_G0_NODE_IDS                                         */
+/* ========================================================================= */
+
+#define CSR_G0_NODE_IDS (0x008 + (0 << 12)) /* register 0x008; group number 0 shifted to bit 12 */
+union numachip_csr_g0_node_ids {
+       unsigned int v; /* the whole register as one 32-bit word */
+       struct numachip_csr_g0_node_ids_s {
+               unsigned int _initialid:16;
+               unsigned int _nodeid:12; /* 12 bits wide, same width as CSR_NODE_MASK */
+               unsigned int _rsvd_28_31:4; /* reserved; name indicates bits 28..31 */
+       } s; /* field view of v; the widths add up to 32 */
+};
+
+/* ========================================================================= */
+/*                   CSR_G3_EXT_IRQ_GEN                                      */
+/* ========================================================================= */
+
+#define CSR_G3_EXT_IRQ_GEN (0x030 + (3 << 12)) /* register 0x030; group number 3 shifted to bit 12 */
+union numachip_csr_g3_ext_irq_gen {
+       unsigned int v; /* the whole register as one 32-bit word */
+       struct numachip_csr_g3_ext_irq_gen_s {
+               unsigned int _vector:8;
+               unsigned int _msgtype:3; /* 3 bits; NOTE(review): exact encoding not visible in this header */
+               unsigned int _index:5;
+               unsigned int _destination_apic_id:16;
+       } s; /* field view of v; the widths add up to 32 */
+};
+
+/* ========================================================================= */
+/*                   CSR_G3_EXT_IRQ_STATUS                                   */
+/* ========================================================================= */
+
+#define CSR_G3_EXT_IRQ_STATUS (0x034 + (3 << 12)) /* register 0x034; group number 3 shifted to bit 12 */
+union numachip_csr_g3_ext_irq_status {
+       unsigned int v; /* the whole register as one 32-bit word */
+       struct numachip_csr_g3_ext_irq_status_s {
+               unsigned int _result:32; /* one field spanning all 32 bits */
+       } s;
+};
+
+/* ========================================================================= */
+/*                   CSR_G3_EXT_IRQ_DEST                                     */
+/* ========================================================================= */
+
+#define CSR_G3_EXT_IRQ_DEST (0x038 + (3 << 12)) /* register 0x038; group number 3 shifted to bit 12 */
+union numachip_csr_g3_ext_irq_dest {
+       unsigned int v; /* the whole register as one 32-bit word */
+       struct numachip_csr_g3_ext_irq_dest_s {
+               unsigned int _irq:8;
+               unsigned int _rsvd_8_31:24; /* reserved; name indicates bits 8..31 */
+       } s; /* field view of v; the widths add up to 32 */
+};
+
+/* ========================================================================= */
+/*                   CSR_G3_NC_ATT_MAP_SELECT                                */
+/* ========================================================================= */
+
+#define CSR_G3_NC_ATT_MAP_SELECT (0x7fc + (3 << 12)) /* register 0x7fc; group number 3 shifted to bit 12 */
+union numachip_csr_g3_nc_att_map_select {
+       unsigned int v; /* the whole register as one 32-bit word */
+       struct numachip_csr_g3_nc_att_map_select_s {
+               unsigned int _upper_address_bits:4;
+               unsigned int _select_ram:4;
+               unsigned int _rsvd_8_31:24; /* reserved; name indicates bits 8..31 */
+       } s; /* field view of v; the widths add up to 32 */
+};
+
+/* ========================================================================= */
+/*                   CSR_G3_NC_ATT_MAP_SELECT_0-255                          */
+/* ========================================================================= */
+
+#define CSR_G3_NC_ATT_MAP_SELECT_0 (0x800 + (3 << 12))
+
+#endif /* _ASM_X86_NUMACHIP_NUMACHIP_CSR_H */
+
index 767fd04..0ae0323 100644 (file)
@@ -10,6 +10,7 @@ obj-$(CONFIG_SMP)             += ipi.o
 
 ifeq ($(CONFIG_X86_64),y)
 # APIC probe will depend on the listing order here
+obj-$(CONFIG_X86_NUMACHIP)     += apic_numachip.o
 obj-$(CONFIG_X86_UV)           += x2apic_uv_x.o
 obj-$(CONFIG_X86_X2APIC)       += x2apic_phys.o
 obj-$(CONFIG_X86_X2APIC)       += x2apic_cluster.o
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
new file mode 100644 (file)
index 0000000..09d3d8c
--- /dev/null
@@ -0,0 +1,294 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Numascale NumaConnect-Specific APIC Code
+ *
+ * Copyright (C) 2011 Numascale AS. All rights reserved.
+ *
+ * Send feedback to <support@numascale.com>
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/hardirq.h>
+#include <linux/delay.h>
+
+#include <asm/numachip/numachip_csr.h>
+#include <asm/smp.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+#include <asm/apic_flat_64.h>
+
+static int numachip_system __read_mostly;
+
+static struct apic apic_numachip __read_mostly;
+
+static unsigned int get_apic_id(unsigned long x)
+{
+       unsigned long node_msr;
+
+       rdmsrl(MSR_FAM10H_NODE_ID, node_msr);
+
+       /* Low byte: bits 31:24 of x.  Bits 13:8: bits 11:6 of the node-ID MSR. */
+       node_msr = (node_msr << 2) & 0x3f00U;
+       return node_msr | ((x >> 24) & 0xffU);
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+       /*
+        * Only the low 8 bits of id are kept; they land in bits 31:24.
+        */
+       return (id & 0xffU) << 24;
+}
+
+static unsigned int read_xapic_id(void)
+{
+       return get_apic_id(apic_read(APIC_ID)); /* decode the raw APIC_ID register value */
+}
+
+static int numachip_apic_id_registered(void)
+{
+       return physid_isset(read_xapic_id(), phys_cpu_present_map); /* is this CPU's decoded ID in the present map? */
+}
+
+static int numachip_phys_pkg_id(int initial_apic_id, int index_msb)
+{
+       return initial_apic_id >> index_msb; /* discard the low index_msb bits of the ID */
+}
+
+static const struct cpumask *numachip_target_cpus(void)
+{
+       return cpu_online_mask; /* the online mask is handed back as-is */
+}
+
+static void numachip_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+       cpumask_clear(retmask); /* start from an empty mask ... */
+       cpumask_set_cpu(cpu, retmask); /* ... so that only 'cpu' ends up set */
+}
+
+static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip)
+{
+       union numachip_csr_g3_ext_irq_gen int_gen;
+       /* First message: INIT type, vector 0, addressed to phys_apicid. */
+       int_gen.s._destination_apic_id = phys_apicid;
+       int_gen.s._vector = 0;
+       int_gen.s._msgtype = APIC_DM_INIT >> 8;
+       int_gen.s._index = 0;
+
+       write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
+       /* Second message: STARTUP type; destination and index are reused. */
+       int_gen.s._msgtype = APIC_DM_STARTUP >> 8;
+       int_gen.s._vector = start_rip >> 12; /* low 12 bits of start_rip are dropped */
+
+       write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
+       /* Set the init_deasserted flag (defined outside this file); always returns 0. */
+       atomic_set(&init_deasserted, 1);
+       return 0;
+}
+
+static void numachip_send_IPI_one(int cpu, int vector)
+{
+       union numachip_csr_g3_ext_irq_gen msg;
+       unsigned int mode = vector == NMI_VECTOR ? APIC_DM_NMI : APIC_DM_FIXED;
+
+       msg.s._index = 0;
+       msg.s._msgtype = mode >> 8;
+       msg.s._vector = vector;
+       msg.s._destination_apic_id = per_cpu(x86_cpu_to_apicid, cpu);
+       /* Write the assembled word to the local IRQ-generation CSR. */
+       write_lcsr(CSR_G3_EXT_IRQ_GEN, msg.v);
+}
+
+static void numachip_send_IPI_mask(const struct cpumask *mask, int vector)
+{
+       unsigned int target;
+       /* Deliver the vector to each CPU set in mask, one message per CPU. */
+       for_each_cpu(target, mask)
+               numachip_send_IPI_one(target, vector);
+}
+
+static void numachip_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
+{
+       const unsigned int self = smp_processor_id();
+       unsigned int target;
+
+       for_each_cpu(target, mask) {
+               if (target == self)
+                       continue;       /* never signal the calling CPU */
+               numachip_send_IPI_one(target, vector);
+       }
+}
+
+static void numachip_send_IPI_allbutself(int vector)
+{
+       /*
+        * Every online CPU except the caller gets the vector.  The mask
+        * variant already walks a mask and skips smp_processor_id(), and
+        * for_each_online_cpu() is for_each_cpu() over cpu_online_mask,
+        * so handing it the online mask performs exactly that walk.
+        */
+       numachip_send_IPI_mask_allbutself(cpu_online_mask, vector);
+}
+
+static void numachip_send_IPI_all(int vector)
+{
+       numachip_send_IPI_mask(cpu_online_mask, vector); /* every online CPU; the caller is not skipped */
+}
+
+static void numachip_send_IPI_self(int vector)
+{
+       __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); /* default shortcut helper, not the CSR write used by the other senders */
+}
+
+static unsigned int numachip_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+       /*
+        * Fixed IRQ delivery targets exactly one physical APIC ID, so the
+        * lowest-numbered CPU in the mask is used.
+        */
+       unsigned int first = cpumask_first(cpumask);
+
+       /* An empty mask yields an index at or beyond nr_cpu_ids. */
+       if (unlikely(first >= nr_cpu_ids))
+               return BAD_APICID;
+
+       return per_cpu(x86_cpu_to_apicid, first);
+}
+
+static unsigned int
+numachip_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+                               const struct cpumask *andmask)
+{
+       int cpu;
+
+       /*
+        * Fixed IRQ delivery: return the APIC ID of the first online CPU in
+        * both masks, or BAD_APICID when there is none (cpu >= nr_cpu_ids).
+        */
+       for_each_cpu_and(cpu, cpumask, andmask) {
+               if (cpumask_test_cpu(cpu, cpu_online_mask))
+                       return per_cpu(x86_cpu_to_apicid, cpu);
+       }
+       return BAD_APICID;
+}
+
+static int __init numachip_probe(void)
+{
+       return apic == &apic_numachip; /* non-zero only when this driver is already the selected apic */
+}
+
+static void __init map_csrs(void)
+{
+       /* By the header's definitions, BASE + SIZE - 1 is exactly the LIM value. */
+       printk(KERN_INFO "NumaChip: Mapping local CSR space (%016llx - %016llx)\n",
+               NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_LIM);
+       init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE);
+       printk(KERN_INFO "NumaChip: Mapping global CSR space (%016llx - %016llx)\n",
+               NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_LIM);
+       init_extra_mapping_uc(NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_SIZE);
+}
+
+static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
+{
+       c->phys_proc_id = node; /* physical package ID is overridden with the node number */
+       per_cpu(cpu_llc_id, smp_processor_id()) = node; /* same value for the running CPU's cpu_llc_id */
+}
+
+static int __init numachip_system_init(void)
+{
+       /* Nothing is done unless the MADT OEM check set numachip_system. */
+       if (numachip_system) {
+               /* Install the CPU-ID override first, as before. */
+               x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
+
+               /* Map both CSR windows ahead of the local CSR read below. */
+               map_csrs();
+
+               printk(KERN_INFO "NumaChip: Local NodeID = %08x\n",
+                      read_lcsr(CSR_G0_NODE_IDS));
+       }
+
+       return 0;
+}
+early_initcall(numachip_system_init);
+
+static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+       /* Only the first six characters of the OEM ID are compared. */
+       if (strncmp(oem_id, "NUMASC", 6) != 0)
+               return 0;
+
+       numachip_system = 1;    /* consulted later by numachip_system_init() */
+       return 1;
+}
+
+static struct apic apic_numachip __refconst = {
+       /* Callback table for the NumaChip APIC driver; registered by apic_driver() below. */
+       .name                           = "NumaConnect system",
+       .probe                          = numachip_probe,
+       .acpi_madt_oem_check            = numachip_acpi_madt_oem_check,
+       .apic_id_registered             = numachip_apic_id_registered,
+
+       .irq_delivery_mode              = dest_Fixed,
+       .irq_dest_mode                  = 0, /* physical */
+
+       .target_cpus                    = numachip_target_cpus,
+       .disable_esr                    = 0,
+       .dest_logical                   = 0,
+       .check_apicid_used              = NULL,
+       .check_apicid_present           = NULL,
+
+       .vector_allocation_domain       = numachip_vector_allocation_domain,
+       .init_apic_ldr                  = flat_init_apic_ldr,
+
+       .ioapic_phys_id_map             = NULL,
+       .setup_apic_routing             = NULL,
+       .multi_timer_check              = NULL,
+       .cpu_present_to_apicid          = default_cpu_present_to_apicid,
+       .apicid_to_cpu_present          = NULL,
+       .setup_portio_remap             = NULL,
+       .check_phys_apicid_present      = default_check_phys_apicid_present,
+       .enable_apic_mode               = NULL,
+       .phys_pkg_id                    = numachip_phys_pkg_id,
+       .mps_oem_check                  = NULL,
+
+       .get_apic_id                    = get_apic_id,
+       .set_apic_id                    = set_apic_id,
+       .apic_id_mask                   = 0xffU << 24, /* the same byte that set_apic_id() fills */
+
+       .cpu_mask_to_apicid             = numachip_cpu_mask_to_apicid,
+       .cpu_mask_to_apicid_and         = numachip_cpu_mask_to_apicid_and,
+
+       .send_IPI_mask                  = numachip_send_IPI_mask,
+       .send_IPI_mask_allbutself       = numachip_send_IPI_mask_allbutself,
+       .send_IPI_allbutself            = numachip_send_IPI_allbutself,
+       .send_IPI_all                   = numachip_send_IPI_all,
+       .send_IPI_self                  = numachip_send_IPI_self,
+
+       .wakeup_secondary_cpu           = numachip_wakeup_secondary,
+       .trampoline_phys_low            = DEFAULT_TRAMPOLINE_PHYS_LOW,
+       .trampoline_phys_high           = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+       .wait_for_init_deassert         = NULL,
+       .smp_callin_clear_local_apic    = NULL,
+       .inquire_remote_apic            = NULL, /* REMRD not supported */
+
+       .read                           = native_apic_mem_read,
+       .write                          = native_apic_mem_write,
+       .icr_read                       = native_apic_icr_read,
+       .icr_write                      = native_apic_icr_write,
+       .wait_icr_idle                  = native_apic_wait_icr_idle,
+       .safe_wait_icr_idle             = native_safe_apic_wait_icr_idle,
+};
+apic_driver(apic_numachip);
+