2 * SN1 Platform specific SMP Support
4 * Copyright (C) 2000-2002 Silicon Graphics, Inc. All rights reserved.
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of version 2 of the GNU General Public License
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it would be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 * Further, this software is distributed without any warranty that it is
15 * free of the rightful claim of any third person regarding infringement
16 * or the like. Any license provided herein, whether implied or
17 * otherwise, applies only to this software file. Patent licenses, if
18 * any, provided herein do not apply to combinations of this program with
19 * other software, or any other product whatsoever.
21 * You should have received a copy of the GNU General Public
22 * License along with this program; if not, write the Free Software
23 * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
25 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
26 * Mountain View, CA 94043, or:
30 * For further information regarding this notice, see:
32 * http://oss.sgi.com/projects/GenInfo/NoticeExplan
35 #include <linux/config.h>
36 #include <linux/init.h>
37 #include <linux/kernel.h>
38 #include <linux/spinlock.h>
39 #include <linux/threads.h>
40 #include <linux/sched.h>
41 #include <linux/smp.h>
42 #include <linux/interrupt.h>
43 #include <linux/irq.h>
44 #include <linux/mmzone.h>
46 #include <asm/processor.h>
49 #include <asm/system.h>
52 #include <asm/hw_irq.h>
53 #include <asm/current.h>
54 #include <asm/delay.h>
55 #include <asm/sn/sn_cpuid.h>
58 * The following structure is used to pass params thru smp_call_function
59 * to other cpus for flushing TLB ranges.
68 atomic_t unfinished_count;
70 char pad[SMP_CACHE_BYTES];
/*
 * Shared ring of TLB-flush request descriptors, filled by cpus issuing a
 * global purge and drained by every cpu in sn1_received_flush_tlb().
 * NOTE(review): the leading numerals on the code lines below are original
 * line-number artifacts from the extraction, not C tokens.
 */
76 static ptc_params_t ptcParamArray[NUMPTC] __attribute__((__aligned__(128)));
/* use separate cache lines on ptcParamsNextByCpu to avoid false sharing:
 * each cpu only touches slot [cpu*16], its private drain cursor. */
79 static ptc_params_t *ptcParamsNextByCpu[NR_CPUS*16] __attribute__((__aligned__(128)));
/* First free slot in the ring: producers claim it, consumers stop here. */
80 static volatile ptc_params_t *ptcParamsEmpty __cacheline_aligned;
/* Serializes producers adding entries to ptcParamArray. */
83 static spinlock_t ptcParamsLock __cacheline_aligned = SPIN_LOCK_UNLOCKED;
/* One-time-init flag for the queue (set/tested on lines missing from this
 * extract -- TODO confirm). */
85 static int ptcInit = 0;
/* Statistics counters, maintained under #ifdef PTCDEBUG. */
87 static int ptcParamsAllBusy = 0; /* debugging/statistics */
88 static int ptcCountBacklog = 0;
89 static int ptcBacklog[NUMPTC+1];
90 static char ptcParamsCounts[NR_CPUS][NUMPTC] __attribute__((__aligned__(128)));
91 static char ptcParamsResults[NR_CPUS][NUMPTC] __attribute__((__aligned__(128)));
/*
 * Make smp_send_flush_tlb() a weak reference, so that we get a clean
 * compile with the ia64 patch without the actual SN1 specific code in
 * arch/ia64/kernel/smp.c.
 */
99 extern void smp_send_flush_tlb (void) __attribute((weak));
/*
 * The following table/struct is for remembering PTC coherency domains. It
 * is also used to translate sapicid into cpuids. We don't want to start
 * cpus unless we know their cache domain.
 */
107 sn_sapicid_info_t sn_sapicid_info[NR_CPUS];
/**
 * sn1_ptc_l_range - purge local translation cache
 * @start: start of virtual address range
 * @end: end of virtual address range
 * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc))
 *
 * Purges the range specified from the local processor's translation cache
 * (as opposed to the translation registers). Note that more than the specified
 * range *may* be cleared from the cache by some processors.
 *
 * This is probably not good enough, but I don't want to try to make it better
 * until I get some statistics on a running system. At a minimum, we should only
 * send IPIs to 1 processor in each TLB domain & have it issue a ptc.g on its
 * own FSB. Also, we only have to serialize per FSB, not globally.
 *
 * More likely, we will have to do some work to reduce the frequency of calls to
 * this routine. (original comment continues on lines missing from this extract)
 */
129 sn1_ptc_l_range(unsigned long start, unsigned long end, unsigned long nbits)
/* NOTE(review): the return type line, the opening brace and the "do {"
 * that begins this loop are missing from this extract. */
/* Issue one ptc.l per (1 << nbits)-byte chunk until the range is covered. */
132 __asm__ __volatile__ ("ptc.l %0,%1" :: "r"(start), "r"(nbits<<2) : "memory");
133 start += (1UL << nbits);
134 } while (start < end);
/**
 * sn1_received_flush_tlb - cpu tlb flush routine
 *
 * Flushes the TLB of a given processor. Drains this cpu's share of the
 * request ring: every entry from ptcParamsNextByCpu[cpu*16] up to (but not
 * including) ptcParamsEmpty is purged locally, and each entry's
 * unfinished_count is decremented to signal completion to the requester.
 */
144 sn1_received_flush_tlb(void)
/* NOTE(review): the return type line and opening brace are missing from
 * this extract, as are the early return for an empty queue, the "do {"
 * opening the drain loop, the declaration of "result", and the
 * "#ifdef PTCDEBUG" opener matched by the #endif below. */
146 unsigned long start, end, nbits;
147 unsigned int rid, saved_rid;
148 int cpu = smp_processor_id();
150 ptc_params_t *ptcParams;
/* Queue is empty when our cursor has caught up with the producer cursor. */
152 ptcParams = ptcParamsNextByCpu[cpu*16];
153 if (ptcParams == ptcParamsEmpty)
/* Snapshot the request; remember the current region register value so it
 * can be restored if the request targets a different region id. */
157 start = ptcParams->ptc.start;
158 saved_rid = (unsigned int) ia64_get_rr(start);
159 end = ptcParams->ptc.end;
160 nbits = ptcParams->ptc.nbits;
161 rid = ptcParams->ptc.rid;
/* Temporarily install the requester's region id so ptc.l hits the
 * intended translations. */
163 if (saved_rid != rid) {
164 ia64_set_rr(start, (unsigned long)rid);
168 sn1_ptc_l_range(start, end, nbits);
170 if (saved_rid != rid)
171 ia64_set_rr(start, (unsigned long)saved_rid);
/* Signal this cpu is done with the entry. */
175 result = atomic_dec(&ptcParams->ptc.unfinished_count);
/* PTCDEBUG bookkeeping: record per-cpu, per-slot completion results. */
178 int i = ptcParams-&ptcParamArray[0];
179 ptcParamsResults[cpu][i] = (char) result;
180 ptcParamsCounts[cpu][i]++;
182 #endif /* PTCDEBUG */
/* Advance the cursor around the ring, wrapping at the end. */
184 if (++ptcParams == &ptcParamArray[NUMPTC])
185 ptcParams = &ptcParamArray[0];
187 } while (ptcParams != ptcParamsEmpty);
/* Remember where this cpu left off for the next IPI. */
189 ptcParamsNextByCpu[cpu*16] = ptcParams;
/**
 * sn1_global_tlb_purge - flush a translation cache range on all processors
 * @start: start of virtual address range to flush
 * @end: end of virtual address range
 * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc))
 *
 * Flushes the translation cache of all processors from @start to @end.
 * Queues a request in ptcParamArray, advances the global ptcParamsEmpty
 * pointer, sends an IPI so every cpu drains its queue, then processes the
 * request locally as well.
 *
 * NOTE(review): this extract is missing several original lines (variable
 * declarations, braces, #ifdef PTCDEBUG openers, the one-time-init guard,
 * wait-loop bodies, and the final return) -- comments below flag the gaps
 * that matter.
 */
201 sn1_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits)
203 ptc_params_t *params;
205 unsigned long irqflags;
207 ptc_params_t *nextnext;
/* Uniprocessor: a local ptc.l suffices, no queueing or IPIs needed.
 * (The return ending this branch is on a missing line.) */
211 if (smp_num_cpus == 1) {
212 sn1_ptc_l_range(start, end, nbits);
/*
 * If at interrupt level and cannot get spinlock,
 * then do something useful by flushing own tlbflush queue
 * so as to avoid a possible deadlock.
 */
216 if (in_interrupt()) {
222 while (!spin_trylock(&ptcParamsLock)) {
223 local_irq_save(irqflags);
224 sn1_received_flush_tlb();
225 local_irq_restore(irqflags);
226 udelay(10); /* take it easier on the bus */
/* Not at interrupt level: safe to spin on the lock normally. */
229 spin_lock(&ptcParamsLock);
/* One-time initialization of the ring (presumably guarded by a test on
 * ptcInit on a missing line -- TODO confirm). */
235 memset(ptcParamArray, 0, sizeof(ptcParamArray));
236 ptcParamsEmpty = &ptcParamArray[0];
237 for (cpu=0; cpu<NR_CPUS; cpu++)
238 ptcParamsNextByCpu[cpu*16] = &ptcParamArray[0];
/* PTCDEBUG statistics reset (the #ifdef PTCDEBUG opener is on a missing
 * line; its #endif is visible below). */
241 memset(ptcBacklog, 0, sizeof(ptcBacklog));
242 memset(ptcParamsCounts, 0, sizeof(ptcParamsCounts));
243 memset(ptcParamsResults, 0, sizeof(ptcParamsResults));
244 #endif /* PTCDEBUG */
/* Claim the current empty slot; "next" and "nextnext" are the two slots
 * that follow it, with wraparound (the nextnext assignment is on a
 * missing line). */
247 params = (ptc_params_t *) ptcParamsEmpty;
248 next = (ptc_params_t *) ptcParamsEmpty + 1;
249 if (next == &ptcParamArray[NUMPTC])
250 next = &ptcParamArray[0];
254 if (nextnext == &ptcParamArray[NUMPTC])
255 nextnext = &ptcParamArray[0];
/* Debug-only backlog census: count ring slots still awaiting completion
 * ("ptr"/"backlog" declarations and loop bodies are on missing lines). */
257 if (ptcCountBacklog) {
258 /* quick count of backlog */
261 /* check the current pointer to the beginning */
263 while(--ptr >= &ptcParamArray[0]) {
264 if (atomic_read(&ptr->ptc.unfinished_count) == 0)
270 /* check the end of the array */
271 ptr = &ptcParamArray[NUMPTC];
272 while (--ptr > params) {
273 if (atomic_read(&ptr->ptc.unfinished_count) == 0)
278 ptcBacklog[backlog]++;
280 #endif /* PTCDEBUG */
282 /* wait for the next entry to clear...should be rare */
283 if (atomic_read(&next->ptc.unfinished_count) > 0) {
/* Sanity check: a finished nextnext behind an unfinished next would mean
 * the ring's completion ordering was violated. */
287 if (atomic_read(&nextnext->ptc.unfinished_count) == 0) {
288 if (atomic_read(&next->ptc.unfinished_count) > 0) {
289 panic("\nnonzero next zero nextnext %lx %lx\n",
290 (long)next, (long)nextnext);
295 /* it could be this cpu that is behind */
296 local_irq_save(irqflags);
297 sn1_received_flush_tlb();
298 local_irq_restore(irqflags);
/* now we know it's not this cpu, so just wait */
301 while (atomic_read(&next->ptc.unfinished_count) > 0) {
/* Fill in the claimed slot and arm its completion count with one
 * reference per cpu. */
306 params->ptc.start = start;
307 params->ptc.end = end;
308 params->ptc.nbits = nbits;
309 params->ptc.rid = (unsigned int) ia64_get_rr(start);
310 atomic_set(&params->ptc.unfinished_count, smp_num_cpus);
/* NOTE(review): line 310 above reads "&params" here but the extract shows
 * it corrupted to the mojibake "&para;ms" (an HTML entity swallowed the
 * ampersand) -- the encoding must be repaired in the real file. */
/* The atomic_set above can hit memory *after* the update to
 * ptcParamsEmpty below, which opens a timing window that other cpus can
 * squeeze into! (barrier/ordering fix presumably on missing lines --
 * TODO confirm) */
/* everything is ready to process:
 * -- global lock is held
 * -- new entry + 1 is free
 * -- new entry is set up
 * so:
 * -- update the global next pointer
 * -- unlock the global lock
 * -- send IPI to notify other cpus
 * -- process the data ourselves
 */
328 ptcParamsEmpty = next;
329 spin_unlock(&ptcParamsLock);
330 smp_send_flush_tlb();
332 local_irq_save(irqflags);
333 sn1_received_flush_tlb();
334 local_irq_restore(irqflags);
/* Currently we don't think global TLB purges need to be atomic.
 * All CPUs get sent IPIs, so if they haven't done the purge,
 * they're busy with interrupts that are at the IPI level, which is
 * priority 15. We're asserting that any code at that level
 * shouldn't be using user TLB entries. To change this to wait
 * for all the flushes to complete, enable the following code.
 */
343 #if defined(SN1_SYNCHRONOUS_GLOBAL_TLB_PURGE) || defined(BUS_INT_WAR)
344 /* this code is not tested */
345 /* wait for the flush to complete */
/* NOTE(review): the next line also shows the "&para;ms" mojibake for
 * "&params"; the loop body and the closing #endif are on missing lines. */
346 while (atomic_read(¶ms->ptc.unfinished_count) > 0)
/**
 * sn_send_IPI_phys - send an IPI to a Nasid and slice
 * @physid: physical cpuid to receive the interrupt.
 * @vector: command to send
 * @delivery_mode: delivery mechanism
 *
 * Sends an IPI (interprocessor interrupt) to the processor specified by
 * @physid. (original comment continues on lines missing from this extract)
 *
 * @delivery_mode can be one of the following
 *
 * %IA64_IPI_DM_INT - pend an interrupt
 * %IA64_IPI_DM_PMI - pend a PMI
 * %IA64_IPI_DM_NMI - pend an NMI
 * %IA64_IPI_DM_INIT - pend an INIT interrupt
 */
368 sn_send_IPI_phys(long physid, int vector, int delivery_mode)
/* NOTE(review): the return type line, opening brace, and the declarations
 * of p, nasid and slice are missing from this extract. */
/* Per-slice offsets of the interrupt-pend registers within a node's
 * register space -- presumably two registers per cpu slice; verify
 * against the SN1 address-map documentation. */
373 static int off[4] = {0x1800080, 0x1800088, 0x1a00080, 0x1a00088};
/* Special handling for the AP wakeup vector; branch body is on missing
 * lines -- TODO confirm what it does. */
376 if (vector != ap_wakeup_vector) {
/* Resolve the target's node (nasid) and its cpu slice on that node. */
381 nasid = cpu_physical_id_to_nasid(physid);
382 slice = cpu_physical_id_to_slice(physid);
/* Build an uncached-space (0xc0000a...) pointer to the target slice's
 * interrupt-pend register: nasid selects the node, off[] the register. */
384 p = (long*)(0xc0000a0000000000LL | (nasid<<33) | off[slice]);
/* A single store pends the interrupt: delivery mode in bits 8 and up,
 * vector in the low byte. */
387 *p = (delivery_mode << 8) | (vector & 0xff);
/**
 * sn1_send_IPI - send an IPI to a processor
 * @cpuid: target of the IPI
 * @vector: command to send
 * @delivery_mode: delivery mechanism
 * @redirect: redirect the IPI?
 *
 * Sends an IPI (interprocessor interrupt) to the processor specified by
 * @cpuid. @delivery_mode can be one of the following
 *
 * %IA64_IPI_DM_INT - pend an interrupt
 * %IA64_IPI_DM_PMI - pend a PMI
 * %IA64_IPI_DM_NMI - pend an NMI
 * %IA64_IPI_DM_INIT - pend an INIT interrupt
 */
407 sn1_send_IPI(int cpuid, int vector, int delivery_mode, int redirect)
/* NOTE(review): the return type line, braces and the declaration of
 * physid are missing from this extract. @redirect is unused by the
 * visible code. */
/* Translate the logical cpuid to its physical id and delegate. */
411 physid = cpu_physical_id(cpuid);
413 sn_send_IPI_phys(physid, vector, delivery_mode);
/*
 * process_sal_ptc_domain_info - record the PTC coherency domain and
 * sapicid for every processor listed in one SAL PTC domain entry.
 * Part of a PTC_NOTYET-guarded region (the #ifdef opener is on a missing
 * line; the matching #endif appears after process_sal_desc_ptc below).
 * NOTE(review): return type, braces and loop-closing brace are missing
 * from this extract.
 */
419 process_sal_ptc_domain_info(ia64_sal_ptc_domain_info_t *di, int domain)
421 ia64_sal_ptc_domain_proc_entry_t *pe;
422 int i, sapicid, cpuid;
/* proc_list is a physical address handed over by SAL; map to kernel VA. */
424 pe = __va(di->proc_list);
425 for (i=0; i<di->proc_count; i++, pe++) {
/* Combine the entry's local id/eid into a sapicid, map that to the
 * logical cpu, and record its coherency domain. */
426 sapicid = id_eid_to_sapicid(pe->id, pe->eid);
427 cpuid = cpu_logical_id(sapicid);
428 sn_sapicid_info[cpuid].domain = domain;
429 sn_sapicid_info[cpuid].sapicid = sapicid;
/*
 * process_sal_desc_ptc - walk the SAL PTC descriptor and process each
 * coherency domain in turn (the domain's index doubles as its id).
 * NOTE(review): the return type line, the declaration of i, and the
 * opening/closing braces are missing from this extract.
 */
437 process_sal_desc_ptc(ia64_sal_desc_ptc_t *ptc)
439 ia64_sal_ptc_domain_info_t *di;
/* domain_info is a physical address handed over by SAL; map to kernel VA. */
440 di = __va(ptc->domain_info);
441 for (i=0; i<ptc->num_domains; i++, di++) {
442 process_sal_ptc_domain_info(di, i);
445 #endif /* PTC_NOTYET */
/**
 * init_sn1_smp_config - setup PTC domains per processor
 *
 * Reads the SAL-provided PTC domain information and fills in
 * sn_sapicid_info; if the info is absent, the system is forced into
 * uniprocessor mode.
 */
451 init_sn1_smp_config(void)
/* NOTE(review): the return type line and braces are missing from this
 * extract, as is the body completing the error branch (presumably
 * forcing smp_num_cpus to 1 and returning -- TODO confirm) and any
 * PTC_NOTYET guard around the process_sal_desc_ptc() call. */
453 if (!ia64_ptc_domain_info) {
454 printk("SMP: Can't find PTC domain info. Forcing UP mode\n");
/* Mark every slot invalid (-1) before filling in discovered domains. */
460 memset (sn_sapicid_info, -1, sizeof(sn_sapicid_info));
461 process_sal_desc_ptc(ia64_ptc_domain_info);
465 #else /* CONFIG_SMP */
/* Uniprocessor stub: just record the boot cpu's sapicid.
 * NOTE(review): the return type line and braces are missing from this
 * extract. */
468 init_sn1_smp_config(void)
472 sn_sapicid_info[0].sapicid = hard_smp_processor_id();
476 #endif /* CONFIG_SMP */