/*
 * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Further, this software is distributed without any warranty that it is
 * free of the rightful claim of any third person regarding infringement
 * or the like.  Any license provided herein, whether implied or
 * otherwise, applies only to this software file.  Patent licenses, if
 * any, provided herein do not apply to combinations of this program with
 * other software, or any other product whatsoever.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
 *
 * Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
 * Mountain View, CA 94043, or:
 *
 * http://www.sgi.com
 *
 * For further information regarding this notice, see:
 *
 * http://oss.sgi.com/projects/GenInfo/NoticeExplan
 */
#ifndef _ASM_IA64_SN_BTE_COPY_H
#define _ASM_IA64_SN_BTE_COPY_H

#ident "$Revision: 1.1 $"

#include <linux/timer.h>
#include <linux/cache.h>
#include <asm/sn/bte.h>
#include <asm/sn/sgi.h>
#include <asm/sn/pda.h>
#include <asm/delay.h>

#define L1_CACHE_MASK (L1_CACHE_BYTES - 1)
/*
 * BTE_LOCKING support - When CONFIG_IA64_SGI_BTE_LOCKING is
 * not defined, the bte_copy code supports one bte per cpu in
 * synchronous mode.  Even if bte_copy is called with a
 * notify address, the bte will spin and wait for the transfer
 * to complete.  By defining the following, spin_locks and
 * busy checks are placed around the initiation of a BTE
 * transfer and multiple BTEs per cpu are supported.
 */
#define CONFIG_IA64_SGI_BTE_LOCKING 1
/*
 * Handle locking of the bte interfaces.
 *
 * All transfers spinlock the interface before setting up the SHUB
 * registers.  Sync transfers hold the lock until all processing is
 * complete.  Async transfers release the lock as soon as the transfer
 * is initiated.
 *
 * To determine if an interface is available, we must check both the
 * busy bit and the spinlock for that interface.
 */
#define BTE_LOCK_IF_AVAIL(_x) (\
	(*pda.cpu_bte_if[_x]->most_rcnt_na & IBLS_BUSY) && \
	(!(spin_trylock(&(pda.cpu_bte_if[_x]->spinlock)))) \
	)
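
/*
 * Acquisition sketch (illustrative only; it mirrors the scan loop that
 * bte_copy() uses below, and "if_num" is a hypothetical local): a
 * non-zero result from BTE_LOCK_IF_AVAIL() means "interface _x cannot
 * be used right now, try the next one"; a zero result ends the scan.
 *
 *	int if_num = 0;
 *
 *	while ((if_num < BTES_PER_NODE) && BTE_LOCK_IF_AVAIL(if_num)) {
 *		if_num++;
 *	}
 *	if (if_num >= BTES_PER_NODE) {
 *		// every interface was busy or already locked
 *	}
 */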
/*
 * Some macros to simplify reading.
 *
 * Start with macros to locate the BTE control registers.
 */
#define BTEREG_LNSTAT_ADDR ((u64 *)(bte->bte_base_addr))
#define BTEREG_SRC_ADDR ((u64 *)(bte->bte_base_addr + BTEOFF_SRC))
#define BTEREG_DEST_ADDR ((u64 *)(bte->bte_base_addr + BTEOFF_DEST))
#define BTEREG_CTRL_ADDR ((u64 *)(bte->bte_base_addr + BTEOFF_CTRL))
#define BTEREG_NOTIF_ADDR ((u64 *)(bte->bte_base_addr + BTEOFF_NOTIFY))
/* Macros to force the IBCT0 value to be valid. */

#define BTE_VALID_MODES BTE_NOTIFY
#define BTE_VLD_MODE(x) (x & BTE_VALID_MODES)
// #define BTE_DEBUG_VERBOSE

#ifdef BTE_DEBUG
# define BTE_PRINTK(x) printk x	/* Terse */
# ifdef BTE_DEBUG_VERBOSE
#  define BTE_PRINTKV(x) printk x	/* Verbose */
# else
#  define BTE_PRINTKV(x)
# endif /* BTE_DEBUG_VERBOSE */
#else
# define BTE_PRINTK(x)
# define BTE_PRINTKV(x)
#endif /* BTE_DEBUG */
#define BTE_IDEAL_TMO(x) (jiffies + \
			  (HZ / BTE_MAXT_LINES_PER_SECOND * x))
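
/*
 * In other words: BTE_MAXT_LINES_PER_SECOND is the BTE's maximum
 * transfer rate, so a transfer of x cache lines cannot finish sooner
 * than x / BTE_MAXT_LINES_PER_SECOND seconds, which is
 * (HZ / BTE_MAXT_LINES_PER_SECOND) * x jiffies from now.  The division
 * is integer division, so the offset rounds down (all the way to zero
 * if BTE_MAXT_LINES_PER_SECOND exceeds HZ).
 */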
#ifdef BTE_TIME
volatile extern u64 bte_setup_time;
volatile extern u64 bte_transfer_time;
volatile extern u64 bte_tear_down_time;
volatile extern u64 bte_execute_time;

#define BTE_TIME_DECLARE() \
	u64 btcp_strt_tm = 0; \
	u64 btcp_cplt_tm = 0; \
	u64 xfr_strt_tm = 0; \
	u64 xfr_cplt_tm = 0;

#define BTE_TIME_START() \
	btcp_strt_tm = xfr_strt_tm = xfr_cplt_tm = ia64_get_itc();

#define BTE_TIME_XFR_START() \
	xfr_strt_tm = ia64_get_itc();

#define BTE_TIME_XFR_STOP() \
	xfr_cplt_tm = ia64_get_itc();

#define BTE_TIME_STOP() \
	btcp_cplt_tm = ia64_get_itc(); \
	bte_setup_time = xfr_strt_tm - btcp_strt_tm; \
	bte_transfer_time = xfr_cplt_tm - xfr_strt_tm; \
	bte_tear_down_time = btcp_cplt_tm - xfr_cplt_tm; \
	bte_execute_time = btcp_cplt_tm - btcp_strt_tm;

#else
#define BTE_TIME_DECLARE()
#define BTE_TIME_START()
#define BTE_TIME_XFR_START()
#define BTE_TIME_XFR_STOP()
#define BTE_TIME_STOP()
#endif /* BTE_TIME */
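
/*
 * Intended pairing of the BTE_TIME_* hooks (a sketch; bte_copy() below
 * only shows the BTE_TIME_XFR_START() call, so the placement of the
 * remaining calls is an assumption about how the hooks bracket a copy):
 *
 *	BTE_TIME_DECLARE();		// local timestamp variables
 *
 *	BTE_TIME_START();		// entry: setup begins
 *	... lock an interface, program the SHUB registers ...
 *	BTE_TIME_XFR_START();		// transfer initiated
 *	... wait for the notification ...
 *	BTE_TIME_XFR_STOP();		// transfer observed complete
 *	... unlock, error checks ...
 *	BTE_TIME_STOP();		// exit: fills bte_setup_time,
 *					// bte_transfer_time,
 *					// bte_tear_down_time and
 *					// bte_execute_time
 */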
/*
 * bte_copy(src, dest, len, mode, notification)
 *
 * Use the block transfer engine to move kernel
 * memory from src to dest using the assigned mode.
 *
 * Parameters:
 *   src - physical address of the transfer source.
 *   dest - physical address of the transfer destination.
 *   len - number of bytes to transfer from source to dest.
 *   mode - hardware defined.  See reference information
 *          for IBCT0/1 in the SHUB Programmer's Reference.
 *   notification - kernel virtual address of the notification cache
 *                  line.  If NULL, the default is used and
 *                  the bte_copy is synchronous.
 *
 * NOTE:  This function requires src, dest, and len to
 * be cache line aligned.
 */
extern __inline__ bte_result_t
bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
{
#ifdef CONFIG_IA64_SGI_BTE_LOCKING
	int bte_to_use;
#endif /* CONFIG_IA64_SGI_BTE_LOCKING */
	u64 transfer_size;
	u64 lines_remaining;
	bteinfo_t *bte;		/* per-interface state; type name assumed
				 * to come from <asm/sn/bte.h> */

	BTE_PRINTK(("bte_copy (0x%lx, 0x%lx, 0x%lx, 0x%lx, %p)\n",
		    src, dest, len, mode, notification));

	if (len == 0) {
		return (BTE_SUCCESS);
	}

	ASSERT(!((len & L1_CACHE_MASK) ||
		 (src & L1_CACHE_MASK) || (dest & L1_CACHE_MASK)));

	ASSERT(len < ((BTE_LEN_MASK + 1) << L1_CACHE_SHIFT));
#ifdef CONFIG_IA64_SGI_BTE_LOCKING
	do {
		bte_to_use = 0;

		/* Attempt to lock one of the BTE interfaces */
		while ((bte_to_use < BTES_PER_NODE) &&
		       BTE_LOCK_IF_AVAIL(bte_to_use)) {
			bte_to_use++;
		}

		if ((bte_to_use >= BTES_PER_NODE) &&
		    !(mode & BTE_WACQUIRE)) {
			return (BTEFAIL_NOTAVAIL);
		}

		/* Wait until a bte is available. */
	} while (bte_to_use >= BTES_PER_NODE);

	bte = pda.cpu_bte_if[bte_to_use];
	BTE_PRINTKV(("Got a lock on bte %d\n", bte_to_use));
#else
	/* Assuming one BTE per CPU. */
	bte = pda->cpu_bte_if[0];
#endif /* CONFIG_IA64_SGI_BTE_LOCKING */
	/*
	 * The following are removed for optimization but are
	 * available in the event that the SHUB exhibits
	 * notification problems similar to the hub, bedrock et al.
	 *
	 * bte->mostRecentSrc = src;
	 * bte->mostRecentDest = dest;
	 * bte->mostRecentLen = len;
	 * bte->mostRecentMode = mode;
	 */
	if (notification == NULL) {
		/* User does not want to be notified. */
		bte->most_rcnt_na = &bte->notify;
	} else {
		bte->most_rcnt_na = notification;
	}
	/* Calculate the number of cache lines to transfer. */
	transfer_size = ((len >> L1_CACHE_SHIFT) & BTE_LEN_MASK);

	BTE_PRINTKV(("Calculated transfer size of %ld cache lines\n",
		     transfer_size));

	/* Initialize the notification to a known value. */
	*bte->most_rcnt_na = -1L;

	BTE_PRINTKV(("Before, status is 0x%lx and notify is 0x%lx\n",
		     HUB_L(BTEREG_LNSTAT_ADDR),
		     *bte->most_rcnt_na));
	/* Set the status reg busy bit and transfer length */
	BTE_PRINTKV(("IBLS - HUB_S(0x%lx, 0x%lx)\n",
		     BTEREG_LNSTAT_ADDR, IBLS_BUSY | transfer_size));
	HUB_S(BTEREG_LNSTAT_ADDR, (IBLS_BUSY | transfer_size));

	/* Set the source and destination registers */
	BTE_PRINTKV(("IBSA - HUB_S(0x%lx, 0x%lx)\n", BTEREG_SRC_ADDR,
		     (TO_PHYS(src))));
	HUB_S(BTEREG_SRC_ADDR, (TO_PHYS(src)));
	BTE_PRINTKV(("IBDA - HUB_S(0x%lx, 0x%lx)\n", BTEREG_DEST_ADDR,
		     (TO_PHYS(dest))));
	HUB_S(BTEREG_DEST_ADDR, (TO_PHYS(dest)));

	/* Set the notification register */
	BTE_PRINTKV(("IBNA - HUB_S(0x%lx, 0x%lx)\n", BTEREG_NOTIF_ADDR,
		     (TO_PHYS(__pa(bte->most_rcnt_na)))));
	HUB_S(BTEREG_NOTIF_ADDR, (TO_PHYS(__pa(bte->most_rcnt_na))));

	/* Initiate the transfer */
	BTE_PRINTKV(("IBCT - HUB_S(0x%lx, 0x%lx)\n", BTEREG_CTRL_ADDR, mode));
	BTE_TIME_XFR_START();
	HUB_S(BTEREG_CTRL_ADDR, BTE_VLD_MODE(mode));

	BTE_PRINTKV(("Initiated, status is 0x%lx and notify is 0x%lx\n",
		     HUB_L(BTEREG_LNSTAT_ADDR),
		     *bte->most_rcnt_na));
	if (notification == NULL) {
		/*
		 * Calculate our timeout.
		 *
		 * What are we doing here?  We are trying to determine
		 * the fastest time the BTE could have transferred our
		 * block of data.  By taking the clock frequency (ticks/sec)
		 * divided by the BTE MaxT Transfer Rate (lines/sec)
		 * times the transfer size (lines), we get a tick
		 * offset from current time that the transfer should
		 * be complete by.
		 *
		 * Why do this?  We are watching for a notification
		 * failure from the BTE.  This behaviour has been
		 * seen in the SN0 and SN1 hardware under rare circumstances
		 * and is expected in SN2.  By checking at the
		 * ideal transfer timeout, we minimize our time
		 * delay from hardware completing our request and
		 * our detecting the failure.
		 */
		bte->ideal_xfr_tmo = BTE_IDEAL_TMO(transfer_size);
		while (bte->notify == -1UL) {
			/*
			 * Notification Workaround: When the max
			 * theoretical time has elapsed, read the hub
			 * status register into the notification area.
			 * This fakes the shub performing the copy.
			 */
			BTE_PRINTKV((" Timing. IBLS = 0x%lx, "
				     "notify = 0x%lx\n",
				     HUB_L(BTEREG_LNSTAT_ADDR),
				     *bte->most_rcnt_na));
			if (time_after(jiffies, bte->ideal_xfr_tmo)) {
				lines_remaining = HUB_L(BTEREG_LNSTAT_ADDR) &
					BTE_LEN_MASK;
				bte->ideal_xfr_tmo_cnt++;
				bte->ideal_xfr_tmo =
					BTE_IDEAL_TMO(lines_remaining);

				BTE_PRINTKV((" Timeout. cpu %d "
					     "IBLS = 0x%lx. "
					     "Lines remaining = %ld. "
					     "New timeout = %ld.\n",
					     smp_processor_id(),
					     HUB_L(BTEREG_LNSTAT_ADDR),
					     lines_remaining,
					     bte->ideal_xfr_tmo));
			}
		}

		BTE_PRINTKV((" Delay Done. IBLS = 0x%lx, notify= 0x%lx\n",
			     HUB_L(BTEREG_LNSTAT_ADDR),
			     *bte->most_rcnt_na));

		if (bte->notify & IBLS_ERROR) {
			/* >>> Need to do real error checking. */
#ifdef CONFIG_IA64_SGI_BTE_LOCKING
			spin_unlock(&(bte->spinlock));
#endif /* CONFIG_IA64_SGI_BTE_LOCKING */
			BTE_PRINTKV(("Erroring status is 0x%lx and "
				     "notify is 0x%lx\n",
				     HUB_L(BTEREG_LNSTAT_ADDR),
				     *bte->most_rcnt_na));

			return (BTEFAIL_ERROR);
		}
	}

#ifdef CONFIG_IA64_SGI_BTE_LOCKING
	spin_unlock(&(bte->spinlock));
#endif /* CONFIG_IA64_SGI_BTE_LOCKING */

	BTE_PRINTKV(("Returning status is 0x%lx and notify is 0x%lx\n",
		     HUB_L(BTEREG_LNSTAT_ADDR),
		     *bte->most_rcnt_na));

	return (BTE_SUCCESS);
}
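
/*
 * Usage sketch for bte_copy() (illustrative only; "src_buf", "dst_buf",
 * "nbytes" and "notify" are hypothetical names, and all addresses and
 * the length must be cache line aligned as noted above):
 *
 *	static volatile u64 notify __cacheline_aligned;
 *	bte_result_t rc;
 *
 *	// Synchronous copy: with a NULL notification address, bte_copy()
 *	// spins until the SHUB reports completion (or an error).
 *	rc = bte_copy(__pa(src_buf), __pa(dst_buf), nbytes,
 *		      BTE_NOTIFY, NULL);
 *
 *	// Asynchronous copy: supply a kernel virtual notification address
 *	// and poll it yourself, the same way bte_copy() does internally.
 *	rc = bte_copy(__pa(src_buf), __pa(dst_buf), nbytes,
 *		      BTE_NOTIFY, (void *) &notify);
 *	if (rc == BTE_SUCCESS) {
 *		while (notify == -1UL)
 *			;			// transfer still in flight
 *		if (notify & IBLS_ERROR)
 *			;			// handle the failed transfer
 *	}
 */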
/*
 * Declare bte_unaligned_copy as an extern.
 */
extern bte_result_t bte_unaligned_copy(u64, u64, u64, u64);
/*
 * The following is the preferred way of calling bte_unaligned_copy.
 * If the copy is fully cache line aligned, then bte_copy is
 * used instead.  Since bte_copy is inlined, this saves a function
 * call.  NOTE: bte_copy is called synchronously and does block
 * until the transfer is complete.  In order to get the asynchronous
 * version of bte_copy, you must perform this check yourself.
 */
#define BTE_UNALIGNED_COPY(src, dest, len, mode) \
	(((len & L1_CACHE_MASK) || (src & L1_CACHE_MASK) || \
	  (dest & L1_CACHE_MASK)) ? \
	 bte_unaligned_copy(src, dest, len, mode) : \
	 bte_copy(src, dest, len, mode, NULL))
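
/*
 * Usage sketch (illustrative only; "src_buf", "dst_buf" and "nbytes" are
 * hypothetical names): the macro falls back to the inlined, synchronous
 * bte_copy() whenever source, destination and length are all cache line
 * aligned, and otherwise calls bte_unaligned_copy().
 *
 *	bte_result_t rc;
 *
 *	rc = BTE_UNALIGNED_COPY(__pa(src_buf), __pa(dst_buf),
 *				nbytes, BTE_NOTIFY);
 *	if (rc != BTE_SUCCESS)
 *		;	// handle the error
 */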
#endif /* _ASM_IA64_SN_BTE_COPY_H */