arch/x86/include/mach-xen/asm/spinlock.h
#ifndef _ASM_X86_SPINLOCK_H
#define _ASM_X86_SPINLOCK_H

#include <linux/atomic.h>
#include <asm/page.h>
#include <asm/processor.h>
#include <linux/compiler.h>

/*
 * Your basic SMP spinlocks, allowing only a single CPU anywhere
 *
 * Simple spin lock operations.  There are two variants, one clears IRQ's
 * on the local processor, one does not.
 *
 * These are fair FIFO ticket locks, which are currently limited to 256
 * CPUs.
 *
 * (the type definitions are in asm/spinlock_types.h)
 */

#ifdef CONFIG_X86_32
# define LOCK_PTR_REG "a"
# define REG_PTR_MODE "k"
#else
# define LOCK_PTR_REG "D"
# define REG_PTR_MODE "q"
#endif

#if defined(CONFIG_XEN) || (defined(CONFIG_X86_32) && \
        (defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)))
/*
 * On Xen, as we read back the result of the unlocking increment, we must use
 * a locked access (or insert a full memory barrier) in all cases (so that we
 * read what is globally visible).
 *
 * On PPro SMP or if we are using OOSTORE, we use a locked operation to unlock
 * (PPro errata 66, 92)
 */
# define UNLOCK_LOCK_PREFIX LOCK_PREFIX
#else
# define UNLOCK_LOCK_PREFIX
#endif

#ifdef TICKET_SHIFT

#include <asm/irqflags.h>
#include <asm/smp-processor-id.h>

int xen_spinlock_init(unsigned int cpu);
void xen_spinlock_cleanup(unsigned int cpu);
#if CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING
struct __raw_tickets xen_spin_adjust(const arch_spinlock_t *,
                                     struct __raw_tickets);
#else
#define xen_spin_adjust(lock, raw_tickets) (raw_tickets)
#define xen_spin_wait(l, t, f) xen_spin_wait(l, t)
#endif
unsigned int xen_spin_wait(arch_spinlock_t *, struct __raw_tickets *,
                           unsigned int flags);
void xen_spin_kick(const arch_spinlock_t *, unsigned int ticket);

/*
 * Ticket locks are conceptually two parts, one indicating the current head of
 * the queue, and the other indicating the current tail. The lock is acquired
 * by atomically noting the tail and incrementing it by one (thus adding
 * ourselves to the queue and noting our position), then waiting until the head
 * becomes equal to the initial value of the tail.
 *
 * We use an xadd covering *both* parts of the lock, to increment the tail and
 * also load the position of the head, which takes care of memory ordering
 * issues and should be optimal for the uncontended case. Note the tail must be
 * in the high part, because a wide xadd increment of the low part would carry
 * up and contaminate the high part.
 */
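/*
 * Decrement the remaining spin count.  While the lock owner's vCPU is
 * running, count down by one per iteration; if the owner has been
 * descheduled, halve the count instead so the waiter gives up busy-waiting
 * and falls through to xen_spin_wait() much sooner.
 */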
#define __spin_count_dec(c, l) (vcpu_running((l)->owner) ? --(c) : ((c) >>= 1))

#if CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING
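/*
 * Lock acquisition with interrupts disabled around the ticket grab; the
 * saved IRQ flags are handed to xen_spin_wait() so the slow path knows the
 * caller's interrupt state.
 */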
static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
{
        struct __raw_tickets inc = { .tail = 1 };
        unsigned int count, flags = arch_local_irq_save();

        inc = xadd(&lock->tickets, inc);
        if (likely(inc.head == inc.tail))
                arch_local_irq_restore(flags);
        else {
                inc = xen_spin_adjust(lock, inc);
                arch_local_irq_restore(flags);
                count = 1 << 12;
                do {
                        while (inc.head != inc.tail
                               && __spin_count_dec(count, lock)) {
                                cpu_relax();
                                inc.head = ACCESS_ONCE(lock->tickets.head);
                        }
                } while (unlikely(!count)
                         && (count = xen_spin_wait(lock, &inc, flags)));
        }
        barrier();              /* make sure nothing creeps before the lock is taken */
        lock->owner = raw_smp_processor_id();
}
#else
#define __ticket_spin_lock(lock) __ticket_spin_lock_flags(lock, -1)
#endif

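/*
 * Variant taking the caller's previously saved IRQ flags (the
 * spin_lock_irqsave() path); the flags are only consulted by the
 * xen_spin_wait() slow path.
 */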
static __always_inline void __ticket_spin_lock_flags(arch_spinlock_t *lock,
                                                     unsigned long flags)
{
        struct __raw_tickets inc = { .tail = 1 };

        inc = xadd(&lock->tickets, inc);
        if (unlikely(inc.head != inc.tail)) {
                unsigned int count = 1 << 12;

                inc = xen_spin_adjust(lock, inc);
                do {
                        while (inc.head != inc.tail
                               && __spin_count_dec(count, lock)) {
                                cpu_relax();
                                inc.head = ACCESS_ONCE(lock->tickets.head);
                        }
                } while (unlikely(!count)
                         && (count = xen_spin_wait(lock, &inc, flags)));
        }
        barrier();              /* make sure nothing creeps before the lock is taken */
        lock->owner = raw_smp_processor_id();
}

#undef __spin_count_dec

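/*
 * Try-lock: snapshot both halves of the ticket pair and, if the lock looks
 * free, claim it by bumping the tail with a single cmpxchg on the combined
 * head/tail word.  A failed cmpxchg means someone else got there first.
 */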
static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
{
        arch_spinlock_t old;

        old.tickets = ACCESS_ONCE(lock->tickets);
        if (old.tickets.head != old.tickets.tail)
                return 0;

        /* cmpxchg is a full barrier, so nothing can move before it */
        if (cmpxchg(&lock->head_tail, old.head_tail,
                    old.head_tail + (1 << TICKET_SHIFT)) != old.head_tail)
                return 0;
        lock->owner = raw_smp_processor_id();
        return 1;
}

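/*
 * Release: advance the head ticket, then, if any waiters remain queued
 * (head != tail after the increment), notify the holder of the next ticket
 * via xen_spin_kick().
 */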
static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
{
        register struct __raw_tickets new;

        __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX);
#if !defined(XEN_SPINLOCK_SOURCE) || !CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING
# undef UNLOCK_LOCK_PREFIX
#endif
        new = ACCESS_ONCE(lock->tickets);
        if (new.head != new.tail)
                xen_spin_kick(lock, new.head);
}

static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
{
        struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);

        return tmp.tail != tmp.head;
}

static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
{
        struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);

        return (__ticket_t)(tmp.tail - tmp.head) > 1;
}

#define __arch_spin(n) __ticket_spin_##n

#else /* TICKET_SHIFT */

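/*
 * Fallback for configurations without ticket locks: a simple unfair byte
 * lock with a separate spinner count, and no Xen-specific setup (the
 * xen_spinlock_* hooks degenerate to no-ops).
 */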
static inline int xen_spinlock_init(unsigned int cpu) { return 0; }
static inline void xen_spinlock_cleanup(unsigned int cpu) {}

static inline int __byte_spin_is_locked(arch_spinlock_t *lock)
{
        return lock->lock != 0;
}

static inline int __byte_spin_is_contended(arch_spinlock_t *lock)
{
        return lock->spinners != 0;
}

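/*
 * Lock: try to grab the byte with xchg; on failure, register as a spinner,
 * spin with rep;nop until the byte clears, then deregister and retry.
 */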
static inline void __byte_spin_lock(arch_spinlock_t *lock)
{
        s8 val = 1;

        asm("1: xchgb %1, %0\n"
            "   test %1,%1\n"
            "   jz 3f\n"
            "   " LOCK_PREFIX "incb %2\n"
            "2: rep;nop\n"
            "   cmpb $1, %0\n"
            "   je 2b\n"
            "   " LOCK_PREFIX "decb %2\n"
            "   jmp 1b\n"
            "3:"
            : "+m" (lock->lock), "+q" (val), "+m" (lock->spinners) : : "memory");
}

#define __byte_spin_lock_flags(lock, flags) __byte_spin_lock(lock)

static inline int __byte_spin_trylock(arch_spinlock_t *lock)
{
        u8 old = 1;

        asm("xchgb %1,%0"
            : "+m" (lock->lock), "+q" (old) : : "memory");

        return old == 0;
}

static inline void __byte_spin_unlock(arch_spinlock_t *lock)
{
        smp_wmb();
        lock->lock = 0;
}

#define __arch_spin(n) __byte_spin_##n

#endif /* TICKET_SHIFT */

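/*
 * The arch_spin_*() entry points below forward to whichever implementation
 * was selected above (ticket or byte lock) via the __arch_spin() macro.
 */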
static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
        return __arch_spin(is_locked)(lock);
}

static inline int arch_spin_is_contended(arch_spinlock_t *lock)
{
        return __arch_spin(is_contended)(lock);
}
#define arch_spin_is_contended  arch_spin_is_contended

static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
{
        __arch_spin(lock)(lock);
}

static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
{
        return __arch_spin(trylock)(lock);
}

static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
{
        __arch_spin(unlock)(lock);
}

static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
                                                 unsigned long flags)
{
        __arch_spin(lock_flags)(lock, flags);
}

#undef __arch_spin

static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
        while (arch_spin_is_locked(lock))
                cpu_relax();
}

/*
 * Read-write spinlocks, allowing multiple readers
 * but only one writer.
 *
 * NOTE! it is quite common to have readers in interrupts
 * but no interrupt writers. For those circumstances we
 * can "mix" irq-safe locks - any writer needs to get an
 * irq-safe write-lock, but readers can get non-irqsafe
 * read-locks.
 *
 * On x86, we implement read-write locks as a 32-bit counter
 * with the high bit (sign) being the "contended" bit.
 */

/**
 * read_can_lock - would read_trylock() succeed?
 * @lock: the rwlock in question.
 */
static inline int arch_read_can_lock(arch_rwlock_t *lock)
{
        return lock->lock > 0;
}

/**
 * write_can_lock - would write_trylock() succeed?
 * @lock: the rwlock in question.
 */
static inline int arch_write_can_lock(arch_rwlock_t *lock)
{
        return lock->write == WRITE_LOCK_CMP;
}

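/*
 * Reader/writer fast paths: a single locked decrement/subtract on the
 * biased counter; __read_lock_failed/__write_lock_failed handle the
 * contended case out of line.
 */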
static inline void arch_read_lock(arch_rwlock_t *rw)
{
        asm volatile(LOCK_PREFIX READ_LOCK_SIZE(dec) " (%0)\n\t"
                     "jns 1f\n"
                     "call __read_lock_failed\n\t"
                     "1:\n"
                     ::LOCK_PTR_REG (rw) : "memory");
}

static inline void arch_write_lock(arch_rwlock_t *rw)
{
        asm volatile(LOCK_PREFIX WRITE_LOCK_SUB(%1) "(%0)\n\t"
                     "jz 1f\n"
                     "call __write_lock_failed\n\t"
                     "1:\n"
                     ::LOCK_PTR_REG (&rw->write), "i" (RW_LOCK_BIAS)
                     : "memory");
}

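/*
 * Trylock variants: speculatively adjust the counter and back the change
 * out again if the lock turns out to be unavailable.
 */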
static inline int arch_read_trylock(arch_rwlock_t *lock)
{
        READ_LOCK_ATOMIC(t) *count = (READ_LOCK_ATOMIC(t) *)lock;

        if (READ_LOCK_ATOMIC(dec_return)(count) >= 0)
                return 1;
        READ_LOCK_ATOMIC(inc)(count);
        return 0;
}

static inline int arch_write_trylock(arch_rwlock_t *lock)
{
        atomic_t *count = (atomic_t *)&lock->write;

        if (atomic_sub_and_test(WRITE_LOCK_CMP, count))
                return 1;
        atomic_add(WRITE_LOCK_CMP, count);
        return 0;
}

static inline void arch_read_unlock(arch_rwlock_t *rw)
{
        asm volatile(LOCK_PREFIX READ_LOCK_SIZE(inc) " %0"
                     :"+m" (rw->lock) : : "memory");
}

static inline void arch_write_unlock(arch_rwlock_t *rw)
{
        asm volatile(LOCK_PREFIX WRITE_LOCK_ADD(%1) "%0"
                     : "+m" (rw->write) : "i" (RW_LOCK_BIAS) : "memory");
}

#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)

#undef READ_LOCK_SIZE
#undef READ_LOCK_ATOMIC
#undef WRITE_LOCK_ADD
#undef WRITE_LOCK_SUB
#undef WRITE_LOCK_CMP

#define arch_spin_relax(lock)   cpu_relax()
#define arch_read_relax(lock)   cpu_relax()
#define arch_write_relax(lock)  cpu_relax()

/* The {read|write|spin}_lock() on x86 are full memory barriers. */
static inline void smp_mb__after_lock(void) { }
#define ARCH_HAS_SMP_MB_AFTER_LOCK

#endif /* _ASM_X86_SPINLOCK_H */