- patches.fixes/downgrade_write: Fix from mainline.
[linux-flexiantxendom0-3.2.10.git] / lib / rwsem.c
1 /* rwsem.c: R/W semaphores: contention handling functions
2  *
3  * Written by David Howells (dhowells@redhat.com).
4  * Derived from arch/i386/kernel/semaphore.c
5  */
6 #include <linux/rwsem.h>
7 #include <linux/sched.h>
8 #include <linux/module.h>
9
10 struct rwsem_waiter {
11         struct list_head        list;
12         struct task_struct      *task;
13         unsigned int            flags;
14 #define RWSEM_WAITING_FOR_READ  0x00000001
15 #define RWSEM_WAITING_FOR_WRITE 0x00000002
16 };
17
#if RWSEM_DEBUG
#undef rwsemtrace
/*
 * debugging aid: dump the semaphore's address and count
 * - the extra line tagged with the current pid and the caller's message is
 *   only emitted for semaphores that have their debug flag set
 */
void rwsemtrace(struct rw_semaphore *sem, const char *str)
{
	printk("sem=%p\n",sem);
	printk("(sem)=%08lx\n",sem->count);

	if (!sem->debug)
		return;

	printk("[%d] %s({%08lx})\n",current->pid,str,sem->count);
}
#endif
28
29 /*
30  * handle the lock being released whilst there are processes blocked on it that can now run
31  * - if we come here from up_xxxx(), then:
32  *   - the 'active part' of the count (&0x0000ffff) had reached zero (but may have changed)
33  *   - the 'waiting part' of the count (&0xffff0000) is negative (and will still be so)
34  *   - there must be someone on the queue
35  * - the spinlock must be held by the caller
36  * - woken process blocks are discarded from the list after having task zeroed
37  * - writers are only woken if downgrading is false
38  */
39 static inline struct rw_semaphore *__rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
40 {
41         struct rwsem_waiter *waiter;
42         struct task_struct *tsk;
43         struct list_head *next;
44         signed long oldcount, woken, loop;
45
46         rwsemtrace(sem,"Entering __rwsem_do_wake");
47
48         if (downgrading)
49                 goto dont_wake_writers;
50
51         /* if we came through an up_xxxx() call, we only only wake someone up
52          * if we can transition the active part of the count from 0 -> 1
53          */
54  try_again:
55         oldcount = rwsem_atomic_update(RWSEM_ACTIVE_BIAS,sem) - RWSEM_ACTIVE_BIAS;
56         if (oldcount & RWSEM_ACTIVE_MASK)
57                 goto undo;
58
59         waiter = list_entry(sem->wait_list.next,struct rwsem_waiter,list);
60
61         /* try to grant a single write lock if there's a writer at the front of the queue
62          * - note we leave the 'active part' of the count incremented by 1 and the waiting part
63          *   incremented by 0x00010000
64          */
65         if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
66                 goto readers_only;
67
68         list_del(&waiter->list);
69         tsk = waiter->task;
70         mb();
71         waiter->task = NULL;
72         wake_up_process(tsk);
73         put_task_struct(tsk);
74         goto out;
75
76         /* don't want to wake any writers */
77  dont_wake_writers:
78         waiter = list_entry(sem->wait_list.next,struct rwsem_waiter,list);
79         if (waiter->flags & RWSEM_WAITING_FOR_WRITE)
80                 goto out;
81
82         /* grant an infinite number of read locks to the readers at the front
83          * of the queue
84          * - note we increment the 'active part' of the count by the number of
85          *   readers before waking any processes up
86          */
87  readers_only:
88         woken = 0;
89         do {
90                 woken++;
91
92                 if (waiter->list.next==&sem->wait_list)
93                         break;
94
95                 waiter = list_entry(waiter->list.next,struct rwsem_waiter,list);
96
97         } while (waiter->flags & RWSEM_WAITING_FOR_READ);
98
99         loop = woken;
100         woken *= RWSEM_ACTIVE_BIAS - RWSEM_WAITING_BIAS;
101         if (!downgrading)
102                 woken -= RWSEM_ACTIVE_BIAS; /* we'd already done one increment
103                                              * earlier */
104         rwsem_atomic_add(woken,sem);
105
106         next = sem->wait_list.next;
107         for (; loop>0; loop--) {
108                 waiter = list_entry(next,struct rwsem_waiter,list);
109                 next = waiter->list.next;
110                 tsk = waiter->task;
111                 mb();
112                 waiter->task = NULL;
113                 wake_up_process(tsk);
114                 put_task_struct(tsk);
115         }
116
117         sem->wait_list.next = next;
118         next->prev = &sem->wait_list;
119
120  out:
121         rwsemtrace(sem,"Leaving __rwsem_do_wake");
122         return sem;
123
124         /* undo the change to count, but check for a transition 1->0 */
125  undo:
126         if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS,sem)!=0)
127                 goto out;
128         goto try_again;
129 }
130
131 /*
132  * wait for a lock to be granted
133  */
134 static inline struct rw_semaphore *rwsem_down_failed_common(struct rw_semaphore *sem,
135                                                                  struct rwsem_waiter *waiter,
136                                                                  signed long adjustment)
137 {
138         struct task_struct *tsk = current;
139         signed long count;
140
141         set_task_state(tsk,TASK_UNINTERRUPTIBLE);
142
143         /* set up my own style of waitqueue */
144         spin_lock(&sem->wait_lock);
145         waiter->task = tsk;
146         get_task_struct(tsk);
147
148         list_add_tail(&waiter->list,&sem->wait_list);
149
150         /* note that we're now waiting on the lock, but no longer actively read-locking */
151         count = rwsem_atomic_update(adjustment,sem);
152
153         /* if there are no longer active locks, wake the front queued process(es) up
154          * - it might even be this process, since the waker takes a more active part
155          */
156         if (!(count & RWSEM_ACTIVE_MASK))
157                 sem = __rwsem_do_wake(sem, 0);
158
159         spin_unlock(&sem->wait_lock);
160
161         /* wait to be given the lock */
162         for (;;) {
163                 if (!waiter->task)
164                         break;
165                 schedule();
166                 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
167         }
168
169         tsk->state = TASK_RUNNING;
170
171         return sem;
172 }
173
174 /*
175  * wait for the read lock to be granted
176  */
177 struct rw_semaphore fastcall *rwsem_down_read_failed(struct rw_semaphore *sem)
178 {
179         struct rwsem_waiter waiter;
180
181         rwsemtrace(sem,"Entering rwsem_down_read_failed");
182
183         waiter.flags = RWSEM_WAITING_FOR_READ;
184         rwsem_down_failed_common(sem,&waiter,RWSEM_WAITING_BIAS-RWSEM_ACTIVE_BIAS);
185
186         rwsemtrace(sem,"Leaving rwsem_down_read_failed");
187         return sem;
188 }
189
190 /*
191  * wait for the write lock to be granted
192  */
193 struct rw_semaphore fastcall *rwsem_down_write_failed(struct rw_semaphore *sem)
194 {
195         struct rwsem_waiter waiter;
196
197         rwsemtrace(sem,"Entering rwsem_down_write_failed");
198
199         waiter.flags = RWSEM_WAITING_FOR_WRITE;
200         rwsem_down_failed_common(sem,&waiter,-RWSEM_ACTIVE_BIAS);
201
202         rwsemtrace(sem,"Leaving rwsem_down_write_failed");
203         return sem;
204 }
205
206 /*
207  * handle waking up a waiter on the semaphore
208  * - up_read/up_write has decremented the active part of the count if we come here
209  */
210 struct rw_semaphore fastcall *rwsem_wake(struct rw_semaphore *sem)
211 {
212         rwsemtrace(sem,"Entering rwsem_wake");
213
214         spin_lock(&sem->wait_lock);
215
216         /* do nothing if list empty */
217         if (!list_empty(&sem->wait_list))
218                 sem = __rwsem_do_wake(sem, 0);
219
220         spin_unlock(&sem->wait_lock);
221
222         rwsemtrace(sem,"Leaving rwsem_wake");
223
224         return sem;
225 }
226
227 /*
228  * downgrade a write lock into a read lock
229  * - caller incremented waiting part of count, and discovered it to be still negative
230  * - just wake up any readers at the front of the queue
231  */
232 struct rw_semaphore fastcall *rwsem_downgrade_wake(struct rw_semaphore *sem)
233 {
234         rwsemtrace(sem,"Entering rwsem_downgrade_wake");
235
236         spin_lock(&sem->wait_lock);
237
238         /* do nothing if list empty */
239         if (!list_empty(&sem->wait_list))
240                 sem = __rwsem_do_wake(sem, 1);
241
242         spin_unlock(&sem->wait_lock);
243
244         rwsemtrace(sem,"Leaving rwsem_downgrade_wake");
245         return sem;
246 }
247
/*
 * export the contention-handling entry points defined above
 * - rwsemtrace() is only compiled in, and so only exported, when RWSEM_DEBUG
 *   is set
 */
EXPORT_SYMBOL_NOVERS(rwsem_down_read_failed);
EXPORT_SYMBOL_NOVERS(rwsem_down_write_failed);
EXPORT_SYMBOL_NOVERS(rwsem_wake);
EXPORT_SYMBOL_NOVERS(rwsem_downgrade_wake);
#if RWSEM_DEBUG
EXPORT_SYMBOL(rwsemtrace);
#endif