2 * Copyright (c) International Business Machines Corp., 2000-2003
3 * Portions Copyright (c) Christoph Hellwig, 2001-2002
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 * jfs_logmgr.c: log manager
23 * for related information, see transaction manager (jfs_txnmgr.c), and
24 * recovery manager (jfs_logredo.c).
26 * note: for detail, RTFS.
29 * special purpose buffer manager supporting log i/o requirements.
30 * per log serial pageout of logpage
31 * queuing i/o requests and redrive i/o at iodone
32 * maintain current logpage buffer
33 * no caching since append only
34 * appropriate jfs buffer cache buffers as needed
37 * transactions which wrote COMMIT records in the same in-memory
38 * log page during the pageout of previous/current log page(s) are
39 * committed together by the pageout of the page.
42 * transactions are committed asynchronously when the log page
43 * containing it COMMIT is paged out when it becomes full;
46 * . a per log lock serialize log write.
47 * . a per log lock serialize group commit.
48 * . a per log lock serialize log open/close;
51 * careful-write (ping-pong) of last logpage to recover from crash
53 * detection of split (out-of-order) write of physical sectors
54 * of last logpage via timestamp at end of each sector
55 * with its mirror data array at trailer).
58 * lsn - 64-bit monotonically increasing integer vs
59 * 32-bit lspn and page eor.
63 #include <linux/blkdev.h>
64 #include <linux/interrupt.h>
65 #include <linux/smp_lock.h>
66 #include <linux/completion.h>
67 #include <linux/buffer_head.h> /* for sync_blockdev() */
68 #include <linux/bio.h>
69 #include <linux/suspend.h>
70 #include "jfs_incore.h"
71 #include "jfs_filsys.h"
72 #include "jfs_metapage.h"
73 #include "jfs_txnmgr.h"
74 #include "jfs_debug.h"
78 * lbuf's ready to be redriven. Protected by log_redrive_lock (jfsIO thread)
80 static struct lbuf *log_redrive_list;
81 static spinlock_t log_redrive_lock = SPIN_LOCK_UNLOCKED;
82 DECLARE_WAIT_QUEUE_HEAD(jfs_IO_thread_wait);
86 * log read/write serialization (per log)
88 #define LOG_LOCK_INIT(log) init_MUTEX(&(log)->loglock)
89 #define LOG_LOCK(log) down(&((log)->loglock))
90 #define LOG_UNLOCK(log) up(&((log)->loglock))
94 * log group commit serialization (per log)
97 #define LOGGC_LOCK_INIT(log) spin_lock_init(&(log)->gclock)
98 #define LOGGC_LOCK(log) spin_lock_irq(&(log)->gclock)
99 #define LOGGC_UNLOCK(log) spin_unlock_irq(&(log)->gclock)
100 #define LOGGC_WAKEUP(tblk) wake_up_all(&(tblk)->gcwait)
103 * log sync serialization (per log)
105 #define LOGSYNC_DELTA(logsize) min((logsize)/8, 128*LOGPSIZE)
106 #define LOGSYNC_BARRIER(logsize) ((logsize)/4)
108 #define LOGSYNC_DELTA(logsize) min((logsize)/4, 256*LOGPSIZE)
109 #define LOGSYNC_BARRIER(logsize) ((logsize)/2)
114 * log buffer cache synchronization
116 static spinlock_t jfsLCacheLock = SPIN_LOCK_UNLOCKED;
118 #define LCACHE_LOCK(flags) spin_lock_irqsave(&jfsLCacheLock, flags)
119 #define LCACHE_UNLOCK(flags) spin_unlock_irqrestore(&jfsLCacheLock, flags)
122 * See __SLEEP_COND in jfs_locks.h
124 #define LCACHE_SLEEP_COND(wq, cond, flags) \
128 __SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
131 #define LCACHE_WAKEUP(event) wake_up(event)
135 * lbuf buffer cache (lCache) control
137 /* log buffer manager pageout control (cumulative, inclusive) */
138 #define lbmREAD 0x0001
139 #define lbmWRITE 0x0002 /* enqueue at tail of write queue;
140 * init pageout if at head of queue;
142 #define lbmRELEASE 0x0004 /* remove from write queue
143 * at completion of pageout;
144 * do not free/recycle it yet:
145 * caller will free it;
147 #define lbmSYNC 0x0008 /* do not return to freelist
148 * when removed from write queue;
150 #define lbmFREE 0x0010 /* return to freelist
151 * at completion of pageout;
152 * the buffer may be recycled;
154 #define lbmDONE 0x0020
155 #define lbmERROR 0x0040
156 #define lbmGC 0x0080 /* lbmIODone to perform post-GC processing
159 #define lbmDIRECT 0x0100
162 * external references
164 extern void txLazyUnlock(struct tblock * tblk);
165 extern int jfs_stop_threads;
166 extern struct completion jfsIOwait;
171 static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
172 struct lrd * lrd, struct tlock * tlck);
174 static int lmNextPage(struct jfs_log * log);
175 static int lmLogFileSystem(struct jfs_log * log, char *uuid, int activate);
177 static int lbmLogInit(struct jfs_log * log);
178 static void lbmLogShutdown(struct jfs_log * log);
179 static struct lbuf *lbmAllocate(struct jfs_log * log, int);
180 static void lbmFree(struct lbuf * bp);
181 static void lbmfree(struct lbuf * bp);
182 static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
183 static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block);
184 static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
185 static int lbmIOWait(struct lbuf * bp, int flag);
186 static bio_end_io_t lbmIODone;
187 static void lbmStartIO(struct lbuf * bp);
188 static void lmGCwrite(struct jfs_log * log, int cant_block);
195 #ifdef CONFIG_JFS_STATISTICS
197 uint commit; /* # of commit */
198 uint pagedone; /* # of page written */
199 uint submitted; /* # of pages submitted */
200 uint full_page; /* # of full pages submitted */
201 uint partial_page; /* # of partial pages submitted */
209 * FUNCTION: write a log record;
213 * RETURN: lsn - offset to the next log record to write (end-of-log);
216 * note: todo: log error handler
/*
 * NOTE(review): this block is a lossily-extracted fragment of the JFS
 * log manager's lmLog() — interior lines are missing and the original
 * file's line numbers are fused into each line. Restore from pristine
 * source before compiling; comments below annotate only what is visible.
 */
218 int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
223 struct metapage *mp = NULL;
225 jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
226 log, tblk, lrd, tlck);
230 /* log by (out-of-transaction) JFS ? */
234 /* log from page ? */
236 tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
240 * initialize/update page/transaction recovery lsn
247 * initialize page lsn if first log write of the page
254 /* insert page at tail of logsynclist */
255 list_add_tail(&mp->synclist, &log->synclist);
259 * initialize/update lsn of tblock of the page
261 * transaction inherits oldest lsn of pages associated
262 * with allocation/deallocation of resources (their
263 * log records are used to reconstruct allocation map
264 * at recovery time: inode for inode allocation map,
265 * B+-tree index of extent descriptors for block
267 * allocation map pages inherit transaction lsn at
268 * commit time to allow forwarding log syncpt past log
269 * records associated with allocation/deallocation of
270 * resources only after persistent map of these map pages
271 * have been updated and propagated to home.
274 * initialize transaction lsn:
276 if (tblk->lsn == 0) {
277 /* inherit lsn of its first page logged */
281 /* insert tblock after the page on logsynclist */
282 list_add(&tblk->synclist, &mp->synclist);
285 * update transaction lsn:
288 /* inherit oldest/smallest lsn of page */
289 logdiff(diffp, mp->lsn, log);
290 logdiff(difft, tblk->lsn, log);
292 /* update tblock lsn with page lsn */
295 /* move tblock after page on logsynclist */
296 list_move(&tblk->synclist, &mp->synclist);
303 * write the log record
306 lsn = lmWriteRecord(log, tblk, lrd, tlck);
309 * forward log syncpt if log reached next syncpt trigger
311 logdiff(diffp, lsn, log);
312 if (diffp >= log->nextsync)
313 lsn = lmLogSync(log, 0);
315 /* update end-of-log lsn */
320 /* return end-of-log address */
326 * NAME: lmWriteRecord()
328 * FUNCTION: move the log record to current log page
330 * PARAMETER: cd - commit descriptor
332 * RETURN: end-of-log address
334 * serialization: LOG_LOCK() held on entry/exit
/*
 * NOTE(review): lossily-extracted fragment (interior lines elided,
 * embedded line numbers) — do not compile as-is; restore from source.
 * Visible logic: copies the tlock'd line-lock vectors and the log
 * record descriptor into the current log page, crossing page
 * boundaries via lmNextPage(), then queues COMMIT records for
 * group commit.
 */
337 lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
340 int lsn = 0; /* end-of-log address */
341 struct lbuf *bp; /* dst log page buffer */
342 struct logpage *lp; /* dst log page */
343 caddr_t dst; /* destination address in log page */
344 int dstoffset; /* end-of-log offset in log page */
345 int freespace; /* free space in log page */
346 caddr_t p; /* src meta-data page */
349 int nbytes; /* number of bytes to move */
352 struct linelock *linelock;
359 /* retrieve destination log page to write */
360 bp = (struct lbuf *) log->bp;
361 lp = (struct logpage *) bp->l_ldata;
362 dstoffset = log->eor;
364 /* any log data to write ? */
369 * move log record data
371 /* retrieve source meta-data page to log */
372 if (tlck->flag & tlckPAGELOCK) {
373 p = (caddr_t) (tlck->mp->data);
374 linelock = (struct linelock *) & tlck->lock;
376 /* retrieve source in-memory inode to log */
377 else if (tlck->flag & tlckINODELOCK) {
378 if (tlck->type & tlckDTREE)
379 p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
381 p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
382 linelock = (struct linelock *) & tlck->lock;
385 else if (tlck->flag & tlckINLINELOCK) {
387 inlinelock = (struct inlinelock *) & tlck;
388 p = (caddr_t) & inlinelock->pxd;
389 linelock = (struct linelock *) & tlck;
391 #endif /* _JFS_WIP */
393 jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
394 return 0; /* Probably should trap */
396 l2linesize = linelock->l2linesize;
399 ASSERT(linelock->index <= linelock->maxcnt);
402 for (i = 0; i < linelock->index; i++, lv++) {
407 if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
408 /* page become full: move on to next page */
412 lp = (struct logpage *) bp->l_ldata;
413 dstoffset = LOGPHDRSIZE;
417 * move log vector data
419 src = (u8 *) p + (lv->offset << l2linesize);
420 srclen = lv->length << l2linesize;
423 freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
424 nbytes = min(freespace, srclen);
425 dst = (caddr_t) lp + dstoffset;
426 memcpy(dst, src, nbytes);
429 /* is page not full ? */
430 if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
433 /* page become full: move on to next page */
436 bp = (struct lbuf *) log->bp;
437 lp = (struct logpage *) bp->l_ldata;
438 dstoffset = LOGPHDRSIZE;
445 * move log vector descriptor
448 lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
449 lvd->offset = cpu_to_le16(lv->offset);
450 lvd->length = cpu_to_le16(lv->length);
452 jfs_info("lmWriteRecord: lv offset:%d length:%d",
453 lv->offset, lv->length);
456 if ((i = linelock->next)) {
457 linelock = (struct linelock *) lid_to_tlock(i);
462 * move log record descriptor
465 lrd->length = cpu_to_le16(len);
471 freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
472 nbytes = min(freespace, srclen);
473 dst = (caddr_t) lp + dstoffset;
474 memcpy(dst, src, nbytes);
479 /* are there more to move than freespace of page ? */
484 * end of log record descriptor
487 /* update last log record eor */
488 log->eor = dstoffset;
489 bp->l_eor = dstoffset;
490 lsn = (log->page << L2LOGPSIZE) + dstoffset;
492 if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
494 jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
497 INCREMENT(lmStat.commit); /* # of commit */
500 * enqueue tblock for group commit:
502 * enqueue tblock of non-trivial/synchronous COMMIT
503 * at tail of group commit queue
504 * (trivial/asynchronous COMMITs are ignored by
509 /* init tblock gc state */
510 tblk->flag = tblkGC_QUEUE;
512 tblk->pn = log->page;
513 tblk->eor = log->eor;
515 /* enqueue transaction to commit queue */
517 if (log->cqueue.head) {
518 log->cqueue.tail->cqnext = tblk;
519 log->cqueue.tail = tblk;
521 log->cqueue.head = log->cqueue.tail = tblk;
526 jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
527 le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);
529 /* page not full ? */
530 if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
534 /* page become full: move on to next page */
537 bp = (struct lbuf *) log->bp;
538 lp = (struct logpage *) bp->l_ldata;
539 dstoffset = LOGPHDRSIZE;
550 * FUNCTION: write current page and allocate next page.
556 * serialization: LOG_LOCK() held on entry/exit
/*
 * NOTE(review): lossily-extracted fragment — interior lines missing;
 * restore from pristine source before building.
 */
558 static int lmNextPage(struct jfs_log * log)
561 int lspn; /* log sequence page number */
562 int pn; /* current page number */
567 /* get current log page number and log sequence page number */
570 lp = (struct logpage *) bp->l_ldata;
571 lspn = le32_to_cpu(lp->h.page);
576 * write or queue the full page at the tail of write queue
578 /* get the tail tblk on commit queue */
579 tblk = log->cqueue.tail;
581 /* every tblk who has COMMIT record on the current page,
582 * and has not been committed, must be on commit queue
583 * since tblk is queued at commit queue at the time
584 * of writing its COMMIT record on the page before
585 * page becomes full (even though the tblk thread
586 * who wrote COMMIT record may have been suspended
590 /* is page bound with outstanding tail tblk ? */
591 if (tblk && tblk->pn == pn) {
592 /* mark tblk for end-of-page */
593 tblk->flag |= tblkGC_EOP;
595 if (log->cflag & logGC_PAGEOUT) {
596 /* if page is not already on write queue,
597 * just enqueue (no lbmWRITE to prevent redrive)
598 * buffer to wqueue to ensure correct serial order
599 * of the pages since log pages will be added
602 if (bp->l_wqnext == NULL)
603 lbmWrite(log, bp, 0, 0);
606 * No current GC leader, initiate group commit
608 log->cflag |= logGC_PAGEOUT;
612 /* page is not bound with outstanding tblk:
613 * init write or mark it to be redriven (lbmWRITE)
616 /* finalize the page */
617 bp->l_ceor = bp->l_eor;
618 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
619 lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
624 * allocate/initialize next page
626 /* if log wraps, the first data page of log is 2
627 * (0 never used, 1 is superblock).
629 log->page = (pn == log->size - 1) ? 2 : pn + 1;
630 log->eor = LOGPHDRSIZE; /* ? valid page empty/full at logRedo() */
632 /* allocate/initialize next log page buffer */
633 nextbp = lbmAllocate(log, log->page);
634 nextbp->l_eor = log->eor;
637 /* initialize next log page */
638 lp = (struct logpage *) nextbp->l_ldata;
639 lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
640 lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
647 * NAME: lmGroupCommit()
649 * FUNCTION: group commit
650 * initiate pageout of the pages with COMMIT in the order of
651 * page number - redrive pageout of the page at the head of
652 * pageout queue until full page has been written.
657 * LOGGC_LOCK serializes log group commit queue, and
658 * transaction blocks on the commit queue.
659 * N.B. LOG_LOCK is NOT held during lmGroupCommit().
/*
 * NOTE(review): lossily-extracted fragment — interior lines missing;
 * restore from pristine source before building.
 */
661 int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
667 /* group committed already ? */
668 if (tblk->flag & tblkGC_COMMITTED) {
669 if (tblk->flag & tblkGC_ERROR)
675 jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);
677 if (tblk->xflag & COMMIT_LAZY)
678 tblk->flag |= tblkGC_LAZY;
680 if ((!(log->cflag & logGC_PAGEOUT)) && log->cqueue.head &&
681 (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag))) {
683 * No pageout in progress
685 * start group commit as its group leader.
687 log->cflag |= logGC_PAGEOUT;
692 if (tblk->xflag & COMMIT_LAZY) {
694 * Lazy transactions can leave now
700 /* lmGCwrite gives up LOGGC_LOCK, check again */
702 if (tblk->flag & tblkGC_COMMITTED) {
703 if (tblk->flag & tblkGC_ERROR)
710 /* upcount transaction waiting for completion
713 tblk->flag |= tblkGC_READY;
715 __SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
716 LOGGC_LOCK(log), LOGGC_UNLOCK(log));
718 /* removed from commit queue */
719 if (tblk->flag & tblkGC_ERROR)
729 * FUNCTION: group commit write
730 * initiate write of log page, building a group of all transactions
731 * with commit records on that page.
736 * LOGGC_LOCK must be held by caller.
737 * N.B. LOG_LOCK is NOT held during lmGroupCommit().
/*
 * NOTE(review): lossily-extracted fragment — interior lines missing;
 * restore from pristine source before building.
 */
739 static void lmGCwrite(struct jfs_log * log, int cant_write)
743 int gcpn; /* group commit page number */
745 struct tblock *xtblk;
748 * build the commit group of a log page
750 * scan commit queue and make a commit group of all
751 * transactions with COMMIT records on the same log page.
753 /* get the head tblk on the commit queue */
754 tblk = xtblk = log->cqueue.head;
757 while (tblk && tblk->pn == gcpn) {
760 /* state transition: (QUEUE, READY) -> COMMIT */
761 tblk->flag |= tblkGC_COMMIT;
764 tblk = xtblk; /* last tblk of the page */
767 * pageout to commit transactions on the log page.
769 bp = (struct lbuf *) tblk->bp;
770 lp = (struct logpage *) bp->l_ldata;
771 /* is page already full ? */
772 if (tblk->flag & tblkGC_EOP) {
773 /* mark page to free at end of group commit of the page */
774 tblk->flag &= ~tblkGC_EOP;
775 tblk->flag |= tblkGC_FREE;
776 bp->l_ceor = bp->l_eor;
777 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
778 lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
780 INCREMENT(lmStat.full_page);
782 /* page is not yet full */
784 bp->l_ceor = tblk->eor; /* ? bp->l_ceor = bp->l_eor; */
785 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
786 lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
787 INCREMENT(lmStat.partial_page);
794 * FUNCTION: group commit post-processing
795 * Processes transactions after their commit records have been written
796 * to disk, redriving log I/O if necessary.
801 * This routine is called at interrupt time by lbmIODone
/*
 * NOTE(review): lossily-extracted fragment — interior lines missing;
 * restore from pristine source before building.
 */
803 void lmPostGC(struct lbuf * bp)
806 struct jfs_log *log = bp->l_log;
811 spin_lock_irqsave(&log->gclock, flags);
813 * current pageout of group commit completed.
815 * remove/wakeup transactions from commit queue who were
816 * group committed with the current log page
818 while ((tblk = log->cqueue.head) && (tblk->flag & tblkGC_COMMIT)) {
819 /* if transaction was marked GC_COMMIT then
820 * it has been shipped in the current pageout
821 * and made it to disk - it is committed.
824 if (bp->l_flag & lbmERROR)
825 tblk->flag |= tblkGC_ERROR;
827 /* remove it from the commit queue */
828 log->cqueue.head = tblk->cqnext;
829 if (log->cqueue.head == NULL)
830 log->cqueue.tail = NULL;
831 tblk->flag &= ~tblkGC_QUEUE;
834 if (tblk == log->flush_tblk) {
835 /* we can stop flushing the log now */
836 clear_bit(log_FLUSH, &log->flag);
837 log->flush_tblk = NULL;
840 jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
843 if (!(tblk->xflag & COMMIT_FORCE))
845 * Hand tblk over to lazy commit thread
849 /* state transition: COMMIT -> COMMITTED */
850 tblk->flag |= tblkGC_COMMITTED;
852 if (tblk->flag & tblkGC_READY)
858 /* was page full before pageout ?
859 * (and this is the last tblk bound with the page)
861 if (tblk->flag & tblkGC_FREE)
863 /* did page become full after pageout ?
864 * (and this is the last tblk bound with the page)
866 else if (tblk->flag & tblkGC_EOP) {
867 /* finalize the page */
868 lp = (struct logpage *) bp->l_ldata;
869 bp->l_ceor = bp->l_eor;
870 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
871 jfs_info("lmPostGC: calling lbmWrite");
872 lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
878 /* are there any transactions who have entered lmGroupCommit()
879 * (whose COMMITs are after that of the last log page written.
880 * They are waiting for new group commit (above at (SLEEP 1))
881 * or lazy transactions are on a full (queued) log page,
882 * select the latest ready transaction as new group leader and
883 * wake her up to lead her group.
885 if ((tblk = log->cqueue.head) &&
886 ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
887 test_bit(log_FLUSH, &log->flag)))
889 * Call lmGCwrite with new group leader
893 /* no transaction are ready yet (transactions are only just
894 * queued (GC_QUEUE) and not entered for group commit yet).
895 * the first transaction entering group commit
896 * will elect herself as new group leader.
899 log->cflag &= ~logGC_PAGEOUT;
902 spin_unlock_irqrestore(&log->gclock, flags);
909 * FUNCTION: write log SYNCPT record for specified log
910 * if new sync address is available
911 * (normally the case if sync() is executed by back-ground
913 * if not, explicitly run jfs_blogsync() to initiate
914 * getting of new sync address.
915 * calculate new value of i_nextsync which determines when
916 * this code is called again.
918 * this is called only from lmLog().
920 * PARAMETER: ip - pointer to logs inode.
924 * serialization: LOG_LOCK() held on entry/exit
/*
 * NOTE(review): lossily-extracted fragment — interior lines missing;
 * restore from pristine source before building.
 */
926 int lmLogSync(struct jfs_log * log, int nosyncwait)
929 int written; /* written since last syncpt */
930 int free; /* free space left available */
931 int delta; /* additional delta to write normally */
932 int more; /* additional write granted */
935 struct logsyncblk *lp;
940 /* if last sync is same as last syncpt,
941 * invoke sync point forward processing to update sync.
944 if (log->sync == log->syncpt) {
946 /* ToDo: push dirty metapages out to disk */
949 if (list_empty(&log->synclist))
950 log->sync = log->lsn;
952 lp = list_entry(log->synclist.next,
953 struct logsyncblk, synclist);
960 /* if sync is different from last syncpt,
961 * write a SYNCPT record with syncpt = sync.
962 * reset syncpt = sync
964 if (log->sync != log->syncpt) {
965 struct super_block *sb = log->sb;
966 struct jfs_sb_info *sbi = JFS_SBI(sb);
969 * We need to make sure all of the "written" metapages
970 * actually make it to disk
972 filemap_fdatawrite(sbi->ipbmap->i_mapping);
973 filemap_fdatawrite(sbi->ipimap->i_mapping);
974 filemap_fdatawrite(sb->s_bdev->bd_inode->i_mapping);
975 filemap_fdatawait(sbi->ipbmap->i_mapping);
976 filemap_fdatawait(sbi->ipimap->i_mapping);
977 filemap_fdatawait(sb->s_bdev->bd_inode->i_mapping);
981 lrd.type = cpu_to_le16(LOG_SYNCPT);
983 lrd.log.syncpt.sync = cpu_to_le32(log->sync);
984 lsn = lmWriteRecord(log, NULL, &lrd, NULL);
986 log->syncpt = log->sync;
991 * setup next syncpt trigger (SWAG)
993 logsize = log->logsize;
995 logdiff(written, lsn, log);
996 free = logsize - written;
997 delta = LOGSYNC_DELTA(logsize);
998 more = min(free / 2, delta);
999 if (more < 2 * LOGPSIZE) {
1000 jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
1004 * option 1 - panic ? No.!
1005 * option 2 - shutdown file systems
1006 * associated with log ?
1007 * option 3 - extend log ?
1010 * option 4 - second chance
1012 * mark log wrapped, and continue.
1013 * when all active transactions are completed,
1014 * mark log valid for recovery.
1015 * if crashed during invalid state, log state
1016 * implies invalid log, forcing fsck().
1018 /* mark log state log wrap in log superblock */
1019 /* log->state = LOGWRAP; */
1021 /* reset sync point computation */
1022 log->syncpt = log->sync = lsn;
1023 log->nextsync = delta;
1025 /* next syncpt trigger = written + more */
1026 log->nextsync = written + more;
1028 /* return if lmLogSync() from outside of transaction, e.g., sync() */
1032 /* if number of bytes written from last sync point is more
1033 * than 1/4 of the log size, stop new transactions from
1034 * starting until all current transactions are completed
1035 * by setting syncbarrier flag.
1037 if (written > LOGSYNC_BARRIER(logsize) && logsize > 32 * LOGPSIZE) {
1038 set_bit(log_SYNCBARRIER, &log->flag);
1039 jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
1042 * We may have to initiate group commit
1044 jfs_flush_journal(log, 0);
1054 * FUNCTION: open the log on first open;
1055 * insert filesystem in the active list of the log.
1057 * PARAMETER: ipmnt - file system mount inode
1058 * iplog - log inode (out)
/*
 * NOTE(review): lossily-extracted fragment — interior lines missing
 * (including several error-path labels); restore from pristine source.
 */
1064 int lmLogOpen(struct super_block *sb, struct jfs_log ** logptr)
1067 struct block_device *bdev;
1068 struct jfs_log *log;
1070 if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL)))
1072 memset(log, 0, sizeof(struct jfs_log));
1073 init_waitqueue_head(&log->syncwait);
1075 log->sb = sb; /* This should be a list */
1077 if (!(JFS_SBI(sb)->mntflag & JFS_INLINELOG))
1081 * in-line log in host file system
1083 * file system to log have 1-to-1 relationship;
1086 set_bit(log_INLINELOG, &log->flag);
1087 log->bdev = sb->s_bdev;
1088 log->base = addressPXD(&JFS_SBI(sb)->logpxd);
1089 log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
1090 (L2LOGPSIZE - sb->s_blocksize_bits);
1091 log->l2bsize = sb->s_blocksize_bits;
1092 ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);
1097 if ((rc = lmLogInit(log)))
1102 * external log as separate logical volume
1104 * file systems to log may have n-to-1 relationship;
1108 bdev = open_by_devnum(JFS_SBI(sb)->logdev,
1109 FMODE_READ|FMODE_WRITE, BDEV_FS);
1111 rc = -PTR_ERR(bdev);
1115 if ((rc = bd_claim(bdev, log))) {
1120 memcpy(log->uuid, JFS_SBI(sb)->loguuid, sizeof(log->uuid));
1125 if ((rc = lmLogInit(log)))
1129 * add file system to log active file system list
1131 if ((rc = lmLogFileSystem(log, JFS_SBI(sb)->uuid, 1)))
1141 shutdown: /* unwind lbmLogInit() */
1142 lbmLogShutdown(log);
1147 close: /* close external log device */
1148 blkdev_put(bdev, BDEV_FS);
1150 free: /* free log descriptor */
1153 jfs_warn("lmLogOpen: exit(%d)", rc);
1161 * FUNCTION: log initialization at first log open.
1163 * logredo() (or logformat()) should have been run previously.
1164 * initialize the log inode from log superblock.
1165 * set the log state in the superblock to LOGMOUNT and
1166 * write SYNCPT log record.
1168 * PARAMETER: log - log structure
1171 * -EINVAL - bad log magic number or superblock dirty
1172 * error returned from logwait()
1174 * serialization: single first open thread
/*
 * NOTE(review): lossily-extracted fragment — interior lines missing;
 * restore from pristine source before building.
 */
1176 int lmLogInit(struct jfs_log * log)
1180 struct logsuper *logsuper;
1181 struct lbuf *bpsuper;
1186 jfs_info("lmLogInit: log:0x%p", log);
1189 * log inode is overlaid on generic inode where
1190 * dinode have been zeroed out by iRead();
1194 * initialize log i/o
1196 if ((rc = lbmLogInit(log)))
1200 * validate log superblock
1202 if (!test_bit(log_INLINELOG, &log->flag))
1203 log->l2bsize = 12; /* XXX kludge alert XXX */
1204 if ((rc = lbmRead(log, 1, &bpsuper)))
1207 logsuper = (struct logsuper *) bpsuper->l_ldata;
1209 if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
1210 jfs_warn("*** Log Format Error ! ***");
1215 /* logredo() should have been run successfully. */
1216 if (logsuper->state != cpu_to_le32(LOGREDONE)) {
1217 jfs_warn("*** Log Is Dirty ! ***");
1222 /* initialize log inode from log superblock */
1223 if (test_bit(log_INLINELOG,&log->flag)) {
1224 if (log->size != le32_to_cpu(logsuper->size)) {
1228 jfs_info("lmLogInit: inline log:0x%p base:0x%Lx size:0x%x",
1229 log, (unsigned long long) log->base, log->size);
1231 if (memcmp(logsuper->uuid, log->uuid, 16)) {
1232 jfs_warn("wrong uuid on JFS log device");
1235 log->size = le32_to_cpu(logsuper->size);
1236 log->l2bsize = le32_to_cpu(logsuper->l2bsize);
1237 jfs_info("lmLogInit: external log:0x%p base:0x%Lx size:0x%x",
1238 log, (unsigned long long) log->base, log->size);
1241 log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
1242 log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);
1244 /* check for disabled journaling to disk */
1245 if (JFS_SBI(log->sb)->flag & JFS_NOINTEGRITY) {
1246 log->no_integrity = 1;
1247 log->ni_page = log->page;
1248 log->ni_eor = log->eor;
1251 log->no_integrity = 0;
1254 * initialize for log append write mode
1256 /* establish current/end-of-log page/buffer */
1257 if ((rc = lbmRead(log, log->page, &bp)))
1260 lp = (struct logpage *) bp->l_ldata;
1262 jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d",
1263 le32_to_cpu(logsuper->end), log->page, log->eor,
1264 le16_to_cpu(lp->h.eor));
1266 // ASSERT(log->eor == lp->h.eor);
1269 bp->l_pn = log->page;
1270 bp->l_eor = log->eor;
1272 /* initialize the group commit serialization lock */
1273 LOGGC_LOCK_INIT(log);
1275 /* if current page is full, move on to next page */
1276 if (log->eor >= LOGPSIZE - LOGPTLRSIZE)
1279 /* allocate/initialize the log write serialization lock */
1283 * initialize log syncpoint
1286 * write the first SYNCPT record with syncpoint = 0
1287 * (i.e., log redo up to HERE !);
1288 * remove current page from lbm write queue at end of pageout
1289 * (to write log superblock update), but do not release to freelist;
1293 lrd.type = cpu_to_le16(LOG_SYNCPT);
1295 lrd.log.syncpt.sync = 0;
1296 lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1298 bp->l_ceor = bp->l_eor;
1299 lp = (struct logpage *) bp->l_ldata;
1300 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1301 lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0);
1302 if ((rc = lbmIOWait(bp, 0)))
1305 /* initialize logsync parameters */
1306 log->logsize = (log->size - 2) << L2LOGPSIZE;
1309 log->sync = log->syncpt;
1310 log->nextsync = LOGSYNC_DELTA(log->logsize);
1312 jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x",
1313 log->lsn, log->syncpt, log->sync);
1315 LOGSYNC_LOCK_INIT(log);
1317 INIT_LIST_HEAD(&log->synclist);
1319 log->cqueue.head = log->cqueue.tail = NULL;
1320 log->flush_tblk = NULL;
1325 * initialize for lazy/group commit
1330 * update/write superblock
1332 logsuper->state = cpu_to_le32(LOGMOUNT);
1333 log->serial = le32_to_cpu(logsuper->serial) + 1;
1334 logsuper->serial = cpu_to_le32(log->serial);
1335 lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1336 if ((rc = lbmIOWait(bpsuper, lbmFREE)))
1344 errout30: /* release log page */
1347 errout20: /* release log superblock */
1350 errout10: /* unwind lbmLogInit() */
1351 lbmLogShutdown(log);
1353 jfs_warn("lmLogInit: exit(%d)", rc);
1359 * NAME: lmLogClose()
1361 * FUNCTION: remove file system <ipmnt> from active list of log <iplog>
1362 * and close it on last close.
1364 * PARAMETER: sb - superblock
1367 * RETURN: errors from subroutines
/*
 * NOTE(review): lossily-extracted fragment — interior lines missing;
 * restore from pristine source before building.
 */
1371 int lmLogClose(struct super_block *sb, struct jfs_log * log)
1373 struct block_device *bdev = log->bdev;
1376 jfs_info("lmLogClose: log:0x%p", log);
1378 if (!test_bit(log_INLINELOG, &log->flag))
1382 * in-line log in host file system
1384 rc = lmLogShutdown(log);
1388 * external log as separate logical volume
1391 lmLogFileSystem(log, JFS_SBI(sb)->uuid, 0);
1392 rc = lmLogShutdown(log);
1395 blkdev_put(bdev, BDEV_FS);
1398 jfs_info("lmLogClose: exit(%d)", rc);
1404 * NAME: jfs_flush_journal()
1406 * FUNCTION: initiate write of any outstanding transactions to the journal
1407 * and optionally wait until they are all written to disk
1409 * wait == 0 flush until latest txn is committed, don't wait
1410 * wait == 1 flush until latest txn is committed, wait
1411 * wait > 1 flush until all txn's are complete, wait
/*
 * NOTE(review): lossily-extracted fragment — interior lines missing;
 * restore from pristine source before building.
 */
1413 void jfs_flush_journal(struct jfs_log *log, int wait)
1416 struct tblock *target;
1418 /* jfs_write_inode may call us during read-only mount */
1422 jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);
1426 target = log->cqueue.head;
1430 * This ensures that we will keep writing to the journal as long
1431 * as there are unwritten commit records
1434 if (test_bit(log_FLUSH, &log->flag)) {
1436 * We're already flushing.
1437 * if flush_tblk is NULL, we are flushing everything,
1438 * so leave it that way. Otherwise, update it to the
1439 * latest transaction
1441 if (log->flush_tblk)
1442 log->flush_tblk = target;
1444 /* Only flush until latest transaction is committed */
1445 log->flush_tblk = target;
1446 set_bit(log_FLUSH, &log->flag);
1449 * Initiate I/O on outstanding transactions
1451 if (!(log->cflag & logGC_PAGEOUT)) {
1452 log->cflag |= logGC_PAGEOUT;
1457 if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
1458 /* Flush until all activity complete */
1459 set_bit(log_FLUSH, &log->flag);
1460 log->flush_tblk = NULL;
1463 if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
1464 DECLARE_WAITQUEUE(__wait, current);
1466 add_wait_queue(&target->gcwait, &__wait);
1467 set_current_state(TASK_UNINTERRUPTIBLE);
1470 current->state = TASK_RUNNING;
1472 remove_wait_queue(&target->gcwait, &__wait);
1480 * If there was recent activity, we may need to wait
1481 * for the lazycommit thread to catch up
1483 if (log->cqueue.head || !list_empty(&log->synclist)) {
1484 for (i = 0; i < 800; i++) { /* Too much? */
1485 current->state = TASK_INTERRUPTIBLE;
1486 schedule_timeout(HZ / 4);
1487 if ((log->cqueue.head == NULL) &&
1488 list_empty(&log->synclist))
1492 assert(log->cqueue.head == NULL);
1493 assert(list_empty(&log->synclist));
1494 clear_bit(log_FLUSH, &log->flag);
1498 * NAME: lmLogShutdown()
1500 * FUNCTION: log shutdown at last LogClose().
1502 * write log syncpt record.
1503 * update super block to set redone flag to 0.
1505 * PARAMETER: log - log inode
1507 * RETURN: 0 - success
1509 * serialization: single last close thread
1511 int lmLogShutdown(struct jfs_log * log)
1516 struct logsuper *logsuper;
1517 struct lbuf *bpsuper;
1521 jfs_info("lmLogShutdown: log:0x%p", log);
/* wait > 1: flush until every outstanding transaction is complete,
 * not just until the latest commit record is written */
1523 jfs_flush_journal(log, 2);
1526 * We need to make sure all of the "written" metapages
1527 * actually make it to disk
1529 sync_blockdev(log->sb->s_bdev);
1532 * write the last SYNCPT record with syncpoint = 0
1533 * (i.e., log redo up to HERE !)
1537 lrd.type = cpu_to_le16(LOG_SYNCPT);
1539 lrd.log.syncpt.sync = 0;
1541 /* check for disabled journaling to disk */
1542 if (JFS_SBI(log->sb)->flag & JFS_NOINTEGRITY) {
/* re-enable integrity so the final SYNCPT really reaches disk;
 * restore the page/eor saved when no_integrity mode was entered */
1543 log->no_integrity = 0;
1544 log->page = log->ni_page;
1545 log->eor = log->ni_eor;
1548 lsn = lmWriteRecord(log, NULL, &lrd, NULL);
/* stamp eor in both the header and trailer of the current log page,
 * write it synchronously, then wait for completion and free it */
1550 lp = (struct logpage *) bp->l_ldata;
1551 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1552 lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
1553 lbmIOWait(log->bp, lbmFREE);
1556 * synchronous update log superblock
1557 * mark log state as shutdown cleanly
1558 * (i.e., Log does not need to be replayed).
1560 if ((rc = lbmRead(log, 1, &bpsuper)))
1563 logsuper = (struct logsuper *) bpsuper->l_ldata;
1564 logsuper->state = cpu_to_le32(LOGREDONE);
1565 logsuper->end = cpu_to_le32(lsn);
/* sidestream write: bypass the serial write queue */
1566 lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1567 rc = lbmIOWait(bpsuper, lbmFREE);
1569 jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
1570 lsn, log->page, log->eor);
1574 * shutdown per log i/o
1576 lbmLogShutdown(log);
/* NOTE(review): the guard/label around this warning (likely an
 * `if (rc)` or error label) is elided in this view — confirm */
1579 jfs_warn("lmLogShutdown: exit(%d)", rc);
1586 * NAME: lmLogFileSystem()
1588 * FUNCTION: insert (<activate> = true)/remove (<activate> = false)
1589 * file system into/from log active file system list.
1591 * PARAMETER: log - pointer to log's inode.
1592 * fsdev - kdev_t of filesystem.
1593 * serial - pointer to returned log serial number
1594 * activate - insert/remove device from active list.
1596 * RETURN: 0 - success
1597 * errors returned by vms_iowait().
1599 static int lmLogFileSystem(struct jfs_log * log, char *uuid, int activate)
1603 struct logsuper *logsuper;
1604 struct lbuf *bpsuper;
1607 * insert/remove file system device to log active file system list.
/* read the log superblock (log page 1) */
1609 if ((rc = lbmRead(log, 1, &bpsuper)))
1612 logsuper = (struct logsuper *) bpsuper->l_ldata;
/* activate: claim the first free (NULL_UUID) slot in the active
 * list — NOTE(review): the if (activate) guard is elided here */
1614 for (i = 0; i < MAX_ACTIVE; i++)
1615 if (!memcmp(logsuper->active[i].uuid, NULL_UUID, 16)) {
1616 memcpy(logsuper->active[i].uuid, uuid, 16);
1619 if (i == MAX_ACTIVE) {
1620 jfs_warn("Too many file systems sharing journal!");
/* NOTE(review): a release of bpsuper before this return is not
 * visible in this elided view — confirm it is not leaked */
1622 return -EMFILE; /* Is there a better rc? */
/* deactivate: find this filesystem's uuid and clear its slot */
1625 for (i = 0; i < MAX_ACTIVE; i++)
1626 if (!memcmp(logsuper->active[i].uuid, uuid, 16)) {
1627 memcpy(logsuper->active[i].uuid, NULL_UUID, 16);
/* uuid not found on the active list: log is inconsistent */
1630 if (i == MAX_ACTIVE) {
1631 jfs_warn("Somebody stomped on the journal!");
1639 * synchronous write log superblock:
1641 * write sidestream bypassing write queue:
1642 * at file system mount, log super block is updated for
1643 * activation of the file system before any log record
1644 * (MOUNT record) of the file system, and at file system
1645 * unmount, all meta data for the file system has been
1646 * flushed before log super block is updated for deactivation
1647 * of the file system.
1649 lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1650 rc = lbmIOWait(bpsuper, lbmFREE);
1656 * log buffer manager (lbm)
1657 * ------------------------
1659 * special purpose buffer manager supporting log i/o requirements.
1661 * per log write queue:
1662 * log pageout occurs in serial order by fifo write queue and
1663 * restricting to a single i/o in progress at any one time.
1664 * a circular singly-linked list
1665 * (log->wrqueue points to the tail, and buffers are linked via
1666 * bp->wrqueue field), and
1667 * maintains log page in pageout or waiting for pageout in serial pageout.
1673 * initialize per log I/O setup at lmLogInit()
1675 static int lbmLogInit(struct jfs_log * log)
1680 jfs_info("lbmLogInit: log:0x%p", log);
1682 /* initialize current buffer cursor */
1685 /* initialize log device write queue */
1689 * Each log has its own buffer pages allocated to it. These are
1690 * not managed by the page cache. This ensures that a transaction
1691 * writing to the log does not block trying to allocate a page from
1692 * the page cache (for the log). This would be bad, since page
1693 * allocation waits on the kswapd thread that may be committing inodes
1694 * which would cause log activity. Was that clear? I'm trying to
1695 * avoid deadlock here.
1697 init_waitqueue_head(&log->free_wait);
1699 log->lbuf_free = NULL;
/* build the per-log freelist: LOGPAGES buffers, each backed by a
 * privately allocated page (not from the page cache) */
1701 for (i = 0; i < LOGPAGES; i++) {
1702 lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
1705 lbuf->l_ldata = (char *) __get_free_page(GFP_KERNEL);
/* page allocation failed; unwind happens via the elided error path
 * below. NOTE(review): comparing a pointer to 0 — NULL is the
 * conventional spelling */
1706 if (lbuf->l_ldata == 0) {
1711 init_waitqueue_head(&lbuf->l_ioevent);
/* push the new buffer onto the head of the freelist */
1713 lbuf->l_freelist = log->lbuf_free;
1714 log->lbuf_free = lbuf;
/* error unwind: release everything allocated so far */
1720 lbmLogShutdown(log);
1728 * finalize per log I/O setup at lmLogShutdown()
1730 static void lbmLogShutdown(struct jfs_log * log)
1734 jfs_info("lbmLogShutdown: log:0x%p", log);
/* walk the freelist releasing each buffer's data page; the kfree of
 * the lbuf struct and the loop advance are in elided lines */
1736 lbuf = log->lbuf_free;
1738 struct lbuf *next = lbuf->l_freelist;
1739 free_page((unsigned long) lbuf->l_ldata);
1751 * allocate an empty log buffer
1753 static struct lbuf *lbmAllocate(struct jfs_log * log, int pn)
1756 unsigned long flags;
1759 * recycle from log buffer freelist if any
/* sleep (dropping the LCACHE lock while asleep) until a free buffer
 * is available, then pop it off the head of the freelist */
1762 LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags);
1763 log->lbuf_free = bp->l_freelist;
1764 LCACHE_UNLOCK(flags);
/* detach from both the write queue and the freelist */
1768 bp->l_wqnext = NULL;
1769 bp->l_freelist = NULL;
/* map log page number pn to its physical block address */
1772 bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize));
1782 * release a log buffer to freelist
1784 static void lbmFree(struct lbuf * bp)
1786 unsigned long flags;
/* takes the LCACHE lock around the real work (the lock acquire and
 * the lbmfree() call are in elided lines) */
1792 LCACHE_UNLOCK(flags);
1795 static void lbmfree(struct lbuf * bp)
1797 struct jfs_log *log = bp->l_log;
/* caller holds LCACHE_LOCK; buffer must be off the write queue */
1799 assert(bp->l_wqnext == NULL);
1802 * return the buffer to head of freelist
1804 bp->l_freelist = log->lbuf_free;
1805 log->lbuf_free = bp;
/* wake anyone sleeping in lbmAllocate() waiting for a free buffer */
1807 wake_up(&log->free_wait);
1815 * FUNCTION: add a log buffer to the log redrive list
1821 * Takes log_redrive_lock.
1823 static inline void lbmRedrive(struct lbuf *bp)
1825 unsigned long flags;
/* push bp onto the global redrive list and wake the jfsIO thread,
 * which performs the I/O in process context (we may be at
 * interrupt time here) */
1827 spin_lock_irqsave(&log_redrive_lock, flags);
1828 bp->l_redrive_next = log_redrive_list;
1829 log_redrive_list = bp;
1830 spin_unlock_irqrestore(&log_redrive_lock, flags);
1832 wake_up(&jfs_IO_thread_wait);
1839 static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
1845 * allocate a log buffer
1847 *bpp = bp = lbmAllocate(log, pn);
1848 jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn);
1850 bp->l_flag |= lbmREAD;
/* build a single-vector bio covering the whole log page */
1852 bio = bio_alloc(GFP_NOFS, 1);
/* l_blkno is in filesystem blocks; convert to 512-byte sectors */
1854 bio->bi_sector = bp->l_blkno << (log->l2bsize - 9);
1855 bio->bi_bdev = log->bdev;
1856 bio->bi_io_vec[0].bv_page = virt_to_page(bp->l_ldata);
1857 bio->bi_io_vec[0].bv_len = LOGPSIZE;
1858 bio->bi_io_vec[0].bv_offset = 0;
1862 bio->bi_size = LOGPSIZE;
1864 bio->bi_end_io = lbmIODone;
1865 bio->bi_private = bp;
1866 submit_bio(READ, bio);
/* lbmIODone clears lbmREAD (and sets lbmDONE) on completion, which
 * makes this condition true */
1869 wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));
1878 * buffer at head of pageout queue stays after completion of
1879 * partial-page pageout and redriven by explicit initiation of
1880 * pageout by caller until full-page pageout is completed and
1883 * device driver i/o done redrives pageout of new buffer at
1884 * head of pageout queue when current buffer at head of pageout
1885 * queue is released at the completion of its full-page pageout.
1887 * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit().
1888 * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
1890 static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
1894 unsigned long flags;
1896 jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn);
1898 /* map the logical block address to physical block address */
1900 log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
1902 LCACHE_LOCK(flags); /* disable+lock */
1905 * initialize buffer for device driver
1910 * insert bp at tail of write queue associated with log
1912 * (request is either for bp already/currently at head of queue
1913 * or new bp to be inserted at tail)
1917 /* is buffer not already on write queue ? */
1918 if (bp->l_wqnext == NULL) {
1919 /* insert at tail of wqueue */
/* circular singly-linked list: tail->l_wqnext is the head */
1925 bp->l_wqnext = tail->l_wqnext;
1926 tail->l_wqnext = bp;
1932 /* is buffer at head of wqueue and for write ? */
/* only the queue head may be driven now; otherwise leave bp queued
 * to be redriven later by lbmIODone */
1933 if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
1934 LCACHE_UNLOCK(flags); /* unlock+enable */
1938 LCACHE_UNLOCK(flags); /* unlock+enable */
/* synchronous write: initiator waits for completion (lbmIOWait) */
1942 else if (flag & lbmSYNC)
1955 * initiate pageout bypassing write queue for sidestream
1956 * (e.g., log superblock) write;
1958 static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
1960 jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x",
1961 bp, flag, bp->l_pn);
1964 * initialize buffer for device driver
/* lbmDIRECT marks sidestream i/o (e.g., log superblock) so that
 * lbmIODone skips the write-queue bookkeeping */
1966 bp->l_flag = flag | lbmDIRECT;
1968 /* map the logical block address to physical block address */
1970 log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
1973 * initiate pageout of the page
1980 * NAME: lbmStartIO()
1982 * FUNCTION: Interface to DD strategy routine
1986 * serialization: LCACHE_LOCK() is NOT held during log i/o;
1988 static void lbmStartIO(struct lbuf * bp)
1991 struct jfs_log *log = bp->l_log;
1993 jfs_info("lbmStartIO\n");
/* build a single-vector bio covering the whole log page */
1995 bio = bio_alloc(GFP_NOFS, 1);
/* l_blkno is in filesystem blocks; convert to 512-byte sectors */
1996 bio->bi_sector = bp->l_blkno << (log->l2bsize - 9);
1997 bio->bi_bdev = log->bdev;
1998 bio->bi_io_vec[0].bv_page = virt_to_page(bp->l_ldata);
1999 bio->bi_io_vec[0].bv_len = LOGPSIZE;
2000 bio->bi_io_vec[0].bv_offset = 0;
2004 bio->bi_size = LOGPSIZE;
2006 bio->bi_end_io = lbmIODone;
2007 bio->bi_private = bp;
2009 /* check if journaling to disk has been disabled */
2010 if (!log->no_integrity) {
2011 submit_bio(WRITE, bio);
2012 INCREMENT(lmStat.submitted);
/* integrity disabled: fake an immediately-successful completion */
2017 lbmIODone(bio, 0, 0); /* 2nd argument appears to not be used => 0
2018 * 3rd argument appears to not be used => 0
2027 static int lbmIOWait(struct lbuf * bp, int flag)
2029 unsigned long flags;
2032 jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2034 LCACHE_LOCK(flags); /* disable+lock */
/* sleep (lock released while asleep) until lbmIODone sets lbmDONE */
2036 LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags);
/* i/o error reported by the completion handler via lbmERROR */
2038 rc = (bp->l_flag & lbmERROR) ? -EIO : 0;
/* NOTE(review): the lbmFREE handling (releasing bp when the caller
 * requested it via `flag`) sits in elided lines here */
2043 LCACHE_UNLOCK(flags); /* unlock+enable */
2045 jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2052 * executed at INTIODONE level
2054 static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
2056 struct lbuf *bp = bio->bi_private;
2057 struct lbuf *nextbp, *tail;
2058 struct jfs_log *log;
2059 unsigned long flags;
2065 * get back jfs buffer bound to the i/o buffer
2067 jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag);
2069 LCACHE_LOCK(flags); /* disable+lock */
2071 bp->l_flag |= lbmDONE;
/* record i/o errors; lbmIOWait translates lbmERROR into -EIO */
2073 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
2074 bp->l_flag |= lbmERROR;
2076 jfs_err("lbmIODone: I/O error in JFS log");
2084 if (bp->l_flag & lbmREAD) {
/* read completion: clearing lbmREAD satisfies lbmRead()'s
 * wait_event condition */
2085 bp->l_flag &= ~lbmREAD;
2087 LCACHE_UNLOCK(flags); /* unlock+enable */
2089 /* wakeup I/O initiator */
2090 LCACHE_WAKEUP(&bp->l_ioevent);
2096 * pageout completion
2098 * the bp at the head of write queue has completed pageout.
2100 * if single-commit/full-page pageout, remove the current buffer
2101 * from head of pageout queue, and redrive pageout with
2102 * the new buffer at head of pageout queue;
2103 * otherwise, the partial-page pageout buffer stays at
2104 * the head of pageout queue to be redriven for pageout
2105 * by lmGroupCommit() until full-page pageout is completed.
2107 bp->l_flag &= ~lbmWRITE;
2108 INCREMENT(lmStat.pagedone);
2110 /* update committed lsn */
2112 log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor;
/* sidestream (direct) write: no write-queue bookkeeping needed */
2114 if (bp->l_flag & lbmDIRECT) {
2115 LCACHE_WAKEUP(&bp->l_ioevent);
2116 LCACHE_UNLOCK(flags);
2122 /* single element queue */
2124 /* remove head buffer of full-page pageout
2125 * from log device write queue
2127 if (bp->l_flag & lbmRELEASE) {
2129 bp->l_wqnext = NULL;
2132 /* multi element queue */
2134 /* remove head buffer of full-page pageout
2135 * from log device write queue
2137 if (bp->l_flag & lbmRELEASE) {
2138 nextbp = tail->l_wqnext = bp->l_wqnext;
2139 bp->l_wqnext = NULL;
2142 * redrive pageout of next page at head of write queue:
2143 * redrive next page without any bound tblk
2144 * (i.e., page w/o any COMMIT records), or
2145 * first page of new group commit which has been
2146 * queued after current page (subsequent pageout
2147 * is performed synchronously, except page without
2148 * any COMMITs) by lmGroupCommit() as indicated
2151 if (nextbp->l_flag & lbmWRITE) {
2153 * We can't do the I/O at interrupt time.
2154 * The jfsIO thread can do it
/* handed off to jfsIOWait() via the redrive list (the lbmRedrive
 * call is in elided lines) */
2162 * synchronous pageout:
2164 * buffer has not necessarily been removed from write queue
2165 * (e.g., synchronous write of partial-page with COMMIT):
2166 * leave buffer for i/o initiator to dispose
2168 if (bp->l_flag & lbmSYNC) {
2169 LCACHE_UNLOCK(flags); /* unlock+enable */
2171 /* wakeup I/O initiator */
2172 LCACHE_WAKEUP(&bp->l_ioevent);
2176 * Group Commit pageout:
2178 else if (bp->l_flag & lbmGC) {
2179 LCACHE_UNLOCK(flags);
2184 * asynchronous pageout:
2186 * buffer must have been removed from write queue:
2187 * insert buffer at head of freelist where it can be recycled
2190 assert(bp->l_flag & lbmRELEASE);
2191 assert(bp->l_flag & lbmFREE);
2194 LCACHE_UNLOCK(flags); /* unlock+enable */
2200 int jfsIOWait(void *arg)
/* jfsIO kernel thread: drains the global redrive list, performing
 * queued log i/o in process context on behalf of lbmIODone */
2206 complete(&jfsIOwait);
2209 DECLARE_WAITQUEUE(wq, current);
2211 spin_lock_irq(&log_redrive_lock);
/* pop buffers one at a time, dropping the lock while the actual
 * I/O is started (the lbmStartIO call is in elided lines) */
2212 while ((bp = log_redrive_list)) {
2213 log_redrive_list = bp->l_redrive_next;
2214 bp->l_redrive_next = NULL;
2215 spin_unlock_irq(&log_redrive_lock);
2217 spin_lock_irq(&log_redrive_lock);
/* software-suspend support: freeze here when requested */
2219 if (current->flags & PF_FREEZE) {
2220 spin_unlock_irq(&log_redrive_lock);
2221 refrigerator(PF_IOTHREAD);
/* nothing queued: sleep until lbmRedrive() wakes us */
2223 add_wait_queue(&jfs_IO_thread_wait, &wq);
2224 set_current_state(TASK_INTERRUPTIBLE);
2225 spin_unlock_irq(&log_redrive_lock);
2227 current->state = TASK_RUNNING;
2228 remove_wait_queue(&jfs_IO_thread_wait, &wq);
2230 } while (!jfs_stop_threads);
2232 jfs_info("jfsIOWait being killed!");
2233 complete_and_exit(&jfsIOwait, 0);
2237 * NAME: lmLogFormat()/jfs_logform()
2239 * FUNCTION: format file system log
2243 * logAddress - start address of log space in FS block
2244 * logSize - length of log space in FS block;
2246 * RETURN: 0 - success
2249 * XXX: We're synchronously writing one page at a time. This needs to
2250 * be improved by writing multiple pages at once.
2252 int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
2255 struct jfs_sb_info *sbi = JFS_SBI(log->sb);
2256 struct logsuper *logsuper;
2258 int lspn; /* log sequence page number */
2259 struct lrd *lrd_ptr;
2263 jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
2264 (long long)logAddress, logSize);
2266 /* allocate a log buffer */
2267 bp = lbmAllocate(log, 1);
/* number of log pages that fit in the log extent */
2269 npages = logSize >> sbi->l2nbperpage;
2274 * page 0 - reserved;
2275 * page 1 - log superblock;
2276 * page 2 - log data page: A SYNC log record is written
2277 * into this page at logform time;
2278 * pages 3-N - log data page: set to empty log data pages;
2281 * init log superblock: log page 1
2283 logsuper = (struct logsuper *) bp->l_ldata;
2285 logsuper->magic = cpu_to_le32(LOGMAGIC);
2286 logsuper->version = cpu_to_le32(LOGVERSION);
2287 logsuper->state = cpu_to_le32(LOGREDONE);
2288 logsuper->flag = cpu_to_le32(sbi->mntflag); /* ? */
2289 logsuper->size = cpu_to_le32(npages);
2290 logsuper->bsize = cpu_to_le32(sbi->bsize);
2291 logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
/* "end" points just past the initial SYNCPT record in page 2 */
2292 logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);
/* write superblock synchronously, bypassing the write queue */
2294 bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2295 bp->l_blkno = logAddress + sbi->nbperpage;
2297 if ((rc = lbmIOWait(bp, 0)))
2301 * init pages 2 to npages-1 as log data pages:
2303 * log page sequence number (lpsn) initialization:
2306 * +-----+-----+=====+=====+===.....===+=====+
2308 * <--- N page circular file ---->
2310 * the N (= npages-2) data pages of the log is maintained as
2311 * a circular file for the log records;
2312 * lpsn grows by 1 monotonically as each log page is written
2313 * to the circular file of the log;
2314 * and setLogpage() will not reset the page number even if
2315 * the eor is equal to LOGPHDRSIZE. In order for binary search
2316 * still work in find log end process, we have to simulate the
2317 * log wrap situation at the log format time.
2318 * The 1st log page written will have the highest lpsn. Then
2319 * the succeeding log pages will have ascending order of
2320 * the lspn starting from 0, ... (N-2)
2322 lp = (struct logpage *) bp->l_ldata;
2324 * initialize 1st log page to be written: lpsn = N - 1,
2325 * write a SYNCPT log record is written to this page
2327 lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
2328 lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);
/* the single real record: a SYNCPT with sync address 0 */
2330 lrd_ptr = (struct lrd *) &lp->data;
2331 lrd_ptr->logtid = 0;
2332 lrd_ptr->backchain = 0;
2333 lrd_ptr->type = cpu_to_le16(LOG_SYNCPT);
2334 lrd_ptr->length = 0;
2335 lrd_ptr->log.syncpt.sync = 0;
/* advance to the first data page and write it synchronously */
2337 bp->l_blkno += sbi->nbperpage;
2338 bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2340 if ((rc = lbmIOWait(bp, 0)))
2344 * initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
/* reuse the same buffer: only page number, eor, and block address
 * change for each remaining empty data page */
2346 for (lspn = 0; lspn < npages - 3; lspn++) {
2347 lp->h.page = lp->t.page = cpu_to_le32(lspn);
2348 lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
2350 bp->l_blkno += sbi->nbperpage;
2351 bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2353 if ((rc = lbmIOWait(bp, 0)))
2362 /* release the buffer */
2368 #ifdef CONFIG_JFS_STATISTICS
2369 int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length,
2370 int *eof, void *data)
/* /proc read handler: format the log manager counters into buffer */
2375 len += sprintf(buffer,
2376 "JFS Logmgr stats\n"
2377 "================\n"
2379 "writes submitted = %d\n"
2380 "writes completed = %d\n"
2381 "full pages submitted = %d\n"
2382 "partial pages submitted = %d\n",
2387 lmStat.partial_page);
/* standard /proc offset/length bookkeeping (partially elided) */
2390 *start = buffer + begin;
2403 #endif /* CONFIG_JFS_STATISTICS */