|  | /* | 
|  | *   Copyright (C) International Business Machines Corp., 2000-2004 | 
|  | *   Portions Copyright (C) Christoph Hellwig, 2001-2002 | 
|  | * | 
|  | *   This program is free software;  you can redistribute it and/or modify | 
|  | *   it under the terms of the GNU General Public License as published by | 
|  | *   the Free Software Foundation; either version 2 of the License, or | 
|  | *   (at your option) any later version. | 
|  | * | 
|  | *   This program is distributed in the hope that it will be useful, | 
|  | *   but WITHOUT ANY WARRANTY;  without even the implied warranty of | 
|  | *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See | 
|  | *   the GNU General Public License for more details. | 
|  | * | 
|  | *   You should have received a copy of the GNU General Public License | 
|  | *   along with this program;  if not, write to the Free Software | 
|  | *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 
|  | */ | 
|  | #ifndef	_H_JFS_LOGMGR | 
|  | #define _H_JFS_LOGMGR | 
|  |  | 
|  | #include "jfs_filsys.h" | 
|  | #include "jfs_lock.h" | 
|  |  | 
|  | /* | 
|  | *	log manager configuration parameters | 
|  | */ | 
|  |  | 
|  | /* log page size in bytes, and its base-2 logarithm (4096 == 1 << 12) */ | 
|  | #define	LOGPSIZE	4096 | 
|  | #define	L2LOGPSIZE	12	/* must stay equal to log2(LOGPSIZE) */ | 
|  |  | 
|  | #define LOGPAGES	16	/* Log pages per mounted file system */ | 
|  |  | 
|  | /* | 
|  | *	log logical volume | 
|  | * | 
|  | * a log is used to make the commit operation on journalled | 
|  | * files within the same logical volume group atomic. | 
|  | * a log is implemented with a logical volume. | 
|  | * there is one log per logical volume group. | 
|  | * | 
|  | * block 0 of the log logical volume is not used (ipl etc). | 
|  | * block 1 contains a log "superblock" and is used by logFormat(), | 
|  | * lmLogInit(), lmLogShutdown(), and logRedo() to record status | 
|  | * of the log but is not otherwise used during normal processing. | 
|  | * blocks 2 - (N-1) are used to contain log records. | 
|  | * | 
|  | * when a volume group is varied-on-line, logRedo() must have | 
|  | * been executed before the file systems (logical volumes) in | 
|  | * the volume group can be mounted. | 
|  | */ | 
|  | /* | 
|  | *	log superblock (block 1 of logical volume) | 
|  | * | 
|  | * block numbers below follow the layout described above: block 1 | 
|  | * holds the log superblock and log records start at block 2. | 
|  | */ | 
|  | #define	LOGSUPER_B	1	/* block number of the log superblock */ | 
|  | #define	LOGSTART_B	2	/* first block that contains log records */ | 
|  |  | 
|  | #define	LOGMAGIC	0x87654321	/* presumably the expected logsuper.magic - confirm */ | 
|  | #define	LOGVERSION	1	/* presumably the expected logsuper.version - confirm */ | 
|  |  | 
|  | #define MAX_ACTIVE	128	/* Max active file systems sharing log; sizes logsuper.active[] */ | 
|  |  | 
|  | struct logsuper {	/* log superblock, kept in block LOGSUPER_B of the log */ | 
|  | __le32 magic;		/* 4: log lv identifier */ | 
|  | __le32 version;		/* 4: version number */ | 
|  | __le32 serial;		/* 4: log open/mount counter */ | 
|  | __le32 size;		/* 4: size in number of LOGPSIZE blocks */ | 
|  | __le32 bsize;		/* 4: logical block size in bytes */ | 
|  | __le32 l2bsize;		/* 4: log2 of bsize */ | 
|  |  | 
|  | __le32 flag;		/* 4: commit option (see jfs_filsys.h) */ | 
|  | __le32 state;		/* 4: log state - LOGMOUNT, LOGREDONE, LOGWRAP or LOGREADERR */ | 
|  |  | 
|  | __le32 end;		/* 4: addr of last log record set by logredo */ | 
|  | char uuid[16];		/* 16: 128-bit journal uuid */ | 
|  | char label[16];		/* 16: journal label */ | 
|  | struct { | 
|  | char uuid[16];	/* 16: uuid of one active file system */ | 
|  | } active[MAX_ACTIVE];	/* 2048: active file systems list (MAX_ACTIVE * 16 bytes) */ | 
|  | }; | 
|  |  | 
|  | #define NULL_UUID "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"	/* 16 explicit zero bytes, the width of the uuid[16] fields */ | 
|  |  | 
|  | /* log flag (logsuper.flag): commit option (see jfs_filsys.h) */ | 
|  |  | 
|  | /* log state: the values referred to by the logsuper.state comment */ | 
|  | #define	LOGMOUNT	0	/* log mounted by lmLogInit() */ | 
|  | #define LOGREDONE	1	/* log shutdown by lmLogShutdown(). | 
|  | * log redo completed by logredo(). | 
|  | */ | 
|  | #define LOGWRAP		2	/* log wrapped */ | 
|  | #define LOGREADERR	3	/* log read error detected in logredo() */ | 
|  |  | 
|  |  | 
|  | /* | 
|  | *	log logical page | 
|  | * | 
|  | * (this comment should be rewritten !) | 
|  | * the header and trailer structures (h,t) will normally have | 
|  | * the same page and eor value. | 
|  | * An exception to this occurs when a complete page write is not | 
|  | * accomplished on a power failure. Since the hardware may "split write" | 
|  | * sectors in the page, any out of order sequence may occur during powerfail | 
|  | * and needs to be recognized during log replay.  The xor value is | 
|  | * an "exclusive or" of all log words in the page up to eor.  This | 
|  | * 32 bit xor is stored with the top 16 bits in the header and the | 
|  | * bottom 16 bits in the trailer.  logredo can easily recognize pages | 
|  | * that were not completed by reconstructing this xor and checking | 
|  | * the log page. | 
|  | * | 
|  | * Previous versions of the operating system did not allow split | 
|  | * writes and detected partially written records in logredo by | 
|  | * ordering the updates to the header, trailer, and the move of data | 
|  | * into the logdata area.  The order: (1) data is moved (2) header | 
|  | * is updated (3) trailer is updated.  In logredo, when the header | 
|  | * differed from the trailer, the header and trailer were reconciled | 
|  | * as follows: if h.page != t.page they were set to the smaller of | 
|  | * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only) | 
|  | * h.eor != t.eor they were set to the smaller of their two values. | 
|  | */ | 
|  | struct logpage {	/* one log page: header, record area, trailer */ | 
|  | struct {		/* header */ | 
|  | __le32 page;	/* 4: log sequence page number */ | 
|  | __le16 rsrvd;	/* 2: */ | 
|  | __le16 eor;	/* 2: end-of-log offset of last record write */ | 
|  | } h; | 
|  |  | 
|  | __le32 data[LOGPSIZE / 4 - 4];	/* log record area: LOGPSIZE / 4 words less 4 words for h and t */ | 
|  |  | 
|  | struct {		/* trailer */ | 
|  | __le32 page;	/* 4: normally the same as h.page */ | 
|  | __le16 rsrvd;	/* 2: */ | 
|  | __le16 eor;	/* 2: normally the same as h.eor */ | 
|  | } t; | 
|  | }; | 
|  |  | 
|  | #define LOGPHDRSIZE	8	/* log page header size in bytes (4 + 2 + 2, see logpage.h) */ | 
|  | #define LOGPTLRSIZE	8	/* log page trailer size in bytes (4 + 2 + 2, see logpage.t) */ | 
|  |  | 
|  |  | 
|  | /* | 
|  | *	log record | 
|  | * | 
|  | * (this comment should be rewritten !) | 
|  | * jfs uses only "after" log records (only a single writer is allowed | 
|  | * in a page, pages are written to temporary paging space if | 
|  | * they must be written to disk before commit, and i/o is | 
|  | * scheduled for modified pages to their home location after | 
|  | * the log records containing the after values and the commit | 
|  | * record is written to the log on disk, undo discards the copy | 
|  | * in main-memory.) | 
|  | * | 
|  | * a log record consists of a data area of variable length followed by | 
|  | * a descriptor of fixed size LOGRDSIZE bytes. | 
|  | * the  data area is rounded up to an integral number of 4-bytes and | 
|  | * must be no longer than LOGPSIZE. | 
|  | * the descriptor is of size of multiple of 4-bytes and aligned on a | 
|  | * 4-byte boundary. | 
|  | * records are packed one after the other in the data area of log pages. | 
|  | * (sometimes a DUMMY record is inserted so that at least one record ends | 
|  | * on every page or the longest record is placed on at most two pages). | 
|  | * the field eor in page header/trailer points to the byte following | 
|  | * the last record on a page. | 
|  | */ | 
|  |  | 
|  | /* log record types (lrd.type); each value is a distinct single bit */ | 
|  | #define LOG_COMMIT		0x8000 | 
|  | #define LOG_SYNCPT		0x4000 | 
|  | #define LOG_MOUNT		0x2000 | 
|  | #define LOG_REDOPAGE		0x0800 | 
|  | #define LOG_NOREDOPAGE		0x0080 | 
|  | #define LOG_NOREDOINOEXT	0x0040 | 
|  | #define LOG_UPDATEMAP		0x0008 | 
|  | #define LOG_NOREDOFILE		0x0001 | 
|  |  | 
|  | /* | 
|  | * REDOPAGE/NOREDOPAGE log record data type | 
|  | * | 
|  | * values for the 'type' member of lrd.log.redopage and | 
|  | * lrd.log.noredopage; each is a distinct single bit (0x0008 is | 
|  | * not assigned here). | 
|  | */ | 
|  | #define	LOG_INODE		0x0001 | 
|  | #define	LOG_XTREE		0x0002 | 
|  | #define	LOG_DTREE		0x0004 | 
|  | #define	LOG_BTROOT		0x0010 | 
|  | #define	LOG_EA			0x0020 | 
|  | #define	LOG_ACL			0x0040 | 
|  | #define	LOG_DATA		0x0080 | 
|  | #define	LOG_NEW			0x0100 | 
|  | #define	LOG_EXTEND		0x0200 | 
|  | #define LOG_RELOCATE		0x0400 | 
|  | #define LOG_DIR_XTREE		0x0800	/* Xtree is in directory inode */ | 
|  |  | 
|  | /* | 
|  | * UPDATEMAP log record descriptor type | 
|  | * | 
|  | * values for lrd.log.updatemap.type; each is a distinct single bit. | 
|  | * The names pair ALLOC/FREE with PXD/XAD and with single/LIST forms. | 
|  | */ | 
|  | #define	LOG_ALLOCXADLIST	0x0080 | 
|  | #define	LOG_ALLOCPXDLIST	0x0040 | 
|  | #define	LOG_ALLOCXAD		0x0020 | 
|  | #define	LOG_ALLOCPXD		0x0010 | 
|  | #define	LOG_FREEXADLIST		0x0008 | 
|  | #define	LOG_FREEPXDLIST		0x0004 | 
|  | #define	LOG_FREEXAD		0x0002 | 
|  | #define	LOG_FREEPXD		0x0001 | 
|  |  | 
|  |  | 
|  | struct lrd { | 
|  | /* | 
|  | * log record descriptor: a type independent area followed by a | 
|  | * type dependent union.  By the per-field sizes noted below the | 
|  | * two parts are 16 and 20 bytes, 36 bytes in total. | 
|  | * | 
|  | * type independent area | 
|  | */ | 
|  | __le32 logtid;		/* 4: log transaction identifier */ | 
|  | __le32 backchain;	/* 4: ptr to prev record of same transaction */ | 
|  | __le16 type;		/* 2: record type (LOG_COMMIT, LOG_SYNCPT, ...) */ | 
|  | __le16 length;		/* 2: length of data in record (in bytes) */ | 
|  | __le32 aggregate;	/* 4: file system lv/aggregate */ | 
|  | /* (16) */ | 
|  |  | 
|  | /* | 
|  | * type dependent area (20) | 
|  | * | 
|  | * which member applies is selected by the record type; the | 
|  | * largest members below are 20 bytes by their noted sizes. | 
|  | */ | 
|  | union { | 
|  |  | 
|  | /* | 
|  | *      COMMIT: commit | 
|  | * | 
|  | * transaction commit: no type-dependent information; | 
|  | */ | 
|  |  | 
|  | /* | 
|  | *      REDOPAGE: after-image | 
|  | * | 
|  | * apply after-image; | 
|  | * | 
|  | * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; | 
|  | */ | 
|  | struct { | 
|  | __le32 fileset;	/* 4: fileset number */ | 
|  | __le32 inode;	/* 4: inode number */ | 
|  | __le16 type;	/* 2: REDOPAGE record type */ | 
|  | __le16 l2linesize;	/* 2: log2 of line size */ | 
|  | pxd_t pxd;	/* 8: on-disk page pxd */ | 
|  | } redopage;	/* (20) */ | 
|  |  | 
|  | /* | 
|  | *      NOREDOPAGE: the page is freed | 
|  | * | 
|  | * do not apply after-image records which precede this record | 
|  | * in the log with the same page block number to this page. | 
|  | * | 
|  | * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; | 
|  | */ | 
|  | struct { | 
|  | __le32 fileset;	/* 4: fileset number */ | 
|  | __le32 inode;	/* 4: inode number */ | 
|  | __le16 type;	/* 2: NOREDOPAGE record type */ | 
|  | __le16 rsrvd;	/* 2: reserved */ | 
|  | pxd_t pxd;	/* 8: on-disk page pxd */ | 
|  | } noredopage;	/* (20) */ | 
|  |  | 
|  | /* | 
|  | *      UPDATEMAP: update block allocation map | 
|  | * | 
|  | * either in-line PXD, | 
|  | * or     out-of-line  XADLIST; | 
|  | * | 
|  | * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; | 
|  | */ | 
|  | struct { | 
|  | __le32 fileset;	/* 4: fileset number */ | 
|  | __le32 inode;	/* 4: inode number */ | 
|  | __le16 type;	/* 2: UPDATEMAP record type */ | 
|  | __le16 nxd;	/* 2: number of extents */ | 
|  | pxd_t pxd;	/* 8: pxd */ | 
|  | } updatemap;	/* (20) */ | 
|  |  | 
|  | /* | 
|  | *      NOREDOINOEXT: the inode extent is freed | 
|  | * | 
|  | * do not apply after-image records which precede this | 
|  | * record in the log with any of the 4 page block | 
|  | * numbers in this inode extent. | 
|  | * | 
|  | * NOTE: The fileset and pxd fields MUST remain in | 
|  | *       the same fields in the REDOPAGE record format. | 
|  | * | 
|  | */ | 
|  | struct { | 
|  | __le32 fileset;	/* 4: fileset number */ | 
|  | __le32 iagnum;	/* 4: IAG number     */ | 
|  | __le32 inoext_idx;	/* 4: inode extent index */ | 
|  | pxd_t pxd;	/* 8: on-disk page pxd */ | 
|  | } noredoinoext;	/* (20) */ | 
|  |  | 
|  | /* | 
|  | *      SYNCPT: log sync point | 
|  | * | 
|  | * replay log up to syncpt address specified; | 
|  | */ | 
|  | struct { | 
|  | __le32 sync;	/* 4: syncpt address (0 = here) */ | 
|  | } syncpt; | 
|  |  | 
|  | /* | 
|  | *      MOUNT: file system mount | 
|  | * | 
|  | * file system mount: no type-dependent information; | 
|  | */ | 
|  |  | 
|  | /* | 
|  | *      ? FREEXTENT: free specified extent(s) | 
|  | * | 
|  | * free specified extent(s) from block allocation map | 
|  | * N.B.: nextent should be length of data/sizeof(xad_t) | 
|  | */ | 
|  | struct { | 
|  | __le32 type;	/* 4: FREEXTENT record type */ | 
|  | __le32 nextent;	/* 4: number of extents */ | 
|  |  | 
|  | /* data: PXD or XAD list */ | 
|  | } freextent; | 
|  |  | 
|  | /* | 
|  | *      ? NOREDOFILE: this file is freed | 
|  | * | 
|  | * do not apply records which precede this record in the log | 
|  | * with the same inode number. | 
|  | * | 
|  | * NOREDOFILE must be the first to be written at commit | 
|  | * (last to be read in logredo()) - it prevents | 
|  | * replay of preceding updates of all preceding generations | 
|  | * of the inumber esp. the on-disk inode itself, | 
|  | * but does NOT prevent | 
|  | * replay of the | 
|  | * (NOTE(review): the sentence above is cut off in the original.) | 
|  | */ | 
|  | struct { | 
|  | __le32 fileset;	/* 4: fileset number */ | 
|  | __le32 inode;	/* 4: inode number */ | 
|  | } noredofile; | 
|  |  | 
|  | /* | 
|  | *      ? NEWPAGE: | 
|  | * | 
|  | * metadata type dependent | 
|  | */ | 
|  | struct { | 
|  | __le32 fileset;	/* 4: fileset number */ | 
|  | __le32 inode;	/* 4: inode number */ | 
|  | __le32 type;	/* 4: NEWPAGE record type */ | 
|  | pxd_t pxd;	/* 8: on-disk page pxd */ | 
|  | } newpage; | 
|  |  | 
|  | /* | 
|  | *      ? DUMMY: filler | 
|  | * | 
|  | * no type-dependent information | 
|  | */ | 
|  | } log; | 
|  | };					/* (36) */ | 
|  |  | 
|  | #define	LOGRDSIZE	(sizeof(struct lrd))	/* size in bytes of the fixed-size log record descriptor */ | 
|  |  | 
|  | /* | 
|  | *	line vector descriptor | 
|  | * | 
|  | * a pair of 16-bit values: an offset followed by a length. | 
|  | * The units are not stated in this header. | 
|  | * NOTE(review): presumably both are counted in "lines" of | 
|  | * (1 << l2linesize) bytes, cf. lrd.log.redopage.l2linesize - confirm | 
|  | * against the code that writes these descriptors. | 
|  | */ | 
|  | struct lvd { | 
|  | __le16 offset; | 
|  | __le16 length; | 
|  | }; | 
|  |  | 
|  |  | 
|  | /* | 
|  | *	log logical volume | 
|  | * | 
|  | * per-log control structure.  It holds pointers, locks and wait | 
|  | * queues, so it is not itself a recorded format. | 
|  | * | 
|  | * The "N:" prefixes in the member comments are historical size | 
|  | * notes and are not kept up to date (e.g. the int 'no_integrity' | 
|  | * is annotated "3:"); do not rely on them. | 
|  | */ | 
|  | struct jfs_log { | 
|  |  | 
|  | struct list_head sb_list;/*  This is used to sync metadata | 
|  | *    before writing syncpt. | 
|  | */ | 
|  | struct list_head journal_list; /* Global list */ | 
|  | struct block_device *bdev; /* 4: log lv pointer */ | 
|  | int serial;		/* 4: log mount serial number */ | 
|  |  | 
|  | s64 base;		/* @8: log extent address (inline log) */ | 
|  | int size;		/* 4: log size in log pages */ | 
|  | int l2bsize;		/* 4: log2 of bsize */ | 
|  |  | 
|  | long flag;		/* 4: log flag (see the log_* values below) */ | 
|  |  | 
|  | struct lbuf *lbuf_free;	/* 4: free lbufs */ | 
|  | wait_queue_head_t free_wait;	/* 4: NOTE(review): presumably waited on for a free lbuf - confirm */ | 
|  |  | 
|  | /* log write */ | 
|  | int logtid;		/* 4: log tid */ | 
|  | int page;		/* 4: page number of eol page */ | 
|  | int eor;		/* 4: eor of last record in eol page */ | 
|  | struct lbuf *bp;	/* 4: current log page buffer */ | 
|  |  | 
|  | struct semaphore loglock;	/* 4: log write serialization lock */ | 
|  |  | 
|  | /* syncpt */ | 
|  | int nextsync;		/* 4: bytes to write before next syncpt */ | 
|  | int active;		/* 4: */ | 
|  | wait_queue_head_t syncwait;	/* 4: */ | 
|  |  | 
|  | /* commit */ | 
|  | uint cflag;		/* 4: NOTE(review): presumably holds logGC_PAGEOUT - confirm */ | 
|  | struct list_head cqueue; /* FIFO commit queue */ | 
|  | struct tblock *flush_tblk; /* tblk we're waiting on for flush */ | 
|  | int gcrtc;		/* 4: GC_READY transaction count */ | 
|  | struct tblock *gclrt;	/* 4: latest GC_READY transaction */ | 
|  | spinlock_t gclock;	/* 4: group commit lock */ | 
|  | int logsize;		/* 4: log data area size in bytes; wrap modulus in logdiff() */ | 
|  | int lsn;		/* 4: end-of-log */ | 
|  | int clsn;		/* 4: clsn */ | 
|  | int syncpt;		/* 4: addr of last syncpt record; origin used by logdiff() */ | 
|  | int sync;		/* 4: addr from last logsync() */ | 
|  | struct list_head synclist;	/* 8: logsynclist anchor */ | 
|  | spinlock_t synclock;	/* 4: synclist lock, taken via LOGSYNC_LOCK() */ | 
|  | struct lbuf *wqueue;	/* 4: log pageout queue */ | 
|  | int count;		/* 4: count */ | 
|  | char uuid[16];		/* 16: 128-bit uuid of log device */ | 
|  |  | 
|  | int no_integrity;	/* 3: flag to disable journaling to disk */ | 
|  | }; | 
|  |  | 
|  | /* | 
|  | * Log flag | 
|  | * | 
|  | * consecutive small integers (1..4), not single-bit masks. | 
|  | * NOTE(review): presumably bit numbers applied to jfs_log.flag - | 
|  | * confirm against the users of these names. | 
|  | */ | 
|  | #define log_INLINELOG	1 | 
|  | #define log_SYNCBARRIER	2 | 
|  | #define log_QUIESCE	3 | 
|  | #define log_FLUSH	4 | 
|  |  | 
|  | /* | 
|  | * group commit flag | 
|  | * | 
|  | * two separate sets of masks, grouped by the structure they are | 
|  | * used with; every value below is a distinct single bit within | 
|  | * its own set. | 
|  | */ | 
|  | /* jfs_log (NOTE(review): presumably kept in jfs_log.cflag - confirm) */ | 
|  | #define logGC_PAGEOUT	0x00000001 | 
|  |  | 
|  | /* tblock/lbuf */ | 
|  | #define tblkGC_QUEUE		0x0001 | 
|  | #define tblkGC_READY		0x0002 | 
|  | #define tblkGC_COMMIT		0x0004 | 
|  | #define tblkGC_COMMITTED	0x0008 | 
|  | #define tblkGC_EOP		0x0010 | 
|  | #define tblkGC_FREE		0x0020 | 
|  | #define tblkGC_LEADER		0x0040 | 
|  | #define tblkGC_ERROR		0x0080 | 
|  | #define tblkGC_LAZY		0x0100	// D230860 | 
|  | #define tblkGC_UNLOCKED		0x0200	// D230860 | 
|  |  | 
|  | /* | 
|  | *		log cache buffer header | 
|  | * | 
|  | * describes one buffered log page and links it to its log, to the | 
|  | * write queue and to the free list.  As in struct jfs_log, the "N:" | 
|  | * prefixes in the member comments are historical size notes. | 
|  | */ | 
|  | struct lbuf { | 
|  | struct jfs_log *l_log;	/* 4: log associated with buffer */ | 
|  |  | 
|  | /* | 
|  | * data buffer base area | 
|  | */ | 
|  | uint l_flag;		/* 4: pageout control flags */ | 
|  |  | 
|  | struct lbuf *l_wqnext;	/* 4: write queue link */ | 
|  | struct lbuf *l_freelist;	/* 4: free list link; also reused as l_redrive_next */ | 
|  |  | 
|  | int l_pn;		/* 4: log page number */ | 
|  | int l_eor;		/* 4: log record eor */ | 
|  | int l_ceor;		/* 4: committed log record eor */ | 
|  |  | 
|  | s64 l_blkno;		/* 8: log page block number */ | 
|  | caddr_t l_ldata;	/* 4: data page */ | 
|  | struct page *l_page;	/* The page itself */ | 
|  | uint l_offset;		/* Offset of l_ldata within the page */ | 
|  |  | 
|  | wait_queue_head_t l_ioevent;	/* 4: i/o done event */ | 
|  | }; | 
|  |  | 
|  | /* | 
|  | * Reuse l_freelist for redrive list: l_redrive_next is only another | 
|  | * name for the lbuf member l_freelist, so the two links share storage. | 
|  | */ | 
|  | #define l_redrive_next l_freelist | 
|  |  | 
|  | /* | 
|  | *	logsynclist block | 
|  | * | 
|  | * common logsyncblk prefix for jbuf_t and tblock | 
|  | * | 
|  | * NOTE(review): being a shared prefix, the member order here is | 
|  | * presumably mirrored at the start of those structures - confirm | 
|  | * before changing it. | 
|  | */ | 
|  | struct logsyncblk { | 
|  | u16 xflag;		/* flags */ | 
|  | u16 flag;		/* only meaningful in tblock */ | 
|  | lid_t lid;		/* lock id */ | 
|  | s32 lsn;		/* log sequence number */ | 
|  | struct list_head synclist;	/* log sync list link */ | 
|  | }; | 
|  |  | 
|  | /* | 
|  | *	logsynclist serialization (per log) | 
|  | * | 
|  | * thin wrappers around the spinlock calls on log->synclock: | 
|  | * LOGSYNC_LOCK_INIT() initialises it, LOGSYNC_LOCK() takes it with | 
|  | * spin_lock_irqsave() and LOGSYNC_UNLOCK() releases it with | 
|  | * spin_unlock_irqrestore().  'flags' is handed through unchanged, | 
|  | * so callers are expected to give the matching lock and unlock | 
|  | * the same flags variable. | 
|  | */ | 
|  |  | 
|  | #define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock) | 
|  | #define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags) | 
|  | #define LOGSYNC_UNLOCK(log, flags) \ | 
|  | spin_unlock_irqrestore(&(log)->synclock, flags) | 
|  |  | 
|  | /* compute the difference in bytes of lsn from sync point */ | 
|  | #define logdiff(diff, lsn, log)\ | 
|  | {\ | 
|  | diff = (lsn) - (log)->syncpt;\ | 
|  | if (diff < 0)\ | 
|  | diff += (log)->logsize;\ | 
|  | } | 
|  |  | 
|  | /* | 
|  | * log manager entry points; the definitions are not in this header. | 
|  | * Function declarations have external linkage by default, so no | 
|  | * storage-class keyword is spelled out. | 
|  | */ | 
|  | int lmLogOpen(struct super_block *sb); | 
|  | int lmLogClose(struct super_block *sb); | 
|  | int lmLogInit(struct jfs_log *log); | 
|  | int lmLogShutdown(struct jfs_log *log); | 
|  | int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize); | 
|  | int lmGroupCommit(struct jfs_log *log, struct tblock *tblk); | 
|  | int jfsIOWait(void *arg); | 
|  | void jfs_flush_journal(struct jfs_log *log, int wait); | 
|  | void jfs_syncpt(struct jfs_log *log); | 
|  |  | 
|  | #endif				/* _H_JFS_LOGMGR */ |