/*
 *   Copyright (C) International Business Machines Corp., 2000-2004
 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
 | #ifndef	_H_JFS_LOGMGR | 
 | #define _H_JFS_LOGMGR | 
 |  | 
 | #include "jfs_filsys.h" | 
 | #include "jfs_lock.h" | 
 |  | 
 | /* | 
 |  *	log manager configuration parameters | 
 |  */ | 
 |  | 
 | /* log page size */ | 
 | #define	LOGPSIZE	4096 | 
 | #define	L2LOGPSIZE	12 | 
 |  | 
 | #define LOGPAGES	16	/* Log pages per mounted file system */ | 
 |  | 
 | /* | 
 |  *	log logical volume | 
 |  * | 
 |  * a log is used to make the commit operation on journalled | 
 |  * files within the same logical volume group atomic. | 
 |  * a log is implemented with a logical volume. | 
 |  * there is one log per logical volume group. | 
 |  * | 
 |  * block 0 of the log logical volume is not used (ipl etc). | 
 |  * block 1 contains a log "superblock" and is used by logFormat(), | 
 |  * lmLogInit(), lmLogShutdown(), and logRedo() to record status | 
 |  * of the log but is not otherwise used during normal processing. | 
 |  * blocks 2 - (N-1) are used to contain log records. | 
 |  * | 
 |  * when a volume group is varied-on-line, logRedo() must have | 
 |  * been executed before the file systems (logical volumes) in | 
 |  * the volume group can be mounted. | 
 |  */ | 
 | /* | 
 |  *	log superblock (block 1 of logical volume) | 
 |  */ | 
 | #define	LOGSUPER_B	1 | 
 | #define	LOGSTART_B	2 | 
 |  | 
 | #define	LOGMAGIC	0x87654321 | 
 | #define	LOGVERSION	1 | 
 |  | 
 | #define MAX_ACTIVE	128	/* Max active file systems sharing log */ | 
 |  | 
 | struct logsuper { | 
 | 	__le32 magic;		/* 4: log lv identifier */ | 
 | 	__le32 version;		/* 4: version number */ | 
 | 	__le32 serial;		/* 4: log open/mount counter */ | 
 | 	__le32 size;		/* 4: size in number of LOGPSIZE blocks */ | 
 | 	__le32 bsize;		/* 4: logical block size in byte */ | 
 | 	__le32 l2bsize;		/* 4: log2 of bsize */ | 
 |  | 
 | 	__le32 flag;		/* 4: option */ | 
 | 	__le32 state;		/* 4: state - see below */ | 
 |  | 
 | 	__le32 end;		/* 4: addr of last log record set by logredo */ | 
 | 	char uuid[16];		/* 16: 128-bit journal uuid */ | 
 | 	char label[16];		/* 16: journal label */ | 
 | 	struct { | 
 | 		char uuid[16]; | 
 | 	} active[MAX_ACTIVE];	/* 2048: active file systems list */ | 
 | }; | 
 |  | 
 | #define NULL_UUID "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" | 
 |  | 
 | /* log flag: commit option (see jfs_filsys.h) */ | 
 |  | 
 | /* log state */ | 
 | #define	LOGMOUNT	0	/* log mounted by lmLogInit() */ | 
 | #define LOGREDONE	1	/* log shutdown by lmLogShutdown(). | 
 | 				 * log redo completed by logredo(). | 
 | 				 */ | 
 | #define LOGWRAP		2	/* log wrapped */ | 
 | #define LOGREADERR	3	/* log read error detected in logredo() */ | 
 |  | 
 |  | 
 | /* | 
 |  *	log logical page | 
 |  * | 
 |  * (this comment should be rewritten !) | 
 |  * the header and trailer structures (h,t) will normally have | 
 |  * the same page and eor value. | 
 |  * An exception to this occurs when a complete page write is not | 
 |  * accomplished on a power failure. Since the hardware may "split write" | 
 |  * sectors in the page, any out of order sequence may occur during powerfail | 
 |  * and needs to be recognized during log replay.  The xor value is | 
 |  * an "exclusive or" of all log words in the page up to eor.  This | 
 |  * 32 bit eor is stored with the top 16 bits in the header and the | 
 |  * bottom 16 bits in the trailer.  logredo can easily recognize pages | 
 |  * that were not completed by reconstructing this eor and checking | 
 |  * the log page. | 
 |  * | 
 |  * Previous versions of the operating system did not allow split | 
 |  * writes and detected partially written records in logredo by | 
 |  * ordering the updates to the header, trailer, and the move of data | 
 |  * into the logdata area.  The order: (1) data is moved (2) header | 
 |  * is updated (3) trailer is updated.  In logredo, when the header | 
 |  * differed from the trailer, the header and trailer were reconciled | 
 |  * as follows: if h.page != t.page they were set to the smaller of | 
 |  * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only) | 
 |  * h.eor != t.eor they were set to the smaller of their two values. | 
 |  */ | 
 | struct logpage { | 
 | 	struct {		/* header */ | 
 | 		__le32 page;	/* 4: log sequence page number */ | 
 | 		__le16 rsrvd;	/* 2: */ | 
 | 		__le16 eor;	/* 2: end-of-log offset of lasrt record write */ | 
 | 	} h; | 
 |  | 
 | 	__le32 data[LOGPSIZE / 4 - 4];	/* log record area */ | 
 |  | 
 | 	struct {		/* trailer */ | 
 | 		__le32 page;	/* 4: normally the same as h.page */ | 
 | 		__le16 rsrvd;	/* 2: */ | 
 | 		__le16 eor;	/* 2: normally the same as h.eor */ | 
 | 	} t; | 
 | }; | 
 |  | 
 | #define LOGPHDRSIZE	8	/* log page header size */ | 
 | #define LOGPTLRSIZE	8	/* log page trailer size */ | 
 |  | 
 |  | 
 | /* | 
 |  *	log record | 
 |  * | 
 |  * (this comment should be rewritten !) | 
 |  * jfs uses only "after" log records (only a single writer is allowed | 
 |  * in a page, pages are written to temporary paging space if | 
 |  * if they must be written to disk before commit, and i/o is | 
 |  * scheduled for modified pages to their home location after | 
 |  * the log records containing the after values and the commit | 
 |  * record is written to the log on disk, undo discards the copy | 
 |  * in main-memory.) | 
 |  * | 
 |  * a log record consists of a data area of variable length followed by | 
 |  * a descriptor of fixed size LOGRDSIZE bytes. | 
 |  * the data area is rounded up to an integral number of 4-bytes and | 
 |  * must be no longer than LOGPSIZE. | 
 |  * the descriptor is of size of multiple of 4-bytes and aligned on a | 
 |  * 4-byte boundary. | 
 |  * records are packed one after the other in the data area of log pages. | 
 |  * (sometimes a DUMMY record is inserted so that at least one record ends | 
 |  * on every page or the longest record is placed on at most two pages). | 
 |  * the field eor in page header/trailer points to the byte following | 
 |  * the last record on a page. | 
 |  */ | 
 |  | 
 | /* log record types */ | 
 | #define LOG_COMMIT		0x8000 | 
 | #define LOG_SYNCPT		0x4000 | 
 | #define LOG_MOUNT		0x2000 | 
 | #define LOG_REDOPAGE		0x0800 | 
 | #define LOG_NOREDOPAGE		0x0080 | 
 | #define LOG_NOREDOINOEXT	0x0040 | 
 | #define LOG_UPDATEMAP		0x0008 | 
 | #define LOG_NOREDOFILE		0x0001 | 
 |  | 
 | /* REDOPAGE/NOREDOPAGE log record data type */ | 
 | #define	LOG_INODE		0x0001 | 
 | #define	LOG_XTREE		0x0002 | 
 | #define	LOG_DTREE		0x0004 | 
 | #define	LOG_BTROOT		0x0010 | 
 | #define	LOG_EA			0x0020 | 
 | #define	LOG_ACL			0x0040 | 
 | #define	LOG_DATA		0x0080 | 
 | #define	LOG_NEW			0x0100 | 
 | #define	LOG_EXTEND		0x0200 | 
 | #define LOG_RELOCATE		0x0400 | 
 | #define LOG_DIR_XTREE		0x0800	/* Xtree is in directory inode */ | 
 |  | 
 | /* UPDATEMAP log record descriptor type */ | 
 | #define	LOG_ALLOCXADLIST	0x0080 | 
 | #define	LOG_ALLOCPXDLIST	0x0040 | 
 | #define	LOG_ALLOCXAD		0x0020 | 
 | #define	LOG_ALLOCPXD		0x0010 | 
 | #define	LOG_FREEXADLIST		0x0008 | 
 | #define	LOG_FREEPXDLIST		0x0004 | 
 | #define	LOG_FREEXAD		0x0002 | 
 | #define	LOG_FREEPXD		0x0001 | 
 |  | 
 |  | 
 | struct lrd { | 
 | 	/* | 
 | 	 * type independent area | 
 | 	 */ | 
 | 	__le32 logtid;		/* 4: log transaction identifier */ | 
 | 	__le32 backchain;	/* 4: ptr to prev record of same transaction */ | 
 | 	__le16 type;		/* 2: record type */ | 
 | 	__le16 length;		/* 2: length of data in record (in byte) */ | 
 | 	__le32 aggregate;	/* 4: file system lv/aggregate */ | 
 | 	/* (16) */ | 
 |  | 
 | 	/* | 
 | 	 * type dependent area (20) | 
 | 	 */ | 
 | 	union { | 
 |  | 
 | 		/* | 
 | 		 *	COMMIT: commit | 
 | 		 * | 
 | 		 * transaction commit: no type-dependent information; | 
 | 		 */ | 
 |  | 
 | 		/* | 
 | 		 *	REDOPAGE: after-image | 
 | 		 * | 
 | 		 * apply after-image; | 
 | 		 * | 
 | 		 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; | 
 | 		 */ | 
 | 		struct { | 
 | 			__le32 fileset;	/* 4: fileset number */ | 
 | 			__le32 inode;	/* 4: inode number */ | 
 | 			__le16 type;	/* 2: REDOPAGE record type */ | 
 | 			__le16 l2linesize;	/* 2: log2 of line size */ | 
 | 			pxd_t pxd;	/* 8: on-disk page pxd */ | 
 | 		} redopage;	/* (20) */ | 
 |  | 
 | 		/* | 
 | 		 *	NOREDOPAGE: the page is freed | 
 | 		 * | 
 | 		 * do not apply after-image records which precede this record | 
 | 		 * in the log with the same page block number to this page. | 
 | 		 * | 
 | 		 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; | 
 | 		 */ | 
 | 		struct { | 
 | 			__le32 fileset;	/* 4: fileset number */ | 
 | 			__le32 inode;	/* 4: inode number */ | 
 | 			__le16 type;	/* 2: NOREDOPAGE record type */ | 
 | 			__le16 rsrvd;	/* 2: reserved */ | 
 | 			pxd_t pxd;	/* 8: on-disk page pxd */ | 
 | 		} noredopage;	/* (20) */ | 
 |  | 
 | 		/* | 
 | 		 *	UPDATEMAP: update block allocation map | 
 | 		 * | 
 | 		 * either in-line PXD, | 
 | 		 * or     out-of-line  XADLIST; | 
 | 		 * | 
 | 		 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; | 
 | 		 */ | 
 | 		struct { | 
 | 			__le32 fileset;	/* 4: fileset number */ | 
 | 			__le32 inode;	/* 4: inode number */ | 
 | 			__le16 type;	/* 2: UPDATEMAP record type */ | 
 | 			__le16 nxd;	/* 2: number of extents */ | 
 | 			pxd_t pxd;	/* 8: pxd */ | 
 | 		} updatemap;	/* (20) */ | 
 |  | 
 | 		/* | 
 | 		 *	NOREDOINOEXT: the inode extent is freed | 
 | 		 * | 
 | 		 * do not apply after-image records which precede this | 
 | 		 * record in the log with the any of the 4 page block | 
 | 		 * numbers in this inode extent. | 
 | 		 * | 
 | 		 * NOTE: The fileset and pxd fields MUST remain in | 
 | 		 *       the same fields in the REDOPAGE record format. | 
 | 		 * | 
 | 		 */ | 
 | 		struct { | 
 | 			__le32 fileset;	/* 4: fileset number */ | 
 | 			__le32 iagnum;	/* 4: IAG number     */ | 
 | 			__le32 inoext_idx;	/* 4: inode extent index */ | 
 | 			pxd_t pxd;	/* 8: on-disk page pxd */ | 
 | 		} noredoinoext;	/* (20) */ | 
 |  | 
 | 		/* | 
 | 		 *	SYNCPT: log sync point | 
 | 		 * | 
 | 		 * replay log upto syncpt address specified; | 
 | 		 */ | 
 | 		struct { | 
 | 			__le32 sync;	/* 4: syncpt address (0 = here) */ | 
 | 		} syncpt; | 
 |  | 
 | 		/* | 
 | 		 *	MOUNT: file system mount | 
 | 		 * | 
 | 		 * file system mount: no type-dependent information; | 
 | 		 */ | 
 |  | 
 | 		/* | 
 | 		 *	? FREEXTENT: free specified extent(s) | 
 | 		 * | 
 | 		 * free specified extent(s) from block allocation map | 
 | 		 * N.B.: nextents should be length of data/sizeof(xad_t) | 
 | 		 */ | 
 | 		struct { | 
 | 			__le32 type;	/* 4: FREEXTENT record type */ | 
 | 			__le32 nextent;	/* 4: number of extents */ | 
 |  | 
 | 			/* data: PXD or XAD list */ | 
 | 		} freextent; | 
 |  | 
 | 		/* | 
 | 		 *	? NOREDOFILE: this file is freed | 
 | 		 * | 
 | 		 * do not apply records which precede this record in the log | 
 | 		 * with the same inode number. | 
 | 		 * | 
 | 		 * NOREDOFILE must be the first to be written at commit | 
 | 		 * (last to be read in logredo()) - it prevents | 
 | 		 * replay of preceding updates of all preceding generations | 
 | 		 * of the inumber esp. the on-disk inode itself. | 
 | 		 */ | 
 | 		struct { | 
 | 			__le32 fileset;	/* 4: fileset number */ | 
 | 			__le32 inode;	/* 4: inode number */ | 
 | 		} noredofile; | 
 |  | 
 | 		/* | 
 | 		 *	? NEWPAGE: | 
 | 		 * | 
 | 		 * metadata type dependent | 
 | 		 */ | 
 | 		struct { | 
 | 			__le32 fileset;	/* 4: fileset number */ | 
 | 			__le32 inode;	/* 4: inode number */ | 
 | 			__le32 type;	/* 4: NEWPAGE record type */ | 
 | 			pxd_t pxd;	/* 8: on-disk page pxd */ | 
 | 		} newpage; | 
 |  | 
 | 		/* | 
 | 		 *	? DUMMY: filler | 
 | 		 * | 
 | 		 * no type-dependent information | 
 | 		 */ | 
 | 	} log; | 
 | };					/* (36) */ | 
 |  | 
 | #define	LOGRDSIZE	(sizeof(struct lrd)) | 
 |  | 
 | /* | 
 |  *	line vector descriptor | 
 |  */ | 
 | struct lvd { | 
 | 	__le16 offset; | 
 | 	__le16 length; | 
 | }; | 
 |  | 
 |  | 
 | /* | 
 |  *	log logical volume | 
 |  */ | 
 | struct jfs_log { | 
 |  | 
 | 	struct list_head sb_list;/*  This is used to sync metadata | 
 | 				 *    before writing syncpt. | 
 | 				 */ | 
 | 	struct list_head journal_list; /* Global list */ | 
 | 	struct block_device *bdev; /* 4: log lv pointer */ | 
 | 	int serial;		/* 4: log mount serial number */ | 
 |  | 
 | 	s64 base;		/* @8: log extent address (inline log ) */ | 
 | 	int size;		/* 4: log size in log page (in page) */ | 
 | 	int l2bsize;		/* 4: log2 of bsize */ | 
 |  | 
 | 	unsigned long flag;	/* 4: flag */ | 
 |  | 
 | 	struct lbuf *lbuf_free;	/* 4: free lbufs */ | 
 | 	wait_queue_head_t free_wait;	/* 4: */ | 
 |  | 
 | 	/* log write */ | 
 | 	int logtid;		/* 4: log tid */ | 
 | 	int page;		/* 4: page number of eol page */ | 
 | 	int eor;		/* 4: eor of last record in eol page */ | 
 | 	struct lbuf *bp;	/* 4: current log page buffer */ | 
 |  | 
 | 	struct mutex loglock;	/* 4: log write serialization lock */ | 
 |  | 
 | 	/* syncpt */ | 
 | 	int nextsync;		/* 4: bytes to write before next syncpt */ | 
 | 	int active;		/* 4: */ | 
 | 	wait_queue_head_t syncwait;	/* 4: */ | 
 |  | 
 | 	/* commit */ | 
 | 	uint cflag;		/* 4: */ | 
 | 	struct list_head cqueue; /* FIFO commit queue */ | 
 | 	struct tblock *flush_tblk; /* tblk we're waiting on for flush */ | 
 | 	int gcrtc;		/* 4: GC_READY transaction count */ | 
 | 	struct tblock *gclrt;	/* 4: latest GC_READY transaction */ | 
 | 	spinlock_t gclock;	/* 4: group commit lock */ | 
 | 	int logsize;		/* 4: log data area size in byte */ | 
 | 	int lsn;		/* 4: end-of-log */ | 
 | 	int clsn;		/* 4: clsn */ | 
 | 	int syncpt;		/* 4: addr of last syncpt record */ | 
 | 	int sync;		/* 4: addr from last logsync() */ | 
 | 	struct list_head synclist;	/* 8: logsynclist anchor */ | 
 | 	spinlock_t synclock;	/* 4: synclist lock */ | 
 | 	struct lbuf *wqueue;	/* 4: log pageout queue */ | 
 | 	int count;		/* 4: count */ | 
 | 	char uuid[16];		/* 16: 128-bit uuid of log device */ | 
 |  | 
 | 	int no_integrity;	/* 3: flag to disable journaling to disk */ | 
 | }; | 
 |  | 
 | /* | 
 |  * Log flag | 
 |  */ | 
 | #define log_INLINELOG	1 | 
 | #define log_SYNCBARRIER	2 | 
 | #define log_QUIESCE	3 | 
 | #define log_FLUSH	4 | 
 |  | 
 | /* | 
 |  * group commit flag | 
 |  */ | 
 | /* jfs_log */ | 
 | #define logGC_PAGEOUT	0x00000001 | 
 |  | 
 | /* tblock/lbuf */ | 
 | #define tblkGC_QUEUE		0x0001 | 
 | #define tblkGC_READY		0x0002 | 
 | #define tblkGC_COMMIT		0x0004 | 
 | #define tblkGC_COMMITTED	0x0008 | 
 | #define tblkGC_EOP		0x0010 | 
 | #define tblkGC_FREE		0x0020 | 
 | #define tblkGC_LEADER		0x0040 | 
 | #define tblkGC_ERROR		0x0080 | 
 | #define tblkGC_LAZY		0x0100	// D230860 | 
 | #define tblkGC_UNLOCKED		0x0200	// D230860 | 
 |  | 
 | /* | 
 |  *		log cache buffer header | 
 |  */ | 
 | struct lbuf { | 
 | 	struct jfs_log *l_log;	/* 4: log associated with buffer */ | 
 |  | 
 | 	/* | 
 | 	 * data buffer base area | 
 | 	 */ | 
 | 	uint l_flag;		/* 4: pageout control flags */ | 
 |  | 
 | 	struct lbuf *l_wqnext;	/* 4: write queue link */ | 
 | 	struct lbuf *l_freelist;	/* 4: freelistlink */ | 
 |  | 
 | 	int l_pn;		/* 4: log page number */ | 
 | 	int l_eor;		/* 4: log record eor */ | 
 | 	int l_ceor;		/* 4: committed log record eor */ | 
 |  | 
 | 	s64 l_blkno;		/* 8: log page block number */ | 
 | 	caddr_t l_ldata;	/* 4: data page */ | 
 | 	struct page *l_page;	/* The page itself */ | 
 | 	uint l_offset;		/* Offset of l_ldata within the page */ | 
 |  | 
 | 	wait_queue_head_t l_ioevent;	/* 4: i/o done event */ | 
 | }; | 
 |  | 
 | /* Reuse l_freelist for redrive list */ | 
 | #define l_redrive_next l_freelist | 
 |  | 
 | /* | 
 |  *	logsynclist block | 
 |  * | 
 |  * common logsyncblk prefix for jbuf_t and tblock | 
 |  */ | 
 | struct logsyncblk { | 
 | 	u16 xflag;		/* flags */ | 
 | 	u16 flag;		/* only meaninful in tblock */ | 
 | 	lid_t lid;		/* lock id */ | 
 | 	s32 lsn;		/* log sequence number */ | 
 | 	struct list_head synclist;	/* log sync list link */ | 
 | }; | 
 |  | 
 | /* | 
 |  *	logsynclist serialization (per log) | 
 |  */ | 
 |  | 
 | #define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock) | 
 | #define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags) | 
 | #define LOGSYNC_UNLOCK(log, flags) \ | 
 | 	spin_unlock_irqrestore(&(log)->synclock, flags) | 
 |  | 
 | /* compute the difference in bytes of lsn from sync point */ | 
 | #define logdiff(diff, lsn, log)\ | 
 | {\ | 
 | 	diff = (lsn) - (log)->syncpt;\ | 
 | 	if (diff < 0)\ | 
 | 		diff += (log)->logsize;\ | 
 | } | 
 |  | 
 | extern int lmLogOpen(struct super_block *sb); | 
 | extern int lmLogClose(struct super_block *sb); | 
 | extern int lmLogShutdown(struct jfs_log * log); | 
 | extern int lmLogInit(struct jfs_log * log); | 
 | extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize); | 
 | extern int lmGroupCommit(struct jfs_log *, struct tblock *); | 
 | extern int jfsIOWait(void *); | 
 | extern void jfs_flush_journal(struct jfs_log * log, int wait); | 
 | extern void jfs_syncpt(struct jfs_log *log, int hard_sync); | 
 |  | 
 | #endif				/* _H_JFS_LOGMGR */ |