Blame - fs/xfs/xfs_vnodeops.c - android_kernel_htc_msm8960

blob: e92cacde02f5966b8e289806ea829dbea1008d91 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	2	* Copyright (c) 2000-2005 Silicon Graphics, Inc.
				3	* All Rights Reserved.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	4	*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	5	* This program is free software; you can redistribute it and/or
				6	* modify it under the terms of the GNU General Public License as
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	7	* published by the Free Software Foundation.
				8	*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	9	* This program is distributed in the hope that it would be useful,
				10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				12	* GNU General Public License for more details.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	13	*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	14	* You should have received a copy of the GNU General Public License
				15	* along with this program; if not, write the Free Software Foundation,
				16	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	17	*/
Randy Dunlap	16f7e0f	2006-01-11 12:17:46 -0800	[diff] [blame^]	18
				19	#include <linux/capability.h>
				20
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	21	#include "xfs.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	22	#include "xfs_fs.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	23	#include "xfs_types.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	24	#include "xfs_bit.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	25	#include "xfs_log.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	26	#include "xfs_inum.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	27	#include "xfs_trans.h"
				28	#include "xfs_sb.h"
				29	#include "xfs_ag.h"
				30	#include "xfs_dir.h"
				31	#include "xfs_dir2.h"
				32	#include "xfs_dmapi.h"
				33	#include "xfs_mount.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	34	#include "xfs_da_btree.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	35	#include "xfs_bmap_btree.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	36	#include "xfs_alloc_btree.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	37	#include "xfs_ialloc_btree.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	38	#include "xfs_dir_sf.h"
				39	#include "xfs_dir2_sf.h"
				40	#include "xfs_attr_sf.h"
				41	#include "xfs_dinode.h"
				42	#include "xfs_inode.h"
				43	#include "xfs_inode_item.h"
				44	#include "xfs_dir_leaf.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	45	#include "xfs_itable.h"
				46	#include "xfs_btree.h"
				47	#include "xfs_ialloc.h"
				48	#include "xfs_alloc.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	49	#include "xfs_bmap.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	50	#include "xfs_attr.h"
				51	#include "xfs_rw.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	52	#include "xfs_error.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	53	#include "xfs_quota.h"
				54	#include "xfs_utils.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	55	#include "xfs_rtalloc.h"
				56	#include "xfs_refcache.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	57	#include "xfs_trans_space.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	58	#include "xfs_log_priv.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	59	#include "xfs_mac.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	60
				61
				62	/*
				63	* The maximum pathlen is 1024 bytes. Since the minimum file system
				64	* blocksize is 512 bytes, we can get a max of 2 extents back from
				65	* bmapi.
				66	*/
				67	#define SYMLINK_MAPS 2
				68
				69	/*
				70	* For xfs, we check that the file isn't too big to be opened by this kernel.
				71	* No other open action is required for regular files. Devices are handled
				72	* through the specfs file system, pipes through fifofs. Device and
				73	* fifo vnodes are "wrapped" by specfs and fifofs vnodes, respectively,
				74	* when a new vnode is first looked up or created.
				75	*/
				76	STATIC int
				77	xfs_open(
				78	bhv_desc_t *bdp,
				79	cred_t *credp)
				80	{
				81	int mode;
				82	vnode_t *vp;
				83	xfs_inode_t *ip;
				84
				85	vp = BHV_TO_VNODE(bdp);
				86	ip = XFS_BHVTOI(bdp);
				87
				88	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
				89	return XFS_ERROR(EIO);
				90
				91	/*
				92	* If it's a directory with any blocks, read-ahead block 0
				93	* as we're almost certain to have the next operation be a read there.
				94	*/
Christoph Hellwig	0432dab	2005-09-02 16:46:51 +1000	[diff] [blame]	95	if (VN_ISDIR(vp) && ip->i_d.di_nextents > 0) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	96	mode = xfs_ilock_map_shared(ip);
				97	if (ip->i_d.di_nextents > 0)
				98	(void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
				99	xfs_iunlock(ip, mode);
				100	}
				101	return 0;
				102	}
				103
				104
				105	/*
				106	* xfs_getattr
				107	*/
				108	STATIC int
				109	xfs_getattr(
				110	bhv_desc_t *bdp,
				111	vattr_t *vap,
				112	int flags,
				113	cred_t *credp)
				114	{
				115	xfs_inode_t *ip;
				116	xfs_mount_t *mp;
				117	vnode_t *vp;
				118
				119	vp = BHV_TO_VNODE(bdp);
				120	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
				121
				122	ip = XFS_BHVTOI(bdp);
				123	mp = ip->i_mount;
				124
				125	if (XFS_FORCED_SHUTDOWN(mp))
				126	return XFS_ERROR(EIO);
				127
				128	if (!(flags & ATTR_LAZY))
				129	xfs_ilock(ip, XFS_ILOCK_SHARED);
				130
				131	vap->va_size = ip->i_d.di_size;
				132	if (vap->va_mask == XFS_AT_SIZE)
				133	goto all_done;
				134
				135	vap->va_nblocks =
				136	XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
				137	vap->va_nodeid = ip->i_ino;
				138	#if XFS_BIG_INUMS
				139	vap->va_nodeid += mp->m_inoadd;
				140	#endif
				141	vap->va_nlink = ip->i_d.di_nlink;
				142
				143	/*
				144	* Quick exit for non-stat callers
				145	*/
				146	if ((vap->va_mask &
				147	~(XFS_AT_SIZE\|XFS_AT_FSID\|XFS_AT_NODEID\|
				148	XFS_AT_NLINK\|XFS_AT_BLKSIZE)) == 0)
				149	goto all_done;
				150
				151	/*
				152	* Copy from in-core inode.
				153	*/
Christoph Hellwig	0432dab	2005-09-02 16:46:51 +1000	[diff] [blame]	154	vap->va_mode = ip->i_d.di_mode;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	155	vap->va_uid = ip->i_d.di_uid;
				156	vap->va_gid = ip->i_d.di_gid;
				157	vap->va_projid = ip->i_d.di_projid;
				158
				159	/*
				160	* Check vnode type block/char vs. everything else.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	161	*/
Christoph Hellwig	0432dab	2005-09-02 16:46:51 +1000	[diff] [blame]	162	switch (ip->i_d.di_mode & S_IFMT) {
				163	case S_IFBLK:
				164	case S_IFCHR:
				165	vap->va_rdev = ip->i_df.if_u2.if_rdev;
				166	vap->va_blocksize = BLKDEV_IOSIZE;
				167	break;
				168	default:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	169	vap->va_rdev = 0;
				170
				171	if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
David Chinner	e8c8b3a	2005-11-02 10:33:05 +1100	[diff] [blame]	172	vap->va_blocksize = xfs_preferred_iosize(mp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	173	} else {
				174
				175	/*
				176	* If the file blocks are being allocated from a
				177	* realtime partition, then return the inode's
				178	* realtime extent size or the realtime volume's
				179	* extent size.
				180	*/
				181	vap->va_blocksize = ip->i_d.di_extsize ?
				182	(ip->i_d.di_extsize << mp->m_sb.sb_blocklog) :
				183	(mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog);
				184	}
Christoph Hellwig	0432dab	2005-09-02 16:46:51 +1000	[diff] [blame]	185	break;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	186	}
				187
				188	vap->va_atime.tv_sec = ip->i_d.di_atime.t_sec;
				189	vap->va_atime.tv_nsec = ip->i_d.di_atime.t_nsec;
				190	vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
				191	vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
				192	vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
				193	vap->va_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
				194
				195	/*
				196	* Exit for stat callers. See if any of the rest of the fields
				197	* to be filled in are needed.
				198	*/
				199	if ((vap->va_mask &
				200	(XFS_AT_XFLAGS\|XFS_AT_EXTSIZE\|XFS_AT_NEXTENTS\|XFS_AT_ANEXTENTS\|
				201	XFS_AT_GENCOUNT\|XFS_AT_VCODE)) == 0)
				202	goto all_done;
				203
				204	/*
				205	* Convert di_flags to xflags.
				206	*/
				207	vap->va_xflags = xfs_ip2xflags(ip);
				208
				209	/*
				210	* Exit for inode revalidate. See if any of the rest of
				211	* the fields to be filled in are needed.
				212	*/
				213	if ((vap->va_mask &
				214	(XFS_AT_EXTSIZE\|XFS_AT_NEXTENTS\|XFS_AT_ANEXTENTS\|
				215	XFS_AT_GENCOUNT\|XFS_AT_VCODE)) == 0)
				216	goto all_done;
				217
				218	vap->va_extsize = ip->i_d.di_extsize << mp->m_sb.sb_blocklog;
				219	vap->va_nextents =
				220	(ip->i_df.if_flags & XFS_IFEXTENTS) ?
				221	ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) :
				222	ip->i_d.di_nextents;
				223	if (ip->i_afp)
				224	vap->va_anextents =
				225	(ip->i_afp->if_flags & XFS_IFEXTENTS) ?
				226	ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) :
				227	ip->i_d.di_anextents;
				228	else
				229	vap->va_anextents = 0;
				230	vap->va_gen = ip->i_d.di_gen;
				231
				232	all_done:
				233	if (!(flags & ATTR_LAZY))
				234	xfs_iunlock(ip, XFS_ILOCK_SHARED);
				235	return 0;
				236	}
				237
				238
				239	/*
				240	* xfs_setattr
				241	*/
				242	int
				243	xfs_setattr(
				244	bhv_desc_t *bdp,
				245	vattr_t *vap,
				246	int flags,
				247	cred_t *credp)
				248	{
				249	xfs_inode_t *ip;
				250	xfs_trans_t *tp;
				251	xfs_mount_t *mp;
				252	int mask;
				253	int code;
				254	uint lock_flags;
				255	uint commit_flags=0;
				256	uid_t uid=0, iuid=0;
				257	gid_t gid=0, igid=0;
				258	int timeflags = 0;
				259	vnode_t *vp;
				260	xfs_prid_t projid=0, iprojid=0;
				261	int mandlock_before, mandlock_after;
				262	struct xfs_dquot udqp, gdqp, olddquot1, olddquot2;
				263	int file_owner;
Dean Roehrich	5fcbab3	2005-05-05 13:27:19 -0700	[diff] [blame]	264	int need_iolock = 1;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	265
				266	vp = BHV_TO_VNODE(bdp);
				267	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
				268
				269	if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
				270	return XFS_ERROR(EROFS);
				271
				272	/*
				273	* Cannot set certain attributes.
				274	*/
				275	mask = vap->va_mask;
				276	if (mask & XFS_AT_NOSET) {
				277	return XFS_ERROR(EINVAL);
				278	}
				279
				280	ip = XFS_BHVTOI(bdp);
				281	mp = ip->i_mount;
				282
				283	if (XFS_FORCED_SHUTDOWN(mp))
				284	return XFS_ERROR(EIO);
				285
				286	/*
				287	* Timestamps do not need to be logged and hence do not
				288	* need to be done within a transaction.
				289	*/
				290	if (mask & XFS_AT_UPDTIMES) {
				291	ASSERT((mask & ~XFS_AT_UPDTIMES) == 0);
				292	timeflags = ((mask & XFS_AT_UPDATIME) ? XFS_ICHGTIME_ACC : 0) \|
				293	((mask & XFS_AT_UPDCTIME) ? XFS_ICHGTIME_CHG : 0) \|
				294	((mask & XFS_AT_UPDMTIME) ? XFS_ICHGTIME_MOD : 0);
				295	xfs_ichgtime(ip, timeflags);
				296	return 0;
				297	}
				298
				299	olddquot1 = olddquot2 = NULL;
				300	udqp = gdqp = NULL;
				301
				302	/*
				303	* If disk quotas is on, we make sure that the dquots do exist on disk,
				304	* before we start any other transactions. Trying to do this later
				305	* is messy. We don't care to take a readlock to look at the ids
				306	* in inode here, because we can't hold it across the trans_reserve.
				307	* If the IDs do change before we take the ilock, we're covered
				308	* because the i_*dquot fields will get updated anyway.
				309	*/
Nathan Scott	c8ad20f	2005-06-21 15:38:48 +1000	[diff] [blame]	310	if (XFS_IS_QUOTA_ON(mp) &&
				311	(mask & (XFS_AT_UID\|XFS_AT_GID\|XFS_AT_PROJID))) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	312	uint qflags = 0;
				313
Nathan Scott	c8ad20f	2005-06-21 15:38:48 +1000	[diff] [blame]	314	if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	315	uid = vap->va_uid;
				316	qflags \|= XFS_QMOPT_UQUOTA;
				317	} else {
				318	uid = ip->i_d.di_uid;
				319	}
Nathan Scott	c8ad20f	2005-06-21 15:38:48 +1000	[diff] [blame]	320	if ((mask & XFS_AT_GID) && XFS_IS_GQUOTA_ON(mp)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	321	gid = vap->va_gid;
				322	qflags \|= XFS_QMOPT_GQUOTA;
				323	} else {
				324	gid = ip->i_d.di_gid;
				325	}
Nathan Scott	c8ad20f	2005-06-21 15:38:48 +1000	[diff] [blame]	326	if ((mask & XFS_AT_PROJID) && XFS_IS_PQUOTA_ON(mp)) {
				327	projid = vap->va_projid;
				328	qflags \|= XFS_QMOPT_PQUOTA;
				329	} else {
				330	projid = ip->i_d.di_projid;
				331	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	332	/*
				333	* We take a reference when we initialize udqp and gdqp,
				334	* so it is important that we never blindly double trip on
				335	* the same variable. See xfs_create() for an example.
				336	*/
				337	ASSERT(udqp == NULL);
				338	ASSERT(gdqp == NULL);
Nathan Scott	c8ad20f	2005-06-21 15:38:48 +1000	[diff] [blame]	339	code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, projid, qflags,
				340	&udqp, &gdqp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	341	if (code)
				342	return (code);
				343	}
				344
				345	/*
				346	* For the other attributes, we acquire the inode lock and
				347	* first do an error checking pass.
				348	*/
				349	tp = NULL;
				350	lock_flags = XFS_ILOCK_EXCL;
Dean Roehrich	5fcbab3	2005-05-05 13:27:19 -0700	[diff] [blame]	351	ASSERT(flags & ATTR_NOLOCK ? flags & ATTR_DMI : 1);
				352	if (flags & ATTR_NOLOCK)
				353	need_iolock = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	354	if (!(mask & XFS_AT_SIZE)) {
				355	if ((mask != (XFS_AT_CTIME\|XFS_AT_ATIME\|XFS_AT_MTIME)) \|\|
				356	(mp->m_flags & XFS_MOUNT_WSYNC)) {
				357	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
				358	commit_flags = 0;
				359	if ((code = xfs_trans_reserve(tp, 0,
				360	XFS_ICHANGE_LOG_RES(mp), 0,
				361	0, 0))) {
				362	lock_flags = 0;
				363	goto error_return;
				364	}
				365	}
				366	} else {
				367	if (DM_EVENT_ENABLED (vp->v_vfsp, ip, DM_EVENT_TRUNCATE) &&
				368	!(flags & ATTR_DMI)) {
				369	int dmflags = AT_DELAY_FLAG(flags) \| DM_SEM_FLAG_WR;
				370	code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, vp,
				371	vap->va_size, 0, dmflags, NULL);
				372	if (code) {
				373	lock_flags = 0;
				374	goto error_return;
				375	}
				376	}
				377	if (need_iolock)
				378	lock_flags \|= XFS_IOLOCK_EXCL;
				379	}
				380
				381	xfs_ilock(ip, lock_flags);
				382
				383	/* boolean: are we the file owner? */
				384	file_owner = (current_fsuid(credp) == ip->i_d.di_uid);
				385
				386	/*
				387	* Change various properties of a file.
				388	* Only the owner or users with CAP_FOWNER
				389	* capability may do these things.
				390	*/
				391	if (mask &
				392	(XFS_AT_MODE\|XFS_AT_XFLAGS\|XFS_AT_EXTSIZE\|XFS_AT_UID\|
				393	XFS_AT_GID\|XFS_AT_PROJID)) {
				394	/*
				395	* CAP_FOWNER overrides the following restrictions:
				396	*
				397	* The user ID of the calling process must be equal
				398	* to the file owner ID, except in cases where the
				399	* CAP_FSETID capability is applicable.
				400	*/
				401	if (!file_owner && !capable(CAP_FOWNER)) {
				402	code = XFS_ERROR(EPERM);
				403	goto error_return;
				404	}
				405
				406	/*
				407	* CAP_FSETID overrides the following restrictions:
				408	*
				409	* The effective user ID of the calling process shall match
				410	* the file owner when setting the set-user-ID and
				411	* set-group-ID bits on that file.
				412	*
				413	* The effective group ID or one of the supplementary group
				414	* IDs of the calling process shall match the group owner of
				415	* the file when setting the set-group-ID bit on that file
				416	*/
				417	if (mask & XFS_AT_MODE) {
				418	mode_t m = 0;
				419
				420	if ((vap->va_mode & S_ISUID) && !file_owner)
				421	m \|= S_ISUID;
				422	if ((vap->va_mode & S_ISGID) &&
				423	!in_group_p((gid_t)ip->i_d.di_gid))
				424	m \|= S_ISGID;
				425	#if 0
				426	/* Linux allows this, Irix doesn't. */
Christoph Hellwig	0432dab	2005-09-02 16:46:51 +1000	[diff] [blame]	427	if ((vap->va_mode & S_ISVTX) && !VN_ISDIR(vp))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	428	m \|= S_ISVTX;
				429	#endif
				430	if (m && !capable(CAP_FSETID))
				431	vap->va_mode &= ~m;
				432	}
				433	}
				434
				435	/*
				436	* Change file ownership. Must be the owner or privileged.
				437	* If the system was configured with the "restricted_chown"
				438	* option, the owner is not permitted to give away the file,
				439	* and can change the group id only to a group of which he
				440	* or she is a member.
				441	*/
				442	if (mask & (XFS_AT_UID\|XFS_AT_GID\|XFS_AT_PROJID)) {
				443	/*
				444	* These IDs could have changed since we last looked at them.
				445	* But, we're assured that if the ownership did change
				446	* while we didn't have the inode locked, inode's dquot(s)
				447	* would have changed also.
				448	*/
				449	iuid = ip->i_d.di_uid;
				450	iprojid = ip->i_d.di_projid;
				451	igid = ip->i_d.di_gid;
				452	gid = (mask & XFS_AT_GID) ? vap->va_gid : igid;
				453	uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid;
				454	projid = (mask & XFS_AT_PROJID) ? (xfs_prid_t)vap->va_projid :
				455	iprojid;
				456
				457	/*
				458	* CAP_CHOWN overrides the following restrictions:
				459	*
				460	* If _POSIX_CHOWN_RESTRICTED is defined, this capability
				461	* shall override the restriction that a process cannot
				462	* change the user ID of a file it owns and the restriction
				463	* that the group ID supplied to the chown() function
				464	* shall be equal to either the group ID or one of the
				465	* supplementary group IDs of the calling process.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	466	*/
				467	if (restricted_chown &&
				468	(iuid != uid \|\| (igid != gid &&
				469	!in_group_p((gid_t)gid))) &&
				470	!capable(CAP_CHOWN)) {
				471	code = XFS_ERROR(EPERM);
				472	goto error_return;
				473	}
				474	/*
Nathan Scott	c8ad20f	2005-06-21 15:38:48 +1000	[diff] [blame]	475	* Do a quota reservation only if uid/projid/gid is actually
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	476	* going to change.
				477	*/
				478	if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) \|\|
Nathan Scott	c8ad20f	2005-06-21 15:38:48 +1000	[diff] [blame]	479	(XFS_IS_PQUOTA_ON(mp) && iprojid != projid) \|\|
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	480	(XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
				481	ASSERT(tp);
				482	code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
				483	capable(CAP_FOWNER) ?
				484	XFS_QMOPT_FORCE_RES : 0);
				485	if (code) /* out of quota */
				486	goto error_return;
				487	}
				488	}
				489
				490	/*
				491	* Truncate file. Must have write permission and not be a directory.
				492	*/
				493	if (mask & XFS_AT_SIZE) {
				494	/* Short circuit the truncate case for zero length files */
				495	if ((vap->va_size == 0) &&
				496	(ip->i_d.di_size == 0) && (ip->i_d.di_nextents == 0)) {
				497	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				498	lock_flags &= ~XFS_ILOCK_EXCL;
				499	if (mask & XFS_AT_CTIME)
				500	xfs_ichgtime(ip, XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
				501	code = 0;
				502	goto error_return;
				503	}
				504
Christoph Hellwig	0432dab	2005-09-02 16:46:51 +1000	[diff] [blame]	505	if (VN_ISDIR(vp)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	506	code = XFS_ERROR(EISDIR);
				507	goto error_return;
Christoph Hellwig	0432dab	2005-09-02 16:46:51 +1000	[diff] [blame]	508	} else if (!VN_ISREG(vp)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	509	code = XFS_ERROR(EINVAL);
				510	goto error_return;
				511	}
				512	/*
				513	* Make sure that the dquots are attached to the inode.
				514	*/
				515	if ((code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED)))
				516	goto error_return;
				517	}
				518
				519	/*
				520	* Change file access or modified times.
				521	*/
				522	if (mask & (XFS_AT_ATIME\|XFS_AT_MTIME)) {
				523	if (!file_owner) {
				524	if ((flags & ATTR_UTIME) &&
				525	!capable(CAP_FOWNER)) {
				526	code = XFS_ERROR(EPERM);
				527	goto error_return;
				528	}
				529	}
				530	}
				531
				532	/*
				533	* Change extent size or realtime flag.
				534	*/
				535	if (mask & (XFS_AT_EXTSIZE\|XFS_AT_XFLAGS)) {
				536	/*
				537	* Can't change extent size if any extents are allocated.
				538	*/
Eric Sandeen	e94af02	2005-11-02 15:10:41 +1100	[diff] [blame]	539	if (ip->i_d.di_nextents && (mask & XFS_AT_EXTSIZE) &&
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	540	((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
				541	vap->va_extsize) ) {
				542	code = XFS_ERROR(EINVAL); /* EFBIG? */
				543	goto error_return;
				544	}
				545
				546	/*
				547	* Can't set extent size unless the file is marked, or
				548	* about to be marked as a realtime file.
				549	*
				550	* This check will be removed when fixed size extents
				551	* with buffered data writes is implemented.
				552	*
				553	*/
				554	if ((mask & XFS_AT_EXTSIZE) &&
				555	((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
				556	vap->va_extsize) &&
				557	(!((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) \|\|
				558	((mask & XFS_AT_XFLAGS) &&
				559	(vap->va_xflags & XFS_XFLAG_REALTIME))))) {
				560	code = XFS_ERROR(EINVAL);
				561	goto error_return;
				562	}
				563
				564	/*
				565	* Can't change realtime flag if any extents are allocated.
				566	*/
Eric Sandeen	e94af02	2005-11-02 15:10:41 +1100	[diff] [blame]	567	if ((ip->i_d.di_nextents \|\| ip->i_delayed_blks) &&
				568	(mask & XFS_AT_XFLAGS) &&
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	569	(ip->i_d.di_flags & XFS_DIFLAG_REALTIME) !=
				570	(vap->va_xflags & XFS_XFLAG_REALTIME)) {
				571	code = XFS_ERROR(EINVAL); /* EFBIG? */
				572	goto error_return;
				573	}
				574	/*
				575	* Extent size must be a multiple of the appropriate block
				576	* size, if set at all.
				577	*/
				578	if ((mask & XFS_AT_EXTSIZE) && vap->va_extsize != 0) {
				579	xfs_extlen_t size;
				580
				581	if ((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) \|\|
				582	((mask & XFS_AT_XFLAGS) &&
				583	(vap->va_xflags & XFS_XFLAG_REALTIME))) {
				584	size = mp->m_sb.sb_rextsize <<
				585	mp->m_sb.sb_blocklog;
				586	} else {
				587	size = mp->m_sb.sb_blocksize;
				588	}
				589	if (vap->va_extsize % size) {
				590	code = XFS_ERROR(EINVAL);
				591	goto error_return;
				592	}
				593	}
				594	/*
				595	* If realtime flag is set then must have realtime data.
				596	*/
				597	if ((mask & XFS_AT_XFLAGS) &&
				598	(vap->va_xflags & XFS_XFLAG_REALTIME)) {
				599	if ((mp->m_sb.sb_rblocks == 0) \|\|
				600	(mp->m_sb.sb_rextsize == 0) \|\|
				601	(ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
				602	code = XFS_ERROR(EINVAL);
				603	goto error_return;
				604	}
				605	}
				606
				607	/*
				608	* Can't modify an immutable/append-only file unless
				609	* we have appropriate permission.
				610	*/
				611	if ((mask & XFS_AT_XFLAGS) &&
				612	(ip->i_d.di_flags &
				613	(XFS_DIFLAG_IMMUTABLE\|XFS_DIFLAG_APPEND) \|\|
				614	(vap->va_xflags &
				615	(XFS_XFLAG_IMMUTABLE \| XFS_XFLAG_APPEND))) &&
				616	!capable(CAP_LINUX_IMMUTABLE)) {
				617	code = XFS_ERROR(EPERM);
				618	goto error_return;
				619	}
				620	}
				621
				622	/*
				623	* Now we can make the changes. Before we join the inode
				624	* to the transaction, if XFS_AT_SIZE is set then take care of
				625	* the part of the truncation that must be done without the
				626	* inode lock. This needs to be done before joining the inode
				627	* to the transaction, because the inode cannot be unlocked
				628	* once it is a part of the transaction.
				629	*/
				630	if (mask & XFS_AT_SIZE) {
				631	code = 0;
Eric Sandeen	374e2ac	2005-11-02 15:07:34 +1100	[diff] [blame]	632	if ((vap->va_size > ip->i_d.di_size) &&
				633	(flags & ATTR_NOSIZETOK) == 0) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	634	code = xfs_igrow_start(ip, vap->va_size, credp);
Eric Sandeen	374e2ac	2005-11-02 15:07:34 +1100	[diff] [blame]	635	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	636	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				637	if (!code)
				638	code = xfs_itruncate_data(ip, vap->va_size);
				639	if (code) {
				640	ASSERT(tp == NULL);
				641	lock_flags &= ~XFS_ILOCK_EXCL;
				642	ASSERT(lock_flags == XFS_IOLOCK_EXCL);
				643	goto error_return;
				644	}
				645	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
				646	if ((code = xfs_trans_reserve(tp, 0,
				647	XFS_ITRUNCATE_LOG_RES(mp), 0,
				648	XFS_TRANS_PERM_LOG_RES,
				649	XFS_ITRUNCATE_LOG_COUNT))) {
				650	xfs_trans_cancel(tp, 0);
				651	if (need_iolock)
				652	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
				653	return code;
				654	}
				655	commit_flags = XFS_TRANS_RELEASE_LOG_RES;
				656	xfs_ilock(ip, XFS_ILOCK_EXCL);
				657	}
				658
				659	if (tp) {
				660	xfs_trans_ijoin(tp, ip, lock_flags);
				661	xfs_trans_ihold(tp, ip);
				662	}
				663
				664	/* determine whether mandatory locking mode changes */
				665	mandlock_before = MANDLOCK(vp, ip->i_d.di_mode);
				666
				667	/*
				668	* Truncate file. Must have write permission and not be a directory.
				669	*/
				670	if (mask & XFS_AT_SIZE) {
				671	if (vap->va_size > ip->i_d.di_size) {
				672	xfs_igrow_finish(tp, ip, vap->va_size,
				673	!(flags & ATTR_DMI));
				674	} else if ((vap->va_size <= ip->i_d.di_size) \|\|
				675	((vap->va_size == 0) && ip->i_d.di_nextents)) {
				676	/*
				677	* signal a sync transaction unless
				678	* we're truncating an already unlinked
				679	* file on a wsync filesystem
				680	*/
				681	code = xfs_itruncate_finish(&tp, ip,
				682	(xfs_fsize_t)vap->va_size,
				683	XFS_DATA_FORK,
				684	((ip->i_d.di_nlink != 0 \|\|
				685	!(mp->m_flags & XFS_MOUNT_WSYNC))
				686	? 1 : 0));
				687	if (code) {
				688	goto abort_return;
				689	}
				690	}
				691	/*
				692	* Have to do this even if the file's size doesn't change.
				693	*/
				694	timeflags \|= XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG;
				695	}
				696
				697	/*
				698	* Change file access modes.
				699	*/
				700	if (mask & XFS_AT_MODE) {
				701	ip->i_d.di_mode &= S_IFMT;
				702	ip->i_d.di_mode \|= vap->va_mode & ~S_IFMT;
				703
				704	xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
				705	timeflags \|= XFS_ICHGTIME_CHG;
				706	}
				707
				708	/*
				709	* Change file ownership. Must be the owner or privileged.
				710	* If the system was configured with the "restricted_chown"
				711	* option, the owner is not permitted to give away the file,
				712	* and can change the group id only to a group of which he
				713	* or she is a member.
				714	*/
				715	if (mask & (XFS_AT_UID\|XFS_AT_GID\|XFS_AT_PROJID)) {
				716	/*
				717	* CAP_FSETID overrides the following restrictions:
				718	*
				719	* The set-user-ID and set-group-ID bits of a file will be
				720	* cleared upon successful return from chown()
				721	*/
				722	if ((ip->i_d.di_mode & (S_ISUID\|S_ISGID)) &&
				723	!capable(CAP_FSETID)) {
				724	ip->i_d.di_mode &= ~(S_ISUID\|S_ISGID);
				725	}
				726
				727	/*
				728	* Change the ownerships and register quota modifications
				729	* in the transaction.
				730	*/
				731	if (iuid != uid) {
				732	if (XFS_IS_UQUOTA_ON(mp)) {
				733	ASSERT(mask & XFS_AT_UID);
				734	ASSERT(udqp);
				735	olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
				736	&ip->i_udquot, udqp);
				737	}
				738	ip->i_d.di_uid = uid;
				739	}
				740	if (igid != gid) {
				741	if (XFS_IS_GQUOTA_ON(mp)) {
Nathan Scott	c8ad20f	2005-06-21 15:38:48 +1000	[diff] [blame]	742	ASSERT(!XFS_IS_PQUOTA_ON(mp));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	743	ASSERT(mask & XFS_AT_GID);
				744	ASSERT(gdqp);
				745	olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
				746	&ip->i_gdquot, gdqp);
				747	}
				748	ip->i_d.di_gid = gid;
				749	}
				750	if (iprojid != projid) {
Nathan Scott	c8ad20f	2005-06-21 15:38:48 +1000	[diff] [blame]	751	if (XFS_IS_PQUOTA_ON(mp)) {
				752	ASSERT(!XFS_IS_GQUOTA_ON(mp));
				753	ASSERT(mask & XFS_AT_PROJID);
				754	ASSERT(gdqp);
				755	olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
				756	&ip->i_gdquot, gdqp);
				757	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	758	ip->i_d.di_projid = projid;
				759	/*
				760	* We may have to rev the inode as well as
				761	* the superblock version number since projids didn't
				762	* exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
				763	*/
				764	if (ip->i_d.di_version == XFS_DINODE_VERSION_1)
				765	xfs_bump_ino_vers2(tp, ip);
				766	}
				767
				768	xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
				769	timeflags \|= XFS_ICHGTIME_CHG;
				770	}
				771
				772
				773	/*
				774	* Change file access or modified times.
				775	*/
				776	if (mask & (XFS_AT_ATIME\|XFS_AT_MTIME)) {
				777	if (mask & XFS_AT_ATIME) {
				778	ip->i_d.di_atime.t_sec = vap->va_atime.tv_sec;
				779	ip->i_d.di_atime.t_nsec = vap->va_atime.tv_nsec;
				780	ip->i_update_core = 1;
				781	timeflags &= ~XFS_ICHGTIME_ACC;
				782	}
				783	if (mask & XFS_AT_MTIME) {
				784	ip->i_d.di_mtime.t_sec = vap->va_mtime.tv_sec;
				785	ip->i_d.di_mtime.t_nsec = vap->va_mtime.tv_nsec;
				786	timeflags &= ~XFS_ICHGTIME_MOD;
				787	timeflags \|= XFS_ICHGTIME_CHG;
				788	}
				789	if (tp && (flags & ATTR_UTIME))
				790	xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
				791	}
				792
				793	/*
				794	* Change XFS-added attributes.
				795	*/
				796	if (mask & (XFS_AT_EXTSIZE\|XFS_AT_XFLAGS)) {
				797	if (mask & XFS_AT_EXTSIZE) {
				798	/*
				799	* Converting bytes to fs blocks.
				800	*/
				801	ip->i_d.di_extsize = vap->va_extsize >>
				802	mp->m_sb.sb_blocklog;
				803	}
				804	if (mask & XFS_AT_XFLAGS) {
				805	uint di_flags;
				806
				807	/* can't set PREALLOC this way, just preserve it */
				808	di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
				809	if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
				810	di_flags \|= XFS_DIFLAG_IMMUTABLE;
				811	if (vap->va_xflags & XFS_XFLAG_APPEND)
				812	di_flags \|= XFS_DIFLAG_APPEND;
				813	if (vap->va_xflags & XFS_XFLAG_SYNC)
				814	di_flags \|= XFS_DIFLAG_SYNC;
				815	if (vap->va_xflags & XFS_XFLAG_NOATIME)
				816	di_flags \|= XFS_DIFLAG_NOATIME;
				817	if (vap->va_xflags & XFS_XFLAG_NODUMP)
				818	di_flags \|= XFS_DIFLAG_NODUMP;
Nathan Scott	365ca83	2005-06-21 15:39:12 +1000	[diff] [blame]	819	if (vap->va_xflags & XFS_XFLAG_PROJINHERIT)
				820	di_flags \|= XFS_DIFLAG_PROJINHERIT;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	821	if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
				822	if (vap->va_xflags & XFS_XFLAG_RTINHERIT)
				823	di_flags \|= XFS_DIFLAG_RTINHERIT;
				824	if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS)
				825	di_flags \|= XFS_DIFLAG_NOSYMLINKS;
				826	} else {
				827	if (vap->va_xflags & XFS_XFLAG_REALTIME) {
				828	di_flags \|= XFS_DIFLAG_REALTIME;
				829	ip->i_iocore.io_flags \|= XFS_IOCORE_RT;
				830	} else {
				831	ip->i_iocore.io_flags &= ~XFS_IOCORE_RT;
				832	}
				833	}
				834	ip->i_d.di_flags = di_flags;
				835	}
				836	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				837	timeflags \|= XFS_ICHGTIME_CHG;
				838	}
				839
				840	/*
				841	* Change file inode change time only if XFS_AT_CTIME set
				842	* AND we have been called by a DMI function.
				843	*/
				844
				845	if ( (flags & ATTR_DMI) && (mask & XFS_AT_CTIME) ) {
				846	ip->i_d.di_ctime.t_sec = vap->va_ctime.tv_sec;
				847	ip->i_d.di_ctime.t_nsec = vap->va_ctime.tv_nsec;
				848	ip->i_update_core = 1;
				849	timeflags &= ~XFS_ICHGTIME_CHG;
				850	}
				851
				852	/*
				853	* Send out timestamp changes that need to be set to the
				854	* current time. Not done when called by a DMI function.
				855	*/
				856	if (timeflags && !(flags & ATTR_DMI))
				857	xfs_ichgtime(ip, timeflags);
				858
				859	XFS_STATS_INC(xs_ig_attrchg);
				860
				861	/*
				862	* If this is a synchronous mount, make sure that the
				863	* transaction goes to disk before returning to the user.
				864	* This is slightly sub-optimal in that truncates require
				865	* two sync transactions instead of one for wsync filesytems.
				866	* One for the truncate and one for the timestamps since we
				867	* don't want to change the timestamps unless we're sure the
				868	* truncate worked. Truncates are less than 1% of the laddis
				869	* mix so this probably isn't worth the trouble to optimize.
				870	*/
				871	code = 0;
				872	if (tp) {
				873	if (mp->m_flags & XFS_MOUNT_WSYNC)
				874	xfs_trans_set_sync(tp);
				875
				876	code = xfs_trans_commit(tp, commit_flags, NULL);
				877	}
				878
				879	/*
				880	* If the (regular) file's mandatory locking mode changed, then
				881	* notify the vnode. We do this under the inode lock to prevent
				882	* racing calls to vop_vnode_change.
				883	*/
				884	mandlock_after = MANDLOCK(vp, ip->i_d.di_mode);
				885	if (mandlock_before != mandlock_after) {
				886	VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_ENF_LOCKING,
				887	mandlock_after);
				888	}
				889
				890	xfs_iunlock(ip, lock_flags);
				891
				892	/*
				893	* Release any dquot(s) the inode had kept before chown.
				894	*/
				895	XFS_QM_DQRELE(mp, olddquot1);
				896	XFS_QM_DQRELE(mp, olddquot2);
				897	XFS_QM_DQRELE(mp, udqp);
				898	XFS_QM_DQRELE(mp, gdqp);
				899
				900	if (code) {
				901	return code;
				902	}
				903
				904	if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_ATTRIBUTE) &&
				905	!(flags & ATTR_DMI)) {
				906	(void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, vp, DM_RIGHT_NULL,
				907	NULL, DM_RIGHT_NULL, NULL, NULL,
				908	0, 0, AT_DELAY_FLAG(flags));
				909	}
				910	return 0;
				911
				912	abort_return:
				913	commit_flags \|= XFS_TRANS_ABORT;
				914	/* FALLTHROUGH */
				915	error_return:
				916	XFS_QM_DQRELE(mp, udqp);
				917	XFS_QM_DQRELE(mp, gdqp);
				918	if (tp) {
				919	xfs_trans_cancel(tp, commit_flags);
				920	}
				921	if (lock_flags != 0) {
				922	xfs_iunlock(ip, lock_flags);
				923	}
				924	return code;
				925	}
				926
				927
				928	/*
				929	* xfs_access
				930	* Null conversion from vnode mode bits to inode mode bits, as in efs.
				931	*/
				932	STATIC int
				933	xfs_access(
				934	bhv_desc_t *bdp,
				935	int mode,
				936	cred_t *credp)
				937	{
				938	xfs_inode_t *ip;
				939	int error;
				940
				941	vn_trace_entry(BHV_TO_VNODE(bdp), __FUNCTION__,
				942	(inst_t *)__return_address);
				943
				944	ip = XFS_BHVTOI(bdp);
				945	xfs_ilock(ip, XFS_ILOCK_SHARED);
				946	error = xfs_iaccess(ip, mode, credp);
				947	xfs_iunlock(ip, XFS_ILOCK_SHARED);
				948	return error;
				949	}
				950
				951
				952	/*
				953	* xfs_readlink
				954	*
				955	*/
				956	STATIC int
				957	xfs_readlink(
				958	bhv_desc_t *bdp,
				959	uio_t *uiop,
				960	int ioflags,
				961	cred_t *credp)
				962	{
				963	xfs_inode_t *ip;
				964	int count;
				965	xfs_off_t offset;
				966	int pathlen;
				967	vnode_t *vp;
				968	int error = 0;
				969	xfs_mount_t *mp;
				970	int nmaps;
				971	xfs_bmbt_irec_t mval[SYMLINK_MAPS];
				972	xfs_daddr_t d;
				973	int byte_cnt;
				974	int n;
				975	xfs_buf_t *bp;
				976
				977	vp = BHV_TO_VNODE(bdp);
				978	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
				979
				980	ip = XFS_BHVTOI(bdp);
				981	mp = ip->i_mount;
				982
				983	if (XFS_FORCED_SHUTDOWN(mp))
				984	return XFS_ERROR(EIO);
				985
				986	xfs_ilock(ip, XFS_ILOCK_SHARED);
				987
				988	ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK);
				989
				990	offset = uiop->uio_offset;
				991	count = uiop->uio_resid;
				992
				993	if (offset < 0) {
				994	error = XFS_ERROR(EINVAL);
				995	goto error_return;
				996	}
				997	if (count <= 0) {
				998	error = 0;
				999	goto error_return;
				1000	}
				1001
				1002	if (!(ioflags & IO_INVIS)) {
				1003	xfs_ichgtime(ip, XFS_ICHGTIME_ACC);
				1004	}
				1005
				1006	/*
				1007	* See if the symlink is stored inline.
				1008	*/
				1009	pathlen = (int)ip->i_d.di_size;
				1010
				1011	if (ip->i_df.if_flags & XFS_IFINLINE) {
				1012	error = uio_read(ip->i_df.if_u1.if_data, pathlen, uiop);
				1013	}
				1014	else {
				1015	/*
				1016	* Symlink not inline. Call bmap to get it in.
				1017	*/
				1018	nmaps = SYMLINK_MAPS;
				1019
				1020	error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen),
				1021	0, NULL, 0, mval, &nmaps, NULL);
				1022
				1023	if (error) {
				1024	goto error_return;
				1025	}
				1026
				1027	for (n = 0; n < nmaps; n++) {
				1028	d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
				1029	byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
				1030	bp = xfs_buf_read(mp->m_ddev_targp, d,
				1031	BTOBB(byte_cnt), 0);
				1032	error = XFS_BUF_GETERROR(bp);
				1033	if (error) {
				1034	xfs_ioerror_alert("xfs_readlink",
				1035	ip->i_mount, bp, XFS_BUF_ADDR(bp));
				1036	xfs_buf_relse(bp);
				1037	goto error_return;
				1038	}
				1039	if (pathlen < byte_cnt)
				1040	byte_cnt = pathlen;
				1041	pathlen -= byte_cnt;
				1042
				1043	error = uio_read(XFS_BUF_PTR(bp), byte_cnt, uiop);
				1044	xfs_buf_relse (bp);
				1045	}
				1046
				1047	}
				1048
				1049
				1050	error_return:
				1051
				1052	xfs_iunlock(ip, XFS_ILOCK_SHARED);
				1053
				1054	return error;
				1055	}
				1056
				1057
				1058	/*
				1059	* xfs_fsync
				1060	*
				1061	* This is called to sync the inode and its data out to disk.
				1062	* We need to hold the I/O lock while flushing the data, and
				1063	* the inode lock while flushing the inode. The inode lock CANNOT
				1064	* be held while flushing the data, so acquire after we're done
				1065	* with that.
				1066	*/
				1067	STATIC int
				1068	xfs_fsync(
				1069	bhv_desc_t *bdp,
				1070	int flag,
				1071	cred_t *credp,
				1072	xfs_off_t start,
				1073	xfs_off_t stop)
				1074	{
				1075	xfs_inode_t *ip;
				1076	xfs_trans_t *tp;
				1077	int error;
Christoph Hellwig	f538d4d	2005-11-02 10:26:59 +1100	[diff] [blame]	1078	int log_flushed = 0, changed = 1;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1079
				1080	vn_trace_entry(BHV_TO_VNODE(bdp),
				1081	__FUNCTION__, (inst_t *)__return_address);
				1082
				1083	ip = XFS_BHVTOI(bdp);
				1084
				1085	ASSERT(start >= 0 && stop >= -1);
				1086
				1087	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
				1088	return XFS_ERROR(EIO);
				1089
				1090	/*
				1091	* We always need to make sure that the required inode state
				1092	* is safe on disk. The vnode might be clean but because
				1093	* of committed transactions that haven't hit the disk yet.
				1094	* Likewise, there could be unflushed non-transactional
				1095	* changes to the inode core that have to go to disk.
				1096	*
				1097	* The following code depends on one assumption: that
				1098	* any transaction that changes an inode logs the core
				1099	* because it has to change some field in the inode core
				1100	* (typically nextents or nblocks). That assumption
				1101	* implies that any transactions against an inode will
				1102	* catch any non-transactional updates. If inode-altering
				1103	* transactions exist that violate this assumption, the
				1104	* code breaks. Right now, it figures that if the involved
				1105	* update_* field is clear and the inode is unpinned, the
				1106	* inode is clean. Either it's been flushed or it's been
				1107	* committed and the commit has hit the disk unpinning the inode.
				1108	* (Note that xfs_inode_item_format() called at commit clears
				1109	* the update_* fields.)
				1110	*/
				1111	xfs_ilock(ip, XFS_ILOCK_SHARED);
				1112
				1113	/* If we are flushing data then we care about update_size
				1114	* being set, otherwise we care about update_core
				1115	*/
				1116	if ((flag & FSYNC_DATA) ?
				1117	(ip->i_update_size == 0) :
				1118	(ip->i_update_core == 0)) {
				1119	/*
				1120	* Timestamps/size haven't changed since last inode
				1121	* flush or inode transaction commit. That means
				1122	* either nothing got written or a transaction
				1123	* committed which caught the updates. If the
				1124	* latter happened and the transaction hasn't
				1125	* hit the disk yet, the inode will be still
				1126	* be pinned. If it is, force the log.
				1127	*/
				1128
				1129	xfs_iunlock(ip, XFS_ILOCK_SHARED);
				1130
				1131	if (xfs_ipincount(ip)) {
Christoph Hellwig	f538d4d	2005-11-02 10:26:59 +1100	[diff] [blame]	1132	_xfs_log_force(ip->i_mount, (xfs_lsn_t)0,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1133	XFS_LOG_FORCE \|
				1134	((flag & FSYNC_WAIT)
Christoph Hellwig	f538d4d	2005-11-02 10:26:59 +1100	[diff] [blame]	1135	? XFS_LOG_SYNC : 0),
				1136	&log_flushed);
				1137	} else {
				1138	/*
				1139	* If the inode is not pinned and nothing
				1140	* has changed we don't need to flush the
				1141	* cache.
				1142	*/
				1143	changed = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1144	}
				1145	error = 0;
				1146	} else {
				1147	/*
				1148	* Kick off a transaction to log the inode
				1149	* core to get the updates. Make it
				1150	* sync if FSYNC_WAIT is passed in (which
				1151	* is done by everybody but specfs). The
				1152	* sync transaction will also force the log.
				1153	*/
				1154	xfs_iunlock(ip, XFS_ILOCK_SHARED);
				1155	tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
				1156	if ((error = xfs_trans_reserve(tp, 0,
				1157	XFS_FSYNC_TS_LOG_RES(ip->i_mount),
				1158	0, 0, 0))) {
				1159	xfs_trans_cancel(tp, 0);
				1160	return error;
				1161	}
				1162	xfs_ilock(ip, XFS_ILOCK_EXCL);
				1163
				1164	/*
				1165	* Note - it's possible that we might have pushed
				1166	* ourselves out of the way during trans_reserve
				1167	* which would flush the inode. But there's no
				1168	* guarantee that the inode buffer has actually
				1169	* gone out yet (it's delwri). Plus the buffer
				1170	* could be pinned anyway if it's part of an
				1171	* inode in another recent transaction. So we
				1172	* play it safe and fire off the transaction anyway.
				1173	*/
				1174	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
				1175	xfs_trans_ihold(tp, ip);
				1176	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				1177	if (flag & FSYNC_WAIT)
				1178	xfs_trans_set_sync(tp);
Christoph Hellwig	f538d4d	2005-11-02 10:26:59 +1100	[diff] [blame]	1179	error = _xfs_trans_commit(tp, 0, NULL, &log_flushed);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1180
				1181	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				1182	}
Christoph Hellwig	f538d4d	2005-11-02 10:26:59 +1100	[diff] [blame]	1183
				1184	if ((ip->i_mount->m_flags & XFS_MOUNT_BARRIER) && changed) {
				1185	/*
				1186	* If the log write didn't issue an ordered tag we need
				1187	* to flush the disk cache for the data device now.
				1188	*/
				1189	if (!log_flushed)
				1190	xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);
				1191
				1192	/*
				1193	* If this inode is on the RT dev we need to flush that
				1194	* cache aswell.
				1195	*/
				1196	if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME)
				1197	xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
				1198	}
				1199
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1200	return error;
				1201	}
				1202
				1203	/*
				1204	* This is called by xfs_inactive to free any blocks beyond eof,
				1205	* when the link count isn't zero.
				1206	*/
				1207	STATIC int
				1208	xfs_inactive_free_eofblocks(
				1209	xfs_mount_t *mp,
				1210	xfs_inode_t *ip)
				1211	{
				1212	xfs_trans_t *tp;
				1213	int error;
				1214	xfs_fileoff_t end_fsb;
				1215	xfs_fileoff_t last_fsb;
				1216	xfs_filblks_t map_len;
				1217	int nimaps;
				1218	xfs_bmbt_irec_t imap;
				1219
				1220	/*
				1221	* Figure out if there are any blocks beyond the end
				1222	* of the file. If not, then there is nothing to do.
				1223	*/
				1224	end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_d.di_size));
				1225	last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
				1226	map_len = last_fsb - end_fsb;
				1227	if (map_len <= 0)
				1228	return (0);
				1229
				1230	nimaps = 1;
				1231	xfs_ilock(ip, XFS_ILOCK_SHARED);
				1232	error = xfs_bmapi(NULL, ip, end_fsb, map_len, 0,
				1233	NULL, 0, &imap, &nimaps, NULL);
				1234	xfs_iunlock(ip, XFS_ILOCK_SHARED);
				1235
				1236	if (!error && (nimaps != 0) &&
				1237	(imap.br_startblock != HOLESTARTBLOCK)) {
				1238	/*
				1239	* Attach the dquots to the inode up front.
				1240	*/
				1241	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
				1242	return (error);
				1243
				1244	/*
				1245	* There are blocks after the end of file.
				1246	* Free them up now by truncating the file to
				1247	* its current size.
				1248	*/
				1249	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
				1250
				1251	/*
				1252	* Do the xfs_itruncate_start() call before
				1253	* reserving any log space because
				1254	* itruncate_start will call into the buffer
				1255	* cache and we can't
				1256	* do that within a transaction.
				1257	*/
				1258	xfs_ilock(ip, XFS_IOLOCK_EXCL);
				1259	xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
				1260	ip->i_d.di_size);
				1261
				1262	error = xfs_trans_reserve(tp, 0,
				1263	XFS_ITRUNCATE_LOG_RES(mp),
				1264	0, XFS_TRANS_PERM_LOG_RES,
				1265	XFS_ITRUNCATE_LOG_COUNT);
				1266	if (error) {
				1267	ASSERT(XFS_FORCED_SHUTDOWN(mp));
				1268	xfs_trans_cancel(tp, 0);
				1269	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
				1270	return (error);
				1271	}
				1272
				1273	xfs_ilock(ip, XFS_ILOCK_EXCL);
				1274	xfs_trans_ijoin(tp, ip,
				1275	XFS_IOLOCK_EXCL \|
				1276	XFS_ILOCK_EXCL);
				1277	xfs_trans_ihold(tp, ip);
				1278
				1279	error = xfs_itruncate_finish(&tp, ip,
				1280	ip->i_d.di_size,
				1281	XFS_DATA_FORK,
				1282	0);
				1283	/*
				1284	* If we get an error at this point we
				1285	* simply don't bother truncating the file.
				1286	*/
				1287	if (error) {
				1288	xfs_trans_cancel(tp,
				1289	(XFS_TRANS_RELEASE_LOG_RES \|
				1290	XFS_TRANS_ABORT));
				1291	} else {
				1292	error = xfs_trans_commit(tp,
				1293	XFS_TRANS_RELEASE_LOG_RES,
				1294	NULL);
				1295	}
				1296	xfs_iunlock(ip, XFS_IOLOCK_EXCL \| XFS_ILOCK_EXCL);
				1297	}
				1298	return (error);
				1299	}
				1300
				1301	/*
				1302	* Free a symlink that has blocks associated with it.
				1303	*/
				1304	STATIC int
				1305	xfs_inactive_symlink_rmt(
				1306	xfs_inode_t *ip,
				1307	xfs_trans_t **tpp)
				1308	{
				1309	xfs_buf_t *bp;
				1310	int committed;
				1311	int done;
				1312	int error;
				1313	xfs_fsblock_t first_block;
				1314	xfs_bmap_free_t free_list;
				1315	int i;
				1316	xfs_mount_t *mp;
				1317	xfs_bmbt_irec_t mval[SYMLINK_MAPS];
				1318	int nmaps;
				1319	xfs_trans_t *ntp;
				1320	int size;
				1321	xfs_trans_t *tp;
				1322
				1323	tp = *tpp;
				1324	mp = ip->i_mount;
				1325	ASSERT(ip->i_d.di_size > XFS_IFORK_DSIZE(ip));
				1326	/*
				1327	* We're freeing a symlink that has some
				1328	* blocks allocated to it. Free the
				1329	* blocks here. We know that we've got
				1330	* either 1 or 2 extents and that we can
				1331	* free them all in one bunmapi call.
				1332	*/
				1333	ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2);
				1334	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
				1335	XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
				1336	ASSERT(XFS_FORCED_SHUTDOWN(mp));
				1337	xfs_trans_cancel(tp, 0);
				1338	*tpp = NULL;
				1339	return error;
				1340	}
				1341	/*
				1342	* Lock the inode, fix the size, and join it to the transaction.
				1343	* Hold it so in the normal path, we still have it locked for
				1344	* the second transaction. In the error paths we need it
				1345	* held so the cancel won't rele it, see below.
				1346	*/
				1347	xfs_ilock(ip, XFS_IOLOCK_EXCL \| XFS_ILOCK_EXCL);
				1348	size = (int)ip->i_d.di_size;
				1349	ip->i_d.di_size = 0;
				1350	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL \| XFS_IOLOCK_EXCL);
				1351	xfs_trans_ihold(tp, ip);
				1352	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				1353	/*
				1354	* Find the block(s) so we can inval and unmap them.
				1355	*/
				1356	done = 0;
				1357	XFS_BMAP_INIT(&free_list, &first_block);
				1358	nmaps = sizeof(mval) / sizeof(mval[0]);
				1359	if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size),
				1360	XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps,
				1361	&free_list)))
				1362	goto error0;
				1363	/*
				1364	* Invalidate the block(s).
				1365	*/
				1366	for (i = 0; i < nmaps; i++) {
				1367	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
				1368	XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
				1369	XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0);
				1370	xfs_trans_binval(tp, bp);
				1371	}
				1372	/*
				1373	* Unmap the dead block(s) to the free_list.
				1374	*/
				1375	if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps,
				1376	&first_block, &free_list, &done)))
				1377	goto error1;
				1378	ASSERT(done);
				1379	/*
				1380	* Commit the first transaction. This logs the EFI and the inode.
				1381	*/
				1382	if ((error = xfs_bmap_finish(&tp, &free_list, first_block, &committed)))
				1383	goto error1;
				1384	/*
				1385	* The transaction must have been committed, since there were
				1386	* actually extents freed by xfs_bunmapi. See xfs_bmap_finish.
				1387	* The new tp has the extent freeing and EFDs.
				1388	*/
				1389	ASSERT(committed);
				1390	/*
				1391	* The first xact was committed, so add the inode to the new one.
				1392	* Mark it dirty so it will be logged and moved forward in the log as
				1393	* part of every commit.
				1394	*/
				1395	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL \| XFS_IOLOCK_EXCL);
				1396	xfs_trans_ihold(tp, ip);
				1397	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				1398	/*
				1399	* Get a new, empty transaction to return to our caller.
				1400	*/
				1401	ntp = xfs_trans_dup(tp);
				1402	/*
				1403	* Commit the transaction containing extent freeing and EFD's.
				1404	* If we get an error on the commit here or on the reserve below,
				1405	* we need to unlock the inode since the new transaction doesn't
				1406	* have the inode attached.
				1407	*/
				1408	error = xfs_trans_commit(tp, 0, NULL);
				1409	tp = ntp;
				1410	if (error) {
				1411	ASSERT(XFS_FORCED_SHUTDOWN(mp));
				1412	goto error0;
				1413	}
				1414	/*
				1415	* Remove the memory for extent descriptions (just bookkeeping).
				1416	*/
				1417	if (ip->i_df.if_bytes)
				1418	xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK);
				1419	ASSERT(ip->i_df.if_bytes == 0);
				1420	/*
				1421	* Put an itruncate log reservation in the new transaction
				1422	* for our caller.
				1423	*/
				1424	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
				1425	XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
				1426	ASSERT(XFS_FORCED_SHUTDOWN(mp));
				1427	goto error0;
				1428	}
				1429	/*
				1430	* Return with the inode locked but not joined to the transaction.
				1431	*/
				1432	*tpp = tp;
				1433	return 0;
				1434
				1435	error1:
				1436	xfs_bmap_cancel(&free_list);
				1437	error0:
				1438	/*
				1439	* Have to come here with the inode locked and either
				1440	* (held and in the transaction) or (not in the transaction).
				1441	* If the inode isn't held then cancel would iput it, but
				1442	* that's wrong since this is inactive and the vnode ref
				1443	* count is 0 already.
				1444	* Cancel won't do anything to the inode if held, but it still
				1445	* needs to be locked until the cancel is done, if it was
				1446	* joined to the transaction.
				1447	*/
				1448	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES \| XFS_TRANS_ABORT);
				1449	xfs_iunlock(ip, XFS_IOLOCK_EXCL \| XFS_ILOCK_EXCL);
				1450	*tpp = NULL;
				1451	return error;
				1452
				1453	}
				1454
				1455	STATIC int
				1456	xfs_inactive_symlink_local(
				1457	xfs_inode_t *ip,
				1458	xfs_trans_t **tpp)
				1459	{
				1460	int error;
				1461
				1462	ASSERT(ip->i_d.di_size <= XFS_IFORK_DSIZE(ip));
				1463	/*
				1464	* We're freeing a symlink which fit into
				1465	* the inode. Just free the memory used
				1466	* to hold the old symlink.
				1467	*/
				1468	error = xfs_trans_reserve(*tpp, 0,
				1469	XFS_ITRUNCATE_LOG_RES(ip->i_mount),
				1470	0, XFS_TRANS_PERM_LOG_RES,
				1471	XFS_ITRUNCATE_LOG_COUNT);
				1472
				1473	if (error) {
				1474	xfs_trans_cancel(*tpp, 0);
				1475	*tpp = NULL;
				1476	return (error);
				1477	}
				1478	xfs_ilock(ip, XFS_ILOCK_EXCL \| XFS_IOLOCK_EXCL);
				1479
				1480	/*
				1481	* Zero length symlinks _can_ exist.
				1482	*/
				1483	if (ip->i_df.if_bytes > 0) {
				1484	xfs_idata_realloc(ip,
				1485	-(ip->i_df.if_bytes),
				1486	XFS_DATA_FORK);
				1487	ASSERT(ip->i_df.if_bytes == 0);
				1488	}
				1489	return (0);
				1490	}
				1491
				1492	/*
				1493	*
				1494	*/
				1495	STATIC int
				1496	xfs_inactive_attrs(
				1497	xfs_inode_t *ip,
				1498	xfs_trans_t **tpp)
				1499	{
				1500	xfs_trans_t *tp;
				1501	int error;
				1502	xfs_mount_t *mp;
				1503
				1504	ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
				1505	tp = *tpp;
				1506	mp = ip->i_mount;
				1507	ASSERT(ip->i_d.di_forkoff != 0);
				1508	xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
				1509	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				1510
				1511	error = xfs_attr_inactive(ip);
				1512	if (error) {
				1513	*tpp = NULL;
				1514	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
				1515	return (error); /* goto out*/
				1516	}
				1517
				1518	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
				1519	error = xfs_trans_reserve(tp, 0,
				1520	XFS_IFREE_LOG_RES(mp),
				1521	0, XFS_TRANS_PERM_LOG_RES,
				1522	XFS_INACTIVE_LOG_COUNT);
				1523	if (error) {
				1524	ASSERT(XFS_FORCED_SHUTDOWN(mp));
				1525	xfs_trans_cancel(tp, 0);
				1526	*tpp = NULL;
				1527	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
				1528	return (error);
				1529	}
				1530
				1531	xfs_ilock(ip, XFS_ILOCK_EXCL);
				1532	xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL \| XFS_ILOCK_EXCL);
				1533	xfs_trans_ihold(tp, ip);
				1534	xfs_idestroy_fork(ip, XFS_ATTR_FORK);
				1535
				1536	ASSERT(ip->i_d.di_anextents == 0);
				1537
				1538	*tpp = tp;
				1539	return (0);
				1540	}
				1541
				1542	STATIC int
				1543	xfs_release(
				1544	bhv_desc_t *bdp)
				1545	{
				1546	xfs_inode_t *ip;
				1547	vnode_t *vp;
				1548	xfs_mount_t *mp;
				1549	int error;
				1550
				1551	vp = BHV_TO_VNODE(bdp);
				1552	ip = XFS_BHVTOI(bdp);
				1553
Christoph Hellwig	0432dab	2005-09-02 16:46:51 +1000	[diff] [blame]	1554	if (!VN_ISREG(vp) \|\| (ip->i_d.di_mode == 0)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1555	return 0;
				1556	}
				1557
				1558	/* If this is a read-only mount, don't do this (would generate I/O) */
				1559	if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
				1560	return 0;
				1561
				1562	#ifdef HAVE_REFCACHE
				1563	/* If we are in the NFS reference cache then don't do this now */
				1564	if (ip->i_refcache)
				1565	return 0;
				1566	#endif
				1567
				1568	mp = ip->i_mount;
				1569
				1570	if (ip->i_d.di_nlink != 0) {
				1571	if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
				1572	((ip->i_d.di_size > 0) \|\| (VN_CACHED(vp) > 0)) &&
				1573	(ip->i_df.if_flags & XFS_IFEXTENTS)) &&
				1574	(!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC\|XFS_DIFLAG_APPEND)))) {
				1575	if ((error = xfs_inactive_free_eofblocks(mp, ip)))
				1576	return (error);
				1577	/* Update linux inode block count after free above */
				1578	LINVFS_GET_IP(vp)->i_blocks = XFS_FSB_TO_BB(mp,
				1579	ip->i_d.di_nblocks + ip->i_delayed_blks);
				1580	}
				1581	}
				1582
				1583	return 0;
				1584	}
				1585
				1586	/*
				1587	* xfs_inactive
				1588	*
				1589	* This is called when the vnode reference count for the vnode
				1590	* goes to zero. If the file has been unlinked, then it must
				1591	* now be truncated. Also, we clear all of the read-ahead state
				1592	* kept for the inode here since the file is now closed.
				1593	*/
				1594	STATIC int
				1595	xfs_inactive(
				1596	bhv_desc_t *bdp,
				1597	cred_t *credp)
				1598	{
				1599	xfs_inode_t *ip;
				1600	vnode_t *vp;
				1601	xfs_bmap_free_t free_list;
				1602	xfs_fsblock_t first_block;
				1603	int committed;
				1604	xfs_trans_t *tp;
				1605	xfs_mount_t *mp;
				1606	int error;
				1607	int truncate;
				1608
				1609	vp = BHV_TO_VNODE(bdp);
				1610	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
				1611
				1612	ip = XFS_BHVTOI(bdp);
				1613
				1614	/*
				1615	* If the inode is already free, then there can be nothing
				1616	* to clean up here.
				1617	*/
				1618	if (ip->i_d.di_mode == 0 \|\| VN_BAD(vp)) {
				1619	ASSERT(ip->i_df.if_real_bytes == 0);
				1620	ASSERT(ip->i_df.if_broot_bytes == 0);
				1621	return VN_INACTIVE_CACHE;
				1622	}
				1623
				1624	/*
				1625	* Only do a truncate if it's a regular file with
				1626	* some actual space in it. It's OK to look at the
				1627	* inode's fields without the lock because we're the
				1628	* only one with a reference to the inode.
				1629	*/
				1630	truncate = ((ip->i_d.di_nlink == 0) &&
				1631	((ip->i_d.di_size != 0) \|\| (ip->i_d.di_nextents > 0)) &&
				1632	((ip->i_d.di_mode & S_IFMT) == S_IFREG));
				1633
				1634	mp = ip->i_mount;
				1635
				1636	if (ip->i_d.di_nlink == 0 &&
				1637	DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_DESTROY)) {
				1638	(void) XFS_SEND_DESTROY(mp, vp, DM_RIGHT_NULL);
				1639	}
				1640
				1641	error = 0;
				1642
				1643	/* If this is a read-only mount, don't do this (would generate I/O) */
				1644	if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
				1645	goto out;
				1646
				1647	if (ip->i_d.di_nlink != 0) {
				1648	if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
				1649	((ip->i_d.di_size > 0) \|\| (VN_CACHED(vp) > 0)) &&
				1650	(ip->i_df.if_flags & XFS_IFEXTENTS)) &&
				1651	(!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC\|XFS_DIFLAG_APPEND)) \|\|
				1652	(ip->i_delayed_blks != 0))) {
				1653	if ((error = xfs_inactive_free_eofblocks(mp, ip)))
				1654	return (VN_INACTIVE_CACHE);
				1655	/* Update linux inode block count after free above */
				1656	LINVFS_GET_IP(vp)->i_blocks = XFS_FSB_TO_BB(mp,
				1657	ip->i_d.di_nblocks + ip->i_delayed_blks);
				1658	}
				1659	goto out;
				1660	}
				1661
				1662	ASSERT(ip->i_d.di_nlink == 0);
				1663
				1664	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
				1665	return (VN_INACTIVE_CACHE);
				1666
				1667	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
				1668	if (truncate) {
				1669	/*
				1670	* Do the xfs_itruncate_start() call before
				1671	* reserving any log space because itruncate_start
				1672	* will call into the buffer cache and we can't
				1673	* do that within a transaction.
				1674	*/
				1675	xfs_ilock(ip, XFS_IOLOCK_EXCL);
				1676
				1677	xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0);
				1678
				1679	error = xfs_trans_reserve(tp, 0,
				1680	XFS_ITRUNCATE_LOG_RES(mp),
				1681	0, XFS_TRANS_PERM_LOG_RES,
				1682	XFS_ITRUNCATE_LOG_COUNT);
				1683	if (error) {
				1684	/* Don't call itruncate_cleanup */
				1685	ASSERT(XFS_FORCED_SHUTDOWN(mp));
				1686	xfs_trans_cancel(tp, 0);
				1687	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
				1688	return (VN_INACTIVE_CACHE);
				1689	}
				1690
				1691	xfs_ilock(ip, XFS_ILOCK_EXCL);
				1692	xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL \| XFS_ILOCK_EXCL);
				1693	xfs_trans_ihold(tp, ip);
				1694
				1695	/*
				1696	* normally, we have to run xfs_itruncate_finish sync.
				1697	* But if filesystem is wsync and we're in the inactive
				1698	* path, then we know that nlink == 0, and that the
				1699	* xaction that made nlink == 0 is permanently committed
				1700	* since xfs_remove runs as a synchronous transaction.
				1701	*/
				1702	error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK,
				1703	(!(mp->m_flags & XFS_MOUNT_WSYNC) ? 1 : 0));
				1704
				1705	if (error) {
				1706	xfs_trans_cancel(tp,
				1707	XFS_TRANS_RELEASE_LOG_RES \| XFS_TRANS_ABORT);
				1708	xfs_iunlock(ip, XFS_IOLOCK_EXCL \| XFS_ILOCK_EXCL);
				1709	return (VN_INACTIVE_CACHE);
				1710	}
				1711	} else if ((ip->i_d.di_mode & S_IFMT) == S_IFLNK) {
				1712
				1713	/*
				1714	* If we get an error while cleaning up a
				1715	* symlink we bail out.
				1716	*/
				1717	error = (ip->i_d.di_size > XFS_IFORK_DSIZE(ip)) ?
				1718	xfs_inactive_symlink_rmt(ip, &tp) :
				1719	xfs_inactive_symlink_local(ip, &tp);
				1720
				1721	if (error) {
				1722	ASSERT(tp == NULL);
				1723	return (VN_INACTIVE_CACHE);
				1724	}
				1725
				1726	xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL \| XFS_ILOCK_EXCL);
				1727	xfs_trans_ihold(tp, ip);
				1728	} else {
				1729	error = xfs_trans_reserve(tp, 0,
				1730	XFS_IFREE_LOG_RES(mp),
				1731	0, XFS_TRANS_PERM_LOG_RES,
				1732	XFS_INACTIVE_LOG_COUNT);
				1733	if (error) {
				1734	ASSERT(XFS_FORCED_SHUTDOWN(mp));
				1735	xfs_trans_cancel(tp, 0);
				1736	return (VN_INACTIVE_CACHE);
				1737	}
				1738
				1739	xfs_ilock(ip, XFS_ILOCK_EXCL \| XFS_IOLOCK_EXCL);
				1740	xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL \| XFS_ILOCK_EXCL);
				1741	xfs_trans_ihold(tp, ip);
				1742	}
				1743
				1744	/*
				1745	* If there are attributes associated with the file
				1746	* then blow them away now. The code calls a routine
				1747	* that recursively deconstructs the attribute fork.
				1748	* We need to just commit the current transaction
				1749	* because we can't use it for xfs_attr_inactive().
				1750	*/
				1751	if (ip->i_d.di_anextents > 0) {
				1752	error = xfs_inactive_attrs(ip, &tp);
				1753	/*
				1754	* If we got an error, the transaction is already
				1755	* cancelled, and the inode is unlocked. Just get out.
				1756	*/
				1757	if (error)
				1758	return (VN_INACTIVE_CACHE);
				1759	} else if (ip->i_afp) {
				1760	xfs_idestroy_fork(ip, XFS_ATTR_FORK);
				1761	}
				1762
				1763	/*
				1764	* Free the inode.
				1765	*/
				1766	XFS_BMAP_INIT(&free_list, &first_block);
				1767	error = xfs_ifree(tp, ip, &free_list);
				1768	if (error) {
				1769	/*
				1770	* If we fail to free the inode, shut down. The cancel
				1771	* might do that, we need to make sure. Otherwise the
				1772	* inode might be lost for a long time or forever.
				1773	*/
				1774	if (!XFS_FORCED_SHUTDOWN(mp)) {
				1775	cmn_err(CE_NOTE,
				1776	"xfs_inactive: xfs_ifree() returned an error = %d on %s",
				1777	error, mp->m_fsname);
				1778	xfs_force_shutdown(mp, XFS_METADATA_IO_ERROR);
				1779	}
				1780	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES\|XFS_TRANS_ABORT);
				1781	} else {
				1782	/*
				1783	* Credit the quota account(s). The inode is gone.
				1784	*/
				1785	XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
				1786
				1787	/*
				1788	* Just ignore errors at this point. There is
				1789	* nothing we can do except to try to keep going.
				1790	*/
				1791	(void) xfs_bmap_finish(&tp, &free_list, first_block,
				1792	&committed);
				1793	(void) xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
				1794	}
				1795	/*
				1796	* Release the dquots held by inode, if any.
				1797	*/
				1798	XFS_QM_DQDETACH(mp, ip);
				1799
				1800	xfs_iunlock(ip, XFS_IOLOCK_EXCL \| XFS_ILOCK_EXCL);
				1801
				1802	out:
				1803	return VN_INACTIVE_CACHE;
				1804	}
				1805
				1806
				1807	/*
				1808	* xfs_lookup
				1809	*/
				1810	STATIC int
				1811	xfs_lookup(
				1812	bhv_desc_t *dir_bdp,
				1813	vname_t *dentry,
				1814	vnode_t **vpp,
				1815	int flags,
				1816	vnode_t *rdir,
				1817	cred_t *credp)
				1818	{
				1819	xfs_inode_t dp, ip;
				1820	xfs_ino_t e_inum;
				1821	int error;
				1822	uint lock_mode;
				1823	vnode_t *dir_vp;
				1824
				1825	dir_vp = BHV_TO_VNODE(dir_bdp);
				1826	vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);
				1827
				1828	dp = XFS_BHVTOI(dir_bdp);
				1829
				1830	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
				1831	return XFS_ERROR(EIO);
				1832
				1833	lock_mode = xfs_ilock_map_shared(dp);
				1834	error = xfs_dir_lookup_int(dir_bdp, lock_mode, dentry, &e_inum, &ip);
				1835	if (!error) {
				1836	*vpp = XFS_ITOV(ip);
				1837	ITRACE(ip);
				1838	}
				1839	xfs_iunlock_map_shared(dp, lock_mode);
				1840	return error;
				1841	}
				1842
				1843
				1844	/*
				1845	* xfs_create (create a new file).
				1846	*/
				1847	STATIC int
				1848	xfs_create(
				1849	bhv_desc_t *dir_bdp,
				1850	vname_t *dentry,
				1851	vattr_t *vap,
				1852	vnode_t **vpp,
				1853	cred_t *credp)
				1854	{
				1855	char *name = VNAME(dentry);
				1856	vnode_t *dir_vp;
				1857	xfs_inode_t dp, ip;
				1858	vnode_t *vp=NULL;
				1859	xfs_trans_t *tp;
				1860	xfs_mount_t *mp;
				1861	xfs_dev_t rdev;
				1862	int error;
				1863	xfs_bmap_free_t free_list;
				1864	xfs_fsblock_t first_block;
				1865	boolean_t dp_joined_to_trans;
				1866	int dm_event_sent = 0;
				1867	uint cancel_flags;
				1868	int committed;
				1869	xfs_prid_t prid;
				1870	struct xfs_dquot udqp, gdqp;
				1871	uint resblks;
				1872	int dm_di_mode;
				1873	int namelen;
				1874
				1875	ASSERT(!*vpp);
				1876	dir_vp = BHV_TO_VNODE(dir_bdp);
				1877	vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);
				1878
				1879	dp = XFS_BHVTOI(dir_bdp);
				1880	mp = dp->i_mount;
				1881
Christoph Hellwig	0432dab	2005-09-02 16:46:51 +1000	[diff] [blame]	1882	dm_di_mode = vap->va_mode;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1883	namelen = VNAMELEN(dentry);
				1884
				1885	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) {
				1886	error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
				1887	dir_vp, DM_RIGHT_NULL, NULL,
				1888	DM_RIGHT_NULL, name, NULL,
				1889	dm_di_mode, 0, 0);
				1890
				1891	if (error)
				1892	return error;
				1893	dm_event_sent = 1;
				1894	}
				1895
				1896	if (XFS_FORCED_SHUTDOWN(mp))
				1897	return XFS_ERROR(EIO);
				1898
				1899	/* Return through std_return after this point. */
				1900
				1901	udqp = gdqp = NULL;
Nathan Scott	365ca83	2005-06-21 15:39:12 +1000	[diff] [blame]	1902	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
				1903	prid = dp->i_d.di_projid;
				1904	else if (vap->va_mask & XFS_AT_PROJID)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1905	prid = (xfs_prid_t)vap->va_projid;
				1906	else
				1907	prid = (xfs_prid_t)dfltprid;
				1908
				1909	/*
				1910	* Make sure that we have allocated dquot(s) on disk.
				1911	*/
				1912	error = XFS_QM_DQVOPALLOC(mp, dp,
Nathan Scott	c8ad20f	2005-06-21 15:38:48 +1000	[diff] [blame]	1913	current_fsuid(credp), current_fsgid(credp), prid,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1914	XFS_QMOPT_QUOTALL\|XFS_QMOPT_INHERIT, &udqp, &gdqp);
				1915	if (error)
				1916	goto std_return;
				1917
				1918	ip = NULL;
				1919	dp_joined_to_trans = B_FALSE;
				1920
				1921	tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
				1922	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
				1923	resblks = XFS_CREATE_SPACE_RES(mp, namelen);
				1924	/*
				1925	* Initially assume that the file does not exist and
				1926	* reserve the resources for that case. If that is not
				1927	* the case we'll drop the one we have and get a more
				1928	* appropriate transaction later.
				1929	*/
				1930	error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0,
				1931	XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
				1932	if (error == ENOSPC) {
				1933	resblks = 0;
				1934	error = xfs_trans_reserve(tp, 0, XFS_CREATE_LOG_RES(mp), 0,
				1935	XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
				1936	}
				1937	if (error) {
				1938	cancel_flags = 0;
				1939	dp = NULL;
				1940	goto error_return;
				1941	}
				1942
				1943	xfs_ilock(dp, XFS_ILOCK_EXCL);
				1944
				1945	XFS_BMAP_INIT(&free_list, &first_block);
				1946
				1947	ASSERT(ip == NULL);
				1948
				1949	/*
				1950	* Reserve disk quota and the inode.
				1951	*/
				1952	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
				1953	if (error)
				1954	goto error_return;
				1955
				1956	if (resblks == 0 &&
				1957	(error = XFS_DIR_CANENTER(mp, tp, dp, name, namelen)))
				1958	goto error_return;
				1959	rdev = (vap->va_mask & XFS_AT_RDEV) ? vap->va_rdev : 0;
Christoph Hellwig	0432dab	2005-09-02 16:46:51 +1000	[diff] [blame]	1960	error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 1,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1961	rdev, credp, prid, resblks > 0,
				1962	&ip, &committed);
				1963	if (error) {
				1964	if (error == ENOSPC)
				1965	goto error_return;
				1966	goto abort_return;
				1967	}
				1968	ITRACE(ip);
				1969
				1970	/*
				1971	* At this point, we've gotten a newly allocated inode.
				1972	* It is locked (and joined to the transaction).
				1973	*/
				1974
				1975	ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE));
				1976
				1977	/*
				1978	* Now we join the directory inode to the transaction.
				1979	* We do not do it earlier because xfs_dir_ialloc
				1980	* might commit the previous transaction (and release
				1981	* all the locks).
				1982	*/
				1983
				1984	VN_HOLD(dir_vp);
				1985	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
				1986	dp_joined_to_trans = B_TRUE;
				1987
				1988	error = XFS_DIR_CREATENAME(mp, tp, dp, name, namelen, ip->i_ino,
				1989	&first_block, &free_list,
				1990	resblks ? resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
				1991	if (error) {
				1992	ASSERT(error != ENOSPC);
				1993	goto abort_return;
				1994	}
				1995	xfs_ichgtime(dp, XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
				1996	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
				1997
				1998	/*
				1999	* If this is a synchronous mount, make sure that the
				2000	* create transaction goes to disk before returning to
				2001	* the user.
				2002	*/
				2003	if (mp->m_flags & (XFS_MOUNT_WSYNC\|XFS_MOUNT_DIRSYNC)) {
				2004	xfs_trans_set_sync(tp);
				2005	}
				2006
				2007	dp->i_gen++;
				2008
				2009	/*
				2010	* Attach the dquot(s) to the inodes and modify them incore.
				2011	* These ids of the inode couldn't have changed since the new
				2012	* inode has been locked ever since it was created.
				2013	*/
				2014	XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);
				2015
				2016	/*
				2017	* xfs_trans_commit normally decrements the vnode ref count
				2018	* when it unlocks the inode. Since we want to return the
				2019	* vnode to the caller, we bump the vnode ref count now.
				2020	*/
				2021	IHOLD(ip);
				2022	vp = XFS_ITOV(ip);
				2023
				2024	error = xfs_bmap_finish(&tp, &free_list, first_block, &committed);
				2025	if (error) {
				2026	xfs_bmap_cancel(&free_list);
				2027	goto abort_rele;
				2028	}
				2029
				2030	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
				2031	if (error) {
				2032	IRELE(ip);
				2033	tp = NULL;
				2034	goto error_return;
				2035	}
				2036
				2037	XFS_QM_DQRELE(mp, udqp);
				2038	XFS_QM_DQRELE(mp, gdqp);
				2039
				2040	/*
				2041	* Propogate the fact that the vnode changed after the
				2042	* xfs_inode locks have been released.
				2043	*/
				2044	VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_TRUNCATED, 3);
				2045
				2046	*vpp = vp;
				2047
				2048	/* Fallthrough to std_return with error = 0 */
				2049
				2050	std_return:
				2051	if ( (*vpp \|\| (error != 0 && dm_event_sent != 0)) &&
				2052	DM_EVENT_ENABLED(dir_vp->v_vfsp, XFS_BHVTOI(dir_bdp),
				2053	DM_EVENT_POSTCREATE)) {
				2054	(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE,
				2055	dir_vp, DM_RIGHT_NULL,
				2056	*vpp ? vp:NULL,
				2057	DM_RIGHT_NULL, name, NULL,
				2058	dm_di_mode, error, 0);
				2059	}
				2060	return error;
				2061
				2062	abort_return:
				2063	cancel_flags \|= XFS_TRANS_ABORT;
				2064	/* FALLTHROUGH */
				2065	error_return:
				2066
				2067	if (tp != NULL)
				2068	xfs_trans_cancel(tp, cancel_flags);
				2069
				2070	if (!dp_joined_to_trans && (dp != NULL))
				2071	xfs_iunlock(dp, XFS_ILOCK_EXCL);
				2072	XFS_QM_DQRELE(mp, udqp);
				2073	XFS_QM_DQRELE(mp, gdqp);
				2074
				2075	goto std_return;
				2076
				2077	abort_rele:
				2078	/*
				2079	* Wait until after the current transaction is aborted to
				2080	* release the inode. This prevents recursive transactions
				2081	* and deadlocks from xfs_inactive.
				2082	*/
				2083	cancel_flags \|= XFS_TRANS_ABORT;
				2084	xfs_trans_cancel(tp, cancel_flags);
				2085	IRELE(ip);
				2086
				2087	XFS_QM_DQRELE(mp, udqp);
				2088	XFS_QM_DQRELE(mp, gdqp);
				2089
				2090	goto std_return;
				2091	}
				2092
				2093	#ifdef DEBUG
				2094	/*
				2095	* Some counters to see if (and how often) we are hitting some deadlock
				2096	* prevention code paths.
				2097	*/
				2098
				2099	int xfs_rm_locks;
				2100	int xfs_rm_lock_delays;
				2101	int xfs_rm_attempts;
				2102	#endif
				2103
				2104	/*
				2105	* The following routine will lock the inodes associated with the
				2106	* directory and the named entry in the directory. The locks are
				2107	* acquired in increasing inode number.
				2108	*
				2109	* If the entry is "..", then only the directory is locked. The
				2110	* vnode ref count will still include that from the .. entry in
				2111	* this case.
				2112	*
				2113	* There is a deadlock we need to worry about. If the locked directory is
				2114	* in the AIL, it might be blocking up the log. The next inode we lock
				2115	* could be already locked by another thread waiting for log space (e.g
				2116	* a permanent log reservation with a long running transaction (see
				2117	* xfs_itruncate_finish)). To solve this, we must check if the directory
				2118	* is in the ail and use lock_nowait. If we can't lock, we need to
				2119	* drop the inode lock on the directory and try again. xfs_iunlock will
				2120	* potentially push the tail if we were holding up the log.
				2121	*/
				2122	STATIC int
				2123	xfs_lock_dir_and_entry(
				2124	xfs_inode_t *dp,
				2125	vname_t *dentry,
				2126	xfs_inode_t ip) / inode of entry 'name' */
				2127	{
				2128	int attempts;
				2129	xfs_ino_t e_inum;
				2130	xfs_inode_t *ips[2];
				2131	xfs_log_item_t *lp;
				2132
				2133	#ifdef DEBUG
				2134	xfs_rm_locks++;
				2135	#endif
				2136	attempts = 0;
				2137
				2138	again:
				2139	xfs_ilock(dp, XFS_ILOCK_EXCL);
				2140
				2141	e_inum = ip->i_ino;
				2142
				2143	ITRACE(ip);
				2144
				2145	/*
				2146	* We want to lock in increasing inum. Since we've already
				2147	* acquired the lock on the directory, we may need to release
				2148	* if if the inum of the entry turns out to be less.
				2149	*/
				2150	if (e_inum > dp->i_ino) {
				2151	/*
				2152	* We are already in the right order, so just
				2153	* lock on the inode of the entry.
				2154	* We need to use nowait if dp is in the AIL.
				2155	*/
				2156
				2157	lp = (xfs_log_item_t *)dp->i_itemp;
				2158	if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
				2159	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
				2160	attempts++;
				2161	#ifdef DEBUG
				2162	xfs_rm_attempts++;
				2163	#endif
				2164
				2165	/*
				2166	* Unlock dp and try again.
				2167	* xfs_iunlock will try to push the tail
				2168	* if the inode is in the AIL.
				2169	*/
				2170
				2171	xfs_iunlock(dp, XFS_ILOCK_EXCL);
				2172
				2173	if ((attempts % 5) == 0) {
				2174	delay(1); /* Don't just spin the CPU */
				2175	#ifdef DEBUG
				2176	xfs_rm_lock_delays++;
				2177	#endif
				2178	}
				2179	goto again;
				2180	}
				2181	} else {
				2182	xfs_ilock(ip, XFS_ILOCK_EXCL);
				2183	}
				2184	} else if (e_inum < dp->i_ino) {
				2185	xfs_iunlock(dp, XFS_ILOCK_EXCL);
				2186
				2187	ips[0] = ip;
				2188	ips[1] = dp;
				2189	xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL);
				2190	}
				2191	/* else e_inum == dp->i_ino */
				2192	/* This can happen if we're asked to lock /x/..
				2193	* the entry is "..", which is also the parent directory.
				2194	*/
				2195
				2196	return 0;
				2197	}
				2198
				2199	#ifdef DEBUG
				2200	int xfs_locked_n;
				2201	int xfs_small_retries;
				2202	int xfs_middle_retries;
				2203	int xfs_lots_retries;
				2204	int xfs_lock_delays;
				2205	#endif
				2206
				2207	/*
				2208	* The following routine will lock n inodes in exclusive mode.
				2209	* We assume the caller calls us with the inodes in i_ino order.
				2210	*
				2211	* We need to detect deadlock where an inode that we lock
				2212	* is in the AIL and we start waiting for another inode that is locked
				2213	* by a thread in a long running transaction (such as truncate). This can
				2214	* result in deadlock since the long running trans might need to wait
				2215	* for the inode we just locked in order to push the tail and free space
				2216	* in the log.
				2217	*/
				2218	void
				2219	xfs_lock_inodes(
				2220	xfs_inode_t **ips,
				2221	int inodes,
				2222	int first_locked,
				2223	uint lock_mode)
				2224	{
				2225	int attempts = 0, i, j, try_lock;
				2226	xfs_log_item_t *lp;
				2227
				2228	ASSERT(ips && (inodes >= 2)); /* we need at least two */
				2229
				2230	if (first_locked) {
				2231	try_lock = 1;
				2232	i = 1;
				2233	} else {
				2234	try_lock = 0;
				2235	i = 0;
				2236	}
				2237
				2238	again:
				2239	for (; i < inodes; i++) {
				2240	ASSERT(ips[i]);
				2241
				2242	if (i && (ips[i] == ips[i-1])) /* Already locked */
				2243	continue;
				2244
				2245	/*
				2246	* If try_lock is not set yet, make sure all locked inodes
				2247	* are not in the AIL.
				2248	* If any are, set try_lock to be used later.
				2249	*/
				2250
				2251	if (!try_lock) {
				2252	for (j = (i - 1); j >= 0 && !try_lock; j--) {
				2253	lp = (xfs_log_item_t *)ips[j]->i_itemp;
				2254	if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
				2255	try_lock++;
				2256	}
				2257	}
				2258	}
				2259
				2260	/*
				2261	* If any of the previous locks we have locked is in the AIL,
				2262	* we must TRY to get the second and subsequent locks. If
				2263	* we can't get any, we must release all we have
				2264	* and try again.
				2265	*/
				2266
				2267	if (try_lock) {
				2268	/* try_lock must be 0 if i is 0. */
				2269	/*
				2270	* try_lock means we have an inode locked
				2271	* that is in the AIL.
				2272	*/
				2273	ASSERT(i != 0);
				2274	if (!xfs_ilock_nowait(ips[i], lock_mode)) {
				2275	attempts++;
				2276
				2277	/*
				2278	* Unlock all previous guys and try again.
				2279	* xfs_iunlock will try to push the tail
				2280	* if the inode is in the AIL.
				2281	*/
				2282
				2283	for(j = i - 1; j >= 0; j--) {
				2284
				2285	/*
				2286	* Check to see if we've already
				2287	* unlocked this one.
				2288	* Not the first one going back,
				2289	* and the inode ptr is the same.
				2290	*/
				2291	if ((j != (i - 1)) && ips[j] ==
				2292	ips[j+1])
				2293	continue;
				2294
				2295	xfs_iunlock(ips[j], lock_mode);
				2296	}
				2297
				2298	if ((attempts % 5) == 0) {
				2299	delay(1); /* Don't just spin the CPU */
				2300	#ifdef DEBUG
				2301	xfs_lock_delays++;
				2302	#endif
				2303	}
				2304	i = 0;
				2305	try_lock = 0;
				2306	goto again;
				2307	}
				2308	} else {
				2309	xfs_ilock(ips[i], lock_mode);
				2310	}
				2311	}
				2312
				2313	#ifdef DEBUG
				2314	if (attempts) {
				2315	if (attempts < 5) xfs_small_retries++;
				2316	else if (attempts < 100) xfs_middle_retries++;
				2317	else xfs_lots_retries++;
				2318	} else {
				2319	xfs_locked_n++;
				2320	}
				2321	#endif
				2322	}
				2323
				2324	#ifdef DEBUG
				2325	#define REMOVE_DEBUG_TRACE(x) {remove_which_error_return = (x);}
				2326	int remove_which_error_return = 0;
				2327	#else /* ! DEBUG */
				2328	#define REMOVE_DEBUG_TRACE(x)
				2329	#endif /* ! DEBUG */
				2330
				2331
				2332	/*
				2333	* xfs_remove
				2334	*
				2335	*/
				2336	STATIC int
				2337	xfs_remove(
				2338	bhv_desc_t *dir_bdp,
				2339	vname_t *dentry,
				2340	cred_t *credp)
				2341	{
				2342	vnode_t *dir_vp;
				2343	char *name = VNAME(dentry);
				2344	xfs_inode_t dp, ip;
				2345	xfs_trans_t *tp = NULL;
				2346	xfs_mount_t *mp;
				2347	int error = 0;
				2348	xfs_bmap_free_t free_list;
				2349	xfs_fsblock_t first_block;
				2350	int cancel_flags;
				2351	int committed;
				2352	int dm_di_mode = 0;
				2353	int link_zero;
				2354	uint resblks;
				2355	int namelen;
				2356
				2357	dir_vp = BHV_TO_VNODE(dir_bdp);
				2358	vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);
				2359
				2360	dp = XFS_BHVTOI(dir_bdp);
				2361	mp = dp->i_mount;
				2362
				2363	if (XFS_FORCED_SHUTDOWN(mp))
				2364	return XFS_ERROR(EIO);
				2365
				2366	namelen = VNAMELEN(dentry);
				2367
				2368	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) {
				2369	error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dir_vp,
				2370	DM_RIGHT_NULL, NULL, DM_RIGHT_NULL,
				2371	name, NULL, 0, 0, 0);
				2372	if (error)
				2373	return error;
				2374	}
				2375
				2376	/* From this point on, return through std_return */
				2377	ip = NULL;
				2378
				2379	/*
				2380	* We need to get a reference to ip before we get our log
				2381	* reservation. The reason for this is that we cannot call
				2382	* xfs_iget for an inode for which we do not have a reference
				2383	* once we've acquired a log reservation. This is because the
				2384	* inode we are trying to get might be in xfs_inactive going
				2385	* for a log reservation. Since we'll have to wait for the
				2386	* inactive code to complete before returning from xfs_iget,
				2387	* we need to make sure that we don't have log space reserved
				2388	* when we call xfs_iget. Instead we get an unlocked referece
				2389	* to the inode before getting our log reservation.
				2390	*/
				2391	error = xfs_get_dir_entry(dentry, &ip);
				2392	if (error) {
				2393	REMOVE_DEBUG_TRACE(__LINE__);
				2394	goto std_return;
				2395	}
				2396
				2397	dm_di_mode = ip->i_d.di_mode;
				2398
				2399	vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address);
				2400
				2401	ITRACE(ip);
				2402
				2403	error = XFS_QM_DQATTACH(mp, dp, 0);
				2404	if (!error && dp != ip)
				2405	error = XFS_QM_DQATTACH(mp, ip, 0);
				2406	if (error) {
				2407	REMOVE_DEBUG_TRACE(__LINE__);
				2408	IRELE(ip);
				2409	goto std_return;
				2410	}
				2411
				2412	tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
				2413	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
				2414	/*
				2415	* We try to get the real space reservation first,
				2416	* allowing for directory btree deletion(s) implying
				2417	* possible bmap insert(s). If we can't get the space
				2418	* reservation then we use 0 instead, and avoid the bmap
				2419	* btree insert(s) in the directory code by, if the bmap
				2420	* insert tries to happen, instead trimming the LAST
				2421	* block from the directory.
				2422	*/
				2423	resblks = XFS_REMOVE_SPACE_RES(mp);
				2424	error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0,
				2425	XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT);
				2426	if (error == ENOSPC) {
				2427	resblks = 0;
				2428	error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
				2429	XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT);
				2430	}
				2431	if (error) {
				2432	ASSERT(error != ENOSPC);
				2433	REMOVE_DEBUG_TRACE(__LINE__);
				2434	xfs_trans_cancel(tp, 0);
				2435	IRELE(ip);
				2436	return error;
				2437	}
				2438
				2439	error = xfs_lock_dir_and_entry(dp, dentry, ip);
				2440	if (error) {
				2441	REMOVE_DEBUG_TRACE(__LINE__);
				2442	xfs_trans_cancel(tp, cancel_flags);
				2443	IRELE(ip);
				2444	goto std_return;
				2445	}
				2446
				2447	/*
				2448	* At this point, we've gotten both the directory and the entry
				2449	* inodes locked.
				2450	*/
				2451	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
				2452	if (dp != ip) {
				2453	/*
				2454	* Increment vnode ref count only in this case since
				2455	* there's an extra vnode reference in the case where
				2456	* dp == ip.
				2457	*/
				2458	IHOLD(dp);
				2459	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
				2460	}
				2461
				2462	/*
				2463	* Entry must exist since we did a lookup in xfs_lock_dir_and_entry.
				2464	*/
				2465	XFS_BMAP_INIT(&free_list, &first_block);
				2466	error = XFS_DIR_REMOVENAME(mp, tp, dp, name, namelen, ip->i_ino,
				2467	&first_block, &free_list, 0);
				2468	if (error) {
				2469	ASSERT(error != ENOENT);
				2470	REMOVE_DEBUG_TRACE(__LINE__);
				2471	goto error1;
				2472	}
				2473	xfs_ichgtime(dp, XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
				2474
				2475	dp->i_gen++;
				2476	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
				2477
				2478	error = xfs_droplink(tp, ip);
				2479	if (error) {
				2480	REMOVE_DEBUG_TRACE(__LINE__);
				2481	goto error1;
				2482	}
				2483
				2484	/* Determine if this is the last link while
				2485	* we are in the transaction.
				2486	*/
				2487	link_zero = (ip)->i_d.di_nlink==0;
				2488
				2489	/*
				2490	* Take an extra ref on the inode so that it doesn't
				2491	* go to xfs_inactive() from within the commit.
				2492	*/
				2493	IHOLD(ip);
				2494
				2495	/*
				2496	* If this is a synchronous mount, make sure that the
				2497	* remove transaction goes to disk before returning to
				2498	* the user.
				2499	*/
				2500	if (mp->m_flags & (XFS_MOUNT_WSYNC\|XFS_MOUNT_DIRSYNC)) {
				2501	xfs_trans_set_sync(tp);
				2502	}
				2503
				2504	error = xfs_bmap_finish(&tp, &free_list, first_block, &committed);
				2505	if (error) {
				2506	REMOVE_DEBUG_TRACE(__LINE__);
				2507	goto error_rele;
				2508	}
				2509
				2510	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
				2511	if (error) {
				2512	IRELE(ip);
				2513	goto std_return;
				2514	}
				2515
				2516	/*
				2517	* Before we drop our extra reference to the inode, purge it
				2518	* from the refcache if it is there. By waiting until afterwards
				2519	* to do the IRELE, we ensure that we won't go inactive in the
				2520	* xfs_refcache_purge_ip routine (although that would be OK).
				2521	*/
				2522	xfs_refcache_purge_ip(ip);
				2523
				2524	vn_trace_exit(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address);
				2525
				2526	/*
				2527	* Let interposed file systems know about removed links.
				2528	*/
				2529	VOP_LINK_REMOVED(XFS_ITOV(ip), dir_vp, link_zero);
				2530
				2531	IRELE(ip);
				2532
				2533	/* Fall through to std_return with error = 0 */
				2534	std_return:
				2535	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp,
				2536	DM_EVENT_POSTREMOVE)) {
				2537	(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
				2538	dir_vp, DM_RIGHT_NULL,
				2539	NULL, DM_RIGHT_NULL,
				2540	name, NULL, dm_di_mode, error, 0);
				2541	}
				2542	return error;
				2543
				2544	error1:
				2545	xfs_bmap_cancel(&free_list);
				2546	cancel_flags \|= XFS_TRANS_ABORT;
				2547	xfs_trans_cancel(tp, cancel_flags);
				2548	goto std_return;
				2549
				2550	error_rele:
				2551	/*
				2552	* In this case make sure to not release the inode until after
				2553	* the current transaction is aborted. Releasing it beforehand
				2554	* can cause us to go to xfs_inactive and start a recursive
				2555	* transaction which can easily deadlock with the current one.
				2556	*/
				2557	xfs_bmap_cancel(&free_list);
				2558	cancel_flags \|= XFS_TRANS_ABORT;
				2559	xfs_trans_cancel(tp, cancel_flags);
				2560
				2561	/*
				2562	* Before we drop our extra reference to the inode, purge it
				2563	* from the refcache if it is there. By waiting until afterwards
				2564	* to do the IRELE, we ensure that we won't go inactive in the
				2565	* xfs_refcache_purge_ip routine (although that would be OK).
				2566	*/
				2567	xfs_refcache_purge_ip(ip);
				2568
				2569	IRELE(ip);
				2570
				2571	goto std_return;
				2572	}
				2573
				2574
				2575	/*
				2576	* xfs_link
				2577	*
				2578	*/
				2579	STATIC int
				2580	xfs_link(
				2581	bhv_desc_t *target_dir_bdp,
				2582	vnode_t *src_vp,
				2583	vname_t *dentry,
				2584	cred_t *credp)
				2585	{
				2586	xfs_inode_t tdp, sip;
				2587	xfs_trans_t *tp;
				2588	xfs_mount_t *mp;
				2589	xfs_inode_t *ips[2];
				2590	int error;
				2591	xfs_bmap_free_t free_list;
				2592	xfs_fsblock_t first_block;
				2593	int cancel_flags;
				2594	int committed;
				2595	vnode_t *target_dir_vp;
				2596	bhv_desc_t *src_bdp;
				2597	int resblks;
				2598	char *target_name = VNAME(dentry);
				2599	int target_namelen;
				2600
				2601	target_dir_vp = BHV_TO_VNODE(target_dir_bdp);
				2602	vn_trace_entry(target_dir_vp, __FUNCTION__, (inst_t *)__return_address);
				2603	vn_trace_entry(src_vp, __FUNCTION__, (inst_t *)__return_address);
				2604
				2605	target_namelen = VNAMELEN(dentry);
Christoph Hellwig	0432dab	2005-09-02 16:46:51 +1000	[diff] [blame]	2606	if (VN_ISDIR(src_vp))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2607	return XFS_ERROR(EPERM);
				2608
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2609	src_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(src_vp), &xfs_vnodeops);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2610	sip = XFS_BHVTOI(src_bdp);
				2611	tdp = XFS_BHVTOI(target_dir_bdp);
				2612	mp = tdp->i_mount;
				2613	if (XFS_FORCED_SHUTDOWN(mp))
				2614	return XFS_ERROR(EIO);
				2615
				2616	if (DM_EVENT_ENABLED(src_vp->v_vfsp, tdp, DM_EVENT_LINK)) {
				2617	error = XFS_SEND_NAMESP(mp, DM_EVENT_LINK,
				2618	target_dir_vp, DM_RIGHT_NULL,
				2619	src_vp, DM_RIGHT_NULL,
				2620	target_name, NULL, 0, 0, 0);
				2621	if (error)
				2622	return error;
				2623	}
				2624
				2625	/* Return through std_return after this point. */
				2626
				2627	error = XFS_QM_DQATTACH(mp, sip, 0);
				2628	if (!error && sip != tdp)
				2629	error = XFS_QM_DQATTACH(mp, tdp, 0);
				2630	if (error)
				2631	goto std_return;
				2632
				2633	tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
				2634	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
				2635	resblks = XFS_LINK_SPACE_RES(mp, target_namelen);
				2636	error = xfs_trans_reserve(tp, resblks, XFS_LINK_LOG_RES(mp), 0,
				2637	XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT);
				2638	if (error == ENOSPC) {
				2639	resblks = 0;
				2640	error = xfs_trans_reserve(tp, 0, XFS_LINK_LOG_RES(mp), 0,
				2641	XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT);
				2642	}
				2643	if (error) {
				2644	cancel_flags = 0;
				2645	goto error_return;
				2646	}
				2647
				2648	if (sip->i_ino < tdp->i_ino) {
				2649	ips[0] = sip;
				2650	ips[1] = tdp;
				2651	} else {
				2652	ips[0] = tdp;
				2653	ips[1] = sip;
				2654	}
				2655
				2656	xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL);
				2657
				2658	/*
				2659	* Increment vnode ref counts since xfs_trans_commit &
				2660	* xfs_trans_cancel will both unlock the inodes and
				2661	* decrement the associated ref counts.
				2662	*/
				2663	VN_HOLD(src_vp);
				2664	VN_HOLD(target_dir_vp);
				2665	xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
				2666	xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
				2667
				2668	/*
				2669	* If the source has too many links, we can't make any more to it.
				2670	*/
				2671	if (sip->i_d.di_nlink >= XFS_MAXLINK) {
				2672	error = XFS_ERROR(EMLINK);
				2673	goto error_return;
				2674	}
				2675
Nathan Scott	365ca83	2005-06-21 15:39:12 +1000	[diff] [blame]	2676	/*
				2677	* If we are using project inheritance, we only allow hard link
				2678	* creation in our tree when the project IDs are the same; else
				2679	* the tree quota mechanism could be circumvented.
				2680	*/
				2681	if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
				2682	(tdp->i_d.di_projid != sip->i_d.di_projid))) {
				2683	error = XFS_ERROR(EPERM);
				2684	goto error_return;
				2685	}
				2686
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2687	if (resblks == 0 &&
				2688	(error = XFS_DIR_CANENTER(mp, tp, tdp, target_name,
				2689	target_namelen)))
				2690	goto error_return;
				2691
				2692	XFS_BMAP_INIT(&free_list, &first_block);
				2693
				2694	error = XFS_DIR_CREATENAME(mp, tp, tdp, target_name, target_namelen,
				2695	sip->i_ino, &first_block, &free_list,
				2696	resblks);
				2697	if (error)
				2698	goto abort_return;
				2699	xfs_ichgtime(tdp, XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
				2700	tdp->i_gen++;
				2701	xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
				2702
				2703	error = xfs_bumplink(tp, sip);
				2704	if (error) {
				2705	goto abort_return;
				2706	}
				2707
				2708	/*
				2709	* If this is a synchronous mount, make sure that the
				2710	* link transaction goes to disk before returning to
				2711	* the user.
				2712	*/
				2713	if (mp->m_flags & (XFS_MOUNT_WSYNC\|XFS_MOUNT_DIRSYNC)) {
				2714	xfs_trans_set_sync(tp);
				2715	}
				2716
				2717	error = xfs_bmap_finish (&tp, &free_list, first_block, &committed);
				2718	if (error) {
				2719	xfs_bmap_cancel(&free_list);
				2720	goto abort_return;
				2721	}
				2722
				2723	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
				2724	if (error) {
				2725	goto std_return;
				2726	}
				2727
				2728	/* Fall through to std_return with error = 0. */
				2729	std_return:
				2730	if (DM_EVENT_ENABLED(src_vp->v_vfsp, sip,
				2731	DM_EVENT_POSTLINK)) {
				2732	(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTLINK,
				2733	target_dir_vp, DM_RIGHT_NULL,
				2734	src_vp, DM_RIGHT_NULL,
				2735	target_name, NULL, 0, error, 0);
				2736	}
				2737	return error;
				2738
				2739	abort_return:
				2740	cancel_flags \|= XFS_TRANS_ABORT;
				2741	/* FALLTHROUGH */
				2742	error_return:
				2743	xfs_trans_cancel(tp, cancel_flags);
				2744
				2745	goto std_return;
				2746	}
				2747	/*
				2748	* xfs_mkdir
				2749	*
				2750	*/
				2751	STATIC int
				2752	xfs_mkdir(
				2753	bhv_desc_t *dir_bdp,
				2754	vname_t *dentry,
				2755	vattr_t *vap,
				2756	vnode_t **vpp,
				2757	cred_t *credp)
				2758	{
				2759	char *dir_name = VNAME(dentry);
				2760	xfs_inode_t *dp;
				2761	xfs_inode_t cdp; / inode of created dir */
				2762	vnode_t cvp; / vnode of created dir */
				2763	xfs_trans_t *tp;
				2764	xfs_mount_t *mp;
				2765	int cancel_flags;
				2766	int error;
				2767	int committed;
				2768	xfs_bmap_free_t free_list;
				2769	xfs_fsblock_t first_block;
				2770	vnode_t *dir_vp;
				2771	boolean_t dp_joined_to_trans;
				2772	boolean_t created = B_FALSE;
				2773	int dm_event_sent = 0;
				2774	xfs_prid_t prid;
				2775	struct xfs_dquot udqp, gdqp;
				2776	uint resblks;
				2777	int dm_di_mode;
				2778	int dir_namelen;
				2779
				2780	dir_vp = BHV_TO_VNODE(dir_bdp);
				2781	dp = XFS_BHVTOI(dir_bdp);
				2782	mp = dp->i_mount;
				2783
				2784	if (XFS_FORCED_SHUTDOWN(mp))
				2785	return XFS_ERROR(EIO);
				2786
				2787	dir_namelen = VNAMELEN(dentry);
				2788
				2789	tp = NULL;
				2790	dp_joined_to_trans = B_FALSE;
Christoph Hellwig	0432dab	2005-09-02 16:46:51 +1000	[diff] [blame]	2791	dm_di_mode = vap->va_mode;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2792
				2793	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) {
				2794	error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
				2795	dir_vp, DM_RIGHT_NULL, NULL,
				2796	DM_RIGHT_NULL, dir_name, NULL,
				2797	dm_di_mode, 0, 0);
				2798	if (error)
				2799	return error;
				2800	dm_event_sent = 1;
				2801	}
				2802
				2803	/* Return through std_return after this point. */
				2804
				2805	vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);
				2806
				2807	mp = dp->i_mount;
				2808	udqp = gdqp = NULL;
Nathan Scott	365ca83	2005-06-21 15:39:12 +1000	[diff] [blame]	2809	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
				2810	prid = dp->i_d.di_projid;
				2811	else if (vap->va_mask & XFS_AT_PROJID)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2812	prid = (xfs_prid_t)vap->va_projid;
				2813	else
				2814	prid = (xfs_prid_t)dfltprid;
				2815
				2816	/*
				2817	* Make sure that we have allocated dquot(s) on disk.
				2818	*/
				2819	error = XFS_QM_DQVOPALLOC(mp, dp,
Nathan Scott	c8ad20f	2005-06-21 15:38:48 +1000	[diff] [blame]	2820	current_fsuid(credp), current_fsgid(credp), prid,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2821	XFS_QMOPT_QUOTALL \| XFS_QMOPT_INHERIT, &udqp, &gdqp);
				2822	if (error)
				2823	goto std_return;
				2824
				2825	tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR);
				2826	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
				2827	resblks = XFS_MKDIR_SPACE_RES(mp, dir_namelen);
				2828	error = xfs_trans_reserve(tp, resblks, XFS_MKDIR_LOG_RES(mp), 0,
				2829	XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT);
				2830	if (error == ENOSPC) {
				2831	resblks = 0;
				2832	error = xfs_trans_reserve(tp, 0, XFS_MKDIR_LOG_RES(mp), 0,
				2833	XFS_TRANS_PERM_LOG_RES,
				2834	XFS_MKDIR_LOG_COUNT);
				2835	}
				2836	if (error) {
				2837	cancel_flags = 0;
				2838	dp = NULL;
				2839	goto error_return;
				2840	}
				2841
				2842	xfs_ilock(dp, XFS_ILOCK_EXCL);
				2843
				2844	/*
				2845	* Check for directory link count overflow.
				2846	*/
				2847	if (dp->i_d.di_nlink >= XFS_MAXLINK) {
				2848	error = XFS_ERROR(EMLINK);
				2849	goto error_return;
				2850	}
				2851
				2852	/*
				2853	* Reserve disk quota and the inode.
				2854	*/
				2855	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
				2856	if (error)
				2857	goto error_return;
				2858
				2859	if (resblks == 0 &&
				2860	(error = XFS_DIR_CANENTER(mp, tp, dp, dir_name, dir_namelen)))
				2861	goto error_return;
				2862	/*
				2863	* create the directory inode.
				2864	*/
Christoph Hellwig	0432dab	2005-09-02 16:46:51 +1000	[diff] [blame]	2865	error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 2,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2866	0, credp, prid, resblks > 0,
				2867	&cdp, NULL);
				2868	if (error) {
				2869	if (error == ENOSPC)
				2870	goto error_return;
				2871	goto abort_return;
				2872	}
				2873	ITRACE(cdp);
				2874
				2875	/*
				2876	* Now we add the directory inode to the transaction.
				2877	* We waited until now since xfs_dir_ialloc might start
				2878	* a new transaction. Had we joined the transaction
				2879	* earlier, the locks might have gotten released.
				2880	*/
				2881	VN_HOLD(dir_vp);
				2882	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
				2883	dp_joined_to_trans = B_TRUE;
				2884
				2885	XFS_BMAP_INIT(&free_list, &first_block);
				2886
				2887	error = XFS_DIR_CREATENAME(mp, tp, dp, dir_name, dir_namelen,
				2888	cdp->i_ino, &first_block, &free_list,
				2889	resblks ? resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
				2890	if (error) {
				2891	ASSERT(error != ENOSPC);
				2892	goto error1;
				2893	}
				2894	xfs_ichgtime(dp, XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
				2895
				2896	/*
				2897	* Bump the in memory version number of the parent directory
				2898	* so that other processes accessing it will recognize that
				2899	* the directory has changed.
				2900	*/
				2901	dp->i_gen++;
				2902
				2903	error = XFS_DIR_INIT(mp, tp, cdp, dp);
				2904	if (error) {
				2905	goto error2;
				2906	}
				2907
				2908	cdp->i_gen = 1;
				2909	error = xfs_bumplink(tp, dp);
				2910	if (error) {
				2911	goto error2;
				2912	}
				2913
				2914	cvp = XFS_ITOV(cdp);
				2915
				2916	created = B_TRUE;
				2917
				2918	*vpp = cvp;
				2919	IHOLD(cdp);
				2920
				2921	/*
				2922	* Attach the dquots to the new inode and modify the icount incore.
				2923	*/
				2924	XFS_QM_DQVOPCREATE(mp, tp, cdp, udqp, gdqp);
				2925
				2926	/*
				2927	* If this is a synchronous mount, make sure that the
				2928	* mkdir transaction goes to disk before returning to
				2929	* the user.
				2930	*/
				2931	if (mp->m_flags & (XFS_MOUNT_WSYNC\|XFS_MOUNT_DIRSYNC)) {
				2932	xfs_trans_set_sync(tp);
				2933	}
				2934
				2935	error = xfs_bmap_finish(&tp, &free_list, first_block, &committed);
				2936	if (error) {
				2937	IRELE(cdp);
				2938	goto error2;
				2939	}
				2940
				2941	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
				2942	XFS_QM_DQRELE(mp, udqp);
				2943	XFS_QM_DQRELE(mp, gdqp);
				2944	if (error) {
				2945	IRELE(cdp);
				2946	}
				2947
				2948	/* Fall through to std_return with error = 0 or errno from
				2949	* xfs_trans_commit. */
				2950
				2951	std_return:
				2952	if ( (created \|\| (error != 0 && dm_event_sent != 0)) &&
				2953	DM_EVENT_ENABLED(dir_vp->v_vfsp, XFS_BHVTOI(dir_bdp),
				2954	DM_EVENT_POSTCREATE)) {
				2955	(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE,
				2956	dir_vp, DM_RIGHT_NULL,
				2957	created ? XFS_ITOV(cdp):NULL,
				2958	DM_RIGHT_NULL,
				2959	dir_name, NULL,
				2960	dm_di_mode, error, 0);
				2961	}
				2962	return error;
				2963
				2964	error2:
				2965	error1:
				2966	xfs_bmap_cancel(&free_list);
				2967	abort_return:
				2968	cancel_flags \|= XFS_TRANS_ABORT;
				2969	error_return:
				2970	xfs_trans_cancel(tp, cancel_flags);
				2971	XFS_QM_DQRELE(mp, udqp);
				2972	XFS_QM_DQRELE(mp, gdqp);
				2973
				2974	if (!dp_joined_to_trans && (dp != NULL)) {
				2975	xfs_iunlock(dp, XFS_ILOCK_EXCL);
				2976	}
				2977
				2978	goto std_return;
				2979	}
				2980
				2981
				2982	/*
				2983	* xfs_rmdir
				2984	*
				2985	*/
				2986	STATIC int
				2987	xfs_rmdir(
				2988	bhv_desc_t *dir_bdp,
				2989	vname_t *dentry,
				2990	cred_t *credp)
				2991	{
				2992	char *name = VNAME(dentry);
				2993	xfs_inode_t *dp;
				2994	xfs_inode_t cdp; / child directory */
				2995	xfs_trans_t *tp;
				2996	xfs_mount_t *mp;
				2997	int error;
				2998	xfs_bmap_free_t free_list;
				2999	xfs_fsblock_t first_block;
				3000	int cancel_flags;
				3001	int committed;
				3002	vnode_t *dir_vp;
				3003	int dm_di_mode = 0;
				3004	int last_cdp_link;
				3005	int namelen;
				3006	uint resblks;
				3007
				3008	dir_vp = BHV_TO_VNODE(dir_bdp);
				3009	dp = XFS_BHVTOI(dir_bdp);
				3010	mp = dp->i_mount;
				3011
				3012	vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);
				3013
				3014	if (XFS_FORCED_SHUTDOWN(XFS_BHVTOI(dir_bdp)->i_mount))
				3015	return XFS_ERROR(EIO);
				3016	namelen = VNAMELEN(dentry);
				3017
				3018	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) {
				3019	error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE,
				3020	dir_vp, DM_RIGHT_NULL,
				3021	NULL, DM_RIGHT_NULL,
				3022	name, NULL, 0, 0, 0);
				3023	if (error)
				3024	return XFS_ERROR(error);
				3025	}
				3026
				3027	/* Return through std_return after this point. */
				3028
				3029	cdp = NULL;
				3030
				3031	/*
				3032	* We need to get a reference to cdp before we get our log
				3033	* reservation. The reason for this is that we cannot call
				3034	* xfs_iget for an inode for which we do not have a reference
				3035	* once we've acquired a log reservation. This is because the
				3036	* inode we are trying to get might be in xfs_inactive going
				3037	* for a log reservation. Since we'll have to wait for the
				3038	* inactive code to complete before returning from xfs_iget,
				3039	* we need to make sure that we don't have log space reserved
				3040	* when we call xfs_iget. Instead we get an unlocked referece
				3041	* to the inode before getting our log reservation.
				3042	*/
				3043	error = xfs_get_dir_entry(dentry, &cdp);
				3044	if (error) {
				3045	REMOVE_DEBUG_TRACE(__LINE__);
				3046	goto std_return;
				3047	}
				3048	mp = dp->i_mount;
				3049	dm_di_mode = cdp->i_d.di_mode;
				3050
				3051	/*
				3052	* Get the dquots for the inodes.
				3053	*/
				3054	error = XFS_QM_DQATTACH(mp, dp, 0);
				3055	if (!error && dp != cdp)
				3056	error = XFS_QM_DQATTACH(mp, cdp, 0);
				3057	if (error) {
				3058	IRELE(cdp);
				3059	REMOVE_DEBUG_TRACE(__LINE__);
				3060	goto std_return;
				3061	}
				3062
				3063	tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
				3064	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
				3065	/*
				3066	* We try to get the real space reservation first,
				3067	* allowing for directory btree deletion(s) implying
				3068	* possible bmap insert(s). If we can't get the space
				3069	* reservation then we use 0 instead, and avoid the bmap
				3070	* btree insert(s) in the directory code by, if the bmap
				3071	* insert tries to happen, instead trimming the LAST
				3072	* block from the directory.
				3073	*/
				3074	resblks = XFS_REMOVE_SPACE_RES(mp);
				3075	error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0,
				3076	XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT);
				3077	if (error == ENOSPC) {
				3078	resblks = 0;
				3079	error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
				3080	XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT);
				3081	}
				3082	if (error) {
				3083	ASSERT(error != ENOSPC);
				3084	cancel_flags = 0;
				3085	IRELE(cdp);
				3086	goto error_return;
				3087	}
				3088	XFS_BMAP_INIT(&free_list, &first_block);
				3089
				3090	/*
				3091	* Now lock the child directory inode and the parent directory
				3092	* inode in the proper order. This will take care of validating
				3093	* that the directory entry for the child directory inode has
				3094	* not changed while we were obtaining a log reservation.
				3095	*/
				3096	error = xfs_lock_dir_and_entry(dp, dentry, cdp);
				3097	if (error) {
				3098	xfs_trans_cancel(tp, cancel_flags);
				3099	IRELE(cdp);
				3100	goto std_return;
				3101	}
				3102
				3103	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
				3104	if (dp != cdp) {
				3105	/*
				3106	* Only increment the parent directory vnode count if
				3107	* we didn't bump it in looking up cdp. The only time
				3108	* we don't bump it is when we're looking up ".".
				3109	*/
				3110	VN_HOLD(dir_vp);
				3111	}
				3112
				3113	ITRACE(cdp);
				3114	xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL);
				3115
				3116	ASSERT(cdp->i_d.di_nlink >= 2);
				3117	if (cdp->i_d.di_nlink != 2) {
				3118	error = XFS_ERROR(ENOTEMPTY);
				3119	goto error_return;
				3120	}
				3121	if (!XFS_DIR_ISEMPTY(mp, cdp)) {
				3122	error = XFS_ERROR(ENOTEMPTY);
				3123	goto error_return;
				3124	}
				3125
				3126	error = XFS_DIR_REMOVENAME(mp, tp, dp, name, namelen, cdp->i_ino,
				3127	&first_block, &free_list, resblks);
				3128	if (error) {
				3129	goto error1;
				3130	}
				3131
				3132	xfs_ichgtime(dp, XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
				3133
				3134	/*
				3135	* Bump the in memory generation count on the parent
				3136	* directory so that other can know that it has changed.
				3137	*/
				3138	dp->i_gen++;
				3139
				3140	/*
				3141	* Drop the link from cdp's "..".
				3142	*/
				3143	error = xfs_droplink(tp, dp);
				3144	if (error) {
				3145	goto error1;
				3146	}
				3147
				3148	/*
				3149	* Drop the link from dp to cdp.
				3150	*/
				3151	error = xfs_droplink(tp, cdp);
				3152	if (error) {
				3153	goto error1;
				3154	}
				3155
				3156	/*
				3157	* Drop the "." link from cdp to self.
				3158	*/
				3159	error = xfs_droplink(tp, cdp);
				3160	if (error) {
				3161	goto error1;
				3162	}
				3163
				3164	/* Determine these before committing transaction */
				3165	last_cdp_link = (cdp)->i_d.di_nlink==0;
				3166
				3167	/*
				3168	* Take an extra ref on the child vnode so that it
				3169	* does not go to xfs_inactive() from within the commit.
				3170	*/
				3171	IHOLD(cdp);
				3172
				3173	/*
				3174	* If this is a synchronous mount, make sure that the
				3175	* rmdir transaction goes to disk before returning to
				3176	* the user.
				3177	*/
				3178	if (mp->m_flags & (XFS_MOUNT_WSYNC\|XFS_MOUNT_DIRSYNC)) {
				3179	xfs_trans_set_sync(tp);
				3180	}
				3181
				3182	error = xfs_bmap_finish (&tp, &free_list, first_block, &committed);
				3183	if (error) {
				3184	xfs_bmap_cancel(&free_list);
				3185	xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES \|
				3186	XFS_TRANS_ABORT));
				3187	IRELE(cdp);
				3188	goto std_return;
				3189	}
				3190
				3191	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
				3192	if (error) {
				3193	IRELE(cdp);
				3194	goto std_return;
				3195	}
				3196
				3197
				3198	/*
				3199	* Let interposed file systems know about removed links.
				3200	*/
				3201	VOP_LINK_REMOVED(XFS_ITOV(cdp), dir_vp, last_cdp_link);
				3202
				3203	IRELE(cdp);
				3204
				3205	/* Fall through to std_return with error = 0 or the errno
				3206	* from xfs_trans_commit. */
				3207	std_return:
				3208	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_POSTREMOVE)) {
				3209	(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
				3210	dir_vp, DM_RIGHT_NULL,
				3211	NULL, DM_RIGHT_NULL,
				3212	name, NULL, dm_di_mode,
				3213	error, 0);
				3214	}
				3215	return error;
				3216
				3217	error1:
				3218	xfs_bmap_cancel(&free_list);
				3219	cancel_flags \|= XFS_TRANS_ABORT;
				3220	error_return:
				3221	xfs_trans_cancel(tp, cancel_flags);
				3222	goto std_return;
				3223	}
				3224
				3225
				3226	/*
				3227	* xfs_readdir
				3228	*
				3229	* Read dp's entries starting at uiop->uio_offset and translate them into
				3230	* bufsize bytes worth of struct dirents starting at bufbase.
				3231	*/
				3232	STATIC int
				3233	xfs_readdir(
				3234	bhv_desc_t *dir_bdp,
				3235	uio_t *uiop,
				3236	cred_t *credp,
				3237	int *eofp)
				3238	{
				3239	xfs_inode_t *dp;
				3240	xfs_trans_t *tp = NULL;
				3241	int error = 0;
				3242	uint lock_mode;
				3243	xfs_off_t start_offset;
				3244
				3245	vn_trace_entry(BHV_TO_VNODE(dir_bdp), __FUNCTION__,
				3246	(inst_t *)__return_address);
				3247	dp = XFS_BHVTOI(dir_bdp);
				3248
				3249	if (XFS_FORCED_SHUTDOWN(dp->i_mount)) {
				3250	return XFS_ERROR(EIO);
				3251	}
				3252
				3253	lock_mode = xfs_ilock_map_shared(dp);
				3254	start_offset = uiop->uio_offset;
				3255	error = XFS_DIR_GETDENTS(dp->i_mount, tp, dp, uiop, eofp);
				3256	if (start_offset != uiop->uio_offset) {
				3257	xfs_ichgtime(dp, XFS_ICHGTIME_ACC);
				3258	}
				3259	xfs_iunlock_map_shared(dp, lock_mode);
				3260	return error;
				3261	}
				3262
				3263
				3264	/*
				3265	* xfs_symlink
				3266	*
				3267	*/
				3268	STATIC int
				3269	xfs_symlink(
				3270	bhv_desc_t *dir_bdp,
				3271	vname_t *dentry,
				3272	vattr_t *vap,
				3273	char *target_path,
				3274	vnode_t **vpp,
				3275	cred_t *credp)
				3276	{
				3277	xfs_trans_t *tp;
				3278	xfs_mount_t *mp;
				3279	xfs_inode_t *dp;
				3280	xfs_inode_t *ip;
				3281	int error;
				3282	int pathlen;
				3283	xfs_bmap_free_t free_list;
				3284	xfs_fsblock_t first_block;
				3285	boolean_t dp_joined_to_trans;
				3286	vnode_t *dir_vp;
				3287	uint cancel_flags;
				3288	int committed;
				3289	xfs_fileoff_t first_fsb;
				3290	xfs_filblks_t fs_blocks;
				3291	int nmaps;
				3292	xfs_bmbt_irec_t mval[SYMLINK_MAPS];
				3293	xfs_daddr_t d;
				3294	char *cur_chunk;
				3295	int byte_cnt;
				3296	int n;
				3297	xfs_buf_t *bp;
				3298	xfs_prid_t prid;
				3299	struct xfs_dquot udqp, gdqp;
				3300	uint resblks;
				3301	char *link_name = VNAME(dentry);
				3302	int link_namelen;
				3303
				3304	*vpp = NULL;
				3305	dir_vp = BHV_TO_VNODE(dir_bdp);
				3306	dp = XFS_BHVTOI(dir_bdp);
				3307	dp_joined_to_trans = B_FALSE;
				3308	error = 0;
				3309	ip = NULL;
				3310	tp = NULL;
				3311
				3312	vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);
				3313
				3314	mp = dp->i_mount;
				3315
				3316	if (XFS_FORCED_SHUTDOWN(mp))
				3317	return XFS_ERROR(EIO);
				3318
				3319	link_namelen = VNAMELEN(dentry);
				3320
				3321	/*
				3322	* Check component lengths of the target path name.
				3323	*/
				3324	pathlen = strlen(target_path);
				3325	if (pathlen >= MAXPATHLEN) /* total string too long */
				3326	return XFS_ERROR(ENAMETOOLONG);
				3327	if (pathlen >= MAXNAMELEN) { /* is any component too long? */
				3328	int len, total;
				3329	char *path;
				3330
				3331	for(total = 0, path = target_path; total < pathlen;) {
				3332	/*
				3333	* Skip any slashes.
				3334	*/
				3335	while(*path == '/') {
				3336	total++;
				3337	path++;
				3338	}
				3339
				3340	/*
				3341	* Count up to the next slash or end of path.
				3342	* Error out if the component is bigger than MAXNAMELEN.
				3343	*/
				3344	for(len = 0; *path != '/' && total < pathlen;total++, path++) {
				3345	if (++len >= MAXNAMELEN) {
				3346	error = ENAMETOOLONG;
				3347	return error;
				3348	}
				3349	}
				3350	}
				3351	}
				3352
				3353	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_SYMLINK)) {
				3354	error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dir_vp,
				3355	DM_RIGHT_NULL, NULL, DM_RIGHT_NULL,
				3356	link_name, target_path, 0, 0, 0);
				3357	if (error)
				3358	return error;
				3359	}
				3360
				3361	/* Return through std_return after this point. */
				3362
				3363	udqp = gdqp = NULL;
Nathan Scott	365ca83	2005-06-21 15:39:12 +1000	[diff] [blame]	3364	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
				3365	prid = dp->i_d.di_projid;
				3366	else if (vap->va_mask & XFS_AT_PROJID)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3367	prid = (xfs_prid_t)vap->va_projid;
				3368	else
				3369	prid = (xfs_prid_t)dfltprid;
				3370
				3371	/*
				3372	* Make sure that we have allocated dquot(s) on disk.
				3373	*/
				3374	error = XFS_QM_DQVOPALLOC(mp, dp,
Nathan Scott	c8ad20f	2005-06-21 15:38:48 +1000	[diff] [blame]	3375	current_fsuid(credp), current_fsgid(credp), prid,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3376	XFS_QMOPT_QUOTALL \| XFS_QMOPT_INHERIT, &udqp, &gdqp);
				3377	if (error)
				3378	goto std_return;
				3379
				3380	tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
				3381	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
				3382	/*
				3383	* The symlink will fit into the inode data fork?
				3384	* There can't be any attributes so we get the whole variable part.
				3385	*/
				3386	if (pathlen <= XFS_LITINO(mp))
				3387	fs_blocks = 0;
				3388	else
				3389	fs_blocks = XFS_B_TO_FSB(mp, pathlen);
				3390	resblks = XFS_SYMLINK_SPACE_RES(mp, link_namelen, fs_blocks);
				3391	error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0,
				3392	XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
				3393	if (error == ENOSPC && fs_blocks == 0) {
				3394	resblks = 0;
				3395	error = xfs_trans_reserve(tp, 0, XFS_SYMLINK_LOG_RES(mp), 0,
				3396	XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
				3397	}
				3398	if (error) {
				3399	cancel_flags = 0;
				3400	dp = NULL;
				3401	goto error_return;
				3402	}
				3403
				3404	xfs_ilock(dp, XFS_ILOCK_EXCL);
				3405
				3406	/*
				3407	* Check whether the directory allows new symlinks or not.
				3408	*/
				3409	if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) {
				3410	error = XFS_ERROR(EPERM);
				3411	goto error_return;
				3412	}
				3413
				3414	/*
				3415	* Reserve disk quota : blocks and inode.
				3416	*/
				3417	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
				3418	if (error)
				3419	goto error_return;
				3420
				3421	/*
				3422	* Check for ability to enter directory entry, if no space reserved.
				3423	*/
				3424	if (resblks == 0 &&
				3425	(error = XFS_DIR_CANENTER(mp, tp, dp, link_name, link_namelen)))
				3426	goto error_return;
				3427	/*
				3428	* Initialize the bmap freelist prior to calling either
				3429	* bmapi or the directory create code.
				3430	*/
				3431	XFS_BMAP_INIT(&free_list, &first_block);
				3432
				3433	/*
				3434	* Allocate an inode for the symlink.
				3435	*/
				3436	error = xfs_dir_ialloc(&tp, dp, S_IFLNK \| (vap->va_mode&~S_IFMT),
				3437	1, 0, credp, prid, resblks > 0, &ip, NULL);
				3438	if (error) {
				3439	if (error == ENOSPC)
				3440	goto error_return;
				3441	goto error1;
				3442	}
				3443	ITRACE(ip);
				3444
				3445	VN_HOLD(dir_vp);
				3446	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
				3447	dp_joined_to_trans = B_TRUE;
				3448
				3449	/*
				3450	* Also attach the dquot(s) to it, if applicable.
				3451	*/
				3452	XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);
				3453
				3454	if (resblks)
				3455	resblks -= XFS_IALLOC_SPACE_RES(mp);
				3456	/*
				3457	* If the symlink will fit into the inode, write it inline.
				3458	*/
				3459	if (pathlen <= XFS_IFORK_DSIZE(ip)) {
				3460	xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK);
				3461	memcpy(ip->i_df.if_u1.if_data, target_path, pathlen);
				3462	ip->i_d.di_size = pathlen;
				3463
				3464	/*
				3465	* The inode was initially created in extent format.
				3466	*/
				3467	ip->i_df.if_flags &= ~(XFS_IFEXTENTS \| XFS_IFBROOT);
				3468	ip->i_df.if_flags \|= XFS_IFINLINE;
				3469
				3470	ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
				3471	xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA \| XFS_ILOG_CORE);
				3472
				3473	} else {
				3474	first_fsb = 0;
				3475	nmaps = SYMLINK_MAPS;
				3476
				3477	error = xfs_bmapi(tp, ip, first_fsb, fs_blocks,
				3478	XFS_BMAPI_WRITE \| XFS_BMAPI_METADATA,
				3479	&first_block, resblks, mval, &nmaps,
				3480	&free_list);
				3481	if (error) {
				3482	goto error1;
				3483	}
				3484
				3485	if (resblks)
				3486	resblks -= fs_blocks;
				3487	ip->i_d.di_size = pathlen;
				3488	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				3489
				3490	cur_chunk = target_path;
				3491	for (n = 0; n < nmaps; n++) {
				3492	d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
				3493	byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
				3494	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
				3495	BTOBB(byte_cnt), 0);
				3496	ASSERT(bp && !XFS_BUF_GETERROR(bp));
				3497	if (pathlen < byte_cnt) {
				3498	byte_cnt = pathlen;
				3499	}
				3500	pathlen -= byte_cnt;
				3501
				3502	memcpy(XFS_BUF_PTR(bp), cur_chunk, byte_cnt);
				3503	cur_chunk += byte_cnt;
				3504
				3505	xfs_trans_log_buf(tp, bp, 0, byte_cnt - 1);
				3506	}
				3507	}
				3508
				3509	/*
				3510	* Create the directory entry for the symlink.
				3511	*/
				3512	error = XFS_DIR_CREATENAME(mp, tp, dp, link_name, link_namelen,
				3513	ip->i_ino, &first_block, &free_list, resblks);
				3514	if (error) {
				3515	goto error1;
				3516	}
				3517	xfs_ichgtime(dp, XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
				3518	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
				3519
				3520	/*
				3521	* Bump the in memory version number of the parent directory
				3522	* so that other processes accessing it will recognize that
				3523	* the directory has changed.
				3524	*/
				3525	dp->i_gen++;
				3526
				3527	/*
				3528	* If this is a synchronous mount, make sure that the
				3529	* symlink transaction goes to disk before returning to
				3530	* the user.
				3531	*/
				3532	if (mp->m_flags & (XFS_MOUNT_WSYNC\|XFS_MOUNT_DIRSYNC)) {
				3533	xfs_trans_set_sync(tp);
				3534	}
				3535
				3536	/*
				3537	* xfs_trans_commit normally decrements the vnode ref count
				3538	* when it unlocks the inode. Since we want to return the
				3539	* vnode to the caller, we bump the vnode ref count now.
				3540	*/
				3541	IHOLD(ip);
				3542
				3543	error = xfs_bmap_finish(&tp, &free_list, first_block, &committed);
				3544	if (error) {
				3545	goto error2;
				3546	}
				3547	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
				3548	XFS_QM_DQRELE(mp, udqp);
				3549	XFS_QM_DQRELE(mp, gdqp);
				3550
				3551	/* Fall through to std_return with error = 0 or errno from
				3552	* xfs_trans_commit */
				3553	std_return:
				3554	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, XFS_BHVTOI(dir_bdp),
				3555	DM_EVENT_POSTSYMLINK)) {
				3556	(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTSYMLINK,
				3557	dir_vp, DM_RIGHT_NULL,
				3558	error ? NULL : XFS_ITOV(ip),
				3559	DM_RIGHT_NULL, link_name, target_path,
				3560	0, error, 0);
				3561	}
				3562
				3563	if (!error) {
				3564	vnode_t *vp;
				3565
				3566	ASSERT(ip);
				3567	vp = XFS_ITOV(ip);
				3568	*vpp = vp;
				3569	}
				3570	return error;
				3571
				3572	error2:
				3573	IRELE(ip);
				3574	error1:
				3575	xfs_bmap_cancel(&free_list);
				3576	cancel_flags \|= XFS_TRANS_ABORT;
				3577	error_return:
				3578	xfs_trans_cancel(tp, cancel_flags);
				3579	XFS_QM_DQRELE(mp, udqp);
				3580	XFS_QM_DQRELE(mp, gdqp);
				3581
				3582	if (!dp_joined_to_trans && (dp != NULL)) {
				3583	xfs_iunlock(dp, XFS_ILOCK_EXCL);
				3584	}
				3585
				3586	goto std_return;
				3587	}
				3588
				3589
				3590	/*
				3591	* xfs_fid2
				3592	*
				3593	* A fid routine that takes a pointer to a previously allocated
				3594	* fid structure (like xfs_fast_fid) but uses a 64 bit inode number.
				3595	*/
				3596	STATIC int
				3597	xfs_fid2(
				3598	bhv_desc_t *bdp,
				3599	fid_t *fidp)
				3600	{
				3601	xfs_inode_t *ip;
				3602	xfs_fid2_t *xfid;
				3603
				3604	vn_trace_entry(BHV_TO_VNODE(bdp), __FUNCTION__,
				3605	(inst_t *)__return_address);
				3606	ASSERT(sizeof(fid_t) >= sizeof(xfs_fid2_t));
				3607
				3608	xfid = (xfs_fid2_t *)fidp;
				3609	ip = XFS_BHVTOI(bdp);
				3610	xfid->fid_len = sizeof(xfs_fid2_t) - sizeof(xfid->fid_len);
				3611	xfid->fid_pad = 0;
				3612	/*
				3613	* use memcpy because the inode is a long long and there's no
				3614	* assurance that xfid->fid_ino is properly aligned.
				3615	*/
				3616	memcpy(&xfid->fid_ino, &ip->i_ino, sizeof(xfid->fid_ino));
				3617	xfid->fid_gen = ip->i_d.di_gen;
				3618
				3619	return 0;
				3620	}
				3621
				3622
				3623	/*
				3624	* xfs_rwlock
				3625	*/
				3626	int
				3627	xfs_rwlock(
				3628	bhv_desc_t *bdp,
				3629	vrwlock_t locktype)
				3630	{
				3631	xfs_inode_t *ip;
				3632	vnode_t *vp;
				3633
				3634	vp = BHV_TO_VNODE(bdp);
Christoph Hellwig	0432dab	2005-09-02 16:46:51 +1000	[diff] [blame]	3635	if (VN_ISDIR(vp))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3636	return 1;
				3637	ip = XFS_BHVTOI(bdp);
				3638	if (locktype == VRWLOCK_WRITE) {
				3639	xfs_ilock(ip, XFS_IOLOCK_EXCL);
				3640	} else if (locktype == VRWLOCK_TRY_READ) {
				3641	return (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED));
				3642	} else if (locktype == VRWLOCK_TRY_WRITE) {
				3643	return (xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL));
				3644	} else {
				3645	ASSERT((locktype == VRWLOCK_READ) \|\|
				3646	(locktype == VRWLOCK_WRITE_DIRECT));
				3647	xfs_ilock(ip, XFS_IOLOCK_SHARED);
				3648	}
				3649
				3650	return 1;
				3651	}
				3652
				3653
				3654	/*
				3655	* xfs_rwunlock
				3656	*/
				3657	void
				3658	xfs_rwunlock(
				3659	bhv_desc_t *bdp,
				3660	vrwlock_t locktype)
				3661	{
				3662	xfs_inode_t *ip;
				3663	vnode_t *vp;
				3664
				3665	vp = BHV_TO_VNODE(bdp);
Christoph Hellwig	0432dab	2005-09-02 16:46:51 +1000	[diff] [blame]	3666	if (VN_ISDIR(vp))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3667	return;
				3668	ip = XFS_BHVTOI(bdp);
				3669	if (locktype == VRWLOCK_WRITE) {
				3670	/*
				3671	* In the write case, we may have added a new entry to
				3672	* the reference cache. This might store a pointer to
				3673	* an inode to be released in this inode. If it is there,
				3674	* clear the pointer and release the inode after unlocking
				3675	* this one.
				3676	*/
				3677	xfs_refcache_iunlock(ip, XFS_IOLOCK_EXCL);
				3678	} else {
				3679	ASSERT((locktype == VRWLOCK_READ) \|\|
				3680	(locktype == VRWLOCK_WRITE_DIRECT));
				3681	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
				3682	}
				3683	return;
				3684	}
				3685
				3686	STATIC int
				3687	xfs_inode_flush(
				3688	bhv_desc_t *bdp,
				3689	int flags)
				3690	{
				3691	xfs_inode_t *ip;
				3692	xfs_mount_t *mp;
				3693	xfs_inode_log_item_t *iip;
				3694	int error = 0;
				3695
				3696	ip = XFS_BHVTOI(bdp);
				3697	mp = ip->i_mount;
				3698	iip = ip->i_itemp;
				3699
				3700	if (XFS_FORCED_SHUTDOWN(mp))
				3701	return XFS_ERROR(EIO);
				3702
				3703	/*
				3704	* Bypass inodes which have already been cleaned by
				3705	* the inode flush clustering code inside xfs_iflush
				3706	*/
				3707	if ((ip->i_update_core == 0) &&
				3708	((iip == NULL) \|\| !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)))
				3709	return 0;
				3710
				3711	if (flags & FLUSH_LOG) {
				3712	if (iip && iip->ili_last_lsn) {
				3713	xlog_t *log = mp->m_log;
				3714	xfs_lsn_t sync_lsn;
				3715	int s, log_flags = XFS_LOG_FORCE;
				3716
				3717	s = GRANT_LOCK(log);
				3718	sync_lsn = log->l_last_sync_lsn;
				3719	GRANT_UNLOCK(log, s);
				3720
				3721	if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) <= 0))
				3722	return 0;
				3723
				3724	if (flags & FLUSH_SYNC)
				3725	log_flags \|= XFS_LOG_SYNC;
				3726	return xfs_log_force(mp, iip->ili_last_lsn, log_flags);
				3727	}
				3728	}
				3729
				3730	/*
				3731	* We make this non-blocking if the inode is contended,
				3732	* return EAGAIN to indicate to the caller that they
				3733	* did not succeed. This prevents the flush path from
				3734	* blocking on inodes inside another operation right
				3735	* now, they get caught later by xfs_sync.
				3736	*/
				3737	if (flags & FLUSH_INODE) {
				3738	int flush_flags;
				3739
				3740	if (xfs_ipincount(ip))
				3741	return EAGAIN;
				3742
				3743	if (flags & FLUSH_SYNC) {
				3744	xfs_ilock(ip, XFS_ILOCK_SHARED);
				3745	xfs_iflock(ip);
				3746	} else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
				3747	if (xfs_ipincount(ip) \|\| !xfs_iflock_nowait(ip)) {
				3748	xfs_iunlock(ip, XFS_ILOCK_SHARED);
				3749	return EAGAIN;
				3750	}
				3751	} else {
				3752	return EAGAIN;
				3753	}
				3754
				3755	if (flags & FLUSH_SYNC)
				3756	flush_flags = XFS_IFLUSH_SYNC;
				3757	else
				3758	flush_flags = XFS_IFLUSH_ASYNC;
				3759
				3760	error = xfs_iflush(ip, flush_flags);
				3761	xfs_iunlock(ip, XFS_ILOCK_SHARED);
				3762	}
				3763
				3764	return error;
				3765	}
				3766
				3767
				3768	int
				3769	xfs_set_dmattrs (
				3770	bhv_desc_t *bdp,
				3771	u_int evmask,
				3772	u_int16_t state,
				3773	cred_t *credp)
				3774	{
				3775	xfs_inode_t *ip;
				3776	xfs_trans_t *tp;
				3777	xfs_mount_t *mp;
				3778	int error;
				3779
				3780	if (!capable(CAP_SYS_ADMIN))
				3781	return XFS_ERROR(EPERM);
				3782
				3783	ip = XFS_BHVTOI(bdp);
				3784	mp = ip->i_mount;
				3785
				3786	if (XFS_FORCED_SHUTDOWN(mp))
				3787	return XFS_ERROR(EIO);
				3788
				3789	tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS);
				3790	error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES (mp), 0, 0, 0);
				3791	if (error) {
				3792	xfs_trans_cancel(tp, 0);
				3793	return error;
				3794	}
				3795	xfs_ilock(ip, XFS_ILOCK_EXCL);
				3796	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
				3797
				3798	ip->i_iocore.io_dmevmask = ip->i_d.di_dmevmask = evmask;
				3799	ip->i_iocore.io_dmstate = ip->i_d.di_dmstate = state;
				3800
				3801	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				3802	IHOLD(ip);
				3803	error = xfs_trans_commit(tp, 0, NULL);
				3804
				3805	return error;
				3806	}
				3807
				3808
				3809	/*
				3810	* xfs_reclaim
				3811	*/
				3812	STATIC int
				3813	xfs_reclaim(
				3814	bhv_desc_t *bdp)
				3815	{
				3816	xfs_inode_t *ip;
				3817	vnode_t *vp;
				3818
				3819	vp = BHV_TO_VNODE(bdp);
				3820	ip = XFS_BHVTOI(bdp);
				3821
				3822	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
				3823
				3824	ASSERT(!VN_MAPPED(vp));
				3825
				3826	/* bad inode, get out here ASAP */
				3827	if (VN_BAD(vp)) {
				3828	xfs_ireclaim(ip);
				3829	return 0;
				3830	}
				3831
Christoph Hellwig	51c91ed	2005-09-02 16:58:38 +1000	[diff] [blame]	3832	vn_iowait(vp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3833
Christoph Hellwig	51c91ed	2005-09-02 16:58:38 +1000	[diff] [blame]	3834	ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) \|\| ip->i_delayed_blks == 0);
				3835	ASSERT(VN_CACHED(vp) == 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3836
				3837	/* If we have nothing to flush with this inode then complete the
				3838	* teardown now, otherwise break the link between the xfs inode
				3839	* and the linux inode and clean up the xfs inode later. This
				3840	* avoids flushing the inode to disk during the delete operation
				3841	* itself.
				3842	*/
				3843	if (!ip->i_update_core && (ip->i_itemp == NULL)) {
				3844	xfs_ilock(ip, XFS_ILOCK_EXCL);
				3845	xfs_iflock(ip);
				3846	return xfs_finish_reclaim(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC);
				3847	} else {
				3848	xfs_mount_t *mp = ip->i_mount;
				3849
				3850	/* Protect sync from us */
				3851	XFS_MOUNT_ILOCK(mp);
				3852	vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip));
				3853	list_add_tail(&ip->i_reclaim, &mp->m_del_inodes);
				3854	ip->i_flags \|= XFS_IRECLAIMABLE;
				3855	XFS_MOUNT_IUNLOCK(mp);
				3856	}
				3857	return 0;
				3858	}
				3859
				3860	int
				3861	xfs_finish_reclaim(
				3862	xfs_inode_t *ip,
				3863	int locked,
				3864	int sync_mode)
				3865	{
				3866	xfs_ihash_t *ih = ip->i_hash;
				3867	vnode_t *vp = XFS_ITOV_NULL(ip);
				3868	int error;
				3869
				3870	if (vp && VN_BAD(vp))
				3871	goto reclaim;
				3872
				3873	/* The hash lock here protects a thread in xfs_iget_core from
				3874	* racing with us on linking the inode back with a vnode.
				3875	* Once we have the XFS_IRECLAIM flag set it will not touch
				3876	* us.
				3877	*/
				3878	write_lock(&ih->ih_lock);
				3879	if ((ip->i_flags & XFS_IRECLAIM) \|\|
				3880	(!(ip->i_flags & XFS_IRECLAIMABLE) && vp == NULL)) {
				3881	write_unlock(&ih->ih_lock);
				3882	if (locked) {
				3883	xfs_ifunlock(ip);
				3884	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				3885	}
				3886	return(1);
				3887	}
				3888	ip->i_flags \|= XFS_IRECLAIM;
				3889	write_unlock(&ih->ih_lock);
				3890
				3891	/*
				3892	* If the inode is still dirty, then flush it out. If the inode
				3893	* is not in the AIL, then it will be OK to flush it delwri as
				3894	* long as xfs_iflush() does not keep any references to the inode.
				3895	* We leave that decision up to xfs_iflush() since it has the
				3896	* knowledge of whether it's OK to simply do a delwri flush of
				3897	* the inode or whether we need to wait until the inode is
				3898	* pulled from the AIL.
				3899	* We get the flush lock regardless, though, just to make sure
				3900	* we don't free it while it is being flushed.
				3901	*/
				3902	if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
				3903	if (!locked) {
				3904	xfs_ilock(ip, XFS_ILOCK_EXCL);
				3905	xfs_iflock(ip);
				3906	}
				3907
				3908	if (ip->i_update_core \|\|
				3909	((ip->i_itemp != NULL) &&
				3910	(ip->i_itemp->ili_format.ilf_fields != 0))) {
				3911	error = xfs_iflush(ip, sync_mode);
				3912	/*
				3913	* If we hit an error, typically because of filesystem
				3914	* shutdown, we don't need to let vn_reclaim to know
				3915	* because we're gonna reclaim the inode anyway.
				3916	*/
				3917	if (error) {
				3918	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				3919	goto reclaim;
				3920	}
				3921	xfs_iflock(ip); /* synchronize with xfs_iflush_done */
				3922	}
				3923
				3924	ASSERT(ip->i_update_core == 0);
				3925	ASSERT(ip->i_itemp == NULL \|\|
				3926	ip->i_itemp->ili_format.ilf_fields == 0);
				3927	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				3928	} else if (locked) {
				3929	/*
				3930	* We are not interested in doing an iflush if we're
				3931	* in the process of shutting down the filesystem forcibly.
				3932	* So, just reclaim the inode.
				3933	*/
				3934	xfs_ifunlock(ip);
				3935	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				3936	}
				3937
				3938	reclaim:
				3939	xfs_ireclaim(ip);
				3940	return 0;
				3941	}
				3942
				3943	int
				3944	xfs_finish_reclaim_all(xfs_mount_t *mp, int noblock)
				3945	{
				3946	int purged;
				3947	xfs_inode_t ip, n;
				3948	int done = 0;
				3949
				3950	while (!done) {
				3951	purged = 0;
				3952	XFS_MOUNT_ILOCK(mp);
				3953	list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) {
				3954	if (noblock) {
				3955	if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0)
				3956	continue;
				3957	if (xfs_ipincount(ip) \|\|
				3958	!xfs_iflock_nowait(ip)) {
				3959	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				3960	continue;
				3961	}
				3962	}
				3963	XFS_MOUNT_IUNLOCK(mp);
Felix Blyakher	6b2cf61	2005-11-25 16:42:13 +1100	[diff] [blame]	3964	if (xfs_finish_reclaim(ip, noblock,
				3965	XFS_IFLUSH_DELWRI_ELSE_ASYNC))
				3966	delay(1);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3967	purged = 1;
				3968	break;
				3969	}
				3970
				3971	done = !purged;
				3972	}
				3973
				3974	XFS_MOUNT_IUNLOCK(mp);
				3975	return 0;
				3976	}
				3977
				3978	/*
				3979	* xfs_alloc_file_space()
				3980	* This routine allocates disk space for the given file.
				3981	*
				3982	* If alloc_type == 0, this request is for an ALLOCSP type
				3983	* request which will change the file size. In this case, no
				3984	* DMAPI event will be generated by the call. A TRUNCATE event
				3985	* will be generated later by xfs_setattr.
				3986	*
				3987	* If alloc_type != 0, this request is for a RESVSP type
				3988	* request, and a DMAPI DM_EVENT_WRITE will be generated if the
				3989	* lower block boundary byte address is less than the file's
				3990	* length.
				3991	*
				3992	* RETURNS:
				3993	* 0 on success
				3994	* errno on error
				3995	*
				3996	*/
Christoph Hellwig	ba0f32d	2005-06-21 15:36:52 +1000	[diff] [blame]	3997	STATIC int
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3998	xfs_alloc_file_space(
				3999	xfs_inode_t *ip,
				4000	xfs_off_t offset,
				4001	xfs_off_t len,
				4002	int alloc_type,
				4003	int attr_flags)
				4004	{
				4005	xfs_filblks_t allocated_fsb;
				4006	xfs_filblks_t allocatesize_fsb;
				4007	int committed;
				4008	xfs_off_t count;
				4009	xfs_filblks_t datablocks;
				4010	int error;
				4011	xfs_fsblock_t firstfsb;
				4012	xfs_bmap_free_t free_list;
				4013	xfs_bmbt_irec_t *imapp;
				4014	xfs_bmbt_irec_t imaps[1];
				4015	xfs_mount_t *mp;
				4016	int numrtextents;
				4017	int reccount;
				4018	uint resblks;
				4019	int rt;
				4020	int rtextsize;
				4021	xfs_fileoff_t startoffset_fsb;
				4022	xfs_trans_t *tp;
				4023	int xfs_bmapi_flags;
				4024
				4025	vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address);
				4026	mp = ip->i_mount;
				4027
				4028	if (XFS_FORCED_SHUTDOWN(mp))
				4029	return XFS_ERROR(EIO);
				4030
				4031	/*
				4032	* determine if this is a realtime file
				4033	*/
				4034	if ((rt = XFS_IS_REALTIME_INODE(ip)) != 0) {
				4035	if (ip->i_d.di_extsize)
				4036	rtextsize = ip->i_d.di_extsize;
				4037	else
				4038	rtextsize = mp->m_sb.sb_rextsize;
				4039	} else
				4040	rtextsize = 0;
				4041
				4042	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
				4043	return error;
				4044
				4045	if (len <= 0)
				4046	return XFS_ERROR(EINVAL);
				4047
				4048	count = len;
				4049	error = 0;
				4050	imapp = &imaps[0];
				4051	reccount = 1;
				4052	xfs_bmapi_flags = XFS_BMAPI_WRITE \| (alloc_type ? XFS_BMAPI_PREALLOC : 0);
				4053	startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
				4054	allocatesize_fsb = XFS_B_TO_FSB(mp, count);
				4055
				4056	/* Generate a DMAPI event if needed. */
				4057	if (alloc_type != 0 && offset < ip->i_d.di_size &&
				4058	(attr_flags&ATTR_DMI) == 0 &&
				4059	DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) {
				4060	xfs_off_t end_dmi_offset;
				4061
				4062	end_dmi_offset = offset+len;
				4063	if (end_dmi_offset > ip->i_d.di_size)
				4064	end_dmi_offset = ip->i_d.di_size;
				4065	error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip),
				4066	offset, end_dmi_offset - offset,
				4067	0, NULL);
				4068	if (error)
				4069	return(error);
				4070	}
				4071
				4072	/*
				4073	* allocate file space until done or until there is an error
				4074	*/
				4075	retry:
				4076	while (allocatesize_fsb && !error) {
				4077	/*
				4078	* determine if reserving space on
				4079	* the data or realtime partition.
				4080	*/
				4081	if (rt) {
				4082	xfs_fileoff_t s, e;
				4083
				4084	s = startoffset_fsb;
				4085	do_div(s, rtextsize);
				4086	s *= rtextsize;
				4087	e = roundup_64(startoffset_fsb + allocatesize_fsb,
				4088	rtextsize);
				4089	numrtextents = (int)(e - s) / mp->m_sb.sb_rextsize;
				4090	datablocks = 0;
				4091	} else {
				4092	datablocks = allocatesize_fsb;
				4093	numrtextents = 0;
				4094	}
				4095
				4096	/*
				4097	* allocate and setup the transaction
				4098	*/
				4099	tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
				4100	resblks = XFS_DIOSTRAT_SPACE_RES(mp, datablocks);
				4101	error = xfs_trans_reserve(tp,
				4102	resblks,
				4103	XFS_WRITE_LOG_RES(mp),
				4104	numrtextents,
				4105	XFS_TRANS_PERM_LOG_RES,
				4106	XFS_WRITE_LOG_COUNT);
				4107
				4108	/*
				4109	* check for running out of space
				4110	*/
				4111	if (error) {
				4112	/*
				4113	* Free the transaction structure.
				4114	*/
				4115	ASSERT(error == ENOSPC \|\| XFS_FORCED_SHUTDOWN(mp));
				4116	xfs_trans_cancel(tp, 0);
				4117	break;
				4118	}
				4119	xfs_ilock(ip, XFS_ILOCK_EXCL);
Nathan Scott	06d10dd	2005-06-21 15:48:47 +1000	[diff] [blame]	4120	error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
				4121	ip->i_udquot, ip->i_gdquot, resblks, 0, 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	4122	if (error)
				4123	goto error1;
				4124
				4125	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
				4126	xfs_trans_ihold(tp, ip);
				4127
				4128	/*
				4129	* issue the bmapi() call to allocate the blocks
				4130	*/
				4131	XFS_BMAP_INIT(&free_list, &firstfsb);
				4132	error = xfs_bmapi(tp, ip, startoffset_fsb,
				4133	allocatesize_fsb, xfs_bmapi_flags,
				4134	&firstfsb, 0, imapp, &reccount,
				4135	&free_list);
				4136	if (error) {
				4137	goto error0;
				4138	}
				4139
				4140	/*
				4141	* complete the transaction
				4142	*/
				4143	error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed);
				4144	if (error) {
				4145	goto error0;
				4146	}
				4147
				4148	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
				4149	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				4150	if (error) {
				4151	break;
				4152	}
				4153
				4154	allocated_fsb = imapp->br_blockcount;
				4155
				4156	if (reccount == 0) {
				4157	error = XFS_ERROR(ENOSPC);
				4158	break;
				4159	}
				4160
				4161	startoffset_fsb += allocated_fsb;
				4162	allocatesize_fsb -= allocated_fsb;
				4163	}
				4164	dmapi_enospc_check:
				4165	if (error == ENOSPC && (attr_flags&ATTR_DMI) == 0 &&
				4166	DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_NOSPACE)) {
				4167
				4168	error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE,
				4169	XFS_ITOV(ip), DM_RIGHT_NULL,
				4170	XFS_ITOV(ip), DM_RIGHT_NULL,
				4171	NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */
				4172	if (error == 0)
				4173	goto retry; /* Maybe DMAPI app. has made space */
				4174	/* else fall through with error from XFS_SEND_DATA */
				4175	}
				4176
				4177	return error;
				4178
				4179	error0:
				4180	xfs_bmap_cancel(&free_list);
				4181	error1:
				4182	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES \| XFS_TRANS_ABORT);
				4183	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				4184	goto dmapi_enospc_check;
				4185	}
				4186
				4187	/*
				4188	* Zero file bytes between startoff and endoff inclusive.
				4189	* The iolock is held exclusive and no blocks are buffered.
				4190	*/
				4191	STATIC int
				4192	xfs_zero_remaining_bytes(
				4193	xfs_inode_t *ip,
				4194	xfs_off_t startoff,
				4195	xfs_off_t endoff)
				4196	{
				4197	xfs_bmbt_irec_t imap;
				4198	xfs_fileoff_t offset_fsb;
				4199	xfs_off_t lastoffset;
				4200	xfs_off_t offset;
				4201	xfs_buf_t *bp;
				4202	xfs_mount_t *mp = ip->i_mount;
				4203	int nimap;
				4204	int error = 0;
				4205
				4206	bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize,
				4207	ip->i_d.di_flags & XFS_DIFLAG_REALTIME ?
				4208	mp->m_rtdev_targp : mp->m_ddev_targp);
				4209
				4210	for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
				4211	offset_fsb = XFS_B_TO_FSBT(mp, offset);
				4212	nimap = 1;
				4213	error = xfs_bmapi(NULL, ip, offset_fsb, 1, 0, NULL, 0, &imap,
				4214	&nimap, NULL);
				4215	if (error \|\| nimap < 1)
				4216	break;
				4217	ASSERT(imap.br_blockcount >= 1);
				4218	ASSERT(imap.br_startoff == offset_fsb);
				4219	lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1;
				4220	if (lastoffset > endoff)
				4221	lastoffset = endoff;
				4222	if (imap.br_startblock == HOLESTARTBLOCK)
				4223	continue;
				4224	ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
				4225	if (imap.br_state == XFS_EXT_UNWRITTEN)
				4226	continue;
				4227	XFS_BUF_UNDONE(bp);
				4228	XFS_BUF_UNWRITE(bp);
				4229	XFS_BUF_READ(bp);
				4230	XFS_BUF_SET_ADDR(bp, XFS_FSB_TO_DB(ip, imap.br_startblock));
				4231	xfsbdstrat(mp, bp);
				4232	if ((error = xfs_iowait(bp))) {
				4233	xfs_ioerror_alert("xfs_zero_remaining_bytes(read)",
				4234	mp, bp, XFS_BUF_ADDR(bp));
				4235	break;
				4236	}
				4237	memset(XFS_BUF_PTR(bp) +
				4238	(offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
				4239	0, lastoffset - offset + 1);
				4240	XFS_BUF_UNDONE(bp);
				4241	XFS_BUF_UNREAD(bp);
				4242	XFS_BUF_WRITE(bp);
				4243	xfsbdstrat(mp, bp);
				4244	if ((error = xfs_iowait(bp))) {
				4245	xfs_ioerror_alert("xfs_zero_remaining_bytes(write)",
				4246	mp, bp, XFS_BUF_ADDR(bp));
				4247	break;
				4248	}
				4249	}
				4250	xfs_buf_free(bp);
				4251	return error;
				4252	}
				4253
				4254	/*
				4255	* xfs_free_file_space()
				4256	* This routine frees disk space for the given file.
				4257	*
				4258	* This routine is only called by xfs_change_file_space
				4259	* for an UNRESVSP type call.
				4260	*
				4261	* RETURNS:
				4262	* 0 on success
				4263	* errno on error
				4264	*
				4265	*/
				4266	STATIC int
				4267	xfs_free_file_space(
				4268	xfs_inode_t *ip,
				4269	xfs_off_t offset,
				4270	xfs_off_t len,
				4271	int attr_flags)
				4272	{
Christoph Hellwig	bd5a876	2005-06-21 15:47:39 +1000	[diff] [blame]	4273	vnode_t *vp;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	4274	int committed;
				4275	int done;
				4276	xfs_off_t end_dmi_offset;
				4277	xfs_fileoff_t endoffset_fsb;
				4278	int error;
				4279	xfs_fsblock_t firstfsb;
				4280	xfs_bmap_free_t free_list;
				4281	xfs_off_t ilen;
				4282	xfs_bmbt_irec_t imap;
				4283	xfs_off_t ioffset;
				4284	xfs_extlen_t mod=0;
				4285	xfs_mount_t *mp;
				4286	int nimap;
				4287	uint resblks;
				4288	int rounding;
				4289	int rt;
				4290	xfs_fileoff_t startoffset_fsb;
				4291	xfs_trans_t *tp;
Dean Roehrich	5fcbab3	2005-05-05 13:27:19 -0700	[diff] [blame]	4292	int need_iolock = 1;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	4293
Christoph Hellwig	bd5a876	2005-06-21 15:47:39 +1000	[diff] [blame]	4294	vp = XFS_ITOV(ip);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	4295	mp = ip->i_mount;
				4296
Christoph Hellwig	bd5a876	2005-06-21 15:47:39 +1000	[diff] [blame]	4297	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
				4298
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	4299	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
				4300	return error;
				4301
				4302	error = 0;
				4303	if (len <= 0) /* if nothing being freed */
				4304	return error;
				4305	rt = (ip->i_d.di_flags & XFS_DIFLAG_REALTIME);
				4306	startoffset_fsb = XFS_B_TO_FSB(mp, offset);
				4307	end_dmi_offset = offset + len;
				4308	endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset);
				4309
				4310	if (offset < ip->i_d.di_size &&
				4311	(attr_flags & ATTR_DMI) == 0 &&
				4312	DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) {
				4313	if (end_dmi_offset > ip->i_d.di_size)
				4314	end_dmi_offset = ip->i_d.di_size;
Christoph Hellwig	bd5a876	2005-06-21 15:47:39 +1000	[diff] [blame]	4315	error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	4316	offset, end_dmi_offset - offset,
				4317	AT_DELAY_FLAG(attr_flags), NULL);
				4318	if (error)
				4319	return(error);
				4320	}
				4321
Dean Roehrich	5fcbab3	2005-05-05 13:27:19 -0700	[diff] [blame]	4322	ASSERT(attr_flags & ATTR_NOLOCK ? attr_flags & ATTR_DMI : 1);
				4323	if (attr_flags & ATTR_NOLOCK)
				4324	need_iolock = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	4325	if (need_iolock)
				4326	xfs_ilock(ip, XFS_IOLOCK_EXCL);
Dean Roehrich	5fcbab3	2005-05-05 13:27:19 -0700	[diff] [blame]	4327
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	4328	rounding = MAX((__uint8_t)(1 << mp->m_sb.sb_blocklog),
				4329	(__uint8_t)NBPP);
				4330	ilen = len + (offset & (rounding - 1));
				4331	ioffset = offset & ~(rounding - 1);
				4332	if (ilen & (rounding - 1))
				4333	ilen = (ilen + rounding) & ~(rounding - 1);
Christoph Hellwig	bd5a876	2005-06-21 15:47:39 +1000	[diff] [blame]	4334
				4335	if (VN_CACHED(vp) != 0) {
				4336	xfs_inval_cached_trace(&ip->i_iocore, ioffset, -1,
				4337	ctooff(offtoct(ioffset)), -1);
				4338	VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(ioffset)),
				4339	-1, FI_REMAPF_LOCKED);
				4340	}
				4341
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	4342	/*
				4343	* Need to zero the stuff we're not freeing, on disk.
				4344	* If its a realtime file & can't use unwritten extents then we
				4345	* actually need to zero the extent edges. Otherwise xfs_bunmapi
				4346	* will take care of it for us.
				4347	*/
				4348	if (rt && !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) {
				4349	nimap = 1;
				4350	error = xfs_bmapi(NULL, ip, startoffset_fsb, 1, 0, NULL, 0,
				4351	&imap, &nimap, NULL);
				4352	if (error)
				4353	goto out_unlock_iolock;
				4354	ASSERT(nimap == 0 \|\| nimap == 1);
				4355	if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
				4356	xfs_daddr_t block;
				4357
				4358	ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
				4359	block = imap.br_startblock;
				4360	mod = do_div(block, mp->m_sb.sb_rextsize);
				4361	if (mod)
				4362	startoffset_fsb += mp->m_sb.sb_rextsize - mod;
				4363	}
				4364	nimap = 1;
				4365	error = xfs_bmapi(NULL, ip, endoffset_fsb - 1, 1, 0, NULL, 0,
				4366	&imap, &nimap, NULL);
				4367	if (error)
				4368	goto out_unlock_iolock;
				4369	ASSERT(nimap == 0 \|\| nimap == 1);
				4370	if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
				4371	ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
				4372	mod++;
				4373	if (mod && (mod != mp->m_sb.sb_rextsize))
				4374	endoffset_fsb -= mod;
				4375	}
				4376	}
				4377	if ((done = (endoffset_fsb <= startoffset_fsb)))
				4378	/*
				4379	* One contiguous piece to clear
				4380	*/
				4381	error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1);
				4382	else {
				4383	/*
				4384	* Some full blocks, possibly two pieces to clear
				4385	*/
				4386	if (offset < XFS_FSB_TO_B(mp, startoffset_fsb))
				4387	error = xfs_zero_remaining_bytes(ip, offset,
				4388	XFS_FSB_TO_B(mp, startoffset_fsb) - 1);
				4389	if (!error &&
				4390	XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len)
				4391	error = xfs_zero_remaining_bytes(ip,
				4392	XFS_FSB_TO_B(mp, endoffset_fsb),
				4393	offset + len - 1);
				4394	}
				4395
				4396	/*
				4397	* free file space until done or until there is an error
				4398	*/
				4399	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
				4400	while (!error && !done) {
				4401
				4402	/*
				4403	* allocate and setup the transaction
				4404	*/
				4405	tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
				4406	error = xfs_trans_reserve(tp,
				4407	resblks,
				4408	XFS_WRITE_LOG_RES(mp),
				4409	0,
				4410	XFS_TRANS_PERM_LOG_RES,
				4411	XFS_WRITE_LOG_COUNT);
				4412
				4413	/*
				4414	* check for running out of space
				4415	*/
				4416	if (error) {
				4417	/*
				4418	* Free the transaction structure.
				4419	*/
				4420	ASSERT(error == ENOSPC \|\| XFS_FORCED_SHUTDOWN(mp));
				4421	xfs_trans_cancel(tp, 0);
				4422	break;
				4423	}
				4424	xfs_ilock(ip, XFS_ILOCK_EXCL);
				4425	error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
				4426	ip->i_udquot, ip->i_gdquot, resblks, 0, rt ?
				4427	XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
				4428	if (error)
				4429	goto error1;
				4430
				4431	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
				4432	xfs_trans_ihold(tp, ip);
				4433
				4434	/*
				4435	* issue the bunmapi() call to free the blocks
				4436	*/
				4437	XFS_BMAP_INIT(&free_list, &firstfsb);
				4438	error = xfs_bunmapi(tp, ip, startoffset_fsb,
				4439	endoffset_fsb - startoffset_fsb,
				4440	0, 2, &firstfsb, &free_list, &done);
				4441	if (error) {
				4442	goto error0;
				4443	}
				4444
				4445	/*
				4446	* complete the transaction
				4447	*/
				4448	error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed);
				4449	if (error) {
				4450	goto error0;
				4451	}
				4452
				4453	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
				4454	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				4455	}
				4456
				4457	out_unlock_iolock:
				4458	if (need_iolock)
				4459	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
				4460	return error;
				4461
				4462	error0:
				4463	xfs_bmap_cancel(&free_list);
				4464	error1:
				4465	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES \| XFS_TRANS_ABORT);
				4466	xfs_iunlock(ip, need_iolock ? (XFS_ILOCK_EXCL \| XFS_IOLOCK_EXCL) :
				4467	XFS_ILOCK_EXCL);
				4468	return error;
				4469	}
				4470
				4471	/*
				4472	* xfs_change_file_space()
				4473	* This routine allocates or frees disk space for the given file.
				4474	* The user specified parameters are checked for alignment and size
				4475	* limitations.
				4476	*
				4477	* RETURNS:
				4478	* 0 on success
				4479	* errno on error
				4480	*
				4481	*/
				4482	int
				4483	xfs_change_file_space(
				4484	bhv_desc_t *bdp,
				4485	int cmd,
				4486	xfs_flock64_t *bf,
				4487	xfs_off_t offset,
				4488	cred_t *credp,
				4489	int attr_flags)
				4490	{
				4491	int clrprealloc;
				4492	int error;
				4493	xfs_fsize_t fsize;
				4494	xfs_inode_t *ip;
				4495	xfs_mount_t *mp;
				4496	int setprealloc;
				4497	xfs_off_t startoffset;
				4498	xfs_off_t llen;
				4499	xfs_trans_t *tp;
				4500	vattr_t va;
				4501	vnode_t *vp;
				4502
				4503	vp = BHV_TO_VNODE(bdp);
				4504	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
				4505
				4506	ip = XFS_BHVTOI(bdp);
				4507	mp = ip->i_mount;
				4508
				4509	/*
				4510	* must be a regular file and have write permission
				4511	*/
Christoph Hellwig	0432dab	2005-09-02 16:46:51 +1000	[diff] [blame]	4512	if (!VN_ISREG(vp))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	4513	return XFS_ERROR(EINVAL);
				4514
				4515	xfs_ilock(ip, XFS_ILOCK_SHARED);
				4516
				4517	if ((error = xfs_iaccess(ip, S_IWUSR, credp))) {
				4518	xfs_iunlock(ip, XFS_ILOCK_SHARED);
				4519	return error;
				4520	}
				4521
				4522	xfs_iunlock(ip, XFS_ILOCK_SHARED);
				4523
				4524	switch (bf->l_whence) {
				4525	case 0: /SEEK_SET/
				4526	break;
				4527	case 1: /SEEK_CUR/
				4528	bf->l_start += offset;
				4529	break;
				4530	case 2: /SEEK_END/
				4531	bf->l_start += ip->i_d.di_size;
				4532	break;
				4533	default:
				4534	return XFS_ERROR(EINVAL);
				4535	}
				4536
				4537	llen = bf->l_len > 0 ? bf->l_len - 1 : bf->l_len;
				4538
				4539	if ( (bf->l_start < 0)
				4540	\|\| (bf->l_start > XFS_MAXIOFFSET(mp))
				4541	\|\| (bf->l_start + llen < 0)
				4542	\|\| (bf->l_start + llen > XFS_MAXIOFFSET(mp)))
				4543	return XFS_ERROR(EINVAL);
				4544
				4545	bf->l_whence = 0;
				4546
				4547	startoffset = bf->l_start;
				4548	fsize = ip->i_d.di_size;
				4549
				4550	/*
				4551	* XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve
				4552	* file space.
				4553	* These calls do NOT zero the data space allocated to the file,
				4554	* nor do they change the file size.
				4555	*
				4556	* XFS_IOC_ALLOCSP and XFS_IOC_FREESP will allocate and free file
				4557	* space.
				4558	* These calls cause the new file data to be zeroed and the file
				4559	* size to be changed.
				4560	*/
				4561	setprealloc = clrprealloc = 0;
				4562
				4563	switch (cmd) {
				4564	case XFS_IOC_RESVSP:
				4565	case XFS_IOC_RESVSP64:
				4566	error = xfs_alloc_file_space(ip, startoffset, bf->l_len,
				4567	1, attr_flags);
				4568	if (error)
				4569	return error;
				4570	setprealloc = 1;
				4571	break;
				4572
				4573	case XFS_IOC_UNRESVSP:
				4574	case XFS_IOC_UNRESVSP64:
				4575	if ((error = xfs_free_file_space(ip, startoffset, bf->l_len,
				4576	attr_flags)))
				4577	return error;
				4578	break;
				4579
				4580	case XFS_IOC_ALLOCSP:
				4581	case XFS_IOC_ALLOCSP64:
				4582	case XFS_IOC_FREESP:
				4583	case XFS_IOC_FREESP64:
				4584	if (startoffset > fsize) {
				4585	error = xfs_alloc_file_space(ip, fsize,
				4586	startoffset - fsize, 0, attr_flags);
				4587	if (error)
				4588	break;
				4589	}
				4590
				4591	va.va_mask = XFS_AT_SIZE;
				4592	va.va_size = startoffset;
				4593
				4594	error = xfs_setattr(bdp, &va, attr_flags, credp);
				4595
				4596	if (error)
				4597	return error;
				4598
				4599	clrprealloc = 1;
				4600	break;
				4601
				4602	default:
				4603	ASSERT(0);
				4604	return XFS_ERROR(EINVAL);
				4605	}
				4606
				4607	/*
				4608	* update the inode timestamp, mode, and prealloc flag bits
				4609	*/
				4610	tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);
				4611
				4612	if ((error = xfs_trans_reserve(tp, 0, XFS_WRITEID_LOG_RES(mp),
				4613	0, 0, 0))) {
				4614	/* ASSERT(0); */
				4615	xfs_trans_cancel(tp, 0);
				4616	return error;
				4617	}
				4618
				4619	xfs_ilock(ip, XFS_ILOCK_EXCL);
				4620
				4621	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
				4622	xfs_trans_ihold(tp, ip);
				4623
				4624	if ((attr_flags & ATTR_DMI) == 0) {
				4625	ip->i_d.di_mode &= ~S_ISUID;
				4626
				4627	/*
				4628	* Note that we don't have to worry about mandatory
				4629	* file locking being disabled here because we only
				4630	* clear the S_ISGID bit if the Group execute bit is
				4631	* on, but if it was on then mandatory locking wouldn't
				4632	* have been enabled.
				4633	*/
				4634	if (ip->i_d.di_mode & S_IXGRP)
				4635	ip->i_d.di_mode &= ~S_ISGID;
				4636
				4637	xfs_ichgtime(ip, XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
				4638	}
				4639	if (setprealloc)
				4640	ip->i_d.di_flags \|= XFS_DIFLAG_PREALLOC;
				4641	else if (clrprealloc)
				4642	ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;
				4643
				4644	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				4645	xfs_trans_set_sync(tp);
				4646
				4647	error = xfs_trans_commit(tp, 0, NULL);
				4648
				4649	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				4650
				4651	return error;
				4652	}
				4653
				4654	vnodeops_t xfs_vnodeops = {
				4655	BHV_IDENTITY_INIT(VN_BHV_XFS,VNODE_POSITION_XFS),
				4656	.vop_open = xfs_open,
				4657	.vop_read = xfs_read,
				4658	#ifdef HAVE_SENDFILE
				4659	.vop_sendfile = xfs_sendfile,
				4660	#endif
				4661	.vop_write = xfs_write,
				4662	.vop_ioctl = xfs_ioctl,
				4663	.vop_getattr = xfs_getattr,
				4664	.vop_setattr = xfs_setattr,
				4665	.vop_access = xfs_access,
				4666	.vop_lookup = xfs_lookup,
				4667	.vop_create = xfs_create,
				4668	.vop_remove = xfs_remove,
				4669	.vop_link = xfs_link,
				4670	.vop_rename = xfs_rename,
				4671	.vop_mkdir = xfs_mkdir,
				4672	.vop_rmdir = xfs_rmdir,
				4673	.vop_readdir = xfs_readdir,
				4674	.vop_symlink = xfs_symlink,
				4675	.vop_readlink = xfs_readlink,
				4676	.vop_fsync = xfs_fsync,
				4677	.vop_inactive = xfs_inactive,
				4678	.vop_fid2 = xfs_fid2,
				4679	.vop_rwlock = xfs_rwlock,
				4680	.vop_rwunlock = xfs_rwunlock,
				4681	.vop_bmap = xfs_bmap,
				4682	.vop_reclaim = xfs_reclaim,
				4683	.vop_attr_get = xfs_attr_get,
				4684	.vop_attr_set = xfs_attr_set,
				4685	.vop_attr_remove = xfs_attr_remove,
				4686	.vop_attr_list = xfs_attr_list,
				4687	.vop_link_removed = (vop_link_removed_t)fs_noval,
				4688	.vop_vnode_change = (vop_vnode_change_t)fs_noval,
				4689	.vop_tosspages = fs_tosspages,
				4690	.vop_flushinval_pages = fs_flushinval_pages,
				4691	.vop_flush_pages = fs_flush_pages,
				4692	.vop_release = xfs_release,
				4693	.vop_iflush = xfs_inode_flush,
				4694	};