Blame - fs/ceph/super.c - android_kernel_oneplus_msm8996

blob: b3404a319c22f6a4f515ddfee4d85b7b61375fe4 [file] [log] [blame]

Sage Weil	16725b9	2009-10-06 11:31:07 -0700	[diff] [blame]	1
				2	#include "ceph_debug.h"
				3
				4	#include <linux/backing-dev.h>
				5	#include <linux/fs.h>
				6	#include <linux/inet.h>
				7	#include <linux/in6.h>
				8	#include <linux/module.h>
				9	#include <linux/mount.h>
				10	#include <linux/parser.h>
				11	#include <linux/rwsem.h>
				12	#include <linux/sched.h>
				13	#include <linux/seq_file.h>
				14	#include <linux/statfs.h>
				15	#include <linux/string.h>
				16	#include <linux/version.h>
				17	#include <linux/vmalloc.h>
				18
Sage Weil	16725b9	2009-10-06 11:31:07 -0700	[diff] [blame]	19	#include "decode.h"
				20	#include "super.h"
				21	#include "mon_client.h"
				22
				23	/*
				24	* Ceph superblock operations
				25	*
				26	* Handle the basics of mounting, unmounting.
				27	*/
				28
				29
				30	/*
				31	* find filename portion of a path (/foo/bar/baz -> baz)
				32	*/
				33	const char ceph_file_part(const char s, int len)
				34	{
				35	const char *e = s + len;
				36
				37	while (e != s && *(e-1) != '/')
				38	e--;
				39	return e;
				40	}
				41
				42
				43	/*
				44	* super ops
				45	*/
				46	static void ceph_put_super(struct super_block *s)
				47	{
				48	struct ceph_client *cl = ceph_client(s);
				49
				50	dout("put_super\n");
				51	ceph_mdsc_close_sessions(&cl->mdsc);
				52	return;
				53	}
				54
				55	static int ceph_statfs(struct dentry dentry, struct kstatfs buf)
				56	{
				57	struct ceph_client *client = ceph_inode_to_client(dentry->d_inode);
				58	struct ceph_monmap *monmap = client->monc.monmap;
				59	struct ceph_statfs st;
				60	u64 fsid;
				61	int err;
				62
				63	dout("statfs\n");
				64	err = ceph_monc_do_statfs(&client->monc, &st);
				65	if (err < 0)
				66	return err;
				67
				68	/* fill in kstatfs */
				69	buf->f_type = CEPH_SUPER_MAGIC; /* ?? */
				70
				71	/*
				72	* express utilization in terms of large blocks to avoid
				73	* overflow on 32-bit machines.
				74	*/
				75	buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
				76	buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
				77	buf->f_bfree = (le64_to_cpu(st.kb) - le64_to_cpu(st.kb_used)) >>
				78	(CEPH_BLOCK_SHIFT-10);
				79	buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
				80
				81	buf->f_files = le64_to_cpu(st.num_objects);
				82	buf->f_ffree = -1;
				83	buf->f_namelen = PATH_MAX;
				84	buf->f_frsize = PAGE_CACHE_SIZE;
				85
				86	/* leave fsid little-endian, regardless of host endianness */
				87	fsid = (u64 )(&monmap->fsid) ^ ((u64 )&monmap->fsid + 1);
				88	buf->f_fsid.val[0] = fsid & 0xffffffff;
				89	buf->f_fsid.val[1] = fsid >> 32;
				90
				91	return 0;
				92	}
				93
				94
				95	static int ceph_syncfs(struct super_block *sb, int wait)
				96	{
				97	dout("sync_fs %d\n", wait);
				98	ceph_osdc_sync(&ceph_client(sb)->osdc);
				99	ceph_mdsc_sync(&ceph_client(sb)->mdsc);
				100	return 0;
				101	}
				102
				103
				104	/**
				105	* ceph_show_options - Show mount options in /proc/mounts
				106	* @m: seq_file to write to
				107	* @mnt: mount descriptor
				108	*/
				109	static int ceph_show_options(struct seq_file m, struct vfsmount mnt)
				110	{
				111	struct ceph_client *client = ceph_sb_to_client(mnt->mnt_sb);
				112	struct ceph_mount_args *args = &client->mount_args;
				113
				114	if (args->flags & CEPH_OPT_FSID)
				115	seq_printf(m, ",fsidmajor=%llu,fsidminor%llu",
				116	le64_to_cpu((__le64 )&args->fsid.fsid[0]),
				117	le64_to_cpu((__le64 )&args->fsid.fsid[8]));
				118	if (args->flags & CEPH_OPT_NOSHARE)
				119	seq_puts(m, ",noshare");
				120	if (args->flags & CEPH_OPT_DIRSTAT)
				121	seq_puts(m, ",dirstat");
				122	if ((args->flags & CEPH_OPT_RBYTES) == 0)
				123	seq_puts(m, ",norbytes");
				124	if (args->flags & CEPH_OPT_NOCRC)
				125	seq_puts(m, ",nocrc");
				126	if (args->flags & CEPH_OPT_NOASYNCREADDIR)
				127	seq_puts(m, ",noasyncreaddir");
				128	if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
				129	seq_printf(m, ",snapdirname=%s", args->snapdir_name);
				130	if (args->secret)
				131	seq_puts(m, ",secret=<hidden>");
				132	return 0;
				133	}
				134
				135	/*
				136	* caches
				137	*/
				138	struct kmem_cache *ceph_inode_cachep;
				139	struct kmem_cache *ceph_cap_cachep;
				140	struct kmem_cache *ceph_dentry_cachep;
				141	struct kmem_cache *ceph_file_cachep;
				142
				143	static void ceph_inode_init_once(void *foo)
				144	{
				145	struct ceph_inode_info *ci = foo;
				146	inode_init_once(&ci->vfs_inode);
				147	}
				148
				149	static int __init init_caches(void)
				150	{
				151	ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
				152	sizeof(struct ceph_inode_info),
				153	__alignof__(struct ceph_inode_info),
				154	(SLAB_RECLAIM_ACCOUNT\|SLAB_MEM_SPREAD),
				155	ceph_inode_init_once);
				156	if (ceph_inode_cachep == NULL)
				157	return -ENOMEM;
				158
				159	ceph_cap_cachep = KMEM_CACHE(ceph_cap,
				160	SLAB_RECLAIM_ACCOUNT\|SLAB_MEM_SPREAD);
				161	if (ceph_cap_cachep == NULL)
				162	goto bad_cap;
				163
				164	ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
				165	SLAB_RECLAIM_ACCOUNT\|SLAB_MEM_SPREAD);
				166	if (ceph_dentry_cachep == NULL)
				167	goto bad_dentry;
				168
				169	ceph_file_cachep = KMEM_CACHE(ceph_file_info,
				170	SLAB_RECLAIM_ACCOUNT\|SLAB_MEM_SPREAD);
				171	if (ceph_file_cachep == NULL)
				172	goto bad_file;
				173
				174	return 0;
				175
				176	bad_file:
				177	kmem_cache_destroy(ceph_dentry_cachep);
				178	bad_dentry:
				179	kmem_cache_destroy(ceph_cap_cachep);
				180	bad_cap:
				181	kmem_cache_destroy(ceph_inode_cachep);
				182	return -ENOMEM;
				183	}
				184
				185	static void destroy_caches(void)
				186	{
				187	kmem_cache_destroy(ceph_inode_cachep);
				188	kmem_cache_destroy(ceph_cap_cachep);
				189	kmem_cache_destroy(ceph_dentry_cachep);
				190	kmem_cache_destroy(ceph_file_cachep);
				191	}
				192
				193
				194	/*
				195	* ceph_umount_begin - initiate forced umount. Tear down down the
				196	* mount, skipping steps that may hang while waiting for server(s).
				197	*/
				198	static void ceph_umount_begin(struct super_block *sb)
				199	{
				200	struct ceph_client *client = ceph_sb_to_client(sb);
				201
				202	dout("ceph_umount_begin - starting forced umount\n");
				203	if (!client)
				204	return;
				205	client->mount_state = CEPH_MOUNT_SHUTDOWN;
				206	return;
				207	}
				208
				209	static const struct super_operations ceph_super_ops = {
				210	.alloc_inode = ceph_alloc_inode,
				211	.destroy_inode = ceph_destroy_inode,
				212	.write_inode = ceph_write_inode,
				213	.sync_fs = ceph_syncfs,
				214	.put_super = ceph_put_super,
				215	.show_options = ceph_show_options,
				216	.statfs = ceph_statfs,
				217	.umount_begin = ceph_umount_begin,
				218	};
				219
				220
				221	const char *ceph_msg_type_name(int type)
				222	{
				223	switch (type) {
				224	case CEPH_MSG_SHUTDOWN: return "shutdown";
				225	case CEPH_MSG_PING: return "ping";
				226	case CEPH_MSG_MON_MAP: return "mon_map";
				227	case CEPH_MSG_MON_GET_MAP: return "mon_get_map";
				228	case CEPH_MSG_MON_SUBSCRIBE: return "mon_subscribe";
				229	case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack";
				230	case CEPH_MSG_CLIENT_MOUNT: return "client_mount";
				231	case CEPH_MSG_CLIENT_MOUNT_ACK: return "client_mount_ack";
				232	case CEPH_MSG_STATFS: return "statfs";
				233	case CEPH_MSG_STATFS_REPLY: return "statfs_reply";
				234	case CEPH_MSG_MDS_GETMAP: return "mds_getmap";
				235	case CEPH_MSG_MDS_MAP: return "mds_map";
				236	case CEPH_MSG_CLIENT_SESSION: return "client_session";
				237	case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect";
				238	case CEPH_MSG_CLIENT_REQUEST: return "client_request";
				239	case CEPH_MSG_CLIENT_REQUEST_FORWARD: return "client_request_forward";
				240	case CEPH_MSG_CLIENT_REPLY: return "client_reply";
				241	case CEPH_MSG_CLIENT_CAPS: return "client_caps";
				242	case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release";
				243	case CEPH_MSG_CLIENT_SNAP: return "client_snap";
				244	case CEPH_MSG_CLIENT_LEASE: return "client_lease";
				245	case CEPH_MSG_OSD_GETMAP: return "osd_getmap";
				246	case CEPH_MSG_OSD_MAP: return "osd_map";
				247	case CEPH_MSG_OSD_OP: return "osd_op";
				248	case CEPH_MSG_OSD_OPREPLY: return "osd_opreply";
				249	default: return "unknown";
				250	}
				251	}
				252
				253
				254	/*
				255	* mount options
				256	*/
				257	enum {
				258	Opt_fsidmajor,
				259	Opt_fsidminor,
				260	Opt_monport,
				261	Opt_wsize,
				262	Opt_rsize,
				263	Opt_osdtimeout,
				264	Opt_mount_timeout,
				265	Opt_caps_wanted_delay_min,
				266	Opt_caps_wanted_delay_max,
				267	Opt_readdir_max_entries,
				268	/* int args above */
				269	Opt_snapdirname,
				270	Opt_secret,
				271	/* string args above */
				272	Opt_ip,
				273	Opt_noshare,
				274	Opt_dirstat,
				275	Opt_nodirstat,
				276	Opt_rbytes,
				277	Opt_norbytes,
				278	Opt_nocrc,
				279	Opt_noasyncreaddir,
				280	};
				281
				282	static match_table_t arg_tokens = {
				283	{Opt_fsidmajor, "fsidmajor=%ld"},
				284	{Opt_fsidminor, "fsidminor=%ld"},
				285	{Opt_monport, "monport=%d"},
				286	{Opt_wsize, "wsize=%d"},
				287	{Opt_rsize, "rsize=%d"},
				288	{Opt_osdtimeout, "osdtimeout=%d"},
				289	{Opt_mount_timeout, "mount_timeout=%d"},
				290	{Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
				291	{Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
				292	{Opt_readdir_max_entries, "readdir_max_entries=%d"},
				293	/* int args above */
				294	{Opt_snapdirname, "snapdirname=%s"},
				295	{Opt_secret, "secret=%s"},
				296	/* string args above */
				297	{Opt_ip, "ip=%s"},
				298	{Opt_noshare, "noshare"},
				299	{Opt_dirstat, "dirstat"},
				300	{Opt_nodirstat, "nodirstat"},
				301	{Opt_rbytes, "rbytes"},
				302	{Opt_norbytes, "norbytes"},
				303	{Opt_nocrc, "nocrc"},
				304	{Opt_noasyncreaddir, "noasyncreaddir"},
				305	{-1, NULL}
				306	};
				307
				308
				309	static int parse_mount_args(struct ceph_client *client,
				310	int flags, char options, const char dev_name,
				311	const char **path)
				312	{
				313	struct ceph_mount_args *args = &client->mount_args;
				314	const char *c;
				315	int err;
				316	substring_t argstr[MAX_OPT_ARGS];
				317	int num_mon;
				318	struct ceph_entity_addr mon_addr[CEPH_MAX_MON_MOUNT_ADDR];
				319	int i;
				320
				321	dout("parse_mount_args dev_name '%s'\n", dev_name);
				322	memset(args, 0, sizeof(*args));
				323
				324	/* start with defaults */
				325	args->sb_flags = flags;
				326	args->flags = CEPH_OPT_DEFAULT;
				327	args->osd_timeout = 5; /* seconds */
				328	args->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */
				329	args->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
				330	args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
				331	args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
				332	args->cap_release_safety = CEPH_CAPS_PER_RELEASE * 4;
				333	args->max_readdir = 1024;
				334
				335	/* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */
				336	if (!dev_name)
				337	return -EINVAL;
				338	*path = strstr(dev_name, ":/");
				339	if (*path == NULL) {
				340	pr_err("device name is missing path (no :/ in %s)\n",
				341	dev_name);
				342	return -EINVAL;
				343	}
				344
				345	/* get mon ip(s) */
				346	err = ceph_parse_ips(dev_name, *path, mon_addr,
				347	CEPH_MAX_MON_MOUNT_ADDR, &num_mon);
				348	if (err < 0)
				349	return err;
				350
				351	/* build initial monmap */
				352	client->monc.monmap = kzalloc(sizeof(*client->monc.monmap) +
				353	num_mon*sizeof(client->monc.monmap->mon_inst[0]),
				354	GFP_KERNEL);
				355	if (!client->monc.monmap)
				356	return -ENOMEM;
				357	for (i = 0; i < num_mon; i++) {
				358	client->monc.monmap->mon_inst[i].addr = mon_addr[i];
				359	client->monc.monmap->mon_inst[i].addr.erank = 0;
				360	client->monc.monmap->mon_inst[i].addr.nonce = 0;
				361	client->monc.monmap->mon_inst[i].name.type =
				362	CEPH_ENTITY_TYPE_MON;
				363	client->monc.monmap->mon_inst[i].name.num = cpu_to_le64(i);
				364	}
				365	client->monc.monmap->num_mon = num_mon;
				366	memset(&args->my_addr.in_addr, 0, sizeof(args->my_addr.in_addr));
				367
				368	/* path on server */
				369	*path += 2;
				370	dout("server path '%s'\n", *path);
				371
				372	/* parse mount options */
				373	while ((c = strsep(&options, ",")) != NULL) {
				374	int token, intval, ret;
				375	if (!*c)
				376	continue;
				377	token = match_token((char *)c, arg_tokens, argstr);
				378	if (token < 0) {
				379	pr_err("bad mount option at '%s'\n", c);
				380	return -EINVAL;
				381
				382	}
				383	if (token < Opt_ip) {
				384	ret = match_int(&argstr[0], &intval);
				385	if (ret < 0) {
				386	pr_err("bad mount option arg (not int) "
				387	"at '%s'\n", c);
				388	continue;
				389	}
				390	dout("got token %d intval %d\n", token, intval);
				391	}
				392	switch (token) {
				393	case Opt_fsidmajor:
				394	(__le64 )&args->fsid.fsid[0] = cpu_to_le64(intval);
				395	break;
				396	case Opt_fsidminor:
				397	(__le64 )&args->fsid.fsid[8] = cpu_to_le64(intval);
				398	break;
				399	case Opt_ip:
				400	err = ceph_parse_ips(argstr[0].from,
				401	argstr[0].to,
				402	&args->my_addr,
				403	1, NULL);
				404	if (err < 0)
				405	return err;
				406	args->flags \|= CEPH_OPT_MYIP;
				407	break;
				408
				409	case Opt_snapdirname:
				410	kfree(args->snapdir_name);
				411	args->snapdir_name = kstrndup(argstr[0].from,
				412	argstr[0].to-argstr[0].from,
				413	GFP_KERNEL);
				414	break;
				415	case Opt_secret:
				416	args->secret = kstrndup(argstr[0].from,
				417	argstr[0].to-argstr[0].from,
				418	GFP_KERNEL);
				419	break;
				420
				421	/* misc */
				422	case Opt_wsize:
				423	args->wsize = intval;
				424	break;
				425	case Opt_rsize:
				426	args->rsize = intval;
				427	break;
				428	case Opt_osdtimeout:
				429	args->osd_timeout = intval;
				430	break;
				431	case Opt_mount_timeout:
				432	args->mount_timeout = intval;
				433	break;
				434	case Opt_caps_wanted_delay_min:
				435	args->caps_wanted_delay_min = intval;
				436	break;
				437	case Opt_caps_wanted_delay_max:
				438	args->caps_wanted_delay_max = intval;
				439	break;
				440	case Opt_readdir_max_entries:
				441	args->max_readdir = intval;
				442	break;
				443
				444	case Opt_noshare:
				445	args->flags \|= CEPH_OPT_NOSHARE;
				446	break;
				447
				448	case Opt_dirstat:
				449	args->flags \|= CEPH_OPT_DIRSTAT;
				450	break;
				451	case Opt_nodirstat:
				452	args->flags &= ~CEPH_OPT_DIRSTAT;
				453	break;
				454	case Opt_rbytes:
				455	args->flags \|= CEPH_OPT_RBYTES;
				456	break;
				457	case Opt_norbytes:
				458	args->flags &= ~CEPH_OPT_RBYTES;
				459	break;
				460	case Opt_nocrc:
				461	args->flags \|= CEPH_OPT_NOCRC;
				462	break;
				463	case Opt_noasyncreaddir:
				464	args->flags \|= CEPH_OPT_NOASYNCREADDIR;
				465	break;
				466
				467	default:
				468	BUG_ON(token);
				469	}
				470	}
				471
				472	return 0;
				473	}
				474
				475	static void release_mount_args(struct ceph_mount_args *args)
				476	{
				477	kfree(args->snapdir_name);
				478	args->snapdir_name = NULL;
				479	kfree(args->secret);
				480	args->secret = NULL;
				481	}
				482
				483	/*
				484	* create a fresh client instance
				485	*/
				486	static struct ceph_client *ceph_create_client(void)
				487	{
				488	struct ceph_client *client;
				489	int err = -ENOMEM;
				490
				491	client = kzalloc(sizeof(*client), GFP_KERNEL);
				492	if (client == NULL)
				493	return ERR_PTR(-ENOMEM);
				494
				495	mutex_init(&client->mount_mutex);
				496
				497	init_waitqueue_head(&client->mount_wq);
				498
				499	client->sb = NULL;
				500	client->mount_state = CEPH_MOUNT_MOUNTING;
				501	client->whoami = -1;
				502
				503	client->msgr = NULL;
				504
				505	client->mount_err = 0;
				506	client->signed_ticket = NULL;
				507	client->signed_ticket_len = 0;
				508
				509	err = -ENOMEM;
				510	client->wb_wq = create_workqueue("ceph-writeback");
				511	if (client->wb_wq == NULL)
				512	goto fail;
				513	client->pg_inv_wq = create_singlethread_workqueue("ceph-pg-invalid");
				514	if (client->pg_inv_wq == NULL)
				515	goto fail_wb_wq;
				516	client->trunc_wq = create_singlethread_workqueue("ceph-trunc");
				517	if (client->trunc_wq == NULL)
				518	goto fail_pg_inv_wq;
				519
				520	/* subsystems */
				521	err = ceph_monc_init(&client->monc, client);
				522	if (err < 0)
				523	goto fail_trunc_wq;
				524	err = ceph_osdc_init(&client->osdc, client);
				525	if (err < 0)
				526	goto fail_monc;
				527	ceph_mdsc_init(&client->mdsc, client);
				528	return client;
				529
				530	fail_monc:
				531	ceph_monc_stop(&client->monc);
				532	fail_trunc_wq:
				533	destroy_workqueue(client->trunc_wq);
				534	fail_pg_inv_wq:
				535	destroy_workqueue(client->pg_inv_wq);
				536	fail_wb_wq:
				537	destroy_workqueue(client->wb_wq);
				538	fail:
				539	kfree(client);
				540	return ERR_PTR(err);
				541	}
				542
				543	static void ceph_destroy_client(struct ceph_client *client)
				544	{
				545	dout("destroy_client %p\n", client);
				546
				547	/* unmount */
				548	ceph_mdsc_stop(&client->mdsc);
				549	ceph_monc_stop(&client->monc);
				550	ceph_osdc_stop(&client->osdc);
				551
				552	kfree(client->signed_ticket);
				553
				554	ceph_debugfs_client_cleanup(client);
				555	destroy_workqueue(client->wb_wq);
				556	destroy_workqueue(client->pg_inv_wq);
				557	destroy_workqueue(client->trunc_wq);
				558
				559	if (client->msgr)
				560	ceph_messenger_destroy(client->msgr);
				561	if (client->wb_pagevec_pool)
				562	mempool_destroy(client->wb_pagevec_pool);
				563
				564	release_mount_args(&client->mount_args);
				565
				566	kfree(client);
				567	dout("destroy_client %p done\n", client);
				568	}
				569
				570	/*
				571	* true if we have the mon map (and have thus joined the cluster)
				572	*/
				573	static int have_mon_map(struct ceph_client *client)
				574	{
				575	return client->monc.monmap && client->monc.monmap->epoch;
				576	}
				577
				578	/*
				579	* Bootstrap mount by opening the root directory. Note the mount
				580	* @started time from caller, and time out if this takes too long.
				581	*/
				582	static struct dentry open_root_dentry(struct ceph_client client,
				583	const char *path,
				584	unsigned long started)
				585	{
				586	struct ceph_mds_client *mdsc = &client->mdsc;
				587	struct ceph_mds_request *req = NULL;
				588	int err;
				589	struct dentry *root;
				590
				591	/* open dir */
				592	dout("open_root_inode opening '%s'\n", path);
				593	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
				594	if (IS_ERR(req))
				595	return ERR_PTR(PTR_ERR(req));
				596	req->r_path1 = kstrdup(path, GFP_NOFS);
				597	req->r_ino1.ino = CEPH_INO_ROOT;
				598	req->r_ino1.snap = CEPH_NOSNAP;
				599	req->r_started = started;
				600	req->r_timeout = client->mount_args.mount_timeout * HZ;
				601	req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
				602	req->r_num_caps = 2;
				603	err = ceph_mdsc_do_request(mdsc, NULL, req);
				604	if (err == 0) {
				605	dout("open_root_inode success\n");
				606	if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT &&
				607	client->sb->s_root == NULL)
				608	root = d_alloc_root(req->r_target_inode);
				609	else
				610	root = d_obtain_alias(req->r_target_inode);
				611	req->r_target_inode = NULL;
				612	dout("open_root_inode success, root dentry is %p\n", root);
				613	} else {
				614	root = ERR_PTR(err);
				615	}
				616	ceph_mdsc_put_request(req);
				617	return root;
				618	}
				619
				620	/*
				621	* mount: join the ceph cluster, and open root directory.
				622	*/
				623	static int ceph_mount(struct ceph_client client, struct vfsmount mnt,
				624	const char *path)
				625	{
				626	struct ceph_entity_addr *myaddr = NULL;
				627	int err;
				628	unsigned long timeout = client->mount_args.mount_timeout * HZ;
				629	unsigned long started = jiffies; /* note the start time */
				630	struct dentry *root;
				631
				632	dout("mount start\n");
				633	mutex_lock(&client->mount_mutex);
				634
				635	/* initialize the messenger */
				636	if (client->msgr == NULL) {
				637	if (ceph_test_opt(client, MYIP))
				638	myaddr = &client->mount_args.my_addr;
				639	client->msgr = ceph_messenger_create(myaddr);
				640	if (IS_ERR(client->msgr)) {
				641	err = PTR_ERR(client->msgr);
				642	client->msgr = NULL;
				643	goto out;
				644	}
				645	client->msgr->nocrc = ceph_test_opt(client, NOCRC);
				646	}
				647
				648	/* send mount request, and wait for mon, mds, and osd maps */
				649	err = ceph_monc_request_mount(&client->monc);
				650	if (err < 0)
				651	goto out;
				652
				653	while (!have_mon_map(client) && !client->mount_err) {
				654	err = -EIO;
				655	if (timeout && time_after_eq(jiffies, started + timeout))
				656	goto out;
				657
				658	/* wait */
				659	dout("mount waiting for mount\n");
				660	err = wait_event_interruptible_timeout(client->mount_wq,
				661	client->mount_err \|\| have_mon_map(client),
				662	timeout);
				663	if (err == -EINTR \|\| err == -ERESTARTSYS)
				664	goto out;
				665	if (client->mount_err) {
				666	err = client->mount_err;
				667	goto out;
				668	}
				669	}
				670
				671	dout("mount opening root\n");
				672	root = open_root_dentry(client, "", started);
				673	if (IS_ERR(root)) {
				674	err = PTR_ERR(root);
				675	goto out;
				676	}
				677	if (client->sb->s_root)
				678	dput(root);
				679	else
				680	client->sb->s_root = root;
				681
				682	if (path[0] == 0) {
				683	dget(root);
				684	} else {
				685	dout("mount opening base mountpoint\n");
				686	root = open_root_dentry(client, path, started);
				687	if (IS_ERR(root)) {
				688	err = PTR_ERR(root);
				689	dput(client->sb->s_root);
				690	client->sb->s_root = NULL;
				691	goto out;
				692	}
				693	}
				694
				695	mnt->mnt_root = root;
				696	mnt->mnt_sb = client->sb;
				697
				698	client->mount_state = CEPH_MOUNT_MOUNTED;
				699	dout("mount success\n");
				700	err = 0;
				701
				702	out:
				703	mutex_unlock(&client->mount_mutex);
				704	return err;
				705	}
				706
				707	static int ceph_set_super(struct super_block s, void data)
				708	{
				709	struct ceph_client *client = data;
				710	int ret;
				711
				712	dout("set_super %p data %p\n", s, data);
				713
				714	s->s_flags = client->mount_args.sb_flags;
				715	s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */
				716
				717	s->s_fs_info = client;
				718	client->sb = s;
				719
				720	s->s_op = &ceph_super_ops;
				721	s->s_export_op = &ceph_export_ops;
				722
				723	s->s_time_gran = 1000; /* 1000 ns == 1 us */
				724
				725	ret = set_anon_super(s, NULL); /* what is that second arg for? */
				726	if (ret != 0)
				727	goto fail;
				728
				729	return ret;
				730
				731	fail:
				732	s->s_fs_info = NULL;
				733	client->sb = NULL;
				734	return ret;
				735	}
				736
				737	/*
				738	* share superblock if same fs AND options
				739	*/
				740	static int ceph_compare_super(struct super_block sb, void data)
				741	{
				742	struct ceph_client *new = data;
				743	struct ceph_mount_args *args = &new->mount_args;
				744	struct ceph_client *other = ceph_sb_to_client(sb);
				745	int i;
				746
				747	dout("ceph_compare_super %p\n", sb);
				748	if (args->flags & CEPH_OPT_FSID) {
				749	if (ceph_fsid_compare(&args->fsid, &other->fsid)) {
				750	dout("fsid doesn't match\n");
				751	return 0;
				752	}
				753	} else {
				754	/* do we share (a) monitor? */
				755	for (i = 0; i < new->monc.monmap->num_mon; i++)
				756	if (ceph_monmap_contains(other->monc.monmap,
				757	&new->monc.monmap->mon_inst[i].addr))
				758	break;
				759	if (i == new->monc.monmap->num_mon) {
				760	dout("mon ip not part of monmap\n");
				761	return 0;
				762	}
				763	dout("mon ip matches existing sb %p\n", sb);
				764	}
				765	if (args->sb_flags != other->mount_args.sb_flags) {
				766	dout("flags differ\n");
				767	return 0;
				768	}
				769	return 1;
				770	}
				771
				772	/*
				773	* construct our own bdi so we can control readahead, etc.
				774	*/
				775	static int ceph_init_bdi(struct super_block sb, struct ceph_client client)
				776	{
				777	int err;
				778
				779	err = bdi_init(&client->backing_dev_info);
				780	if (err < 0)
				781	return err;
				782
				783	/* set ra_pages based on rsize mount option? */
				784	if (client->mount_args.rsize >= PAGE_CACHE_SIZE)
				785	client->backing_dev_info.ra_pages =
				786	(client->mount_args.rsize + PAGE_CACHE_SIZE - 1)
				787	>> PAGE_SHIFT;
				788
				789	err = bdi_register_dev(&client->backing_dev_info, sb->s_dev);
				790	return err;
				791	}
				792
				793	static int ceph_get_sb(struct file_system_type *fs_type,
				794	int flags, const char dev_name, void data,
				795	struct vfsmount *mnt)
				796	{
				797	struct super_block *sb;
				798	struct ceph_client *client;
				799	int err;
				800	int (compare_super)(struct super_block , void *) = ceph_compare_super;
				801	const char *path;
				802
				803	dout("ceph_get_sb\n");
				804
				805	/* create client (which we may/may not use) */
				806	client = ceph_create_client();
				807	if (IS_ERR(client))
				808	return PTR_ERR(client);
				809
				810	err = parse_mount_args(client, flags, data, dev_name, &path);
				811	if (err < 0)
				812	goto out;
				813
				814	if (client->mount_args.flags & CEPH_OPT_NOSHARE)
				815	compare_super = NULL;
				816	sb = sget(fs_type, compare_super, ceph_set_super, client);
				817	if (IS_ERR(sb)) {
				818	err = PTR_ERR(sb);
				819	goto out;
				820	}
				821
				822	if (ceph_client(sb) != client) {
				823	ceph_destroy_client(client);
				824	client = ceph_client(sb);
				825	dout("get_sb got existing client %p\n", client);
				826	} else {
				827	dout("get_sb using new client %p\n", client);
				828
				829	/* set up mempools */
				830	err = -ENOMEM;
				831	client->wb_pagevec_pool = mempool_create_kmalloc_pool(10,
				832	client->mount_args.wsize >> PAGE_CACHE_SHIFT);
				833	if (!client->wb_pagevec_pool)
				834	goto out_splat;
				835
				836	err = ceph_init_bdi(sb, client);
				837	if (err < 0)
				838	goto out_splat;
				839	}
				840
				841	err = ceph_mount(client, mnt, path);
				842	if (err < 0)
				843	goto out_splat;
				844	dout("root %p inode %p ino %llx.%llx\n", mnt->mnt_root,
				845	mnt->mnt_root->d_inode, ceph_vinop(mnt->mnt_root->d_inode));
				846	return 0;
				847
				848	out_splat:
				849	ceph_mdsc_close_sessions(&client->mdsc);
				850	up_write(&sb->s_umount);
				851	deactivate_super(sb);
				852	goto out_final;
				853
				854	out:
				855	ceph_destroy_client(client);
				856	out_final:
				857	dout("ceph_get_sb fail %d\n", err);
				858	return err;
				859	}
				860
				861	static void ceph_kill_sb(struct super_block *s)
				862	{
				863	struct ceph_client *client = ceph_sb_to_client(s);
				864	dout("kill_sb %p\n", s);
				865	ceph_mdsc_pre_umount(&client->mdsc);
				866	bdi_unregister(&client->backing_dev_info);
				867	kill_anon_super(s); /* will call put_super after sb is r/o */
				868	bdi_destroy(&client->backing_dev_info);
				869	ceph_destroy_client(client);
				870	}
				871
				872	static struct file_system_type ceph_fs_type = {
				873	.owner = THIS_MODULE,
				874	.name = "ceph",
				875	.get_sb = ceph_get_sb,
				876	.kill_sb = ceph_kill_sb,
				877	.fs_flags = FS_RENAME_DOES_D_MOVE,
				878	};
				879
				880	#define _STRINGIFY(x) #x
				881	#define STRINGIFY(x) _STRINGIFY(x)
				882
				883	static int __init init_ceph(void)
				884	{
				885	int ret = 0;
				886
				887	ret = ceph_debugfs_init();
				888	if (ret < 0)
				889	goto out;
				890
				891	ret = ceph_msgr_init();
				892	if (ret < 0)
				893	goto out_debugfs;
				894
				895	ret = init_caches();
				896	if (ret)
				897	goto out_msgr;
				898
				899	ceph_caps_init();
				900
				901	ret = register_filesystem(&ceph_fs_type);
				902	if (ret)
				903	goto out_icache;
				904
Sage Weil	fa0b72e	2009-10-07 10:59:10 -0700	[diff] [blame^]	905	pr_info("loaded %d.%d.%d (mon/mds/osd proto %d/%d/%d)\n",
				906	CEPH_VERSION_MAJOR, CEPH_VERSION_MINOR, CEPH_VERSION_PATCH,
				907	CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL);
Sage Weil	16725b9	2009-10-06 11:31:07 -0700	[diff] [blame]	908	return 0;
				909
				910	out_icache:
				911	destroy_caches();
				912	out_msgr:
				913	ceph_msgr_exit();
				914	out_debugfs:
				915	ceph_debugfs_cleanup();
				916	out:
				917	return ret;
				918	}
				919
				920	static void __exit exit_ceph(void)
				921	{
				922	dout("exit_ceph\n");
				923	unregister_filesystem(&ceph_fs_type);
				924	ceph_caps_finalize();
				925	destroy_caches();
				926	ceph_msgr_exit();
				927	ceph_debugfs_cleanup();
				928	}
				929
				930	module_init(init_ceph);
				931	module_exit(exit_ceph);
				932
				933	MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
				934	MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
				935	MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
				936	MODULE_DESCRIPTION("Ceph filesystem for Linux");
				937	MODULE_LICENSE("GPL");