blob: e38423e82f2ebe3f311a3f173964181c5ebe1860 [file] [log] [blame]
Sage Weila8e63b72009-10-06 11:31:13 -07001#include "ceph_debug.h"
2
3#include <linux/exportfs.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +09004#include <linux/slab.h>
Sage Weila8e63b72009-10-06 11:31:13 -07005#include <asm/unaligned.h>
6
7#include "super.h"
8
/*
 * NFS export support
 *
 * NFS re-export of a ceph mount is, at present, only semi-reliable.
 * The basic issue is that the Ceph architecture doesn't lend itself
 * well to generating filehandles that will remain valid forever.
 *
 * So, we do our best.  If you're lucky, your inode will be in the
 * client's cache.  If it's not, and you have a connectable fh, then
 * the MDS server may be able to find it for you.  Otherwise, you get
 * ESTALE.
 *
 * There are ways to make this more reliable, but in the non-connectable
 * fh case, we won't ever work perfectly, and in the connectable case,
 * some changes are needed on the MDS side to work better.
 */
25
26/*
27 * Basic fh
28 */
29struct ceph_nfs_fh {
30 u64 ino;
31} __attribute__ ((packed));
32
33/*
34 * Larger 'connectable' fh that includes parent ino and name hash.
35 * Use this whenever possible, as it works more reliably.
36 */
37struct ceph_nfs_confh {
38 u64 ino, parent_ino;
39 u32 parent_name_hash;
40} __attribute__ ((packed));
41
42static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
43 int connectable)
44{
Aneesh Kumar K.V92923dc2010-10-05 16:03:41 +053045 int type;
Sage Weila8e63b72009-10-06 11:31:13 -070046 struct ceph_nfs_fh *fh = (void *)rawfh;
47 struct ceph_nfs_confh *cfh = (void *)rawfh;
48 struct dentry *parent = dentry->d_parent;
49 struct inode *inode = dentry->d_inode;
Aneesh Kumar K.V92923dc2010-10-05 16:03:41 +053050 int connected_handle_length = sizeof(*cfh)/4;
51 int handle_length = sizeof(*fh)/4;
Sage Weila8e63b72009-10-06 11:31:13 -070052
53 /* don't re-export snaps */
54 if (ceph_snap(inode) != CEPH_NOSNAP)
55 return -EINVAL;
56
Aneesh Kumar K.V92923dc2010-10-05 16:03:41 +053057 if (*max_len >= connected_handle_length) {
Sage Weila8e63b72009-10-06 11:31:13 -070058 dout("encode_fh %p connectable\n", dentry);
59 cfh->ino = ceph_ino(dentry->d_inode);
60 cfh->parent_ino = ceph_ino(parent->d_inode);
61 cfh->parent_name_hash = parent->d_name.hash;
Aneesh Kumar K.V92923dc2010-10-05 16:03:41 +053062 *max_len = connected_handle_length;
Sage Weila8e63b72009-10-06 11:31:13 -070063 type = 2;
Aneesh Kumar K.V92923dc2010-10-05 16:03:41 +053064 } else if (*max_len >= handle_length) {
Aneesh Kumar K.Vbba0cd02010-10-05 16:03:42 +053065 if (connectable) {
66 *max_len = connected_handle_length;
Aneesh Kumar K.V92923dc2010-10-05 16:03:41 +053067 return 255;
Aneesh Kumar K.Vbba0cd02010-10-05 16:03:42 +053068 }
Sage Weila8e63b72009-10-06 11:31:13 -070069 dout("encode_fh %p\n", dentry);
70 fh->ino = ceph_ino(dentry->d_inode);
Aneesh Kumar K.V92923dc2010-10-05 16:03:41 +053071 *max_len = handle_length;
Sage Weila8e63b72009-10-06 11:31:13 -070072 type = 1;
73 } else {
Aneesh Kumar K.Vbba0cd02010-10-05 16:03:42 +053074 *max_len = handle_length;
Aneesh Kumar K.V92923dc2010-10-05 16:03:41 +053075 return 255;
Sage Weila8e63b72009-10-06 11:31:13 -070076 }
77 return type;
78}
79
80/*
81 * convert regular fh to dentry
82 *
83 * FIXME: we should try harder by querying the mds for the ino.
84 */
85static struct dentry *__fh_to_dentry(struct super_block *sb,
86 struct ceph_nfs_fh *fh)
87{
88 struct inode *inode;
89 struct dentry *dentry;
90 struct ceph_vino vino;
91 int err;
92
93 dout("__fh_to_dentry %llx\n", fh->ino);
94 vino.ino = fh->ino;
95 vino.snap = CEPH_NOSNAP;
96 inode = ceph_find_inode(sb, vino);
97 if (!inode)
98 return ERR_PTR(-ESTALE);
99
100 dentry = d_obtain_alias(inode);
Dan Carpenter0d509c92010-04-21 12:31:13 +0200101 if (IS_ERR(dentry)) {
Sage Weila8e63b72009-10-06 11:31:13 -0700102 pr_err("fh_to_dentry %llx -- inode %p but ENOMEM\n",
103 fh->ino, inode);
104 iput(inode);
Dan Carpenter0d509c92010-04-21 12:31:13 +0200105 return dentry;
Sage Weila8e63b72009-10-06 11:31:13 -0700106 }
107 err = ceph_init_dentry(dentry);
108
109 if (err < 0) {
110 iput(inode);
111 return ERR_PTR(err);
112 }
113 dout("__fh_to_dentry %llx %p dentry %p\n", fh->ino, inode, dentry);
114 return dentry;
115}
116
117/*
118 * convert connectable fh to dentry
119 */
120static struct dentry *__cfh_to_dentry(struct super_block *sb,
121 struct ceph_nfs_confh *cfh)
122{
Cheng Renquan640ef792010-03-26 17:40:33 +0800123 struct ceph_mds_client *mdsc = &ceph_sb_to_client(sb)->mdsc;
Sage Weila8e63b72009-10-06 11:31:13 -0700124 struct inode *inode;
125 struct dentry *dentry;
126 struct ceph_vino vino;
127 int err;
128
129 dout("__cfh_to_dentry %llx (%llx/%x)\n",
130 cfh->ino, cfh->parent_ino, cfh->parent_name_hash);
131
132 vino.ino = cfh->ino;
133 vino.snap = CEPH_NOSNAP;
134 inode = ceph_find_inode(sb, vino);
135 if (!inode) {
136 struct ceph_mds_request *req;
137
138 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPHASH,
139 USE_ANY_MDS);
140 if (IS_ERR(req))
Julia Lawall7e34bc52010-05-22 12:01:14 +0200141 return ERR_CAST(req);
Sage Weila8e63b72009-10-06 11:31:13 -0700142
143 req->r_ino1 = vino;
144 req->r_ino2.ino = cfh->parent_ino;
145 req->r_ino2.snap = CEPH_NOSNAP;
146 req->r_path2 = kmalloc(16, GFP_NOFS);
147 snprintf(req->r_path2, 16, "%d", cfh->parent_name_hash);
148 req->r_num_caps = 1;
149 err = ceph_mdsc_do_request(mdsc, NULL, req);
150 ceph_mdsc_put_request(req);
151 inode = ceph_find_inode(sb, vino);
152 if (!inode)
153 return ERR_PTR(err ? err : -ESTALE);
154 }
155
156 dentry = d_obtain_alias(inode);
Dan Carpenter0d509c92010-04-21 12:31:13 +0200157 if (IS_ERR(dentry)) {
Sage Weila8e63b72009-10-06 11:31:13 -0700158 pr_err("cfh_to_dentry %llx -- inode %p but ENOMEM\n",
159 cfh->ino, inode);
160 iput(inode);
Dan Carpenter0d509c92010-04-21 12:31:13 +0200161 return dentry;
Sage Weila8e63b72009-10-06 11:31:13 -0700162 }
163 err = ceph_init_dentry(dentry);
164 if (err < 0) {
165 iput(inode);
166 return ERR_PTR(err);
167 }
168 dout("__cfh_to_dentry %llx %p dentry %p\n", cfh->ino, inode, dentry);
169 return dentry;
170}
171
172static struct dentry *ceph_fh_to_dentry(struct super_block *sb, struct fid *fid,
173 int fh_len, int fh_type)
174{
175 if (fh_type == 1)
176 return __fh_to_dentry(sb, (struct ceph_nfs_fh *)fid->raw);
177 else
178 return __cfh_to_dentry(sb, (struct ceph_nfs_confh *)fid->raw);
179}
180
181/*
182 * get parent, if possible.
183 *
184 * FIXME: we could do better by querying the mds to discover the
185 * parent.
186 */
187static struct dentry *ceph_fh_to_parent(struct super_block *sb,
188 struct fid *fid,
189 int fh_len, int fh_type)
190{
191 struct ceph_nfs_confh *cfh = (void *)fid->raw;
192 struct ceph_vino vino;
193 struct inode *inode;
194 struct dentry *dentry;
195 int err;
196
197 if (fh_type == 1)
198 return ERR_PTR(-ESTALE);
199
200 pr_debug("fh_to_parent %llx/%d\n", cfh->parent_ino,
201 cfh->parent_name_hash);
202
203 vino.ino = cfh->ino;
204 vino.snap = CEPH_NOSNAP;
205 inode = ceph_find_inode(sb, vino);
206 if (!inode)
207 return ERR_PTR(-ESTALE);
208
209 dentry = d_obtain_alias(inode);
Dan Carpenter0d509c92010-04-21 12:31:13 +0200210 if (IS_ERR(dentry)) {
Sage Weila8e63b72009-10-06 11:31:13 -0700211 pr_err("fh_to_parent %llx -- inode %p but ENOMEM\n",
212 cfh->ino, inode);
213 iput(inode);
Dan Carpenter0d509c92010-04-21 12:31:13 +0200214 return dentry;
Sage Weila8e63b72009-10-06 11:31:13 -0700215 }
216 err = ceph_init_dentry(dentry);
217 if (err < 0) {
218 iput(inode);
219 return ERR_PTR(err);
220 }
221 dout("fh_to_parent %llx %p dentry %p\n", cfh->ino, inode, dentry);
222 return dentry;
223}
224
225const struct export_operations ceph_export_ops = {
226 .encode_fh = ceph_encode_fh,
227 .fh_to_dentry = ceph_fh_to_dentry,
228 .fh_to_parent = ceph_fh_to_parent,
229};