| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1 |  | 
| Yehuda Sadeh | 3d14c5d | 2010-04-06 15:14:15 -0700 | [diff] [blame] | 2 | #include <linux/ceph/ceph_debug.h> | 
| Tejun Heo | 5a0e3ad | 2010-03-24 17:04:11 +0900 | [diff] [blame] | 3 |  | 
| Yehuda Sadeh | 3d14c5d | 2010-04-06 15:14:15 -0700 | [diff] [blame] | 4 | #include <linux/module.h> | 
| Tejun Heo | 5a0e3ad | 2010-03-24 17:04:11 +0900 | [diff] [blame] | 5 | #include <linux/slab.h> | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 6 | #include <asm/div64.h> | 
 | 7 |  | 
| Yehuda Sadeh | 3d14c5d | 2010-04-06 15:14:15 -0700 | [diff] [blame] | 8 | #include <linux/ceph/libceph.h> | 
 | 9 | #include <linux/ceph/osdmap.h> | 
 | 10 | #include <linux/ceph/decode.h> | 
 | 11 | #include <linux/crush/hash.h> | 
 | 12 | #include <linux/crush/mapper.h> | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 13 |  | 
 | 14 | char *ceph_osdmap_state_str(char *str, int len, int state) | 
 | 15 | { | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 16 | 	if (!len) | 
| Cong Ding | 1ec3911 | 2013-01-25 17:48:59 -0600 | [diff] [blame] | 17 | 		return str; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 18 |  | 
| Cong Ding | 1ec3911 | 2013-01-25 17:48:59 -0600 | [diff] [blame] | 19 | 	if ((state & CEPH_OSD_EXISTS) && (state & CEPH_OSD_UP)) | 
 | 20 | 		snprintf(str, len, "exists, up"); | 
 | 21 | 	else if (state & CEPH_OSD_EXISTS) | 
 | 22 | 		snprintf(str, len, "exists"); | 
 | 23 | 	else if (state & CEPH_OSD_UP) | 
 | 24 | 		snprintf(str, len, "up"); | 
 | 25 | 	else | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 26 | 		snprintf(str, len, "doesn't exist"); | 
| Cong Ding | 1ec3911 | 2013-01-25 17:48:59 -0600 | [diff] [blame] | 27 |  | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 28 | 	return str; | 
 | 29 | } | 
 | 30 |  | 
 | 31 | /* maps */ | 
 | 32 |  | 
/*
 * Number of bits needed to represent @t: the 1-based position of its
 * highest set bit, or 0 when @t is 0.
 */
static int calc_bits_of(unsigned int t)
{
	int nbits;

	for (nbits = 0; t != 0; t >>= 1)
		nbits++;

	return nbits;
}
 | 42 |  | 
 | 43 | /* | 
 | 44 |  * the foo_mask is the smallest value 2^n-1 that is >= foo. | 
 | 45 |  */ | 
 | 46 | static void calc_pg_masks(struct ceph_pg_pool_info *pi) | 
 | 47 | { | 
| Sage Weil | 4f6a7e5 | 2013-02-23 10:41:09 -0800 | [diff] [blame] | 48 | 	pi->pg_num_mask = (1 << calc_bits_of(pi->pg_num-1)) - 1; | 
 | 49 | 	pi->pgp_num_mask = (1 << calc_bits_of(pi->pgp_num-1)) - 1; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 50 | } | 
 | 51 |  | 
 | 52 | /* | 
 | 53 |  * decode crush map | 
 | 54 |  */ | 
 | 55 | static int crush_decode_uniform_bucket(void **p, void *end, | 
 | 56 | 				       struct crush_bucket_uniform *b) | 
 | 57 | { | 
 | 58 | 	dout("crush_decode_uniform_bucket %p to %p\n", *p, end); | 
 | 59 | 	ceph_decode_need(p, end, (1+b->h.size) * sizeof(u32), bad); | 
| Sage Weil | c89136e | 2009-10-14 09:59:09 -0700 | [diff] [blame] | 60 | 	b->item_weight = ceph_decode_32(p); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 61 | 	return 0; | 
 | 62 | bad: | 
 | 63 | 	return -EINVAL; | 
 | 64 | } | 
 | 65 |  | 
 | 66 | static int crush_decode_list_bucket(void **p, void *end, | 
 | 67 | 				    struct crush_bucket_list *b) | 
 | 68 | { | 
 | 69 | 	int j; | 
 | 70 | 	dout("crush_decode_list_bucket %p to %p\n", *p, end); | 
 | 71 | 	b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS); | 
 | 72 | 	if (b->item_weights == NULL) | 
 | 73 | 		return -ENOMEM; | 
 | 74 | 	b->sum_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS); | 
 | 75 | 	if (b->sum_weights == NULL) | 
 | 76 | 		return -ENOMEM; | 
 | 77 | 	ceph_decode_need(p, end, 2 * b->h.size * sizeof(u32), bad); | 
 | 78 | 	for (j = 0; j < b->h.size; j++) { | 
| Sage Weil | c89136e | 2009-10-14 09:59:09 -0700 | [diff] [blame] | 79 | 		b->item_weights[j] = ceph_decode_32(p); | 
 | 80 | 		b->sum_weights[j] = ceph_decode_32(p); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 81 | 	} | 
 | 82 | 	return 0; | 
 | 83 | bad: | 
 | 84 | 	return -EINVAL; | 
 | 85 | } | 
 | 86 |  | 
 | 87 | static int crush_decode_tree_bucket(void **p, void *end, | 
 | 88 | 				    struct crush_bucket_tree *b) | 
 | 89 | { | 
 | 90 | 	int j; | 
 | 91 | 	dout("crush_decode_tree_bucket %p to %p\n", *p, end); | 
 | 92 | 	ceph_decode_32_safe(p, end, b->num_nodes, bad); | 
 | 93 | 	b->node_weights = kcalloc(b->num_nodes, sizeof(u32), GFP_NOFS); | 
 | 94 | 	if (b->node_weights == NULL) | 
 | 95 | 		return -ENOMEM; | 
 | 96 | 	ceph_decode_need(p, end, b->num_nodes * sizeof(u32), bad); | 
 | 97 | 	for (j = 0; j < b->num_nodes; j++) | 
| Sage Weil | c89136e | 2009-10-14 09:59:09 -0700 | [diff] [blame] | 98 | 		b->node_weights[j] = ceph_decode_32(p); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 99 | 	return 0; | 
 | 100 | bad: | 
 | 101 | 	return -EINVAL; | 
 | 102 | } | 
 | 103 |  | 
 | 104 | static int crush_decode_straw_bucket(void **p, void *end, | 
 | 105 | 				     struct crush_bucket_straw *b) | 
 | 106 | { | 
 | 107 | 	int j; | 
 | 108 | 	dout("crush_decode_straw_bucket %p to %p\n", *p, end); | 
 | 109 | 	b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS); | 
 | 110 | 	if (b->item_weights == NULL) | 
 | 111 | 		return -ENOMEM; | 
 | 112 | 	b->straws = kcalloc(b->h.size, sizeof(u32), GFP_NOFS); | 
 | 113 | 	if (b->straws == NULL) | 
 | 114 | 		return -ENOMEM; | 
 | 115 | 	ceph_decode_need(p, end, 2 * b->h.size * sizeof(u32), bad); | 
 | 116 | 	for (j = 0; j < b->h.size; j++) { | 
| Sage Weil | c89136e | 2009-10-14 09:59:09 -0700 | [diff] [blame] | 117 | 		b->item_weights[j] = ceph_decode_32(p); | 
 | 118 | 		b->straws[j] = ceph_decode_32(p); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 119 | 	} | 
 | 120 | 	return 0; | 
 | 121 | bad: | 
 | 122 | 	return -EINVAL; | 
 | 123 | } | 
 | 124 |  | 
| Sage Weil | 546f04e | 2012-07-30 18:15:23 -0700 | [diff] [blame] | 125 | static int skip_name_map(void **p, void *end) | 
 | 126 | { | 
 | 127 |         int len; | 
 | 128 |         ceph_decode_32_safe(p, end, len ,bad); | 
 | 129 |         while (len--) { | 
 | 130 |                 int strlen; | 
 | 131 |                 *p += sizeof(u32); | 
 | 132 |                 ceph_decode_32_safe(p, end, strlen, bad); | 
 | 133 |                 *p += strlen; | 
 | 134 | } | 
 | 135 |         return 0; | 
 | 136 | bad: | 
 | 137 |         return -EINVAL; | 
 | 138 | } | 
 | 139 |  | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 140 | static struct crush_map *crush_decode(void *pbyval, void *end) | 
 | 141 | { | 
 | 142 | 	struct crush_map *c; | 
 | 143 | 	int err = -EINVAL; | 
 | 144 | 	int i, j; | 
 | 145 | 	void **p = &pbyval; | 
 | 146 | 	void *start = pbyval; | 
 | 147 | 	u32 magic; | 
| Sage Weil | 546f04e | 2012-07-30 18:15:23 -0700 | [diff] [blame] | 148 | 	u32 num_name_maps; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 149 |  | 
 | 150 | 	dout("crush_decode %p to %p len %d\n", *p, end, (int)(end - *p)); | 
 | 151 |  | 
 | 152 | 	c = kzalloc(sizeof(*c), GFP_NOFS); | 
 | 153 | 	if (c == NULL) | 
 | 154 | 		return ERR_PTR(-ENOMEM); | 
 | 155 |  | 
| Sage Weil | 546f04e | 2012-07-30 18:15:23 -0700 | [diff] [blame] | 156 |         /* set tunables to default values */ | 
 | 157 |         c->choose_local_tries = 2; | 
 | 158 |         c->choose_local_fallback_tries = 5; | 
 | 159 |         c->choose_total_tries = 19; | 
| Jim Schutt | 1604f48 | 2012-11-30 09:15:25 -0700 | [diff] [blame] | 160 | 	c->chooseleaf_descend_once = 0; | 
| Sage Weil | 546f04e | 2012-07-30 18:15:23 -0700 | [diff] [blame] | 161 |  | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 162 | 	ceph_decode_need(p, end, 4*sizeof(u32), bad); | 
| Sage Weil | c89136e | 2009-10-14 09:59:09 -0700 | [diff] [blame] | 163 | 	magic = ceph_decode_32(p); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 164 | 	if (magic != CRUSH_MAGIC) { | 
 | 165 | 		pr_err("crush_decode magic %x != current %x\n", | 
| Eric Dumazet | 95c9617 | 2012-04-15 05:58:06 +0000 | [diff] [blame] | 166 | 		       (unsigned int)magic, (unsigned int)CRUSH_MAGIC); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 167 | 		goto bad; | 
 | 168 | 	} | 
| Sage Weil | c89136e | 2009-10-14 09:59:09 -0700 | [diff] [blame] | 169 | 	c->max_buckets = ceph_decode_32(p); | 
 | 170 | 	c->max_rules = ceph_decode_32(p); | 
 | 171 | 	c->max_devices = ceph_decode_32(p); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 172 |  | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 173 | 	c->buckets = kcalloc(c->max_buckets, sizeof(*c->buckets), GFP_NOFS); | 
 | 174 | 	if (c->buckets == NULL) | 
 | 175 | 		goto badmem; | 
 | 176 | 	c->rules = kcalloc(c->max_rules, sizeof(*c->rules), GFP_NOFS); | 
 | 177 | 	if (c->rules == NULL) | 
 | 178 | 		goto badmem; | 
 | 179 |  | 
 | 180 | 	/* buckets */ | 
 | 181 | 	for (i = 0; i < c->max_buckets; i++) { | 
 | 182 | 		int size = 0; | 
 | 183 | 		u32 alg; | 
 | 184 | 		struct crush_bucket *b; | 
 | 185 |  | 
 | 186 | 		ceph_decode_32_safe(p, end, alg, bad); | 
 | 187 | 		if (alg == 0) { | 
 | 188 | 			c->buckets[i] = NULL; | 
 | 189 | 			continue; | 
 | 190 | 		} | 
 | 191 | 		dout("crush_decode bucket %d off %x %p to %p\n", | 
 | 192 | 		     i, (int)(*p-start), *p, end); | 
 | 193 |  | 
 | 194 | 		switch (alg) { | 
 | 195 | 		case CRUSH_BUCKET_UNIFORM: | 
 | 196 | 			size = sizeof(struct crush_bucket_uniform); | 
 | 197 | 			break; | 
 | 198 | 		case CRUSH_BUCKET_LIST: | 
 | 199 | 			size = sizeof(struct crush_bucket_list); | 
 | 200 | 			break; | 
 | 201 | 		case CRUSH_BUCKET_TREE: | 
 | 202 | 			size = sizeof(struct crush_bucket_tree); | 
 | 203 | 			break; | 
 | 204 | 		case CRUSH_BUCKET_STRAW: | 
 | 205 | 			size = sizeof(struct crush_bucket_straw); | 
 | 206 | 			break; | 
 | 207 | 		default: | 
| Sage Weil | 30dc638 | 2009-12-21 14:49:37 -0800 | [diff] [blame] | 208 | 			err = -EINVAL; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 209 | 			goto bad; | 
 | 210 | 		} | 
 | 211 | 		BUG_ON(size == 0); | 
 | 212 | 		b = c->buckets[i] = kzalloc(size, GFP_NOFS); | 
 | 213 | 		if (b == NULL) | 
 | 214 | 			goto badmem; | 
 | 215 |  | 
 | 216 | 		ceph_decode_need(p, end, 4*sizeof(u32), bad); | 
| Sage Weil | c89136e | 2009-10-14 09:59:09 -0700 | [diff] [blame] | 217 | 		b->id = ceph_decode_32(p); | 
 | 218 | 		b->type = ceph_decode_16(p); | 
| Sage Weil | fb69039 | 2009-11-07 20:18:22 -0800 | [diff] [blame] | 219 | 		b->alg = ceph_decode_8(p); | 
 | 220 | 		b->hash = ceph_decode_8(p); | 
| Sage Weil | c89136e | 2009-10-14 09:59:09 -0700 | [diff] [blame] | 221 | 		b->weight = ceph_decode_32(p); | 
 | 222 | 		b->size = ceph_decode_32(p); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 223 |  | 
 | 224 | 		dout("crush_decode bucket size %d off %x %p to %p\n", | 
 | 225 | 		     b->size, (int)(*p-start), *p, end); | 
 | 226 |  | 
 | 227 | 		b->items = kcalloc(b->size, sizeof(__s32), GFP_NOFS); | 
 | 228 | 		if (b->items == NULL) | 
 | 229 | 			goto badmem; | 
 | 230 | 		b->perm = kcalloc(b->size, sizeof(u32), GFP_NOFS); | 
 | 231 | 		if (b->perm == NULL) | 
 | 232 | 			goto badmem; | 
 | 233 | 		b->perm_n = 0; | 
 | 234 |  | 
 | 235 | 		ceph_decode_need(p, end, b->size*sizeof(u32), bad); | 
 | 236 | 		for (j = 0; j < b->size; j++) | 
| Sage Weil | c89136e | 2009-10-14 09:59:09 -0700 | [diff] [blame] | 237 | 			b->items[j] = ceph_decode_32(p); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 238 |  | 
 | 239 | 		switch (b->alg) { | 
 | 240 | 		case CRUSH_BUCKET_UNIFORM: | 
 | 241 | 			err = crush_decode_uniform_bucket(p, end, | 
 | 242 | 				  (struct crush_bucket_uniform *)b); | 
 | 243 | 			if (err < 0) | 
 | 244 | 				goto bad; | 
 | 245 | 			break; | 
 | 246 | 		case CRUSH_BUCKET_LIST: | 
 | 247 | 			err = crush_decode_list_bucket(p, end, | 
 | 248 | 			       (struct crush_bucket_list *)b); | 
 | 249 | 			if (err < 0) | 
 | 250 | 				goto bad; | 
 | 251 | 			break; | 
 | 252 | 		case CRUSH_BUCKET_TREE: | 
 | 253 | 			err = crush_decode_tree_bucket(p, end, | 
 | 254 | 				(struct crush_bucket_tree *)b); | 
 | 255 | 			if (err < 0) | 
 | 256 | 				goto bad; | 
 | 257 | 			break; | 
 | 258 | 		case CRUSH_BUCKET_STRAW: | 
 | 259 | 			err = crush_decode_straw_bucket(p, end, | 
 | 260 | 				(struct crush_bucket_straw *)b); | 
 | 261 | 			if (err < 0) | 
 | 262 | 				goto bad; | 
 | 263 | 			break; | 
 | 264 | 		} | 
 | 265 | 	} | 
 | 266 |  | 
 | 267 | 	/* rules */ | 
 | 268 | 	dout("rule vec is %p\n", c->rules); | 
 | 269 | 	for (i = 0; i < c->max_rules; i++) { | 
 | 270 | 		u32 yes; | 
 | 271 | 		struct crush_rule *r; | 
 | 272 |  | 
 | 273 | 		ceph_decode_32_safe(p, end, yes, bad); | 
 | 274 | 		if (!yes) { | 
 | 275 | 			dout("crush_decode NO rule %d off %x %p to %p\n", | 
 | 276 | 			     i, (int)(*p-start), *p, end); | 
 | 277 | 			c->rules[i] = NULL; | 
 | 278 | 			continue; | 
 | 279 | 		} | 
 | 280 |  | 
 | 281 | 		dout("crush_decode rule %d off %x %p to %p\n", | 
 | 282 | 		     i, (int)(*p-start), *p, end); | 
 | 283 |  | 
 | 284 | 		/* len */ | 
 | 285 | 		ceph_decode_32_safe(p, end, yes, bad); | 
 | 286 | #if BITS_PER_LONG == 32 | 
| Sage Weil | 30dc638 | 2009-12-21 14:49:37 -0800 | [diff] [blame] | 287 | 		err = -EINVAL; | 
| Xi Wang | 6448669 | 2012-02-16 11:55:48 -0500 | [diff] [blame] | 288 | 		if (yes > (ULONG_MAX - sizeof(*r)) | 
 | 289 | 			  / sizeof(struct crush_rule_step)) | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 290 | 			goto bad; | 
 | 291 | #endif | 
 | 292 | 		r = c->rules[i] = kmalloc(sizeof(*r) + | 
 | 293 | 					  yes*sizeof(struct crush_rule_step), | 
 | 294 | 					  GFP_NOFS); | 
 | 295 | 		if (r == NULL) | 
 | 296 | 			goto badmem; | 
 | 297 | 		dout(" rule %d is at %p\n", i, r); | 
 | 298 | 		r->len = yes; | 
 | 299 | 		ceph_decode_copy_safe(p, end, &r->mask, 4, bad); /* 4 u8's */ | 
 | 300 | 		ceph_decode_need(p, end, r->len*3*sizeof(u32), bad); | 
 | 301 | 		for (j = 0; j < r->len; j++) { | 
| Sage Weil | c89136e | 2009-10-14 09:59:09 -0700 | [diff] [blame] | 302 | 			r->steps[j].op = ceph_decode_32(p); | 
 | 303 | 			r->steps[j].arg1 = ceph_decode_32(p); | 
 | 304 | 			r->steps[j].arg2 = ceph_decode_32(p); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 305 | 		} | 
 | 306 | 	} | 
 | 307 |  | 
 | 308 | 	/* ignore trailing name maps. */ | 
| Sage Weil | 546f04e | 2012-07-30 18:15:23 -0700 | [diff] [blame] | 309 |         for (num_name_maps = 0; num_name_maps < 3; num_name_maps++) { | 
 | 310 |                 err = skip_name_map(p, end); | 
 | 311 |                 if (err < 0) | 
 | 312 |                         goto done; | 
 | 313 |         } | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 314 |  | 
| Sage Weil | 546f04e | 2012-07-30 18:15:23 -0700 | [diff] [blame] | 315 |         /* tunables */ | 
 | 316 |         ceph_decode_need(p, end, 3*sizeof(u32), done); | 
 | 317 |         c->choose_local_tries = ceph_decode_32(p); | 
 | 318 |         c->choose_local_fallback_tries =  ceph_decode_32(p); | 
 | 319 |         c->choose_total_tries = ceph_decode_32(p); | 
 | 320 |         dout("crush decode tunable choose_local_tries = %d", | 
 | 321 |              c->choose_local_tries); | 
 | 322 |         dout("crush decode tunable choose_local_fallback_tries = %d", | 
 | 323 |              c->choose_local_fallback_tries); | 
 | 324 |         dout("crush decode tunable choose_total_tries = %d", | 
 | 325 |              c->choose_total_tries); | 
 | 326 |  | 
| Jim Schutt | 1604f48 | 2012-11-30 09:15:25 -0700 | [diff] [blame] | 327 | 	ceph_decode_need(p, end, sizeof(u32), done); | 
 | 328 | 	c->chooseleaf_descend_once = ceph_decode_32(p); | 
 | 329 | 	dout("crush decode tunable chooseleaf_descend_once = %d", | 
 | 330 | 	     c->chooseleaf_descend_once); | 
 | 331 |  | 
| Sage Weil | 546f04e | 2012-07-30 18:15:23 -0700 | [diff] [blame] | 332 | done: | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 333 | 	dout("crush_decode success\n"); | 
 | 334 | 	return c; | 
 | 335 |  | 
 | 336 | badmem: | 
 | 337 | 	err = -ENOMEM; | 
 | 338 | bad: | 
 | 339 | 	dout("crush_decode fail %d\n", err); | 
 | 340 | 	crush_destroy(c); | 
 | 341 | 	return ERR_PTR(err); | 
 | 342 | } | 
 | 343 |  | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 344 | /* | 
| Sage Weil | 9794b14 | 2010-02-16 15:53:32 -0800 | [diff] [blame] | 345 |  * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid | 
 | 346 |  * to a set of osds) | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 347 |  */ | 
| Sage Weil | 5b191d9 | 2013-02-23 10:38:16 -0800 | [diff] [blame] | 348 | static int pgid_cmp(struct ceph_pg l, struct ceph_pg r) | 
| Sage Weil | 5104212 | 2009-11-04 11:39:12 -0800 | [diff] [blame] | 349 | { | 
| Sage Weil | 5b191d9 | 2013-02-23 10:38:16 -0800 | [diff] [blame] | 350 | 	if (l.pool < r.pool) | 
| Sage Weil | 5104212 | 2009-11-04 11:39:12 -0800 | [diff] [blame] | 351 | 		return -1; | 
| Sage Weil | 5b191d9 | 2013-02-23 10:38:16 -0800 | [diff] [blame] | 352 | 	if (l.pool > r.pool) | 
 | 353 | 		return 1; | 
 | 354 | 	if (l.seed < r.seed) | 
 | 355 | 		return -1; | 
 | 356 | 	if (l.seed > r.seed) | 
| Sage Weil | 5104212 | 2009-11-04 11:39:12 -0800 | [diff] [blame] | 357 | 		return 1; | 
 | 358 | 	return 0; | 
 | 359 | } | 
 | 360 |  | 
| Sage Weil | 991abb6 | 2009-10-08 22:22:37 -0700 | [diff] [blame] | 361 | static int __insert_pg_mapping(struct ceph_pg_mapping *new, | 
 | 362 | 			       struct rb_root *root) | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 363 | { | 
 | 364 | 	struct rb_node **p = &root->rb_node; | 
 | 365 | 	struct rb_node *parent = NULL; | 
 | 366 | 	struct ceph_pg_mapping *pg = NULL; | 
| Sage Weil | 5104212 | 2009-11-04 11:39:12 -0800 | [diff] [blame] | 367 | 	int c; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 368 |  | 
| Sage Weil | 8adc8b3 | 2011-09-28 10:11:04 -0700 | [diff] [blame] | 369 | 	dout("__insert_pg_mapping %llx %p\n", *(u64 *)&new->pgid, new); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 370 | 	while (*p) { | 
 | 371 | 		parent = *p; | 
 | 372 | 		pg = rb_entry(parent, struct ceph_pg_mapping, node); | 
| Sage Weil | 5104212 | 2009-11-04 11:39:12 -0800 | [diff] [blame] | 373 | 		c = pgid_cmp(new->pgid, pg->pgid); | 
 | 374 | 		if (c < 0) | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 375 | 			p = &(*p)->rb_left; | 
| Sage Weil | 5104212 | 2009-11-04 11:39:12 -0800 | [diff] [blame] | 376 | 		else if (c > 0) | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 377 | 			p = &(*p)->rb_right; | 
 | 378 | 		else | 
| Sage Weil | 991abb6 | 2009-10-08 22:22:37 -0700 | [diff] [blame] | 379 | 			return -EEXIST; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 380 | 	} | 
 | 381 |  | 
 | 382 | 	rb_link_node(&new->node, parent, p); | 
 | 383 | 	rb_insert_color(&new->node, root); | 
| Sage Weil | 991abb6 | 2009-10-08 22:22:37 -0700 | [diff] [blame] | 384 | 	return 0; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 385 | } | 
 | 386 |  | 
| Sage Weil | 9794b14 | 2010-02-16 15:53:32 -0800 | [diff] [blame] | 387 | static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root, | 
| Sage Weil | 5b191d9 | 2013-02-23 10:38:16 -0800 | [diff] [blame] | 388 | 						   struct ceph_pg pgid) | 
| Sage Weil | 9794b14 | 2010-02-16 15:53:32 -0800 | [diff] [blame] | 389 | { | 
 | 390 | 	struct rb_node *n = root->rb_node; | 
 | 391 | 	struct ceph_pg_mapping *pg; | 
 | 392 | 	int c; | 
 | 393 |  | 
 | 394 | 	while (n) { | 
 | 395 | 		pg = rb_entry(n, struct ceph_pg_mapping, node); | 
 | 396 | 		c = pgid_cmp(pgid, pg->pgid); | 
| Sage Weil | 8adc8b3 | 2011-09-28 10:11:04 -0700 | [diff] [blame] | 397 | 		if (c < 0) { | 
| Sage Weil | 9794b14 | 2010-02-16 15:53:32 -0800 | [diff] [blame] | 398 | 			n = n->rb_left; | 
| Sage Weil | 8adc8b3 | 2011-09-28 10:11:04 -0700 | [diff] [blame] | 399 | 		} else if (c > 0) { | 
| Sage Weil | 9794b14 | 2010-02-16 15:53:32 -0800 | [diff] [blame] | 400 | 			n = n->rb_right; | 
| Sage Weil | 8adc8b3 | 2011-09-28 10:11:04 -0700 | [diff] [blame] | 401 | 		} else { | 
| Sage Weil | 5b191d9 | 2013-02-23 10:38:16 -0800 | [diff] [blame] | 402 | 			dout("__lookup_pg_mapping %lld.%x got %p\n", | 
 | 403 | 			     pgid.pool, pgid.seed, pg); | 
| Sage Weil | 9794b14 | 2010-02-16 15:53:32 -0800 | [diff] [blame] | 404 | 			return pg; | 
| Sage Weil | 8adc8b3 | 2011-09-28 10:11:04 -0700 | [diff] [blame] | 405 | 		} | 
| Sage Weil | 9794b14 | 2010-02-16 15:53:32 -0800 | [diff] [blame] | 406 | 	} | 
 | 407 | 	return NULL; | 
 | 408 | } | 
 | 409 |  | 
| Sage Weil | 5b191d9 | 2013-02-23 10:38:16 -0800 | [diff] [blame] | 410 | static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg pgid) | 
| Sage Weil | 8adc8b3 | 2011-09-28 10:11:04 -0700 | [diff] [blame] | 411 | { | 
 | 412 | 	struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid); | 
 | 413 |  | 
 | 414 | 	if (pg) { | 
| Sage Weil | 5b191d9 | 2013-02-23 10:38:16 -0800 | [diff] [blame] | 415 | 		dout("__remove_pg_mapping %lld.%x %p\n", pgid.pool, pgid.seed, | 
 | 416 | 		     pg); | 
| Sage Weil | 8adc8b3 | 2011-09-28 10:11:04 -0700 | [diff] [blame] | 417 | 		rb_erase(&pg->node, root); | 
 | 418 | 		kfree(pg); | 
 | 419 | 		return 0; | 
 | 420 | 	} | 
| Sage Weil | 5b191d9 | 2013-02-23 10:38:16 -0800 | [diff] [blame] | 421 | 	dout("__remove_pg_mapping %lld.%x dne\n", pgid.pool, pgid.seed); | 
| Sage Weil | 8adc8b3 | 2011-09-28 10:11:04 -0700 | [diff] [blame] | 422 | 	return -ENOENT; | 
 | 423 | } | 
 | 424 |  | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 425 | /* | 
| Sage Weil | 4fc51be | 2010-02-16 15:55:03 -0800 | [diff] [blame] | 426 |  * rbtree of pg pool info | 
 | 427 |  */ | 
 | 428 | static int __insert_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *new) | 
 | 429 | { | 
 | 430 | 	struct rb_node **p = &root->rb_node; | 
 | 431 | 	struct rb_node *parent = NULL; | 
 | 432 | 	struct ceph_pg_pool_info *pi = NULL; | 
 | 433 |  | 
 | 434 | 	while (*p) { | 
 | 435 | 		parent = *p; | 
 | 436 | 		pi = rb_entry(parent, struct ceph_pg_pool_info, node); | 
 | 437 | 		if (new->id < pi->id) | 
 | 438 | 			p = &(*p)->rb_left; | 
 | 439 | 		else if (new->id > pi->id) | 
 | 440 | 			p = &(*p)->rb_right; | 
 | 441 | 		else | 
 | 442 | 			return -EEXIST; | 
 | 443 | 	} | 
 | 444 |  | 
 | 445 | 	rb_link_node(&new->node, parent, p); | 
 | 446 | 	rb_insert_color(&new->node, root); | 
 | 447 | 	return 0; | 
 | 448 | } | 
 | 449 |  | 
| Sage Weil | 4f6a7e5 | 2013-02-23 10:41:09 -0800 | [diff] [blame] | 450 | static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, u64 id) | 
| Sage Weil | 4fc51be | 2010-02-16 15:55:03 -0800 | [diff] [blame] | 451 | { | 
 | 452 | 	struct ceph_pg_pool_info *pi; | 
 | 453 | 	struct rb_node *n = root->rb_node; | 
 | 454 |  | 
 | 455 | 	while (n) { | 
 | 456 | 		pi = rb_entry(n, struct ceph_pg_pool_info, node); | 
 | 457 | 		if (id < pi->id) | 
 | 458 | 			n = n->rb_left; | 
 | 459 | 		else if (id > pi->id) | 
 | 460 | 			n = n->rb_right; | 
 | 461 | 		else | 
 | 462 | 			return pi; | 
 | 463 | 	} | 
 | 464 | 	return NULL; | 
 | 465 | } | 
 | 466 |  | 
| Alex Elder | 72afc71 | 2012-10-30 19:40:33 -0500 | [diff] [blame] | 467 | const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id) | 
 | 468 | { | 
 | 469 | 	struct ceph_pg_pool_info *pi; | 
 | 470 |  | 
 | 471 | 	if (id == CEPH_NOPOOL) | 
 | 472 | 		return NULL; | 
 | 473 |  | 
 | 474 | 	if (WARN_ON_ONCE(id > (u64) INT_MAX)) | 
 | 475 | 		return NULL; | 
 | 476 |  | 
 | 477 | 	pi = __lookup_pg_pool(&map->pg_pools, (int) id); | 
 | 478 |  | 
 | 479 | 	return pi ? pi->name : NULL; | 
 | 480 | } | 
 | 481 | EXPORT_SYMBOL(ceph_pg_pool_name_by_id); | 
 | 482 |  | 
| Yehuda Sadeh | 7669a2c | 2010-05-17 12:31:35 -0700 | [diff] [blame] | 483 | int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name) | 
 | 484 | { | 
 | 485 | 	struct rb_node *rbp; | 
 | 486 |  | 
 | 487 | 	for (rbp = rb_first(&map->pg_pools); rbp; rbp = rb_next(rbp)) { | 
 | 488 | 		struct ceph_pg_pool_info *pi = | 
 | 489 | 			rb_entry(rbp, struct ceph_pg_pool_info, node); | 
 | 490 | 		if (pi->name && strcmp(pi->name, name) == 0) | 
 | 491 | 			return pi->id; | 
 | 492 | 	} | 
 | 493 | 	return -ENOENT; | 
 | 494 | } | 
| Yehuda Sadeh | 3d14c5d | 2010-04-06 15:14:15 -0700 | [diff] [blame] | 495 | EXPORT_SYMBOL(ceph_pg_poolid_by_name); | 
| Yehuda Sadeh | 7669a2c | 2010-05-17 12:31:35 -0700 | [diff] [blame] | 496 |  | 
| Sage Weil | 2844a76 | 2010-04-09 15:46:42 -0700 | [diff] [blame] | 497 | static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi) | 
 | 498 | { | 
 | 499 | 	rb_erase(&pi->node, root); | 
 | 500 | 	kfree(pi->name); | 
 | 501 | 	kfree(pi); | 
 | 502 | } | 
 | 503 |  | 
| Sage Weil | 73a7e69 | 2010-08-02 11:00:55 -0700 | [diff] [blame] | 504 | static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi) | 
| Sage Weil | efd7576 | 2010-03-17 10:05:28 -0700 | [diff] [blame] | 505 | { | 
| Sage Weil | 4f6a7e5 | 2013-02-23 10:41:09 -0800 | [diff] [blame] | 506 | 	u8 ev, cv; | 
 | 507 | 	unsigned len, num; | 
 | 508 | 	void *pool_end; | 
| Sage Weil | 73a7e69 | 2010-08-02 11:00:55 -0700 | [diff] [blame] | 509 |  | 
| Sage Weil | 4f6a7e5 | 2013-02-23 10:41:09 -0800 | [diff] [blame] | 510 | 	ceph_decode_need(p, end, 2 + 4, bad); | 
 | 511 | 	ev = ceph_decode_8(p);  /* encoding version */ | 
 | 512 | 	cv = ceph_decode_8(p); /* compat version */ | 
 | 513 | 	if (ev < 5) { | 
 | 514 | 		pr_warning("got v %d < 5 cv %d of ceph_pg_pool\n", ev, cv); | 
 | 515 | 		return -EINVAL; | 
 | 516 | 	} | 
 | 517 | 	if (cv > 7) { | 
 | 518 | 		pr_warning("got v %d cv %d > 7 of ceph_pg_pool\n", ev, cv); | 
 | 519 | 		return -EINVAL; | 
 | 520 | 	} | 
 | 521 | 	len = ceph_decode_32(p); | 
 | 522 | 	ceph_decode_need(p, end, len, bad); | 
 | 523 | 	pool_end = *p + len; | 
| Sage Weil | 73a7e69 | 2010-08-02 11:00:55 -0700 | [diff] [blame] | 524 |  | 
| Sage Weil | 4f6a7e5 | 2013-02-23 10:41:09 -0800 | [diff] [blame] | 525 | 	pi->type = ceph_decode_8(p); | 
 | 526 | 	pi->size = ceph_decode_8(p); | 
 | 527 | 	pi->crush_ruleset = ceph_decode_8(p); | 
 | 528 | 	pi->object_hash = ceph_decode_8(p); | 
 | 529 |  | 
 | 530 | 	pi->pg_num = ceph_decode_32(p); | 
 | 531 | 	pi->pgp_num = ceph_decode_32(p); | 
 | 532 |  | 
 | 533 | 	*p += 4 + 4;  /* skip lpg* */ | 
 | 534 | 	*p += 4;      /* skip last_change */ | 
 | 535 | 	*p += 8 + 4;  /* skip snap_seq, snap_epoch */ | 
 | 536 |  | 
 | 537 | 	/* skip snaps */ | 
 | 538 | 	num = ceph_decode_32(p); | 
 | 539 | 	while (num--) { | 
 | 540 | 		*p += 8;  /* snapid key */ | 
 | 541 | 		*p += 1 + 1; /* versions */ | 
 | 542 | 		len = ceph_decode_32(p); | 
 | 543 | 		*p += len; | 
| Sage Weil | 73a7e69 | 2010-08-02 11:00:55 -0700 | [diff] [blame] | 544 | 	} | 
 | 545 |  | 
| Sage Weil | 4f6a7e5 | 2013-02-23 10:41:09 -0800 | [diff] [blame] | 546 | 	/* skip removed snaps */ | 
 | 547 | 	num = ceph_decode_32(p); | 
 | 548 | 	*p += num * (8 + 8); | 
 | 549 |  | 
 | 550 | 	*p += 8;  /* skip auid */ | 
 | 551 | 	pi->flags = ceph_decode_64(p); | 
 | 552 |  | 
 | 553 | 	/* ignore the rest */ | 
 | 554 |  | 
 | 555 | 	*p = pool_end; | 
 | 556 | 	calc_pg_masks(pi); | 
| Sage Weil | 73a7e69 | 2010-08-02 11:00:55 -0700 | [diff] [blame] | 557 | 	return 0; | 
 | 558 |  | 
 | 559 | bad: | 
 | 560 | 	return -EINVAL; | 
| Sage Weil | efd7576 | 2010-03-17 10:05:28 -0700 | [diff] [blame] | 561 | } | 
 | 562 |  | 
| Sage Weil | 2844a76 | 2010-04-09 15:46:42 -0700 | [diff] [blame] | 563 | static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map) | 
 | 564 | { | 
 | 565 | 	struct ceph_pg_pool_info *pi; | 
| Sage Weil | 4f6a7e5 | 2013-02-23 10:41:09 -0800 | [diff] [blame] | 566 | 	u32 num, len; | 
 | 567 | 	u64 pool; | 
| Sage Weil | 2844a76 | 2010-04-09 15:46:42 -0700 | [diff] [blame] | 568 |  | 
 | 569 | 	ceph_decode_32_safe(p, end, num, bad); | 
 | 570 | 	dout(" %d pool names\n", num); | 
 | 571 | 	while (num--) { | 
| Sage Weil | 4f6a7e5 | 2013-02-23 10:41:09 -0800 | [diff] [blame] | 572 | 		ceph_decode_64_safe(p, end, pool, bad); | 
| Sage Weil | 2844a76 | 2010-04-09 15:46:42 -0700 | [diff] [blame] | 573 | 		ceph_decode_32_safe(p, end, len, bad); | 
| Sage Weil | 4f6a7e5 | 2013-02-23 10:41:09 -0800 | [diff] [blame] | 574 | 		dout("  pool %llu len %d\n", pool, len); | 
| Xi Wang | ad3b904 | 2012-06-06 19:35:55 -0500 | [diff] [blame] | 575 | 		ceph_decode_need(p, end, len, bad); | 
| Sage Weil | 2844a76 | 2010-04-09 15:46:42 -0700 | [diff] [blame] | 576 | 		pi = __lookup_pg_pool(&map->pg_pools, pool); | 
 | 577 | 		if (pi) { | 
| Xi Wang | ad3b904 | 2012-06-06 19:35:55 -0500 | [diff] [blame] | 578 | 			char *name = kstrndup(*p, len, GFP_NOFS); | 
 | 579 |  | 
 | 580 | 			if (!name) | 
 | 581 | 				return -ENOMEM; | 
| Sage Weil | 2844a76 | 2010-04-09 15:46:42 -0700 | [diff] [blame] | 582 | 			kfree(pi->name); | 
| Xi Wang | ad3b904 | 2012-06-06 19:35:55 -0500 | [diff] [blame] | 583 | 			pi->name = name; | 
 | 584 | 			dout("  name is %s\n", pi->name); | 
| Sage Weil | 2844a76 | 2010-04-09 15:46:42 -0700 | [diff] [blame] | 585 | 		} | 
 | 586 | 		*p += len; | 
 | 587 | 	} | 
 | 588 | 	return 0; | 
 | 589 |  | 
 | 590 | bad: | 
 | 591 | 	return -EINVAL; | 
 | 592 | } | 
 | 593 |  | 
 | 594 | /* | 
 | 595 |  * osd map | 
 | 596 |  */ | 
 | 597 | void ceph_osdmap_destroy(struct ceph_osdmap *map) | 
 | 598 | { | 
 | 599 | 	dout("osdmap_destroy %p\n", map); | 
 | 600 | 	if (map->crush) | 
 | 601 | 		crush_destroy(map->crush); | 
 | 602 | 	while (!RB_EMPTY_ROOT(&map->pg_temp)) { | 
 | 603 | 		struct ceph_pg_mapping *pg = | 
 | 604 | 			rb_entry(rb_first(&map->pg_temp), | 
 | 605 | 				 struct ceph_pg_mapping, node); | 
 | 606 | 		rb_erase(&pg->node, &map->pg_temp); | 
 | 607 | 		kfree(pg); | 
 | 608 | 	} | 
 | 609 | 	while (!RB_EMPTY_ROOT(&map->pg_pools)) { | 
 | 610 | 		struct ceph_pg_pool_info *pi = | 
 | 611 | 			rb_entry(rb_first(&map->pg_pools), | 
 | 612 | 				 struct ceph_pg_pool_info, node); | 
 | 613 | 		__remove_pg_pool(&map->pg_pools, pi); | 
 | 614 | 	} | 
 | 615 | 	kfree(map->osd_state); | 
 | 616 | 	kfree(map->osd_weight); | 
 | 617 | 	kfree(map->osd_addr); | 
 | 618 | 	kfree(map); | 
 | 619 | } | 
 | 620 |  | 
 | 621 | /* | 
 | 622 |  * adjust max osd value.  reallocate arrays. | 
 | 623 |  */ | 
 | 624 | static int osdmap_set_max_osd(struct ceph_osdmap *map, int max) | 
 | 625 | { | 
 | 626 | 	u8 *state; | 
 | 627 | 	struct ceph_entity_addr *addr; | 
 | 628 | 	u32 *weight; | 
 | 629 |  | 
 | 630 | 	state = kcalloc(max, sizeof(*state), GFP_NOFS); | 
 | 631 | 	addr = kcalloc(max, sizeof(*addr), GFP_NOFS); | 
 | 632 | 	weight = kcalloc(max, sizeof(*weight), GFP_NOFS); | 
 | 633 | 	if (state == NULL || addr == NULL || weight == NULL) { | 
 | 634 | 		kfree(state); | 
 | 635 | 		kfree(addr); | 
 | 636 | 		kfree(weight); | 
 | 637 | 		return -ENOMEM; | 
 | 638 | 	} | 
 | 639 |  | 
 | 640 | 	/* copy old? */ | 
 | 641 | 	if (map->osd_state) { | 
 | 642 | 		memcpy(state, map->osd_state, map->max_osd*sizeof(*state)); | 
 | 643 | 		memcpy(addr, map->osd_addr, map->max_osd*sizeof(*addr)); | 
 | 644 | 		memcpy(weight, map->osd_weight, map->max_osd*sizeof(*weight)); | 
 | 645 | 		kfree(map->osd_state); | 
 | 646 | 		kfree(map->osd_addr); | 
 | 647 | 		kfree(map->osd_weight); | 
 | 648 | 	} | 
 | 649 |  | 
 | 650 | 	map->osd_state = state; | 
 | 651 | 	map->osd_weight = weight; | 
 | 652 | 	map->osd_addr = addr; | 
 | 653 | 	map->max_osd = max; | 
 | 654 | 	return 0; | 
 | 655 | } | 
 | 656 |  | 
| Sage Weil | 4fc51be | 2010-02-16 15:55:03 -0800 | [diff] [blame] | 657 | /* | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 658 |  * decode a full map. | 
 | 659 |  */ | 
 | 660 | struct ceph_osdmap *osdmap_decode(void **p, void *end) | 
 | 661 | { | 
 | 662 | 	struct ceph_osdmap *map; | 
 | 663 | 	u16 version; | 
 | 664 | 	u32 len, max, i; | 
 | 665 | 	int err = -EINVAL; | 
 | 666 | 	void *start = *p; | 
| Sage Weil | 4fc51be | 2010-02-16 15:55:03 -0800 | [diff] [blame] | 667 | 	struct ceph_pg_pool_info *pi; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 668 |  | 
 | 669 | 	dout("osdmap_decode %p to %p len %d\n", *p, end, (int)(end - *p)); | 
 | 670 |  | 
 | 671 | 	map = kzalloc(sizeof(*map), GFP_NOFS); | 
 | 672 | 	if (map == NULL) | 
 | 673 | 		return ERR_PTR(-ENOMEM); | 
 | 674 | 	map->pg_temp = RB_ROOT; | 
 | 675 |  | 
 | 676 | 	ceph_decode_16_safe(p, end, version, bad); | 
| Sage Weil | 4f6a7e5 | 2013-02-23 10:41:09 -0800 | [diff] [blame] | 677 | 	if (version > 6) { | 
 | 678 | 		pr_warning("got unknown v %d > 6 of osdmap\n", version); | 
 | 679 | 		goto bad; | 
 | 680 | 	} | 
 | 681 | 	if (version < 6) { | 
 | 682 | 		pr_warning("got old v %d < 6 of osdmap\n", version); | 
| Sage Weil | 02f90c6 | 2010-02-04 16:18:10 -0800 | [diff] [blame] | 683 | 		goto bad; | 
 | 684 | 	} | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 685 |  | 
 | 686 | 	ceph_decode_need(p, end, 2*sizeof(u64)+6*sizeof(u32), bad); | 
 | 687 | 	ceph_decode_copy(p, &map->fsid, sizeof(map->fsid)); | 
| Sage Weil | c89136e | 2009-10-14 09:59:09 -0700 | [diff] [blame] | 688 | 	map->epoch = ceph_decode_32(p); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 689 | 	ceph_decode_copy(p, &map->created, sizeof(map->created)); | 
 | 690 | 	ceph_decode_copy(p, &map->modified, sizeof(map->modified)); | 
 | 691 |  | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 692 | 	ceph_decode_32_safe(p, end, max, bad); | 
 | 693 | 	while (max--) { | 
| Sage Weil | 4f6a7e5 | 2013-02-23 10:41:09 -0800 | [diff] [blame] | 694 | 		ceph_decode_need(p, end, 8 + 2, bad); | 
| Sage Weil | 0ed7285 | 2012-10-29 11:01:42 -0700 | [diff] [blame] | 695 | 		err = -ENOMEM; | 
| Sage Weil | 2844a76 | 2010-04-09 15:46:42 -0700 | [diff] [blame] | 696 | 		pi = kzalloc(sizeof(*pi), GFP_NOFS); | 
| Sage Weil | 4fc51be | 2010-02-16 15:55:03 -0800 | [diff] [blame] | 697 | 		if (!pi) | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 698 | 			goto bad; | 
| Sage Weil | 4f6a7e5 | 2013-02-23 10:41:09 -0800 | [diff] [blame] | 699 | 		pi->id = ceph_decode_64(p); | 
| Sage Weil | 73a7e69 | 2010-08-02 11:00:55 -0700 | [diff] [blame] | 700 | 		err = __decode_pool(p, end, pi); | 
| Jesper Juhl | b0aee35 | 2010-12-24 23:01:12 +0100 | [diff] [blame] | 701 | 		if (err < 0) { | 
 | 702 | 			kfree(pi); | 
| Sage Weil | 73a7e69 | 2010-08-02 11:00:55 -0700 | [diff] [blame] | 703 | 			goto bad; | 
| Jesper Juhl | b0aee35 | 2010-12-24 23:01:12 +0100 | [diff] [blame] | 704 | 		} | 
| Sage Weil | 4fc51be | 2010-02-16 15:55:03 -0800 | [diff] [blame] | 705 | 		__insert_pg_pool(&map->pg_pools, pi); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 706 | 	} | 
| Sage Weil | 2844a76 | 2010-04-09 15:46:42 -0700 | [diff] [blame] | 707 |  | 
| Sage Weil | 4f6a7e5 | 2013-02-23 10:41:09 -0800 | [diff] [blame] | 708 | 	err = __decode_pool_names(p, end, map); | 
 | 709 | 	if (err < 0) { | 
 | 710 | 		dout("fail to decode pool names"); | 
 | 711 | 		goto bad; | 
| Sage Weil | 0ed7285 | 2012-10-29 11:01:42 -0700 | [diff] [blame] | 712 | 	} | 
| Sage Weil | 2844a76 | 2010-04-09 15:46:42 -0700 | [diff] [blame] | 713 |  | 
| Sage Weil | 4fc51be | 2010-02-16 15:55:03 -0800 | [diff] [blame] | 714 | 	ceph_decode_32_safe(p, end, map->pool_max, bad); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 715 |  | 
 | 716 | 	ceph_decode_32_safe(p, end, map->flags, bad); | 
 | 717 |  | 
| Sage Weil | c89136e | 2009-10-14 09:59:09 -0700 | [diff] [blame] | 718 | 	max = ceph_decode_32(p); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 719 |  | 
 | 720 | 	/* (re)alloc osd arrays */ | 
 | 721 | 	err = osdmap_set_max_osd(map, max); | 
 | 722 | 	if (err < 0) | 
 | 723 | 		goto bad; | 
 | 724 | 	dout("osdmap_decode max_osd = %d\n", map->max_osd); | 
 | 725 |  | 
 | 726 | 	/* osds */ | 
 | 727 | 	err = -EINVAL; | 
 | 728 | 	ceph_decode_need(p, end, 3*sizeof(u32) + | 
 | 729 | 			 map->max_osd*(1 + sizeof(*map->osd_weight) + | 
 | 730 | 				       sizeof(*map->osd_addr)), bad); | 
 | 731 | 	*p += 4; /* skip length field (should match max) */ | 
 | 732 | 	ceph_decode_copy(p, map->osd_state, map->max_osd); | 
 | 733 |  | 
 | 734 | 	*p += 4; /* skip length field (should match max) */ | 
 | 735 | 	for (i = 0; i < map->max_osd; i++) | 
| Sage Weil | c89136e | 2009-10-14 09:59:09 -0700 | [diff] [blame] | 736 | 		map->osd_weight[i] = ceph_decode_32(p); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 737 |  | 
 | 738 | 	*p += 4; /* skip length field (should match max) */ | 
 | 739 | 	ceph_decode_copy(p, map->osd_addr, map->max_osd*sizeof(*map->osd_addr)); | 
| Sage Weil | 63f2d21 | 2009-11-03 15:17:56 -0800 | [diff] [blame] | 740 | 	for (i = 0; i < map->max_osd; i++) | 
 | 741 | 		ceph_decode_addr(&map->osd_addr[i]); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 742 |  | 
 | 743 | 	/* pg_temp */ | 
 | 744 | 	ceph_decode_32_safe(p, end, len, bad); | 
 | 745 | 	for (i = 0; i < len; i++) { | 
 | 746 | 		int n, j; | 
| Sage Weil | 5b191d9 | 2013-02-23 10:38:16 -0800 | [diff] [blame] | 747 | 		struct ceph_pg pgid; | 
 | 748 | 		struct ceph_pg_v1 pgid_v1; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 749 | 		struct ceph_pg_mapping *pg; | 
 | 750 |  | 
 | 751 | 		ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad); | 
| Sage Weil | 5b191d9 | 2013-02-23 10:38:16 -0800 | [diff] [blame] | 752 | 		ceph_decode_copy(p, &pgid_v1, sizeof(pgid_v1)); | 
 | 753 | 		pgid.pool = le32_to_cpu(pgid_v1.pool); | 
 | 754 | 		pgid.seed = le16_to_cpu(pgid_v1.ps); | 
| Sage Weil | c89136e | 2009-10-14 09:59:09 -0700 | [diff] [blame] | 755 | 		n = ceph_decode_32(p); | 
| Xi Wang | e91a9b6 | 2012-06-06 19:35:55 -0500 | [diff] [blame] | 756 | 		err = -EINVAL; | 
 | 757 | 		if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) | 
 | 758 | 			goto bad; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 759 | 		ceph_decode_need(p, end, n * sizeof(u32), bad); | 
| Sage Weil | 30dc638 | 2009-12-21 14:49:37 -0800 | [diff] [blame] | 760 | 		err = -ENOMEM; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 761 | 		pg = kmalloc(sizeof(*pg) + n*sizeof(u32), GFP_NOFS); | 
| Sage Weil | 30dc638 | 2009-12-21 14:49:37 -0800 | [diff] [blame] | 762 | 		if (!pg) | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 763 | 			goto bad; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 764 | 		pg->pgid = pgid; | 
 | 765 | 		pg->len = n; | 
 | 766 | 		for (j = 0; j < n; j++) | 
| Sage Weil | c89136e | 2009-10-14 09:59:09 -0700 | [diff] [blame] | 767 | 			pg->osds[j] = ceph_decode_32(p); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 768 |  | 
| Sage Weil | 991abb6 | 2009-10-08 22:22:37 -0700 | [diff] [blame] | 769 | 		err = __insert_pg_mapping(pg, &map->pg_temp); | 
 | 770 | 		if (err) | 
 | 771 | 			goto bad; | 
| Sage Weil | 5b191d9 | 2013-02-23 10:38:16 -0800 | [diff] [blame] | 772 | 		dout(" added pg_temp %lld.%x len %d\n", pgid.pool, pgid.seed, | 
 | 773 | 		     len); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 774 | 	} | 
 | 775 |  | 
 | 776 | 	/* crush */ | 
 | 777 | 	ceph_decode_32_safe(p, end, len, bad); | 
 | 778 | 	dout("osdmap_decode crush len %d from off 0x%x\n", len, | 
 | 779 | 	     (int)(*p - start)); | 
 | 780 | 	ceph_decode_need(p, end, len, bad); | 
 | 781 | 	map->crush = crush_decode(*p, end); | 
 | 782 | 	*p += len; | 
 | 783 | 	if (IS_ERR(map->crush)) { | 
 | 784 | 		err = PTR_ERR(map->crush); | 
 | 785 | 		map->crush = NULL; | 
 | 786 | 		goto bad; | 
 | 787 | 	} | 
 | 788 |  | 
 | 789 | 	/* ignore the rest of the map */ | 
 | 790 | 	*p = end; | 
 | 791 |  | 
 | 792 | 	dout("osdmap_decode done %p %p\n", *p, end); | 
 | 793 | 	return map; | 
 | 794 |  | 
 | 795 | bad: | 
| Sage Weil | 0ed7285 | 2012-10-29 11:01:42 -0700 | [diff] [blame] | 796 | 	dout("osdmap_decode fail err %d\n", err); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 797 | 	ceph_osdmap_destroy(map); | 
 | 798 | 	return ERR_PTR(err); | 
 | 799 | } | 
 | 800 |  | 
 | 801 | /* | 
 | 802 |  * decode and apply an incremental map update. | 
 | 803 |  */ | 
 | 804 | struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | 
 | 805 | 					     struct ceph_osdmap *map, | 
 | 806 | 					     struct ceph_messenger *msgr) | 
 | 807 | { | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 808 | 	struct crush_map *newcrush = NULL; | 
 | 809 | 	struct ceph_fsid fsid; | 
 | 810 | 	u32 epoch = 0; | 
 | 811 | 	struct ceph_timespec modified; | 
| Sage Weil | 4f6a7e5 | 2013-02-23 10:41:09 -0800 | [diff] [blame] | 812 | 	s32 len; | 
 | 813 | 	u64 pool; | 
 | 814 | 	__s64 new_pool_max; | 
 | 815 | 	__s32 new_flags, max; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 816 | 	void *start = *p; | 
 | 817 | 	int err = -EINVAL; | 
 | 818 | 	u16 version; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 819 |  | 
 | 820 | 	ceph_decode_16_safe(p, end, version, bad); | 
| Sage Weil | 4f6a7e5 | 2013-02-23 10:41:09 -0800 | [diff] [blame] | 821 | 	if (version > 6) { | 
 | 822 | 		pr_warning("got unknown v %d > %d of inc osdmap\n", version, 6); | 
| Sage Weil | 02f90c6 | 2010-02-04 16:18:10 -0800 | [diff] [blame] | 823 | 		goto bad; | 
 | 824 | 	} | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 825 |  | 
 | 826 | 	ceph_decode_need(p, end, sizeof(fsid)+sizeof(modified)+2*sizeof(u32), | 
 | 827 | 			 bad); | 
 | 828 | 	ceph_decode_copy(p, &fsid, sizeof(fsid)); | 
| Sage Weil | c89136e | 2009-10-14 09:59:09 -0700 | [diff] [blame] | 829 | 	epoch = ceph_decode_32(p); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 830 | 	BUG_ON(epoch != map->epoch+1); | 
 | 831 | 	ceph_decode_copy(p, &modified, sizeof(modified)); | 
| Sage Weil | 4f6a7e5 | 2013-02-23 10:41:09 -0800 | [diff] [blame] | 832 | 	new_pool_max = ceph_decode_64(p); | 
| Sage Weil | c89136e | 2009-10-14 09:59:09 -0700 | [diff] [blame] | 833 | 	new_flags = ceph_decode_32(p); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 834 |  | 
 | 835 | 	/* full map? */ | 
 | 836 | 	ceph_decode_32_safe(p, end, len, bad); | 
 | 837 | 	if (len > 0) { | 
 | 838 | 		dout("apply_incremental full map len %d, %p to %p\n", | 
 | 839 | 		     len, *p, end); | 
| Sage Weil | 30dc638 | 2009-12-21 14:49:37 -0800 | [diff] [blame] | 840 | 		return osdmap_decode(p, min(*p+len, end)); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 841 | 	} | 
 | 842 |  | 
 | 843 | 	/* new crush? */ | 
 | 844 | 	ceph_decode_32_safe(p, end, len, bad); | 
 | 845 | 	if (len > 0) { | 
 | 846 | 		dout("apply_incremental new crush map len %d, %p to %p\n", | 
 | 847 | 		     len, *p, end); | 
 | 848 | 		newcrush = crush_decode(*p, min(*p+len, end)); | 
 | 849 | 		if (IS_ERR(newcrush)) | 
| Julia Lawall | 7e34bc5 | 2010-05-22 12:01:14 +0200 | [diff] [blame] | 850 | 			return ERR_CAST(newcrush); | 
| Sage Weil | cebc5be | 2010-06-17 10:22:48 -0700 | [diff] [blame] | 851 | 		*p += len; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 852 | 	} | 
 | 853 |  | 
 | 854 | 	/* new flags? */ | 
 | 855 | 	if (new_flags >= 0) | 
 | 856 | 		map->flags = new_flags; | 
| Sage Weil | 4fc51be | 2010-02-16 15:55:03 -0800 | [diff] [blame] | 857 | 	if (new_pool_max >= 0) | 
 | 858 | 		map->pool_max = new_pool_max; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 859 |  | 
 | 860 | 	ceph_decode_need(p, end, 5*sizeof(u32), bad); | 
 | 861 |  | 
 | 862 | 	/* new max? */ | 
| Sage Weil | c89136e | 2009-10-14 09:59:09 -0700 | [diff] [blame] | 863 | 	max = ceph_decode_32(p); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 864 | 	if (max >= 0) { | 
 | 865 | 		err = osdmap_set_max_osd(map, max); | 
 | 866 | 		if (err < 0) | 
 | 867 | 			goto bad; | 
 | 868 | 	} | 
 | 869 |  | 
 | 870 | 	map->epoch++; | 
| Sage Weil | 3145666 | 2011-05-12 15:18:43 -0700 | [diff] [blame] | 871 | 	map->modified = modified; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 872 | 	if (newcrush) { | 
 | 873 | 		if (map->crush) | 
 | 874 | 			crush_destroy(map->crush); | 
 | 875 | 		map->crush = newcrush; | 
 | 876 | 		newcrush = NULL; | 
 | 877 | 	} | 
 | 878 |  | 
 | 879 | 	/* new_pool */ | 
 | 880 | 	ceph_decode_32_safe(p, end, len, bad); | 
 | 881 | 	while (len--) { | 
| Sage Weil | 4fc51be | 2010-02-16 15:55:03 -0800 | [diff] [blame] | 882 | 		struct ceph_pg_pool_info *pi; | 
| Sage Weil | 361be86 | 2010-01-25 16:03:02 -0800 | [diff] [blame] | 883 |  | 
| Sage Weil | 4f6a7e5 | 2013-02-23 10:41:09 -0800 | [diff] [blame] | 884 | 		ceph_decode_64_safe(p, end, pool, bad); | 
| Sage Weil | 4fc51be | 2010-02-16 15:55:03 -0800 | [diff] [blame] | 885 | 		pi = __lookup_pg_pool(&map->pg_pools, pool); | 
 | 886 | 		if (!pi) { | 
| Sage Weil | 2844a76 | 2010-04-09 15:46:42 -0700 | [diff] [blame] | 887 | 			pi = kzalloc(sizeof(*pi), GFP_NOFS); | 
| Sage Weil | 4fc51be | 2010-02-16 15:55:03 -0800 | [diff] [blame] | 888 | 			if (!pi) { | 
 | 889 | 				err = -ENOMEM; | 
 | 890 | 				goto bad; | 
 | 891 | 			} | 
 | 892 | 			pi->id = pool; | 
 | 893 | 			__insert_pg_pool(&map->pg_pools, pi); | 
 | 894 | 		} | 
| Sage Weil | 73a7e69 | 2010-08-02 11:00:55 -0700 | [diff] [blame] | 895 | 		err = __decode_pool(p, end, pi); | 
 | 896 | 		if (err < 0) | 
 | 897 | 			goto bad; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 898 | 	} | 
| Sage Weil | 0ed7285 | 2012-10-29 11:01:42 -0700 | [diff] [blame] | 899 | 	if (version >= 5) { | 
 | 900 | 		err = __decode_pool_names(p, end, map); | 
 | 901 | 		if (err < 0) | 
 | 902 | 			goto bad; | 
 | 903 | 	} | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 904 |  | 
| Sage Weil | 4fc51be | 2010-02-16 15:55:03 -0800 | [diff] [blame] | 905 | 	/* old_pool */ | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 906 | 	ceph_decode_32_safe(p, end, len, bad); | 
| Sage Weil | 4fc51be | 2010-02-16 15:55:03 -0800 | [diff] [blame] | 907 | 	while (len--) { | 
 | 908 | 		struct ceph_pg_pool_info *pi; | 
 | 909 |  | 
| Sage Weil | 4f6a7e5 | 2013-02-23 10:41:09 -0800 | [diff] [blame] | 910 | 		ceph_decode_64_safe(p, end, pool, bad); | 
| Sage Weil | 4fc51be | 2010-02-16 15:55:03 -0800 | [diff] [blame] | 911 | 		pi = __lookup_pg_pool(&map->pg_pools, pool); | 
| Sage Weil | 2844a76 | 2010-04-09 15:46:42 -0700 | [diff] [blame] | 912 | 		if (pi) | 
 | 913 | 			__remove_pg_pool(&map->pg_pools, pi); | 
| Sage Weil | 4fc51be | 2010-02-16 15:55:03 -0800 | [diff] [blame] | 914 | 	} | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 915 |  | 
 | 916 | 	/* new_up */ | 
 | 917 | 	err = -EINVAL; | 
 | 918 | 	ceph_decode_32_safe(p, end, len, bad); | 
 | 919 | 	while (len--) { | 
 | 920 | 		u32 osd; | 
 | 921 | 		struct ceph_entity_addr addr; | 
 | 922 | 		ceph_decode_32_safe(p, end, osd, bad); | 
 | 923 | 		ceph_decode_copy_safe(p, end, &addr, sizeof(addr), bad); | 
| Sage Weil | 63f2d21 | 2009-11-03 15:17:56 -0800 | [diff] [blame] | 924 | 		ceph_decode_addr(&addr); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 925 | 		pr_info("osd%d up\n", osd); | 
 | 926 | 		BUG_ON(osd >= map->max_osd); | 
 | 927 | 		map->osd_state[osd] |= CEPH_OSD_UP; | 
 | 928 | 		map->osd_addr[osd] = addr; | 
 | 929 | 	} | 
 | 930 |  | 
| Sage Weil | 7662d8f | 2011-05-03 12:52:05 -0700 | [diff] [blame] | 931 | 	/* new_state */ | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 932 | 	ceph_decode_32_safe(p, end, len, bad); | 
 | 933 | 	while (len--) { | 
 | 934 | 		u32 osd; | 
| Sage Weil | 7662d8f | 2011-05-03 12:52:05 -0700 | [diff] [blame] | 935 | 		u8 xorstate; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 936 | 		ceph_decode_32_safe(p, end, osd, bad); | 
| Sage Weil | 7662d8f | 2011-05-03 12:52:05 -0700 | [diff] [blame] | 937 | 		xorstate = **(u8 **)p; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 938 | 		(*p)++;  /* clean flag */ | 
| Sage Weil | 7662d8f | 2011-05-03 12:52:05 -0700 | [diff] [blame] | 939 | 		if (xorstate == 0) | 
 | 940 | 			xorstate = CEPH_OSD_UP; | 
 | 941 | 		if (xorstate & CEPH_OSD_UP) | 
 | 942 | 			pr_info("osd%d down\n", osd); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 943 | 		if (osd < map->max_osd) | 
| Sage Weil | 7662d8f | 2011-05-03 12:52:05 -0700 | [diff] [blame] | 944 | 			map->osd_state[osd] ^= xorstate; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 945 | 	} | 
 | 946 |  | 
 | 947 | 	/* new_weight */ | 
 | 948 | 	ceph_decode_32_safe(p, end, len, bad); | 
 | 949 | 	while (len--) { | 
 | 950 | 		u32 osd, off; | 
 | 951 | 		ceph_decode_need(p, end, sizeof(u32)*2, bad); | 
| Sage Weil | c89136e | 2009-10-14 09:59:09 -0700 | [diff] [blame] | 952 | 		osd = ceph_decode_32(p); | 
 | 953 | 		off = ceph_decode_32(p); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 954 | 		pr_info("osd%d weight 0x%x %s\n", osd, off, | 
 | 955 | 		     off == CEPH_OSD_IN ? "(in)" : | 
 | 956 | 		     (off == CEPH_OSD_OUT ? "(out)" : "")); | 
 | 957 | 		if (osd < map->max_osd) | 
 | 958 | 			map->osd_weight[osd] = off; | 
 | 959 | 	} | 
 | 960 |  | 
 | 961 | 	/* new_pg_temp */ | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 962 | 	ceph_decode_32_safe(p, end, len, bad); | 
 | 963 | 	while (len--) { | 
 | 964 | 		struct ceph_pg_mapping *pg; | 
 | 965 | 		int j; | 
| Sage Weil | 5b191d9 | 2013-02-23 10:38:16 -0800 | [diff] [blame] | 966 | 		struct ceph_pg_v1 pgid_v1; | 
 | 967 | 		struct ceph_pg pgid; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 968 | 		u32 pglen; | 
 | 969 | 		ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad); | 
| Sage Weil | 5b191d9 | 2013-02-23 10:38:16 -0800 | [diff] [blame] | 970 | 		ceph_decode_copy(p, &pgid_v1, sizeof(pgid_v1)); | 
 | 971 | 		pgid.pool = le32_to_cpu(pgid_v1.pool); | 
 | 972 | 		pgid.seed = le16_to_cpu(pgid_v1.ps); | 
| Sage Weil | c89136e | 2009-10-14 09:59:09 -0700 | [diff] [blame] | 973 | 		pglen = ceph_decode_32(p); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 974 |  | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 975 | 		if (pglen) { | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 976 | 			ceph_decode_need(p, end, pglen*sizeof(u32), bad); | 
| Sage Weil | 6bd9adb | 2012-05-21 09:45:23 -0700 | [diff] [blame] | 977 |  | 
 | 978 | 			/* removing existing (if any) */ | 
 | 979 | 			(void) __remove_pg_mapping(&map->pg_temp, pgid); | 
 | 980 |  | 
 | 981 | 			/* insert */ | 
| Sage Weil | 0ed7285 | 2012-10-29 11:01:42 -0700 | [diff] [blame] | 982 | 			err = -EINVAL; | 
 | 983 | 			if (pglen > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) | 
| Xi Wang | a550604 | 2012-06-06 19:35:55 -0500 | [diff] [blame] | 984 | 				goto bad; | 
| Sage Weil | 0ed7285 | 2012-10-29 11:01:42 -0700 | [diff] [blame] | 985 | 			err = -ENOMEM; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 986 | 			pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS); | 
| Sage Weil | 0ed7285 | 2012-10-29 11:01:42 -0700 | [diff] [blame] | 987 | 			if (!pg) | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 988 | 				goto bad; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 989 | 			pg->pgid = pgid; | 
 | 990 | 			pg->len = pglen; | 
| Sage Weil | 7067f79 | 2009-12-21 16:02:37 -0800 | [diff] [blame] | 991 | 			for (j = 0; j < pglen; j++) | 
| Sage Weil | c89136e | 2009-10-14 09:59:09 -0700 | [diff] [blame] | 992 | 				pg->osds[j] = ceph_decode_32(p); | 
| Sage Weil | 991abb6 | 2009-10-08 22:22:37 -0700 | [diff] [blame] | 993 | 			err = __insert_pg_mapping(pg, &map->pg_temp); | 
| Sage Weil | bc4fdca | 2010-07-20 16:19:56 -0700 | [diff] [blame] | 994 | 			if (err) { | 
 | 995 | 				kfree(pg); | 
| Sage Weil | 991abb6 | 2009-10-08 22:22:37 -0700 | [diff] [blame] | 996 | 				goto bad; | 
| Sage Weil | bc4fdca | 2010-07-20 16:19:56 -0700 | [diff] [blame] | 997 | 			} | 
| Sage Weil | 5b191d9 | 2013-02-23 10:38:16 -0800 | [diff] [blame] | 998 | 			dout(" added pg_temp %lld.%x len %d\n", pgid.pool, | 
 | 999 | 			     pgid.seed, pglen); | 
| Sage Weil | 8adc8b3 | 2011-09-28 10:11:04 -0700 | [diff] [blame] | 1000 | 		} else { | 
 | 1001 | 			/* remove */ | 
 | 1002 | 			__remove_pg_mapping(&map->pg_temp, pgid); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1003 | 		} | 
 | 1004 | 	} | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1005 |  | 
 | 1006 | 	/* ignore the rest */ | 
 | 1007 | 	*p = end; | 
 | 1008 | 	return map; | 
 | 1009 |  | 
 | 1010 | bad: | 
 | 1011 | 	pr_err("corrupt inc osdmap epoch %d off %d (%p of %p-%p)\n", | 
 | 1012 | 	       epoch, (int)(*p - start), *p, start, end); | 
| Sage Weil | 9ec7cab | 2009-12-14 15:13:47 -0800 | [diff] [blame] | 1013 | 	print_hex_dump(KERN_DEBUG, "osdmap: ", | 
 | 1014 | 		       DUMP_PREFIX_OFFSET, 16, 1, | 
 | 1015 | 		       start, end - start, true); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1016 | 	if (newcrush) | 
 | 1017 | 		crush_destroy(newcrush); | 
 | 1018 | 	return ERR_PTR(err); | 
 | 1019 | } | 
 | 1020 |  | 
 | 1021 |  | 
 | 1022 |  | 
 | 1023 |  | 
 | 1024 | /* | 
 | 1025 |  * calculate file layout from given offset, length. | 
 | 1026 |  * fill in correct oid, logical length, and object extent | 
 | 1027 |  * offset, length. | 
 | 1028 |  * | 
 | 1029 |  * for now, we write only a single su, until we can | 
 | 1030 |  * pass a stride back to the caller. | 
 | 1031 |  */ | 
| Sage Weil | d63b77f | 2012-09-24 20:59:48 -0700 | [diff] [blame] | 1032 | int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, | 
| Alex Elder | e8afad6 | 2012-11-14 09:38:19 -0600 | [diff] [blame] | 1033 | 				   u64 off, u64 len, | 
| Sage Weil | 645a102 | 2009-10-28 15:15:05 -0700 | [diff] [blame] | 1034 | 				   u64 *ono, | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1035 | 				   u64 *oxoff, u64 *oxlen) | 
 | 1036 | { | 
 | 1037 | 	u32 osize = le32_to_cpu(layout->fl_object_size); | 
 | 1038 | 	u32 su = le32_to_cpu(layout->fl_stripe_unit); | 
 | 1039 | 	u32 sc = le32_to_cpu(layout->fl_stripe_count); | 
 | 1040 | 	u32 bl, stripeno, stripepos, objsetno; | 
 | 1041 | 	u32 su_per_object; | 
| Noah Watkins | ff1d1f7 | 2009-10-30 12:57:30 -0700 | [diff] [blame] | 1042 | 	u64 t, su_offset; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1043 |  | 
| Alex Elder | e8afad6 | 2012-11-14 09:38:19 -0600 | [diff] [blame] | 1044 | 	dout("mapping %llu~%llu  osize %u fl_su %u\n", off, len, | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1045 | 	     osize, su); | 
| Sage Weil | d63b77f | 2012-09-24 20:59:48 -0700 | [diff] [blame] | 1046 | 	if (su == 0 || sc == 0) | 
 | 1047 | 		goto invalid; | 
| Noah Watkins | 35e054a | 2009-10-28 14:04:48 -0700 | [diff] [blame] | 1048 | 	su_per_object = osize / su; | 
| Sage Weil | d63b77f | 2012-09-24 20:59:48 -0700 | [diff] [blame] | 1049 | 	if (su_per_object == 0) | 
 | 1050 | 		goto invalid; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1051 | 	dout("osize %u / su %u = su_per_object %u\n", osize, su, | 
 | 1052 | 	     su_per_object); | 
 | 1053 |  | 
| Sage Weil | d63b77f | 2012-09-24 20:59:48 -0700 | [diff] [blame] | 1054 | 	if ((su & ~PAGE_MASK) != 0) | 
 | 1055 | 		goto invalid; | 
 | 1056 |  | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1057 | 	/* bl = *off / su; */ | 
 | 1058 | 	t = off; | 
 | 1059 | 	do_div(t, su); | 
 | 1060 | 	bl = t; | 
 | 1061 | 	dout("off %llu / su %u = bl %u\n", off, su, bl); | 
 | 1062 |  | 
 | 1063 | 	stripeno = bl / sc; | 
 | 1064 | 	stripepos = bl % sc; | 
 | 1065 | 	objsetno = stripeno / su_per_object; | 
 | 1066 |  | 
| Sage Weil | 645a102 | 2009-10-28 15:15:05 -0700 | [diff] [blame] | 1067 | 	*ono = objsetno * sc + stripepos; | 
| Eric Dumazet | 95c9617 | 2012-04-15 05:58:06 +0000 | [diff] [blame] | 1068 | 	dout("objset %u * sc %u = ono %u\n", objsetno, sc, (unsigned int)*ono); | 
| Sage Weil | 645a102 | 2009-10-28 15:15:05 -0700 | [diff] [blame] | 1069 |  | 
 | 1070 | 	/* *oxoff = *off % layout->fl_stripe_unit;  # offset in su */ | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1071 | 	t = off; | 
| Noah Watkins | ff1d1f7 | 2009-10-30 12:57:30 -0700 | [diff] [blame] | 1072 | 	su_offset = do_div(t, su); | 
 | 1073 | 	*oxoff = su_offset + (stripeno % su_per_object) * su; | 
| Sage Weil | 645a102 | 2009-10-28 15:15:05 -0700 | [diff] [blame] | 1074 |  | 
| Noah Watkins | ff1d1f7 | 2009-10-30 12:57:30 -0700 | [diff] [blame] | 1075 | 	/* | 
 | 1076 | 	 * Calculate the length of the extent being written to the selected | 
| Alex Elder | e8afad6 | 2012-11-14 09:38:19 -0600 | [diff] [blame] | 1077 | 	 * object. This is the minimum of the full length requested (len) or | 
| Noah Watkins | ff1d1f7 | 2009-10-30 12:57:30 -0700 | [diff] [blame] | 1078 | 	 * the remainder of the current stripe being written to. | 
 | 1079 | 	 */ | 
| Alex Elder | e8afad6 | 2012-11-14 09:38:19 -0600 | [diff] [blame] | 1080 | 	*oxlen = min_t(u64, len, su - su_offset); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1081 |  | 
 | 1082 | 	dout(" obj extent %llu~%llu\n", *oxoff, *oxlen); | 
| Sage Weil | d63b77f | 2012-09-24 20:59:48 -0700 | [diff] [blame] | 1083 | 	return 0; | 
 | 1084 |  | 
 | 1085 | invalid: | 
 | 1086 | 	dout(" invalid layout\n"); | 
 | 1087 | 	*ono = 0; | 
 | 1088 | 	*oxoff = 0; | 
 | 1089 | 	*oxlen = 0; | 
 | 1090 | 	return -EINVAL; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1091 | } | 
| Yehuda Sadeh | 3d14c5d | 2010-04-06 15:14:15 -0700 | [diff] [blame] | 1092 | EXPORT_SYMBOL(ceph_calc_file_object_mapping); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1093 |  | 
 | 1094 | /* | 
 | 1095 |  * calculate an object layout (i.e. pgid) from an oid, | 
 | 1096 |  * file_layout, and osdmap | 
 | 1097 |  */ | 
| Sage Weil | 2169aea | 2013-02-25 16:13:08 -0800 | [diff] [blame] | 1098 | int ceph_calc_object_layout(struct ceph_pg *pg, | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1099 | 			    const char *oid, | 
 | 1100 | 			    struct ceph_file_layout *fl, | 
 | 1101 | 			    struct ceph_osdmap *osdmap) | 
 | 1102 | { | 
| Eric Dumazet | 95c9617 | 2012-04-15 05:58:06 +0000 | [diff] [blame] | 1103 | 	unsigned int num, num_mask; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1104 | 	struct ceph_pg_pool_info *pool; | 
 | 1105 |  | 
| Sage Weil | 30dc638 | 2009-12-21 14:49:37 -0800 | [diff] [blame] | 1106 | 	BUG_ON(!osdmap); | 
| Sage Weil | 2169aea | 2013-02-25 16:13:08 -0800 | [diff] [blame] | 1107 | 	pg->pool = le32_to_cpu(fl->fl_pg_pool); | 
 | 1108 | 	pool = __lookup_pg_pool(&osdmap->pg_pools, pg->pool); | 
| Sage Weil | 4fc51be | 2010-02-16 15:55:03 -0800 | [diff] [blame] | 1109 | 	if (!pool) | 
 | 1110 | 		return -EIO; | 
| Sage Weil | 2169aea | 2013-02-25 16:13:08 -0800 | [diff] [blame] | 1111 | 	pg->seed = ceph_str_hash(pool->object_hash, oid, strlen(oid)); | 
| Sage Weil | 4f6a7e5 | 2013-02-23 10:41:09 -0800 | [diff] [blame] | 1112 | 	num = pool->pg_num; | 
| Sage Weil | 3469ac1 | 2012-05-07 15:33:36 -0700 | [diff] [blame] | 1113 | 	num_mask = pool->pg_num_mask; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1114 |  | 
| Sage Weil | 2169aea | 2013-02-25 16:13:08 -0800 | [diff] [blame] | 1115 | 	dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pg->pool, pg->seed); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1116 | 	return 0; | 
 | 1117 | } | 
| Yehuda Sadeh | 3d14c5d | 2010-04-06 15:14:15 -0700 | [diff] [blame] | 1118 | EXPORT_SYMBOL(ceph_calc_object_layout); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1119 |  | 
 | 1120 | /* | 
 | 1121 |  * Calculate raw osd vector for the given pgid.  Return pointer to osd | 
 | 1122 |  * array, or NULL on failure. | 
 | 1123 |  */ | 
| Sage Weil | 5b191d9 | 2013-02-23 10:38:16 -0800 | [diff] [blame] | 1124 | static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1125 | 			int *osds, int *num) | 
 | 1126 | { | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1127 | 	struct ceph_pg_mapping *pg; | 
 | 1128 | 	struct ceph_pg_pool_info *pool; | 
 | 1129 | 	int ruleno; | 
| Sage Weil | 83ca14f | 2013-02-26 10:39:09 -0800 | [diff] [blame] | 1130 | 	int r; | 
 | 1131 | 	u32 pps; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1132 |  | 
| Sage Weil | 83ca14f | 2013-02-26 10:39:09 -0800 | [diff] [blame] | 1133 | 	pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool); | 
| Sage Weil | 782e182 | 2011-09-28 10:08:27 -0700 | [diff] [blame] | 1134 | 	if (!pool) | 
 | 1135 | 		return NULL; | 
 | 1136 |  | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1137 | 	/* pg_temp? */ | 
| Sage Weil | 83ca14f | 2013-02-26 10:39:09 -0800 | [diff] [blame] | 1138 | 	pgid.seed = ceph_stable_mod(pgid.seed, pool->pg_num, | 
 | 1139 | 				    pool->pgp_num_mask); | 
| Sage Weil | 9794b14 | 2010-02-16 15:53:32 -0800 | [diff] [blame] | 1140 | 	pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); | 
 | 1141 | 	if (pg) { | 
 | 1142 | 		*num = pg->len; | 
 | 1143 | 		return pg->osds; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1144 | 	} | 
 | 1145 |  | 
 | 1146 | 	/* crush */ | 
| Sage Weil | 4f6a7e5 | 2013-02-23 10:41:09 -0800 | [diff] [blame] | 1147 | 	ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset, | 
 | 1148 | 				 pool->type, pool->size); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1149 | 	if (ruleno < 0) { | 
| Sage Weil | 83ca14f | 2013-02-26 10:39:09 -0800 | [diff] [blame] | 1150 | 		pr_err("no crush rule pool %lld ruleset %d type %d size %d\n", | 
 | 1151 | 		       pgid.pool, pool->crush_ruleset, pool->type, | 
| Sage Weil | 4f6a7e5 | 2013-02-23 10:41:09 -0800 | [diff] [blame] | 1152 | 		       pool->size); | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1153 | 		return NULL; | 
 | 1154 | 	} | 
 | 1155 |  | 
| Sage Weil | 83ca14f | 2013-02-26 10:39:09 -0800 | [diff] [blame] | 1156 | 	if (pool->flags & CEPH_POOL_FLAG_HASHPSPOOL) { | 
 | 1157 | 		/* hash pool id and seed sothat pool PGs do not overlap */ | 
 | 1158 | 		pps = crush_hash32_2(CRUSH_HASH_RJENKINS1, | 
 | 1159 | 				     ceph_stable_mod(pgid.seed, pool->pgp_num, | 
 | 1160 | 						     pool->pgp_num_mask), | 
 | 1161 | 				     pgid.pool); | 
 | 1162 | 	} else { | 
 | 1163 | 		/* | 
 | 1164 | 		 * legacy ehavior: add ps and pool together.  this is | 
 | 1165 | 		 * not a great approach because the PGs from each pool | 
 | 1166 | 		 * will overlap on top of each other: 0.5 == 1.4 == | 
 | 1167 | 		 * 2.3 == ... | 
 | 1168 | 		 */ | 
 | 1169 | 		pps = ceph_stable_mod(pgid.seed, pool->pgp_num, | 
 | 1170 | 				      pool->pgp_num_mask) + | 
 | 1171 | 			(unsigned)pgid.pool; | 
 | 1172 | 	} | 
| Sage Weil | 8b39326 | 2012-05-07 15:37:23 -0700 | [diff] [blame] | 1173 | 	r = crush_do_rule(osdmap->crush, ruleno, pps, osds, | 
| Sage Weil | 4f6a7e5 | 2013-02-23 10:41:09 -0800 | [diff] [blame] | 1174 | 			  min_t(int, pool->size, *num), | 
| Sage Weil | 8b39326 | 2012-05-07 15:37:23 -0700 | [diff] [blame] | 1175 | 			  osdmap->osd_weight); | 
 | 1176 | 	if (r < 0) { | 
| Sage Weil | 83ca14f | 2013-02-26 10:39:09 -0800 | [diff] [blame] | 1177 | 		pr_err("error %d from crush rule: pool %lld ruleset %d type %d" | 
 | 1178 | 		       " size %d\n", r, pgid.pool, pool->crush_ruleset, | 
| Sage Weil | 4f6a7e5 | 2013-02-23 10:41:09 -0800 | [diff] [blame] | 1179 | 		       pool->type, pool->size); | 
| Sage Weil | 8b39326 | 2012-05-07 15:37:23 -0700 | [diff] [blame] | 1180 | 		return NULL; | 
 | 1181 | 	} | 
 | 1182 | 	*num = r; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1183 | 	return osds; | 
 | 1184 | } | 
 | 1185 |  | 
 | 1186 | /* | 
| Sage Weil | d85b705 | 2010-05-10 10:24:48 -0700 | [diff] [blame] | 1187 |  * Return acting set for given pgid. | 
 | 1188 |  */ | 
| Sage Weil | 5b191d9 | 2013-02-23 10:38:16 -0800 | [diff] [blame] | 1189 | int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | 
| Sage Weil | d85b705 | 2010-05-10 10:24:48 -0700 | [diff] [blame] | 1190 | 			int *acting) | 
 | 1191 | { | 
 | 1192 | 	int rawosds[CEPH_PG_MAX_SIZE], *osds; | 
 | 1193 | 	int i, o, num = CEPH_PG_MAX_SIZE; | 
 | 1194 |  | 
 | 1195 | 	osds = calc_pg_raw(osdmap, pgid, rawosds, &num); | 
 | 1196 | 	if (!osds) | 
 | 1197 | 		return -1; | 
 | 1198 |  | 
 | 1199 | 	/* primary is first up osd */ | 
 | 1200 | 	o = 0; | 
 | 1201 | 	for (i = 0; i < num; i++) | 
 | 1202 | 		if (ceph_osd_is_up(osdmap, osds[i])) | 
 | 1203 | 			acting[o++] = osds[i]; | 
 | 1204 | 	return o; | 
 | 1205 | } | 
 | 1206 |  | 
 | 1207 | /* | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1208 |  * Return primary osd for given pgid, or -1 if none. | 
 | 1209 |  */ | 
| Sage Weil | 5b191d9 | 2013-02-23 10:38:16 -0800 | [diff] [blame] | 1210 | int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1211 | { | 
| Sage Weil | d85b705 | 2010-05-10 10:24:48 -0700 | [diff] [blame] | 1212 | 	int rawosds[CEPH_PG_MAX_SIZE], *osds; | 
 | 1213 | 	int i, num = CEPH_PG_MAX_SIZE; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1214 |  | 
 | 1215 | 	osds = calc_pg_raw(osdmap, pgid, rawosds, &num); | 
 | 1216 | 	if (!osds) | 
 | 1217 | 		return -1; | 
 | 1218 |  | 
 | 1219 | 	/* primary is first up osd */ | 
 | 1220 | 	for (i = 0; i < num; i++) | 
| Sage Weil | d85b705 | 2010-05-10 10:24:48 -0700 | [diff] [blame] | 1221 | 		if (ceph_osd_is_up(osdmap, osds[i])) | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1222 | 			return osds[i]; | 
| Sage Weil | f24e998 | 2009-10-06 11:31:10 -0700 | [diff] [blame] | 1223 | 	return -1; | 
 | 1224 | } | 
| Yehuda Sadeh | 3d14c5d | 2010-04-06 15:14:15 -0700 | [diff] [blame] | 1225 | EXPORT_SYMBOL(ceph_calc_pg_primary); |