|  | /****************************************************************************** | 
|  | ******************************************************************************* | 
|  | ** | 
|  | **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved. | 
|  | **  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved. | 
|  | ** | 
|  | **  This copyrighted material is made available to anyone wishing to use, | 
|  | **  modify, copy, or redistribute it subject to the terms and conditions | 
|  | **  of the GNU General Public License v.2. | 
|  | ** | 
|  | ******************************************************************************* | 
|  | ******************************************************************************/ | 
|  |  | 
|  | #include "dlm_internal.h" | 
|  | #include "lockspace.h" | 
|  | #include "member.h" | 
|  | #include "lowcomms.h" | 
|  | #include "rcom.h" | 
|  | #include "config.h" | 
|  | #include "memory.h" | 
|  | #include "recover.h" | 
|  | #include "util.h" | 
|  | #include "lock.h" | 
|  | #include "dir.h" | 
|  |  | 
|  |  | 
|  | static void put_free_de(struct dlm_ls *ls, struct dlm_direntry *de) | 
|  | { | 
|  | spin_lock(&ls->ls_recover_list_lock); | 
|  | list_add(&de->list, &ls->ls_recover_list); | 
|  | spin_unlock(&ls->ls_recover_list_lock); | 
|  | } | 
|  |  | 
|  | static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len) | 
|  | { | 
|  | int found = 0; | 
|  | struct dlm_direntry *de; | 
|  |  | 
|  | spin_lock(&ls->ls_recover_list_lock); | 
|  | list_for_each_entry(de, &ls->ls_recover_list, list) { | 
|  | if (de->length == len) { | 
|  | list_del(&de->list); | 
|  | de->master_nodeid = 0; | 
|  | memset(de->name, 0, len); | 
|  | found = 1; | 
|  | break; | 
|  | } | 
|  | } | 
|  | spin_unlock(&ls->ls_recover_list_lock); | 
|  |  | 
|  | if (!found) | 
|  | de = kzalloc(sizeof(struct dlm_direntry) + len, GFP_KERNEL); | 
|  | return de; | 
|  | } | 
|  |  | 
|  | void dlm_clear_free_entries(struct dlm_ls *ls) | 
|  | { | 
|  | struct dlm_direntry *de; | 
|  |  | 
|  | spin_lock(&ls->ls_recover_list_lock); | 
|  | while (!list_empty(&ls->ls_recover_list)) { | 
|  | de = list_entry(ls->ls_recover_list.next, struct dlm_direntry, | 
|  | list); | 
|  | list_del(&de->list); | 
|  | kfree(de); | 
|  | } | 
|  | spin_unlock(&ls->ls_recover_list_lock); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * We use the upper 16 bits of the hash value to select the directory node. | 
|  | * Low bits are used for distribution of rsb's among hash buckets on each node. | 
|  | * | 
|  | * To give the exact range wanted (0 to num_nodes-1), we apply a modulus of | 
|  | * num_nodes to the hash value.  This value in the desired range is used as an | 
|  | * offset into the sorted list of nodeid's to give the particular nodeid. | 
|  | */ | 
|  |  | 
|  | int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash) | 
|  | { | 
|  | struct list_head *tmp; | 
|  | struct dlm_member *memb = NULL; | 
|  | uint32_t node, n = 0; | 
|  | int nodeid; | 
|  |  | 
|  | if (ls->ls_num_nodes == 1) { | 
|  | nodeid = dlm_our_nodeid(); | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | if (ls->ls_node_array) { | 
|  | node = (hash >> 16) % ls->ls_total_weight; | 
|  | nodeid = ls->ls_node_array[node]; | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | /* make_member_array() failed to kmalloc ls_node_array... */ | 
|  |  | 
|  | node = (hash >> 16) % ls->ls_num_nodes; | 
|  |  | 
|  | list_for_each(tmp, &ls->ls_nodes) { | 
|  | if (n++ != node) | 
|  | continue; | 
|  | memb = list_entry(tmp, struct dlm_member, list); | 
|  | break; | 
|  | } | 
|  |  | 
|  | DLM_ASSERT(memb , printk("num_nodes=%u n=%u node=%u\n", | 
|  | ls->ls_num_nodes, n, node);); | 
|  | nodeid = memb->nodeid; | 
|  | out: | 
|  | return nodeid; | 
|  | } | 
|  |  | 
|  | int dlm_dir_nodeid(struct dlm_rsb *r) | 
|  | { | 
|  | return dlm_hash2nodeid(r->res_ls, r->res_hash); | 
|  | } | 
|  |  | 
|  | static inline uint32_t dir_hash(struct dlm_ls *ls, char *name, int len) | 
|  | { | 
|  | uint32_t val; | 
|  |  | 
|  | val = jhash(name, len, 0); | 
|  | val &= (ls->ls_dirtbl_size - 1); | 
|  |  | 
|  | return val; | 
|  | } | 
|  |  | 
|  | static void add_entry_to_hash(struct dlm_ls *ls, struct dlm_direntry *de) | 
|  | { | 
|  | uint32_t bucket; | 
|  |  | 
|  | bucket = dir_hash(ls, de->name, de->length); | 
|  | list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list); | 
|  | } | 
|  |  | 
|  | static struct dlm_direntry *search_bucket(struct dlm_ls *ls, char *name, | 
|  | int namelen, uint32_t bucket) | 
|  | { | 
|  | struct dlm_direntry *de; | 
|  |  | 
|  | list_for_each_entry(de, &ls->ls_dirtbl[bucket].list, list) { | 
|  | if (de->length == namelen && !memcmp(name, de->name, namelen)) | 
|  | goto out; | 
|  | } | 
|  | de = NULL; | 
|  | out: | 
|  | return de; | 
|  | } | 
|  |  | 
|  | void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int namelen) | 
|  | { | 
|  | struct dlm_direntry *de; | 
|  | uint32_t bucket; | 
|  |  | 
|  | bucket = dir_hash(ls, name, namelen); | 
|  |  | 
|  | spin_lock(&ls->ls_dirtbl[bucket].lock); | 
|  |  | 
|  | de = search_bucket(ls, name, namelen, bucket); | 
|  |  | 
|  | if (!de) { | 
|  | log_error(ls, "remove fr %u none", nodeid); | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | if (de->master_nodeid != nodeid) { | 
|  | log_error(ls, "remove fr %u ID %u", nodeid, de->master_nodeid); | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | list_del(&de->list); | 
|  | kfree(de); | 
|  | out: | 
|  | spin_unlock(&ls->ls_dirtbl[bucket].lock); | 
|  | } | 
|  |  | 
|  | void dlm_dir_clear(struct dlm_ls *ls) | 
|  | { | 
|  | struct list_head *head; | 
|  | struct dlm_direntry *de; | 
|  | int i; | 
|  |  | 
|  | DLM_ASSERT(list_empty(&ls->ls_recover_list), ); | 
|  |  | 
|  | for (i = 0; i < ls->ls_dirtbl_size; i++) { | 
|  | spin_lock(&ls->ls_dirtbl[i].lock); | 
|  | head = &ls->ls_dirtbl[i].list; | 
|  | while (!list_empty(head)) { | 
|  | de = list_entry(head->next, struct dlm_direntry, list); | 
|  | list_del(&de->list); | 
|  | put_free_de(ls, de); | 
|  | } | 
|  | spin_unlock(&ls->ls_dirtbl[i].lock); | 
|  | } | 
|  | } | 
|  |  | 
|  | int dlm_recover_directory(struct dlm_ls *ls) | 
|  | { | 
|  | struct dlm_member *memb; | 
|  | struct dlm_direntry *de; | 
|  | char *b, *last_name = NULL; | 
|  | int error = -ENOMEM, last_len, count = 0; | 
|  | uint16_t namelen; | 
|  |  | 
|  | log_debug(ls, "dlm_recover_directory"); | 
|  |  | 
|  | if (dlm_no_directory(ls)) | 
|  | goto out_status; | 
|  |  | 
|  | dlm_dir_clear(ls); | 
|  |  | 
|  | last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_KERNEL); | 
|  | if (!last_name) | 
|  | goto out; | 
|  |  | 
|  | list_for_each_entry(memb, &ls->ls_nodes, list) { | 
|  | memset(last_name, 0, DLM_RESNAME_MAXLEN); | 
|  | last_len = 0; | 
|  |  | 
|  | for (;;) { | 
|  | int left; | 
|  | error = dlm_recovery_stopped(ls); | 
|  | if (error) | 
|  | goto out_free; | 
|  |  | 
|  | error = dlm_rcom_names(ls, memb->nodeid, | 
|  | last_name, last_len); | 
|  | if (error) | 
|  | goto out_free; | 
|  |  | 
|  | schedule(); | 
|  |  | 
|  | /* | 
|  | * pick namelen/name pairs out of received buffer | 
|  | */ | 
|  |  | 
|  | b = ls->ls_recover_buf->rc_buf; | 
|  | left = ls->ls_recover_buf->rc_header.h_length; | 
|  | left -= sizeof(struct dlm_rcom); | 
|  |  | 
|  | for (;;) { | 
|  | __be16 v; | 
|  |  | 
|  | error = -EINVAL; | 
|  | if (left < sizeof(__be16)) | 
|  | goto out_free; | 
|  |  | 
|  | memcpy(&v, b, sizeof(__be16)); | 
|  | namelen = be16_to_cpu(v); | 
|  | b += sizeof(__be16); | 
|  | left -= sizeof(__be16); | 
|  |  | 
|  | /* namelen of 0xFFFFF marks end of names for | 
|  | this node; namelen of 0 marks end of the | 
|  | buffer */ | 
|  |  | 
|  | if (namelen == 0xFFFF) | 
|  | goto done; | 
|  | if (!namelen) | 
|  | break; | 
|  |  | 
|  | if (namelen > left) | 
|  | goto out_free; | 
|  |  | 
|  | if (namelen > DLM_RESNAME_MAXLEN) | 
|  | goto out_free; | 
|  |  | 
|  | error = -ENOMEM; | 
|  | de = get_free_de(ls, namelen); | 
|  | if (!de) | 
|  | goto out_free; | 
|  |  | 
|  | de->master_nodeid = memb->nodeid; | 
|  | de->length = namelen; | 
|  | last_len = namelen; | 
|  | memcpy(de->name, b, namelen); | 
|  | memcpy(last_name, b, namelen); | 
|  | b += namelen; | 
|  | left -= namelen; | 
|  |  | 
|  | add_entry_to_hash(ls, de); | 
|  | count++; | 
|  | } | 
|  | } | 
|  | done: | 
|  | ; | 
|  | } | 
|  |  | 
|  | out_status: | 
|  | error = 0; | 
|  | dlm_set_recover_status(ls, DLM_RS_DIR); | 
|  | log_debug(ls, "dlm_recover_directory %d entries", count); | 
|  | out_free: | 
|  | kfree(last_name); | 
|  | out: | 
|  | dlm_clear_free_entries(ls); | 
|  | return error; | 
|  | } | 
|  |  | 
|  | static int get_entry(struct dlm_ls *ls, int nodeid, char *name, | 
|  | int namelen, int *r_nodeid) | 
|  | { | 
|  | struct dlm_direntry *de, *tmp; | 
|  | uint32_t bucket; | 
|  |  | 
|  | bucket = dir_hash(ls, name, namelen); | 
|  |  | 
|  | spin_lock(&ls->ls_dirtbl[bucket].lock); | 
|  | de = search_bucket(ls, name, namelen, bucket); | 
|  | if (de) { | 
|  | *r_nodeid = de->master_nodeid; | 
|  | spin_unlock(&ls->ls_dirtbl[bucket].lock); | 
|  | if (*r_nodeid == nodeid) | 
|  | return -EEXIST; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | spin_unlock(&ls->ls_dirtbl[bucket].lock); | 
|  |  | 
|  | if (namelen > DLM_RESNAME_MAXLEN) | 
|  | return -EINVAL; | 
|  |  | 
|  | de = kzalloc(sizeof(struct dlm_direntry) + namelen, GFP_KERNEL); | 
|  | if (!de) | 
|  | return -ENOMEM; | 
|  |  | 
|  | de->master_nodeid = nodeid; | 
|  | de->length = namelen; | 
|  | memcpy(de->name, name, namelen); | 
|  |  | 
|  | spin_lock(&ls->ls_dirtbl[bucket].lock); | 
|  | tmp = search_bucket(ls, name, namelen, bucket); | 
|  | if (tmp) { | 
|  | kfree(de); | 
|  | de = tmp; | 
|  | } else { | 
|  | list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list); | 
|  | } | 
|  | *r_nodeid = de->master_nodeid; | 
|  | spin_unlock(&ls->ls_dirtbl[bucket].lock); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen, | 
|  | int *r_nodeid) | 
|  | { | 
|  | return get_entry(ls, nodeid, name, namelen, r_nodeid); | 
|  | } | 
|  |  | 
|  | static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len) | 
|  | { | 
|  | struct dlm_rsb *r; | 
|  |  | 
|  | down_read(&ls->ls_root_sem); | 
|  | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { | 
|  | if (len == r->res_length && !memcmp(name, r->res_name, len)) { | 
|  | up_read(&ls->ls_root_sem); | 
|  | return r; | 
|  | } | 
|  | } | 
|  | up_read(&ls->ls_root_sem); | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | /* Find the rsb where we left off (or start again), then send rsb names | 
|  | for rsb's we're master of and whose directory node matches the requesting | 
|  | node.  inbuf is the rsb name last sent, inlen is the name's length */ | 
|  |  | 
|  | void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, | 
|  | char *outbuf, int outlen, int nodeid) | 
|  | { | 
|  | struct list_head *list; | 
|  | struct dlm_rsb *r; | 
|  | int offset = 0, dir_nodeid; | 
|  | __be16 be_namelen; | 
|  |  | 
|  | down_read(&ls->ls_root_sem); | 
|  |  | 
|  | if (inlen > 1) { | 
|  | r = find_rsb_root(ls, inbuf, inlen); | 
|  | if (!r) { | 
|  | inbuf[inlen - 1] = '\0'; | 
|  | log_error(ls, "copy_master_names from %d start %d %s", | 
|  | nodeid, inlen, inbuf); | 
|  | goto out; | 
|  | } | 
|  | list = r->res_root_list.next; | 
|  | } else { | 
|  | list = ls->ls_root_list.next; | 
|  | } | 
|  |  | 
|  | for (offset = 0; list != &ls->ls_root_list; list = list->next) { | 
|  | r = list_entry(list, struct dlm_rsb, res_root_list); | 
|  | if (r->res_nodeid) | 
|  | continue; | 
|  |  | 
|  | dir_nodeid = dlm_dir_nodeid(r); | 
|  | if (dir_nodeid != nodeid) | 
|  | continue; | 
|  |  | 
|  | /* | 
|  | * The block ends when we can't fit the following in the | 
|  | * remaining buffer space: | 
|  | * namelen (uint16_t) + | 
|  | * name (r->res_length) + | 
|  | * end-of-block record 0x0000 (uint16_t) | 
|  | */ | 
|  |  | 
|  | if (offset + sizeof(uint16_t)*2 + r->res_length > outlen) { | 
|  | /* Write end-of-block record */ | 
|  | be_namelen = cpu_to_be16(0); | 
|  | memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); | 
|  | offset += sizeof(__be16); | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | be_namelen = cpu_to_be16(r->res_length); | 
|  | memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); | 
|  | offset += sizeof(__be16); | 
|  | memcpy(outbuf + offset, r->res_name, r->res_length); | 
|  | offset += r->res_length; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * If we've reached the end of the list (and there's room) write a | 
|  | * terminating record. | 
|  | */ | 
|  |  | 
|  | if ((list == &ls->ls_root_list) && | 
|  | (offset + sizeof(uint16_t) <= outlen)) { | 
|  | be_namelen = cpu_to_be16(0xFFFF); | 
|  | memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); | 
|  | offset += sizeof(__be16); | 
|  | } | 
|  |  | 
|  | out: | 
|  | up_read(&ls->ls_root_sem); | 
|  | } | 
|  |  |