/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * Copyright (C) 2005 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

 | 22 | #ifndef O2CLUSTER_TCP_INTERNAL_H | 
 | 23 | #define O2CLUSTER_TCP_INTERNAL_H | 
 | 24 |  | 
 | 25 | #define O2NET_MSG_MAGIC           ((u16)0xfa55) | 
 | 26 | #define O2NET_MSG_STATUS_MAGIC    ((u16)0xfa56) | 
 | 27 | #define O2NET_MSG_KEEP_REQ_MAGIC  ((u16)0xfa57) | 
 | 28 | #define O2NET_MSG_KEEP_RESP_MAGIC ((u16)0xfa58) | 
 | 29 |  | 
/*
 * We delay our quorum decision so that heartbeat will already have timed
 * out truly dead nodes by the time we come around to making decisions
 * based on their number.
 *
 * Expands to an expression over o2hb_dead_threshold and
 * O2HB_REGION_TIMEOUT_MS, neither of which is defined in this header.
 */
#define O2NET_QUORUM_DELAY_MS	((o2hb_dead_threshold + 2) * O2HB_REGION_TIMEOUT_MS)

/*
 * This version number represents quite a lot, unfortunately.  It not
 * only represents the raw network message protocol on the wire but also
 * locking semantics of the file system using the protocol.  It should
 * be somewhere else, I'm sure, but right now it isn't.
 *
 * With version 11, we separate out the filesystem locking portion.  The
 * filesystem now has a major.minor version it negotiates.  Version 11
 * introduces this negotiation to the o2dlm protocol, and as such the
 * version here in tcp_internal.h should not need to be bumped for
 * filesystem locking changes.
 *
 * New in version 11:
 * 	- Negotiation of filesystem locking in the dlm join.
 *
 * New in version 10:
 * 	- Meta/data locks combined
 *
 * New in version 9:
 * 	- All votes removed
 *
 * New in version 8:
 * 	- Replace delete inode votes with a cluster lock
 *
 * New in version 7:
 * 	- DLM join domain includes the live nodemap
 *
 * New in version 6:
 * 	- DLM lockres remote refcount fixes.
 *
 * New in version 5:
 * 	- Network timeout checking protocol
 *
 * New in version 4:
 * 	- Remove i_generation from lock names for better stat performance.
 *
 * New in version 3:
 * 	- Replace dentry votes with a cluster lock
 *
 * New in version 2:
 * 	- full 64 bit i_size in the metadata lock lvbs
 * 	- introduction of "rw" lock and pushing meta/data locking down
 */
#define O2NET_PROTOCOL_VERSION 11ULL
| Zach Brown | 9821148 | 2005-12-15 14:31:23 -0800 | [diff] [blame] | 79 | struct o2net_handshake { | 
 | 80 | 	__be64	protocol_version; | 
 | 81 | 	__be64	connector_id; | 
| Andrew Beekhof | 828ae6a | 2006-12-04 14:04:55 +0100 | [diff] [blame] | 82 | 	__be32  o2hb_heartbeat_timeout_ms; | 
 | 83 | 	__be32  o2net_idle_timeout_ms; | 
 | 84 | 	__be32  o2net_keepalive_delay_ms; | 
 | 85 | 	__be32  o2net_reconnect_delay_ms; | 
| Zach Brown | 9821148 | 2005-12-15 14:31:23 -0800 | [diff] [blame] | 86 | }; | 
 | 87 |  | 
 | 88 | struct o2net_node { | 
 | 89 | 	/* this is never called from int/bh */ | 
 | 90 | 	spinlock_t			nn_lock; | 
 | 91 |  | 
 | 92 | 	/* set the moment an sc is allocated and a connect is started */ | 
 | 93 | 	struct o2net_sock_container	*nn_sc; | 
 | 94 | 	/* _valid is only set after the handshake passes and tx can happen */ | 
 | 95 | 	unsigned			nn_sc_valid:1; | 
 | 96 | 	/* if this is set tx just returns it */ | 
 | 97 | 	int				nn_persistent_error; | 
| Tao Ma | 5cc3bf2 | 2008-03-05 15:50:12 +0800 | [diff] [blame] | 98 | 	/* It is only set to 1 after the idle time out. */ | 
 | 99 | 	atomic_t			nn_timeout; | 
| Zach Brown | 9821148 | 2005-12-15 14:31:23 -0800 | [diff] [blame] | 100 |  | 
 | 101 | 	/* threads waiting for an sc to arrive wait on the wq for generation | 
 | 102 | 	 * to increase.  it is increased when a connecting socket succeeds | 
 | 103 | 	 * or fails or when an accepted socket is attached. */ | 
 | 104 | 	wait_queue_head_t		nn_sc_wq; | 
 | 105 |  | 
 | 106 | 	struct idr			nn_status_idr; | 
 | 107 | 	struct list_head		nn_status_list; | 
 | 108 |  | 
 | 109 | 	/* connects are attempted from when heartbeat comes up until either hb | 
 | 110 | 	 * goes down, the node is unconfigured, no connect attempts succeed | 
 | 111 | 	 * before O2NET_CONN_IDLE_DELAY, or a connect succeeds.  connect_work | 
 | 112 | 	 * is queued from set_nn_state both from hb up and from itself if a | 
 | 113 | 	 * connect attempt fails and so can be self-arming.  shutdown is | 
 | 114 | 	 * careful to first mark the nn such that no connects will be attempted | 
 | 115 | 	 * before canceling delayed connect work and flushing the queue. */ | 
| David Howells | c402895 | 2006-11-22 14:57:56 +0000 | [diff] [blame] | 116 | 	struct delayed_work		nn_connect_work; | 
| Zach Brown | 9821148 | 2005-12-15 14:31:23 -0800 | [diff] [blame] | 117 | 	unsigned long			nn_last_connect_attempt; | 
 | 118 |  | 
 | 119 | 	/* this is queued as nodes come up and is canceled when a connection is | 
 | 120 | 	 * established.  this expiring gives up on the node and errors out | 
 | 121 | 	 * transmits */ | 
| David Howells | c402895 | 2006-11-22 14:57:56 +0000 | [diff] [blame] | 122 | 	struct delayed_work		nn_connect_expired; | 
| Zach Brown | 9821148 | 2005-12-15 14:31:23 -0800 | [diff] [blame] | 123 |  | 
 | 124 | 	/* after we give up on a socket we wait a while before deciding | 
 | 125 | 	 * that it is still heartbeating and that we should do some | 
 | 126 | 	 * quorum work */ | 
| David Howells | c402895 | 2006-11-22 14:57:56 +0000 | [diff] [blame] | 127 | 	struct delayed_work		nn_still_up; | 
| Zach Brown | 9821148 | 2005-12-15 14:31:23 -0800 | [diff] [blame] | 128 | }; | 
 | 129 |  | 
 | 130 | struct o2net_sock_container { | 
 | 131 | 	struct kref		sc_kref; | 
 | 132 | 	/* the next two are vaild for the life time of the sc */ | 
 | 133 | 	struct socket		*sc_sock; | 
 | 134 | 	struct o2nm_node	*sc_node; | 
 | 135 |  | 
 | 136 | 	/* all of these sc work structs hold refs on the sc while they are | 
 | 137 | 	 * queued.  they should not be able to ref a freed sc.  the teardown | 
 | 138 | 	 * race is with o2net_wq destruction in o2net_stop_listening() */ | 
 | 139 |  | 
 | 140 | 	/* rx and connect work are generated from socket callbacks.  sc | 
 | 141 | 	 * shutdown removes the callbacks and then flushes the work queue */ | 
 | 142 | 	struct work_struct	sc_rx_work; | 
 | 143 | 	struct work_struct	sc_connect_work; | 
 | 144 | 	/* shutdown work is triggered in two ways.  the simple way is | 
 | 145 | 	 * for a code path calls ensure_shutdown which gets a lock, removes | 
 | 146 | 	 * the sc from the nn, and queues the work.  in this case the | 
 | 147 | 	 * work is single-shot.  the work is also queued from a sock | 
 | 148 | 	 * callback, though, and in this case the work will find the sc | 
 | 149 | 	 * still on the nn and will call ensure_shutdown itself.. this | 
 | 150 | 	 * ends up triggering the shutdown work again, though nothing | 
 | 151 | 	 * will be done in that second iteration.  so work queue teardown | 
 | 152 | 	 * has to be careful to remove the sc from the nn before waiting | 
 | 153 | 	 * on the work queue so that the shutdown work doesn't remove the | 
 | 154 | 	 * sc and rearm itself. | 
 | 155 | 	 */ | 
 | 156 | 	struct work_struct	sc_shutdown_work; | 
 | 157 |  | 
 | 158 | 	struct timer_list	sc_idle_timeout; | 
| David Howells | c402895 | 2006-11-22 14:57:56 +0000 | [diff] [blame] | 159 | 	struct delayed_work	sc_keepalive_work; | 
| Zach Brown | 9821148 | 2005-12-15 14:31:23 -0800 | [diff] [blame] | 160 |  | 
 | 161 | 	unsigned		sc_handshake_ok:1; | 
 | 162 |  | 
 | 163 | 	struct page 		*sc_page; | 
 | 164 | 	size_t			sc_page_off; | 
 | 165 |  | 
 | 166 | 	/* original handlers for the sockets */ | 
 | 167 | 	void			(*sc_state_change)(struct sock *sk); | 
 | 168 | 	void			(*sc_data_ready)(struct sock *sk, int bytes); | 
| Sunil Mushran | 2309e9e | 2008-04-14 10:46:19 -0700 | [diff] [blame] | 169 | #ifdef CONFIG_DEBUG_FS | 
 | 170 | 	struct list_head        sc_net_debug_item; | 
 | 171 | #endif | 
| Zach Brown | 9821148 | 2005-12-15 14:31:23 -0800 | [diff] [blame] | 172 | 	struct timeval 		sc_tv_timer; | 
 | 173 | 	struct timeval 		sc_tv_data_ready; | 
 | 174 | 	struct timeval 		sc_tv_advance_start; | 
 | 175 | 	struct timeval 		sc_tv_advance_stop; | 
 | 176 | 	struct timeval 		sc_tv_func_start; | 
 | 177 | 	struct timeval 		sc_tv_func_stop; | 
 | 178 | 	u32			sc_msg_key; | 
 | 179 | 	u16			sc_msg_type; | 
| Zhen Wei | 925037b | 2007-01-23 17:19:59 -0800 | [diff] [blame] | 180 |  | 
 | 181 | 	struct mutex		sc_send_lock; | 
| Zach Brown | 9821148 | 2005-12-15 14:31:23 -0800 | [diff] [blame] | 182 | }; | 
 | 183 |  | 
 | 184 | struct o2net_msg_handler { | 
 | 185 | 	struct rb_node		nh_node; | 
 | 186 | 	u32			nh_max_len; | 
 | 187 | 	u32			nh_msg_type; | 
 | 188 | 	u32			nh_key; | 
 | 189 | 	o2net_msg_handler_func	*nh_func; | 
 | 190 | 	o2net_msg_handler_func	*nh_func_data; | 
| Kurt Hackel | d74c980 | 2007-01-17 17:04:25 -0800 | [diff] [blame] | 191 | 	o2net_post_msg_handler_func | 
 | 192 | 				*nh_post_func; | 
| Zach Brown | 9821148 | 2005-12-15 14:31:23 -0800 | [diff] [blame] | 193 | 	struct kref		nh_kref; | 
 | 194 | 	struct list_head	nh_unregister_item; | 
 | 195 | }; | 
 | 196 |  | 
 | 197 | enum o2net_system_error { | 
 | 198 | 	O2NET_ERR_NONE = 0, | 
 | 199 | 	O2NET_ERR_NO_HNDLR, | 
 | 200 | 	O2NET_ERR_OVERFLOW, | 
 | 201 | 	O2NET_ERR_DIED, | 
 | 202 | 	O2NET_ERR_MAX | 
 | 203 | }; | 
 | 204 |  | 
 | 205 | struct o2net_status_wait { | 
 | 206 | 	enum o2net_system_error	ns_sys_status; | 
 | 207 | 	s32			ns_status; | 
 | 208 | 	int			ns_id; | 
 | 209 | 	wait_queue_head_t	ns_wq; | 
 | 210 | 	struct list_head	ns_node_item; | 
 | 211 | }; | 
 | 212 |  | 
| Sunil Mushran | 2309e9e | 2008-04-14 10:46:19 -0700 | [diff] [blame] | 213 | #ifdef CONFIG_DEBUG_FS | 
 | 214 | /* just for state dumps */ | 
 | 215 | struct o2net_send_tracking { | 
 | 216 | 	struct list_head		st_net_debug_item; | 
 | 217 | 	struct task_struct		*st_task; | 
 | 218 | 	struct o2net_sock_container	*st_sc; | 
 | 219 | 	u32				st_id; | 
 | 220 | 	u32				st_msg_type; | 
 | 221 | 	u32				st_msg_key; | 
 | 222 | 	u8				st_node; | 
 | 223 | 	struct timeval			st_sock_time; | 
 | 224 | 	struct timeval			st_send_time; | 
 | 225 | 	struct timeval			st_status_time; | 
 | 226 | }; | 
| Sunil Mushran | 0f475b2 | 2008-05-12 18:31:37 -0700 | [diff] [blame] | 227 |  | 
 | 228 | void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, | 
 | 229 | 		    u32 msgkey, struct task_struct *task, u8 node); | 
 | 230 | void o2net_set_nst_sock_time(struct o2net_send_tracking *nst); | 
 | 231 | void o2net_set_nst_send_time(struct o2net_send_tracking *nst); | 
 | 232 | void o2net_set_nst_status_time(struct o2net_send_tracking *nst); | 
 | 233 | void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, | 
 | 234 | 				  struct o2net_sock_container *sc); | 
 | 235 | void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id); | 
 | 236 |  | 
| Sunil Mushran | 2309e9e | 2008-04-14 10:46:19 -0700 | [diff] [blame] | 237 | #else | 
 | 238 | struct o2net_send_tracking { | 
 | 239 | 	u32	dummy; | 
 | 240 | }; | 
| Sunil Mushran | 0f475b2 | 2008-05-12 18:31:37 -0700 | [diff] [blame] | 241 |  | 
 | 242 | static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, | 
 | 243 | 				  u32 msgkey, struct task_struct *task, u8 node) | 
 | 244 | { | 
 | 245 | } | 
 | 246 | static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) | 
 | 247 | { | 
 | 248 | } | 
 | 249 | static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst) | 
 | 250 | { | 
 | 251 | } | 
 | 252 | static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst) | 
 | 253 | { | 
 | 254 | } | 
 | 255 | static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, | 
 | 256 | 						struct o2net_sock_container *sc) | 
 | 257 | { | 
 | 258 | } | 
 | 259 | static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, | 
 | 260 | 					u32 msg_id) | 
 | 261 | { | 
 | 262 | } | 
| Sunil Mushran | 2309e9e | 2008-04-14 10:46:19 -0700 | [diff] [blame] | 263 | #endif	/* CONFIG_DEBUG_FS */ | 
 | 264 |  | 
| Zach Brown | 9821148 | 2005-12-15 14:31:23 -0800 | [diff] [blame] | 265 | #endif /* O2CLUSTER_TCP_INTERNAL_H */ |