blob: 94c050ad55b6745ebf45ebfe5d4f11a7f002a5f7 [file] [log] [blame]
/*
   drbd_receiver.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */
24
25
Philipp Reisnerb411b362009-09-25 16:07:19 -070026#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <net/sock.h>
30
Philipp Reisnerb411b362009-09-25 16:07:19 -070031#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070039#include <linux/pkt_sched.h>
40#define __KERNEL_SYSCALLS__
41#include <linux/unistd.h>
42#include <linux/vmalloc.h>
43#include <linux/random.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070044#include <linux/string.h>
45#include <linux/scatterlist.h>
46#include "drbd_int.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070047#include "drbd_req.h"
48
49#include "drbd_vli.h"
50
/* Decoded header of a packet received from the peer. */
struct packet_info {
	enum drbd_packet cmd;	/* command code taken from the wire header */
	int size;		/* payload size in bytes */
	int vnr;		/* volume number -- presumably the per-connection
				 * device index; confirm against header decoding */
};
56
/* Result of trying to finish an epoch (see drbd_may_finish_epoch()).
 * NOTE(review): value meanings inferred from the names; confirm at callers. */
enum finish_epoch {
	FE_STILL_LIVE,	/* epoch still referenced, left alive */
	FE_DESTROYED,	/* epoch object was freed */
	FE_RECYCLED,	/* epoch object was reused */
};
62
Philipp Reisner65d11ed2011-02-07 17:35:59 +010063static int drbd_do_handshake(struct drbd_tconn *tconn);
Philipp Reisner13e60372011-02-08 09:54:40 +010064static int drbd_do_auth(struct drbd_tconn *tconn);
Philipp Reisner360cc742011-02-08 14:29:53 +010065static int drbd_disconnected(int vnr, void *p, void *data);
Philipp Reisnerb411b362009-09-25 16:07:19 -070066
67static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
Philipp Reisner00d56942011-02-09 18:09:48 +010068static int e_end_block(struct drbd_work *, int);
Philipp Reisnerb411b362009-09-25 16:07:19 -070069
Philipp Reisnerb411b362009-09-25 16:07:19 -070070
71#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
72
Lars Ellenberg45bb9122010-05-14 17:10:48 +020073/*
74 * some helper functions to deal with single linked page lists,
75 * page->private being our "next" pointer.
76 */
77
78/* If at least n pages are linked at head, get n pages off.
79 * Otherwise, don't modify head, and return NULL.
80 * Locking is the responsibility of the caller.
81 */
82static struct page *page_chain_del(struct page **head, int n)
83{
84 struct page *page;
85 struct page *tmp;
86
87 BUG_ON(!n);
88 BUG_ON(!head);
89
90 page = *head;
Philipp Reisner23ce4222010-05-20 13:35:31 +020091
92 if (!page)
93 return NULL;
94
Lars Ellenberg45bb9122010-05-14 17:10:48 +020095 while (page) {
96 tmp = page_chain_next(page);
97 if (--n == 0)
98 break; /* found sufficient pages */
99 if (tmp == NULL)
100 /* insufficient pages, don't use any of them. */
101 return NULL;
102 page = tmp;
103 }
104
105 /* add end of list marker for the returned list */
106 set_page_private(page, 0);
107 /* actual return value, and adjustment of head */
108 page = *head;
109 *head = tmp;
110 return page;
111}
112
113/* may be used outside of locks to find the tail of a (usually short)
114 * "private" page chain, before adding it back to a global chain head
115 * with page_chain_add() under a spinlock. */
116static struct page *page_chain_tail(struct page *page, int *len)
117{
118 struct page *tmp;
119 int i = 1;
120 while ((tmp = page_chain_next(page)))
121 ++i, page = tmp;
122 if (len)
123 *len = i;
124 return page;
125}
126
127static int page_chain_free(struct page *page)
128{
129 struct page *tmp;
130 int i = 0;
131 page_chain_for_each_safe(page, tmp) {
132 put_page(page);
133 ++i;
134 }
135 return i;
136}
137
138static void page_chain_add(struct page **head,
139 struct page *chain_first, struct page *chain_last)
140{
141#if 1
142 struct page *tmp;
143 tmp = page_chain_tail(chain_first, NULL);
144 BUG_ON(tmp != chain_last);
145#endif
146
147 /* add chain to head */
148 set_page_private(chain_last, (unsigned long)*head);
149 *head = chain_first;
150}
151
/*
 * Try to gather @number pages: first whole-sale from the global
 * drbd_pp_pool, else page by page from the kernel with GFP_TRY.
 * Returns a page chain linked via page->private, or NULL if @number
 * pages could not be collected (any partial allocation is put back
 * into the global pool for the retry in drbd_pp_alloc()).
 */
static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		/* prepend: freshly allocated page becomes new chain head */
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_pp_alloc will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}
196
/*
 * Move finished entries from mdev->net_ee onto @to_be_freed.
 * Callers hold mdev->tconn->req_lock (see drbd_kick_lo_and_reclaim_net()
 * and drbd_process_done_ee()).
 */
static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req;
	struct list_head *le, *tle;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first not finished we can
	   stop to examine the list... */

	list_for_each_safe(le, tle, &mdev->net_ee) {
		peer_req = list_entry(le, struct drbd_peer_request, w.list);
		if (drbd_ee_has_active_page(peer_req))
			break;
		list_move(le, to_be_freed);
	}
}
214
/*
 * Collect net_ee entries whose pages are no longer in flight and free
 * them.  The req_lock is held only for the list surgery; the actual
 * freeing happens outside the lock.
 */
static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_net_ee(mdev, &reclaimed);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_ee(mdev, peer_req);
}
227
/**
 * drbd_pp_alloc() - Returns @number pages, retries forever (or until signalled)
 * @mdev:	DRBD device.
 * @number:	number of pages requested
 * @retry:	whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel, unless this allocation would exceed the max_buffers setting.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * Returns a page chain linked via page->private, or NULL if @retry is
 * false (or a signal arrived) and no pages could be obtained.
 */
static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool retry)
{
	struct page *page = NULL;
	DEFINE_WAIT(wait);

	/* Yes, we may run up to @number over max_buffers. If we
	 * follow it strictly, the admin will get it wrong anyways. */
	if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers)
		page = drbd_pp_first_pages_or_try_alloc(mdev, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		/* recycle pages of peer requests the peer already acked */
		drbd_kick_lo_and_reclaim_net(mdev);

		if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) {
			page = drbd_pp_first_pages_or_try_alloc(mdev, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			dev_warn(DEV, "drbd_pp_alloc interrupted!\n");
			break;
		}

		/* sleep until drbd_pp_free() wakes us */
		schedule();
	}
	finish_wait(&drbd_pp_wait, &wait);

	/* account the pages only on success; page may be NULL here */
	if (page)
		atomic_add(number, &mdev->pp_in_use);
	return page;
}
277
/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc.
 * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system.
 * @is_net selects which in-use counter the pages were accounted against. */
static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
	int i;

	/* keep the global pool bounded; hand surplus back to the kernel */
	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE)*minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	/* let drbd_pp_alloc() waiters retry */
	wake_up(&drbd_pp_wait);
}
303
304/*
305You need to hold the req_lock:
306 _drbd_wait_ee_list_empty()
307
308You must not have the req_lock:
309 drbd_free_ee()
310 drbd_alloc_ee()
311 drbd_init_ee()
312 drbd_release_ee()
313 drbd_ee_fix_bhs()
314 drbd_process_done_ee()
315 drbd_clear_done_ee()
316 drbd_wait_ee_list_empty()
317*/
318
/*
 * Allocate a peer request ("EE") plus enough pool pages to hold
 * @data_size bytes at @sector.  Returns NULL on allocation failure or
 * injected fault.  The caller owns the returned object and frees it
 * via drbd_free_ee()/drbd_free_some_ee().
 */
struct drbd_peer_request *
drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector,
	      unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_peer_request *peer_req;
	struct page *page;
	/* round the payload up to whole pages */
	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;

	/* fault-injection hook to exercise the failure path */
	if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			dev_err(DEV, "alloc_ee: Allocation of an EE failed\n");
		return NULL;
	}

	/* retry (block) only if the caller allowed waiting */
	page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
	if (!page)
		goto fail;

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->w.mdev = mdev;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}
364
/* Free one peer request: its digest (if any), its page chain, and the
 * mempool object itself.  @is_net selects which pp_in_use counter the
 * pages were accounted against (see drbd_pp_free()). */
void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
		       int is_net)
{
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_pp_free(mdev, peer_req->pages, is_net);
	D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(drbd_interval_empty(&peer_req->i));
	mempool_free(peer_req, drbd_ee_mempool);
}
375
/* Splice @list empty under the req_lock, then free every entry outside
 * the lock.  Returns the number of entries freed. */
int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	/* net_ee pages are accounted in pp_in_use_by_net */
	int is_net = list == &mdev->net_ee;

	spin_lock_irq(&mdev->tconn->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		drbd_free_some_ee(mdev, peer_req, is_net);
		count++;
	}
	return count;
}
393
394
/* See also comments in _req_mod(,BARRIER_ACKED)
 * and receive_Barrier.
 *
 * Move entries from net_ee to done_ee, if ready.
 * Grab done_ee, call all callbacks, free the entries.
 * The callbacks typically send out ACKs.
 *
 * Returns nonzero ("ok") while the connection state is at least
 * C_WF_REPORT_PARAMS and every callback succeeded.
 */
static int drbd_process_done_ee(struct drbd_conf *mdev)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int ok = (mdev->state.conn >= C_WF_REPORT_PARAMS);

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_net_ee(mdev, &reclaimed);
	list_splice_init(&mdev->done_ee, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_ee(mdev, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_discard_ack.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		/* list_del not necessary, next/prev members not touched */
		/* a single callback failure turns "ok" off for the rest */
		ok = peer_req->w.cb(&peer_req->w, !ok) && ok;
		drbd_free_ee(mdev, peer_req);
	}
	wake_up(&mdev->ee_wait);

	return ok;
}
430
/* Wait until @head becomes empty.  Caller holds the req_lock; it is
 * dropped while sleeping and re-acquired before each recheck and
 * before returning. */
void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&mdev->tconn->req_lock);
		io_schedule();
		finish_wait(&mdev->ee_wait, &wait);
		spin_lock_irq(&mdev->tconn->req_lock);
	}
}
445
/* Like _drbd_wait_ee_list_empty(), but takes and releases the req_lock
 * itself. */
void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
{
	spin_lock_irq(&mdev->tconn->req_lock);
	_drbd_wait_ee_list_empty(mdev, head);
	spin_unlock_irq(&mdev->tconn->req_lock);
}
452
/* see also kernel_accept; which is only present since 2.6.18.
 * also we want to log which part of it failed, exactly.
 * On failure *what names the failing step for the caller's error log.
 * Returns 0 on success (with *newsock established), negative errno
 * otherwise. */
static int drbd_accept(const char **what, struct socket *sock, struct socket **newsock)
{
	struct sock *sk = sock->sk;
	int err = 0;

	*what = "listen";
	err = sock->ops->listen(sock, 5);
	if (err < 0)
		goto out;

	*what = "sock_create_lite";
	err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
			       newsock);
	if (err < 0)
		goto out;

	*what = "accept";
	err = sock->ops->accept(sock, *newsock, 0);
	if (err < 0) {
		sock_release(*newsock);
		*newsock = NULL;
		goto out;
	}
	/* the accepted socket inherits the ops of the listening socket */
	(*newsock)->ops = sock->ops;

out:
	return err;
}
483
/* Receive up to @size bytes into the kernel buffer @buf from @sock.
 * @flags is passed through to sock_recvmsg(); 0 selects the default
 * MSG_WAITALL | MSG_NOSIGNAL.  Returns bytes received or negative errno. */
static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	int rv;

	/* kernel-space buffer: temporarily lift the user-copy address check */
	oldfs = get_fs();
	set_fs(KERNEL_DS);
	rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
	set_fs(oldfs);

	return rv;
}
505
/* Blocking receive of exactly @size bytes from tconn's data socket
 * (MSG_WAITALL).  Anything short -- signal, connection reset, shutdown
 * -- breaks out and forces the connection state to C_BROKEN_PIPE.
 * Returns the number of bytes actually read, 0, or a negative error. */
static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = MSG_WAITALL | MSG_NOSIGNAL
	};
	int rv;

	/* kernel-space buffer: temporarily lift the user-copy address check */
	oldfs = get_fs();
	set_fs(KERNEL_DS);

	for (;;) {
		rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags);
		if (rv == size)
			break;

		/* Note:
		 * ECONNRESET	other side closed the connection
		 * ERESTARTSYS	(on  sock) we got a signal
		 */

		if (rv < 0) {
			if (rv == -ECONNRESET)
				conn_info(tconn, "sock was reset by peer\n");
			else if (rv != -ERESTARTSYS)
				conn_err(tconn, "sock_recvmsg returned %d\n", rv);
			break;
		} else if (rv == 0) {
			conn_info(tconn, "sock was shut down by peer\n");
			break;
		} else {
			/* signal came in, or peer/link went down,
			 * after we read a partial message
			 */
			/* D_ASSERT(signal_pending(current)); */
			break;
		}
	};

	set_fs(oldfs);

	/* anything but a complete read means the link is unusable */
	if (rv != size)
		conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);

	return rv;
}
558
Lars Ellenberg5dbf1672010-05-25 16:18:01 +0200559/* quoting tcp(7):
560 * On individual connections, the socket buffer size must be set prior to the
561 * listen(2) or connect(2) calls in order to have it take effect.
562 * This is our wrapper to do so.
563 */
564static void drbd_setbufsize(struct socket *sock, unsigned int snd,
565 unsigned int rcv)
566{
567 /* open coded SO_SNDBUF, SO_RCVBUF */
568 if (snd) {
569 sock->sk->sk_sndbuf = snd;
570 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
571 }
572 if (rcv) {
573 sock->sk->sk_rcvbuf = rcv;
574 sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
575 }
576}
577
/*
 * Active side of connection setup: create a TCP socket bound to the
 * locally configured address and try to connect it to the peer.
 * Returns the (possibly still connecting) socket, or NULL on error.
 * "Expected" failures -- timeout, peer not up yet -- leave the
 * connection state alone; anything else forces C_DISCONNECTING.
 */
static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	int err;
	int disconnect_on_error = 1;

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
		SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = tconn->net_conf->try_connect_int*HZ;
	drbd_setbufsize(sock, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 *  a free one dynamically.
	 */
	memcpy(&src_in6, tconn->net_conf->my_addr,
	       min_t(int, tconn->net_conf->my_addr_len, sizeof(src_in6)));
	if (((struct sockaddr *)tconn->net_conf->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	what = "bind before connect";
	err = sock->ops->bind(sock,
			      (struct sockaddr *) &src_in6,
			      tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock,
				 (struct sockaddr *)tconn->net_conf->peer_addr,
				 tconn->net_conf->peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN: case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			conn_err(tconn, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
	}
	put_net_conf(tconn);
	return sock;
}
655
/*
 * Passive side of connection setup: create a listening socket on the
 * configured local address and accept one incoming connection.  The
 * listen/accept timeout carries ~28.5% random jitter so the two peers
 * do not keep colliding.  Returns the established socket or NULL.
 */
static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
{
	int timeo, err;
	struct socket *s_estab = NULL, *s_listen;
	const char *what;

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
		SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	timeo = tconn->net_conf->try_connect_int * HZ;
	timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */

	s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */
	s_listen->sk->sk_rcvtimeo = timeo;
	s_listen->sk->sk_sndtimeo = timeo;
	drbd_setbufsize(s_listen, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen,
			      (struct sockaddr *) tconn->net_conf->my_addr,
			      tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	err = drbd_accept(&what, s_listen, &s_estab);

out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		/* timeouts/signals are normal while waiting; stay quiet */
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			conn_err(tconn, "%s failed, err = %d\n", what, err);
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}
	put_net_conf(tconn);

	return s_estab;
}
704
/* Send a header-only "first packet" carrying @cmd on @sock, used during
 * the connection handshake. */
static int drbd_send_fp(struct drbd_tconn *tconn, struct socket *sock, enum drbd_packet cmd)
{
	struct p_header *h = &tconn->data.sbuf.header;

	return _conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0);
}
711
/* Receive a "first packet" header from @sock and return its command
 * code, or 0xffff if the read was short or the magic did not match. */
static enum drbd_packet drbd_recv_fp(struct drbd_tconn *tconn, struct socket *sock)
{
	struct p_header80 *h = &tconn->data.rbuf.header.h80;
	int rr;

	rr = drbd_recv_short(sock, h, sizeof(*h), 0);

	if (rr == sizeof(*h) && h->magic == cpu_to_be32(DRBD_MAGIC))
		return be16_to_cpu(h->command);

	return 0xffff;
}
724
725/**
726 * drbd_socket_okay() - Free the socket if its connection is not okay
Philipp Reisnerb411b362009-09-25 16:07:19 -0700727 * @sock: pointer to the pointer to the socket.
728 */
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100729static int drbd_socket_okay(struct socket **sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700730{
731 int rr;
732 char tb[4];
733
734 if (!*sock)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100735 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700736
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100737 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700738
739 if (rr > 0 || rr == -EAGAIN) {
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100740 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700741 } else {
742 sock_release(*sock);
743 *sock = NULL;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100744 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700745 }
746}
747
/* Per-volume setup once the connection is established: reset sequence
 * counters, select the state mutex, and send the initial parameter/
 * state packets to the peer.  Returns 0 on success, 1 if any send
 * failed (note the inverted "ok"). */
static int drbd_connected(int vnr, void *p, void *data)
{
	struct drbd_conf *mdev = (struct drbd_conf *)p;
	int ok = 1;

	atomic_set(&mdev->packet_seq, 0);
	mdev->peer_seq = 0;

	/* protocol >= 100: per-volume state mutex;
	 * older peers share the per-connection cstate mutex */
	mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ?
		&mdev->tconn->cstate_mutex :
		&mdev->own_state_mutex;

	ok &= drbd_send_sync_param(mdev, &mdev->sync_conf);
	ok &= drbd_send_sizes(mdev, 0, 0);
	ok &= drbd_send_uuids(mdev);
	ok &= drbd_send_state(mdev);
	clear_bit(USE_DEGR_WFC_T, &mdev->flags);
	clear_bit(RESIZE_PENDING, &mdev->flags);


	return !ok;
}
770
Philipp Reisnerb411b362009-09-25 16:07:19 -0700771/*
772 * return values:
773 * 1 yes, we have a valid connection
774 * 0 oops, did not work out, please try again
775 * -1 peer talks different language,
776 * no point in trying again, please go standalone.
777 * -2 We do not have a network config...
778 */
Philipp Reisner907599e2011-02-08 11:25:37 +0100779static int drbd_connect(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700780{
781 struct socket *s, *sock, *msock;
782 int try, h, ok;
783
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100784 if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700785 return -2;
786
Philipp Reisner907599e2011-02-08 11:25:37 +0100787 clear_bit(DISCARD_CONCURRENT, &tconn->flags);
788 tconn->agreed_pro_version = 99;
Philipp Reisnerfd340c12011-01-19 16:57:39 +0100789 /* agreed_pro_version must be smaller than 100 so we send the old
790 header (h80) in the first packet and in the handshake packet. */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700791
792 sock = NULL;
793 msock = NULL;
794
795 do {
796 for (try = 0;;) {
797 /* 3 tries, this should take less than a second! */
Philipp Reisner907599e2011-02-08 11:25:37 +0100798 s = drbd_try_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700799 if (s || ++try >= 3)
800 break;
801 /* give the other side time to call bind() & listen() */
Philipp Reisner20ee6392011-01-18 15:28:59 +0100802 schedule_timeout_interruptible(HZ / 10);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700803 }
804
805 if (s) {
806 if (!sock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100807 drbd_send_fp(tconn, s, P_HAND_SHAKE_S);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700808 sock = s;
809 s = NULL;
810 } else if (!msock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100811 drbd_send_fp(tconn, s, P_HAND_SHAKE_M);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700812 msock = s;
813 s = NULL;
814 } else {
Philipp Reisner907599e2011-02-08 11:25:37 +0100815 conn_err(tconn, "Logic error in drbd_connect()\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700816 goto out_release_sockets;
817 }
818 }
819
820 if (sock && msock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100821 schedule_timeout_interruptible(tconn->net_conf->ping_timeo*HZ/10);
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100822 ok = drbd_socket_okay(&sock);
823 ok = drbd_socket_okay(&msock) && ok;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700824 if (ok)
825 break;
826 }
827
828retry:
Philipp Reisner907599e2011-02-08 11:25:37 +0100829 s = drbd_wait_for_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700830 if (s) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100831 try = drbd_recv_fp(tconn, s);
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100832 drbd_socket_okay(&sock);
833 drbd_socket_okay(&msock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700834 switch (try) {
835 case P_HAND_SHAKE_S:
836 if (sock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100837 conn_warn(tconn, "initial packet S crossed\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700838 sock_release(sock);
839 }
840 sock = s;
841 break;
842 case P_HAND_SHAKE_M:
843 if (msock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100844 conn_warn(tconn, "initial packet M crossed\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700845 sock_release(msock);
846 }
847 msock = s;
Philipp Reisner907599e2011-02-08 11:25:37 +0100848 set_bit(DISCARD_CONCURRENT, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700849 break;
850 default:
Philipp Reisner907599e2011-02-08 11:25:37 +0100851 conn_warn(tconn, "Error receiving initial packet\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700852 sock_release(s);
853 if (random32() & 1)
854 goto retry;
855 }
856 }
857
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100858 if (tconn->cstate <= C_DISCONNECTING)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700859 goto out_release_sockets;
860 if (signal_pending(current)) {
861 flush_signals(current);
862 smp_rmb();
Philipp Reisner907599e2011-02-08 11:25:37 +0100863 if (get_t_state(&tconn->receiver) == EXITING)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700864 goto out_release_sockets;
865 }
866
867 if (sock && msock) {
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100868 ok = drbd_socket_okay(&sock);
869 ok = drbd_socket_okay(&msock) && ok;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700870 if (ok)
871 break;
872 }
873 } while (1);
874
875 msock->sk->sk_reuse = 1; /* SO_REUSEADDR */
876 sock->sk->sk_reuse = 1; /* SO_REUSEADDR */
877
878 sock->sk->sk_allocation = GFP_NOIO;
879 msock->sk->sk_allocation = GFP_NOIO;
880
881 sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
882 msock->sk->sk_priority = TC_PRIO_INTERACTIVE;
883
Philipp Reisnerb411b362009-09-25 16:07:19 -0700884 /* NOT YET ...
Philipp Reisner907599e2011-02-08 11:25:37 +0100885 * sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700886 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
887 * first set it to the P_HAND_SHAKE timeout,
888 * which we set to 4x the configured ping_timeout. */
889 sock->sk->sk_sndtimeo =
Philipp Reisner907599e2011-02-08 11:25:37 +0100890 sock->sk->sk_rcvtimeo = tconn->net_conf->ping_timeo*4*HZ/10;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700891
Philipp Reisner907599e2011-02-08 11:25:37 +0100892 msock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
893 msock->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700894
895 /* we don't want delays.
Lucas De Marchi25985ed2011-03-30 22:57:33 -0300896 * we use TCP_CORK where appropriate, though */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700897 drbd_tcp_nodelay(sock);
898 drbd_tcp_nodelay(msock);
899
Philipp Reisner907599e2011-02-08 11:25:37 +0100900 tconn->data.socket = sock;
901 tconn->meta.socket = msock;
902 tconn->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700903
Philipp Reisner907599e2011-02-08 11:25:37 +0100904 h = drbd_do_handshake(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700905 if (h <= 0)
906 return h;
907
Philipp Reisner907599e2011-02-08 11:25:37 +0100908 if (tconn->cram_hmac_tfm) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700909 /* drbd_request_state(mdev, NS(conn, WFAuth)); */
Philipp Reisner907599e2011-02-08 11:25:37 +0100910 switch (drbd_do_auth(tconn)) {
Johannes Thomab10d96c2010-01-07 16:02:50 +0100911 case -1:
Philipp Reisner907599e2011-02-08 11:25:37 +0100912 conn_err(tconn, "Authentication of peer failed\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700913 return -1;
Johannes Thomab10d96c2010-01-07 16:02:50 +0100914 case 0:
Philipp Reisner907599e2011-02-08 11:25:37 +0100915 conn_err(tconn, "Authentication of peer failed, trying again.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +0100916 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700917 }
918 }
919
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100920 if (conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE) < SS_SUCCESS)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700921 return 0;
922
Philipp Reisner907599e2011-02-08 11:25:37 +0100923 sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700924 sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
925
Philipp Reisner907599e2011-02-08 11:25:37 +0100926 drbd_thread_start(&tconn->asender);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700927
Philipp Reisner907599e2011-02-08 11:25:37 +0100928 if (drbd_send_protocol(tconn) == -1)
Philipp Reisner7e2455c2010-04-22 14:50:23 +0200929 return -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700930
Philipp Reisner907599e2011-02-08 11:25:37 +0100931 return !idr_for_each(&tconn->volumes, drbd_connected, tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700932
933out_release_sockets:
934 if (sock)
935 sock_release(sock);
936 if (msock)
937 sock_release(msock);
938 return -1;
939}
940
Philipp Reisnerce243852011-02-07 17:27:47 +0100941static bool decode_header(struct drbd_tconn *tconn, struct p_header *h, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700942{
Philipp Reisnerfd340c12011-01-19 16:57:39 +0100943 if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) {
Philipp Reisner77351055b2011-02-07 17:24:26 +0100944 pi->cmd = be16_to_cpu(h->h80.command);
945 pi->size = be16_to_cpu(h->h80.length);
Philipp Reisnereefc2f72011-02-08 12:55:24 +0100946 pi->vnr = 0;
Andreas Gruenbacherca9bc122011-01-11 13:47:24 +0100947 } else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) {
Philipp Reisner77351055b2011-02-07 17:24:26 +0100948 pi->cmd = be16_to_cpu(h->h95.command);
949 pi->size = be32_to_cpu(h->h95.length) & 0x00ffffff;
950 pi->vnr = 0;
Philipp Reisner02918be2010-08-20 14:35:10 +0200951 } else {
Philipp Reisnerce243852011-02-07 17:27:47 +0100952 conn_err(tconn, "magic?? on data m: 0x%08x c: %d l: %d\n",
Lars Ellenberg004352f2010-10-05 20:13:58 +0200953 be32_to_cpu(h->h80.magic),
954 be16_to_cpu(h->h80.command),
955 be16_to_cpu(h->h80.length));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100956 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700957 }
Philipp Reisner257d0af2011-01-26 12:15:29 +0100958 return true;
959}
960
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100961static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +0100962{
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100963 struct p_header *h = &tconn->data.rbuf.header;
Philipp Reisner257d0af2011-01-26 12:15:29 +0100964 int r;
965
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100966 r = drbd_recv(tconn, h, sizeof(*h));
Philipp Reisner257d0af2011-01-26 12:15:29 +0100967 if (unlikely(r != sizeof(*h))) {
968 if (!signal_pending(current))
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100969 conn_warn(tconn, "short read expecting header on sock: r=%d\n", r);
Philipp Reisner257d0af2011-01-26 12:15:29 +0100970 return false;
971 }
972
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100973 r = decode_header(tconn, h, pi);
974 tconn->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700975
Philipp Reisner257d0af2011-01-26 12:15:29 +0100976 return r;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700977}
978
Philipp Reisner2451fc32010-08-24 13:43:11 +0200979static void drbd_flush(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700980{
981 int rv;
982
983 if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
Dmitry Monakhovfbd9b092010-04-28 17:55:06 +0400984 rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
Christoph Hellwigdd3932e2010-09-16 20:51:46 +0200985 NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700986 if (rv) {
987 dev_err(DEV, "local disk flush failed with status %d\n", rv);
988 /* would rather check on EOPNOTSUPP, but that is not reliable.
989 * don't try again for ANY return value != 0
990 * if (rv == -EOPNOTSUPP) */
991 drbd_bump_write_ordering(mdev, WO_drain_io);
992 }
993 put_ldev(mdev);
994 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700995}
996
997/**
998 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
999 * @mdev: DRBD device.
1000 * @epoch: Epoch object.
1001 * @ev: Epoch event.
1002 */
1003static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
1004 struct drbd_epoch *epoch,
1005 enum epoch_event ev)
1006{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001007 int epoch_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001008 struct drbd_epoch *next_epoch;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001009 enum finish_epoch rv = FE_STILL_LIVE;
1010
1011 spin_lock(&mdev->epoch_lock);
1012 do {
1013 next_epoch = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001014
1015 epoch_size = atomic_read(&epoch->epoch_size);
1016
1017 switch (ev & ~EV_CLEANUP) {
1018 case EV_PUT:
1019 atomic_dec(&epoch->active);
1020 break;
1021 case EV_GOT_BARRIER_NR:
1022 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001023 break;
1024 case EV_BECAME_LAST:
1025 /* nothing to do*/
1026 break;
1027 }
1028
Philipp Reisnerb411b362009-09-25 16:07:19 -07001029 if (epoch_size != 0 &&
1030 atomic_read(&epoch->active) == 0 &&
Philipp Reisner2451fc32010-08-24 13:43:11 +02001031 test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001032 if (!(ev & EV_CLEANUP)) {
1033 spin_unlock(&mdev->epoch_lock);
1034 drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
1035 spin_lock(&mdev->epoch_lock);
1036 }
1037 dec_unacked(mdev);
1038
1039 if (mdev->current_epoch != epoch) {
1040 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1041 list_del(&epoch->list);
1042 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
1043 mdev->epochs--;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001044 kfree(epoch);
1045
1046 if (rv == FE_STILL_LIVE)
1047 rv = FE_DESTROYED;
1048 } else {
1049 epoch->flags = 0;
1050 atomic_set(&epoch->epoch_size, 0);
Uwe Kleine-König698f9312010-07-02 20:41:51 +02001051 /* atomic_set(&epoch->active, 0); is already zero */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001052 if (rv == FE_STILL_LIVE)
1053 rv = FE_RECYCLED;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001054 wake_up(&mdev->ee_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001055 }
1056 }
1057
1058 if (!next_epoch)
1059 break;
1060
1061 epoch = next_epoch;
1062 } while (1);
1063
1064 spin_unlock(&mdev->epoch_lock);
1065
Philipp Reisnerb411b362009-09-25 16:07:19 -07001066 return rv;
1067}
1068
1069/**
1070 * drbd_bump_write_ordering() - Fall back to an other write ordering method
1071 * @mdev: DRBD device.
1072 * @wo: Write ordering method to try.
1073 */
1074void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local)
1075{
1076 enum write_ordering_e pwo;
1077 static char *write_ordering_str[] = {
1078 [WO_none] = "none",
1079 [WO_drain_io] = "drain",
1080 [WO_bdev_flush] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001081 };
1082
1083 pwo = mdev->write_ordering;
1084 wo = min(pwo, wo);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001085 if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
1086 wo = WO_drain_io;
1087 if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
1088 wo = WO_none;
1089 mdev->write_ordering = wo;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001090 if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001091 dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
1092}
1093
1094/**
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001095 * drbd_submit_ee()
1096 * @mdev: DRBD device.
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001097 * @peer_req: peer request
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001098 * @rw: flag field, see bio->bi_rw
Lars Ellenberg10f6d992011-01-24 14:47:09 +01001099 *
1100 * May spread the pages to multiple bios,
1101 * depending on bio_add_page restrictions.
1102 *
1103 * Returns 0 if all bios have been submitted,
1104 * -ENOMEM if we could not allocate enough bios,
1105 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1106 * single page to an empty bio (which should never happen and likely indicates
1107 * that the lower level IO stack is in some way broken). This has been observed
1108 * on certain Xen deployments.
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001109 */
1110/* TODO allocate from our own bio_set. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001111int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001112 const unsigned rw, const int fault_type)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001113{
1114 struct bio *bios = NULL;
1115 struct bio *bio;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001116 struct page *page = peer_req->pages;
1117 sector_t sector = peer_req->i.sector;
1118 unsigned ds = peer_req->i.size;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001119 unsigned n_bios = 0;
1120 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
Lars Ellenberg10f6d992011-01-24 14:47:09 +01001121 int err = -ENOMEM;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001122
1123 /* In most cases, we will only need one bio. But in case the lower
1124 * level restrictions happen to be different at this offset on this
1125 * side than those of the sending peer, we may need to submit the
1126 * request in more than one bio. */
1127next_bio:
1128 bio = bio_alloc(GFP_NOIO, nr_pages);
1129 if (!bio) {
1130 dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
1131 goto fail;
1132 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001133 /* > peer_req->i.sector, unless this is the first bio */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001134 bio->bi_sector = sector;
1135 bio->bi_bdev = mdev->ldev->backing_bdev;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001136 bio->bi_rw = rw;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001137 bio->bi_private = peer_req;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001138 bio->bi_end_io = drbd_endio_sec;
1139
1140 bio->bi_next = bios;
1141 bios = bio;
1142 ++n_bios;
1143
1144 page_chain_for_each(page) {
1145 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1146 if (!bio_add_page(bio, page, len, 0)) {
Lars Ellenberg10f6d992011-01-24 14:47:09 +01001147 /* A single page must always be possible!
1148 * But in case it fails anyways,
1149 * we deal with it, and complain (below). */
1150 if (bio->bi_vcnt == 0) {
1151 dev_err(DEV,
1152 "bio_add_page failed for len=%u, "
1153 "bi_vcnt=0 (bi_sector=%llu)\n",
1154 len, (unsigned long long)bio->bi_sector);
1155 err = -ENOSPC;
1156 goto fail;
1157 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001158 goto next_bio;
1159 }
1160 ds -= len;
1161 sector += len >> 9;
1162 --nr_pages;
1163 }
1164 D_ASSERT(page == NULL);
1165 D_ASSERT(ds == 0);
1166
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001167 atomic_set(&peer_req->pending_bios, n_bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001168 do {
1169 bio = bios;
1170 bios = bios->bi_next;
1171 bio->bi_next = NULL;
1172
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001173 drbd_generic_make_request(mdev, fault_type, bio);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001174 } while (bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001175 return 0;
1176
1177fail:
1178 while (bios) {
1179 bio = bios;
1180 bios = bios->bi_next;
1181 bio_put(bio);
1182 }
Lars Ellenberg10f6d992011-01-24 14:47:09 +01001183 return err;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001184}
1185
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001186static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001187 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001188{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001189 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001190
1191 drbd_remove_interval(&mdev->write_requests, i);
1192 drbd_clear_interval(i);
1193
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001194 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001195 if (i->waiting)
1196 wake_up(&mdev->misc_wait);
1197}
1198
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001199static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd,
1200 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001201{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001202 int rv;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001203 struct p_barrier *p = &mdev->tconn->data.rbuf.barrier;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001204 struct drbd_epoch *epoch;
1205
Philipp Reisnerb411b362009-09-25 16:07:19 -07001206 inc_unacked(mdev);
1207
Philipp Reisnerb411b362009-09-25 16:07:19 -07001208 mdev->current_epoch->barrier_nr = p->barrier;
1209 rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);
1210
1211 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1212 * the activity log, which means it would not be resynced in case the
1213 * R_PRIMARY crashes now.
1214 * Therefore we must send the barrier_ack after the barrier request was
1215 * completed. */
1216 switch (mdev->write_ordering) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001217 case WO_none:
1218 if (rv == FE_RECYCLED)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001219 return true;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001220
1221 /* receiver context, in the writeout path of the other node.
1222 * avoid potential distributed deadlock */
1223 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1224 if (epoch)
1225 break;
1226 else
1227 dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
1228 /* Fall through */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001229
1230 case WO_bdev_flush:
1231 case WO_drain_io:
Philipp Reisnerb411b362009-09-25 16:07:19 -07001232 drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
Philipp Reisner2451fc32010-08-24 13:43:11 +02001233 drbd_flush(mdev);
1234
1235 if (atomic_read(&mdev->current_epoch->epoch_size)) {
1236 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1237 if (epoch)
1238 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001239 }
1240
Philipp Reisner2451fc32010-08-24 13:43:11 +02001241 epoch = mdev->current_epoch;
1242 wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);
1243
1244 D_ASSERT(atomic_read(&epoch->active) == 0);
1245 D_ASSERT(epoch->flags == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001246
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001247 return true;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001248 default:
1249 dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001250 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001251 }
1252
1253 epoch->flags = 0;
1254 atomic_set(&epoch->epoch_size, 0);
1255 atomic_set(&epoch->active, 0);
1256
1257 spin_lock(&mdev->epoch_lock);
1258 if (atomic_read(&mdev->current_epoch->epoch_size)) {
1259 list_add(&epoch->list, &mdev->current_epoch->list);
1260 mdev->current_epoch = epoch;
1261 mdev->epochs++;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001262 } else {
1263 /* The current_epoch got recycled while we allocated this one... */
1264 kfree(epoch);
1265 }
1266 spin_unlock(&mdev->epoch_lock);
1267
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001268 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001269}
1270
1271/* used from receive_RSDataReply (recv_resync_read)
1272 * and from receive_Data */
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001273static struct drbd_peer_request *
1274read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
1275 int data_size) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001276{
Lars Ellenberg66660322010-04-06 12:15:04 +02001277 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001278 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001279 struct page *page;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001280 int dgs, ds, rr;
Philipp Reisnera0638452011-01-19 14:31:32 +01001281 void *dig_in = mdev->tconn->int_dig_in;
1282 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001283 unsigned long *data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001284
Philipp Reisnera0638452011-01-19 14:31:32 +01001285 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
1286 crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001287
1288 if (dgs) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001289 rr = drbd_recv(mdev->tconn, dig_in, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001290 if (rr != dgs) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001291 if (!signal_pending(current))
1292 dev_warn(DEV,
1293 "short read receiving data digest: read %d expected %d\n",
1294 rr, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001295 return NULL;
1296 }
1297 }
1298
1299 data_size -= dgs;
1300
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001301 if (!expect(data_size != 0))
1302 return NULL;
1303 if (!expect(IS_ALIGNED(data_size, 512)))
1304 return NULL;
1305 if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
1306 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001307
Lars Ellenberg66660322010-04-06 12:15:04 +02001308 /* even though we trust out peer,
1309 * we sometimes have to double check. */
1310 if (sector + (data_size>>9) > capacity) {
Lars Ellenbergfdda6542011-01-24 15:11:01 +01001311 dev_err(DEV, "request from peer beyond end of local disk: "
1312 "capacity: %llus < sector: %llus + size: %u\n",
Lars Ellenberg66660322010-04-06 12:15:04 +02001313 (unsigned long long)capacity,
1314 (unsigned long long)sector, data_size);
1315 return NULL;
1316 }
1317
Philipp Reisnerb411b362009-09-25 16:07:19 -07001318 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1319 * "criss-cross" setup, that might cause write-out on some other DRBD,
1320 * which in turn might block on the other node at this very place. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001321 peer_req = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO);
1322 if (!peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001323 return NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001324
Philipp Reisnerb411b362009-09-25 16:07:19 -07001325 ds = data_size;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001326 page = peer_req->pages;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001327 page_chain_for_each(page) {
1328 unsigned len = min_t(int, ds, PAGE_SIZE);
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001329 data = kmap(page);
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001330 rr = drbd_recv(mdev->tconn, data, len);
Andreas Gruenbacher0cf9d272010-12-07 10:43:29 +01001331 if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001332 dev_err(DEV, "Fault injection: Corrupting data on receive\n");
1333 data[0] = data[0] ^ (unsigned long)-1;
1334 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001335 kunmap(page);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001336 if (rr != len) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001337 drbd_free_ee(mdev, peer_req);
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001338 if (!signal_pending(current))
1339 dev_warn(DEV, "short read receiving data: read %d expected %d\n",
1340 rr, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001341 return NULL;
1342 }
1343 ds -= rr;
1344 }
1345
1346 if (dgs) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001347 drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, peer_req, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001348 if (memcmp(dig_in, dig_vv, dgs)) {
Lars Ellenberg470be442010-11-10 10:36:52 +01001349 dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
1350 (unsigned long long)sector, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001351 drbd_bcast_ee(mdev, "digest failed",
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001352 dgs, dig_in, dig_vv, peer_req);
1353 drbd_free_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001354 return NULL;
1355 }
1356 }
1357 mdev->recv_cnt += data_size>>9;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001358 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001359}
1360
1361/* drbd_drain_block() just takes a data block
1362 * out of the socket input buffer, and discards it.
1363 */
1364static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1365{
1366 struct page *page;
1367 int rr, rv = 1;
1368 void *data;
1369
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001370 if (!data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001371 return true;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001372
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001373 page = drbd_pp_alloc(mdev, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001374
1375 data = kmap(page);
1376 while (data_size) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001377 rr = drbd_recv(mdev->tconn, data, min_t(int, data_size, PAGE_SIZE));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001378 if (rr != min_t(int, data_size, PAGE_SIZE)) {
1379 rv = 0;
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001380 if (!signal_pending(current))
1381 dev_warn(DEV,
1382 "short read receiving data: read %d expected %d\n",
1383 rr, min_t(int, data_size, PAGE_SIZE));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001384 break;
1385 }
1386 data_size -= rr;
1387 }
1388 kunmap(page);
Lars Ellenberg435f0742010-09-06 12:30:25 +02001389 drbd_pp_free(mdev, page, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001390 return rv;
1391}
1392
/* Receive a data reply directly into the pages of an existing request's
 * master bio ("dless" presumably means disk-less read -- TODO confirm),
 * verifying the optional integrity digest.  Returns 1 on success, 0 on
 * short read or digest mismatch. */
static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
			   sector_t sector, int data_size)
{
	struct bio_vec *bvec;
	struct bio *bio;
	int dgs, rr, i, expect;
	void *dig_in = mdev->tconn->int_dig_in;
	void *dig_vv = mdev->tconn->int_dig_vv;

	/* digest size is zero unless integrity checking is configured */
	dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
		crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;

	if (dgs) {
		rr = drbd_recv(mdev->tconn, dig_in, dgs);
		if (rr != dgs) {
			if (!signal_pending(current))
				dev_warn(DEV,
					"short read receiving data reply digest: read %d expected %d\n",
					rr, dgs);
			return 0;
		}
	}

	data_size -= dgs;

	/* optimistically update recv_cnt. if receiving fails below,
	 * we disconnect anyways, and counters will be reset. */
	mdev->recv_cnt += data_size>>9;

	bio = req->master_bio;
	D_ASSERT(sector == bio->bi_sector);

	/* fill the bio's segments in order straight from the socket */
	bio_for_each_segment(bvec, bio, i) {
		expect = min_t(int, data_size, bvec->bv_len);
		rr = drbd_recv(mdev->tconn,
			     kmap(bvec->bv_page)+bvec->bv_offset,
			     expect);
		kunmap(bvec->bv_page);
		if (rr != expect) {
			if (!signal_pending(current))
				dev_warn(DEV, "short read receiving data reply: "
					"read %d expected %d\n",
					rr, expect);
			return 0;
		}
		data_size -= rr;
	}

	if (dgs) {
		/* recompute the digest over the bio and compare */
		drbd_csum_bio(mdev, mdev->tconn->integrity_r_tfm, bio, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
			return 0;
		}
	}

	/* the bio must have had room for exactly data_size bytes */
	D_ASSERT(data_size == 0);
	return 1;
}
1452
1453/* e_end_resync_block() is called via
1454 * drbd_process_done_ee() by asender only */
Philipp Reisner00d56942011-02-09 18:09:48 +01001455static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001456{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001457 struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w;
Philipp Reisner00d56942011-02-09 18:09:48 +01001458 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001459 sector_t sector = peer_req->i.sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001460 int ok;
1461
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001462 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001463
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001464 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1465 drbd_set_in_sync(mdev, sector, peer_req->i.size);
1466 ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001467 } else {
1468 /* Record failure to sync */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001469 drbd_rs_failed_io(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001470
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001471 ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001472 }
1473 dec_unacked(mdev);
1474
1475 return ok;
1476}
1477
/*
 * recv_resync_read() - receive one resync data block and submit it as a
 * local WRITE.
 *
 * Reads @data_size bytes for @sector from the data socket into a freshly
 * allocated peer request (read_in_block()), queues it on sync_ee, and
 * submits it to the backing device.
 *
 * The caller must hold a local-disk reference (get_ldev()); it is dropped
 * here on the failure path, or later by the write completion path on
 * success (see the callers' comments referencing drbd_endio_sec).
 *
 * Returns true if submitted, false on any error (caller tears down the
 * connection).
 */
static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
{
	struct drbd_peer_request *peer_req;

	peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
	if (!peer_req)
		goto fail;

	dec_rs_pending(mdev);

	inc_unacked(mdev);
	/* corresponding dec_unacked() in e_end_resync_block()
	 * respective _drbd_clear_done_ee */

	peer_req->w.cb = e_end_resync_block;

	spin_lock_irq(&mdev->tconn->req_lock);
	list_add(&peer_req->w.list, &mdev->sync_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	/* account resync sectors; read back by drbd_rs_should_slow_down() */
	atomic_add(data_size >> 9, &mdev->rs_sect_ev);
	if (drbd_submit_ee(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
		return true;

	/* don't care for the reason here */
	dev_err(DEV, "submit failed, triggering re-connect\n");
	/* undo the queueing above before freeing the request */
	spin_lock_irq(&mdev->tconn->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	drbd_free_ee(mdev, peer_req);
fail:
	put_ldev(mdev);
	return false;
}
1513
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001514static struct drbd_request *
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001515find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
1516 sector_t sector, bool missing_ok, const char *func)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001517{
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001518 struct drbd_request *req;
1519
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001520 /* Request object according to our peer */
1521 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001522 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001523 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001524 if (!missing_ok) {
1525 dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func,
1526 (unsigned long)id, (unsigned long long)sector);
1527 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001528 return NULL;
1529}
1530
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001531static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
1532 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001533{
1534 struct drbd_request *req;
1535 sector_t sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001536 int ok;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001537 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001538
1539 sector = be64_to_cpu(p->sector);
1540
Philipp Reisner87eeee42011-01-19 14:16:30 +01001541 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001542 req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001543 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001544 if (unlikely(!req))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001545 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001546
Bart Van Assche24c48302011-05-21 18:32:29 +02001547 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001548 * special casing it there for the various failure cases.
1549 * still no race with drbd_fail_pending_reads */
1550 ok = recv_dless_read(mdev, req, sector, data_size);
1551
1552 if (ok)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001553 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001554 /* else: nothing. handled from drbd_disconnect...
1555 * I don't think we may complete this just yet
1556 * in case we are "on-disconnect: freeze" */
1557
1558 return ok;
1559}
1560
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001561static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
1562 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001563{
1564 sector_t sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001565 int ok;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001566 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001567
1568 sector = be64_to_cpu(p->sector);
1569 D_ASSERT(p->block_id == ID_SYNCER);
1570
1571 if (get_ldev(mdev)) {
1572 /* data is submitted to disk within recv_resync_read.
1573 * corresponding put_ldev done below on error,
Andreas Gruenbacher9c508422011-01-14 21:19:36 +01001574 * or in drbd_endio_sec. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001575 ok = recv_resync_read(mdev, sector, data_size);
1576 } else {
1577 if (__ratelimit(&drbd_ratelimit_state))
1578 dev_err(DEV, "Can not write resync data to local disk.\n");
1579
1580 ok = drbd_drain_block(mdev, data_size);
1581
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001582 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001583 }
1584
Philipp Reisner778f2712010-07-06 11:14:00 +02001585 atomic_add(data_size >> 9, &mdev->rs_sect_in);
1586
Philipp Reisnerb411b362009-09-25 16:07:19 -07001587 return ok;
1588}
1589
/* e_end_block() is called via drbd_process_done_ee().
 * this means this function only runs in the asender thread
 *
 * Completion callback for a mirrored write received via receive_Data():
 * sends the protocol-C ack (positive or negative), removes the request
 * from the conflict-detection tree, and lets the epoch make progress.
 * Returns nonzero on success (the ack send result).
 */
static int e_end_block(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w;
	struct drbd_conf *mdev = w->mdev;
	sector_t sector = peer_req->i.sector;
	int ok = 1, pcmd;

	/* only protocol C acks writes; A/B have nothing to send here */
	if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C) {
		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
			/* during a resync-capable connection state, a write
			 * flagged EE_MAY_SET_IN_SYNC also counts as resync
			 * progress and gets the P_RS_WRITE_ACK variant */
			pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
				mdev->state.conn <= C_PAUSED_SYNC_T &&
				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
				P_RS_WRITE_ACK : P_WRITE_ACK;
			ok &= drbd_send_ack(mdev, pcmd, peer_req);
			if (pcmd == P_RS_WRITE_ACK)
				drbd_set_in_sync(mdev, sector, peer_req->i.size);
		} else {
			ok  = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
			/* we expect it to be marked out of sync anyways...
			 * maybe assert this? */
		}
		/* balances inc_unacked() done in receive_Data() for PROT_C */
		dec_unacked(mdev);
	}
	/* we delete from the conflict detection hash _after_ we sent out the
	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right.  */
	if (mdev->tconn->net_conf->two_primaries) {
		spin_lock_irq(&mdev->tconn->req_lock);
		D_ASSERT(!drbd_interval_empty(&peer_req->i));
		drbd_remove_epoch_entry_interval(mdev, peer_req);
		spin_unlock_irq(&mdev->tconn->req_lock);
	} else
		/* single-primary: the interval was never inserted */
		D_ASSERT(drbd_interval_empty(&peer_req->i));

	drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));

	return ok;
}
1630
Philipp Reisner00d56942011-02-09 18:09:48 +01001631static int e_send_discard_ack(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001632{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001633 struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w;
Philipp Reisner00d56942011-02-09 18:09:48 +01001634 struct drbd_conf *mdev = w->mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001635 int ok = 1;
1636
Philipp Reisner89e58e72011-01-19 13:12:45 +01001637 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001638 ok = drbd_send_ack(mdev, P_DISCARD_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001639
Philipp Reisner87eeee42011-01-19 14:16:30 +01001640 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001641 D_ASSERT(!drbd_interval_empty(&peer_req->i));
1642 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001643 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001644
1645 dec_unacked(mdev);
1646
1647 return ok;
1648}
1649
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001650static bool seq_greater(u32 a, u32 b)
1651{
1652 /*
1653 * We assume 32-bit wrap-around here.
1654 * For 24-bit wrap-around, we would have to shift:
1655 * a <<= 8; b <<= 8;
1656 */
1657 return (s32)a - (s32)b > 0;
1658}
1659
1660static u32 seq_max(u32 a, u32 b)
1661{
1662 return seq_greater(a, b) ? a : b;
1663}
1664
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001665static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001666{
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001667 unsigned int old_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001668
1669 spin_lock(&mdev->peer_seq_lock);
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001670 old_peer_seq = mdev->peer_seq;
1671 mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001672 spin_unlock(&mdev->peer_seq_lock);
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001673 if (old_peer_seq != peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001674 wake_up(&mdev->seq_wait);
1675}
1676
/* Called from receive_Data.
 * Synchronize packets on sock with packets on msock.
 *
 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
 * packet traveling on msock, they are still processed in the order they have
 * been sent.
 *
 * Note: we don't care for Ack packets overtaking P_DATA packets.
 *
 * In case packet_seq is larger than mdev->peer_seq number, there are
 * outstanding packets on the msock. We wait for them to arrive.
 * In case we are the logically next packet, we update mdev->peer_seq
 * ourselves. Correctly handles 32bit wrap around.
 *
 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
 *
 * returns 0 if we may process the packet,
 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq)
{
	DEFINE_WAIT(wait);
	unsigned int p_seq;
	long timeout;
	int ret = 0;
	spin_lock(&mdev->peer_seq_lock);
	for (;;) {
		prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
		/* in sequence (or an older duplicate): we may process it */
		if (!seq_greater(packet_seq, mdev->peer_seq + 1))
			break;
		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}
		/* drop the lock while sleeping; update_peer_seq() wakes us */
		p_seq = mdev->peer_seq;
		spin_unlock(&mdev->peer_seq_lock);
		timeout = schedule_timeout(30*HZ);
		spin_lock(&mdev->peer_seq_lock);
		/* timed out with no progress at all: force a reconnect */
		if (timeout == 0 && p_seq == mdev->peer_seq) {
			ret = -ETIMEDOUT;
			dev_err(DEV, "ASSERT FAILED waited 30 seconds for sequence update, forcing reconnect\n");
			break;
		}
	}
	finish_wait(&mdev->seq_wait, &wait);
	/* if we are the logically next packet, advance peer_seq ourselves */
	if (mdev->peer_seq+1 == packet_seq)
		mdev->peer_seq++;
	spin_unlock(&mdev->peer_seq_lock);
	return ret;
}
1729
Lars Ellenberg688593c2010-11-17 22:25:03 +01001730/* see also bio_flags_to_wire()
1731 * DRBD_REQ_*, because we need to semantically map the flags to data packet
1732 * flags and back. We may replicate to other kernel versions. */
1733static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001734{
Lars Ellenberg688593c2010-11-17 22:25:03 +01001735 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1736 (dpf & DP_FUA ? REQ_FUA : 0) |
1737 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
1738 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001739}
1740
/* mirrored write */
/*
 * receive_Data() - handle a P_DATA packet: a write mirrored from the peer.
 *
 * Reads the payload, attaches it to the current epoch, performs the
 * two-primaries concurrent-write conflict resolution if configured, and
 * submits the write to the local disk.  Sends the protocol-appropriate
 * ack (none for A, P_RECV_ACK for B, deferred to e_end_block() for C).
 *
 * Returns true on success (data submitted or deliberately discarded),
 * false on error; the caller then tears down the connection.
 */
static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd,
			unsigned int data_size)
{
	sector_t sector;
	struct drbd_peer_request *peer_req;
	struct p_data *p = &mdev->tconn->data.rbuf.data;
	int rw = WRITE;
	u32 dp_flags;

	if (!get_ldev(mdev)) {
		/* no local disk: still keep the peer sequence number moving,
		 * drain the payload, and send a negative ack */
		spin_lock(&mdev->peer_seq_lock);
		if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num))
			mdev->peer_seq++;
		spin_unlock(&mdev->peer_seq_lock);

		drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
		atomic_inc(&mdev->current_epoch->epoch_size);
		return drbd_drain_block(mdev, data_size);
	}

	/* get_ldev(mdev) successful.
	 * Corresponding put_ldev done either below (on various errors),
	 * or in drbd_endio_sec, if we successfully submit the data at
	 * the end of this function. */

	sector = be64_to_cpu(p->sector);
	peer_req = read_in_block(mdev, p->block_id, sector, data_size);
	if (!peer_req) {
		put_ldev(mdev);
		return false;
	}

	peer_req->w.cb = e_end_block;

	dp_flags = be32_to_cpu(p->dp_flags);
	rw |= wire_flags_to_bio(mdev, dp_flags);

	if (dp_flags & DP_MAY_SET_IN_SYNC)
		peer_req->flags |= EE_MAY_SET_IN_SYNC;

	/* enter the write into the current epoch */
	spin_lock(&mdev->epoch_lock);
	peer_req->epoch = mdev->current_epoch;
	atomic_inc(&peer_req->epoch->epoch_size);
	atomic_inc(&peer_req->epoch->active);
	spin_unlock(&mdev->epoch_lock);

	/* I'm the receiver, I do hold a net_cnt reference. */
	if (!mdev->tconn->net_conf->two_primaries) {
		spin_lock_irq(&mdev->tconn->req_lock);
	} else {
		/* don't get the req_lock yet,
		 * we may sleep in drbd_wait_peer_seq */
		const int size = peer_req->i.size;
		const int discard = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
		DEFINE_WAIT(wait);
		int first;

		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);

		/* conflict detection and handling:
		 * 1. wait on the sequence number,
		 *    in case this data packet overtook ACK packets.
		 * 2. check for conflicting write requests.
		 *
		 * Note: for two_primaries, we are protocol C,
		 * so there cannot be any request that is DONE
		 * but still on the transfer log.
		 *
		 * if no conflicting request is found:
		 *    submit.
		 *
		 * if any conflicting request is found
		 * that has not yet been acked,
		 * AND I have the "discard concurrent writes" flag:
		 *	 queue (via done_ee) the P_DISCARD_ACK; OUT.
		 *
		 * if any conflicting request is found:
		 *	 block the receiver, waiting on misc_wait
		 *	 until no more conflicting requests are there,
		 *	 or we get interrupted (disconnect).
		 *
		 *	 we do not just write after local io completion of those
		 *	 requests, but only after req is done completely, i.e.
		 *	 we wait for the P_DISCARD_ACK to arrive!
		 *
		 *	 then proceed normally, i.e. submit.
		 */
		if (drbd_wait_peer_seq(mdev, be32_to_cpu(p->seq_num)))
			goto out_interrupted;

		spin_lock_irq(&mdev->tconn->req_lock);

		first = 1;
		for (;;) {
			struct drbd_interval *i;
			int have_unacked = 0;
			int have_conflict = 0;
			prepare_to_wait(&mdev->misc_wait, &wait,
				TASK_INTERRUPTIBLE);

			i = drbd_find_overlap(&mdev->write_requests, sector, size);
			if (i) {
				/* only ALERT on first iteration,
				 * we may be woken up early... */
				if (first)
					dev_alert(DEV, "%s[%u] Concurrent %s write detected!"
					      " new: %llus +%u; pending: %llus +%u\n",
					      current->comm, current->pid,
					      i->local ? "local" : "remote",
					      (unsigned long long)sector, size,
					      (unsigned long long)i->sector, i->size);

				if (i->local) {
					struct drbd_request *req2;

					req2 = container_of(i, struct drbd_request, i);
					if (req2->rq_state & RQ_NET_PENDING)
						++have_unacked;
				}
				++have_conflict;
			}
			if (!have_conflict)
				break;

			/* Discard Ack only for the _first_ iteration */
			if (first && discard && have_unacked) {
				dev_alert(DEV, "Concurrent write! [DISCARD BY FLAG] sec=%llus\n",
				     (unsigned long long)sector);
				inc_unacked(mdev);
				peer_req->w.cb = e_send_discard_ack;
				list_add_tail(&peer_req->w.list, &mdev->done_ee);

				spin_unlock_irq(&mdev->tconn->req_lock);

				/* we could probably send that P_DISCARD_ACK ourselves,
				 * but I don't like the receiver using the msock */

				put_ldev(mdev);
				wake_asender(mdev->tconn);
				finish_wait(&mdev->misc_wait, &wait);
				return true;
			}

			if (signal_pending(current)) {
				spin_unlock_irq(&mdev->tconn->req_lock);
				finish_wait(&mdev->misc_wait, &wait);
				goto out_interrupted;
			}

			/* Indicate to wake up mdev->misc_wait upon completion. */
			i->waiting = true;

			spin_unlock_irq(&mdev->tconn->req_lock);
			if (first) {
				first = 0;
				dev_alert(DEV, "Concurrent write! [W AFTERWARDS] "
				     "sec=%llus\n", (unsigned long long)sector);
			} else if (discard) {
				/* we had none on the first iteration.
				 * there must be none now. */
				D_ASSERT(have_unacked == 0);
			}
			schedule();
			spin_lock_irq(&mdev->tconn->req_lock);
		}
		finish_wait(&mdev->misc_wait, &wait);

		/* no more conflicts: register ourselves for conflict
		 * detection against later writes */
		drbd_insert_interval(&mdev->write_requests, &peer_req->i);
	}

	list_add(&peer_req->w.list, &mdev->active_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	switch (mdev->tconn->net_conf->wire_protocol) {
	case DRBD_PROT_C:
		inc_unacked(mdev);
		/* corresponding dec_unacked() in e_end_block()
		 * respective _drbd_clear_done_ee */
		break;
	case DRBD_PROT_B:
		/* I really don't like it that the receiver thread
		 * sends on the msock, but anyways */
		drbd_send_ack(mdev, P_RECV_ACK, peer_req);
		break;
	case DRBD_PROT_A:
		/* nothing to do */
		break;
	}

	if (mdev->state.pdsk < D_INCONSISTENT) {
		/* In case we have the only disk of the cluster, */
		drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
		peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
		peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
		drbd_al_begin_io(mdev, peer_req->i.sector);
	}

	if (drbd_submit_ee(mdev, peer_req, rw, DRBD_FAULT_DT_WR) == 0)
		return true;

	/* don't care for the reason here */
	dev_err(DEV, "submit failed, triggering re-connect\n");
	/* unwind: take the request back off active_ee and out of the tree */
	spin_lock_irq(&mdev->tconn->req_lock);
	list_del(&peer_req->w.list);
	drbd_remove_epoch_entry_interval(mdev, peer_req);
	spin_unlock_irq(&mdev->tconn->req_lock);
	if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
		drbd_al_complete_io(mdev, peer_req->i.sector);

out_interrupted:
	drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + EV_CLEANUP);
	put_ldev(mdev);
	drbd_free_ee(mdev, peer_req);
	return false;
}
1957
/* We may throttle resync, if the lower device seems to be busy,
 * and current sync rate is above c_min_rate.
 *
 * To decide whether or not the lower device is busy, we use a scheme similar
 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
 * (more than 64 sectors) of activity we cannot account for with our own resync
 * activity, it obviously is "busy".
 *
 * The current sync rate used here uses only the most recent two step marks,
 * to have a short time average so we can react faster.
 *
 * Returns nonzero if resync for @sector should be throttled, 0 otherwise.
 */
int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
{
	struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
	unsigned long db, dt, dbdt;
	struct lc_element *tmp;
	int curr_events;
	int throttle = 0;

	/* feature disabled? */
	if (mdev->sync_conf.c_min_rate == 0)
		return 0;

	/* never throttle an extent that application IO is waiting for
	 * (marked BME_PRIORITY in the resync LRU) */
	spin_lock_irq(&mdev->al_lock);
	tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
	if (tmp) {
		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
		if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
			spin_unlock_irq(&mdev->al_lock);
			return 0;
		}
		/* Do not slow down if app IO is already waiting for this extent */
	}
	spin_unlock_irq(&mdev->al_lock);

	/* total sectors moved on the backing device (reads + writes),
	 * minus the sectors we caused ourselves (rs_sect_ev) */
	curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
		      (int)part_stat_read(&disk->part0, sectors[1]) -
			atomic_read(&mdev->rs_sect_ev);

	/* only re-evaluate when "foreign" activity exceeds 64 sectors
	 * since the last check */
	if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
		unsigned long rs_left;
		int i;

		mdev->rs_last_events = curr_events;

		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
		 * approx. */
		i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;

		if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
			rs_left = mdev->ov_left;
		else
			rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;

		dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
		if (!dt)
			dt++;
		db = mdev->rs_mark_left[i] - rs_left;
		dbdt = Bit2KB(db/dt);

		/* throttle only while we are faster than the configured floor */
		if (dbdt > mdev->sync_conf.c_min_rate)
			throttle = 1;
	}
	return throttle;
}
2023
2024
/*
 * receive_DataRequest() - handle a read request sent by the peer.
 *
 * Serves P_DATA_REQUEST (application read on behalf of the peer), the
 * resync requests P_RS_DATA_REQUEST / P_CSUM_RS_REQUEST, and the
 * online-verify packets P_OV_REQUEST / P_OV_REPLY.  A peer request is
 * allocated, queued on mdev->read_ee, and submitted as a local READ;
 * the completion callback (peer_req->w.cb) selected below sends the
 * matching reply.
 *
 * @mdev:        device this packet arrived for
 * @cmd:         which of the request packet types is being processed
 * @digest_size: remaining payload on the socket (checksum digest for
 *               P_CSUM_RS_REQUEST / P_OV_REPLY; 0 otherwise)
 *
 * Returns true on success, false on any error; false causes the caller
 * to tear down the connection.
 */
static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd,
			       unsigned int digest_size)
{
	sector_t sector;
	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
	struct drbd_peer_request *peer_req;
	struct digest_info *di = NULL;
	int size, verb;
	unsigned int fault_type;
	struct p_block_req *p =	&mdev->tconn->data.rbuf.block_req;

	sector = be64_to_cpu(p->sector);
	size   = be32_to_cpu(p->blksize);

	/* Sanity check: size positive, 512-byte aligned, within the bio
	 * limit, and the request must not extend past the device end. */
	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
		dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return false;
	}
	if (sector + (size>>9) > capacity) {
		dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return false;
	}

	/* No usable local data: send the appropriate negative ack, then
	 * drain any payload from the socket to keep the stream in sync. */
	if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
		verb = 1;
		switch (cmd) {
		case P_DATA_REQUEST:
			drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
			break;
		case P_RS_DATA_REQUEST:
		case P_CSUM_RS_REQUEST:
		case P_OV_REQUEST:
			drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
			break;
		case P_OV_REPLY:
			verb = 0;
			dec_rs_pending(mdev);
			drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
			break;
		default:
			dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
				cmdname(cmd));
		}
		if (verb && __ratelimit(&drbd_ratelimit_state))
			dev_err(DEV, "Can not satisfy peer's read request, "
			    "no local data.\n");

		/* drain possibly payload */
		return drbd_drain_block(mdev, digest_size);
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place.  */
	peer_req = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO);
	if (!peer_req) {
		put_ldev(mdev);
		return false;
	}

	/* Pick the completion callback and fault-injection type, and do
	 * per-packet-type setup (digest read, online-verify bookkeeping). */
	switch (cmd) {
	case P_DATA_REQUEST:
		peer_req->w.cb = w_e_end_data_req;
		fault_type = DRBD_FAULT_DT_RD;
		/* application IO, don't drbd_rs_begin_io */
		goto submit;

	case P_RS_DATA_REQUEST:
		peer_req->w.cb = w_e_end_rsdata_req;
		fault_type = DRBD_FAULT_RS_RD;
		/* used in the sector offset progress display */
		mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
		break;

	case P_OV_REPLY:
	case P_CSUM_RS_REQUEST:
		fault_type = DRBD_FAULT_RS_RD;
		/* digest_info and its digest buffer in one allocation */
		di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO);
		if (!di)
			goto out_free_e;

		di->digest_size = digest_size;
		di->digest = (((char *)di)+sizeof(struct digest_info));

		peer_req->digest = di;
		peer_req->flags |= EE_HAS_DIGEST;

		/* short read here aborts the connection via out_free_e */
		if (drbd_recv(mdev->tconn, di->digest, digest_size) != digest_size)
			goto out_free_e;

		if (cmd == P_CSUM_RS_REQUEST) {
			D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
			peer_req->w.cb = w_e_end_csum_rs_req;
			/* used in the sector offset progress display */
			mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
		} else if (cmd == P_OV_REPLY) {
			/* track progress, we may need to throttle */
			atomic_add(size >> 9, &mdev->rs_sect_in);
			peer_req->w.cb = w_e_end_ov_reply;
			dec_rs_pending(mdev);
			/* drbd_rs_begin_io done when we sent this request,
			 * but accounting still needs to be done. */
			goto submit_for_resync;
		}
		break;

	case P_OV_REQUEST:
		/* First P_OV_REQUEST of a verify run (proto >= 90):
		 * initialize online-verify progress accounting. */
		if (mdev->ov_start_sector == ~(sector_t)0 &&
		    mdev->tconn->agreed_pro_version >= 90) {
			unsigned long now = jiffies;
			int i;
			mdev->ov_start_sector = sector;
			mdev->ov_position = sector;
			mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
			mdev->rs_total = mdev->ov_left;
			for (i = 0; i < DRBD_SYNC_MARKS; i++) {
				mdev->rs_mark_left[i] = mdev->ov_left;
				mdev->rs_mark_time[i] = now;
			}
			dev_info(DEV, "Online Verify start sector: %llu\n",
					(unsigned long long)sector);
		}
		peer_req->w.cb = w_e_end_ov_req;
		fault_type = DRBD_FAULT_RS_RD;
		break;

	default:
		dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
		    cmdname(cmd));
		fault_type = DRBD_FAULT_MAX;
		goto out_free_e;
	}

	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
	 * wrt the receiver, but it is not as straightforward as it may seem.
	 * Various places in the resync start and stop logic assume resync
	 * requests are processed in order, requeuing this on the worker thread
	 * introduces a bunch of new code for synchronization between threads.
	 *
	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
	 * "forever", throttling after drbd_rs_begin_io will lock that extent
	 * for application writes for the same time. For now, just throttle
	 * here, where the rest of the code expects the receiver to sleep for
	 * a while, anyways.
	 */

	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
	 * this defers syncer requests for some time, before letting at least
	 * on request through. The resync controller on the receiving side
	 * will adapt to the incoming rate accordingly.
	 *
	 * We cannot throttle here if remote is Primary/SyncTarget:
	 * we would also throttle its application reads.
	 * In that case, throttling is done on the SyncTarget only.
	 */
	if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
		schedule_timeout_uninterruptible(HZ/10);
	if (drbd_rs_begin_io(mdev, sector))
		goto out_free_e;

submit_for_resync:
	/* resync/verify sector accounting (feeds the resync controller) */
	atomic_add(size >> 9, &mdev->rs_sect_ev);

submit:
	inc_unacked(mdev);
	spin_lock_irq(&mdev->tconn->req_lock);
	list_add_tail(&peer_req->w.list, &mdev->read_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	if (drbd_submit_ee(mdev, peer_req, READ, fault_type) == 0)
		return true;

	/* don't care for the reason here */
	dev_err(DEV, "submit failed, triggering re-connect\n");
	spin_lock_irq(&mdev->tconn->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&mdev->tconn->req_lock);
	/* no drbd_rs_complete_io(), we are dropping the connection anyways */

out_free_e:
	put_ldev(mdev);
	drbd_free_ee(mdev, peer_req);
	return false;
}
2211
/*
 * drbd_asb_recover_0p() - after-split-brain recovery, zero primaries.
 *
 * Applies the configured after-sb-0pri policy.  Return value:
 *   1  -> discard the peer's data (we become sync source)
 *  -1  -> discard our data (we become sync target)
 * -100  -> no automatic decision reached
 *
 * self/peer: lowest bit of the respective bitmap UUID (primary indicator);
 * ch_self/ch_peer: changed-block counts used by the least-changes policies
 * (peer's count arrives in p_uuid[UI_SIZE]).
 *
 * NOTE: the switch below uses intentional fall-through — the younger/older
 * primary policies degrade to discard-zero-changes and then to
 * discard-least-changes when they cannot decide.
 */
static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
{
	int self, peer, rv = -100;
	unsigned long ch_self, ch_peer;

	self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
	peer = mdev->p_uuid[UI_BITMAP] & 1;

	ch_peer = mdev->p_uuid[UI_SIZE];
	ch_self = mdev->comm_bm_set;

	switch (mdev->tconn->net_conf->after_sb_0p) {
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_CALL_HELPER:
		/* only meaningful with one or two primaries */
		dev_err(DEV, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_DISCARD_YOUNGER_PRI:
		if (self == 0 && peer == 1) {
			rv = -1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv =  1;
			break;
		}
		/* Else fall through to one of the other strategies... */
	case ASB_DISCARD_OLDER_PRI:
		if (self == 0 && peer == 1) {
			rv = 1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = -1;
			break;
		}
		/* Else fall through to one of the other strategies... */
		dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
		     "Using discard-least-changes instead\n");
	case ASB_DISCARD_ZERO_CHG:
		if (ch_peer == 0 && ch_self == 0) {
			/* tie: DISCARD_CONCURRENT decides deterministically,
			 * opposite on the two nodes */
			rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
				? -1 : 1;
			break;
		} else {
			if (ch_peer == 0) { rv =  1; break; }
			if (ch_self == 0) { rv = -1; break; }
		}
		/* pure zero-changes policy stops here; the fall-through
		 * path from younger/older continues below */
		if (mdev->tconn->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG)
			break;
	case ASB_DISCARD_LEAST_CHG:
		if	(ch_self < ch_peer)
			rv = -1;
		else if (ch_self > ch_peer)
			rv =  1;
		else /* ( ch_self == ch_peer ) */
			 /* Well, then use something else. */
			rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
				? -1 : 1;
		break;
	case ASB_DISCARD_LOCAL:
		rv = -1;
		break;
	case ASB_DISCARD_REMOTE:
		rv =  1;
	}

	return rv;
}
2283
2284static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2285{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002286 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002287
Philipp Reisner89e58e72011-01-19 13:12:45 +01002288 switch (mdev->tconn->net_conf->after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002289 case ASB_DISCARD_YOUNGER_PRI:
2290 case ASB_DISCARD_OLDER_PRI:
2291 case ASB_DISCARD_LEAST_CHG:
2292 case ASB_DISCARD_LOCAL:
2293 case ASB_DISCARD_REMOTE:
2294 dev_err(DEV, "Configuration error.\n");
2295 break;
2296 case ASB_DISCONNECT:
2297 break;
2298 case ASB_CONSENSUS:
2299 hg = drbd_asb_recover_0p(mdev);
2300 if (hg == -1 && mdev->state.role == R_SECONDARY)
2301 rv = hg;
2302 if (hg == 1 && mdev->state.role == R_PRIMARY)
2303 rv = hg;
2304 break;
2305 case ASB_VIOLENTLY:
2306 rv = drbd_asb_recover_0p(mdev);
2307 break;
2308 case ASB_DISCARD_SECONDARY:
2309 return mdev->state.role == R_PRIMARY ? 1 : -1;
2310 case ASB_CALL_HELPER:
2311 hg = drbd_asb_recover_0p(mdev);
2312 if (hg == -1 && mdev->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002313 enum drbd_state_rv rv2;
2314
2315 drbd_set_role(mdev, R_SECONDARY, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002316 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2317 * we might be here in C_WF_REPORT_PARAMS which is transient.
2318 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002319 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2320 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002321 drbd_khelper(mdev, "pri-lost-after-sb");
2322 } else {
2323 dev_warn(DEV, "Successfully gave up primary role.\n");
2324 rv = hg;
2325 }
2326 } else
2327 rv = hg;
2328 }
2329
2330 return rv;
2331}
2332
2333static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2334{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002335 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002336
Philipp Reisner89e58e72011-01-19 13:12:45 +01002337 switch (mdev->tconn->net_conf->after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002338 case ASB_DISCARD_YOUNGER_PRI:
2339 case ASB_DISCARD_OLDER_PRI:
2340 case ASB_DISCARD_LEAST_CHG:
2341 case ASB_DISCARD_LOCAL:
2342 case ASB_DISCARD_REMOTE:
2343 case ASB_CONSENSUS:
2344 case ASB_DISCARD_SECONDARY:
2345 dev_err(DEV, "Configuration error.\n");
2346 break;
2347 case ASB_VIOLENTLY:
2348 rv = drbd_asb_recover_0p(mdev);
2349 break;
2350 case ASB_DISCONNECT:
2351 break;
2352 case ASB_CALL_HELPER:
2353 hg = drbd_asb_recover_0p(mdev);
2354 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002355 enum drbd_state_rv rv2;
2356
Philipp Reisnerb411b362009-09-25 16:07:19 -07002357 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2358 * we might be here in C_WF_REPORT_PARAMS which is transient.
2359 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002360 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2361 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002362 drbd_khelper(mdev, "pri-lost-after-sb");
2363 } else {
2364 dev_warn(DEV, "Successfully gave up primary role.\n");
2365 rv = hg;
2366 }
2367 } else
2368 rv = hg;
2369 }
2370
2371 return rv;
2372}
2373
2374static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2375 u64 bits, u64 flags)
2376{
2377 if (!uuid) {
2378 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2379 return;
2380 }
2381 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2382 text,
2383 (unsigned long long)uuid[UI_CURRENT],
2384 (unsigned long long)uuid[UI_BITMAP],
2385 (unsigned long long)uuid[UI_HISTORY_START],
2386 (unsigned long long)uuid[UI_HISTORY_END],
2387 (unsigned long long)bits,
2388 (unsigned long long)flags);
2389}
2390
2391/*
2392 100 after split brain try auto recover
2393 2 C_SYNC_SOURCE set BitMap
2394 1 C_SYNC_SOURCE use BitMap
2395 0 no Sync
2396 -1 C_SYNC_TARGET use BitMap
2397 -2 C_SYNC_TARGET set BitMap
2398 -100 after split brain, disconnect
2399-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002400-1091 requires proto 91
2401-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002402 */
/*
 * drbd_uuid_compare() - compare our UUID set against the peer's.
 *
 * Decides the sync direction; see the return-value table above this
 * function.  *rule_nr reports which rule made the decision (logged by
 * the caller).  May repair UUIDs on either side when a lost
 * P_SYNC_UUID packet is detected (rules 34/35, 51, 71).
 * Caller must hold a local disk reference (__must_hold(local)).
 */
static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
{
	u64 self, peer;
	int i, j;

	/* compare current UUIDs, ignoring the "I was primary" low bit */
	self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);

	*rule_nr = 10;
	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
		return 0;

	*rule_nr = 20;
	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
	     peer != UUID_JUST_CREATED)
		return -2;

	*rule_nr = 30;
	if (self != UUID_JUST_CREATED &&
	    (peer == UUID_JUST_CREATED || peer == (u64)0))
		return 2;

	if (self == peer) {
		int rct, dc; /* roles at crash time */

		/* peer cleared its bitmap UUID but we still have ours:
		 * we were sync source and missed the "resync finished"
		 * event — correct our own UUIDs (rule 34/36). */
		if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {

			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
			    (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
				dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
				drbd_uuid_set_bm(mdev, 0UL);

				drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
					       mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
				*rule_nr = 34;
			} else {
				dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
				*rule_nr = 36;
			}

			return 1;
		}

		/* mirror case: we cleared our bitmap UUID but the peer
		 * still has one — correct the peer's UUIDs (rule 35/37). */
		if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {

			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
			    (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
				dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");

				mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
				mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
				mdev->p_uuid[UI_BITMAP] = 0UL;

				drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
				*rule_nr = 35;
			} else {
				dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
				*rule_nr = 37;
			}

			return -1;
		}

		/* Common power [off|failure] */
		rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
			(mdev->p_uuid[UI_FLAGS] & 2);
		/* lowest bit is set when we were primary,
		 * next bit (weight 2) is set when peer was primary */
		*rule_nr = 40;

		switch (rct) {
		case 0: /* !self_pri && !peer_pri */ return 0;
		case 1: /*  self_pri && !peer_pri */ return 1;
		case 2: /* !self_pri &&  peer_pri */ return -1;
		case 3: /*  self_pri &&  peer_pri */
			/* both were primary: break the tie deterministically */
			dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
			return dc ? -1 : 1;
		}
	}

	*rule_nr = 50;
	peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer)
		return -1;

	*rule_nr = 51;
	peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (mdev->tconn->agreed_pro_version < 96 ?
		    (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
		    (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
		    peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get through. Undo the last start of
			   resync as sync source modifications of the peer's UUIDs. */

			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
			mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];

			dev_info(DEV, "Did not got last syncUUID packet, corrected:\n");
			drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);

			return -1;
		}
	}

	*rule_nr = 60;
	self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		peer = mdev->p_uuid[i] & ~((u64)1);
		if (self == peer)
			return -2;
	}

	*rule_nr = 70;
	self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
	if (self == peer)
		return 1;

	*rule_nr = 71;
	self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (mdev->tconn->agreed_pro_version < 96 ?
		    (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
		    (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
		    self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get through. Undo the last start of
			   resync as sync source modifications of our UUIDs. */

			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			_drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
			_drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);

			dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
			drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
				       mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);

			return 1;
		}
	}


	*rule_nr = 80;
	peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = mdev->ldev->md.uuid[i] & ~((u64)1);
		if (self == peer)
			return 2;
	}

	*rule_nr = 90;
	self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer && self != ((u64)0))
		return 100;

	*rule_nr = 100;
	/* any history/history match means related, but split-brained data */
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = mdev->ldev->md.uuid[i] & ~((u64)1);
		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
			peer = mdev->p_uuid[j] & ~((u64)1);
			if (self == peer)
				return -100;
		}
	}

	return -1000;
}
2582
2583/* drbd_sync_handshake() returns the new conn state on success, or
2584 CONN_MASK (-1) on failure.
2585 */
2586static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
2587 enum drbd_disk_state peer_disk) __must_hold(local)
2588{
2589 int hg, rule_nr;
2590 enum drbd_conns rv = C_MASK;
2591 enum drbd_disk_state mydisk;
2592
2593 mydisk = mdev->state.disk;
2594 if (mydisk == D_NEGOTIATING)
2595 mydisk = mdev->new_state_tmp.disk;
2596
2597 dev_info(DEV, "drbd_sync_handshake:\n");
2598 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
2599 drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
2600 mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2601
2602 hg = drbd_uuid_compare(mdev, &rule_nr);
2603
2604 dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
2605
2606 if (hg == -1000) {
2607 dev_alert(DEV, "Unrelated data, aborting!\n");
2608 return C_MASK;
2609 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01002610 if (hg < -1000) {
2611 dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002612 return C_MASK;
2613 }
2614
2615 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
2616 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
2617 int f = (hg == -100) || abs(hg) == 2;
2618 hg = mydisk > D_INCONSISTENT ? 1 : -1;
2619 if (f)
2620 hg = hg*2;
2621 dev_info(DEV, "Becoming sync %s due to disk states.\n",
2622 hg > 0 ? "source" : "target");
2623 }
2624
Adam Gandelman3a11a482010-04-08 16:48:23 -07002625 if (abs(hg) == 100)
2626 drbd_khelper(mdev, "initial-split-brain");
2627
Philipp Reisner89e58e72011-01-19 13:12:45 +01002628 if (hg == 100 || (hg == -100 && mdev->tconn->net_conf->always_asbp)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002629 int pcount = (mdev->state.role == R_PRIMARY)
2630 + (peer_role == R_PRIMARY);
2631 int forced = (hg == -100);
2632
2633 switch (pcount) {
2634 case 0:
2635 hg = drbd_asb_recover_0p(mdev);
2636 break;
2637 case 1:
2638 hg = drbd_asb_recover_1p(mdev);
2639 break;
2640 case 2:
2641 hg = drbd_asb_recover_2p(mdev);
2642 break;
2643 }
2644 if (abs(hg) < 100) {
2645 dev_warn(DEV, "Split-Brain detected, %d primaries, "
2646 "automatically solved. Sync from %s node\n",
2647 pcount, (hg < 0) ? "peer" : "this");
2648 if (forced) {
2649 dev_warn(DEV, "Doing a full sync, since"
2650 " UUIDs where ambiguous.\n");
2651 hg = hg*2;
2652 }
2653 }
2654 }
2655
2656 if (hg == -100) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002657 if (mdev->tconn->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002658 hg = -1;
Philipp Reisner89e58e72011-01-19 13:12:45 +01002659 if (!mdev->tconn->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002660 hg = 1;
2661
2662 if (abs(hg) < 100)
2663 dev_warn(DEV, "Split-Brain detected, manually solved. "
2664 "Sync from %s node\n",
2665 (hg < 0) ? "peer" : "this");
2666 }
2667
2668 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01002669 /* FIXME this log message is not correct if we end up here
2670 * after an attempted attach on a diskless node.
2671 * We just refuse to attach -- well, we drop the "connection"
2672 * to that disk, in a way... */
Adam Gandelman3a11a482010-04-08 16:48:23 -07002673 dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002674 drbd_khelper(mdev, "split-brain");
2675 return C_MASK;
2676 }
2677
2678 if (hg > 0 && mydisk <= D_INCONSISTENT) {
2679 dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
2680 return C_MASK;
2681 }
2682
2683 if (hg < 0 && /* by intention we do not use mydisk here. */
2684 mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002685 switch (mdev->tconn->net_conf->rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002686 case ASB_CALL_HELPER:
2687 drbd_khelper(mdev, "pri-lost");
2688 /* fall through */
2689 case ASB_DISCONNECT:
2690 dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
2691 return C_MASK;
2692 case ASB_VIOLENTLY:
2693 dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
2694 "assumption\n");
2695 }
2696 }
2697
Philipp Reisner89e58e72011-01-19 13:12:45 +01002698 if (mdev->tconn->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002699 if (hg == 0)
2700 dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
2701 else
2702 dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
2703 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
2704 abs(hg) >= 2 ? "full" : "bit-map based");
2705 return C_MASK;
2706 }
2707
Philipp Reisnerb411b362009-09-25 16:07:19 -07002708 if (abs(hg) >= 2) {
2709 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01002710 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
2711 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002712 return C_MASK;
2713 }
2714
2715 if (hg > 0) { /* become sync source. */
2716 rv = C_WF_BITMAP_S;
2717 } else if (hg < 0) { /* become sync target */
2718 rv = C_WF_BITMAP_T;
2719 } else {
2720 rv = C_CONNECTED;
2721 if (drbd_bm_total_weight(mdev)) {
2722 dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
2723 drbd_bm_total_weight(mdev));
2724 }
2725 }
2726
2727 return rv;
2728}
2729
2730/* returns 1 if invalid */
2731static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
2732{
2733 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
2734 if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) ||
2735 (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL))
2736 return 0;
2737
2738 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
2739 if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL ||
2740 self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL)
2741 return 1;
2742
2743 /* everything else is valid if they are equal on both sides. */
2744 if (peer == self)
2745 return 0;
2746
2747 /* everything es is invalid. */
2748 return 1;
2749}
2750
/*
 * receive_protocol() - validate the peer's P_PROTOCOL packet against our
 * own net configuration.
 *
 * Checks wire protocol, the three after-split-brain policies, the
 * discard-my-data ("want_lose") flag, two-primaries, and (protocol >= 87)
 * the data-integrity algorithm name carried as payload of @data_size bytes.
 *
 * Returns true if everything is compatible.  On a mismatch, forces the
 * state to C_DISCONNECTING and returns false; a short read of the
 * integrity-alg payload returns false without the state change (the
 * connection is broken anyway).
 */
static int receive_protocol(struct drbd_conf *mdev, enum drbd_packet cmd,
			    unsigned int data_size)
{
	struct p_protocol *p = &mdev->tconn->data.rbuf.protocol;
	int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
	int p_want_lose, p_two_primaries, cf;
	char p_integrity_alg[SHARED_SECRET_MAX] = "";

	p_proto		= be32_to_cpu(p->protocol);
	p_after_sb_0p	= be32_to_cpu(p->after_sb_0p);
	p_after_sb_1p	= be32_to_cpu(p->after_sb_1p);
	p_after_sb_2p	= be32_to_cpu(p->after_sb_2p);
	p_two_primaries = be32_to_cpu(p->two_primaries);
	cf		= be32_to_cpu(p->conn_flags);
	p_want_lose = cf & CF_WANT_LOSE;

	/* mirror the peer's dry-run flag into our device flags */
	clear_bit(CONN_DRY_RUN, &mdev->flags);

	if (cf & CF_DRY_RUN)
		set_bit(CONN_DRY_RUN, &mdev->flags);

	if (p_proto != mdev->tconn->net_conf->wire_protocol) {
		dev_err(DEV, "incompatible communication protocols\n");
		goto disconnect;
	}

	if (cmp_after_sb(p_after_sb_0p, mdev->tconn->net_conf->after_sb_0p)) {
		dev_err(DEV, "incompatible after-sb-0pri settings\n");
		goto disconnect;
	}

	if (cmp_after_sb(p_after_sb_1p, mdev->tconn->net_conf->after_sb_1p)) {
		dev_err(DEV, "incompatible after-sb-1pri settings\n");
		goto disconnect;
	}

	if (cmp_after_sb(p_after_sb_2p, mdev->tconn->net_conf->after_sb_2p)) {
		dev_err(DEV, "incompatible after-sb-2pri settings\n");
		goto disconnect;
	}

	/* at most one side may set discard-my-data */
	if (p_want_lose && mdev->tconn->net_conf->want_lose) {
		dev_err(DEV, "both sides have the 'want_lose' flag set\n");
		goto disconnect;
	}

	if (p_two_primaries != mdev->tconn->net_conf->two_primaries) {
		dev_err(DEV, "incompatible setting of the two-primaries options\n");
		goto disconnect;
	}

	if (mdev->tconn->agreed_pro_version >= 87) {
		unsigned char *my_alg = mdev->tconn->net_conf->integrity_alg;

		if (drbd_recv(mdev->tconn, p_integrity_alg, data_size) != data_size)
			return false;

		/* make sure the received name is NUL-terminated */
		p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
		if (strcmp(p_integrity_alg, my_alg)) {
			dev_err(DEV, "incompatible setting of the data-integrity-alg\n");
			goto disconnect;
		}
		dev_info(DEV, "data-integrity-alg: %s\n",
			 my_alg[0] ? my_alg : (unsigned char *)"<not-used>");
	}

	return true;

disconnect:
	drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
	return false;
}
2823
2824/* helper function
2825 * input: alg name, feature name
2826 * return: NULL (alg name was "")
2827 * ERR_PTR(error) if something goes wrong
2828 * or the crypto hash ptr, if it worked out ok. */
2829struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
2830 const char *alg, const char *name)
2831{
2832 struct crypto_hash *tfm;
2833
2834 if (!alg[0])
2835 return NULL;
2836
2837 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
2838 if (IS_ERR(tfm)) {
2839 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
2840 alg, name, PTR_ERR(tfm));
2841 return tfm;
2842 }
2843 if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
2844 crypto_free_hash(tfm);
2845 dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name);
2846 return ERR_PTR(-EINVAL);
2847 }
2848 return tfm;
2849}
2850
/* Handle a SyncParam packet (p_rs_param / _89 / _95, depending on the
 * agreed protocol version): read the sync rate, the verify-alg and
 * csums-alg strings (apv >= 88), allocate the corresponding digest
 * transforms and (apv > 94) the resync fifo plan, then commit all of it
 * atomically under peer_seq_lock.
 * Returns true on success; false on receive/validation failure.  On the
 * "disconnect" path the connection is forced to C_DISCONNECTING. */
static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd,
			     unsigned int packet_size)
{
	int ok = true;
	struct p_rs_param_95 *p = &mdev->tconn->data.rbuf.rs_param_95;
	unsigned int header_size, data_size, exp_max_sz;
	struct crypto_hash *verify_tfm = NULL;
	struct crypto_hash *csums_tfm = NULL;
	const int apv = mdev->tconn->agreed_pro_version;
	int *rs_plan_s = NULL;
	int fifo_size = 0;

	/* Maximum on-the-wire size of this packet for the agreed protocol
	 * version; apv 88 appends the shared-secret string to p_rs_param. */
	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
		    : apv == 88 ? sizeof(struct p_rs_param)
			+ SHARED_SECRET_MAX
		    : apv <= 94 ? sizeof(struct p_rs_param_89)
		    : /* apv >= 95 */ sizeof(struct p_rs_param_95);

	if (packet_size > exp_max_sz) {
		dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
		    packet_size, exp_max_sz);
		return false;
	}

	/* Split the packet into the fixed-size header part (received below
	 * into the rbuf) and a trailing variable-size part (apv 88 only). */
	if (apv <= 88) {
		header_size = sizeof(struct p_rs_param) - sizeof(struct p_header);
		data_size   = packet_size  - header_size;
	} else if (apv <= 94) {
		header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header);
		data_size   = packet_size  - header_size;
		D_ASSERT(data_size == 0);
	} else {
		header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header);
		data_size   = packet_size  - header_size;
		D_ASSERT(data_size == 0);
	}

	/* initialize verify_alg and csums_alg */
	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);

	if (drbd_recv(mdev->tconn, &p->head.payload, header_size) != header_size)
		return false;

	mdev->sync_conf.rate	  = be32_to_cpu(p->rate);

	if (apv >= 88) {
		if (apv == 88) {
			/* apv 88 sends the verify-alg as trailing payload. */
			if (data_size > SHARED_SECRET_MAX) {
				dev_err(DEV, "verify-alg too long, "
				    "peer wants %u, accepting only %u byte\n",
						data_size, SHARED_SECRET_MAX);
				return false;
			}

			if (drbd_recv(mdev->tconn, p->verify_alg, data_size) != data_size)
				return false;

			/* we expect NUL terminated string */
			/* but just in case someone tries to be evil */
			D_ASSERT(p->verify_alg[data_size-1] == 0);
			p->verify_alg[data_size-1] = 0;

		} else /* apv >= 89 */ {
			/* we still expect NUL terminated strings */
			/* but just in case someone tries to be evil */
			D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
			D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
			p->verify_alg[SHARED_SECRET_MAX-1] = 0;
			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
		}

		/* Changing the verify-alg is only allowed during the initial
		 * parameter exchange, not on a live connection. */
		if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) {
			if (mdev->state.conn == C_WF_REPORT_PARAMS) {
				dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
				    mdev->sync_conf.verify_alg, p->verify_alg);
				goto disconnect;
			}
			verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
					p->verify_alg, "verify-alg");
			if (IS_ERR(verify_tfm)) {
				verify_tfm = NULL;
				goto disconnect;
			}
		}

		/* Same for csums-alg (only exists since apv 89). */
		if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) {
			if (mdev->state.conn == C_WF_REPORT_PARAMS) {
				dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
				    mdev->sync_conf.csums_alg, p->csums_alg);
				goto disconnect;
			}
			csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
					p->csums_alg, "csums-alg");
			if (IS_ERR(csums_tfm)) {
				csums_tfm = NULL;
				goto disconnect;
			}
		}

		if (apv > 94) {
			/* apv 95 added the dynamic resync-rate controller
			 * parameters; size its fifo plan accordingly. */
			mdev->sync_conf.rate	  = be32_to_cpu(p->rate);
			mdev->sync_conf.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
			mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target);
			mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target);
			mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate);

			fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
			if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
				rs_plan_s   = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
				if (!rs_plan_s) {
					dev_err(DEV, "kmalloc of fifo_buffer failed");
					goto disconnect;
				}
			}
		}

		spin_lock(&mdev->peer_seq_lock);
		/* lock against drbd_nl_syncer_conf() */
		if (verify_tfm) {
			strcpy(mdev->sync_conf.verify_alg, p->verify_alg);
			mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1;
			crypto_free_hash(mdev->verify_tfm);
			mdev->verify_tfm = verify_tfm;
			dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
		}
		if (csums_tfm) {
			strcpy(mdev->sync_conf.csums_alg, p->csums_alg);
			mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1;
			crypto_free_hash(mdev->csums_tfm);
			mdev->csums_tfm = csums_tfm;
			dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
		}
		/* Swap in the (possibly NULL) new fifo plan; the old buffer
		 * is freed here, the new one is owned by mdev from now on. */
		if (fifo_size != mdev->rs_plan_s.size) {
			kfree(mdev->rs_plan_s.values);
			mdev->rs_plan_s.values = rs_plan_s;
			mdev->rs_plan_s.size   = fifo_size;
			mdev->rs_planed = 0;
		}
		spin_unlock(&mdev->peer_seq_lock);
	}

	return ok;
disconnect:
	/* just for completeness: actually not needed,
	 * as this is not reached if csums_tfm was ok. */
	crypto_free_hash(csums_tfm);
	/* but free the verify_tfm again, if csums_tfm did not work out */
	crypto_free_hash(verify_tfm);
	drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
	return false;
}
3002
Philipp Reisnerb411b362009-09-25 16:07:19 -07003003/* warn if the arguments differ by more than 12.5% */
3004static void warn_if_differ_considerably(struct drbd_conf *mdev,
3005 const char *s, sector_t a, sector_t b)
3006{
3007 sector_t d;
3008 if (a == 0 || b == 0)
3009 return;
3010 d = (a > b) ? (a - b) : (b - a);
3011 if (d > (a>>3) || d > (b>>3))
3012 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
3013 (unsigned long long)a, (unsigned long long)b);
3014}
3015
/* Handle a P_SIZES packet: record the peer's backing-device and
 * user-requested sizes, possibly adopt a new user size, (re)determine
 * our own device size, and decide whether a resync after online grow
 * is needed.
 * Returns true on success; false on fatal condition (no backing
 * storage anywhere, peer's disk too small, or device-size error), in
 * which case the connection is forced to C_DISCONNECTING where
 * appropriate. */
static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd,
			 unsigned int data_size)
{
	struct p_sizes *p = &mdev->tconn->data.rbuf.sizes;
	enum determine_dev_size dd = unchanged;
	sector_t p_size, p_usize, my_usize;
	int ldsc = 0; /* local disk size changed */
	enum dds_flags ddsf;

	p_size = be64_to_cpu(p->d_size);
	p_usize = be64_to_cpu(p->u_size);

	/* Neither side has backing storage: nothing to replicate. */
	if (p_size == 0 && mdev->state.disk == D_DISKLESS) {
		dev_err(DEV, "some backing storage is needed\n");
		drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
		return false;
	}

	/* just store the peer's disk size for now.
	 * we still need to figure out whether we accept that. */
	mdev->p_size = p_size;

	if (get_ldev(mdev)) {
		warn_if_differ_considerably(mdev, "lower level device sizes",
			   p_size, drbd_get_max_capacity(mdev->ldev));
		warn_if_differ_considerably(mdev, "user requested size",
					    p_usize, mdev->ldev->dc.disk_size);

		/* if this is the first connect, or an otherwise expected
		 * param exchange, choose the minimum */
		if (mdev->state.conn == C_WF_REPORT_PARAMS)
			p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size,
					     p_usize);

		/* remember our previous user size so we can roll back below */
		my_usize = mdev->ldev->dc.disk_size;

		if (mdev->ldev->dc.disk_size != p_usize) {
			mdev->ldev->dc.disk_size = p_usize;
			dev_info(DEV, "Peer sets u_size to %lu sectors\n",
				 (unsigned long)mdev->ldev->dc.disk_size);
		}

		/* Never shrink a device with usable data during connect.
		   But allow online shrinking if we are connected. */
		if (drbd_new_dev_size(mdev, mdev->ldev, 0) <
		   drbd_get_capacity(mdev->this_bdev) &&
		   mdev->state.disk >= D_OUTDATED &&
		   mdev->state.conn < C_CONNECTED) {
			dev_err(DEV, "The peer's disk size is too small!\n");
			drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
			/* roll back the user size we adopted above */
			mdev->ldev->dc.disk_size = my_usize;
			put_ldev(mdev);
			return false;
		}
		put_ldev(mdev);
	}

	ddsf = be16_to_cpu(p->dds_flags);
	if (get_ldev(mdev)) {
		dd = drbd_determine_dev_size(mdev, ddsf);
		put_ldev(mdev);
		if (dd == dev_size_error)
			return false;
		drbd_md_sync(mdev);
	} else {
		/* I am diskless, need to accept the peer's size. */
		drbd_set_my_capacity(mdev, p_size);
	}

	mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
	drbd_reconsider_max_bio_size(mdev);

	/* note whether the lower-level device's size changed under us */
	if (get_ldev(mdev)) {
		if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
			mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
			ldsc = 1;
		}

		put_ldev(mdev);
	}

	if (mdev->state.conn > C_WF_REPORT_PARAMS) {
		if (be64_to_cpu(p->c_size) !=
		    drbd_get_capacity(mdev->this_bdev) || ldsc) {
			/* we have different sizes, probably peer
			 * needs to know my new size... */
			drbd_send_sizes(mdev, 0, ddsf);
		}
		if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
		    (dd == grew && mdev->state.conn == C_CONNECTED)) {
			if (mdev->state.pdsk >= D_INCONSISTENT &&
			    mdev->state.disk >= D_INCONSISTENT) {
				if (ddsf & DDSF_NO_RESYNC)
					dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
				else
					resync_after_online_grow(mdev);
			} else
				set_bit(RESYNC_AFTER_NEG, &mdev->flags);
		}
	}

	return true;
}
3119
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003120static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd,
3121 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003122{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003123 struct p_uuids *p = &mdev->tconn->data.rbuf.uuids;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003124 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003125 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003126
Philipp Reisnerb411b362009-09-25 16:07:19 -07003127 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3128
3129 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3130 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3131
3132 kfree(mdev->p_uuid);
3133 mdev->p_uuid = p_uuid;
3134
3135 if (mdev->state.conn < C_CONNECTED &&
3136 mdev->state.disk < D_INCONSISTENT &&
3137 mdev->state.role == R_PRIMARY &&
3138 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3139 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3140 (unsigned long long)mdev->ed_uuid);
3141 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003142 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003143 }
3144
3145 if (get_ldev(mdev)) {
3146 int skip_initial_sync =
3147 mdev->state.conn == C_CONNECTED &&
Philipp Reisner31890f42011-01-19 14:12:51 +01003148 mdev->tconn->agreed_pro_version >= 90 &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003149 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3150 (p_uuid[UI_FLAGS] & 8);
3151 if (skip_initial_sync) {
3152 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3153 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003154 "clear_n_write from receive_uuids",
3155 BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003156 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3157 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3158 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3159 CS_VERBOSE, NULL);
3160 drbd_md_sync(mdev);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003161 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003162 }
3163 put_ldev(mdev);
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003164 } else if (mdev->state.disk < D_INCONSISTENT &&
3165 mdev->state.role == R_PRIMARY) {
3166 /* I am a diskless primary, the peer just created a new current UUID
3167 for me. */
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003168 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003169 }
3170
3171 /* Before we test for the disk state, we should wait until an eventually
3172 ongoing cluster wide state change is finished. That is important if
3173 we are primary and are detaching from our disk. We need to see the
3174 new disk state... */
Philipp Reisner8410da82011-02-11 20:11:10 +01003175 mutex_lock(mdev->state_mutex);
3176 mutex_unlock(mdev->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003177 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003178 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3179
3180 if (updated_uuids)
3181 drbd_print_uuids(mdev, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003182
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003183 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003184}
3185
3186/**
3187 * convert_state() - Converts the peer's view of the cluster state to our point of view
3188 * @ps: The state as seen by the peer.
3189 */
3190static union drbd_state convert_state(union drbd_state ps)
3191{
3192 union drbd_state ms;
3193
3194 static enum drbd_conns c_tab[] = {
3195 [C_CONNECTED] = C_CONNECTED,
3196
3197 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3198 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3199 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3200 [C_VERIFY_S] = C_VERIFY_T,
3201 [C_MASK] = C_MASK,
3202 };
3203
3204 ms.i = ps.i;
3205
3206 ms.conn = c_tab[ps.conn];
3207 ms.peer = ps.role;
3208 ms.role = ps.peer;
3209 ms.pdsk = ps.disk;
3210 ms.disk = ps.pdsk;
3211 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3212
3213 return ms;
3214}
3215
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003216static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd,
3217 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003218{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003219 struct p_req_state *p = &mdev->tconn->data.rbuf.req_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003220 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003221 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003222
Philipp Reisnerb411b362009-09-25 16:07:19 -07003223 mask.i = be32_to_cpu(p->mask);
3224 val.i = be32_to_cpu(p->val);
3225
Philipp Reisner25703f82011-02-07 14:35:25 +01003226 if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) &&
Philipp Reisner8410da82011-02-11 20:11:10 +01003227 mutex_is_locked(mdev->state_mutex)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003228 drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003229 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003230 }
3231
3232 mask = convert_state(mask);
3233 val = convert_state(val);
3234
3235 rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
3236
3237 drbd_send_sr_reply(mdev, rv);
3238 drbd_md_sync(mdev);
3239
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003240 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003241}
3242
/* Handle a P_STATE packet: reconcile the peer's view of the cluster
 * state with our own and apply the resulting state transition.  The
 * commit is done under tconn->req_lock with a retry loop in case our
 * local state changed while the lock was dropped.
 * Returns true on success; false on handshake failure, dry-run end, or
 * rejected state change (most failure paths force C_DISCONNECTING). */
static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd,
			 unsigned int data_size)
{
	struct p_state *p = &mdev->tconn->data.rbuf.state;
	union drbd_state os, ns, peer_state;
	enum drbd_disk_state real_peer_disk;
	enum chg_state_flags cs_flags;
	int rv;

	peer_state.i = be32_to_cpu(p->state);

	real_peer_disk = peer_state.disk;
	if (peer_state.disk == D_NEGOTIATING) {
		/* peer is still attaching; derive its effective disk state
		 * from the inconsistent flag in its UUID set */
		real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
		dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
	}

	spin_lock_irq(&mdev->tconn->req_lock);
 retry:
	/* snapshot our state; os stays the "old" state, ns is modified below */
	os = ns = mdev->state;
	spin_unlock_irq(&mdev->tconn->req_lock);

	/* peer says his disk is uptodate, while we think it is inconsistent,
	 * and this happens while we think we have a sync going on. */
	if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE &&
	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
		/* If we are (becoming) SyncSource, but peer is still in sync
		 * preparation, ignore its uptodate-ness to avoid flapping, it
		 * will change to inconsistent once the peer reaches active
		 * syncing states.
		 * It may have changed syncer-paused flags, however, so we
		 * cannot ignore this completely. */
		if (peer_state.conn > C_CONNECTED &&
		    peer_state.conn < C_SYNC_SOURCE)
			real_peer_disk = D_INCONSISTENT;

		/* if peer_state changes to connected at the same time,
		 * it explicitly notifies us that it finished resync.
		 * Maybe we should finish it up, too? */
		else if (os.conn >= C_SYNC_SOURCE &&
			 peer_state.conn == C_CONNECTED) {
			if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
				drbd_resync_finished(mdev);
			return true;
		}
	}

	/* peer says his disk is inconsistent, while we think it is uptodate,
	 * and this happens while the peer still thinks we have a sync going on,
	 * but we think we are already done with the sync.
	 * We ignore this to avoid flapping pdsk.
	 * This should not happen, if the peer is a recent version of drbd. */
	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
		real_peer_disk = D_UP_TO_DATE;

	if (ns.conn == C_WF_REPORT_PARAMS)
		ns.conn = C_CONNECTED;

	if (peer_state.conn == C_AHEAD)
		ns.conn = C_BEHIND;

	if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
	    get_ldev_if_state(mdev, D_NEGOTIATING)) {
		int cr; /* consider resync */

		/* if we established a new connection */
		cr  = (os.conn < C_CONNECTED);
		/* if we had an established connection
		 * and one of the nodes newly attaches a disk */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.disk == D_NEGOTIATING ||
			os.disk == D_NEGOTIATING));
		/* if we have both been inconsistent, and the peer has been
		 * forced to be UpToDate with --overwrite-data */
		cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
		/* if we had been plain connected, and the admin requested to
		 * start a sync by "invalidate" or "invalidate-remote" */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.conn >= C_STARTING_SYNC_S &&
			peer_state.conn <= C_WF_BITMAP_T));

		if (cr)
			ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);

		put_ldev(mdev);
		/* C_MASK from drbd_sync_handshake means: no agreement reached */
		if (ns.conn == C_MASK) {
			ns.conn = C_CONNECTED;
			if (mdev->state.disk == D_NEGOTIATING) {
				drbd_force_state(mdev, NS(disk, D_FAILED));
			} else if (peer_state.disk == D_NEGOTIATING) {
				dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
				peer_state.disk = D_DISKLESS;
				real_peer_disk = D_DISKLESS;
			} else {
				/* a dry-run handshake ends here without error noise */
				if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags))
					return false;
				D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
				drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
				return false;
			}
		}
	}

	spin_lock_irq(&mdev->tconn->req_lock);
	/* our state changed while we were not holding the lock: redo */
	if (mdev->state.i != os.i)
		goto retry;
	clear_bit(CONSIDER_RESYNC, &mdev->flags);
	ns.peer = peer_state.role;
	ns.pdsk = real_peer_disk;
	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
		ns.disk = mdev->new_state_tmp.disk;
	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
	if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
	    test_bit(NEW_CUR_UUID, &mdev->flags)) {
		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
		   for temporal network outages! */
		spin_unlock_irq(&mdev->tconn->req_lock);
		dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
		tl_clear(mdev);
		drbd_uuid_new_current(mdev);
		clear_bit(NEW_CUR_UUID, &mdev->flags);
		drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0));
		return false;
	}
	rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
	/* re-read: _drbd_set_state may have sanitized/adjusted the state */
	ns = mdev->state;
	spin_unlock_irq(&mdev->tconn->req_lock);

	if (rv < SS_SUCCESS) {
		drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
		return false;
	}

	if (os.conn > C_WF_REPORT_PARAMS) {
		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
		    peer_state.disk != D_NEGOTIATING ) {
			/* we want resync, peer has not yet decided to sync... */
			/* Nowadays only used when forcing a node into primary role and
			   setting its disk to UpToDate with that */
			drbd_send_uuids(mdev);
			drbd_send_state(mdev);
		}
	}

	mdev->tconn->net_conf->want_lose = 0;

	drbd_md_sync(mdev); /* update connected indicator, la_size, ... */

	return true;
}
3395
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003396static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packet cmd,
3397 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003398{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003399 struct p_rs_uuid *p = &mdev->tconn->data.rbuf.rs_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003400
3401 wait_event(mdev->misc_wait,
3402 mdev->state.conn == C_WF_SYNC_UUID ||
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02003403 mdev->state.conn == C_BEHIND ||
Philipp Reisnerb411b362009-09-25 16:07:19 -07003404 mdev->state.conn < C_CONNECTED ||
3405 mdev->state.disk < D_NEGOTIATING);
3406
3407 /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
3408
Philipp Reisnerb411b362009-09-25 16:07:19 -07003409 /* Here the _drbd_uuid_ functions are right, current should
3410 _not_ be rotated into the history */
3411 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
3412 _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
3413 _drbd_uuid_set(mdev, UI_BITMAP, 0UL);
3414
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003415 drbd_print_uuids(mdev, "updated sync uuid");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003416 drbd_start_resync(mdev, C_SYNC_TARGET);
3417
3418 put_ldev(mdev);
3419 } else
3420 dev_err(DEV, "Ignoring SyncUUID packet!\n");
3421
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003422 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003423}
3424
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003425/**
3426 * receive_bitmap_plain
3427 *
3428 * Return 0 when done, 1 when another iteration is needed, and a negative error
3429 * code upon failure.
3430 */
3431static int
Philipp Reisner02918be2010-08-20 14:35:10 +02003432receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
3433 unsigned long *buffer, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003434{
3435 unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
3436 unsigned want = num_words * sizeof(long);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003437 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003438
Philipp Reisner02918be2010-08-20 14:35:10 +02003439 if (want != data_size) {
3440 dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003441 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003442 }
3443 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003444 return 0;
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003445 err = drbd_recv(mdev->tconn, buffer, want);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003446 if (err != want) {
3447 if (err >= 0)
3448 err = -EIO;
3449 return err;
3450 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003451
3452 drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer);
3453
3454 c->word_offset += num_words;
3455 c->bit_offset = c->word_offset * BITS_PER_LONG;
3456 if (c->bit_offset > c->bm_bits)
3457 c->bit_offset = c->bm_bits;
3458
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003459 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003460}
3461
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003462/**
3463 * recv_bm_rle_bits
3464 *
3465 * Return 0 when done, 1 when another iteration is needed, and a negative error
3466 * code upon failure.
3467 */
3468static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003469recv_bm_rle_bits(struct drbd_conf *mdev,
3470 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003471 struct bm_xfer_ctx *c,
3472 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003473{
3474 struct bitstream bs;
3475 u64 look_ahead;
3476 u64 rl;
3477 u64 tmp;
3478 unsigned long s = c->bit_offset;
3479 unsigned long e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003480 int toggle = DCBP_get_start(p);
3481 int have;
3482 int bits;
3483
3484 bitstream_init(&bs, p->code, len, DCBP_get_pad_bits(p));
3485
3486 bits = bitstream_get_bits(&bs, &look_ahead, 64);
3487 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003488 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003489
3490 for (have = bits; have > 0; s += rl, toggle = !toggle) {
3491 bits = vli_decode_bits(&rl, look_ahead);
3492 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003493 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003494
3495 if (toggle) {
3496 e = s + rl -1;
3497 if (e >= c->bm_bits) {
3498 dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003499 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003500 }
3501 _drbd_bm_set_bits(mdev, s, e);
3502 }
3503
3504 if (have < bits) {
3505 dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
3506 have, bits, look_ahead,
3507 (unsigned int)(bs.cur.b - p->code),
3508 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003509 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003510 }
3511 look_ahead >>= bits;
3512 have -= bits;
3513
3514 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
3515 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003516 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003517 look_ahead |= tmp << have;
3518 have += bits;
3519 }
3520
3521 c->bit_offset = s;
3522 bm_xfer_ctx_bit_to_word_offset(c);
3523
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003524 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003525}
3526
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003527/**
3528 * decode_bitmap_c
3529 *
3530 * Return 0 when done, 1 when another iteration is needed, and a negative error
3531 * code upon failure.
3532 */
3533static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003534decode_bitmap_c(struct drbd_conf *mdev,
3535 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003536 struct bm_xfer_ctx *c,
3537 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003538{
3539 if (DCBP_get_code(p) == RLE_VLI_Bits)
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003540 return recv_bm_rle_bits(mdev, p, c, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003541
3542 /* other variants had been implemented for evaluation,
3543 * but have been dropped as this one turned out to be "best"
3544 * during all our tests. */
3545
3546 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
3547 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003548 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003549}
3550
3551void INFO_bm_xfer_stats(struct drbd_conf *mdev,
3552 const char *direction, struct bm_xfer_ctx *c)
3553{
3554 /* what would it take to transfer it "plaintext" */
Philipp Reisnerc0129492011-01-19 16:58:16 +01003555 unsigned plain = sizeof(struct p_header) *
Philipp Reisnerb411b362009-09-25 16:07:19 -07003556 ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
3557 + c->bm_words * sizeof(long);
3558 unsigned total = c->bytes[0] + c->bytes[1];
3559 unsigned r;
3560
3561 /* total can not be zero. but just in case: */
3562 if (total == 0)
3563 return;
3564
3565 /* don't report if not compressed */
3566 if (total >= plain)
3567 return;
3568
3569 /* total < plain. check for overflow, still */
3570 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
3571 : (1000 * total / plain);
3572
3573 if (r > 1000)
3574 r = 1000;
3575
3576 r = 1000 - r;
3577 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
3578 "total %u; compression: %u.%u%%\n",
3579 direction,
3580 c->bytes[1], c->packets[1],
3581 c->bytes[0], c->packets[0],
3582 total, r/10, r % 10);
3583}
3584
3585/* Since we are processing the bitfield from lower addresses to higher,
3586 it does not matter if the process it in 32 bit chunks or 64 bit
3587 chunks as long as it is little endian. (Understand it as byte stream,
3588 beginning with the lowest byte...) If we would use big endian
3589 we would need to process it from the highest address to the lowest,
3590 in order to be agnostic to the 32 vs 64 bits issue.
3591
3592 returns 0 on failure, 1 if we successfully received it. */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003593static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd,
3594 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003595{
3596 struct bm_xfer_ctx c;
3597 void *buffer;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003598 int err;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003599 int ok = false;
Philipp Reisner257d0af2011-01-26 12:15:29 +01003600 struct p_header *h = &mdev->tconn->data.rbuf.header;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003601 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003602
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003603 drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
3604 /* you are supposed to send additional out-of-sync information
3605 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003606
3607 /* maybe we should use some per thread scratch page,
3608 * and allocate that during initial device creation? */
3609 buffer = (unsigned long *) __get_free_page(GFP_NOIO);
3610 if (!buffer) {
3611 dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
3612 goto out;
3613 }
3614
3615 c = (struct bm_xfer_ctx) {
3616 .bm_bits = drbd_bm_bits(mdev),
3617 .bm_words = drbd_bm_words(mdev),
3618 };
3619
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003620 for(;;) {
Philipp Reisner02918be2010-08-20 14:35:10 +02003621 if (cmd == P_BITMAP) {
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003622 err = receive_bitmap_plain(mdev, data_size, buffer, &c);
Philipp Reisner02918be2010-08-20 14:35:10 +02003623 } else if (cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003624 /* MAYBE: sanity check that we speak proto >= 90,
3625 * and the feature is enabled! */
3626 struct p_compressed_bm *p;
3627
Philipp Reisner02918be2010-08-20 14:35:10 +02003628 if (data_size > BM_PACKET_PAYLOAD_BYTES) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003629 dev_err(DEV, "ReportCBitmap packet too large\n");
3630 goto out;
3631 }
3632 /* use the page buff */
3633 p = buffer;
3634 memcpy(p, h, sizeof(*h));
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003635 if (drbd_recv(mdev->tconn, p->head.payload, data_size) != data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003636 goto out;
Lars Ellenberg004352f2010-10-05 20:13:58 +02003637 if (data_size <= (sizeof(*p) - sizeof(p->head))) {
3638 dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size);
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01003639 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003640 }
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003641 err = decode_bitmap_c(mdev, p, &c, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003642 } else {
Philipp Reisner02918be2010-08-20 14:35:10 +02003643 dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003644 goto out;
3645 }
3646
Philipp Reisner02918be2010-08-20 14:35:10 +02003647 c.packets[cmd == P_BITMAP]++;
Philipp Reisner257d0af2011-01-26 12:15:29 +01003648 c.bytes[cmd == P_BITMAP] += sizeof(struct p_header) + data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003649
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003650 if (err <= 0) {
3651 if (err < 0)
3652 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003653 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003654 }
Philipp Reisner9ba7aa02011-02-07 17:32:41 +01003655 if (!drbd_recv_header(mdev->tconn, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003656 goto out;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003657 cmd = pi.cmd;
3658 data_size = pi.size;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003659 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003660
3661 INFO_bm_xfer_stats(mdev, "receive", &c);
3662
3663 if (mdev->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003664 enum drbd_state_rv rv;
3665
Philipp Reisnerb411b362009-09-25 16:07:19 -07003666 ok = !drbd_send_bitmap(mdev);
3667 if (!ok)
3668 goto out;
3669 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003670 rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
3671 D_ASSERT(rv == SS_SUCCESS);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003672 } else if (mdev->state.conn != C_WF_BITMAP_S) {
3673 /* admin may have requested C_DISCONNECTING,
3674 * other threads may have noticed network errors */
3675 dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
3676 drbd_conn_str(mdev->state.conn));
3677 }
3678
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003679 ok = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003680 out:
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003681 drbd_bm_unlock(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003682 if (ok && mdev->state.conn == C_WF_BITMAP_S)
3683 drbd_start_resync(mdev, C_SYNC_SOURCE);
3684 free_page((unsigned long) buffer);
3685 return ok;
3686}
3687
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003688static int receive_skip(struct drbd_conf *mdev, enum drbd_packet cmd,
3689 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003690{
3691 /* TODO zero copy sink :) */
3692 static char sink[128];
3693 int size, want, r;
3694
Philipp Reisner02918be2010-08-20 14:35:10 +02003695 dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
3696 cmd, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003697
Philipp Reisner02918be2010-08-20 14:35:10 +02003698 size = data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003699 while (size > 0) {
3700 want = min_t(int, size, sizeof(sink));
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003701 r = drbd_recv(mdev->tconn, sink, want);
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01003702 if (!expect(r > 0))
3703 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003704 size -= r;
3705 }
3706 return size == 0;
3707}
3708
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003709static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packet cmd,
3710 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003711{
Philipp Reisnerb411b362009-09-25 16:07:19 -07003712 /* Make sure we've acked all the TCP data associated
3713 * with the data requests being unplugged */
Philipp Reisnere42325a2011-01-19 13:55:45 +01003714 drbd_tcp_quickack(mdev->tconn->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003715
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003716 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003717}
3718
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003719static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packet cmd,
3720 unsigned int data_size)
Philipp Reisner73a01a12010-10-27 14:33:00 +02003721{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003722 struct p_block_desc *p = &mdev->tconn->data.rbuf.block_desc;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003723
Lars Ellenbergf735e362010-12-17 21:06:18 +01003724 switch (mdev->state.conn) {
3725 case C_WF_SYNC_UUID:
3726 case C_WF_BITMAP_T:
3727 case C_BEHIND:
3728 break;
3729 default:
3730 dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
3731 drbd_conn_str(mdev->state.conn));
3732 }
3733
Philipp Reisner73a01a12010-10-27 14:33:00 +02003734 drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
3735
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003736 return true;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003737}
3738
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003739typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packet cmd,
3740 unsigned int to_receive);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003741
Philipp Reisner02918be2010-08-20 14:35:10 +02003742struct data_cmd {
3743 int expect_payload;
3744 size_t pkt_size;
3745 drbd_cmd_handler_f function;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003746};
3747
Philipp Reisner02918be2010-08-20 14:35:10 +02003748static struct data_cmd drbd_cmd_handler[] = {
3749 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
3750 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
3751 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
3752 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
Philipp Reisner257d0af2011-01-26 12:15:29 +01003753 [P_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } ,
3754 [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } ,
3755 [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header), receive_UnplugRemote },
Philipp Reisner02918be2010-08-20 14:35:10 +02003756 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3757 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
Philipp Reisner257d0af2011-01-26 12:15:29 +01003758 [P_SYNC_PARAM] = { 1, sizeof(struct p_header), receive_SyncParam },
3759 [P_SYNC_PARAM89] = { 1, sizeof(struct p_header), receive_SyncParam },
Philipp Reisner02918be2010-08-20 14:35:10 +02003760 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
3761 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
3762 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
3763 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
3764 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
3765 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
3766 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3767 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
3768 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
3769 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
Philipp Reisner73a01a12010-10-27 14:33:00 +02003770 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
Philipp Reisner02918be2010-08-20 14:35:10 +02003771 /* anything missing from this table is in
3772 * the asender_tbl, see get_asender_cmd */
3773 [P_MAX_CMD] = { 0, 0, NULL },
3774};
3775
3776/* All handler functions that expect a sub-header get that sub-header in
Philipp Reisnere42325a2011-01-19 13:55:45 +01003777 mdev->tconn->data.rbuf.header.head.payload.
Philipp Reisner02918be2010-08-20 14:35:10 +02003778
Philipp Reisnere42325a2011-01-19 13:55:45 +01003779 Usually in mdev->tconn->data.rbuf.header.head the callback can find the usual
Philipp Reisner02918be2010-08-20 14:35:10 +02003780 p_header, but they may not rely on that. Since there is also p_header95 !
3781 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003782
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003783static void drbdd(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003784{
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003785 struct p_header *header = &tconn->data.rbuf.header;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003786 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02003787 size_t shs; /* sub header size */
3788 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003789
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003790 while (get_t_state(&tconn->receiver) == RUNNING) {
3791 drbd_thread_current_set_cpu(&tconn->receiver);
3792 if (!drbd_recv_header(tconn, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02003793 goto err_out;
3794
Philipp Reisner77351055b2011-02-07 17:24:26 +01003795 if (unlikely(pi.cmd >= P_MAX_CMD || !drbd_cmd_handler[pi.cmd].function)) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003796 conn_err(tconn, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003797 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01003798 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003799
Philipp Reisner77351055b2011-02-07 17:24:26 +01003800 shs = drbd_cmd_handler[pi.cmd].pkt_size - sizeof(struct p_header);
3801 if (pi.size - shs > 0 && !drbd_cmd_handler[pi.cmd].expect_payload) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003802 conn_err(tconn, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003803 goto err_out;
3804 }
3805
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003806 if (shs) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003807 rv = drbd_recv(tconn, &header->payload, shs);
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003808 if (unlikely(rv != shs)) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01003809 if (!signal_pending(current))
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003810 conn_warn(tconn, "short read while reading sub header: rv=%d\n", rv);
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003811 goto err_out;
3812 }
3813 }
3814
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003815 rv = drbd_cmd_handler[pi.cmd].function(vnr_to_mdev(tconn, pi.vnr), pi.cmd, pi.size - shs);
Philipp Reisner02918be2010-08-20 14:35:10 +02003816
3817 if (unlikely(!rv)) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003818 conn_err(tconn, "error receiving %s, l: %d!\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01003819 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003820 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003821 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003822 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003823
Philipp Reisner02918be2010-08-20 14:35:10 +02003824 if (0) {
3825 err_out:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01003826 conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003827 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003828}
3829
Philipp Reisnera21e9292011-02-08 15:08:49 +01003830void drbd_flush_workqueue(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003831{
3832 struct drbd_wq_barrier barr;
3833
3834 barr.w.cb = w_prev_work_done;
Philipp Reisnera21e9292011-02-08 15:08:49 +01003835 barr.w.mdev = mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003836 init_completion(&barr.done);
Philipp Reisnera21e9292011-02-08 15:08:49 +01003837 drbd_queue_work(&mdev->tconn->data.work, &barr.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003838 wait_for_completion(&barr.done);
3839}
3840
Philipp Reisner360cc742011-02-08 14:29:53 +01003841static void drbd_disconnect(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003842{
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01003843 enum drbd_conns oc;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003844 int rv = SS_UNKNOWN_ERROR;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003845
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01003846 if (tconn->cstate == C_STANDALONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003847 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003848
3849 /* asender does not clean up anything. it must not interfere, either */
Philipp Reisner360cc742011-02-08 14:29:53 +01003850 drbd_thread_stop(&tconn->asender);
3851 drbd_free_sock(tconn);
3852
3853 idr_for_each(&tconn->volumes, drbd_disconnected, tconn);
3854
3855 conn_info(tconn, "Connection closed\n");
3856
3857 spin_lock_irq(&tconn->req_lock);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01003858 oc = tconn->cstate;
3859 if (oc >= C_UNCONNECTED)
3860 rv = _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);
3861
Philipp Reisner360cc742011-02-08 14:29:53 +01003862 spin_unlock_irq(&tconn->req_lock);
3863
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01003864 if (oc == C_DISCONNECTING) {
Philipp Reisner360cc742011-02-08 14:29:53 +01003865 wait_event(tconn->net_cnt_wait, atomic_read(&tconn->net_cnt) == 0);
3866
3867 crypto_free_hash(tconn->cram_hmac_tfm);
3868 tconn->cram_hmac_tfm = NULL;
3869
3870 kfree(tconn->net_conf);
3871 tconn->net_conf = NULL;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01003872 conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE);
Philipp Reisner360cc742011-02-08 14:29:53 +01003873 }
3874}
3875
3876static int drbd_disconnected(int vnr, void *p, void *data)
3877{
3878 struct drbd_conf *mdev = (struct drbd_conf *)p;
3879 enum drbd_fencing_p fp;
3880 unsigned int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003881
Philipp Reisner85719572010-07-21 10:20:17 +02003882 /* wait for current activity to cease. */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003883 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003884 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
3885 _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
3886 _drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01003887 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003888
3889 /* We do not have data structures that would allow us to
3890 * get the rs_pending_cnt down to 0 again.
3891 * * On C_SYNC_TARGET we do not have any data structures describing
3892 * the pending RSDataRequest's we have sent.
3893 * * On C_SYNC_SOURCE there is no data structure that tracks
3894 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
3895 * And no, it is not the sum of the reference counts in the
3896 * resync_LRU. The resync_LRU tracks the whole operation including
3897 * the disk-IO, while the rs_pending_cnt only tracks the blocks
3898 * on the fly. */
3899 drbd_rs_cancel_all(mdev);
3900 mdev->rs_total = 0;
3901 mdev->rs_failed = 0;
3902 atomic_set(&mdev->rs_pending_cnt, 0);
3903 wake_up(&mdev->misc_wait);
3904
Philipp Reisner7fde2be2011-03-01 11:08:28 +01003905 del_timer(&mdev->request_timer);
3906
Philipp Reisnerb411b362009-09-25 16:07:19 -07003907 /* make sure syncer is stopped and w_resume_next_sg queued */
3908 del_timer_sync(&mdev->resync_timer);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003909 resync_timer_fn((unsigned long)mdev);
3910
Philipp Reisnerb411b362009-09-25 16:07:19 -07003911 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
3912 * w_make_resync_request etc. which may still be on the worker queue
3913 * to be "canceled" */
Philipp Reisnera21e9292011-02-08 15:08:49 +01003914 drbd_flush_workqueue(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003915
3916 /* This also does reclaim_net_ee(). If we do this too early, we might
3917 * miss some resync ee and pages.*/
3918 drbd_process_done_ee(mdev);
3919
3920 kfree(mdev->p_uuid);
3921 mdev->p_uuid = NULL;
3922
Philipp Reisnerfb22c402010-09-08 23:20:21 +02003923 if (!is_susp(mdev->state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003924 tl_clear(mdev);
3925
Philipp Reisnerb411b362009-09-25 16:07:19 -07003926 drbd_md_sync(mdev);
3927
3928 fp = FP_DONT_CARE;
3929 if (get_ldev(mdev)) {
3930 fp = mdev->ldev->dc.fencing;
3931 put_ldev(mdev);
3932 }
3933
Philipp Reisner87f7be42010-06-11 13:56:33 +02003934 if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN)
3935 drbd_try_outdate_peer_async(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003936
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003937 /* serialize with bitmap writeout triggered by the state change,
3938 * if any. */
3939 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
3940
Philipp Reisnerb411b362009-09-25 16:07:19 -07003941 /* tcp_close and release of sendpage pages can be deferred. I don't
3942 * want to use SO_LINGER, because apparently it can be deferred for
3943 * more than 20 seconds (longest time I checked).
3944 *
3945 * Actually we don't care for exactly when the network stack does its
3946 * put_page(), but release our reference on these pages right here.
3947 */
3948 i = drbd_release_ee(mdev, &mdev->net_ee);
3949 if (i)
3950 dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
Lars Ellenberg435f0742010-09-06 12:30:25 +02003951 i = atomic_read(&mdev->pp_in_use_by_net);
3952 if (i)
3953 dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003954 i = atomic_read(&mdev->pp_in_use);
3955 if (i)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02003956 dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003957
3958 D_ASSERT(list_empty(&mdev->read_ee));
3959 D_ASSERT(list_empty(&mdev->active_ee));
3960 D_ASSERT(list_empty(&mdev->sync_ee));
3961 D_ASSERT(list_empty(&mdev->done_ee));
3962
3963 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
3964 atomic_set(&mdev->current_epoch->epoch_size, 0);
3965 D_ASSERT(list_empty(&mdev->current_epoch->list));
Philipp Reisner360cc742011-02-08 14:29:53 +01003966
3967 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003968}
3969
3970/*
3971 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
3972 * we can agree on is stored in agreed_pro_version.
3973 *
3974 * feature flags and the reserved array should be enough room for future
3975 * enhancements of the handshake protocol, and possible plugins...
3976 *
3977 * for now, they are expected to be zero, but ignored.
3978 */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003979static int drbd_send_handshake(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003980{
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01003981 /* ASSERT current == mdev->tconn->receiver ... */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003982 struct p_handshake *p = &tconn->data.sbuf.handshake;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003983 int ok;
3984
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003985 if (mutex_lock_interruptible(&tconn->data.mutex)) {
3986 conn_err(tconn, "interrupted during initial handshake\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003987 return 0; /* interrupted. not ok. */
3988 }
3989
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003990 if (tconn->data.socket == NULL) {
3991 mutex_unlock(&tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003992 return 0;
3993 }
3994
3995 memset(p, 0, sizeof(*p));
3996 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
3997 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003998 ok = _conn_send_cmd(tconn, 0, tconn->data.socket, P_HAND_SHAKE,
3999 &p->head, sizeof(*p), 0);
4000 mutex_unlock(&tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004001 return ok;
4002}
4003
4004/*
4005 * return values:
4006 * 1 yes, we have a valid connection
4007 * 0 oops, did not work out, please try again
4008 * -1 peer talks different language,
4009 * no point in trying again, please go standalone.
4010 */
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004011static int drbd_do_handshake(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004012{
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004013 /* ASSERT current == tconn->receiver ... */
4014 struct p_handshake *p = &tconn->data.rbuf.handshake;
Philipp Reisner02918be2010-08-20 14:35:10 +02004015 const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004016 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004017 int rv;
4018
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004019 rv = drbd_send_handshake(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004020 if (!rv)
4021 return 0;
4022
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004023 rv = drbd_recv_header(tconn, &pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004024 if (!rv)
4025 return 0;
4026
Philipp Reisner77351055b2011-02-07 17:24:26 +01004027 if (pi.cmd != P_HAND_SHAKE) {
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004028 conn_err(tconn, "expected HandShake packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004029 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004030 return -1;
4031 }
4032
Philipp Reisner77351055b2011-02-07 17:24:26 +01004033 if (pi.size != expect) {
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004034 conn_err(tconn, "expected HandShake length: %u, received: %u\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004035 expect, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004036 return -1;
4037 }
4038
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004039 rv = drbd_recv(tconn, &p->head.payload, expect);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004040
4041 if (rv != expect) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004042 if (!signal_pending(current))
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004043 conn_warn(tconn, "short read receiving handshake packet: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004044 return 0;
4045 }
4046
Philipp Reisnerb411b362009-09-25 16:07:19 -07004047 p->protocol_min = be32_to_cpu(p->protocol_min);
4048 p->protocol_max = be32_to_cpu(p->protocol_max);
4049 if (p->protocol_max == 0)
4050 p->protocol_max = p->protocol_min;
4051
4052 if (PRO_VERSION_MAX < p->protocol_min ||
4053 PRO_VERSION_MIN > p->protocol_max)
4054 goto incompat;
4055
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004056 tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004057
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004058 conn_info(tconn, "Handshake successful: "
4059 "Agreed network protocol version %d\n", tconn->agreed_pro_version);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004060
4061 return 1;
4062
4063 incompat:
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004064 conn_err(tconn, "incompatible DRBD dialects: "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004065 "I support %d-%d, peer supports %d-%d\n",
4066 PRO_VERSION_MIN, PRO_VERSION_MAX,
4067 p->protocol_min, p->protocol_max);
4068 return -1;
4069}
4070
4071#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/* Stub used when the kernel lacks CONFIG_CRYPTO_HMAC: peer authentication
 * cannot work, so refuse permanently (-1 == auth failed, don't try again).
 * Fix: this function operates on a connection and has no mdev in scope,
 * so it must log via conn_err(tconn, ...); the previous dev_err(DEV, ...)
 * referenced the per-device DEV macro and could not compile when this
 * configuration branch was selected. */
static int drbd_do_auth(struct drbd_tconn *tconn)
{
	conn_err(tconn, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
	conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
4078#else
4079#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004080
4081/* Return value:
4082 1 - auth succeeded,
4083 0 - failed, try again (network error),
4084 -1 - auth failed, don't try again.
4085*/
4086
/* Challenge-response authentication over the meta socket, keyed with the
 * shared secret via the configured cram-hmac transform.
 * We send our challenge, HMAC the peer's challenge and send the response,
 * then verify the peer's response against our own challenge.
 * Return value (see comment block above):
 *   1 auth succeeded, 0 retry (network error), -1 permanent failure. */
static int drbd_do_auth(struct drbd_tconn *tconn)
{
	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
	struct scatterlist sg;
	char *response = NULL;
	char *right_response = NULL;
	char *peers_ch = NULL;
	unsigned int key_len = strlen(tconn->net_conf->shared_secret);
	unsigned int resp_size;
	struct hash_desc desc;
	struct packet_info pi;
	int rv;

	desc.tfm = tconn->cram_hmac_tfm;
	desc.flags = 0;

	/* key the HMAC with the shared secret before any digest operation */
	rv = crypto_hash_setkey(tconn->cram_hmac_tfm,
				(u8 *)tconn->net_conf->shared_secret, key_len);
	if (rv) {
		conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	get_random_bytes(my_challenge, CHALLENGE_LEN);

	rv = conn_send_cmd2(tconn, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN);
	if (!rv)
		goto fail;

	rv = drbd_recv_header(tconn, &pi);
	if (!rv)
		goto fail;

	if (pi.cmd != P_AUTH_CHALLENGE) {
		conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	/* bound the peer-supplied length before allocating for it.
	 * NOTE(review): only the upper bound is checked; pi.size == 0 would
	 * lead to a zero-size allocation and an empty challenge -- confirm
	 * a conforming peer always sends a non-empty challenge. */
	if (pi.size > CHALLENGE_LEN * 2) {
		conn_err(tconn, "expected AuthChallenge payload too big.\n");
		rv = -1;
		goto fail;
	}

	peers_ch = kmalloc(pi.size, GFP_NOIO);
	if (peers_ch == NULL) {
		conn_err(tconn, "kmalloc of peers_ch failed\n");
		rv = -1;
		goto fail;
	}

	rv = drbd_recv(tconn, peers_ch, pi.size);

	if (rv != pi.size) {
		if (!signal_pending(current))
			conn_warn(tconn, "short read AuthChallenge: l=%u\n", rv);
		rv = 0;	/* short read: treat as transient, retry */
		goto fail;
	}

	/* response = HMAC(secret, peer's challenge) */
	resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
	response = kmalloc(resp_size, GFP_NOIO);
	if (response == NULL) {
		conn_err(tconn, "kmalloc of response failed\n");
		rv = -1;
		goto fail;
	}

	sg_init_table(&sg, 1);
	sg_set_buf(&sg, peers_ch, pi.size);

	rv = crypto_hash_digest(&desc, &sg, sg.length, response);
	if (rv) {
		conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	rv = conn_send_cmd2(tconn, P_AUTH_RESPONSE, response, resp_size);
	if (!rv)
		goto fail;

	rv = drbd_recv_header(tconn, &pi);
	if (!rv)
		goto fail;

	if (pi.cmd != P_AUTH_RESPONSE) {
		conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size != resp_size) {
		conn_err(tconn, "expected AuthResponse payload of wrong size\n");
		rv = 0;
		goto fail;
	}

	/* reuse the response buffer for the peer's answer; our own response
	 * has already been sent, so it is no longer needed */
	rv = drbd_recv(tconn, response , resp_size);

	if (rv != resp_size) {
		if (!signal_pending(current))
			conn_warn(tconn, "short read receiving AuthResponse: l=%u\n", rv);
		rv = 0;
		goto fail;
	}

	/* compute what the peer's response must be: HMAC(secret, my_challenge) */
	right_response = kmalloc(resp_size, GFP_NOIO);
	if (right_response == NULL) {
		conn_err(tconn, "kmalloc of right_response failed\n");
		rv = -1;
		goto fail;
	}

	sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);

	rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
	if (rv) {
		conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	/* NOTE(review): memcmp() is not constant-time; a timing-safe compare
	 * (crypto_memneq) would be preferable here -- confirm threat model. */
	rv = !memcmp(response, right_response, resp_size);

	if (rv)
		conn_info(tconn, "Peer authenticated using %d bytes of '%s' HMAC\n",
		     resp_size, tconn->net_conf->cram_hmac_alg);
	else
		rv = -1;

 fail:
	/* kfree(NULL) is a no-op, so the partially-initialized paths are fine */
	kfree(peers_ch);
	kfree(response);
	kfree(right_response);

	return rv;
}
4229#endif
4230
4231int drbdd_init(struct drbd_thread *thi)
4232{
Philipp Reisner392c8802011-02-09 10:33:31 +01004233 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004234 int h;
4235
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004236 conn_info(tconn, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004237
4238 do {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004239 h = drbd_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004240 if (h == 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004241 drbd_disconnect(tconn);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004242 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004243 }
4244 if (h == -1) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004245 conn_warn(tconn, "Discarding network configuration.\n");
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004246 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004247 }
4248 } while (h == 0);
4249
4250 if (h > 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004251 if (get_net_conf(tconn)) {
4252 drbdd(tconn);
4253 put_net_conf(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004254 }
4255 }
4256
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004257 drbd_disconnect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004258
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004259 conn_info(tconn, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004260 return 0;
4261}
4262
4263/* ********* acknowledge sender ******** */
4264
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004265static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004266{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004267 struct p_req_state_reply *p = &mdev->tconn->meta.rbuf.req_state_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004268
4269 int retcode = be32_to_cpu(p->retcode);
4270
4271 if (retcode >= SS_SUCCESS) {
4272 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4273 } else {
4274 set_bit(CL_ST_CHG_FAIL, &mdev->flags);
4275 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
4276 drbd_set_st_err_str(retcode), retcode);
4277 }
4278 wake_up(&mdev->state_wait);
4279
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004280 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004281}
4282
/* Peer sent P_PING on the meta socket: answer immediately with a ping ack. */
static int got_Ping(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	return drbd_send_ping_ack(mdev->tconn);

}
4288
/* Peer answered our ping: drop back from the short ping timeout to the
 * relaxed idle timeout and wake any waiter on ping_wait.  The
 * test_and_set_bit keeps us from issuing redundant wake_up calls. */
static int got_PingAck(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct drbd_tconn *tconn = mdev->tconn;
	/* restore idle timeout */
	tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
	if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags))
		wake_up(&tconn->ping_wait);

	return true;
}
4299
/* Peer confirmed a checksum-based resync block is already in sync
 * (protocol >= 89 only): mark the extent in sync, account the saved
 * transfer, and credit it to the resync throttling counters. */
static int got_IsInSync(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);

	D_ASSERT(mdev->tconn->agreed_pro_version >= 89);

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	if (get_ldev(mdev)) {
		drbd_rs_complete_io(mdev, sector);
		drbd_set_in_sync(mdev, sector, blksize);
		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
		mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
		put_ldev(mdev);
	}
	dec_rs_pending(mdev);
	/* blksize >> 9: bytes to 512-byte sectors */
	atomic_add(blksize >> 9, &mdev->rs_sect_in);

	return true;
}
4322
/* Look up the request identified by (id, sector) in @root (one of the
 * per-device request trees) and feed the state machine event @what into
 * it, all under req_lock.  Returns false when the request is missing and
 * @missing_ok is not set.  Any resulting master bio completion is done
 * after dropping the lock. */
static int
validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
			      struct rb_root *root, const char *func,
			      enum drbd_req_event what, bool missing_ok)
{
	struct drbd_request *req;
	struct bio_and_error m;

	spin_lock_irq(&mdev->tconn->req_lock);
	req = find_request(mdev, root, id, sector, missing_ok, func);
	if (unlikely(!req)) {
		spin_unlock_irq(&mdev->tconn->req_lock);
		return false;
	}
	__req_mod(req, what, &m);
	spin_unlock_irq(&mdev->tconn->req_lock);

	/* complete the master bio outside the spinlock */
	if (m.bio)
		complete_master_bio(mdev, &m);
	return true;
}
4344
/* Handle the family of positive write acknowledgements (P_RECV_ACK,
 * P_WRITE_ACK, P_RS_WRITE_ACK, P_DISCARD_ACK).  Resync writes
 * (block_id == ID_SYNCER) only update bitmap/pending counters;
 * application writes are routed through the request state machine. */
static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);
	enum drbd_req_event what;

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		/* ack for a resync request, not an application write */
		drbd_set_in_sync(mdev, sector, blksize);
		dec_rs_pending(mdev);
		return true;
	}
	/* map the wire command onto the request state machine event;
	 * the D_ASSERTs document which wire protocol each ack belongs to */
	switch (cmd) {
	case P_RS_WRITE_ACK:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
		what = WRITE_ACKED_BY_PEER_AND_SIS;
		break;
	case P_WRITE_ACK:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
		what = WRITE_ACKED_BY_PEER;
		break;
	case P_RECV_ACK:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B);
		what = RECV_ACKED_BY_PEER;
		break;
	case P_DISCARD_ACK:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
		what = CONFLICT_DISCARDED_BY_PEER;
		break;
	default:
		/* unreachable: get_asender_cmd() only dispatches known commands */
		D_ASSERT(0);
		return false;
	}

	return validate_req_change_req_state(mdev, p->block_id, sector,
					     &mdev->write_requests, __func__,
					     what, false);
}
4385
/* Negative write acknowledgement: the peer failed to apply a write.
 * For resync requests just account the failure; for application writes
 * push NEG_ACKED through the state machine.  In protocols A and B the
 * request may legitimately be gone already (missing_ok). */
static int got_NegAck(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
	sector_t sector = be64_to_cpu(p->sector);
	int size = be32_to_cpu(p->blksize);
	bool missing_ok = mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A ||
			  mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B;
	bool found;

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		dec_rs_pending(mdev);
		drbd_rs_failed_io(mdev, sector, size);
		return true;
	}

	found = validate_req_change_req_state(mdev, p->block_id, sector,
					      &mdev->write_requests, __func__,
					      NEG_ACKED, missing_ok);
	if (!found) {
		/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
		   The master bio might already be completed, therefore the
		   request is no longer in the collision hash. */
		/* In Protocol B we might already have got a P_RECV_ACK
		   but then get a P_NEG_ACK afterwards. */
		if (!missing_ok)
			return false;
		/* the write did not make it to the peer: mark it out of sync */
		drbd_set_out_of_sync(mdev, sector, size);
	}
	return true;
}
4418
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004419static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004420{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004421 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004422 sector_t sector = be64_to_cpu(p->sector);
4423
4424 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4425 dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n",
4426 (unsigned long long)sector, be32_to_cpu(p->blksize));
4427
4428 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004429 &mdev->read_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004430 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004431}
4432
/* Peer could not (P_NEG_RS_DREPLY) or will not (P_RS_CANCEL) serve a
 * resync read request.  Either way the resync I/O for this extent is
 * done; only the negative reply additionally records the failure. */
static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	sector_t sector;
	int size;
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	dec_rs_pending(mdev);

	if (get_ldev_if_state(mdev, D_FAILED)) {
		drbd_rs_complete_io(mdev, sector);
		switch (cmd) {
		case P_NEG_RS_DREPLY:
			drbd_rs_failed_io(mdev, sector, size);
			/* fall through */
		case P_RS_CANCEL:
			break;
		default:
			/* unreachable: only the two commands above dispatch here */
			D_ASSERT(0);
			put_ldev(mdev);
			return false;
		}
		put_ldev(mdev);
	}

	return true;
}
4463
/* Peer acknowledged a barrier: release the corresponding transfer-log
 * epoch.  If we are in Ahead mode and no application writes are in
 * flight, arm the timer that switches us back to SyncSource; the
 * test_and_set_bit ensures the timer is armed only once per epoch. */
static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_barrier_ack *p = &mdev->tconn->meta.rbuf.barrier_ack;

	tl_release(mdev, p->barrier, be32_to_cpu(p->set_size));

	if (mdev->state.conn == C_AHEAD &&
	    atomic_read(&mdev->ap_in_flight) == 0 &&
	    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) {
		mdev->start_resync_timer.expires = jiffies + HZ;
		add_timer(&mdev->start_resync_timer);
	}

	return true;
}
4479
/* Result of one online-verify request: record out-of-sync extents and,
 * when the last block was answered, queue the verify-finished work.
 * If the work item cannot be allocated, finish synchronously here. */
static int got_OVResult(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
	struct drbd_work *w;
	sector_t sector;
	int size;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
		drbd_ov_oos_found(mdev, sector, size);
	else
		ov_oos_print(mdev);

	/* without a local disk there is nothing more to account */
	if (!get_ldev(mdev))
		return true;

	drbd_rs_complete_io(mdev, sector);
	dec_rs_pending(mdev);

	--mdev->ov_left;

	/* let's advance progress step marks only for every other megabyte */
	if ((mdev->ov_left & 0x200) == 0x200)
		drbd_advance_rs_marks(mdev, mdev->ov_left);

	if (mdev->ov_left == 0) {
		w = kmalloc(sizeof(*w), GFP_NOIO);
		if (w) {
			w->cb = w_ov_finished;
			w->mdev = mdev;
			drbd_queue_work_front(&mdev->tconn->data.work, w);
		} else {
			/* allocation failed: finish the verify inline instead */
			dev_err(DEV, "kmalloc(w) failed.");
			ov_oos_print(mdev);
			drbd_resync_finished(mdev);
		}
	}
	put_ldev(mdev);
	return true;
}
4524
/* Intentionally ignore the packet (e.g. P_DELAY_PROBE); always succeeds. */
static int got_skip(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	return true;
}
4529
/* Dispatch entry for one meta-socket (asender) packet type. */
struct asender_cmd {
	size_t pkt_size;	/* total expected packet size, header included */
	int (*process)(struct drbd_conf *mdev, enum drbd_packet cmd);	/* handler; returns false on fatal error */
};
4534
4535static struct asender_cmd *get_asender_cmd(int cmd)
4536{
4537 static struct asender_cmd asender_tbl[] = {
4538 /* anything missing from this table is in
4539 * the drbd_cmd_handler (drbd_default_handler) table,
4540 * see the beginning of drbdd() */
Philipp Reisner257d0af2011-01-26 12:15:29 +01004541 [P_PING] = { sizeof(struct p_header), got_Ping },
4542 [P_PING_ACK] = { sizeof(struct p_header), got_PingAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07004543 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4544 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4545 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4546 [P_DISCARD_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4547 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
4548 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
4549 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply},
4550 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
4551 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
4552 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
4553 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
Philipp Reisner02918be2010-08-20 14:35:10 +02004554 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
Philipp Reisnerd612d302010-12-27 10:53:28 +01004555 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply},
Philipp Reisnerb411b362009-09-25 16:07:19 -07004556 [P_MAX_CMD] = { 0, NULL },
4557 };
4558 if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL)
4559 return NULL;
4560 return &asender_tbl[cmd];
4561}
4562
/* idr_for_each() callback: process the done_ee list of one volume.
 * Returns non-zero (which stops the iteration) when
 * drbd_process_done_ee() reports failure. */
static int _drbd_process_done_ee(int vnr, void *p, void *data)
{
	struct drbd_conf *mdev = (struct drbd_conf *)p;
	return !drbd_process_done_ee(mdev);
}
4568
4569static int _check_ee_empty(int vnr, void *p, void *data)
4570{
4571 struct drbd_conf *mdev = (struct drbd_conf *)p;
4572 struct drbd_tconn *tconn = mdev->tconn;
4573 int not_empty;
4574
4575 spin_lock_irq(&tconn->req_lock);
4576 not_empty = !list_empty(&mdev->done_ee);
4577 spin_unlock_irq(&tconn->req_lock);
4578
4579 return not_empty;
4580}
4581
/* Process the done_ee lists of all volumes of this connection, repeating
 * until they are all observed empty.  SIGNAL_ASENDER is cleared (and
 * pending signals flushed) before processing so the work is not
 * interrupted, and re-set before the emptiness re-check so a new
 * completion can wake the asender again. */
static int tconn_process_done_ee(struct drbd_tconn *tconn)
{
	int not_empty, err;

	do {
		clear_bit(SIGNAL_ASENDER, &tconn->flags);
		flush_signals(current);
		err = idr_for_each(&tconn->volumes, _drbd_process_done_ee, NULL);
		if (err)
			return err;
		set_bit(SIGNAL_ASENDER, &tconn->flags);
		not_empty = idr_for_each(&tconn->volumes, _check_ee_empty, NULL);
	} while (not_empty);

	return 0;
}
4598
/* The asender thread: services the meta socket.  It sends pings, flushes
 * completed epoch entries (acks) to the peer, and receives/dispatches
 * ack-type packets via get_asender_cmd().  A receive problem goes to
 * "reconnect" (network failure), an unknown packet to "disconnect". */
int drbd_asender(struct drbd_thread *thi)
{
	struct drbd_tconn *tconn = thi->tconn;
	struct p_header *h = &tconn->meta.rbuf.header;
	struct asender_cmd *cmd = NULL;	/* non-NULL while receiving a payload */
	struct packet_info pi;
	int rv;
	void *buf    = h;	/* receive cursor into the meta receive buffer */
	int received = 0;	/* bytes accumulated towards "expect" */
	int expect   = sizeof(struct p_header);
	int ping_timeout_active = 0;	/* a ping is outstanding; short timeout applies */

	current->policy = SCHED_RR;  /* Make this a realtime task! */
	current->rt_priority = 2;    /* more important than all other tasks */

	while (get_t_state(thi) == RUNNING) {
		drbd_thread_current_set_cpu(thi);
		if (test_and_clear_bit(SEND_PING, &tconn->flags)) {
			if (!drbd_send_ping(tconn)) {
				conn_err(tconn, "drbd_send_ping has failed\n");
				goto reconnect;
			}
			/* shorten the receive timeout while waiting for the ack */
			tconn->meta.socket->sk->sk_rcvtimeo =
				tconn->net_conf->ping_timeo*HZ/10;
			ping_timeout_active = 1;
		}

		/* TODO: conditionally cork; it may hurt latency if we cork without
		   much to send */
		if (!tconn->net_conf->no_cork)
			drbd_tcp_cork(tconn->meta.socket);
		if (tconn_process_done_ee(tconn))
			goto reconnect;
		/* but unconditionally uncork unless disabled */
		if (!tconn->net_conf->no_cork)
			drbd_tcp_uncork(tconn->meta.socket);

		/* short circuit, recv_msg would return EINTR anyways. */
		if (signal_pending(current))
			continue;

		rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0);
		clear_bit(SIGNAL_ASENDER, &tconn->flags);

		flush_signals(current);

		/* Note:
		 * -EINTR	 (on meta) we got a signal
		 * -EAGAIN	 (on meta) rcvtimeo expired
		 * -ECONNRESET	 other side closed the connection
		 * -ERESTARTSYS  (on data) we got a signal
		 * rv <  0	 other than above: unexpected error!
		 * rv == expected: full header or command
		 * rv <  expected: "woken" by signal during receive
		 * rv == 0	 : "connection shut down by peer"
		 */
		if (likely(rv > 0)) {
			received += rv;
			buf	 += rv;
		} else if (rv == 0) {
			conn_err(tconn, "meta connection shut down by peer.\n");
			goto reconnect;
		} else if (rv == -EAGAIN) {
			/* If the data socket received something meanwhile,
			 * that is good enough: peer is still alive. */
			if (time_after(tconn->last_received,
				jiffies - tconn->meta.socket->sk->sk_rcvtimeo))
				continue;
			if (ping_timeout_active) {
				conn_err(tconn, "PingAck did not arrive in time.\n");
				goto reconnect;
			}
			/* idle timeout expired: probe the peer with a ping */
			set_bit(SEND_PING, &tconn->flags);
			continue;
		} else if (rv == -EINTR) {
			continue;
		} else {
			conn_err(tconn, "sock_recvmsg returned %d\n", rv);
			goto reconnect;
		}

		/* header complete but command not yet decoded */
		if (received == expect && cmd == NULL) {
			if (!decode_header(tconn, h, &pi))
				goto reconnect;
			cmd = get_asender_cmd(pi.cmd);
			if (unlikely(cmd == NULL)) {
				conn_err(tconn, "unknown command %d on meta (l: %d)\n",
					pi.cmd, pi.size);
				goto disconnect;
			}
			expect = cmd->pkt_size;
			if (pi.size != expect - sizeof(struct p_header)) {
				conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n",
					pi.cmd, pi.size);
				goto reconnect;
			}
		}
		/* full packet received: dispatch and reset the receive state */
		if (received == expect) {
			tconn->last_received = jiffies;
			if (!cmd->process(vnr_to_mdev(tconn, pi.vnr), pi.cmd))
				goto reconnect;

			/* the idle_timeout (ping-int)
			 * has been restored in got_PingAck() */
			if (cmd == get_asender_cmd(P_PING_ACK))
				ping_timeout_active = 0;

			buf	 = h;
			received = 0;
			expect	 = sizeof(struct p_header);
			cmd	 = NULL;
		}
	}

	/* the if (0) trampolines keep the error paths out of the normal flow */
	if (0) {
reconnect:
		conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
	}
	if (0) {
disconnect:
		conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
	}
	clear_bit(SIGNAL_ASENDER, &tconn->flags);

	conn_info(tconn, "asender terminated\n");

	return 0;
}